// Mirror of https://github.com/Wilfred/difftastic/ (JavaScript, 533 lines, 10 KiB)
// r5rs: case insensitive
//   comment: only line comment
// r6rs: case sensitive except boolean, number, unicode hex literals
//   comment: all
// r7rs: case insensitive except letters, character names, mnemonic escapes.
//   comment: all

// Precedence helpers: each takes a rule and wraps it with tree-sitter's
// `prec`, using a fixed numeric precedence.
const PREC = {
  // Highest precedence used in this grammar (100).
  first: $ => prec(100, $),
  // Lowest precedence used in this grammar (-100).
  last: $ => prec(-100, $),
};

// Character-level patterns shared by every dialect below.
// NOTE: these regexes are consumed by tree-sitter through their source text,
// so they are written without JavaScript regex flags.
const common = {
  // One whitespace character: ASCII blanks plus Unicode categories Zs/Zl/Zp.
  whitespace: /[ \r\n\t\f\v\p{Zs}\p{Zl}\p{Zp}]/,
  // One whitespace character that stays on the same line: tab or category Zs.
  intra_whitespace: /[\t\p{Zs}]/,
  // One line terminator, including the two-character forms starting with \r.
  line_ending: /[\n\r\u{2028}\u{0085}]|(\r\n)|(\r\u{0085})/,
  // Any single character, including the line terminators that `.` skips.
  any_char: /.|[\r\n\u{85}\u{2028}\u{2029}]/,

  // One character allowed inside a bare symbol: anything that is neither
  // whitespace nor one of the delimiter characters listed in the class.
  symbol_element: /[^ \r\n\t\f\v\p{Zs}\p{Zl}\p{Zp}#;"'`,(){}\[\]\\|]/,
};

// Token fragments for the R5RS dialect (matched case-insensitively).
const r5rs = {
  // `#t` / `#f` in either case.
  boolean: seq("#", /[tTfF]/),
  // A numeric literal in any of the four supported radixes.
  number:
    choice(
      r5rs_number_base(2),
      r5rs_number_base(8),
      r5rs_number_base(10),
      r5rs_number_base(16)),
  // `#\space`, `#\newline` (any letter case), or `#\` followed by one character.
  character:
    seq(
      "#\\",
      choice(
        /[sS][pP][aA][cC][eE]/,
        /[nN][eE][wW][lL][iI][nN][eE]/,
        common.any_char)),
  // Only `\"` and `\\` are string escapes in this dialect.
  escape_sequence:
    choice(
      "\\\"",
      "\\\\"),
};

// Token fragments for the R6RS dialect.
const r6rs = {
  // `#t` / `#f` in either case.
  boolean: seq("#", /[tTfF]/),
  // A numeric literal in any of the four supported radixes.
  number:
    choice(
      r6rs_number_base(2),
      r6rs_number_base(8),
      r6rs_number_base(10),
      r6rs_number_base(16)),
  // `#\` followed by a character name, an `x<hex>` scalar value,
  // or any single character.
  character:
    seq(
      "#\\",
      choice(
        "nul", "alarm", "backspace", "tab",
        "linefeed", "newline", "vtab", "page",
        "return", "esc", "space", "delete",
        /x[0-9a-fA-F]+/,
        common.any_char)),
  // Mnemonic escapes, `\x<hex>;`, or a backslash followed by exactly one
  // intraline whitespace, a line ending, and one more intraline whitespace.
  escape_sequence:
    choice(
      /\\[abtnvfr"\\]/,
      /\\x[0-9a-fA-F]+;/,
      seq(
        "\\",
        common.intra_whitespace,
        common.line_ending,
        common.intra_whitespace)),
};

// Token fragments for the R7RS dialect.
const r7rs = {
  // `#t`, `#f`, `#true`, `#false`, each in any letter case.
  boolean:
    seq(
      "#",
      choice(
        /[tTfF]/,
        /[tT][rR][uU][eE]/,
        /[fF][aA][lL][sS][eE]/)),
  // A numeric literal in any of the four supported radixes.
  number:
    choice(
      r7rs_number_base(2),
      r7rs_number_base(8),
      r7rs_number_base(10),
      r7rs_number_base(16)),
  // `#\` followed by a character name, an `x<hex>` scalar value,
  // or any single character.
  character:
    seq(
      "#\\",
      choice(
        "alarm", "backspace", "delete",
        "escape", "newline", "null",
        "return", "space", "tab",
        /[xX][0-9a-fA-F]+/,
        common.any_char)),
  // A backslash followed by a mnemonic letter, a line continuation
  // (optional intraline whitespace around one line ending), or `x<hex>;`.
  escape_sequence:
    seq(
      "\\",
      choice(
        /[abtnr"\\]/,
        seq(
          repeat(common.intra_whitespace),
          common.line_ending,
          repeat(common.intra_whitespace)),
        /[xX][0-9a-fA-F]+;/)),
  // `|...|` delimited symbol: plain text, hex escapes, mnemonic escapes,
  // and an escaped vertical bar.
  symbol:
    seq(
      "|",
      repeat(
        choice(
          /[^|\\]+/,
          /\\[xX][0-9a-fA-F]+;/,
          /\\[abtnr]/,
          "\\|")),
      "|"),
};

// Additional forms accepted on top of the three dialect tables above.
const extension = {
  // Catch-all: a backslash followed by any one character matched by `.`.
  escape_sequence: /\\./,
  // Extra named characters written after `#\`.
  character:
    seq(
      "#\\",
      choice("bel", "ls", "nel", "rubout", "vt")),
};

// Rules that are embedded directly into other rules (as plain values)
// instead of being referenced through `$`.
const hidden_node = {
  // A symbol token: either a run of one or more symbol characters,
  // or the `|...|` delimited form.
  symbol:
    token(
      choice(
        repeat1(common.symbol_element),
        r7rs.symbol)),
};

module.exports = grammar({
|
|
name: "scheme",
|
|
|
|
extras: _ => [],
|
|
|
|
rules: {
|
|
program: $ => repeat($._token),
|
|
|
|
_token: $ =>
|
|
choice(
|
|
$._intertoken,
|
|
$._datum),
|
|
|
|
_intertoken: $ =>
|
|
choice(
|
|
// NOTE: `repeat1` here can significantly reduce code size than `repeat`
|
|
token(repeat1(common.whitespace)),
|
|
$.directive,
|
|
$.comment,
|
|
$.block_comment),
|
|
|
|
comment: $ =>
|
|
choice(
|
|
/;.*/,
|
|
seq("#;", repeat($._intertoken), $._datum)),
|
|
|
|
directive: $ =>
|
|
seq("#!", repeat($._intertoken), hidden_node.symbol),
|
|
|
|
block_comment: $ =>
|
|
seq("#|",
|
|
repeat(
|
|
choice(
|
|
PREC.first($.block_comment),
|
|
common.any_char)),
|
|
PREC.first("|#")),
|
|
|
|
_datum: $ => choice(
|
|
$.boolean,
|
|
$.character,
|
|
$.string,
|
|
|
|
// number/symbol precedence
|
|
// for same length token, prefer number
|
|
// otherwise, prefer symbol which is also longer
|
|
$.number,
|
|
$.symbol,
|
|
|
|
$.vector,
|
|
$.byte_vector,
|
|
$.list,
|
|
|
|
$.quote,
|
|
$.quasiquote,
|
|
$.unquote,
|
|
$.unquote_splicing,
|
|
$.syntax,
|
|
$.quasisyntax,
|
|
$.unsyntax,
|
|
$.unsyntax_splicing,
|
|
|
|
$.keyword),
|
|
|
|
// simple datum {{{
|
|
|
|
boolean: _ =>
|
|
token(
|
|
choice(
|
|
r5rs.boolean,
|
|
r6rs.boolean,
|
|
r7rs.boolean)),
|
|
|
|
number: _ =>
|
|
token(
|
|
choice(
|
|
r5rs.number,
|
|
r6rs.number,
|
|
r7rs.number)),
|
|
|
|
character: _ =>
|
|
token(
|
|
choice(
|
|
r5rs.character,
|
|
r6rs.character,
|
|
r7rs.character,
|
|
extension.character)),
|
|
|
|
string: $ =>
|
|
seq(
|
|
'"',
|
|
repeat(
|
|
choice(
|
|
$.escape_sequence,
|
|
/[^"\\]+/)),
|
|
'"'),
|
|
|
|
escape_sequence: _ =>
|
|
token(
|
|
choice(
|
|
r5rs.escape_sequence,
|
|
r6rs.escape_sequence,
|
|
r7rs.escape_sequence,
|
|
extension.escape_sequence)),
|
|
|
|
symbol: _ => token(hidden_node.symbol),
|
|
|
|
keyword: _ =>
|
|
token(
|
|
seq(
|
|
"#:",
|
|
hidden_node.symbol)),
|
|
|
|
// simple datum }}}
|
|
|
|
// compound datum {{{
|
|
|
|
list: $ =>
|
|
choice(
|
|
seq("(", repeat($._token), ")"),
|
|
seq("[", repeat($._token), "]"),
|
|
seq("{", repeat($._token), "}")),
|
|
|
|
quote: $ =>
|
|
seq(
|
|
"'",
|
|
repeat($._intertoken),
|
|
$._datum),
|
|
|
|
quasiquote: $ =>
|
|
seq(
|
|
"`",
|
|
repeat($._intertoken),
|
|
$._datum),
|
|
|
|
syntax: $ =>
|
|
seq(
|
|
"#'",
|
|
repeat($._intertoken),
|
|
$._datum),
|
|
|
|
quasisyntax: $ =>
|
|
seq(
|
|
"#`",
|
|
repeat($._intertoken),
|
|
$._datum),
|
|
|
|
unquote: $ =>
|
|
seq(
|
|
",",
|
|
repeat($._intertoken),
|
|
$._datum),
|
|
|
|
unquote_splicing: $ =>
|
|
seq(
|
|
",@",
|
|
repeat($._intertoken),
|
|
$._datum),
|
|
|
|
unsyntax: $ =>
|
|
seq(
|
|
"#,",
|
|
repeat($._intertoken),
|
|
$._datum),
|
|
|
|
unsyntax_splicing: $ =>
|
|
seq(
|
|
"#,@",
|
|
repeat($._intertoken),
|
|
$._datum),
|
|
|
|
vector: $ => seq("#(", repeat($._token), ")"),
|
|
|
|
byte_vector: $ => seq("#vu8(", repeat($._token), ")"),
|
|
// compound datum }}}
|
|
},
|
|
});
|
|
|
|
// number {{{

// Build the R5RS numeric-literal rule for radix `n` (2, 8, 10 or 16).
// The result is `prefix complex`: a radix/exactness prefix followed by a
// real, polar (`a@b`) or rectangular (`a+bi`) number.
function r5rs_number_base(n) {
  // Radix markers; the decimal marker is optional.
  const radixn = {
    2: choice("#b", "#B"),
    8: choice("#o", "#O"),
    10: optional(choice("#d", "#D")),
    16: choice("#x", "#X"),
  };
  // One digit in each radix.
  const digitsn = {
    2: /[01]/,
    8: /[0-7]/,
    10: /[0-9]/,
    16: /[0-9a-fA-F]/,
  };

  const exactness =
    optional(
      choice("#i", "#e", "#I", "#E"));
  const radix = radixn[n];
  // Radix and exactness may appear in either order.
  const prefix =
    choice(
      seq(radix, exactness),
      seq(exactness, radix));

  const sign = optional(/[+-]/);
  const digits = digitsn[n];

  // Exponent part of a decimal: marker, optional sign, decimal digits.
  const exponent = /[eEsSfFdDlL]/;
  const suffix =
    optional(
      seq(
        exponent,
        sign,
        repeat1(digitsn[10])));

  // Digits followed by any number of `#` placeholder characters.
  const uinteger =
    seq(
      repeat1(digits),
      repeat("#"));
  const decimal10 = choice(
    seq(uinteger, suffix),
    seq(".", repeat1(digits), repeat("#"), suffix),
    seq(repeat1(digits), ".", repeat(digits), repeat("#"), suffix),
    seq(repeat1(digits), repeat1("#"), ".", repeat("#"), suffix)
  );
  // Decimal notation exists only for radix 10; the other radixes
  // contribute the empty string here.
  const decimal = {
    2: "",
    8: "",
    10: decimal10,
    16: "",
  }[n];

  const ureal =
    choice(
      uinteger,
      seq(uinteger, "/", uinteger),
      decimal);
  const real = seq(sign, ureal);
  const complex = choice(
    real,
    seq(real, "@", real),
    seq(optional(real), /[+-]/, optional(ureal), "i")
  );

  return seq(prefix, complex);
}

// Build the R6RS numeric-literal rule for radix `n` (2, 8, 10 or 16).
// The result is `prefix complex`: a radix/exactness prefix followed by a
// real, polar (`a@b`) or rectangular (`a+bi`) number.
function r6rs_number_base(n) {
  // Radix markers; the decimal marker is optional.
  const radixn = {
    2: choice("#b", "#B"),
    8: choice("#o", "#O"),
    10: optional(choice("#d", "#D")),
    16: choice("#x", "#X"),
  };
  // One digit in each radix.
  const digitsn = {
    2: /[01]/,
    8: /[0-7]/,
    10: /[0-9]/,
    16: /[0-9a-fA-F]/,
  };

  const exactness =
    optional(
      choice("#i", "#e", "#I", "#E"));
  const radix = radixn[n];
  // Radix and exactness may appear in either order.
  const prefix =
    choice(
      seq(radix, exactness),
      seq(exactness, radix));

  const sign = optional(/[+-]/);
  const digits = digitsn[n];
  const digits10 = digitsn[10];

  // Exponent part of a decimal: marker, optional sign, decimal digits.
  const exponent = /[eEsSfFdDlL]/;
  const suffix =
    optional(
      seq(
        exponent,
        sign,
        repeat1(digits10)));

  const uinteger = repeat1(digits);
  const decimal10 =
    choice(
      seq(uinteger, suffix),
      seq(".", repeat1(digits), suffix),
      seq(repeat1(digits), ".", repeat(digits), suffix),
      seq(repeat1(digits), ".", suffix));
  // Decimal notation exists only for radix 10; the other radixes
  // contribute the empty string here.
  const decimal = {
    2: "",
    8: "",
    10: decimal10,
    16: "",
  }[n];

  // Optional `|<digits>` mantissa width after a decimal.
  const mantissa_width =
    optional(
      seq("|", repeat1(digits10)));

  // Written after a mandatory sign, see `real` below.
  const naninf = choice("nan.0", "inf.0");

  const ureal =
    seq(
      choice(
        uinteger,
        seq(uinteger, "/", uinteger),
        seq(decimal, mantissa_width)));
  const real =
    choice(
      seq(sign, ureal),
      seq(/[+-]/, naninf));
  const complex =
    choice(
      real,
      seq(real, "@", real),
      seq(
        optional(real),
        /[+-]/,
        optional(choice(ureal, naninf)),
        "i"));

  return seq(prefix, complex);
}

// Build the R7RS numeric-literal rule for radix `n` (2, 8, 10 or 16).
// The result is `prefix complex`: a radix/exactness prefix followed by a
// real, polar (`a@b`) or rectangular (`a+bi`) number.
function r7rs_number_base(n) {
  // Signed infinity / not-a-number, letters in any case.
  const infnan =
    choice(
      /[+-][iI][nN][fF]\.0/,
      /[+-][nN][aA][nN]\.0/);

  const exponent_marker = /[eE]/;
  const sign = optional(/[+-]/);
  const exactness = optional(/#[ieIE]/);
  // Radix marker; the decimal marker is optional.
  const radix =
    {
      2: /#[bB]/,
      8: /#[oO]/,
      10: optional(/#[dD]/),
      16: /#[xX]/,
    }[n];
  // One digit in the selected radix.
  const digit =
    {
      2: /[01]/,
      8: /[0-7]/,
      10: /[0-9]/,
      16: /[0-9a-fA-F]/,
    }[n];

  // Exponent part; only reachable through the radix-10 `decimal` below.
  const suffix =
    optional(
      seq(exponent_marker, sign, repeat1(digit)));

  // Radix and exactness may appear in either order.
  const prefix =
    choice(
      seq(radix, exactness),
      seq(exactness, radix));

  const uinteger = repeat1(digit);

  // Decimal notation exists only for radix 10; the other radixes
  // contribute the empty string here.
  const decimal =
    {
      2: "",
      8: "",
      10:
        choice(
          seq(uinteger, suffix),
          seq(".", repeat1(digit), suffix),
          seq(repeat1(digit), ".", repeat(digit), suffix)),
      16: "",
    }[n];

  const ureal =
    choice(
      uinteger,
      seq(uinteger, "/", uinteger),
      decimal);

  const real =
    choice(
      seq(sign, ureal),
      infnan);

  const complex =
    choice(
      real,
      seq(real, "@", real),
      seq(real, /[+-]/, ureal, "i"),
      seq(real, /[+-]/, "i"),
      seq(real, infnan, "i"),
      seq(/[+-]/, ureal, "i"),
      seq(infnan, "i"),
      seq(/[+-]/, "i"));

  const num =
    seq(
      prefix,
      complex);

  return num;
}

// number }}}