// Mirror of https://github.com/Wilfred/difftastic/ (529 lines, 13 KiB, JavaScript)
// One aim is to parse what is correct (in the sense of being officially
// supported), while also being looser and accepting some additional
// things.  This is more or less in line with advice from the tree-sitter
// folks.

/**
 * Builds a single RegExp from a list of pattern fragments.
 *
 * The fragments are concatenated in order with no separator, which lets a
 * long character class be spelled out across several string arguments.
 *
 * @param {...string} patts - pattern source fragments
 * @returns {RegExp} regular expression compiled from the joined fragments
 */
function regex(...patts) {
  return new RegExp(patts.join(""));
}

// java.lang.Character.isWhitespace AND comma
//
//   Space Separator (Zs) but NOT including (U+00A0, U+2007, U+202F)
//     U+0020, U+1680, U+2000, U+2001, U+2002, U+2003, U+2004, U+2005,
//     U+2006, U+2008, U+2009, U+200A, U+205F, U+3000
//   Line Separator (Zl)
//     U+2028
//   Paragraph Separator (Zp)
//     U+2029
//   Horizontal Tabulation
//     \t
//   Line Feed
//     \n
//   Vertical Tabulation
//     U+000B
//   Form Feed
//     \f
//   Carriage Return
//     \r
//   File Separator
//     U+001C
//   Group Separator
//     U+001D
//   Record Separator
//     U+001E
//   Unit Separator
//     U+001F
//
// Character class matching exactly one of the characters listed above
// (comma included).
const WHITESPACE_CHAR =
  regex("[",
        "\\f\\n\\r\\t, ",
        "\\u000B\\u001C\\u001D\\u001E\\u001F",
        "\\u2028\\u2029\\u1680",
        "\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2008",
        "\\u2009\\u200a\\u205f\\u3000",
        "]");

// A run of one or more whitespace characters, lexed as a single token.
const WHITESPACE =
  token(repeat1(WHITESPACE_CHAR));

// A comment starts with ";" or "#!" and extends to the end of the line;
// the terminating newline, when present, is part of the token.
const COMMENT =
  token(regex('(;|#!).*\n?'));

// Single-character classes used as building blocks for numeric (and
// character) literals.
const DIGIT =
  regex('[0-9]');

const ALPHANUMERIC =
  regex('[0-9a-zA-Z]');

const HEX_DIGIT =
  regex('[0-9a-fA-F]');

const OCTAL_DIGIT =
  regex('[0-7]');

// e.g. 0x1F, 0XffN
const HEX_NUMBER =
  seq("0",
      regex('[xX]'),
      repeat1(HEX_DIGIT),
      optional("N"));

// e.g. 017, 07N
const OCTAL_NUMBER =
  seq("0",
      repeat1(OCTAL_DIGIT),
      optional("N"));

// e.g. 2r1010, 36RZZ
//
// XXX: not constraining number before r/R
// XXX: not constraining portion after r/R
const RADIX_NUMBER =
  seq(repeat1(DIGIT),
      regex('[rR]'),
      repeat1(ALPHANUMERIC));

// e.g. 1/2
const RATIO =
  seq(repeat1(DIGIT),
      "/",
      repeat1(DIGIT));

// e.g. 1.5, 1., 1e10, 1.5e-3M -- both the fractional part and the exponent
// are optional, so a plain run of digits also fits this shape
const DOUBLE =
  seq(repeat1(DIGIT),
      optional(seq(".",
                   repeat(DIGIT))),
      optional(seq(regex('[eE]'),
                   optional(regex('[+-]')),
                   repeat1(DIGIT))),
      optional("M"));

// e.g. 42, 42N, 42M
const INTEGER =
  seq(repeat1(DIGIT),
      optional(regex('[MN]')));

// Any numeric literal with an optional leading sign, lexed as a single
// token with lexical precedence 10.
const NUMBER =
  token(prec(10, seq(optional(regex('[+-]')),
                     choice(HEX_NUMBER,
                            OCTAL_NUMBER,
                            RADIX_NUMBER,
                            RATIO,
                            DOUBLE,
                            INTEGER))));

// The literal `nil`.
const NIL =
  token('nil');

// The literals `false` and `true`.
const BOOLEAN =
  token(choice('false',
               'true'));

// First character of a keyword's namespace or name (the part after the
// leading colon(s)): anything except the whitespace characters, the
// delimiters ()[]{}, the double quote, @ ~ ^ ; ` \ and , : /
const KEYWORD_HEAD =
  regex("[^",
        "\\f\\n\\r\\t ",
        "()",
        "\\[\\]",
        "{}",
        '"',
        "@~^;`",
        "\\\\",
        ",:/",
        "\\u000B\\u001C\\u001D\\u001E\\u001F",
        "\\u2028\\u2029\\u1680",
        "\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2008",
        "\\u2009\\u200a\\u205f\\u3000",
        "]");

// Subsequent characters additionally allow ":" (and "'").
const KEYWORD_BODY =
  choice(regex("[:']"), KEYWORD_HEAD);

// Name portion that follows a namespace delimiter; "/" is allowed here
// too, and any of the body characters may come first.
const KEYWORD_NAMESPACED_BODY =
  token(repeat1(choice(regex("[:'/]"), KEYWORD_HEAD)));

// Keyword text without its leading ":" / "::" marker and without any "/".
const KEYWORD_NO_SIGIL =
  token(seq(KEYWORD_HEAD,
            repeat(KEYWORD_BODY)));

const KEYWORD_MARK =
  token(":");

const AUTO_RESOLVE_MARK =
  token("::");

// Double-quoted string: runs of characters other than `"` and `\`,
// interleaved with backslash escapes (a backslash followed by any single
// character that regex "." matches).
const STRING =
  token(seq('"',
            repeat(regex('[^"\\\\]')),
            repeat(seq("\\",
                       regex("."),
                       repeat(regex('[^"\\\\]')))),
            '"'));

// "o" followed by one to three digits.
//
// XXX: better to match \o378 as a single item
const OCTAL_CHAR =
  seq("o",
      choice(seq(DIGIT, DIGIT, DIGIT),
             seq(DIGIT, DIGIT),
             seq(DIGIT)));

const NAMED_CHAR =
  choice("backspace",
         "formfeed",
         "newline",
         "return",
         "space",
         "tab");

// "u" followed by exactly four hex digits.
//
// XXX: outside of: (c >= '\uD800' && c <= '\uDFFF') - LispReader.java
//      but not doing this
const UNICODE =
  seq("u",
      HEX_DIGIT,
      HEX_DIGIT,
      HEX_DIGIT,
      HEX_DIGIT);

// XXX: not quite sure what this is supposed to be...
//      return Character.valueOf(token.charAt(0)); -- LispReader.java
//      java char is 16 bits...what can tree-sitter manage?
//
// XXX: null is supposed to be usable but putting \x00 below
//      does not seem to work
const ANY_CHAR =
  regex('.|\n');

// Character literal: a backslash followed by an octal, named, or unicode
// form, or by any single character (newline included).
const CHARACTER =
  token(seq("\\",
            choice(OCTAL_CHAR,
                   NAMED_CHAR,
                   UNICODE,
                   ANY_CHAR)));

// First character of a symbol: anything except the whitespace characters,
// "/", the delimiters ()[]{}, the double quote, @ ~ ^ ; ` \ and
// , : # ' as well as the digits 0-9.
const SYMBOL_HEAD =
  regex("[^",
        "\\f\\n\\r\\t ",
        "/",
        "()\\[\\]{}",
        '"',
        "@~^;`",
        "\\\\",
        ",:#'0-9",
        "\\u000B\\u001C\\u001D\\u001E\\u001F",
        "\\u2028\\u2029\\u1680",
        "\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2008",
        "\\u2009\\u200a\\u205f\\u3000",
        "]");

// Separates namespace from name in qualified symbols and keywords; also
// used on its own as a name (see the rules that reference it).
const NS_DELIMITER =
  token("/");

// Subsequent characters additionally allow : # ' and digits.
const SYMBOL_BODY =
  choice(SYMBOL_HEAD,
         regex("[:#'0-9]"));

// Name portion that follows a namespace delimiter; "/" is allowed here
// too, and any of the body characters may come first.
const SYMBOL_NAMESPACED_NAME =
  token(repeat1(choice(SYMBOL_HEAD,
                       regex("[/:#'0-9]"))));

// XXX: no attempt is made to enforce certain complex things, e.g.
//
//        Symbols beginning or ending with ':' are reserved by Clojure.
//        A symbol can contain one or more non-repeating ':'s
const SYMBOL =
  token(seq(SYMBOL_HEAD,
            repeat(SYMBOL_BODY)));

module.exports = grammar({
|
|
name: 'clojure',
|
|
|
|
extras: $ =>
|
|
[],
|
|
|
|
conflicts: $ =>
|
|
[],
|
|
|
|
inline: $ =>
|
|
[$._kwd_leading_slash,
|
|
$._kwd_just_slash,
|
|
$._kwd_qualified,
|
|
$._kwd_unqualified,
|
|
$._kwd_marker,
|
|
$._sym_qualified,
|
|
$._sym_unqualified],
|
|
|
|
rules: {
|
|
// THIS MUST BE FIRST -- even though this doesn't look like it matters
|
|
source: $ =>
|
|
repeat(choice($._form,
|
|
$._gap)),
|
|
|
|
_gap: $ =>
|
|
choice($._ws,
|
|
$.comment,
|
|
$.dis_expr),
|
|
|
|
_ws: $ =>
|
|
WHITESPACE,
|
|
|
|
comment: $ =>
|
|
COMMENT,
|
|
|
|
dis_expr: $ =>
|
|
seq(field('marker', "#_"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
_form: $ =>
|
|
choice($.num_lit, // atom-ish
|
|
$.kwd_lit,
|
|
$.str_lit,
|
|
$.char_lit,
|
|
$.nil_lit,
|
|
$.bool_lit,
|
|
$.sym_lit,
|
|
// basic collection-ish
|
|
$.list_lit,
|
|
$.map_lit,
|
|
$.vec_lit,
|
|
// dispatch reader macros
|
|
$.set_lit,
|
|
$.anon_fn_lit,
|
|
$.regex_lit,
|
|
$.read_cond_lit,
|
|
$.splicing_read_cond_lit,
|
|
$.ns_map_lit,
|
|
$.var_quoting_lit,
|
|
$.sym_val_lit,
|
|
$.evaling_lit,
|
|
$.tagged_or_ctor_lit,
|
|
// some other reader macros
|
|
$.derefing_lit,
|
|
$.quoting_lit,
|
|
$.syn_quoting_lit,
|
|
$.unquote_splicing_lit,
|
|
$.unquoting_lit),
|
|
|
|
num_lit: $ =>
|
|
NUMBER,
|
|
|
|
kwd_lit: $ =>
|
|
choice($._kwd_leading_slash,
|
|
$._kwd_just_slash,
|
|
$._kwd_qualified,
|
|
$._kwd_unqualified),
|
|
|
|
// (namespace :/usr/bin/env) ; => ""
|
|
// (name :/usr/bin/env) ; => "usr/bin/env"
|
|
_kwd_leading_slash: $ =>
|
|
seq(field('marker', $._kwd_marker),
|
|
field('delimiter', NS_DELIMITER),
|
|
field('name', alias(KEYWORD_NAMESPACED_BODY, $.kwd_name))),
|
|
|
|
// (namespace :/) ;=> nil
|
|
// (name :/) ;=> "/"
|
|
_kwd_just_slash: $ =>
|
|
seq(field('marker', $._kwd_marker),
|
|
field('name', alias(NS_DELIMITER, $.kwd_name))),
|
|
|
|
_kwd_qualified: $ =>
|
|
prec(2, seq(field('marker', $._kwd_marker),
|
|
field('namespace', alias(KEYWORD_NO_SIGIL, $.kwd_ns)),
|
|
field('delimiter', NS_DELIMITER),
|
|
field('name', alias(KEYWORD_NAMESPACED_BODY, $.kwd_name)))),
|
|
|
|
_kwd_unqualified: $ =>
|
|
prec(1, seq(field('marker', $._kwd_marker),
|
|
field('name', alias(KEYWORD_NO_SIGIL, $.kwd_name)))),
|
|
|
|
_kwd_marker: $ =>
|
|
choice(KEYWORD_MARK, AUTO_RESOLVE_MARK),
|
|
|
|
str_lit: $ =>
|
|
STRING,
|
|
|
|
char_lit: $ =>
|
|
CHARACTER,
|
|
|
|
nil_lit: $ =>
|
|
NIL,
|
|
|
|
bool_lit: $ =>
|
|
BOOLEAN,
|
|
|
|
sym_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
choice($._sym_qualified, $._sym_unqualified)),
|
|
|
|
_sym_qualified: $ =>
|
|
prec(1, seq(field("namespace", alias(SYMBOL, $.sym_ns)),
|
|
field("delimiter", NS_DELIMITER),
|
|
field("name", alias(SYMBOL_NAMESPACED_NAME, $.sym_name)))),
|
|
|
|
_sym_unqualified: $ =>
|
|
field('name', alias(choice(NS_DELIMITER, // division symbol
|
|
SYMBOL),
|
|
$.sym_name)),
|
|
|
|
_metadata_lit: $ =>
|
|
seq(choice(field("meta", $.meta_lit),
|
|
field("old_meta", $.old_meta_lit)),
|
|
optional(repeat($._gap))),
|
|
|
|
meta_lit: $ =>
|
|
seq(field('marker', "^"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
old_meta_lit: $ =>
|
|
seq(field('marker', "#^"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
list_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
$._bare_list_lit),
|
|
|
|
_bare_list_lit: $ =>
|
|
seq(field('open', "("),
|
|
repeat(choice(field('value', $._form),
|
|
$._gap)),
|
|
field('close', ")")),
|
|
|
|
map_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
$._bare_map_lit),
|
|
|
|
_bare_map_lit: $ =>
|
|
seq(field('open', "{"),
|
|
repeat(choice(field('value', $._form),
|
|
$._gap)),
|
|
field('close', "}")),
|
|
|
|
vec_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
$._bare_vec_lit),
|
|
|
|
_bare_vec_lit: $ =>
|
|
seq(field('open', "["),
|
|
repeat(choice(field('value', $._form),
|
|
$._gap)),
|
|
field('close', "]")),
|
|
|
|
set_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
$._bare_set_lit),
|
|
|
|
_bare_set_lit: $ =>
|
|
seq(field('marker', "#"),
|
|
field('open', "{"),
|
|
repeat(choice(field('value', $._form),
|
|
$._gap)),
|
|
field('close', "}")),
|
|
|
|
anon_fn_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "#"),
|
|
$._bare_list_lit),
|
|
|
|
regex_lit: $ =>
|
|
seq(field('marker', "#"),
|
|
STRING),
|
|
|
|
read_cond_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "#?"),
|
|
// whitespace possible, but neither comment nor discard
|
|
repeat($._ws),
|
|
$._bare_list_lit),
|
|
|
|
splicing_read_cond_lit: $ =>
|
|
// XXX: metadata here doesn't seem to make sense, but the repl
|
|
// will accept: [^:x #?@(:clj [[:a]] :cljr [[:b]])]
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "#?@"),
|
|
// whitespace possible, but neither comment nor discard
|
|
repeat($._ws),
|
|
$._bare_list_lit),
|
|
|
|
auto_res_mark: $ =>
|
|
AUTO_RESOLVE_MARK,
|
|
|
|
ns_map_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "#"),
|
|
field('prefix', choice($.auto_res_mark,
|
|
$.kwd_lit)),
|
|
repeat($._gap),
|
|
$._bare_map_lit),
|
|
|
|
var_quoting_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "#'"),
|
|
repeat($._gap),
|
|
// XXX: symbol, reader conditional, and tagged literal can work
|
|
// any other things?
|
|
field('value', $._form)),
|
|
|
|
sym_val_lit: $ =>
|
|
seq(field('marker', "##"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
evaling_lit: $ =>
|
|
seq(repeat($._metadata_lit), // ^:x #=(vector 1)
|
|
field('marker', "#="),
|
|
repeat($._gap),
|
|
field('value', choice($.list_lit,
|
|
$.read_cond_lit,
|
|
// #= ^:a java.lang.String
|
|
$.sym_lit))),
|
|
|
|
// #uuid "00000000-0000-0000-0000-000000000000"
|
|
// #user.Fun[1 2]
|
|
// #user.Fun{:a 1 :b 2}
|
|
tagged_or_ctor_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "#"),
|
|
// # uuid "00000000-0000-0000-0000-000000000000"
|
|
// # #_ 1 uuid "00000000-0000-0000-0000-000000000000"
|
|
// etc.
|
|
repeat($._gap),
|
|
// # ^:a uuid "00000000-0000-0000-0000-000000000000"
|
|
field('tag', $.sym_lit),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
derefing_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "@"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
quoting_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "'"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
syn_quoting_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "`"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
unquote_splicing_lit: $ =>
|
|
// XXX: metadata here doesn't seem to make sense, but the repl
|
|
// will accept: `(^:x ~@[:a :b :c])
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "~@"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
|
|
unquoting_lit: $ =>
|
|
seq(repeat($._metadata_lit),
|
|
field('marker', "~"),
|
|
repeat($._gap),
|
|
field('value', $._form)),
|
|
}
|
|
});
|
|
|
|
// Local Variables:
// mode: js-mode
// js-indent-align-list-continuation: t
// js-indent-level: 2
// End: