difftastic/vendor/tree-sitter-janet-simple/grammar.js

229 lines
5.4 KiB
JavaScript

// numbers
//
// Building blocks shared by the numeric token rules.

/** Leading sign of a number: either '-' or '+'. */
const SIGN = choice('-', '+');

/** A single decimal digit. */
const DIGIT = /[0-9]/;

/** A single hexadecimal digit, upper or lower case. */
const HEX_DIGIT = /[0-9A-Fa-f]/;
/**
 * Radix prefix of an `NNr...` number: the decimal strings '2' through '36',
 * in ascending order (35 alternatives).
 */
const RADIX = choice(...Array.from({ length: 35 }, (_, i) => String(i + 2)));

/** A single ASCII letter or decimal digit. */
const ALPHA_NUM = /[a-zA-Z0-9]/;
// symbols and keywords
//
// Character sets follow janet/tools/symcharsgen.c.

/** A symbol character that is neither a decimal digit nor ':'. */
const SYM_CHAR_NO_DIGIT_NO_COLON = /[a-zA-Z!$%&*+\-./<?=>@^_]/;

/** Any symbol character, including decimal digits and ':'. */
const SYM_CHAR = /[0-9:a-zA-Z!$%&*+\-./<?=>@^_]/;
// strings
//
// Body of a double-quoted literal (without the surrounding quotes): zero or
// more items, each being either
//   - one character that is neither a backslash nor a double quote, or
//   - a backslash followed by any one character (newline included).
// thanks to tree-sitter-haskell
const STRING_DOUBLE_QUOTE_CONTENT = repeat(
  choice(
    /[^\\"]/,
    /\\(.|\n)/
  )
);
// Tree-sitter grammar for `janet_simple`.
//
// A source file is parsed as a flat sequence of forms; every form that can
// appear (at top level or nested inside a collection / reader macro) is one
// of the alternatives listed in the hidden `_lit` rule.
module.exports = grammar({
name: 'janet_simple',
// Tokens that may appear between any other tokens: whitespace and comments.
//
// mdn says \s is:
//
// [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]
//
// but that doesn't seem to match what tree-sitter thinks as it appears that
// for example, leaving out \x0b, \x0c, or \x00 from the following yields
// different behavior (other stuff may also differ)
extras: $ => [
/\s|\x0b|\x0c|\x00/,
$.comment
],
// Declared as external tokens: no rule in this file defines them, so they
// must be produced by an external scanner.
// NOTE(review): presumably Janet's backtick-delimited long buffers/strings --
// confirm against the scanner source.
externals: $ => [
$.long_buf_lit,
$.long_str_lit
],
rules: {
// THIS MUST BE FIRST -- even though this doesn't look like it matters
// Root rule: zero or more forms.
source: $ =>
repeat($._lit),
// '#' followed by the rest of the line.
comment: $ =>
/#.*/,
// Every form the grammar recognizes. Grouped as: atoms, collections, and
// single-character prefix (reader macro) forms.
_lit: $ =>
choice($.bool_lit,
$.buf_lit,
$.kwd_lit,
$.long_buf_lit,
$.long_str_lit,
$.nil_lit,
$.num_lit,
$.str_lit,
$.sym_lit,
//
$.par_arr_lit,
$.sqr_arr_lit,
$.struct_lit,
$.tbl_lit,
$.par_tup_lit,
$.sqr_tup_lit,
//
$.qq_lit,
$.quote_lit,
$.short_fn_lit,
$.splice_lit,
$.unquote_lit),
// simplest things
bool_lit: $ =>
// XXX: without the token here, false and true are exposed as
// anonymous nodes it seems...
// yet, the same does not happen for nil...strange
token(choice('false',
'true')),
// ':' followed by zero or more symbol characters, so a lone ':' is also a
// keyword. Wrapped in prec(2, ...).
kwd_lit: $ =>
prec(2, token(seq(':',
repeat(SYM_CHAR)))),
nil_lit: $ =>
'nil',
// A number is a decimal, a 0x-prefixed hex, or an explicit-radix literal.
// Wrapped in prec(5, ...), the highest precedence used in this file.
num_lit: $ =>
prec(5, choice($._dec,
$._hex,
$._radix)),
// Decimal number, as a single token:
//   [sign] digits/underscores/optional '.' [exponent]
// The two alternatives differ only in which side of the optional '.' must
// have at least one digit, so "1.", ".5" and "1.5" all match while a bare
// "." does not. Underscores are accepted only right after the first digit
// run, right after the '.', and after the second digit run. The exponent is
// 'e' or 'E', an optional sign, and at least one decimal digit.
// NOTE(review): because underscores are only allowed at those three spots,
// input with more than two underscore-separated digit groups (for example
// 1_000_000) is not matched as one token by this pattern -- confirm whether
// that matches Janet's own number scanner.
_dec: $ =>
token(seq(optional(SIGN),
choice(seq(repeat1(DIGIT),
repeat('_'),
optional('.'),
repeat('_'),
repeat(DIGIT),
repeat('_')),
seq(repeat(DIGIT),
repeat('_'),
optional('.'),
repeat('_'),
repeat1(DIGIT),
repeat('_'))),
optional(seq(choice('e', 'E'),
optional(SIGN),
repeat1(DIGIT))))),
// Hexadecimal number, as a single token: optional sign, the literal prefix
// "0x" (lower-case 'x' only), then the same digit/underscore/'.' layout as
// _dec but over hex digits. There is no exponent part here.
_hex: $ =>
token(seq(optional(SIGN),
'0',
'x',
choice(seq(repeat1(HEX_DIGIT),
repeat('_'),
optional('.'),
repeat('_'),
repeat(HEX_DIGIT),
repeat('_')),
seq(repeat(HEX_DIGIT),
repeat('_'),
optional('.'),
repeat('_'),
repeat1(HEX_DIGIT),
repeat('_'))))),
// Explicit-radix number, as a single token: optional sign, a radix from
// RADIX, 'r' or 'R', one alphanumeric character, then any mix of
// alphanumerics and underscores, and an optional exponent introduced by '&'
// (optional sign plus at least one decimal digit). Digits are not checked
// against the radix: any alphanumeric is accepted. No '.' is accepted.
_radix: $ =>
token(seq(optional(SIGN),
seq(RADIX,
choice('r', 'R'),
ALPHA_NUM,
repeat(choice(repeat(ALPHA_NUM),
repeat('_'))),
optional(seq('&',
optional(SIGN),
repeat1(DIGIT)))))),
// Double-quoted string, lexed as one token including both quotes.
str_lit: $ =>
token(seq('"',
STRING_DOUBLE_QUOTE_CONTENT,
'"')),
// Same as str_lit but opened with '@"'.
buf_lit: $ =>
token(seq('@"',
STRING_DOUBLE_QUOTE_CONTENT,
'"')),
// Symbol: the first character may not be a digit or ':'; the remaining
// characters may be any symbol character.
sym_lit: $ =>
token(seq(SYM_CHAR_NO_DIGIT_NO_COLON,
repeat(SYM_CHAR))),
// collection-ish things
// Each collection is an opening delimiter, zero or more forms, and the
// matching closing delimiter. The '@'-prefixed openers ('@(', '@[', '@{')
// are single string tokens. Nothing here requires struct_lit / tbl_lit to
// contain an even number of forms.
par_arr_lit: $ =>
seq('@(',
repeat($._lit),
')'),
sqr_arr_lit: $ =>
seq('@[',
repeat($._lit),
']'),
struct_lit: $ =>
seq('{',
repeat($._lit),
'}'),
tbl_lit: $ =>
seq('@{',
repeat($._lit),
'}'),
par_tup_lit: $ =>
seq('(',
repeat($._lit),
')'),
sqr_tup_lit: $ =>
seq('[',
repeat($._lit),
']'),
// macro-related
// Each of the following is one prefix character followed by exactly one
// form. Because that form is any _lit, prefixes can stack (e.g. ||8).
qq_lit: $ =>
seq('~',
$._lit),
quote_lit: $ =>
seq("'",
$._lit),
// following all work at the repl..
// |8, ||8, |||8, etc.
// |~(:x)
// |{:a 1}
// |[1 2]
// |"a"
// |:w
// |a-sym
// |@[8 9]
// |(= $ 1)
// XXX: |() doesn't work...but don't bother disallowing
short_fn_lit: $ =>
seq('|',
$._lit),
// XXX: ?
splice_lit: $ =>
seq(';',
$._lit),
unquote_lit: $ =>
seq(',',
$._lit),
}
});