// difftastic/vendored_parsers/tree-sitter-elisp/grammar.js
//
// 192 lines
// 4.8 KiB
// JavaScript

// Line comment: a semicolon followed by the rest of the line.
const COMMENT = token(/;.*/);
// Double-quoted string literal. The body is any run of characters other than
// `"` and `\`, or a backslash followed by any single character (newline
// included), so an escaped quote `\"` does not terminate the string.
const STRING = token(
seq('"', repeat(choice(/[^"\\]/, seq("\\", /(.|\n)/))), '"')
);
// Symbols can contain any character when escaped:
// https://www.gnu.org/software/emacs/manual/html_node/elisp/Symbol-Type.html
// Most characters do not need escaping, but space and parentheses
// certainly do.
//
// Symbols also cannot start with ?.
//
// The pattern is "first character, then zero or more further characters".
// Both positions reject `#`, whitespace, `(`, `)`, `[`, `]`, `'`, backquote,
// `,`, `\`, `"` and `;` unless written as a backslash escape (`\\.`); only the
// first position additionally rejects `?`.
const SYMBOL = token(
/([^?# \n\s\f()\[\]'`,\\";]|\\.)([^# \n\s\f()\[\]'`,\\";]|\\.)*/
);
// A backslash followed by one of the reader-macro characters: backquote,
// `'` or `,`. Used as an alternative of the `symbol` rule.
const ESCAPED_READER_SYMBOL = token(/\\(`|'|,)/);
// The literal two-character token `##`, also an alternative of `symbol`.
const INTERNED_EMPTY_STRING = token("##");
// Numeric literal tokens. The read syntax follows the "Integer Basics" and
// "Float Basics" sections of the GNU Emacs Lisp Reference Manual.

// Decimal integer: optional sign, digits, optional trailing period
// (e.g. 1, +1, -1, 1.).
const INTEGER_BASE10 = token(/[+-]?[0-9]+\.?/);
// Integer in an explicit radix: `#b`, `#o`, `#x` or `#<radix>r` (one- or
// two-digit radix) followed by ONE OR MORE digits, e.g. #b101100, #x2c,
// #24r1k. The `+` quantifier matters: without it only the first digit is
// consumed and the remainder is lexed as a separate atom.
const INTEGER_WITH_BASE = token(/#([box]|[0-9][0-9]?r)[0-9a-zA-Z]+/);
// Float with a decimal point and at least one fractional digit
// (e.g. 1500.0, .5, -0.25).
const FLOAT_WITH_DEC_POINT = token(/[+-]?[0-9]*\.[0-9]+/);
// Float with an exponent and no decimal point. The exponent may carry its own
// sign (e.g. +15e2, +1500000e-3).
const FLOAT_WITH_EXPONENT = token(/[+-]?[0-9]+[eE][+-]?[0-9]+/);
// Float with both a decimal point and an optionally signed exponent
// (e.g. 15.0e+2, .15e4).
const FLOAT_WITH_BOTH = token(/[+-]?[0-9]*\.[0-9]+[eE][+-]?[0-9]+/);
// Infinity: 1.0e+INF / -1.0e+INF. The decimal point is escaped so that only
// a literal `.` is accepted.
const FLOAT_INF = token(/-?1\.0[eE]\+INF/);
// Not-a-number: 0.0e+NaN / -0.0e+NaN. The decimal point is escaped so that
// only a literal `.` is accepted.
const FLOAT_NAN = token(/-?0\.0[eE]\+NaN/);
// Character literal tokens; every form starts with `?`.

// `?` followed by either a backslash plus one character, or one plain
// character (e.g. ?a or ?\n).
const CHAR = token(/\?(\\.|.)/);
// `?\N{...}` with a non-empty name that contains no `}`.
const UNICODE_NAME_CHAR = token(/\?\\N\{[^}]+\}/);
// `?\u` followed by exactly four hexadecimal digits.
const LOWER_CODE_POINT_CHAR = token(/\?\\u[0-9a-fA-F]{4}/);
// `?\U` followed by exactly eight hexadecimal digits.
const UPPER_CODE_POINT_CHAR = token(/\?\\U[0-9a-fA-F]{8}/);
// `?\x` followed by one or more hexadecimal digits.
const HEX_CHAR = token(/\?\\x[0-9a-fA-F]+/);
// `?\` followed by one to three octal digits (0-7).
const OCTAL_CHAR = token(/\?\\[0-7]{1,3}/);
// E.g. ?\C-o or ?\^o or ?\C-\S-o
// One or more modifier prefixes (`\C-`, `\M-`, `\S-`, `\H-`, `\s-`, `\A-` or
// `\^`) followed by either `\;` or any single character.
const KEY_CHAR = token(/\?(\\(([CMSHsA]-)|\^))+(\\;|.)/);
// E.g. ?\M-\123
// NOTE(review): this accepts the digits 0-9, whereas OCTAL_CHAR restricts
// itself to 0-7 — confirm whether 8 and 9 are intentionally allowed here.
const META_OCTAL_CHAR = token(/\?\\M-\\[0-9]{1,3}/);
// https://www.gnu.org/software/emacs/manual/html_node/elisp/Special-Read-Syntax.html
// The literal two-character token `#$`.
const BYTE_COMPILED_FILE_NAME = token("#$");
module.exports = grammar({
name: "elisp",
extras: ($) => [/(\s|\f)/, $.comment],
rules: {
source_file: ($) => repeat($._sexp),
_sexp: ($) =>
choice(
$.special_form,
$.function_definition,
$.macro_definition,
$.list,
$.vector,
$.hash_table,
$.bytecode,
$.string_text_properties,
$._atom,
$.quote,
$.unquote_splice,
$.unquote
),
special_form: ($) =>
seq(
"(",
choice(
"and",
"catch",
"cond",
"condition-case",
"defconst",
"defvar",
"function",
"if",
"interactive",
"lambda",
"let",
"let*",
"or",
"prog1",
"prog2",
"progn",
"quote",
"save-current-buffer",
"save-excursion",
"save-restriction",
"setq",
"setq-default",
"unwind-protect",
"while"
),
repeat($._sexp),
")"
),
function_definition: ($) =>
prec(
1,
seq(
"(",
choice("defun", "defsubst"),
field("name", $.symbol),
optional(field("parameters", $._sexp)),
optional(field("docstring", $.string)),
repeat($._sexp),
")"
)
),
macro_definition: ($) =>
prec(
1,
seq(
"(",
"defmacro",
field("name", $.symbol),
optional(field("parameters", $._sexp)),
optional(field("docstring", $.string)),
repeat($._sexp),
")"
)
),
_atom: ($) =>
choice(
$.float,
$.integer,
$.char,
$.string,
$.byte_compiled_file_name,
$.symbol
),
float: ($) =>
choice(
FLOAT_WITH_DEC_POINT,
FLOAT_WITH_EXPONENT,
FLOAT_WITH_BOTH,
FLOAT_INF,
FLOAT_NAN
),
integer: ($) => choice(INTEGER_BASE10, INTEGER_WITH_BASE),
char: ($) =>
choice(
CHAR,
UNICODE_NAME_CHAR,
LOWER_CODE_POINT_CHAR,
UPPER_CODE_POINT_CHAR,
HEX_CHAR,
OCTAL_CHAR,
KEY_CHAR,
META_OCTAL_CHAR
),
string: ($) => STRING,
byte_compiled_file_name: ($) => BYTE_COMPILED_FILE_NAME,
symbol: ($) =>
choice(
// Match nil and t separately so we can highlight them.
"nil",
"t",
// We need to define these as separate tokens so we can handle
// e.g '(defun) as a sexp. Without these, we just try
// function_definition and produce a parse failure.
"defun",
"defsubst",
"defmacro",
ESCAPED_READER_SYMBOL,
SYMBOL,
INTERNED_EMPTY_STRING
),
quote: ($) => seq(choice("#'", "'", "`"), $._sexp),
unquote_splice: ($) => seq(",@", $._sexp),
unquote: ($) => seq(",", $._sexp),
dot: ($) => token("."),
list: ($) => seq("(", choice(repeat($._sexp)), ")"),
vector: ($) => seq("[", repeat($._sexp), "]"),
bytecode: ($) => seq("#[", repeat($._sexp), "]"),
string_text_properties: ($) => seq("#(", $.string, repeat($._sexp), ")"),
hash_table: ($) => seq("#s(hash-table", repeat($._sexp), ")"),
comment: ($) => COMMENT,
},
});