// Mirror of https://github.com/Wilfred/difftastic/ (JavaScript, 532 lines, 11 KiB).
// The R 4.2.0 syntax table, from ?Syntax:
//
//   ':: :::'            access variables in a namespace
//   '$ @'               component / slot extraction
//   '[ [['              indexing
//   '^'                 exponentiation (right to left)
//   '- +'               unary minus and plus
//   ':'                 sequence operator
//   '%any% |>'          special operators (including '%%' and '%/%')
//   '* /'               multiply, divide
//   '+ -'               (binary) add, subtract
//   '< > <= >= == !='   ordering and comparison
//   '!'                 negation
//   '& &&'              and
//   '| ||'              or
//   '~'                 as in formulae
//   '-> ->>'            rightwards assignment
//   '<- <<-'            assignment (right to left)
//   '='                 assignment (right to left)
//   '?'                 help (unary and binary)
//
// R also has an operator precedence table defined here:
//
// https://github.com/wch/r-source/blob/36008873fb8ca2af3bdaaff418dbade5f7bce118/src/main/gram.y#L414-L436
//
// However, the effective precedence of '?' and '=' is a bit different, as R
// defines special reduction rules for these operators:
//
// https://github.com/wch/r-source/blob/36008873fb8ca2af3bdaaff418dbade5f7bce118/src/main/gram.y#L440-L453
//
// Rather than try to replicate those reduction rules, we just adjust the
// operator precedence to match the declared precedence in the R syntax table,
// while allowing for R's declared precedence differences between certain
// control flow keywords.
// Numeric precedence levels shared by the grammar rules below. A larger number
// binds more tightly. Entries with equal values (e.g. SPECIAL and PIPE, or the
// loop keywords and LOW) deliberately share one level.
//
// Frozen because the table is only ever read; an accidental write would
// silently change how the grammar resolves ambiguities.
const PREC = Object.freeze({
  // Lexical precedence of comments (lowest of all).
  COMMENT: -1,

  // Function definitions and loop keywords.
  LOW: 0,
  WHILE: 0,
  FOR: 0,
  REPEAT: 0,

  // Conditionals.
  IF: 1,
  ELSE: 2,

  // Help and assignment operators.
  HELP: 3,
  EQ_ASSIGN: 4,
  LEFT_ASSIGN: 5,
  RIGHT_ASSIGN: 6,

  // Formula, logical and comparison operators.
  TILDE: 7,
  OR: 8,
  AND: 9,
  NOT: 10,
  REL: 11,

  // Arithmetic, special (%any%), pipe and sequence operators.
  PLUS: 12,
  TIMES: 13,
  SPECIAL: 14,
  PIPE: 14,
  PIPEBIND: 15,
  COLON: 16,
  UPLUS: 17,
  EXP: 18,

  // Indexing, extraction, namespace access and calls.
  SUBSET: 19,
  DOLLAR: 20,
  NS_GET: 21,
  CALL: 22,
  CALL_PIPE: 23,

  // Lexical precedence of numeric literals (`integer` uses FLOAT + 1).
  FLOAT: 24,
});
newline = '\n',
|
|
terminator = choice(newline, ';'),
|
|
|
|
module.exports = grammar({
|
|
name: 'r',
|
|
|
|
extras: $ => [
|
|
$.comment,
|
|
/\s/
|
|
],
|
|
|
|
conflicts: ($) => [
|
|
[$._pipe_rhs_argument, $._argument],
|
|
[$.pipe_rhs_arguments, $.arguments]
|
|
],
|
|
|
|
externals: $ => [
|
|
$._raw_string_literal
|
|
],
|
|
|
|
rules: {
|
|
program: $ => repeat(seq($._expression, optional(terminator))),
|
|
|
|
_definition: $ => choice(
|
|
$.function_definition,
|
|
$.lambda_function
|
|
// TODO: other kinds of definitions
|
|
),
|
|
|
|
function_definition: $ => prec.left(PREC.LOW, seq(
|
|
'function',
|
|
$.formal_parameters,
|
|
$._expression
|
|
)),
|
|
|
|
lambda_function: $ => prec.left(PREC.LOW, seq(
|
|
'\\',
|
|
$.formal_parameters,
|
|
$._expression
|
|
)),
|
|
|
|
if: $ => prec.right(PREC.IF, seq(
|
|
'if',
|
|
'(',
|
|
field('condition', $._expression),
|
|
')',
|
|
field('consequence', $._expression),
|
|
field('alternative', optional(seq('else', $._expression)))
|
|
)),
|
|
|
|
while: $ => prec.left(PREC.WHILE, seq(
|
|
'while',
|
|
'(',
|
|
field('condition', $._expression),
|
|
')',
|
|
field('body', $._expression)
|
|
)),
|
|
|
|
repeat: $ => prec.left(PREC.REPEAT, seq(
|
|
'repeat',
|
|
field('body', $._expression)
|
|
)),
|
|
|
|
for: $ => prec.left(PREC.FOR, seq(
|
|
'for',
|
|
'(',
|
|
field('name', $.identifier),
|
|
'in',
|
|
field('vector', $._expression),
|
|
')',
|
|
field('body', $._expression)
|
|
)),
|
|
|
|
switch: $ => seq(
|
|
'switch',
|
|
'(',
|
|
field('value', $._expression),
|
|
',',
|
|
field('body', $.arguments),
|
|
')'
|
|
),
|
|
|
|
formal_parameters: $ => seq(
|
|
'(',
|
|
optional(seq(
|
|
commaSep1($._formal_parameter),
|
|
optional(',')
|
|
)),
|
|
')'
|
|
),
|
|
|
|
default_parameter: $ => seq(
|
|
field('name', $.identifier),
|
|
'=',
|
|
field('value', $._expression)
|
|
),
|
|
|
|
_formal_parameter: $ => choice(
|
|
$.identifier,
|
|
$.default_parameter,
|
|
$.dots
|
|
),
|
|
|
|
block: $ => seq(
|
|
'{',
|
|
repeat($._expression),
|
|
'}'
|
|
),
|
|
|
|
arguments: $ => repeat1(choice(
|
|
$._argument,
|
|
',',
|
|
)),
|
|
|
|
default_argument: $ => prec.right(seq(
|
|
field('name', choice($.identifier, $.string, $.dots)),
|
|
'=',
|
|
field('value', optional($._expression))
|
|
)),
|
|
|
|
_argument: $ => prec.left(choice(
|
|
$._expression,
|
|
$.default_argument,
|
|
)),
|
|
|
|
call: $ => prec(PREC.CALL, seq(
|
|
field('function', $._expression),
|
|
'(',
|
|
field('arguments', optional($.arguments)),
|
|
')'
|
|
)),
|
|
|
|
_assignment: $ => choice(
|
|
$.equals_assignment,
|
|
$.left_assignment,
|
|
$.left_assignment2,
|
|
$.right_assignment,
|
|
$.super_assignment,
|
|
$.super_right_assignment,
|
|
),
|
|
|
|
left_assignment: $ => prec.right(PREC.LEFT_ASSIGN,
|
|
seq(
|
|
field('name', $._expression),
|
|
'<-',
|
|
field('value', $._expression)
|
|
)),
|
|
|
|
left_assignment2: $ => prec.right(PREC.LEFT_ASSIGN,
|
|
seq(
|
|
field('name', $._expression),
|
|
':=',
|
|
field('value', $._expression)
|
|
)),
|
|
|
|
equals_assignment: $ => prec.right(PREC.EQ_ASSIGN,
|
|
seq(
|
|
field('name', $._expression),
|
|
'=',
|
|
field('value', $._expression)
|
|
)),
|
|
|
|
super_assignment: $ => prec.right(PREC.LEFT_ASSIGN,
|
|
seq(
|
|
field('name', $._expression),
|
|
'<<-',
|
|
field('value', $._expression)
|
|
)),
|
|
|
|
super_right_assignment: $ => prec.left(PREC.RIGHT_ASSIGN,
|
|
seq(
|
|
field('value', $._expression),
|
|
'->>',
|
|
field('name', $._expression)
|
|
)),
|
|
|
|
right_assignment: $ => prec.left(PREC.RIGHT_ASSIGN,
|
|
seq(
|
|
field('value', $._expression),
|
|
'->',
|
|
field('name', $._expression)
|
|
)),
|
|
|
|
brace_list: $ => seq(
|
|
'{',
|
|
repeat(
|
|
seq($._expression, optional(terminator))
|
|
),
|
|
'}'
|
|
),
|
|
|
|
paren_list: $ => seq(
|
|
'(',
|
|
repeat(
|
|
$._expression
|
|
),
|
|
')'
|
|
),
|
|
|
|
subset: $ => prec(PREC.SUBSET, seq(
|
|
$._expression,
|
|
'[',
|
|
optional($.arguments),
|
|
']'
|
|
)),
|
|
|
|
subset2: $ => prec(PREC.SUBSET, seq(
|
|
$._expression,
|
|
'[[',
|
|
optional($.arguments),
|
|
']]'
|
|
)),
|
|
|
|
dollar: $ => prec.left(PREC.DOLLAR, seq(
|
|
$._expression,
|
|
'$',
|
|
choice(
|
|
$.identifier,
|
|
$.string
|
|
)
|
|
)),
|
|
|
|
slot: $ => prec.left(PREC.DOLLAR, seq(
|
|
$._expression,
|
|
'@',
|
|
$.identifier
|
|
)),
|
|
|
|
namespace_get: $ => prec.left(PREC.NS_GET, seq(
|
|
field('namespace', $.identifier),
|
|
'::',
|
|
field('function', $.identifier),
|
|
)),
|
|
|
|
namespace_get_internal: $ => prec.left(PREC.NS_GET, seq(
|
|
field('namespace', $.identifier),
|
|
':::',
|
|
field('function', $.identifier),
|
|
)),
|
|
|
|
help: $ => prec.left(PREC.HELP, seq(
|
|
$._expression,
|
|
'?',
|
|
$._expression
|
|
)),
|
|
|
|
dots: $ => '...',
|
|
|
|
placeholder: $ => '_',
|
|
|
|
pipe_placeholder_argument: $ => prec.right(seq(
|
|
field('name', $.identifier),
|
|
'=',
|
|
field('value', $.placeholder)
|
|
)),
|
|
|
|
_pipe_rhs_argument: $ => prec.right(choice(
|
|
$._expression,
|
|
$.default_argument,
|
|
alias($.pipe_placeholder_argument, $.default_argument)
|
|
)),
|
|
|
|
pipe_rhs_arguments: $ => repeat1(choice(
|
|
$._pipe_rhs_argument,
|
|
','
|
|
)),
|
|
|
|
// pipe_rhs is a call function
|
|
pipe_rhs: $ => prec.left(PREC.CALL_PIPE, seq(
|
|
field('function', $._expression),
|
|
'(',
|
|
field('arguments', optional(alias($.pipe_rhs_arguments, $.arguments))),
|
|
')'
|
|
)),
|
|
|
|
pipe: $ => prec.left(PREC.PIPE, seq(
|
|
field('left', $._expression),
|
|
field('operator', '|>'),
|
|
field('right', alias($.pipe_rhs, $.call))
|
|
)),
|
|
|
|
unary: $ => {
|
|
const operators = [
|
|
[PREC.UPLUS, choice('-', '+')],
|
|
[PREC.NOT, '!'],
|
|
[PREC.TILDE, '~'],
|
|
[PREC.HELP, '?'],
|
|
];
|
|
|
|
return choice(...operators.map(([precedence, operator]) => prec.left(precedence, seq(
|
|
field('operator', operator),
|
|
field('operand', $._expression)
|
|
))));
|
|
},
|
|
|
|
binary: $ => {
|
|
const operators = [
|
|
[prec.left, PREC.PLUS, choice('+', '-')],
|
|
[prec.left, PREC.TIMES, choice('*', '/')],
|
|
[prec.right, PREC.EXP, '^'],
|
|
[prec.left, PREC.REL, choice('<', '>', '<=', '>=', '==', '!=')],
|
|
[prec.left, PREC.OR, choice('||', '|')],
|
|
[prec.left, PREC.AND, choice('&&', '&')],
|
|
[prec.left, PREC.SPECIAL, $.special],
|
|
[prec.left, PREC.COLON, ':'],
|
|
[prec.left, PREC.TILDE, '~'],
|
|
];
|
|
|
|
return choice(...operators.map(([fn, precedence, operator]) => fn(precedence, seq(
|
|
field('left', $._expression),
|
|
field('operator', operator),
|
|
field('right', $._expression)
|
|
))));
|
|
},
|
|
|
|
break: $ => 'break',
|
|
|
|
next: $ => 'next',
|
|
|
|
true: $ => 'TRUE',
|
|
false: $ => 'FALSE',
|
|
null: $ => 'NULL',
|
|
inf: $ => 'Inf',
|
|
nan: $ => 'NaN',
|
|
|
|
na: $ => choice(
|
|
'NA',
|
|
'NA_character_',
|
|
'NA_complex_',
|
|
'NA_integer_',
|
|
'NA_real_'
|
|
),
|
|
|
|
_expression: $ => prec.right(choice(
|
|
$.identifier,
|
|
$.integer,
|
|
$.float,
|
|
$.complex,
|
|
$.string,
|
|
$.call,
|
|
$.function_definition,
|
|
$.lambda_function,
|
|
$._assignment,
|
|
$.brace_list,
|
|
$.paren_list,
|
|
$.binary,
|
|
$.unary,
|
|
$.pipe,
|
|
$.subset,
|
|
$.subset2,
|
|
$.dollar,
|
|
$.slot,
|
|
$.namespace_get,
|
|
$.namespace_get_internal,
|
|
$.help,
|
|
$.if,
|
|
$.for,
|
|
$.while,
|
|
$.repeat,
|
|
$.switch,
|
|
$.break,
|
|
$.next,
|
|
$.true,
|
|
$.false,
|
|
$.null,
|
|
$.inf,
|
|
$.nan,
|
|
$.na,
|
|
$.dots,
|
|
// ';'
|
|
)),
|
|
|
|
identifier: $ =>
|
|
choice(
|
|
/[.\p{XID_Start}][._\p{XID_Continue}]*/,
|
|
seq(
|
|
'`',
|
|
repeat(choice(
|
|
/[^`\\\n]+|\\\r?\n/,
|
|
$.escape_sequence
|
|
)),
|
|
'`'
|
|
)
|
|
),
|
|
|
|
integer: $ => token(prec(PREC.FLOAT + 1,
|
|
seq(
|
|
choice(
|
|
seq(
|
|
choice('0x', '0X'),
|
|
/[A-Fa-f0-9]+/
|
|
),
|
|
/\d+/
|
|
),
|
|
'L'
|
|
))),
|
|
|
|
float: $ => {
|
|
const digits = repeat1(/[0-9]/);
|
|
const exponent = seq(/[eE][\+-]?/, digits)
|
|
|
|
return token(prec.left(PREC.FLOAT,
|
|
choice(
|
|
seq(digits, optional('.'), optional(digits), optional(exponent)),
|
|
seq(optional(digits), '.', digits, optional(exponent)),
|
|
seq(digits, exponent),
|
|
seq(
|
|
choice('0x', '0X'),
|
|
/[A-Fa-f0-9]+/
|
|
)
|
|
)
|
|
))
|
|
},
|
|
|
|
complex: $ => seq($.float, 'i'),
|
|
|
|
comment: $ => token(prec(PREC.COMMENT, seq('#', /.*/))),
|
|
|
|
string: $ => choice(
|
|
$._raw_string_literal,
|
|
seq(
|
|
'"',
|
|
repeat(choice(
|
|
/[^"\\\n]+|\\\r?\n/,
|
|
$.escape_sequence
|
|
)),
|
|
'"'
|
|
),
|
|
seq(
|
|
"'",
|
|
repeat(choice(
|
|
/[^'\\\n]+|\\\r?\n/,
|
|
$.escape_sequence
|
|
)),
|
|
"'"
|
|
)
|
|
),
|
|
|
|
special: $ => seq(
|
|
'%',
|
|
repeat(choice(
|
|
/[^%\\\n]+|\\\r?\n/,
|
|
$.escape_sequence
|
|
)),
|
|
'%'
|
|
),
|
|
|
|
escape_sequence: $ => token.immediate(seq(
|
|
'\\',
|
|
choice(
|
|
/[^xu0-7]/,
|
|
/[0-7]{1,3}/,
|
|
/x[0-9a-fA-F]{2}/,
|
|
/u[0-9a-fA-F]{4}/,
|
|
/u{[0-9a-fA-F]+}/
|
|
)
|
|
))
|
|
}
|
|
});
|
|
|
|
/**
 * Builds a rule matching one or more occurrences of `rule` separated by commas.
 *
 * @param {*} rule - Grammar rule to repeat; passed through to `seq` unchanged.
 * @returns {*} `rule` followed by zero or more `',' rule` pairs.
 */
function commaSep1(rule) {
  const commaThenRule = seq(',', rule);
  return seq(rule, repeat(commaThenRule));
}