// difftastic/vendored_parsers/tree-sitter-r/grammar.js
//
// 532 lines
// 11 KiB
// JavaScript

// The R 4.2.0 syntax table, from ?Syntax:
//
// ':: :::' access variables in a namespace
// '$ @' component / slot extraction
// '[ [[' indexing
// '^' exponentiation (right to left)
// '- +' unary minus and plus
// ':' sequence operator
// '%any% |>' special operators (including '%%' and '%/%')
// '* /' multiply, divide
// '+ -' (binary) add, subtract
// '< > <= >= == !=' ordering and comparison
// '!' negation
// '& &&' and
// '| ||' or
// '~' as in formulae
// '-> ->>' rightwards assignment
// '<- <<-' assignment (right to left)
// '=' assignment (right to left)
// '?' help (unary and binary)
//
// R also has an operator precedence table defined here:
//
// https://github.com/wch/r-source/blob/36008873fb8ca2af3bdaaff418dbade5f7bce118/src/main/gram.y#L414-L436
//
// However, the effective precedence of '?' and '=' is a bit different, as R
// defines special reduction rules for these operators:
//
// https://github.com/wch/r-source/blob/36008873fb8ca2af3bdaaff418dbade5f7bce118/src/main/gram.y#L440-L453
//
// Rather than try to replicate those reduction rules, we just adjust the
// operator precedence to match the declared precedence in the R syntax table,
// while allowing for R's declared precedence differences between certain
// control flow keywords.
// Numeric precedence levels referenced by the grammar rules through
// prec(), prec.left() and prec.right(). A larger number binds more tightly.
const PREC = {
  // Lexical precedence of the comment token (the lowest level).
  COMMENT: -1,

  // Function definitions (LOW) and the loop constructs.
  LOW: 0,
  WHILE: 0,
  FOR: 0,
  REPEAT: 0,

  // Conditionals. ELSE is not referenced by any rule visible in this file.
  IF: 1,
  ELSE: 2,

  // '?' help, in both its unary and binary forms.
  HELP: 3,

  // Assignment operators: '=', then '<-' / '<<-' / ':=', then '->' / '->>'.
  EQ_ASSIGN: 4,
  LEFT_ASSIGN: 5,
  RIGHT_ASSIGN: 6,

  // Formula, logical, comparison and arithmetic operators.
  TILDE: 7,
  OR: 8,
  AND: 9,
  NOT: 10,
  REL: 11,
  PLUS: 12,
  TIMES: 13,

  // '%any%' operators and the native pipe share one level.
  SPECIAL: 14,
  PIPE: 14,
  // PIPEBIND is not referenced by any rule visible in this file.
  PIPEBIND: 15,

  // Sequence operator, unary plus/minus, exponentiation.
  COLON: 16,
  UPLUS: 17,
  EXP: 18,

  // Indexing, component/slot extraction, namespace access and calls.
  SUBSET: 19,
  DOLLAR: 20,
  NS_GET: 21,
  CALL: 22,
  CALL_PIPE: 23,

  // Lexical precedence of the float token; the integer token uses FLOAT + 1.
  FLOAT: 24,
};
// Expression terminators: an expression may be followed by a newline or ';'.
//
// These are declared with `const` instead of being assigned as bare names.
// A bare assignment creates properties on the global object (visible to any
// other script loaded in the same process) and throws a ReferenceError when
// the file is evaluated in strict mode or as an ES module.
const newline = '\n';
const terminator = choice(newline, ';');
// Tree-sitter grammar for R. The object passed to grammar() names the
// language, lists the tokens that may appear anywhere (extras), the expected
// parse conflicts, the externally scanned tokens, and the rule set.
// Precedence numbers come from PREC; `terminator` and commaSep1() are defined
// elsewhere in this file.
module.exports = grammar({
name: 'r',
// Comments and whitespace may appear between any two tokens.
extras: $ => [
$.comment,
/\s/
],
// The pipe right-hand-side argument rules overlap with the ordinary argument
// rules, so these pairs are declared as expected conflicts.
conflicts: ($) => [
[$._pipe_rhs_argument, $._argument],
[$.pipe_rhs_arguments, $.arguments]
],
// Raw string literals are not described by a rule here; they are listed as
// an external token (used by the `string` rule below).
externals: $ => [
$._raw_string_literal
],
rules: {
// Start rule: any number of expressions, each optionally followed by a
// terminator.
program: $ => repeat(seq($._expression, optional(terminator))),
// NOTE(review): no other rule visible in this file references _definition.
_definition: $ => choice(
$.function_definition,
$.lambda_function
// TODO: other kinds of definitions
),
// `function` keyword, a parameter list, then a body expression.
function_definition: $ => prec.left(PREC.LOW, seq(
'function',
$.formal_parameters,
$._expression
)),
// Backslash shorthand for a function: same shape as function_definition.
lambda_function: $ => prec.left(PREC.LOW, seq(
'\\',
$.formal_parameters,
$._expression
)),
// if (condition) consequence [else alternative]. Right-associative;
// presumably so that an `else` attaches to the nearest `if` -- confirm
// against the generated parser.
if: $ => prec.right(PREC.IF, seq(
'if',
'(',
field('condition', $._expression),
')',
field('consequence', $._expression),
field('alternative', optional(seq('else', $._expression)))
)),
// while (condition) body
while: $ => prec.left(PREC.WHILE, seq(
'while',
'(',
field('condition', $._expression),
')',
field('body', $._expression)
)),
// repeat body
repeat: $ => prec.left(PREC.REPEAT, seq(
'repeat',
field('body', $._expression)
)),
// for (name in vector) body
for: $ => prec.left(PREC.FOR, seq(
'for',
'(',
field('name', $.identifier),
'in',
field('vector', $._expression),
')',
field('body', $._expression)
)),
// switch(value, <arguments>): the part after the first comma is parsed with
// the ordinary `arguments` rule.
switch: $ => seq(
'switch',
'(',
field('value', $._expression),
',',
field('body', $.arguments),
')'
),
// Parenthesised parameter list: empty, or comma-separated parameters with an
// optional trailing comma.
formal_parameters: $ => seq(
'(',
optional(seq(
commaSep1($._formal_parameter),
optional(',')
)),
')'
),
// Parameter with a default: name = value
default_parameter: $ => seq(
field('name', $.identifier),
'=',
field('value', $._expression)
),
// A single formal parameter: a bare name, a defaulted name, or `...`.
_formal_parameter: $ => choice(
$.identifier,
$.default_parameter,
$.dots
),
// NOTE(review): no other rule visible in this file references `block`;
// braces inside expressions are handled by `brace_list`.
block: $ => seq(
'{',
repeat($._expression),
'}'
),
// One or more arguments and/or commas in any order, so consecutive commas
// (empty argument positions) are accepted.
arguments: $ => repeat1(choice(
$._argument,
',',
)),
// Named argument. The name may be an identifier, a string or `...`; the value
// is optional, so `name =` with nothing after it is accepted.
default_argument: $ => prec.right(seq(
field('name', choice($.identifier, $.string, $.dots)),
'=',
field('value', optional($._expression))
)),
// A single argument: a positional expression or a named argument.
_argument: $ => prec.left(choice(
$._expression,
$.default_argument,
)),
// Function call: callee expression followed by a parenthesised, optional
// argument list.
call: $ => prec(PREC.CALL, seq(
field('function', $._expression),
'(',
field('arguments', optional($.arguments)),
')'
)),
// All assignment forms. For the leftward forms the `name` field is the left
// operand; for the rightward forms (`->`, `->>`) it is the right operand.
_assignment: $ => choice(
$.equals_assignment,
$.left_assignment,
$.left_assignment2,
$.right_assignment,
$.super_assignment,
$.super_right_assignment,
),
// name <- value (right-associative)
left_assignment: $ => prec.right(PREC.LEFT_ASSIGN,
seq(
field('name', $._expression),
'<-',
field('value', $._expression)
)),
// name := value (same precedence and associativity as `<-`)
left_assignment2: $ => prec.right(PREC.LEFT_ASSIGN,
seq(
field('name', $._expression),
':=',
field('value', $._expression)
)),
// name = value (right-associative, one level below `<-`)
equals_assignment: $ => prec.right(PREC.EQ_ASSIGN,
seq(
field('name', $._expression),
'=',
field('value', $._expression)
)),
// name <<- value
super_assignment: $ => prec.right(PREC.LEFT_ASSIGN,
seq(
field('name', $._expression),
'<<-',
field('value', $._expression)
)),
// value ->> name (left-associative)
super_right_assignment: $ => prec.left(PREC.RIGHT_ASSIGN,
seq(
field('value', $._expression),
'->>',
field('name', $._expression)
)),
// value -> name (left-associative)
right_assignment: $ => prec.left(PREC.RIGHT_ASSIGN,
seq(
field('value', $._expression),
'->',
field('name', $._expression)
)),
// { ... }: expressions, each optionally followed by a terminator.
brace_list: $ => seq(
'{',
repeat(
seq($._expression, optional(terminator))
),
'}'
),
// ( ... ): zero or more expressions; no terminators are accepted here.
paren_list: $ => seq(
'(',
repeat(
$._expression
),
')'
),
// Single-bracket indexing: expr[args]
subset: $ => prec(PREC.SUBSET, seq(
$._expression,
'[',
optional($.arguments),
']'
)),
// Double-bracket indexing: expr[[args]]
subset2: $ => prec(PREC.SUBSET, seq(
$._expression,
'[[',
optional($.arguments),
']]'
)),
// Component extraction: expr$name or expr$"name"
dollar: $ => prec.left(PREC.DOLLAR, seq(
$._expression,
'$',
choice(
$.identifier,
$.string
)
)),
// Slot extraction: expr@name (identifier only)
slot: $ => prec.left(PREC.DOLLAR, seq(
$._expression,
'@',
$.identifier
)),
// namespace::function -- both sides must be identifiers.
namespace_get: $ => prec.left(PREC.NS_GET, seq(
field('namespace', $.identifier),
'::',
field('function', $.identifier),
)),
// namespace:::function -- both sides must be identifiers.
namespace_get_internal: $ => prec.left(PREC.NS_GET, seq(
field('namespace', $.identifier),
':::',
field('function', $.identifier),
)),
// Binary help: expr ? expr. The unary form is one of the `unary` operators.
help: $ => prec.left(PREC.HELP, seq(
$._expression,
'?',
$._expression
)),
dots: $ => '...',
// `_` placeholder; only reachable through pipe_placeholder_argument.
placeholder: $ => '_',
// name = _ ; used only inside the right-hand side of a pipe, where it is
// aliased to `default_argument`.
pipe_placeholder_argument: $ => prec.right(seq(
field('name', $.identifier),
'=',
field('value', $.placeholder)
)),
// Argument on the right-hand side of a pipe: as _argument, plus the
// placeholder form.
_pipe_rhs_argument: $ => prec.right(choice(
$._expression,
$.default_argument,
alias($.pipe_placeholder_argument, $.default_argument)
)),
// Same shape as `arguments`, built from _pipe_rhs_argument.
pipe_rhs_arguments: $ => repeat1(choice(
$._pipe_rhs_argument,
','
)),
// Right-hand side of a pipe. It has the same shape as `call`, and the `pipe`
// rule aliases it to `call`; its argument list is aliased to `arguments`.
// pipe_rhs is a call function
pipe_rhs: $ => prec.left(PREC.CALL_PIPE, seq(
field('function', $._expression),
'(',
field('arguments', optional(alias($.pipe_rhs_arguments, $.arguments))),
')'
)),
// left |> call(...)
pipe: $ => prec.left(PREC.PIPE, seq(
field('left', $._expression),
field('operator', '|>'),
field('right', alias($.pipe_rhs, $.call))
)),
// Prefix operators. Each table row is [precedence, operator]; every row
// becomes a left-associative `operator operand` alternative.
unary: $ => {
const operators = [
[PREC.UPLUS, choice('-', '+')],
[PREC.NOT, '!'],
[PREC.TILDE, '~'],
[PREC.HELP, '?'],
];
return choice(...operators.map(([precedence, operator]) => prec.left(precedence, seq(
field('operator', operator),
field('operand', $._expression)
))));
},
// Infix operators. Each table row is [associativity function, precedence,
// operator]; every row becomes a `left operator right` alternative. Only '^'
// is right-associative.
binary: $ => {
const operators = [
[prec.left, PREC.PLUS, choice('+', '-')],
[prec.left, PREC.TIMES, choice('*', '/')],
[prec.right, PREC.EXP, '^'],
[prec.left, PREC.REL, choice('<', '>', '<=', '>=', '==', '!=')],
[prec.left, PREC.OR, choice('||', '|')],
[prec.left, PREC.AND, choice('&&', '&')],
[prec.left, PREC.SPECIAL, $.special],
[prec.left, PREC.COLON, ':'],
[prec.left, PREC.TILDE, '~'],
];
return choice(...operators.map(([fn, precedence, operator]) => fn(precedence, seq(
field('left', $._expression),
field('operator', operator),
field('right', $._expression)
))));
},
// Keyword and constant literals.
break: $ => 'break',
next: $ => 'next',
true: $ => 'TRUE',
false: $ => 'FALSE',
null: $ => 'NULL',
inf: $ => 'Inf',
nan: $ => 'NaN',
na: $ => choice(
'NA',
'NA_character_',
'NA_complex_',
'NA_integer_',
'NA_real_'
),
// Everything that can appear where an expression is expected.
_expression: $ => prec.right(choice(
$.identifier,
$.integer,
$.float,
$.complex,
$.string,
$.call,
$.function_definition,
$.lambda_function,
$._assignment,
$.brace_list,
$.paren_list,
$.binary,
$.unary,
$.pipe,
$.subset,
$.subset2,
$.dollar,
$.slot,
$.namespace_get,
$.namespace_get_internal,
$.help,
$.if,
$.for,
$.while,
$.repeat,
$.switch,
$.break,
$.next,
$.true,
$.false,
$.null,
$.inf,
$.nan,
$.na,
$.dots,
// ';'
)),
// Either a plain name (first character '.' or XID_Start, then '.', '_' or
// XID_Continue characters) or a backtick-quoted name that may contain escape
// sequences and backslash-newline continuations.
identifier: $ =>
choice(
/[.\p{XID_Start}][._\p{XID_Continue}]*/,
seq(
'`',
repeat(choice(
/[^`\\\n]+|\\\r?\n/,
$.escape_sequence
)),
'`'
)
),
// Hexadecimal or decimal digits followed by 'L'. Its lexical precedence is
// one above the float token's.
integer: $ => token(prec(PREC.FLOAT + 1,
seq(
choice(
seq(
choice('0x', '0X'),
/[A-Fa-f0-9]+/
),
/\d+/
),
'L'
))),
// Numeric literal without the 'L' suffix. Alternatives, in order:
//   digits, optional '.', optional fraction digits, optional exponent
//   optional digits, '.', digits, optional exponent
//   digits, exponent
//   hexadecimal: 0x / 0X followed by hex digits
float: $ => {
const digits = repeat1(/[0-9]/);
const exponent = seq(/[eE][\+-]?/, digits)
return token(prec.left(PREC.FLOAT,
choice(
seq(digits, optional('.'), optional(digits), optional(exponent)),
seq(optional(digits), '.', digits, optional(exponent)),
seq(digits, exponent),
seq(
choice('0x', '0X'),
/[A-Fa-f0-9]+/
)
)
))
},
// A float followed by 'i'.
complex: $ => seq($.float, 'i'),
// '#' through the end of the line, at the lowest lexical precedence.
comment: $ => token(prec(PREC.COMMENT, seq('#', /.*/))),
// A raw string (external token), or a double- or single-quoted string. The
// quoted forms may contain escape sequences and backslash-newline
// continuations, but no bare newline.
string: $ => choice(
$._raw_string_literal,
seq(
'"',
repeat(choice(
/[^"\\\n]+|\\\r?\n/,
$.escape_sequence
)),
'"'
),
seq(
"'",
repeat(choice(
/[^'\\\n]+|\\\r?\n/,
$.escape_sequence
)),
"'"
)
),
// %...% operator: the text between the percent signs follows the same
// content pattern as quoted strings.
special: $ => seq(
'%',
repeat(choice(
/[^%\\\n]+|\\\r?\n/,
$.escape_sequence
)),
'%'
),
// Backslash escape, matched as an immediate token. After the backslash:
// any single character other than x, u or 0-7; one to three octal digits;
// 'x' plus two hex digits; 'u' plus four hex digits; or 'u{' hex digits '}'.
escape_sequence: $ => token.immediate(seq(
'\\',
choice(
/[^xu0-7]/,
/[0-7]{1,3}/,
/x[0-9a-fA-F]{2}/,
/u[0-9a-fA-F]{4}/,
/u{[0-9a-fA-F]+}/
)
))
}
});
/**
 * Builds a rule matching one or more occurrences of `rule` separated by
 * commas (no trailing comma).
 *
 * @param rule - The grammar rule to repeat.
 * @returns A sequence of `rule` followed by zero or more `',' rule` pairs.
 */
function commaSep1(rule) {
  const moreItems = repeat(seq(',', rule));
  return seq(rule, moreItems);
}