Update operators and identifiers (#34)

- Add more operator types:
  - Bitshift
  - Rational
  - "Dotty"
- Fix unary operators:
  - Add broadcasting dots
  - Allow unary operators as variables
  - Parse <unary-op><array> as a unary_expression (not an index
    expression)
- Fix pipe typos (femtolisp uses pipes as raw symbol delimiters).
- Simplify `binary_expression` rule.
- Make assignment, binary and unary operators visible. Operators that
  have their own rules (pair, range) are not made visible.
- Use Unicode categories instead of character ranges for valid
  identifiers. This allows more characters and removes some invalid ones.
- Update tests.
pull/315/head
Sergio A. Vargas 2022-05-31 16:11:51 +07:00 committed by GitHub
parent 03b5a92cb0
commit fc60b7cce8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 189277 additions and 186053 deletions

@ -11,9 +11,9 @@ const PREC = [
'colon_quote',
'colon_range',
'plus',
'bitshift',
'times',
'rational',
'bitshift',
'power',
'call',
'decl',
@ -25,6 +25,10 @@ const PREC = [
return result;
}, {});
const ASSIGN_OPERATORS = `
= += -= *= /= //= \\= ^= ÷= %= <<= >>= >>>= |= &= =
`;
const ARROW_OPERATORS = `
@ -35,10 +39,6 @@ const ARROW_OPERATORS = `
`;
const ASSIGN_OPERATORS = `
= += -= *= /= //= |\=| ^= ÷= %= <<= >>= >>>= ||=| &= =
`;
const COMPARISON_OPERATORS = `
> < >= <= == === != !==
@ -51,8 +51,10 @@ const COMPARISON_OPERATORS = `
`;
const DOTTY_OPERATORS = '… ⁝ ⋮ ⋱ ⋰ ⋯';
const PLUS_OPERATORS = `
+ - |\|| |++| ±
+ - | ++ ±
`;
@ -63,6 +65,8 @@ const TIMES_OPERATORS = `
`;
const BITSHIFT_OPERATORS = '<< >> >>>';
const POWER_OPERATORS = `
^
`;
@ -414,11 +418,11 @@ grammar({
$.interpolation_expression,
$._primary_expression,
$._literal,
$.operator,
),
_primary_expression: $ => choice(
$.identifier,
$.operator,
$.array_expression,
$.array_comprehension_expression,
$.matrix_expression,
@ -436,7 +440,16 @@ grammar({
repeat1(prec(-1, seq(',', $._expression)))
)),
operator: $ => choice('+', $._plus_operator, $._times_operator, $._power_operator),
operator: $ => choice(
$._comparison_operator,
$._dotty_operator,
$._plus_operator,
$._times_operator,
$._rational_operator,
$._bitshift_operator,
$._power_operator,
$._unary_operator,
),
parenthesized_expression: $ => prec(1, seq(
'(', choice($._expression_list, $.spread_expression), ')'
@ -483,7 +496,7 @@ grammar({
),
call_expression: $ => prec(PREC.call, seq(
$._primary_expression,
choice($._primary_expression, $.operator),
$._immediate_paren,
choice($.argument_list, $.generator_expression),
optional($.do_clause)
@ -540,7 +553,7 @@ grammar({
$._expression,
$.bare_tuple_expression
),
choice($._assign_operator, '='),
alias(choice($._assign_operator, '='), $.operator),
choice(
$._expression,
$.assignment_expression,
@ -550,62 +563,34 @@ grammar({
unary_expression: $ => choice(
prec(PREC.prefix, seq(
choice('>:', '+', '-', '!', '~', '¬', '√', '∛', '∜'),
$._expression
)),
prec(PREC.postfix, seq(
$._expression,
choice("'", ".'")
))
),
binary_expression: $ => choice(
prec.left(PREC.power, seq(
$._expression,
$._power_operator,
$._expression
)),
prec.left(PREC.times, seq(
$._expression,
$._times_operator,
$._expression
)),
prec.left(PREC.plus, seq(
$._expression,
choice('+', $._plus_operator),
$._expression
)),
prec.right(PREC.arrow, seq(
alias($._unary_operator, $.operator),
$._expression,
$._arrow_operator,
$._expression
)),
prec.right(PREC.pipe_left, seq(
$._expression,
'<|',
$._expression
)),
prec.left(PREC.pipe_right, seq(
$._expression,
'|>',
$._expression
)),
prec.left(PREC.comparison, seq(
$._expression,
choice('in', 'isa', $._comparison_operator),
$._expression
)),
prec.left(PREC.lazy_or, seq(
prec(PREC.postfix, seq($._expression, alias("'", $.operator))),
),
binary_expression: $ => {
const table = [
[prec.left, PREC.power, $._power_operator],
[prec.left, PREC.rational, $._rational_operator],
[prec.left, PREC.bitshift, $._bitshift_operator],
[prec.left, PREC.times, $._times_operator],
[prec.left, PREC.plus, choice('+', $._plus_operator)],
[prec.left, PREC.colon_range, $._dotty_operator],
[prec.right, PREC.arrow, $._arrow_operator],
[prec.right, PREC.pipe_left, '<|'],
[prec.left, PREC.pipe_right, '|>'],
[prec.left, PREC.comparison, choice('in', 'isa', $._comparison_operator)],
[prec.left, PREC.lazy_or, '||'],
[prec.left, PREC.lazy_and, '&&'],
];
return choice(...table.map(([fn, prec, op]) => fn(prec, seq(
$._expression,
'||',
$._expression
)),
prec.left(PREC.lazy_and, seq(
alias(op, $.operator),
$._expression,
'&&',
$._expression
))
),
))));
},
ternary_expression: $ => prec.right(PREC.conditional, seq(
$._expression,
@ -757,14 +742,15 @@ grammar({
'(', ')',
'{', '}',
'&',
'|',
'$',
ARROW_OPERATORS,
ASSIGN_OPERATORS,
ARROW_OPERATORS,
COMPARISON_OPERATORS,
DOTTY_OPERATORS,
PLUS_OPERATORS,
POWER_OPERATORS,
TIMES_OPERATORS,
BITSHIFT_OPERATORS,
POWER_OPERATORS
];
const operatorCharacters = operators
@ -775,9 +761,9 @@ grammar({
.replace(/\\/g, '\\\\')
.replace(/!/g, '');
// First char: ASCII letter, Greek letter, Extended Latin letter, or ∇
// Remaining characters: not delimiter, not operator
return new RegExp(`[_a-zA-ZͰ-ϿĀ-ſ∇][^"'\`\\s\\.\\-\\[\\]${operatorCharacters}]*`)
const start = "[_\\p{L}\\p{Nl}∇]"
const rest = `[^"'\`\\s\\.\\-\\[\\]${operatorCharacters}]*`
return new RegExp(start + rest)
},
// Literals
@ -872,17 +858,23 @@ grammar({
),
),
_unary_operator: $ => token(addDots('+ - ! ~ ¬ √ ∛ ∜')),
_power_operator: $ => token(addDots(POWER_OPERATORS)),
_bitshift_operator: $ => token(addDots(BITSHIFT_OPERATORS)),
_rational_operator: $ => token(addDots('//')),
_times_operator: $ => token(addDots(TIMES_OPERATORS)),
_plus_operator: $ => token(choice('$', addDots(PLUS_OPERATORS))),
_arrow_operator: $ => token(choice('--', '-->', addDots(ARROW_OPERATORS))),
_dotty_operator: $ => token(choice('..', addDots(DOTTY_OPERATORS))),
_comparison_operator: $ => token(choice(
'|<:|', '|>:|', addDots(COMPARISON_OPERATORS)
)),
_comparison_operator: $ => token(choice('<:', '>:', addDots(COMPARISON_OPERATORS))),
_arrow_operator: $ => token(choice('<--', '-->', '<-->', addDots(ARROW_OPERATORS))),
_assign_operator: $ => token(choice(':=', '~', '$=', addDots(ASSIGN_OPERATORS))),

1521
src/grammar.json vendored

File diff suppressed because it is too large Load Diff

@ -423,10 +423,6 @@
"type": "matrix_expression",
"named": true
},
{
"type": "operator",
"named": true
},
{
"type": "parameterized_identifier",
"named": true
@ -796,10 +792,6 @@
"type": "matrix_expression",
"named": true
},
{
"type": "operator",
"named": true
},
{
"type": "parameterized_identifier",
"named": true
@ -1983,7 +1975,7 @@
"named": true,
"fields": {},
"children": {
"multiple": false,
"multiple": true,
"required": true,
"types": [
{
@ -2046,18 +2038,10 @@
"type": "\n",
"named": false
},
{
"type": "!",
"named": false
},
{
"type": "$",
"named": false
},
{
"type": "&&",
"named": false
},
{
"type": "'",
"named": false
@ -2070,18 +2054,10 @@
"type": ")",
"named": false
},
{
"type": "+",
"named": false
},
{
"type": ",",
"named": false
},
{
"type": "-",
"named": false
},
{
"type": "->",
"named": false
@ -2090,10 +2066,6 @@
"type": ".",
"named": false
},
{
"type": ".'",
"named": false
},
{
"type": "...",
"named": false
@ -2114,10 +2086,6 @@
"type": "<:",
"named": false
},
{
"type": "<|",
"named": false
},
{
"type": "=",
"named": false
@ -2126,10 +2094,6 @@
"type": "=>",
"named": false
},
{
"type": ">:",
"named": false
},
{
"type": "?",
"named": false
@ -2230,10 +2194,6 @@
"type": "in",
"named": false
},
{
"type": "isa",
"named": false
},
{
"type": "let",
"named": false
@ -2290,40 +2250,12 @@
"type": "{",
"named": false
},
{
"type": "|>",
"named": false
},
{
"type": "||",
"named": false
},
{
"type": "}",
"named": false
},
{
"type": "~",
"named": false
},
{
"type": "¬",
"named": false
},
{
"type": "∈",
"named": false
},
{
"type": "√",
"named": false
},
{
"type": "∛",
"named": false
},
{
"type": "∜",
"named": false
}
]

373152
src/parser.c vendored

File diff suppressed because it is too large Load Diff

@ -175,6 +175,7 @@ end
(parameter_list (identifier) (optional_parameter (identifier) (identifier)))
(assignment_expression
(identifier)
(operator)
(array_expression
(quote_expression
(tuple_expression

@ -1,10 +1,12 @@
================
Variables
================
w
w
θ
==============================
identifiers
==============================
abc_123_ABC
_fn!
ρ; φ; z
x
θ̄
logŷ
ϵ
@ -19,30 +21,12 @@ logŷ
(identifier)
(identifier)
(identifier)
(identifier)
(identifier)
(identifier)
(identifier)
(identifier))
=================
Assignments
=================
a = b
c &= d ÷= e
a, b = c, d, e
---
(source_file
(assignment_expression
(identifier)
(identifier))
(assignment_expression
(identifier)
(assignment_expression
(identifier)
(identifier)))
(assignment_expression
(bare_tuple_expression (identifier) (identifier))
(bare_tuple_expression (identifier) (identifier) (identifier))))
=================
Functions
@ -137,111 +121,19 @@ end
(macro_expression
(macro_identifier (identifier))
(macro_argument_list
(binary_expression (identifier) (identifier))))
(binary_expression (identifier) (operator) (identifier))))
(macro_expression
(macro_identifier (identifier))
(macro_argument_list
(binary_expression (identifier) (identifier))
(binary_expression (identifier) (operator) (identifier))
(string_literal)))
(macro_expression (macro_identifier (operator)) (macro_argument_list (identifier)))
(macro_expression
(macro_identifier (identifier))
(macro_argument_list
(string_literal)
(compound_expression (assignment_expression (identifier) (identifier))))))
=================
Binary operators
=================
a → b ⇶ c ⭄ d
a * b ⦼ c
a == b
---
(source_file
(binary_expression
(identifier)
(binary_expression
(identifier)
(binary_expression
(identifier)
(identifier))))
(binary_expression
(binary_expression
(identifier)
(identifier))
(identifier))
(binary_expression
(identifier)
(identifier)))
===================================
Binary operators with leading dots
===================================
a .* b .+ c
---
(compound_expression (assignment_expression (identifier) (operator) (identifier))))))
(source_file
(binary_expression
(binary_expression (identifier) (identifier))
(identifier)))
=================
Unary operators
=================
+a
-b
√9
[a]'
!(a + b)
---
(source_file
(unary_expression (identifier))
(unary_expression (identifier))
(unary_expression (integer_literal))
(unary_expression (array_expression (identifier)))
(unary_expression
(parenthesized_expression (binary_expression (identifier) (identifier)))))
====================
The ternary operator
====================
x = batch_size == 1 ?
rand(10) :
rand(10, batch_size)
---
(source_file
(assignment_expression
(identifier)
(ternary_expression
(binary_expression (identifier) (integer_literal))
(call_expression (identifier) (argument_list (integer_literal)))
(call_expression (identifier) (argument_list (integer_literal) (identifier))))))
====================
Operators as values
====================
x = +
print(:)
foo(^, ÷)
---
(source_file
(assignment_expression (identifier) (operator))
(call_expression (identifier) (argument_list (operator)))
(call_expression (identifier) (argument_list (operator) (operator))))
===========================
Coefficient expressions
@ -259,29 +151,39 @@ Coefficient expressions
(binary_expression
(binary_expression
(coefficient_expression (integer_literal) (identifier))
(operator)
(integer_literal))
(operator)
(coefficient_expression (integer_literal) (identifier)))
(operator)
(integer_literal))
(binary_expression
(binary_expression
(binary_expression
(coefficient_expression (float_literal) (identifier))
(operator)
(integer_literal))
(operator)
(coefficient_expression (float_literal) (identifier)))
(operator)
(integer_literal))
(binary_expression
(integer_literal)
(operator)
(coefficient_expression (integer_literal) (identifier)))
(binary_expression
(binary_expression
(binary_expression
(coefficient_expression
(integer_literal)
(parenthesized_expression (binary_expression (identifier) (integer_literal))))
(parenthesized_expression (binary_expression (identifier) (operator) (integer_literal))))
(operator)
(integer_literal))
(operator)
(coefficient_expression
(integer_literal)
(parenthesized_expression (binary_expression (identifier) (integer_literal)))))
(parenthesized_expression (binary_expression (identifier) (operator) (integer_literal)))))
(operator)
(integer_literal)))
=================
@ -313,7 +215,7 @@ Named tuples
(source_file
(parenthesized_expression
(assignment_expression (identifier) (integer_literal)))
(assignment_expression (identifier) (operator) (integer_literal)))
(tuple_expression
(named_field (identifier) (integer_literal)))
(tuple_expression
@ -353,20 +255,6 @@ Matrices
(matrix_row (integer_literal) (integer_literal))
(matrix_row (integer_literal) (integer_literal))))
=================
Pairs
=================
A(b => c, d => e)
---
(source_file
(call_expression
(identifier)
(argument_list
(pair_expression (identifier) (identifier))
(pair_expression (identifier) (identifier)))))
====================
Function expressions
@ -383,11 +271,15 @@ function () 3 end
(source_file
(function_expression
(identifier)
(binary_expression (identifier) (integer_literal)))
(binary_expression (identifier) (operator) (integer_literal)))
(function_expression
(parameter_list (identifier) (identifier) (identifier))
(binary_expression
(binary_expression (coefficient_expression (integer_literal) (identifier)) (identifier))
(binary_expression
(coefficient_expression (integer_literal) (identifier))
(operator)
(identifier))
(operator)
(identifier)))
(function_expression
(parameter_list)
@ -399,7 +291,7 @@ function () 3 end
(parameter_list)
(parenthesized_expression
(call_expression (identifier) (argument_list (float_literal)))
(assignment_expression (identifier) (integer_literal))
(assignment_expression (identifier) (operator) (integer_literal))
(identifier))))
============================

@ -94,6 +94,7 @@ band = "Interpol"
(string_literal)
(assignment_expression
(identifier)
(operator)
(string_literal))
(string_literal
(string_interpolation (identifier)))
@ -143,10 +144,12 @@ version = v"1.0"
(source_file
(assignment_expression
(identifier)
(operator)
(prefixed_string_literal
prefix: (identifier)))
(assignment_expression
(identifier)
(operator)
(prefixed_string_literal
prefix: (identifier))))
@ -172,7 +175,7 @@ nested #= comments =# =#
(line_comment)
(block_comment)
(block_comment)
(assignment_expression (identifier) (block_comment) (integer_literal))
(assignment_expression (identifier) (operator) (block_comment) (integer_literal))
(block_comment)
(block_comment))

@ -0,0 +1,255 @@
==============================
assignment operators
==============================
a = b
a .. b = a * b
c &= d ÷= e
tup = 1, 2, 3
car, cdr... = list
---
(source_file
(assignment_expression
(identifier)
(operator)
(identifier))
(assignment_expression
(binary_expression (identifier) (operator) (identifier))
(operator)
(binary_expression (identifier) (operator) (identifier)))
(assignment_expression
(identifier)
(operator)
(assignment_expression
(identifier)
(operator)
(identifier)))
(assignment_expression
(identifier)
(operator)
(bare_tuple_expression
(integer_literal)
(integer_literal)
(integer_literal)))
(assignment_expression
(bare_tuple_expression
(identifier)
(spread_expression (identifier)))
(operator)
(identifier)))
==============================
binary operators
==============================
a + b
a ++ 1 × b ⥌ 2 → c
a:(a // b)
x = A \ (v × w)
a & b | c
(x >>> 16, x >>> 8, x) .& 0xff
---
(source_file
; Sanity check
(binary_expression (identifier) (operator) (identifier))
; plus/times/power/arrow
; (→ (++ a (× 1 (⥌ b 2))) c)
(binary_expression
(binary_expression
(identifier)
(operator)
(binary_expression
(integer_literal)
(operator)
(binary_expression
(identifier)
(operator)
(integer_literal))))
(operator)
(identifier))
; range/rational
(range_expression
(identifier)
(parenthesized_expression
(binary_expression
(identifier)
(operator)
(identifier))))
; LA
(assignment_expression
(identifier)
(operator)
(binary_expression
(identifier)
(operator)
(parenthesized_expression
(binary_expression
(identifier)
(operator)
(identifier)))))
; bitwise
(binary_expression
(binary_expression (identifier) (operator) (identifier))
(operator)
(identifier))
(binary_expression
(tuple_expression
(binary_expression (identifier) (operator) (integer_literal))
(binary_expression (identifier) (operator) (integer_literal))
(identifier))
(operator)
(integer_literal)))
==============================
binary comparison operators
==============================
a === 1
a! != 0
A ⊆ B ⊆ C
x ≥ 0 ≥ z
---
(source_file
(binary_expression (identifier) (operator) (integer_literal))
(binary_expression (identifier) (operator) (integer_literal))
; Chained comparisons are parsed as a single expression in Julia.
; So this isn't 100% correct.
(binary_expression
(binary_expression
(identifier)
(operator)
(identifier))
(operator)
(identifier))
(binary_expression
(binary_expression
(identifier)
(operator)
(integer_literal))
(operator)
(identifier)))
==============================
pair operator
==============================
Dict(b => c, d => e)
---
(source_file
(call_expression
(identifier)
(argument_list
(pair_expression (identifier) (identifier))
(pair_expression (identifier) (identifier)))))
==============================
unary operators
==============================
+a
-b
√9
[a, b]'
!p === !(p)
1 ++ +2
---
(source_file
(unary_expression (operator) (identifier))
(unary_expression (operator) (identifier))
(unary_expression (operator) (integer_literal))
(unary_expression (array_expression (identifier) (identifier)) (operator))
(binary_expression
(unary_expression (operator) (identifier))
(operator)
(call_expression (operator) (argument_list (identifier))))
(binary_expression
(integer_literal)
(operator)
(unary_expression (operator) (integer_literal))))
=============================
operator broadcasting
=============================
a .* b .+ c
.~[x]
---
(source_file
(binary_expression
(binary_expression (identifier) (operator) (identifier))
(operator)
(identifier))
(unary_expression
(operator)
(array_expression (identifier))))
==============================
ternary operator
==============================
x = batch_size == 1 ?
rand(10) :
rand(10, batch_size)
---
(source_file
(assignment_expression
(identifier)
(operator)
(ternary_expression
(binary_expression (identifier) (operator) (integer_literal))
(call_expression (identifier) (argument_list (integer_literal)))
(call_expression (identifier) (argument_list (integer_literal) (identifier))))))
==============================
operators as values
==============================
x = +
⪯ = .≤
print(:)
foo(^, ÷, -)
---
(source_file
(assignment_expression
(identifier)
(operator)
(operator))
(assignment_expression
(operator)
(operator)
(operator))
(call_expression
(identifier)
(argument_list (operator)))
(call_expression
(identifier)
(argument_list (operator) (operator) (operator))))

@ -78,7 +78,7 @@ while a(); b(); end
(source_file
(while_statement
(binary_expression (identifier) (integer_literal))
(binary_expression (identifier) (operator) (integer_literal))
(call_expression (identifier) (argument_list (identifier)))
(continue_statement)
(break_statement))
@ -100,7 +100,7 @@ return a, b, c
(source_file
(return_statement)
(return_statement (identifier))
(return_statement (binary_expression (identifier) (identifier)))
(return_statement (binary_expression (identifier) (operator) (identifier)))
(return_statement (bare_tuple_expression (identifier) (identifier) (identifier))))
===============================
@ -147,9 +147,9 @@ end
(source_file
(quote_statement
(assignment_expression (identifier) (integer_literal))
(assignment_expression (identifier) (integer_literal))
(binary_expression (identifier) (identifier))))
(assignment_expression (identifier) (operator) (integer_literal))
(assignment_expression (identifier) (operator) (integer_literal))
(binary_expression (identifier) (operator) (identifier))))
===============================
Try statements