Rework lambda expressions

Summary
-------
The `$.lambda_expression` body was changed from `$._block` to
`$._indentable_expression`. This had the following effects:
* 10x faster parser generation
* parser size reduced from 41M to 24M
* a conflict with `$.self_type`, which was resolved by matching
  indent tokens in `$.template_body`. This change, in turn, required
  scanner.c to stop emitting INDENT and OUTDENT tokens when encountering
  comments
text_sliders
susliko 2023-06-06 00:18:26 +07:00
parent d24edb6bf2
commit a889c3c749
4 changed files with 103 additions and 34 deletions

@ -840,6 +840,8 @@ Value declarations (Scala 3 syntax)
================================================================================
class A:
// Comments that should not
// influence indentation
val b, c : Int
val d : String
@ -849,6 +851,8 @@ class A:
(class_definition
(identifier)
(template_body
(comment)
(comment)
(val_declaration
(identifier)
(identifier)
@ -1454,6 +1458,10 @@ trait A {
def f: Int
}
trait A { self =>
def f: Int
}
class B {
self: Something[A] =>
@ -1463,6 +1471,14 @@ class B {
--------------------------------------------------------------------------------
(compilation_unit
(trait_definition
(identifier)
(template_body
(self_type
(identifier))
(function_declaration
(identifier)
(type_identifier))))
(trait_definition
(identifier)
(template_body

@ -185,10 +185,11 @@ class C:
(indented_block
(lambda_expression
(identifier)
(infix_expression
(identifier)
(operator_identifier)
(integer_literal))))))
(indented_block
(infix_expression
(identifier)
(operator_identifier)
(integer_literal)))))))
(call_expression
(identifier)
(colon_argument
@ -217,8 +218,8 @@ class C:
(indented_cases
(case_clause
(identifier)
(identifier)))))
(comment)
(identifier))
(comment))))
(ascription_expression
(identifier)
(type_identifier))
@ -442,8 +443,8 @@ class C:
(if_expression
(boolean_literal)
(indented_block
(unit))
(comment)
(unit)
(comment))
(indented_block
(unit))))))))
@ -1054,8 +1055,14 @@ object O {
val l = a => a + 1
val b = (x: Int, y: Int) => { x * y }
val f = _ => 2
(a, b, _) => a - b
foo { i => val x = 2 + i }
foo { i =>
val x = 2 + i
x
}
{ x =>
val y = 2 * x
y * y
}
}
--------------------------------------------------------------------------------
@ -1092,29 +1099,33 @@ object O {
(lambda_expression
(wildcard)
(integer_literal)))
(lambda_expression
(bindings
(binding
(identifier))
(binding
(identifier))
(binding
(identifier)))
(infix_expression
(identifier)
(operator_identifier)
(identifier)))
(call_expression
(identifier)
(block
(lambda_expression
(identifier)
(indented_block
(val_definition
(identifier)
(infix_expression
(integer_literal)
(operator_identifier)
(identifier)))
(identifier)))))
(block
(lambda_expression
(identifier)
(indented_block
(val_definition
(identifier)
(infix_expression
(integer_literal)
(operator_identifier)
(identifier)))))))))
(identifier)))
(infix_expression
(identifier)
(operator_identifier)
(identifier))))))))
================================================================================
Unit expressions
@ -1648,6 +1659,7 @@ throws()
using()
--------------------------------------------------------------------------------
(compilation_unit
(call_expression
(identifier)

@ -369,17 +369,37 @@ module.exports = grammar({
/*
* TemplateBody ::= :<<< [SelfType] TemplateStat {semi TemplateStat} >>>
*/
template_body: $ =>
template_body: $ => choice(
prec.left(PREC.control, $._indented_template_body),
prec.left(PREC.control, $._braced_template_body),
),
_indented_template_body: $ => seq(
':',
$._indent,
optional($.self_type),
$._block,
$._outdent,
),
_braced_template_body: $ => seq(
'{',
optional(choice(
$._braced_template_body1,
$._braced_template_body2,
)),
'}',
),
_braced_template_body1: $ => seq(optional($.self_type), $._block),
_braced_template_body2: $ => seq(
choice(
prec.left(
PREC.control,
seq(":", $._indent, optional($.self_type), $._block, $._outdent),
),
prec.left(
PREC.control,
seq("{", optional($.self_type), optional($._block), "}"),
),
seq($._indent, optional($.self_type)),
seq(optional($.self_type), $._indent),
),
optional($._block),
$._outdent
),
/*
* WithTemplateBody ::= <<< [SelfType] TemplateStat {semi TemplateStat} >>>
@ -1038,12 +1058,13 @@ module.exports = grammar({
$.call_expression,
),
lambda_expression: $ =>
prec.right(
seq(
field("parameters", choice($.bindings, $._identifier, $.wildcard)),
"=>",
$._block,
$._indentable_expression,
),
),

22
src/scanner.c vendored

@ -89,6 +89,18 @@ static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_inte
}
}
/*
 * Reports whether the input at the lexer's current position begins a
 * comment, i.e. the next two characters are "//" or "/*".
 *
 * The token end is marked before peeking, so the '/' that may be consumed
 * while looking ahead is not included in whatever token the caller emits.
 * Callers use a true result to skip emitting INDENT/OUTDENT, because
 * comments should not affect indentation.
 */
static bool detect_comment_start(TSLexer *lexer) {
  lexer->mark_end(lexer);

  // Anything other than a leading '/' cannot open a comment.
  if (lexer->lookahead != '/') {
    return false;
  }

  // Step over the first '/' and inspect the character that follows it.
  advance(lexer);
  return lexer->lookahead == '/' || lexer->lookahead == '*';
}
bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
ScannerStack *stack = (ScannerStack *)payload;
@ -103,7 +115,8 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
(
(prev != -1) &&
lexer->lookahead == ')' ||
lexer->lookahead == ']'
lexer->lookahead == ']' ||
lexer->lookahead == '}'
) || (
stack->last_indentation_size != -1 &&
prev != -1 &&
@ -131,6 +144,9 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
newline_count > 0 &&
(isEmptyStack(stack) ||
indentation_size > peekStack(stack))) {
if (detect_comment_start(lexer)) {
return false;
}
pushStack(stack, indentation_size);
lexer->result_symbol = INDENT;
LOG(" INDENT\n");
@ -148,6 +164,10 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
LOG(" pop\n");
LOG(" OUTDENT\n");
lexer->result_symbol = OUTDENT;
lexer->mark_end(lexer);
if (detect_comment_start(lexer)) {
return false;
}
stack->last_indentation_size = indentation_size;
stack->last_newline_count = newline_count;
if (lexer->eof(lexer)) {