Fix indent/outdent tracking, part 2

Problem
-------
Given something like
  class A:
    def l: Int =
      1

    def m = ()
the scanner doesn't inject an automatic semicolon, since the newline
has already been consumed by outdenting, and it fails to parse the code.

Solution
--------
Track `last_newline_count` in the payload, which represents the
newline_count at the time of the outdent.
This is then recovered so that automatic semicolon insertion can use it,
but only if the column position hasn't moved.
pull/481/head
Eugene Yokota 2022-12-19 16:19:40 +07:00
parent 83aaa6020e
commit 1e70f7de9e
4 changed files with 41 additions and 11 deletions

@ -525,6 +525,11 @@ class A {
j
}
def h(x: T)(implicit ev: Reads[T])
def l: Int =
1
def m = ()
}
---
@ -547,7 +552,9 @@ class A {
(function_declaration
(identifier)
(parameters (parameter (identifier) (type_identifier)))
(parameters (parameter (identifier) (generic_type (type_identifier) (type_arguments (type_identifier)))))))))
(parameters (parameter (identifier) (generic_type (type_identifier) (type_arguments (type_identifier))))))
(function_definition (identifier) (type_identifier) (indented_block (integer_literal)))
(function_definition (identifier) (unit)))))
=======================================
Function definitions (Scala 3 syntax)

25
src/scanner.c vendored

@ -92,7 +92,7 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
ScannerStack *stack = (ScannerStack *)payload;
int prev = peekStack(stack);
unsigned newline_count = 0;
int newline_count = 0;
int indentation_size = 0;
LOG("scanner was called at column: %d\n", lexer->get_column(lexer));
@ -121,27 +121,40 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
}
printStack(stack, " before");
if (valid_symbols[INDENT] && newline_count > 0 &&
(isEmptyStack(stack) || indentation_size > peekStack(stack))) {
if (valid_symbols[INDENT] &&
newline_count > 0 &&
(isEmptyStack(stack) ||
indentation_size > peekStack(stack))) {
pushStack(stack, indentation_size);
lexer->result_symbol = INDENT;
LOG(" INDENT\n");
return true;
}
// This saves the newline_count into the stack since
// sometimes we need to outdent multiple times.
// This saves the indentation_size and newline_count so it can be used
// in subsequent calls for multiple outdent or autosemicolon.
if (valid_symbols[OUTDENT] &&
(lexer->lookahead == 0 || (
newline_count > 0 && prev != -1 && indentation_size < prev))) {
newline_count > 0 &&
prev != -1 &&
indentation_size < prev))) {
popStack(stack);
LOG(" pop\n");
LOG(" OUTDENT\n");
lexer->result_symbol = OUTDENT;
stack->last_indentation_size = indentation_size;
stack->last_newline_count = newline_count;
stack->last_column = lexer->get_column(lexer);
return true;
}
// Recover newline_count from the outdent reset
if (stack->last_newline_count > 0 &&
lexer->get_column(lexer) == stack->last_column) {
newline_count += stack->last_newline_count;
}
stack->last_newline_count = 0;
printStack(stack, " after");
LOG(" indentation_size: %d, newline_count: %d, column: %d, indent_is_valid: %d, dedent_is_valid: %d\n", indentation_size,

14
src/stack.h vendored

@ -16,6 +16,8 @@ typedef struct ScannerStack {
unsigned int stack[STACK_SIZE];
int top;
int last_indentation_size;
int last_newline_count;
int last_column;
} ScannerStack;
ScannerStack* createStack() {
@ -23,6 +25,8 @@ ScannerStack* createStack() {
ptr -> top = 0;
ptr -> last_indentation_size = -1;
ptr -> last_newline_count = 0;
ptr -> last_column = -1;
memset(ptr -> stack, STACK_SIZE, (0));
return ptr;
@ -60,10 +64,12 @@ void printStack(ScannerStack *stack, char *msg) {
unsigned serialiseStack(ScannerStack *stack, char *buf) {
unsigned elements = isEmptyStack(stack) ? 0 : stack->top;
unsigned result_length = (elements + 1) * sizeof(int);
unsigned result_length = (elements + 3) * sizeof(int);
int *placement = (int *)buf;
memcpy(placement, stack->stack, elements * sizeof(int));
placement[elements] = stack->last_indentation_size;
placement[elements + 1] = stack->last_newline_count;
placement[elements + 2] = stack->last_column;
return result_length;
}
@ -72,14 +78,18 @@ void deserialiseStack(ScannerStack* stack, const char* buf, unsigned n) {
if (n != 0) {
int *intBuf = (int *)buf;
unsigned elements = n / sizeof(int) - 1;
unsigned elements = n / sizeof(int) - 3;
stack->top = elements;
memcpy(stack->stack, intBuf, elements * sizeof(int));
stack->last_indentation_size = intBuf[elements];
stack->last_newline_count = intBuf[elements + 1];
stack->last_column = intBuf[elements + 2];
}
}
void resetStack(ScannerStack *p) {
p->top = 0;
p->last_indentation_size = -1;
p->last_newline_count = 0;
p->last_column = -1;
}

@ -32,11 +32,11 @@ int main() {
pushStack(stack, i);
}
assert(serialiseStack(stack, buf) == sizeof(int) * 251);
assert(serialiseStack(stack, buf) == sizeof(int) * 253);
ScannerStack *newStack = createStack();
deserialiseStack(newStack, buf, sizeof(int) * 251);
deserialiseStack(newStack, buf, sizeof(int) * 253);
assert(newStack -> top == 250);
assert(popStack(newStack) == 249);