Fix indent/outdent tracking, part 2

Problem
-------
Given something like
  class A:
    def l: Int =
      1

    def m = ()
the scanner doesn't inject an automatic semicolon, since the newline
has already been consumed by the outdent, so the code fails to parse.

Solution
--------
Track `last_newline_count` in the payload, which represents the
newline_count at the time of the outdent.
It is then recovered on a subsequent scan so the automatic semicolon
logic can use it, but only if the column position hasn't moved.
pull/481/head
Eugene Yokota 2022-12-19 16:19:40 +07:00
parent 83aaa6020e
commit 1e70f7de9e
4 changed files with 41 additions and 11 deletions

@ -525,6 +525,11 @@ class A {
j j
} }
def h(x: T)(implicit ev: Reads[T]) def h(x: T)(implicit ev: Reads[T])
def l: Int =
1
def m = ()
} }
--- ---
@ -547,7 +552,9 @@ class A {
(function_declaration (function_declaration
(identifier) (identifier)
(parameters (parameter (identifier) (type_identifier))) (parameters (parameter (identifier) (type_identifier)))
(parameters (parameter (identifier) (generic_type (type_identifier) (type_arguments (type_identifier))))))))) (parameters (parameter (identifier) (generic_type (type_identifier) (type_arguments (type_identifier))))))
(function_definition (identifier) (type_identifier) (indented_block (integer_literal)))
(function_definition (identifier) (unit)))))
======================================= =======================================
Function definitions (Scala 3 syntax) Function definitions (Scala 3 syntax)

25
src/scanner.c vendored

@ -92,7 +92,7 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) { const bool *valid_symbols) {
ScannerStack *stack = (ScannerStack *)payload; ScannerStack *stack = (ScannerStack *)payload;
int prev = peekStack(stack); int prev = peekStack(stack);
unsigned newline_count = 0; int newline_count = 0;
int indentation_size = 0; int indentation_size = 0;
LOG("scanner was called at column: %d\n", lexer->get_column(lexer)); LOG("scanner was called at column: %d\n", lexer->get_column(lexer));
@ -121,27 +121,40 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
} }
printStack(stack, " before"); printStack(stack, " before");
if (valid_symbols[INDENT] && newline_count > 0 && if (valid_symbols[INDENT] &&
(isEmptyStack(stack) || indentation_size > peekStack(stack))) { newline_count > 0 &&
(isEmptyStack(stack) ||
indentation_size > peekStack(stack))) {
pushStack(stack, indentation_size); pushStack(stack, indentation_size);
lexer->result_symbol = INDENT; lexer->result_symbol = INDENT;
LOG(" INDENT\n"); LOG(" INDENT\n");
return true; return true;
} }
// This saves the newline_count into the stack since // This saves the indentation_size and newline_count so it can be used
// sometimes we need to outdent multiple times. // in subsequent calls for multiple outdent or autosemicolon.
if (valid_symbols[OUTDENT] && if (valid_symbols[OUTDENT] &&
(lexer->lookahead == 0 || ( (lexer->lookahead == 0 || (
newline_count > 0 && prev != -1 && indentation_size < prev))) { newline_count > 0 &&
prev != -1 &&
indentation_size < prev))) {
popStack(stack); popStack(stack);
LOG(" pop\n"); LOG(" pop\n");
LOG(" OUTDENT\n"); LOG(" OUTDENT\n");
lexer->result_symbol = OUTDENT; lexer->result_symbol = OUTDENT;
stack->last_indentation_size = indentation_size; stack->last_indentation_size = indentation_size;
stack->last_newline_count = newline_count;
stack->last_column = lexer->get_column(lexer);
return true; return true;
} }
// Recover newline_count from the outdent reset
if (stack->last_newline_count > 0 &&
lexer->get_column(lexer) == stack->last_column) {
newline_count += stack->last_newline_count;
}
stack->last_newline_count = 0;
printStack(stack, " after"); printStack(stack, " after");
LOG(" indentation_size: %d, newline_count: %d, column: %d, indent_is_valid: %d, dedent_is_valid: %d\n", indentation_size, LOG(" indentation_size: %d, newline_count: %d, column: %d, indent_is_valid: %d, dedent_is_valid: %d\n", indentation_size,

14
src/stack.h vendored

@ -16,6 +16,8 @@ typedef struct ScannerStack {
unsigned int stack[STACK_SIZE]; unsigned int stack[STACK_SIZE];
int top; int top;
int last_indentation_size; int last_indentation_size;
int last_newline_count;
int last_column;
} ScannerStack; } ScannerStack;
ScannerStack* createStack() { ScannerStack* createStack() {
@ -23,6 +25,8 @@ ScannerStack* createStack() {
ptr -> top = 0; ptr -> top = 0;
ptr -> last_indentation_size = -1; ptr -> last_indentation_size = -1;
ptr -> last_newline_count = 0;
ptr -> last_column = -1;
memset(ptr -> stack, STACK_SIZE, (0)); memset(ptr -> stack, STACK_SIZE, (0));
return ptr; return ptr;
@ -60,10 +64,12 @@ void printStack(ScannerStack *stack, char *msg) {
unsigned serialiseStack(ScannerStack *stack, char *buf) { unsigned serialiseStack(ScannerStack *stack, char *buf) {
unsigned elements = isEmptyStack(stack) ? 0 : stack->top; unsigned elements = isEmptyStack(stack) ? 0 : stack->top;
unsigned result_length = (elements + 1) * sizeof(int); unsigned result_length = (elements + 3) * sizeof(int);
int *placement = (int *)buf; int *placement = (int *)buf;
memcpy(placement, stack->stack, elements * sizeof(int)); memcpy(placement, stack->stack, elements * sizeof(int));
placement[elements] = stack->last_indentation_size; placement[elements] = stack->last_indentation_size;
placement[elements + 1] = stack->last_newline_count;
placement[elements + 2] = stack->last_column;
return result_length; return result_length;
} }
@ -72,14 +78,18 @@ void deserialiseStack(ScannerStack* stack, const char* buf, unsigned n) {
if (n != 0) { if (n != 0) {
int *intBuf = (int *)buf; int *intBuf = (int *)buf;
unsigned elements = n / sizeof(int) - 1; unsigned elements = n / sizeof(int) - 3;
stack->top = elements; stack->top = elements;
memcpy(stack->stack, intBuf, elements * sizeof(int)); memcpy(stack->stack, intBuf, elements * sizeof(int));
stack->last_indentation_size = intBuf[elements]; stack->last_indentation_size = intBuf[elements];
stack->last_newline_count = intBuf[elements + 1];
stack->last_column = intBuf[elements + 2];
} }
} }
void resetStack(ScannerStack *p) { void resetStack(ScannerStack *p) {
p->top = 0; p->top = 0;
p->last_indentation_size = -1; p->last_indentation_size = -1;
p->last_newline_count = 0;
p->last_column = -1;
} }

@ -32,11 +32,11 @@ int main() {
pushStack(stack, i); pushStack(stack, i);
} }
assert(serialiseStack(stack, buf) == sizeof(int) * 251); assert(serialiseStack(stack, buf) == sizeof(int) * 253);
ScannerStack *newStack = createStack(); ScannerStack *newStack = createStack();
deserialiseStack(newStack, buf, sizeof(int) * 251); deserialiseStack(newStack, buf, sizeof(int) * 253);
assert(newStack -> top == 250); assert(newStack -> top == 250);
assert(popStack(newStack) == 249); assert(popStack(newStack) == 249);