Merge pull request #340 from tree-sitter/scanner-and-generate

Tidy up scanner
pull/659/head
Amaan Qureshi 2023-08-20 03:15:51 +07:00 committed by GitHub
commit d50b6ca5cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 133 deletions

151
src/scanner.c vendored

@ -25,24 +25,21 @@ void *tree_sitter_scala_external_scanner_create() {
return createStack();
}
void tree_sitter_scala_external_scanner_destroy(void *p) {
free(p);
void tree_sitter_scala_external_scanner_destroy(void *payload) {
free(payload);
}
void tree_sitter_scala_external_scanner_reset(void *p) {
resetStack(p);
unsigned tree_sitter_scala_external_scanner_serialize(void *payload, char *buffer) {
return serialiseStack(payload, buffer);
}
unsigned tree_sitter_scala_external_scanner_serialize(void *p, char *buffer) {
return serialiseStack(p, buffer);
}
void tree_sitter_scala_external_scanner_deserialize(void *p, const char *b,
unsigned n) {
deserialiseStack(p, b, n);
void tree_sitter_scala_external_scanner_deserialize(void *payload, const char *buffer,
unsigned length) {
deserialiseStack(payload, buffer, length);
}
// Move the lexer forward by one character through its `advance` callback,
// passing `false` as the second (skip) argument.
static void advance(TSLexer *lexer) {
    lexer->advance(lexer, false);
}
// Move the lexer forward by one character through its `advance` callback,
// passing `true` as the second (skip) argument.
static void skip(TSLexer *lexer) {
    lexer->advance(lexer, true);
}
static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_interpolation) {
@ -63,24 +60,26 @@ static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_inte
if (is_multiline && has_interpolation) {
lexer->result_symbol = INTERPOLATED_MULTILINE_STRING_MIDDLE;
return true;
} else if (has_interpolation){
}
if (has_interpolation) {
lexer->result_symbol = INTERPOLATED_STRING_MIDDLE;
return true;
} else {
advance(lexer);
}
advance(lexer);
} else {
closing_quote_count = 0;
if (lexer->lookahead == '\\') {
advance(lexer);
if (lexer->lookahead != 0) advance(lexer);
if (!lexer->eof(lexer)) {
advance(lexer);
}
} else if (lexer->lookahead == '\n') {
if (is_multiline) {
advance(lexer);
} else {
return false;
}
} else if (lexer->lookahead == 0) {
} else if (lexer->eof(lexer)) {
return false;
} else {
advance(lexer);
@ -101,6 +100,16 @@ static bool detect_comment_start(TSLexer *lexer) {
return false;
}
// Try to match `word` character by character against the lexer's lookahead.
// Every matching character is consumed with advance(); on the first mismatch
// the function returns false, and the characters matched so far stay consumed.
// After the whole word has matched, the result is true only when the next
// lookahead character is not alphanumeric (as judged by iswalnum).
static bool scan_word(TSLexer *lexer, const char* const word) {
    const char *cursor = word;
    while (*cursor != '\0') {
        if (lexer->lookahead != *cursor) {
            return false;
        }
        advance(lexer);
        cursor++;
    }
    if (iswalnum(lexer->lookahead)) {
        return false;
    }
    return true;
}
bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
ScannerStack *stack = (ScannerStack *)payload;
@ -114,8 +123,9 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
newline_count++;
indentation_size = 0;
}
else
else {
indentation_size++;
}
skip(lexer);
}
@ -174,7 +184,7 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
if (lexer->eof(lexer)) {
stack->last_column = -1;
} else {
stack->last_column = lexer->get_column(lexer);
stack->last_column = (int)lexer->get_column(lexer);
}
return true;
}
@ -203,14 +213,17 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
// a
// .b
// .c
if (lexer->lookahead == '.') return false;
if (lexer->lookahead == '.') {
return false;
}
// Single-line and multi-line comments
if (lexer->lookahead == '/') {
advance(lexer);
if (lexer->lookahead == '/') {
return false;
} else if (lexer->lookahead == '*') {
}
if (lexer->lookahead == '*') {
advance(lexer);
while (!lexer->eof(lexer)) {
if (lexer->lookahead == '*') {
@ -238,108 +251,42 @@ bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
}
if (valid_symbols[ELSE]) {
if (lexer->lookahead != 'e') return true;
advance(lexer);
if (lexer->lookahead != 'l') return true;
advance(lexer);
if (lexer->lookahead != 's') return true;
advance(lexer);
if (lexer->lookahead != 'e') return true;
advance(lexer);
if (iswalpha(lexer->lookahead)) return true;
return false;
return !scan_word(lexer, "else");
}
if (valid_symbols[CATCH]) {
if (lexer->lookahead != 'c' && lexer->lookahead != 'f') return true;
advance(lexer);
if (lexer->lookahead == 'a') {
advance(lexer);
if (lexer->lookahead != 't') return true;
advance(lexer);
if (lexer->lookahead != 'c') return true;
advance(lexer);
if (lexer->lookahead != 'h') return true;
advance(lexer);
if (iswalpha(lexer->lookahead)) return true;
return false;
} else if (lexer->lookahead == 'i') {
advance(lexer);
if (lexer->lookahead != 'n') return true;
advance(lexer);
if (lexer->lookahead != 'a') return true;
advance(lexer);
if (lexer->lookahead != 'l') return true;
advance(lexer);
if (lexer->lookahead != 'l') return true;
advance(lexer);
if (lexer->lookahead != 'y') return true;
advance(lexer);
if (iswalpha(lexer->lookahead)) return true;
return false;
} else {
return true;
if (lexer->lookahead == 'c') {
return !scan_word(lexer, "catch");
}
if (lexer->lookahead == 'f') {
return !scan_word(lexer, "finally");
}
return true;
}
if (valid_symbols[FINALLY]) {
if (lexer->lookahead != 'f') return true;
advance(lexer);
if (lexer->lookahead != 'i') return true;
advance(lexer);
if (lexer->lookahead != 'n') return true;
advance(lexer);
if (lexer->lookahead != 'a') return true;
advance(lexer);
if (lexer->lookahead != 'l') return true;
advance(lexer);
if (lexer->lookahead != 'l') return true;
advance(lexer);
if (lexer->lookahead != 'y') return true;
advance(lexer);
if (iswalpha(lexer->lookahead)) return true;
return false;
return !scan_word(lexer, "finally");
}
if (valid_symbols[EXTENDS]) {
if (lexer->lookahead != 'e') return true;
advance(lexer);
if (lexer->lookahead != 'x') return true;
advance(lexer);
if (lexer->lookahead != 't') return true;
advance(lexer);
if (lexer->lookahead != 'e') return true;
advance(lexer);
if (lexer->lookahead != 'n') return true;
advance(lexer);
if (lexer->lookahead != 'd') return true;
advance(lexer);
if (lexer->lookahead != 's') return true;
advance(lexer);
if (iswalpha(lexer->lookahead)) return true;
return false;
return !scan_word(lexer, "extends");
}
if (valid_symbols[WITH]) {
if (lexer->lookahead != 'w') return true;
advance(lexer);
if (lexer->lookahead != 'i') return true;
advance(lexer);
if (lexer->lookahead != 't') return true;
advance(lexer);
if (lexer->lookahead != 'h') return true;
advance(lexer);
if (iswalpha(lexer->lookahead)) return true;
return false;
return !scan_word(lexer, "with");
}
if (newline_count > 1) return true;
if (newline_count > 1) {
return true;
}
return true;
}
while (iswspace(lexer->lookahead)) {
if (lexer->lookahead == '\n') newline_count++;
if (lexer->lookahead == '\n') {
newline_count++;
}
skip(lexer);
}

45
src/stack.h vendored

@ -1,6 +1,5 @@
#include <stdio.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -21,7 +20,7 @@ typedef struct ScannerStack {
int last_column;
} ScannerStack;
ScannerStack* createStack() {
static ScannerStack* createStack() {
ScannerStack* ptr = (ScannerStack*) malloc(sizeof(ScannerStack));
ptr -> top = 0;
@ -33,29 +32,28 @@ ScannerStack* createStack() {
return ptr;
}
bool isEmptyStack(ScannerStack *stack) { return stack->top == 0; }
static bool isEmptyStack(ScannerStack *stack) { return stack->top == 0; }
int peekStack(ScannerStack *stack) {
static int peekStack(ScannerStack *stack) {
return isEmptyStack(stack) ? -1 : stack->stack[stack->top - 1];
}
void pushStack(ScannerStack *stack, unsigned int value) {
static void pushStack(ScannerStack *stack, unsigned int value) {
stack->top++;
stack->stack[stack->top - 1] = value;
stack->stack[stack->top - 1] = (int)value;
}
int popStack(ScannerStack *stack) {
if (isEmptyStack(stack))
static int popStack(ScannerStack *stack) {
if (isEmptyStack(stack)) {
return -1;
else {
int result = peekStack(stack);
stack->top--;
return result;
}
int result = peekStack(stack);
stack->top--;
return result;
}
void printStack(ScannerStack *stack, char *msg) {
static void printStack(ScannerStack *stack, char *msg) {
LOG("%s Stack[top = %d; ", msg, stack->top);
for (int i = 0; i < stack->top; i++) {
LOG("%d | ", stack->stack[i]);
@ -63,7 +61,7 @@ void printStack(ScannerStack *stack, char *msg) {
LOG("]\n");
}
unsigned serialiseStack(ScannerStack *stack, char *buf) {
static unsigned serialiseStack(ScannerStack *stack, char *buf) {
int elements = isEmptyStack(stack) ? 0 : stack->top;
if (elements < 0) {
elements = 0;
@ -78,22 +76,15 @@ unsigned serialiseStack(ScannerStack *stack, char *buf) {
return result_length;
}
void deserialiseStack(ScannerStack* stack, const char* buf, unsigned n) {
if (n != 0) {
static void deserialiseStack(ScannerStack* stack, const char* buf, unsigned length) {
if (length != 0) {
int *intBuf = (int *)buf;
unsigned elements = n / sizeof(int) - 3;
stack->top = elements;
unsigned elements = length / sizeof(int) - 3;
stack->top = (int)elements;
memcpy(stack->stack, intBuf, elements * sizeof(int));
stack->last_indentation_size = intBuf[elements];
stack->last_newline_count = intBuf[elements + 1];
stack->last_column = intBuf[elements + 2];
}
}
// Clear the scanner stack's bookkeeping: `top` becomes 0,
// `last_indentation_size` and `last_column` become -1, and
// `last_newline_count` becomes 0. The stored elements are not touched.
void resetStack(ScannerStack *p) {
    p->last_column = -1;
    p->last_newline_count = 0;
    p->last_indentation_size = -1;
    p->top = 0;
}

@ -40,10 +40,6 @@ int main() {
assert(newStack -> top == 100);
assert(popStack(newStack) == 99);
resetStack(newStack);
assert(isEmptyStack(newStack));
printStack(stack, "hello");
printStack(newStack, "hello");
return 0;