|
|
|
@ -1,19 +1,29 @@
|
|
|
|
#include <tree_sitter/parser.h>
|
|
|
|
#include "tree_sitter/parser.h"
|
|
|
|
|
|
|
|
|
|
|
|
#include <wctype.h>
|
|
|
|
#include <wctype.h>
|
|
|
|
|
|
|
|
|
|
|
|
enum TokenType {
|
|
|
|
enum TokenType {
|
|
|
|
AUTOMATIC_SEMICOLON,
|
|
|
|
AUTOMATIC_SEMICOLON,
|
|
|
|
TEMPLATE_CHARS,
|
|
|
|
TEMPLATE_CHARS,
|
|
|
|
TERNARY_QMARK,
|
|
|
|
TERNARY_QMARK,
|
|
|
|
|
|
|
|
HTML_COMMENT,
|
|
|
|
|
|
|
|
LOGICAL_OR,
|
|
|
|
|
|
|
|
ESCAPE_SEQUENCE,
|
|
|
|
|
|
|
|
REGEX_PATTERN,
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Scanner "create" hook.
 *
 * Allocates nothing and always returns NULL.
 *
 * The parameter list is spelled (void) so that this definition is a real
 * prototype; an empty list `()` declares an unprototyped function in C
 * (an obsolescent feature) and disables argument checking at call sites.
 */
void *tree_sitter_javascript_external_scanner_create(void) { return NULL; }
|
|
|
|
/*
 * Scanner "create" hook.
 *
 * Allocates nothing and always returns NULL.
 *
 * The parameter list is spelled (void) so that this definition is a real
 * prototype; an empty list `()` declares an unprototyped function in C
 * (an obsolescent feature) and disables argument checking at call sites.
 */
void *tree_sitter_javascript_external_scanner_create(void) { return NULL; }
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Scanner "destroy" hook.
 *
 * Deliberately a no-op: the argument is ignored and nothing is freed.
 */
void tree_sitter_javascript_external_scanner_destroy(void *p) {
  (void)p; /* unused */
}
|
|
|
|
/*
 * Scanner "destroy" hook.
 *
 * Deliberately a no-op: the argument is ignored and nothing is freed.
 */
void tree_sitter_javascript_external_scanner_destroy(void *p) {
  (void)p; /* unused */
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Scanner "reset" hook.
 *
 * Deliberately a no-op: the argument is ignored and nothing is modified.
 */
void tree_sitter_javascript_external_scanner_reset(void *p) {
  (void)p; /* unused */
}
|
|
|
|
/*
 * Scanner "reset" hook.
 *
 * Deliberately a no-op: the argument is ignored and nothing is modified.
 */
void tree_sitter_javascript_external_scanner_reset(void *p) {
  (void)p; /* unused */
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Scanner "serialize" hook.
 *
 * Writes nothing into `buffer` and always returns 0; both arguments are
 * ignored.
 */
unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) {
  (void)p;      /* unused */
  (void)buffer; /* unused: never written */
  return 0u;
}
|
|
|
|
/*
 * Scanner "serialize" hook.
 *
 * Writes nothing into `buffer` and always returns 0; both arguments are
 * ignored.
 */
unsigned tree_sitter_javascript_external_scanner_serialize(void *p, char *buffer) {
  (void)p;      /* unused */
  (void)buffer; /* unused: never written */
  return 0u;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Scanner "deserialize" hook.
 *
 * Deliberately a no-op: the payload `p`, the byte buffer `b` and its
 * length `n` are all ignored.
 */
void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {
  (void)p; /* unused */
  (void)b; /* unused: never read */
  (void)n; /* unused */
}
|
|
|
|
/*
 * Scanner "deserialize" hook.
 *
 * Deliberately a no-op: the payload `p`, the byte buffer `b` and its
 * length `n` are all ignored.
 */
void tree_sitter_javascript_external_scanner_deserialize(void *p, const char *b, unsigned n) {
  (void)p; /* unused */
  (void)b; /* unused: never read */
  (void)n; /* unused */
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Steps the lexer forward by invoking its `advance` callback with `false`
 * as the second argument.
 * NOTE(review): in the tree-sitter lexer API that flag presumably means
 * "treat the character as skipped" -- confirm against parser.h.
 */
static void advance(TSLexer *lexer) {
  const bool as_skipped = false;
  lexer->advance(lexer, as_skipped);
}
|
|
|
|
/*
 * Steps the lexer forward by invoking its `advance` callback with `false`
 * as the second argument.
 * NOTE(review): in the tree-sitter lexer API that flag presumably means
 * "treat the character as skipped" -- confirm against parser.h.
 */
static void advance(TSLexer *lexer) {
  const bool as_skipped = false;
  lexer->advance(lexer, as_skipped);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Steps the lexer forward by invoking its `advance` callback with `true`
 * as the second argument.
 * NOTE(review): in the tree-sitter lexer API that flag presumably means
 * "treat the character as skipped" -- confirm against parser.h.
 */
static void skip(TSLexer *lexer) {
  const bool as_skipped = true;
  lexer->advance(lexer, as_skipped);
}
|
|
|
|
/*
 * Steps the lexer forward by invoking its `advance` callback with `true`
 * as the second argument.
 * NOTE(review): in the tree-sitter lexer API that flag presumably means
 * "treat the character as skipped" -- confirm against parser.h.
 */
static void skip(TSLexer *lexer) {
  const bool as_skipped = true;
  lexer->advance(lexer, as_skipped);
}
|
|
|
|
|
|
|
|
|
|
|
|
static bool scan_template_chars(TSLexer *lexer) {
|
|
|
|
static bool scan_template_chars(TSLexer *lexer) {
|
|
|
|
@ -27,7 +37,9 @@ static bool scan_template_chars(TSLexer *lexer) {
|
|
|
|
return false;
|
|
|
|
return false;
|
|
|
|
case '$':
|
|
|
|
case '$':
|
|
|
|
advance(lexer);
|
|
|
|
advance(lexer);
|
|
|
|
if (lexer->lookahead == '{') return has_content;
|
|
|
|
if (lexer->lookahead == '{') {
|
|
|
|
|
|
|
|
return has_content;
|
|
|
|
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case '\\':
|
|
|
|
case '\\':
|
|
|
|
return has_content;
|
|
|
|
return has_content;
|
|
|
|
@ -37,7 +49,7 @@ static bool scan_template_chars(TSLexer *lexer) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static bool scan_whitespace_and_comments(TSLexer *lexer) {
|
|
|
|
static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) {
|
|
|
|
for (;;) {
|
|
|
|
for (;;) {
|
|
|
|
while (iswspace(lexer->lookahead)) {
|
|
|
|
while (iswspace(lexer->lookahead)) {
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
@ -48,9 +60,11 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) {
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->lookahead == '/') {
|
|
|
|
if (lexer->lookahead == '/') {
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
while (lexer->lookahead != 0 && lexer->lookahead != '\n') {
|
|
|
|
while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
|
|
|
|
|
|
|
|
lexer->lookahead != 0x2029) {
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*scanned_comment = true;
|
|
|
|
} else if (lexer->lookahead == '*') {
|
|
|
|
} else if (lexer->lookahead == '*') {
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
while (lexer->lookahead != 0) {
|
|
|
|
while (lexer->lookahead != 0) {
|
|
|
|
@ -58,6 +72,7 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) {
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
if (lexer->lookahead == '/') {
|
|
|
|
if (lexer->lookahead == '/') {
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
|
|
|
|
*scanned_comment = true;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
@ -73,22 +88,48 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static bool scan_automatic_semicolon(TSLexer *lexer) {
|
|
|
|
static bool scan_automatic_semicolon(TSLexer *lexer, bool comment_condition, bool *scanned_comment) {
|
|
|
|
lexer->result_symbol = AUTOMATIC_SEMICOLON;
|
|
|
|
lexer->result_symbol = AUTOMATIC_SEMICOLON;
|
|
|
|
lexer->mark_end(lexer);
|
|
|
|
lexer->mark_end(lexer);
|
|
|
|
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
for (;;) {
|
|
|
|
if (lexer->lookahead == 0) return true;
|
|
|
|
if (lexer->lookahead == 0) {
|
|
|
|
if (lexer->lookahead == '}') return true;
|
|
|
|
return true;
|
|
|
|
if (lexer->is_at_included_range_start(lexer)) return true;
|
|
|
|
}
|
|
|
|
if (lexer->lookahead == '\n') break;
|
|
|
|
|
|
|
|
if (!iswspace(lexer->lookahead)) return false;
|
|
|
|
if (lexer->lookahead == '/') {
|
|
|
|
|
|
|
|
if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (comment_condition && lexer->lookahead != ',' && lexer->lookahead != '=') {
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->lookahead == '}') {
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->is_at_included_range_start(lexer)) {
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->lookahead == '\n' || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (!iswspace(lexer->lookahead)) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
|
|
|
|
|
|
|
|
if (!scan_whitespace_and_comments(lexer)) return false;
|
|
|
|
if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
switch (lexer->lookahead) {
|
|
|
|
switch (lexer->lookahead) {
|
|
|
|
case ',':
|
|
|
|
case ',':
|
|
|
|
@ -127,17 +168,28 @@ static bool scan_automatic_semicolon(TSLexer *lexer) {
|
|
|
|
case 'i':
|
|
|
|
case 'i':
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->lookahead != 'n') return true;
|
|
|
|
if (lexer->lookahead != 'n') {
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
|
|
|
|
|
|
|
|
if (!iswalpha(lexer->lookahead)) return false;
|
|
|
|
if (!iswalpha(lexer->lookahead)) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < 8; i++) {
|
|
|
|
for (unsigned i = 0; i < 8; i++) {
|
|
|
|
if (lexer->lookahead != "stanceof"[i]) return true;
|
|
|
|
if (lexer->lookahead != "stanceof"[i]) {
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!iswalpha(lexer->lookahead)) return false;
|
|
|
|
if (!iswalpha(lexer->lookahead)) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@ -146,21 +198,27 @@ static bool scan_automatic_semicolon(TSLexer *lexer) {
|
|
|
|
|
|
|
|
|
|
|
|
static bool scan_ternary_qmark(TSLexer *lexer) {
|
|
|
|
static bool scan_ternary_qmark(TSLexer *lexer) {
|
|
|
|
for (;;) {
|
|
|
|
for (;;) {
|
|
|
|
if (!iswspace(lexer->lookahead)) break;
|
|
|
|
if (!iswspace(lexer->lookahead)) {
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
skip(lexer);
|
|
|
|
skip(lexer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->lookahead == '?') {
|
|
|
|
if (lexer->lookahead == '?') {
|
|
|
|
advance(lexer);
|
|
|
|
advance(lexer);
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->lookahead == '?') return false;
|
|
|
|
if (lexer->lookahead == '?') {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
lexer->mark_end(lexer);
|
|
|
|
lexer->mark_end(lexer);
|
|
|
|
lexer->result_symbol = TERNARY_QMARK;
|
|
|
|
lexer->result_symbol = TERNARY_QMARK;
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->lookahead == '.') {
|
|
|
|
if (lexer->lookahead == '.') {
|
|
|
|
advance(lexer);
|
|
|
|
advance(lexer);
|
|
|
|
if (iswdigit(lexer->lookahead)) return true;
|
|
|
|
if (iswdigit(lexer->lookahead)) {
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
return true;
|
|
|
|
@ -168,20 +226,68 @@ static bool scan_ternary_qmark(TSLexer *lexer) {
|
|
|
|
return false;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer,
|
|
|
|
static bool scan_html_comment(TSLexer *lexer) {
|
|
|
|
const bool *valid_symbols) {
|
|
|
|
while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
|
|
|
|
|
|
|
|
skip(lexer);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const char *comment_start = "<!--";
|
|
|
|
|
|
|
|
const char *comment_end = "-->";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (lexer->lookahead == '<') {
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < 4; i++) {
|
|
|
|
|
|
|
|
if (lexer->lookahead != comment_start[i]) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
advance(lexer);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (lexer->lookahead == '-') {
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < 3; i++) {
|
|
|
|
|
|
|
|
if (lexer->lookahead != comment_end[i]) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
advance(lexer);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != 0x2028 &&
|
|
|
|
|
|
|
|
lexer->lookahead != 0x2029) {
|
|
|
|
|
|
|
|
advance(lexer);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
lexer->result_symbol = HTML_COMMENT;
|
|
|
|
|
|
|
|
lexer->mark_end(lexer);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool tree_sitter_javascript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
|
|
|
|
if (valid_symbols[TEMPLATE_CHARS]) {
|
|
|
|
if (valid_symbols[TEMPLATE_CHARS]) {
|
|
|
|
if (valid_symbols[AUTOMATIC_SEMICOLON]) return false;
|
|
|
|
if (valid_symbols[AUTOMATIC_SEMICOLON]) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
return scan_template_chars(lexer);
|
|
|
|
return scan_template_chars(lexer);
|
|
|
|
} else if (valid_symbols[AUTOMATIC_SEMICOLON]) {
|
|
|
|
}
|
|
|
|
bool ret = scan_automatic_semicolon(lexer);
|
|
|
|
|
|
|
|
if (!ret && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?')
|
|
|
|
if (valid_symbols[AUTOMATIC_SEMICOLON]) {
|
|
|
|
|
|
|
|
bool scanned_comment = false;
|
|
|
|
|
|
|
|
bool ret = scan_automatic_semicolon(lexer, !valid_symbols[LOGICAL_OR], &scanned_comment);
|
|
|
|
|
|
|
|
if (!ret && !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?') {
|
|
|
|
return scan_ternary_qmark(lexer);
|
|
|
|
return scan_ternary_qmark(lexer);
|
|
|
|
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (valid_symbols[TERNARY_QMARK]) {
|
|
|
|
if (valid_symbols[TERNARY_QMARK]) {
|
|
|
|
return scan_ternary_qmark(lexer);
|
|
|
|
return scan_ternary_qmark(lexer);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] && !valid_symbols[ESCAPE_SEQUENCE] &&
|
|
|
|
|
|
|
|
!valid_symbols[REGEX_PATTERN]) {
|
|
|
|
|
|
|
|
return scan_html_comment(lexer);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|