Fix for comment-in-string bug (#27)

Nodes defined in `extras` can be expected before any node. Thus, `comment` could be expected before `escape_sequence` or `quoted_content`, i.e. inside of a string. Naturally, this makes no sense.

I tried wrapping `escape_sequence` and `quoted_content` in `token.immediate` to resolve the issue, but it had no effect (maybe it is specific to whitespace?). Instead, I found success in largely copying [`tree-sitter-rust`'s solution of using an external scanner for string content](9a6d980afb/src/scanner.c (L27-L40)).
pull/315/head
Jonathan Arnett 2022-05-23 00:17:07 +07:00 committed by GitHub
parent ec0286fd84
commit 17ed183fc8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 3804 additions and 3743 deletions

1
.gitattributes vendored

@ -1,2 +1,3 @@
src/** linguist-generated
src/scanner.c -linguist-generated
test/** linguist-documentation

@ -2,6 +2,7 @@ const NEWLINE = /\r?\n/;
module.exports = grammar({
name: "gleam",
externals: ($) => [$.quoted_content],
extras: ($) => [
";",
NEWLINE,
@ -581,10 +582,13 @@ module.exports = grammar({
),
/* Literals */
string: ($) => seq('"', repeat($._string_part), '"'),
_string_part: ($) => choice($.escape_sequence, $.quoted_content),
escape_sequence: ($) => /\\[efnrt\"\\]/,
quoted_content: ($) => /(?:[^\\\"]|\\[^efnrt\"\\])+/,
string: ($) =>
seq(
'"',
repeat(choice($.escape_sequence, $.quoted_content)),
token.immediate('"')
),
escape_sequence: ($) => token.immediate(/\\[efnrt\"\\]/),
float: ($) => /-?[0-9_]+\.[0-9_]+/,
integer: ($) =>
seq(optional("-"), choice($._hex, $._decimal, $._octal, $._binary)),

51
src/grammar.json generated

@ -5072,36 +5072,34 @@
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_string_part"
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "escape_sequence"
},
{
"type": "SYMBOL",
"name": "quoted_content"
}
]
}
},
{
"type": "STRING",
"value": "\""
}
]
},
"_string_part": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "escape_sequence"
},
{
"type": "SYMBOL",
"name": "quoted_content"
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "STRING",
"value": "\""
}
}
]
},
"escape_sequence": {
"type": "PATTERN",
"value": "\\\\[efnrt\\\"\\\\]"
},
"quoted_content": {
"type": "PATTERN",
"value": "(?:[^\\\\\\\"]|\\\\[^efnrt\\\"\\\\])+"
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PATTERN",
"value": "\\\\[efnrt\\\"\\\\]"
}
},
"float": {
"type": "PATTERN",
@ -5760,7 +5758,12 @@
]
],
"precedences": [],
"externals": [],
"externals": [
{
"type": "SYMBOL",
"name": "quoted_content"
}
],
"inline": [],
"supertypes": []
}

@ -4178,11 +4178,11 @@
},
{
"type": "float",
"named": true
"named": false
},
{
"type": "float",
"named": false
"named": true
},
{
"type": "fn",

7423
src/parser.c generated

File diff suppressed because it is too large Load Diff

@ -0,0 +1,28 @@
#include <tree_sitter/parser.h>
/* Token kinds this external scanner can emit. The enumerator value is used
 * both as an index into the `valid_symbols` array passed to the scan function
 * and as the value stored in `lexer->result_symbol`.
 * NOTE(review): tree-sitter pairs these with the grammar's `externals` list by
 * position, so keep the order in sync with grammar.js — confirm when adding
 * new external tokens. */
enum TokenType {
QUOTED_CONTENT
};
/* Lifecycle hooks for the external scanner. None of them reads or writes any
 * state, so each one is a no-op. */

/* Creates the scanner payload; there is nothing to allocate, so NULL is returned. */
void *tree_sitter_gleam_external_scanner_create() {
    return NULL;
}

/* Releases the scanner payload; nothing was allocated, so nothing is freed. */
void tree_sitter_gleam_external_scanner_destroy(void *payload) {
    (void)payload;
}

/* Writes scanner state into `buffer`; no bytes are written, so 0 is returned. */
unsigned tree_sitter_gleam_external_scanner_serialize(void *payload, char *buffer) {
    (void)payload;
    (void)buffer;
    return 0;
}

/* Restores scanner state from `buffer`; the input is ignored. */
void tree_sitter_gleam_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    (void)payload;
    (void)buffer;
    (void)length;
}
/* Scans the raw (non-escape) content of a string literal.
 *
 * Consumes characters up to, but not including, the next `"` or `\`, and
 * reports them as a single QUOTED_CONTENT token.
 *
 * payload       - unused; the scanner keeps no state.
 * lexer         - lexer whose `lookahead` is inspected and which is advanced
 *                 over the consumed characters.
 * valid_symbols - indexed by TokenType; tells which external tokens the
 *                 parser can accept at this position.
 *
 * Returns true when at least one character was consumed as QUOTED_CONTENT.
 * Returns false when QUOTED_CONTENT is not valid here, when the content is
 * empty, or when a lookahead of 0 is hit before a terminating `"` or `\`.
 */
bool tree_sitter_gleam_external_scanner_scan(void * payload, TSLexer *lexer, const bool * valid_symbols) {
    (void)payload;

    /* Explicitly decline when the token is not expected. Previously control
     * fell off the end of this non-void function on that path, which is
     * undefined behavior because the caller uses the return value. */
    if (!valid_symbols[QUOTED_CONTENT]) {
        return false;
    }

    bool has_content = false;

    /* Stop at a closing quote or at a backslash; neither is consumed. */
    while (lexer->lookahead != '"' && lexer->lookahead != '\\') {
        if (lexer->lookahead == 0) {
            /* Unterminated string: do not emit a token. */
            return false;
        }
        has_content = true;
        lexer->advance(lexer, false);
    }

    lexer->result_symbol = QUOTED_CONTENT;
    /* An empty run (e.g. `""` or content starting with `\`) is not a token. */
    return has_content;
}

@ -3,7 +3,7 @@ Constants
================================================================================
const a = "hello"
const a = "hello\nworld\!"
const a = "hello\nworld!"
const a = 1_234
const a = 0b110
const a = 0o7224

@ -1032,3 +1032,22 @@ fn lists(x) {
(list
(integer)
spread: (identifier)))))
================================================================================
Comment in string
================================================================================
io.println("// hello world!\n")
--------------------------------------------------------------------------------
(source_file
(function_call
function: (field_access
record: (identifier)
field: (label))
arguments: (arguments
(argument
value: (string
(quoted_content)
(escape_sequence))))))

@ -100,3 +100,10 @@ fn negate(arg) {
// <- operator
// ^ variable.parameter
}
fn comment_string_test() {
io.println("// hello world!")
// <- module
// ^ function
// ^ string
}