Merge commit '1a5df0206b25a05cb1b35a68d2105fc7493df39b' into add-racket

pull/483/head
6cdh 2023-02-12 16:41:16 +07:00
commit 24ccd0c527
10 changed files with 810 additions and 747 deletions

@ -6,3 +6,4 @@ Cargo.lock
test.rkt
compile_commands.json
.cache/
.vscode/

@ -0,0 +1,9 @@
===
test1
===
#<<
---
(program
(here_string))

@ -0,0 +1,10 @@
===
test1
===
#<<
#<<
---
(program
(here_string))

@ -0,0 +1,13 @@
===
test1
===
;; NOTE: this file use CRLF
#<<
#<<
---
(program
(comment)
(here_string))

@ -0,0 +1,12 @@
===
test1
===
;; NOTE: this file use CRLF
#<<
---
(program
(comment)
(here_string))

@ -151,7 +151,7 @@ module.exports = grammar({
seq("\\u", /[0-9a-fA-F]{1,4}/),
seq("\\u", /[0-9a-fA-F]{4,4}/),
seq("\\U", /[0-9a-fA-F]{1,8}/),
seq("\\", LEAF.newline))),
seq("\\", /[\r\n]|(\r\n)/))),
// string }}}

@ -514,7 +514,7 @@
},
{
"type": "PATTERN",
"value": "[\\r\\n\\u{85}\\u{2028}\\u{2029}]"
"value": "[\\r\\n]|(\\r\\n)"
}
]
}

File diff suppressed because it is too large Load Diff

@ -17,63 +17,76 @@ class optional_str {
public:
optional_str() : valid(true) {}
static optional_str empty() {
static optional_str none() {
optional_str emp;
emp.valid = false;
return emp;
}
bool has_value() const { return valid; }
bool operator==(const optional_str &rhs) const {
if (!this->valid) {
return !rhs.valid;
} else {
return rhs.valid && this->str == rhs.str;
}
}
u32string *operator->() { return &this->str; }
bool is_none() const { return !this->valid; }
const u32string &content() const { return this->str; }
u32string &content() { return this->str; }
};
// NOTE: only "\n" is treated as a newline here,
// which implies that "\r" can also be part of the terminator.
inline bool isnewline(int32_t c) {
return c == '\n' || c == '\r' || c == 0x85 || c == 0x2028 || c == 0x2029;
return c == '\n';
}
inline optional_str readline(TSLexer *lexer) {
inline optional_str read_terminator(TSLexer *lexer) {
optional_str line;
while (!isnewline(lexer->lookahead)) {
if (lexer->eof(lexer)) {
return optional_str::empty();
while (true) {
if (isnewline(lexer->lookahead)) {
return line;
} else if (lexer->eof(lexer)) {
return optional_str::none();
} else {
line.content().push_back(lexer->lookahead);
lexer->advance(lexer, false);
}
line->push_back(lexer->lookahead);
}
}
// `read_line` reads characters until a newline or EOF
inline u32string read_line(TSLexer *lexer) {
u32string line;
while (!isnewline(lexer->lookahead) && !lexer->eof(lexer)) {
line.push_back(lexer->lookahead);
lexer->advance(lexer, false);
}
lexer->advance(lexer, false);
return line;
}
// Suppose the terminator is `T` and a newline (\n) is `$`.
// It should accept "#<<T$T" or "#<<T$...$T", where `...`
// is the string content.
bool scan(TSLexer *lexer, const bool *valid_symbols) {
if (!valid_symbols[HERE_STRING_BODY]) {
return false;
}
const optional_str terminator = readline(lexer);
const optional_str terminator = read_terminator(lexer);
if (!terminator.has_value()) {
if (terminator.is_none()) {
return false;
}
// skip `\n`
lexer->advance(lexer, false);
while (true) {
const optional_str line = readline(lexer);
if (!line.has_value()) {
return false;
}
if (line == terminator) {
const u32string line = read_line(lexer);
if (line == terminator.content()) {
lexer->result_symbol = HERE_STRING_BODY;
return true;
}
if (lexer->eof(lexer)) {
return false;
}
// skip `\n`
lexer->advance(lexer, false);
}
}