From f6b4a5e38b70c32e90d02c7dcfdeeeaf4497f23f Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Wed, 9 Aug 2023 19:14:48 +0300 Subject: [PATCH] Formatting and other nitpicks --- .editorconfig | 4 ++ .gitattributes | 4 +- .github/workflows/ci.yml | 4 +- Cargo.toml | 18 ++++---- binding.gyp | 5 ++- bindings/node/binding.cc | 36 ++++++++-------- bindings/node/index.js | 32 ++++++-------- bindings/node/xml.js | 1 - bindings/rust/build.rs | 6 +-- bindings/rust/lib.rs | 16 +++---- common/scanner.h | 54 +++++++++++++++++------- package.json | 4 +- tree-sitter-dtd/grammar.js | 1 + tree-sitter-dtd/src/scanner.c | 36 +++------------- tree-sitter-dtd/src/tree_sitter/parser.h | 16 +++---- tree-sitter-xml/grammar.js | 6 +-- tree-sitter-xml/src/scanner.c | 37 +++------------- tree-sitter-xml/src/tree_sitter/parser.h | 16 +++---- 18 files changed, 127 insertions(+), 169 deletions(-) diff --git a/.editorconfig b/.editorconfig index a9cb6212e..dc84f89c6 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,6 +7,10 @@ indent_size = 2 indent_style = space insert_final_newline = true trim_trailing_whitespace = true +max_line_length = 120 [*.js] quote_type = single + +[*.{c,h,cc,rs}] +indent_size = 4 diff --git a/.gitattributes b/.gitattributes index dcb6f403b..8d7d7324b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,4 @@ * text=auto -**/src/** linguist-generated -**/bindings/node/binding.cc linguist-generated +**/src/*.json linguist-generated +**/src/parser.c linguist-generated diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c03251f0..db409de58 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,5 @@ jobs: node-version: 16 - run: npm install name: Install dependencies - - run: >- - npm run test-dtd && - npm run test-xml + - run: npm test name: Run tests diff --git a/Cargo.toml b/Cargo.toml index 1b13e78a0..ab7777bd0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-xml" description = "XML grammar for tree-sitter" -version = "0.0.1" +version = "0.2.0" license = "MIT" readme = "README.md" keywords = ["incremental", "parsing", "dtd", "xml"] @@ -11,20 +11,20 @@ edition = "2021" build = "bindings/rust/build.rs" include = [ - "bindings/rust", - "dtd/grammar.js", - "dtd/src", - "dtd/queries", - "xml/grammar.js", - "xml/src", - "xml/queries", + "bindings/rust/*", + "tree-sitter-dtd/grammar.js", + "tree-sitter-dtd/src/*", + "tree-sitter-dtd/queries/*", + "tree-sitter-xml/grammar.js", + "tree-sitter-xml/src/*", + "tree-sitter-xml/queries/*", ] [lib] path = "bindings/rust/lib.rs" [dependencies] -tree-sitter = ">= 0.19, < 0.21" +tree-sitter = "~0.20.10" [build-dependencies] cc = "1.0" diff --git a/binding.gyp b/binding.gyp index fc5bf0c06..1cd5f41b4 100644 --- a/binding.gyp +++ b/binding.gyp @@ -4,14 +4,15 @@ "target_name": "tree_sitter_xml_binding", "include_dirs": [ " exports, Local module) { - Local xml_tpl = Nan::New(New); - xml_tpl->SetClassName(Nan::New("Language").ToLocalChecked()); - xml_tpl->InstanceTemplate()->SetInternalFieldCount(1); - Local xml_constructor = Nan::GetFunction(xml_tpl).ToLocalChecked(); - Local xml_instance = xml_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); - Nan::SetInternalFieldPointer(xml_instance, 0, tree_sitter_xml()); - Nan::Set(xml_instance, Nan::New("name").ToLocalChecked(), Nan::New("xml").ToLocalChecked()); - - Local dtd_tpl = Nan::New(New); - dtd_tpl->SetClassName(Nan::New("Language").ToLocalChecked()); - dtd_tpl->InstanceTemplate()->SetInternalFieldCount(1); - Local dtd_constructor = Nan::GetFunction(dtd_tpl).ToLocalChecked(); - Local dtd_instance = dtd_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); - Nan::SetInternalFieldPointer(dtd_instance, 0, tree_sitter_dtd()); - Nan::Set(dtd_instance, Nan::New("name").ToLocalChecked(), Nan::New("dtd").ToLocalChecked()); - - Nan::Set(exports, Nan::New("xml").ToLocalChecked(), xml_instance); - Nan::Set(exports, Nan::New("dtd").ToLocalChecked(), dtd_instance); + Local xml_tpl = Nan::New(New); + xml_tpl->SetClassName(Nan::New("Language").ToLocalChecked()); + xml_tpl->InstanceTemplate()->SetInternalFieldCount(1); + Local xml_constructor = Nan::GetFunction(xml_tpl).ToLocalChecked(); + Local xml_instance = xml_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); + Nan::SetInternalFieldPointer(xml_instance, 0, tree_sitter_xml()); + Nan::Set(xml_instance, Nan::New("name").ToLocalChecked(), Nan::New("xml").ToLocalChecked()); + + Local dtd_tpl = Nan::New(New); + dtd_tpl->SetClassName(Nan::New("Language").ToLocalChecked()); + dtd_tpl->InstanceTemplate()->SetInternalFieldCount(1); + Local dtd_constructor = Nan::GetFunction(dtd_tpl).ToLocalChecked(); + Local dtd_instance = dtd_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); + Nan::SetInternalFieldPointer(dtd_instance, 0, tree_sitter_dtd()); + Nan::Set(dtd_instance, Nan::New("name").ToLocalChecked(), Nan::New("dtd").ToLocalChecked()); + + Nan::Set(exports, Nan::New("xml").ToLocalChecked(), xml_instance); + Nan::Set(exports, Nan::New("dtd").ToLocalChecked(), dtd_instance); } NODE_MODULE(tree_sitter_xml_binding, Init) diff --git a/bindings/node/index.js b/bindings/node/index.js index 8fc3adb34..adc36cc96 100644 --- a/bindings/node/index.js +++ b/bindings/node/index.js @@ -1,24 +1,20 @@ try { - module.exports = require("../../build/Release/tree_sitter_xml_binding"); + module.exports = require('../../build/Release/tree_sitter_xml_binding'); } catch (error1) { - if (error1.code !== 'MODULE_NOT_FOUND') { - throw error1; - } - try { - module.exports = require("../../build/Debug/tree_sitter_xml_binding"); - } catch (error2) { - if (error2.code !== 'MODULE_NOT_FOUND') { - throw error2; - } - throw error1 - } + if (error1.code !== 'MODULE_NOT_FOUND') { + throw error1; + } + try { + module.exports = require('../../build/Debug/tree_sitter_xml_binding'); + } catch (error2) { + if (error2.code !== 'MODULE_NOT_FOUND') { + throw error2; + } + throw error1 + } } try { - module.exports.xml.nodeTypeInfo = require("../../tree-sitter-xml/src/node-types.json"); - module.exports.dtd.nodeTypeInfo = require("../../tree-sitter-dtd/src/node-types.json"); + module.exports.xml.nodeTypeInfo = require('../../tree-sitter-xml/src/node-types.json'); + module.exports.dtd.nodeTypeInfo = require('../../tree-sitter-dtd/src/node-types.json'); } catch (_) { } - - - - diff --git a/bindings/node/xml.js b/bindings/node/xml.js index a0b831b43..74e2706fb 100644 --- a/bindings/node/xml.js +++ b/bindings/node/xml.js @@ -1,2 +1 @@ module.exports = require('./index').xml; - diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs index ee940d1cc..a34e1d961 100644 --- a/bindings/rust/build.rs +++ b/bindings/rust/build.rs @@ -2,6 +2,7 @@ fn main() { let root_dir = std::path::Path::new("."); let xml_dir = root_dir.join("tree-sitter-xml").join("src"); let dtd_dir = root_dir.join("tree-sitter-dtd").join("src"); + let common_dir = root_dir.join("common"); let mut config = cc::Build::new(); config.include(&xml_dir); @@ -19,10 +20,7 @@ fn main() { println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); } - println!( - "cargo:rerun-if-changed={}", - root_dir.join("common").join("scanner.h").to_str().unwrap() - ); + println!("cargo:rerun-if-changed={}", common_dir.join("scanner.h").to_str().unwrap()); config.compile("parser-scanner"); } diff --git a/bindings/rust/lib.rs b/bindings/rust/lib.rs index b91e4c2d1..947be54a7 100644 --- a/bindings/rust/lib.rs +++ b/bindings/rust/lib.rs @@ -50,18 +50,18 @@ pub fn language_xml() -> Language { unsafe { tree_sitter_xml() } } -/// The syntax highlighting query for this language. +/// The syntax highlighting queries for XML. pub const XML_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-xml/queries/highlights.scm"); -pub const DTD_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-dtd/queries/highlights.scm"); - -/// The local-variable syntax highlighting query for this language. -// pub const LOCALS_QUERY: &str = include_str!("../../tree-sitter-xml/queries/locals.scm"); -/// The symbol tagging query for this language. -// pub const TAGGING_QUERY: &str = include_str!("../../tree-sitter-xml/queries/tags.scm"); +/// The syntax highlighting queries for DTD. +pub const DTD_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-dtd/queries/highlights.scm"); -/// The content of the [`node-types.json`][] file for this grammar. +/// The content of the [`node-types.json`][] file for XML. /// /// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types pub const XML_NODE_TYPES: &str = include_str!("../../tree-sitter-xml/src/node-types.json"); + +/// The content of the [`node-types.json`][] file for DTD. +/// +/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types pub const DTD_NODE_TYPES: &str = include_str!("../../tree-sitter-dtd/src/node-types.json"); diff --git a/common/scanner.h b/common/scanner.h index 71bace3cf..d3fe0f8a3 100644 --- a/common/scanner.h +++ b/common/scanner.h @@ -1,3 +1,5 @@ +#pragma once + #include #include @@ -7,20 +9,13 @@ enum TokenType { CharData, }; -static bool in_dtd_error_recovery(const bool *valid_symbols) { - return valid_symbols[PITarget] && valid_symbols[PIContent]; -} - -static bool in_xml_error_recovery(const bool *valid_symbols) { - return valid_symbols[PITarget] && valid_symbols[PIContent] && - valid_symbols[CharData]; -} - -static void advance(TSLexer *lexer) { lexer->advance(lexer, false); } +/// Advance the lexer to the next token +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } +/// Scan for the target of a PI node static bool scan_pi_target(TSLexer *lexer) { - bool advanced_once = false; - bool found_x_first = false; + bool advanced_once = false, found_x_first = false; + if (isalpha(lexer->lookahead) || lexer->lookahead == '_') { if (lexer->lookahead == 'x' || lexer->lookahead == 'X') { found_x_first = true; @@ -67,10 +62,13 @@ static bool scan_pi_target(TSLexer *lexer) { return false; } +/// Scan for the content of a PI node static bool scan_pi_content(TSLexer *lexer) { bool advanced_once = false; - while (lexer->lookahead != '\n' && lexer->lookahead != '?' && - !lexer->eof(lexer)) { + + while (!lexer->eof(lexer) && + lexer->lookahead != '\n' && + lexer->lookahead != '?') { advanced_once = true; advance(lexer); } @@ -93,10 +91,13 @@ static bool scan_pi_content(TSLexer *lexer) { return false; } +/// Scan for a CharData node static bool scan_char_data(TSLexer *lexer) { bool advanced_once = false; - while (lexer->lookahead != '<' && lexer->lookahead != '&' && - !lexer->eof(lexer)) { + + while (!lexer->eof(lexer) && + lexer->lookahead != '<' && + lexer->lookahead != '&') { if (lexer->lookahead == ']') { lexer->mark_end(lexer); advance(lexer); @@ -122,3 +123,24 @@ static bool scan_char_data(TSLexer *lexer) { } return false; } + +/// Scan for the common symbols +#define SCAN_COMMON(lexer, valid_symbols) \ + if (in_error_recovery(valid_symbols)) return false; \ + \ + if (valid_symbols[PITarget]) return scan_pi_target(lexer); \ + \ + if (valid_symbols[PIContent]) return scan_pi_content(lexer); + +/// Define the boilerplate functions of the scanner +/// @param name the name of the language +#define SCANNER_BOILERPLATE(name) \ + void *tree_sitter_##name##_external_scanner_create() { return NULL; } \ + \ + void tree_sitter_##name##_external_scanner_destroy(void *payload) {} \ + \ + void tree_sitter_##name##_external_scanner_reset(void *payload) {} \ + \ + unsigned tree_sitter_##name##_external_scanner_serialize(void *payload, char *buffer) { return 0; } \ + \ + void tree_sitter_##name##_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {} diff --git a/package.json b/package.json index 6dd9da205..6bd52d0cd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-xml", - "version": "0.1.0", + "version": "0.2.0", "license": "MIT", "description": "XML & DTD grammars for tree-sitter", "repository": "ObserverOfTime/tree-sitter-xml", @@ -53,7 +53,7 @@ "file-types": [ "dtd" ], - "injection-regex": "dtd", + "injection-regex": "^dtd$", "highlights": "tree-sitter-dtd/queries/highlights.scm", "path": "tree-sitter-dtd" } diff --git a/tree-sitter-dtd/grammar.js b/tree-sitter-dtd/grammar.js index e95b73d73..4c2a622d0 100644 --- a/tree-sitter-dtd/grammar.js +++ b/tree-sitter-dtd/grammar.js @@ -28,6 +28,7 @@ module.exports = grammar({ $._EntityDecl, $._Reference ], + rules: { // AKA: extSubset document: $ => seq( diff --git a/tree-sitter-dtd/src/scanner.c b/tree-sitter-dtd/src/scanner.c index 9b1f8244e..6ed6faa6d 100644 --- a/tree-sitter-dtd/src/scanner.c +++ b/tree-sitter-dtd/src/scanner.c @@ -1,37 +1,13 @@ -#include -#include -#include - #include "../../common/scanner.h" -void *tree_sitter_dtd_external_scanner_create() { return NULL; } - -void tree_sitter_dtd_external_scanner_destroy(void *payload) {} - -void tree_sitter_dtd_external_scanner_reset(void *payload) {} - -unsigned tree_sitter_dtd_external_scanner_serialize(void *payload, - char *buffer) { - return 0; +static inline bool in_error_recovery(const bool *valid_symbols) { + return valid_symbols[PITarget] && valid_symbols[PIContent] && valid_symbols[CharData]; } -void tree_sitter_dtd_external_scanner_deserialize(void *payload, - const char *buffer, - unsigned length) {} - -bool tree_sitter_dtd_external_scanner_scan(void *payload, TSLexer *lexer, - const bool *valid_symbols) { - if (in_dtd_error_recovery(valid_symbols)) { - return false; - } - - if (valid_symbols[PITarget]) { - return scan_pi_target(lexer); - } - - if (valid_symbols[PIContent]) { - return scan_pi_content(lexer); - } +bool tree_sitter_dtd_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + SCAN_COMMON(lexer, valid_symbols) return false; } + +SCANNER_BOILERPLATE(dtd) diff --git a/tree-sitter-dtd/src/tree_sitter/parser.h b/tree-sitter-dtd/src/tree_sitter/parser.h index 17b4fde98..2b14ac104 100644 --- a/tree-sitter-dtd/src/tree_sitter/parser.h +++ b/tree-sitter-dtd/src/tree_sitter/parser.h @@ -13,8 +13,9 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 -#ifndef TREE_SITTER_API_H_ typedef uint16_t TSStateId; + +#ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; @@ -129,16 +130,9 @@ struct TSLanguage { * Lexer Macros */ -#ifdef _MSC_VER -#define UNUSED __pragma(warning(suppress : 4101)) -#else -#define UNUSED __attribute__((unused)) -#endif - #define START_LEXER() \ bool result = false; \ bool skip = false; \ - UNUSED \ bool eof = false; \ int32_t lookahead; \ goto start; \ @@ -172,7 +166,7 @@ struct TSLanguage { * Parse Table Macros */ -#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) +#define SMALL_STATE(id) id - LARGE_STATE_COUNT #define STATE(id) id @@ -182,7 +176,7 @@ struct TSLanguage { {{ \ .shift = { \ .type = TSParseActionTypeShift, \ - .state = (state_value) \ + .state = state_value \ } \ }} @@ -190,7 +184,7 @@ struct TSLanguage { {{ \ .shift = { \ .type = TSParseActionTypeShift, \ - .state = (state_value), \ + .state = state_value, \ .repetition = true \ } \ }} diff --git a/tree-sitter-xml/grammar.js b/tree-sitter-xml/grammar.js index c1ac1f37f..a0e1bf20c 100644 --- a/tree-sitter-xml/grammar.js +++ b/tree-sitter-xml/grammar.js @@ -29,8 +29,8 @@ module.exports = grammar(DTD, { rules: { document: $ => prec(2, seq( - optional($._S), - optional($.prolog), + O($._S), + O($.prolog), field('root', $.element), repeat($._Misc), )), @@ -138,7 +138,7 @@ module.exports = grammar(DTD, { CDStart: _ => seq(' /[^\]]*|][^\][\]>]*|]][^>]*/, + CData: _ => /([^\]]|][^\][\]]|]][^>])*/, StyleSheetPI: $ => seq( ' -#include -#include - #include "../../common/scanner.h" -void *tree_sitter_xml_external_scanner_create() { return NULL; } - -void tree_sitter_xml_external_scanner_destroy(void *payload) {} - -void tree_sitter_xml_external_scanner_reset(void *payload) {} - -unsigned tree_sitter_xml_external_scanner_serialize(void *payload, - char *buffer) { - return 0; +static inline bool in_error_recovery(const bool *valid_symbols) { + return valid_symbols[PITarget] && valid_symbols[PIContent]; } -void tree_sitter_xml_external_scanner_deserialize(void *payload, - const char *buffer, - unsigned length) {} - bool tree_sitter_xml_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { - if (in_xml_error_recovery(valid_symbols)) { - return false; - } - - if (valid_symbols[PITarget]) { - return scan_pi_target(lexer); - } + SCAN_COMMON(lexer, valid_symbols) - if (valid_symbols[PIContent]) { - return scan_pi_content(lexer); - } - - if (valid_symbols[CharData]) { - return scan_char_data(lexer); - } + if (valid_symbols[CharData]) return scan_char_data(lexer); return false; } + +SCANNER_BOILERPLATE(xml) diff --git a/tree-sitter-xml/src/tree_sitter/parser.h b/tree-sitter-xml/src/tree_sitter/parser.h index 17b4fde98..2b14ac104 100644 --- a/tree-sitter-xml/src/tree_sitter/parser.h +++ b/tree-sitter-xml/src/tree_sitter/parser.h @@ -13,8 +13,9 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 -#ifndef TREE_SITTER_API_H_ typedef uint16_t TSStateId; + +#ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; @@ -129,16 +130,9 @@ struct TSLanguage { * Lexer Macros */ -#ifdef _MSC_VER -#define UNUSED __pragma(warning(suppress : 4101)) -#else -#define UNUSED __attribute__((unused)) -#endif - #define START_LEXER() \ bool result = false; \ bool skip = false; \ - UNUSED \ bool eof = false; \ int32_t lookahead; \ goto start; \ @@ -172,7 +166,7 @@ struct TSLanguage { * Parse Table Macros */ -#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) +#define SMALL_STATE(id) id - LARGE_STATE_COUNT #define STATE(id) id @@ -182,7 +176,7 @@ struct TSLanguage { {{ \ .shift = { \ .type = TSParseActionTypeShift, \ - .state = (state_value) \ + .state = state_value \ } \ }} @@ -190,7 +184,7 @@ struct TSLanguage { {{ \ .shift = { \ .type = TSParseActionTypeShift, \ - .state = (state_value), \ + .state = state_value, \ .repetition = true \ } \ }}