Formatting and other nitpicks

pull/504/merge
ObserverOfTime 2023-08-09 19:14:48 +07:00
parent ddc1421229
commit f6b4a5e38b
No known key found for this signature in database
GPG Key ID: 8A2DEA1DBAEBCA9E
18 changed files with 127 additions and 169 deletions

@ -7,6 +7,10 @@ indent_size = 2
indent_style = space indent_style = space
insert_final_newline = true insert_final_newline = true
trim_trailing_whitespace = true trim_trailing_whitespace = true
max_line_length = 120
[*.js] [*.js]
quote_type = single quote_type = single
[*.{c,h,cc,rs}]
indent_size = 4

4
.gitattributes vendored

@ -1,4 +1,4 @@
* text=auto * text=auto
**/src/** linguist-generated **/src/*.json linguist-generated
**/bindings/node/binding.cc linguist-generated **/src/parser.c linguist-generated

@ -18,7 +18,5 @@ jobs:
node-version: 16 node-version: 16
- run: npm install - run: npm install
name: Install dependencies name: Install dependencies
- run: >- - run: npm test
npm run test-dtd &&
npm run test-xml
name: Run tests name: Run tests

@ -1,7 +1,7 @@
[package] [package]
name = "tree-sitter-xml" name = "tree-sitter-xml"
description = "XML grammar for tree-sitter" description = "XML grammar for tree-sitter"
version = "0.0.1" version = "0.2.0"
license = "MIT" license = "MIT"
readme = "README.md" readme = "README.md"
keywords = ["incremental", "parsing", "dtd", "xml"] keywords = ["incremental", "parsing", "dtd", "xml"]
@ -11,20 +11,20 @@ edition = "2021"
build = "bindings/rust/build.rs" build = "bindings/rust/build.rs"
include = [ include = [
"bindings/rust", "bindings/rust/*",
"dtd/grammar.js", "tree-sitter-dtd/grammar.js",
"dtd/src", "tree-sitter-dtd/src/*",
"dtd/queries", "tree-sitter-dtd/queries/*",
"xml/grammar.js", "tree-sitter-xml/grammar.js",
"xml/src", "tree-sitter-xml/src/*",
"xml/queries", "tree-sitter-xml/queries/*",
] ]
[lib] [lib]
path = "bindings/rust/lib.rs" path = "bindings/rust/lib.rs"
[dependencies] [dependencies]
tree-sitter = ">= 0.19, < 0.21" tree-sitter = "~0.20.10"
[build-dependencies] [build-dependencies]
cc = "1.0" cc = "1.0"

@ -4,14 +4,15 @@
"target_name": "tree_sitter_xml_binding", "target_name": "tree_sitter_xml_binding",
"include_dirs": [ "include_dirs": [
"<!(node -e \"require('nan')\")", "<!(node -e \"require('nan')\")",
"tree-sitter-xml/src" "tree-sitter-dtd/src",
"tree-sitter-xml/src",
], ],
"sources": [ "sources": [
"tree-sitter-dtd/src/parser.c", "tree-sitter-dtd/src/parser.c",
"tree-sitter-dtd/src/scanner.c", "tree-sitter-dtd/src/scanner.c",
"tree-sitter-xml/src/parser.c", "tree-sitter-xml/src/parser.c",
"tree-sitter-xml/src/scanner.c", "tree-sitter-xml/src/scanner.c",
"bindings/node/binding.cc" "bindings/node/binding.cc",
], ],
"cflags_c": [ "cflags_c": [
"-std=c99", "-std=c99",

@ -12,24 +12,24 @@ namespace {
NAN_METHOD(New) {} NAN_METHOD(New) {}
void Init(Local<Object> exports, Local<Object> module) { void Init(Local<Object> exports, Local<Object> module) {
Local<FunctionTemplate> xml_tpl = Nan::New<FunctionTemplate>(New); Local<FunctionTemplate> xml_tpl = Nan::New<FunctionTemplate>(New);
xml_tpl->SetClassName(Nan::New("Language").ToLocalChecked()); xml_tpl->SetClassName(Nan::New("Language").ToLocalChecked());
xml_tpl->InstanceTemplate()->SetInternalFieldCount(1); xml_tpl->InstanceTemplate()->SetInternalFieldCount(1);
Local<Function> xml_constructor = Nan::GetFunction(xml_tpl).ToLocalChecked(); Local<Function> xml_constructor = Nan::GetFunction(xml_tpl).ToLocalChecked();
Local<Object> xml_instance = xml_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); Local<Object> xml_instance = xml_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(xml_instance, 0, tree_sitter_xml()); Nan::SetInternalFieldPointer(xml_instance, 0, tree_sitter_xml());
Nan::Set(xml_instance, Nan::New("name").ToLocalChecked(), Nan::New("xml").ToLocalChecked()); Nan::Set(xml_instance, Nan::New("name").ToLocalChecked(), Nan::New("xml").ToLocalChecked());
Local<FunctionTemplate> dtd_tpl = Nan::New<FunctionTemplate>(New); Local<FunctionTemplate> dtd_tpl = Nan::New<FunctionTemplate>(New);
dtd_tpl->SetClassName(Nan::New("Language").ToLocalChecked()); dtd_tpl->SetClassName(Nan::New("Language").ToLocalChecked());
dtd_tpl->InstanceTemplate()->SetInternalFieldCount(1); dtd_tpl->InstanceTemplate()->SetInternalFieldCount(1);
Local<Function> dtd_constructor = Nan::GetFunction(dtd_tpl).ToLocalChecked(); Local<Function> dtd_constructor = Nan::GetFunction(dtd_tpl).ToLocalChecked();
Local<Object> dtd_instance = dtd_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); Local<Object> dtd_instance = dtd_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(dtd_instance, 0, tree_sitter_dtd()); Nan::SetInternalFieldPointer(dtd_instance, 0, tree_sitter_dtd());
Nan::Set(dtd_instance, Nan::New("name").ToLocalChecked(), Nan::New("dtd").ToLocalChecked()); Nan::Set(dtd_instance, Nan::New("name").ToLocalChecked(), Nan::New("dtd").ToLocalChecked());
Nan::Set(exports, Nan::New("xml").ToLocalChecked(), xml_instance); Nan::Set(exports, Nan::New("xml").ToLocalChecked(), xml_instance);
Nan::Set(exports, Nan::New("dtd").ToLocalChecked(), dtd_instance); Nan::Set(exports, Nan::New("dtd").ToLocalChecked(), dtd_instance);
} }
NODE_MODULE(tree_sitter_xml_binding, Init) NODE_MODULE(tree_sitter_xml_binding, Init)

@ -1,24 +1,20 @@
try { try {
module.exports = require("../../build/Release/tree_sitter_xml_binding"); module.exports = require('../../build/Release/tree_sitter_xml_binding');
} catch (error1) { } catch (error1) {
if (error1.code !== 'MODULE_NOT_FOUND') { if (error1.code !== 'MODULE_NOT_FOUND') {
throw error1; throw error1;
} }
try { try {
module.exports = require("../../build/Debug/tree_sitter_xml_binding"); module.exports = require('../../build/Debug/tree_sitter_xml_binding');
} catch (error2) { } catch (error2) {
if (error2.code !== 'MODULE_NOT_FOUND') { if (error2.code !== 'MODULE_NOT_FOUND') {
throw error2; throw error2;
} }
throw error1 throw error1
} }
} }
try { try {
module.exports.xml.nodeTypeInfo = require("../../tree-sitter-xml/src/node-types.json"); module.exports.xml.nodeTypeInfo = require('../../tree-sitter-xml/src/node-types.json');
module.exports.dtd.nodeTypeInfo = require("../../tree-sitter-dtd/src/node-types.json"); module.exports.dtd.nodeTypeInfo = require('../../tree-sitter-dtd/src/node-types.json');
} catch (_) { } } catch (_) { }

@ -1,2 +1 @@
module.exports = require('./index').xml; module.exports = require('./index').xml;

@ -2,6 +2,7 @@ fn main() {
let root_dir = std::path::Path::new("."); let root_dir = std::path::Path::new(".");
let xml_dir = root_dir.join("tree-sitter-xml").join("src"); let xml_dir = root_dir.join("tree-sitter-xml").join("src");
let dtd_dir = root_dir.join("tree-sitter-dtd").join("src"); let dtd_dir = root_dir.join("tree-sitter-dtd").join("src");
let common_dir = root_dir.join("common");
let mut config = cc::Build::new(); let mut config = cc::Build::new();
config.include(&xml_dir); config.include(&xml_dir);
@ -19,10 +20,7 @@ fn main() {
println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); println!("cargo:rerun-if-changed={}", path.to_str().unwrap());
} }
println!( println!("cargo:rerun-if-changed={}", common_dir.join("scanner.h").to_str().unwrap());
"cargo:rerun-if-changed={}",
root_dir.join("common").join("scanner.h").to_str().unwrap()
);
config.compile("parser-scanner"); config.compile("parser-scanner");
} }

@ -50,18 +50,18 @@ pub fn language_xml() -> Language {
unsafe { tree_sitter_xml() } unsafe { tree_sitter_xml() }
} }
/// The syntax highlighting query for this language. /// The syntax highlighting queries for XML.
pub const XML_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-xml/queries/highlights.scm"); pub const XML_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-xml/queries/highlights.scm");
pub const DTD_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-dtd/queries/highlights.scm");
/// The local-variable syntax highlighting query for this language.
// pub const LOCALS_QUERY: &str = include_str!("../../tree-sitter-xml/queries/locals.scm");
/// The symbol tagging query for this language. /// The syntax highlighting queries for DTD.
// pub const TAGGING_QUERY: &str = include_str!("../../tree-sitter-xml/queries/tags.scm"); pub const DTD_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-dtd/queries/highlights.scm");
/// The content of the [`node-types.json`][] file for this grammar. /// The content of the [`node-types.json`][] file for XML.
/// ///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types /// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const XML_NODE_TYPES: &str = include_str!("../../tree-sitter-xml/src/node-types.json"); pub const XML_NODE_TYPES: &str = include_str!("../../tree-sitter-xml/src/node-types.json");
/// The content of the [`node-types.json`][] file for DTD.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const DTD_NODE_TYPES: &str = include_str!("../../tree-sitter-dtd/src/node-types.json"); pub const DTD_NODE_TYPES: &str = include_str!("../../tree-sitter-dtd/src/node-types.json");

@ -1,3 +1,5 @@
#pragma once
#include <ctype.h> #include <ctype.h>
#include <tree_sitter/parser.h> #include <tree_sitter/parser.h>
@ -7,20 +9,13 @@ enum TokenType {
CharData, CharData,
}; };
static bool in_dtd_error_recovery(const bool *valid_symbols) { /// Advance the lexer to the next token
return valid_symbols[PITarget] && valid_symbols[PIContent]; static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
}
static bool in_xml_error_recovery(const bool *valid_symbols) {
return valid_symbols[PITarget] && valid_symbols[PIContent] &&
valid_symbols[CharData];
}
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
/// Scan for the target of a PI node
static bool scan_pi_target(TSLexer *lexer) { static bool scan_pi_target(TSLexer *lexer) {
bool advanced_once = false; bool advanced_once = false, found_x_first = false;
bool found_x_first = false;
if (isalpha(lexer->lookahead) || lexer->lookahead == '_') { if (isalpha(lexer->lookahead) || lexer->lookahead == '_') {
if (lexer->lookahead == 'x' || lexer->lookahead == 'X') { if (lexer->lookahead == 'x' || lexer->lookahead == 'X') {
found_x_first = true; found_x_first = true;
@ -67,10 +62,13 @@ static bool scan_pi_target(TSLexer *lexer) {
return false; return false;
} }
/// Scan for the content of a PI node
static bool scan_pi_content(TSLexer *lexer) { static bool scan_pi_content(TSLexer *lexer) {
bool advanced_once = false; bool advanced_once = false;
while (lexer->lookahead != '\n' && lexer->lookahead != '?' &&
!lexer->eof(lexer)) { while (!lexer->eof(lexer) &&
lexer->lookahead != '\n' &&
lexer->lookahead != '?') {
advanced_once = true; advanced_once = true;
advance(lexer); advance(lexer);
} }
@ -93,10 +91,13 @@ static bool scan_pi_content(TSLexer *lexer) {
return false; return false;
} }
/// Scan for a CharData node
static bool scan_char_data(TSLexer *lexer) { static bool scan_char_data(TSLexer *lexer) {
bool advanced_once = false; bool advanced_once = false;
while (lexer->lookahead != '<' && lexer->lookahead != '&' &&
!lexer->eof(lexer)) { while (!lexer->eof(lexer) &&
lexer->lookahead != '<' &&
lexer->lookahead != '&') {
if (lexer->lookahead == ']') { if (lexer->lookahead == ']') {
lexer->mark_end(lexer); lexer->mark_end(lexer);
advance(lexer); advance(lexer);
@ -122,3 +123,24 @@ static bool scan_char_data(TSLexer *lexer) {
} }
return false; return false;
} }
/// Scan for the common symbols
#define SCAN_COMMON(lexer, valid_symbols) \
if (in_error_recovery(valid_symbols)) return false; \
\
if (valid_symbols[PITarget]) return scan_pi_target(lexer); \
\
if (valid_symbols[PIContent]) return scan_pi_content(lexer);
/// Define the boilerplate functions of the scanner
/// @param name the name of the language
#define SCANNER_BOILERPLATE(name) \
void *tree_sitter_##name##_external_scanner_create() { return NULL; } \
\
void tree_sitter_##name##_external_scanner_destroy(void *payload) {} \
\
void tree_sitter_##name##_external_scanner_reset(void *payload) {} \
\
unsigned tree_sitter_##name##_external_scanner_serialize(void *payload, char *buffer) { return 0; } \
\
void tree_sitter_##name##_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {}

@ -1,6 +1,6 @@
{ {
"name": "tree-sitter-xml", "name": "tree-sitter-xml",
"version": "0.1.0", "version": "0.2.0",
"license": "MIT", "license": "MIT",
"description": "XML & DTD grammars for tree-sitter", "description": "XML & DTD grammars for tree-sitter",
"repository": "ObserverOfTime/tree-sitter-xml", "repository": "ObserverOfTime/tree-sitter-xml",
@ -53,7 +53,7 @@
"file-types": [ "file-types": [
"dtd" "dtd"
], ],
"injection-regex": "dtd", "injection-regex": "^dtd$",
"highlights": "tree-sitter-dtd/queries/highlights.scm", "highlights": "tree-sitter-dtd/queries/highlights.scm",
"path": "tree-sitter-dtd" "path": "tree-sitter-dtd"
} }

@ -28,6 +28,7 @@ module.exports = grammar({
$._EntityDecl, $._EntityDecl,
$._Reference $._Reference
], ],
rules: { rules: {
// AKA: extSubset // AKA: extSubset
document: $ => seq( document: $ => seq(

@ -1,37 +1,13 @@
#include <ctype.h>
#include <tree_sitter/parser.h>
#include <wctype.h>
#include "../../common/scanner.h" #include "../../common/scanner.h"
void *tree_sitter_dtd_external_scanner_create() { return NULL; } static inline bool in_error_recovery(const bool *valid_symbols) {
return valid_symbols[PITarget] && valid_symbols[PIContent] && valid_symbols[CharData];
void tree_sitter_dtd_external_scanner_destroy(void *payload) {}
void tree_sitter_dtd_external_scanner_reset(void *payload) {}
unsigned tree_sitter_dtd_external_scanner_serialize(void *payload,
char *buffer) {
return 0;
} }
void tree_sitter_dtd_external_scanner_deserialize(void *payload, bool tree_sitter_dtd_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
const char *buffer, SCAN_COMMON(lexer, valid_symbols)
unsigned length) {}
bool tree_sitter_dtd_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
if (in_dtd_error_recovery(valid_symbols)) {
return false;
}
if (valid_symbols[PITarget]) {
return scan_pi_target(lexer);
}
if (valid_symbols[PIContent]) {
return scan_pi_content(lexer);
}
return false; return false;
} }
SCANNER_BOILERPLATE(dtd)

@ -13,8 +13,9 @@ extern "C" {
#define ts_builtin_sym_end 0 #define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId; typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol; typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId; typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage; typedef struct TSLanguage TSLanguage;
@ -129,16 +130,9 @@ struct TSLanguage {
* Lexer Macros * Lexer Macros
*/ */
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \ #define START_LEXER() \
bool result = false; \ bool result = false; \
bool skip = false; \ bool skip = false; \
UNUSED \
bool eof = false; \ bool eof = false; \
int32_t lookahead; \ int32_t lookahead; \
goto start; \ goto start; \
@ -172,7 +166,7 @@ struct TSLanguage {
* Parse Table Macros * Parse Table Macros
*/ */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) #define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define STATE(id) id #define STATE(id) id
@ -182,7 +176,7 @@ struct TSLanguage {
{{ \ {{ \
.shift = { \ .shift = { \
.type = TSParseActionTypeShift, \ .type = TSParseActionTypeShift, \
.state = (state_value) \ .state = state_value \
} \ } \
}} }}
@ -190,7 +184,7 @@ struct TSLanguage {
{{ \ {{ \
.shift = { \ .shift = { \
.type = TSParseActionTypeShift, \ .type = TSParseActionTypeShift, \
.state = (state_value), \ .state = state_value, \
.repetition = true \ .repetition = true \
} \ } \
}} }}

@ -29,8 +29,8 @@ module.exports = grammar(DTD, {
rules: { rules: {
document: $ => prec(2, seq( document: $ => prec(2, seq(
optional($._S), O($._S),
optional($.prolog), O($.prolog),
field('root', $.element), field('root', $.element),
repeat($._Misc), repeat($._Misc),
)), )),
@ -138,7 +138,7 @@ module.exports = grammar(DTD, {
CDStart: _ => seq('<![', 'CDATA', '['), CDStart: _ => seq('<![', 'CDATA', '['),
CData: _ => /[^\]]*|][^\][\]>]*|]][^>]*/, CData: _ => /([^\]]|][^\][\]]|]][^>])*/,
StyleSheetPI: $ => seq( StyleSheetPI: $ => seq(
'<?', '<?',

@ -1,41 +1,16 @@
#include <ctype.h>
#include <tree_sitter/parser.h>
#include <wctype.h>
#include "../../common/scanner.h" #include "../../common/scanner.h"
void *tree_sitter_xml_external_scanner_create() { return NULL; } static inline bool in_error_recovery(const bool *valid_symbols) {
return valid_symbols[PITarget] && valid_symbols[PIContent];
void tree_sitter_xml_external_scanner_destroy(void *payload) {}
void tree_sitter_xml_external_scanner_reset(void *payload) {}
unsigned tree_sitter_xml_external_scanner_serialize(void *payload,
char *buffer) {
return 0;
} }
void tree_sitter_xml_external_scanner_deserialize(void *payload,
const char *buffer,
unsigned length) {}
bool tree_sitter_xml_external_scanner_scan(void *payload, TSLexer *lexer, bool tree_sitter_xml_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) { const bool *valid_symbols) {
if (in_xml_error_recovery(valid_symbols)) { SCAN_COMMON(lexer, valid_symbols)
return false;
}
if (valid_symbols[PITarget]) {
return scan_pi_target(lexer);
}
if (valid_symbols[PIContent]) { if (valid_symbols[CharData]) return scan_char_data(lexer);
return scan_pi_content(lexer);
}
if (valid_symbols[CharData]) {
return scan_char_data(lexer);
}
return false; return false;
} }
SCANNER_BOILERPLATE(xml)

@ -13,8 +13,9 @@ extern "C" {
#define ts_builtin_sym_end 0 #define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId; typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol; typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId; typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage; typedef struct TSLanguage TSLanguage;
@ -129,16 +130,9 @@ struct TSLanguage {
* Lexer Macros * Lexer Macros
*/ */
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
#define START_LEXER() \ #define START_LEXER() \
bool result = false; \ bool result = false; \
bool skip = false; \ bool skip = false; \
UNUSED \
bool eof = false; \ bool eof = false; \
int32_t lookahead; \ int32_t lookahead; \
goto start; \ goto start; \
@ -172,7 +166,7 @@ struct TSLanguage {
* Parse Table Macros * Parse Table Macros
*/ */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) #define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define STATE(id) id #define STATE(id) id
@ -182,7 +176,7 @@ struct TSLanguage {
{{ \ {{ \
.shift = { \ .shift = { \
.type = TSParseActionTypeShift, \ .type = TSParseActionTypeShift, \
.state = (state_value) \ .state = state_value \
} \ } \
}} }}
@ -190,7 +184,7 @@ struct TSLanguage {
{{ \ {{ \
.shift = { \ .shift = { \
.type = TSParseActionTypeShift, \ .type = TSParseActionTypeShift, \
.state = (state_value), \ .state = state_value, \
.repetition = true \ .repetition = true \
} \ } \
}} }}