Use tree-sitter-xml from crates.io

pull/795/head 0.62.0
Wilfred Hughes 2024-12-20 09:16:19 +07:00
parent cef0b569dd
commit b3606fc219
50 changed files with 19 additions and 23125 deletions

@ -12,7 +12,7 @@ Improved language detection when one argument is a named pipe.
Updated to the latest tree-sitter parser for Bash, C, C++, C#, CSS,
Go, Haskell, HTML, Java, JavaScript, JSON, Julia, Lua, Objective-C,
OCaml, PHP, Python, Ruby, Scala, TOML and TypeScript.
OCaml, PHP, Python, Ruby, Scala, TOML, TypeScript and XML.
### Syntax Highlighting

11
Cargo.lock generated

@ -271,6 +271,7 @@ dependencies = [
"tree-sitter-scala",
"tree-sitter-toml-ng",
"tree-sitter-typescript",
"tree-sitter-xml",
"tree_magic_mini",
"typed-arena",
"unicode-width",
@ -1238,6 +1239,16 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-xml"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e670041f591d994f54d597ddcd8f4ebc930e282c4c76a42268743b71f0c8b6b3"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree_magic_mini"
version = "3.1.5"

@ -99,6 +99,7 @@ tree-sitter-html = "0.23.2"
tree-sitter-css = "0.23.1"
tree-sitter-toml-ng = "0.7.0"
tree-sitter-lua = "0.2.0"
tree-sitter-xml = "0.7.0"
[dev-dependencies]
# assert_cmd 2.0.10 requires predicates 3.

@ -247,11 +247,6 @@ fn main() {
src_dir: "vendored_parsers/tree-sitter-vhdl-src",
extra_files: vec![],
},
TreeSitterParser {
name: "tree-sitter-xml",
src_dir: "vendored_parsers/tree-sitter-xml-src/tree-sitter-xml/src",
extra_files: vec!["scanner.c"],
},
TreeSitterParser {
name: "tree-sitter-yaml",
src_dir: "vendored_parsers/tree-sitter-yaml-src",

@ -70,6 +70,6 @@ with `difft --list-languages`.
| Newick | [delehef/tree-sitter-newick](https://github.com/delehef/tree-sitter-newick) |
| SCSS | [serenadeai/tree-sitter-scss](https://github.com/serenadeai/tree-sitter-scss) |
| TOML | [tree-sitter-grammars/tree-sitter-toml](https://github.com/tree-sitter-grammars/tree-sitter-toml) |
| XML | [ObserverOfTime/tree-sitter-xml](https://github.com/ObserverOfTime/tree-sitter-xml) |
| XML | [tree-sitter-grammars/tree-sitter-xml](https://github.com/tree-sitter-grammars/tree-sitter-xml) |
| YAML | [ikatyang/tree-sitter-yaml](https://github.com/ikatyang/tree-sitter-yaml) |

@ -97,7 +97,6 @@ extern "C" {
fn tree_sitter_sql() -> ts::Language;
fn tree_sitter_swift() -> ts::Language;
fn tree_sitter_vhdl() -> ts::Language;
fn tree_sitter_xml() -> ts::Language;
fn tree_sitter_yaml() -> ts::Language;
fn tree_sitter_zig() -> ts::Language;
}
@ -1080,7 +1079,9 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
}
}
Xml => {
let language = unsafe { tree_sitter_xml() };
let language_fn = tree_sitter_xml::LANGUAGE_XML;
let language = tree_sitter::Language::new(language_fn);
TreeSitterConfig {
language: language.clone(),
// XMLDecl is the <?xml ...?> header, but the parser
@ -1088,11 +1089,8 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
// e.g. string subexpressions, so flatten.
atom_nodes: vec!["AttValue", "XMLDecl"].into_iter().collect(),
delimiter_tokens: (vec![("<", ">")]),
highlight_query: ts::Query::new(
&language,
include_str!("../../vendored_parsers/highlights/xml.scm"),
)
.unwrap(),
highlight_query: ts::Query::new(&language, tree_sitter_xml::XML_HIGHLIGHT_QUERY)
.unwrap(),
sub_languages: vec![],
}
}

@ -1 +0,0 @@
../tree-sitter-xml/tree-sitter-xml/queries/highlights.scm

@ -1 +0,0 @@
../tree-sitter-xml/common

@ -1 +0,0 @@
../tree-sitter-xml/tree-sitter-xml

@ -1,16 +0,0 @@
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
max_line_length = 120
[*.js]
quote_type = single
[*.{c,h,cc,rs}]
indent_size = 4

@ -1,5 +0,0 @@
* text=auto
**/src/*.json linguist-generated
**/src/parser.c linguist-generated
**/src/tree_sitter/parser.h linguist-generated

@ -1,2 +0,0 @@
github: ObserverOfTime
liberapay: ObserverOfTime

@ -1,22 +0,0 @@
name: Test grammars
on:
push:
branches: [master]
pull_request:
branches: [master]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
name: Checkout repository
- uses: actions/setup-node@v3
name: Set up NodeJS
with:
node-version: 16
- run: npm install
name: Install dependencies
- run: npm test
name: Run tests

@ -1,8 +0,0 @@
node_modules/
package-lock.json
yarn.lock
Cargo.lock
build/
target/
*.wasm

@ -1,30 +0,0 @@
[package]
name = "tree-sitter-xml"
description = "XML grammar for tree-sitter"
version = "0.2.0"
license = "MIT"
readme = "README.md"
keywords = ["incremental", "parsing", "dtd", "xml"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/ObserverOfTime/tree-sitter-xml"
edition = "2021"
build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"tree-sitter-dtd/grammar.js",
"tree-sitter-dtd/src/*",
"tree-sitter-dtd/queries/*",
"tree-sitter-xml/grammar.js",
"tree-sitter-xml/src/*",
"tree-sitter-xml/queries/*",
]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "~0.20.10"
[build-dependencies]
cc = "1.0"

@ -1,19 +0,0 @@
Copyright (c) 2023 ObserverOfTime
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -1,5 +0,0 @@
# tree-sitter-xml
A tree-sitter parser for XML & DTD files.
Based on the [XML](https://www.w3.org/TR/xml/) standard.

@ -1,22 +0,0 @@
{
"targets": [
{
"target_name": "tree_sitter_xml_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"tree-sitter-dtd/src",
"tree-sitter-xml/src",
],
"sources": [
"tree-sitter-dtd/src/parser.c",
"tree-sitter-dtd/src/scanner.c",
"tree-sitter-xml/src/parser.c",
"tree-sitter-xml/src/scanner.c",
"bindings/node/binding.cc",
],
"cflags_c": [
"-std=c99",
]
},
]
}

@ -1,37 +0,0 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
extern "C" TSLanguage * tree_sitter_xml();
extern "C" TSLanguage * tree_sitter_dtd();
namespace {
NAN_METHOD(New) {}
// Module initializer registered through NODE_MODULE.
// Builds one wrapper object per grammar (XML and DTD) and attaches both to
// `exports` under the keys "xml" and "dtd".
void Init(Local<Object> exports, Local<Object> module) {
// XML: instantiate a class named "Language" with a single internal field,
// store the tree_sitter_xml() pointer in that field and tag it with name="xml".
Local<FunctionTemplate> xml_tpl = Nan::New<FunctionTemplate>(New);
xml_tpl->SetClassName(Nan::New("Language").ToLocalChecked());
xml_tpl->InstanceTemplate()->SetInternalFieldCount(1);
Local<Function> xml_constructor = Nan::GetFunction(xml_tpl).ToLocalChecked();
Local<Object> xml_instance = xml_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(xml_instance, 0, tree_sitter_xml());
Nan::Set(xml_instance, Nan::New("name").ToLocalChecked(), Nan::New("xml").ToLocalChecked());
// DTD: same steps as above, using tree_sitter_dtd() and name="dtd".
Local<FunctionTemplate> dtd_tpl = Nan::New<FunctionTemplate>(New);
dtd_tpl->SetClassName(Nan::New("Language").ToLocalChecked());
dtd_tpl->InstanceTemplate()->SetInternalFieldCount(1);
Local<Function> dtd_constructor = Nan::GetFunction(dtd_tpl).ToLocalChecked();
Local<Object> dtd_instance = dtd_constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
Nan::SetInternalFieldPointer(dtd_instance, 0, tree_sitter_dtd());
Nan::Set(dtd_instance, Nan::New("name").ToLocalChecked(), Nan::New("dtd").ToLocalChecked());
// Expose both language objects; the `module` argument is not used here.
Nan::Set(exports, Nan::New("xml").ToLocalChecked(), xml_instance);
Nan::Set(exports, Nan::New("dtd").ToLocalChecked(), dtd_instance);
}
NODE_MODULE(tree_sitter_xml_binding, Init)
} // namespace

@ -1 +0,0 @@
module.exports = require('./index').dtd;

@ -1,20 +0,0 @@
/**
 * Load the compiled native binding, preferring the Release build and falling
 * back to the Debug build. Only a MODULE_NOT_FOUND failure triggers the
 * fallback; any other error is rethrown as-is. When neither build exists,
 * the error from the Release attempt is the one reported.
 */
function loadBinding() {
  let releaseError;
  try {
    return require('../../build/Release/tree_sitter_xml_binding');
  } catch (err) {
    if (err.code !== 'MODULE_NOT_FOUND') {
      throw err;
    }
    releaseError = err;
  }
  try {
    return require('../../build/Debug/tree_sitter_xml_binding');
  } catch (err) {
    if (err.code !== 'MODULE_NOT_FOUND') {
      throw err;
    }
    throw releaseError;
  }
}

module.exports = loadBinding();

// Attaching the node type tables is best effort: any failure here is ignored.
try {
  const xmlNodeTypes = require('../../tree-sitter-xml/src/node-types.json');
  module.exports.xml.nodeTypeInfo = xmlNodeTypes;
  const dtdNodeTypes = require('../../tree-sitter-dtd/src/node-types.json');
  module.exports.dtd.nodeTypeInfo = dtdNodeTypes;
} catch (_) { }

@ -1 +0,0 @@
module.exports = require('./index').xml;

@ -1,26 +0,0 @@
/// Build script entry point.
///
/// Compiles the parser and scanner C sources of both the XML and the DTD
/// grammar into a single static library named `parser-scanner`, and tells
/// Cargo to re-run the script whenever one of those sources, or the shared
/// scanner header, changes.
fn main() {
    let base = std::path::Path::new(".");
    let xml_src = base.join("tree-sitter-xml").join("src");
    let dtd_src = base.join("tree-sitter-dtd").join("src");
    let shared = base.join("common");

    let mut build = cc::Build::new();
    build.include(&xml_src);
    build.flag_if_supported("-Wno-unused-parameter");
    build.flag_if_supported("-Wno-unused-but-set-variable");

    // One parser.c and one scanner.c per grammar.
    let sources = [
        xml_src.join("parser.c"),
        xml_src.join("scanner.c"),
        dtd_src.join("parser.c"),
        dtd_src.join("scanner.c"),
    ];
    for source in sources.iter() {
        build.file(source);
        println!("cargo:rerun-if-changed={}", source.to_str().unwrap());
    }

    // The header is not compiled on its own but is included by the scanners.
    let shared_header = shared.join("scanner.h");
    println!("cargo:rerun-if-changed={}", shared_header.to_str().unwrap());

    build.compile("parser-scanner");
}

@ -1,67 +0,0 @@
//! This crate provides XML and DTD grammars for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language_xml][language func] function to add this grammar to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! use tree_sitter::Parser;
//!
//! let code = r#"
//! <?xml version="1.0" encoding="UTF-8"?>
//! <note>
//! <to>Tove</to>
//! <from>Jani</from>
//! <heading>Reminder</heading>
//! <body>Don't forget me this weekend!</body>
//! </note>
//! "#;
//! let mut parser = Parser::new();
//! parser
//! .set_language(tree_sitter_xml::language_xml())
//! .expect("Error loading XML grammar");
//! let parsed = parser.parse(code, None).unwrap();
//! let root = parsed.root_node();
//! assert!(!root.has_error());
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language_xml.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter::Language;
extern "C" {
fn tree_sitter_dtd() -> Language;
fn tree_sitter_xml() -> Language;
}
/// Returns the tree-sitter [Language][] for DTD.
///
/// This is a thin safe wrapper around the `tree_sitter_dtd` symbol declared
/// in this file's `extern "C"` block.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language_dtd() -> Language {
// SAFETY: NOTE(review) - relies on `tree_sitter_dtd` being defined by the C
// parser that the build script compiles and links, and on its return value
// being ABI-compatible with `Language`; confirm against the generated parser.
unsafe { tree_sitter_dtd() }
}
/// Returns the tree-sitter [Language][] for XML.
///
/// This is a thin safe wrapper around the `tree_sitter_xml` symbol declared
/// in this file's `extern "C"` block.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language_xml() -> Language {
// SAFETY: NOTE(review) - relies on `tree_sitter_xml` being defined by the C
// parser that the build script compiles and links, and on its return value
// being ABI-compatible with `Language`; confirm against the generated parser.
unsafe { tree_sitter_xml() }
}
/// The syntax highlighting queries for XML.
pub const XML_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-xml/queries/highlights.scm");
/// The syntax highlighting queries for DTD.
pub const DTD_HIGHLIGHT_QUERY: &str = include_str!("../../tree-sitter-dtd/queries/highlights.scm");
/// The content of the [`node-types.json`][] file for XML.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const XML_NODE_TYPES: &str = include_str!("../../tree-sitter-xml/src/node-types.json");
/// The content of the [`node-types.json`][] file for DTD.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const DTD_NODE_TYPES: &str = include_str!("../../tree-sitter-dtd/src/node-types.json");

@ -1,68 +0,0 @@
/**
* @file Reusable constructs
* @author ObserverOfTime
* @license MIT
*/
/**
* @param {"'" | ''} q
* @see {@link https://www.w3.org/TR/xml/#NT-PubidChar}
*/
module.exports.pubid_char = (q) =>
new RegExp(`[ \\r\\na-zA-Z0-9\\-${q}()+,./:=?;!*#@$_%]*`);
/**
 * Build the rule for an attribute value delimited by the quote `q`.
 * The `content` field is any number of references or single characters
 * other than `<`, `&` and the delimiting quote itself.
 *
 * @param {GrammarSymbols<any>} $
 * @param {'"' | "'"} q
 */
module.exports.att_value = ($, q) => {
  const plainChar = new RegExp('[^<&' + q + ']');
  const content = repeat(choice(plainChar, $._Reference));
  return seq(q, field('content', content), q);
};
/**
 * Build the rule for an entity value delimited by the quote `q`.
 * The `content` field is any number of parameter-entity references,
 * references, or single characters other than `<`, `%`, `&` and the
 * delimiting quote itself.
 *
 * @param {GrammarSymbols<any>} $
 * @param {'"' | "'"} q
 */
module.exports.entity_value = ($, q) => {
  const plainChar = new RegExp('[^<%&' + q + ']');
  const content = repeat(choice(plainChar, $.PEReference, $._Reference));
  return seq(q, field('content', content), q);
};
/**
 * Wrap the given rule sequence in matching quotes, accepting either
 * single or double quotes.
 *
 * @param {RuleOrLiteral[]} choices
 */
module.exports.str = (...choices) => {
  const quoted = (quote) => seq(quote, ...choices, quote);
  return choice(quoted("'"), quoted('"'));
};
/**
 * Build a choice between the given alternatives and, as the last
 * alternative, a parameter-entity reference.
 *
 * @param {GrammarSymbols<any>} $
 * @param {RuleOrLiteral[]} choices
 */
module.exports.ref = ($, ...choices) => {
  const alternatives = [...choices, $.PEReference];
  return choice(...alternatives);
};
/**
 * Zero or more repetitions of the given rules taken as one sequence.
 *
 * @param {RuleOrLiteral[]} rules
 */
module.exports.rseq = (...rules) => {
  const group = seq(...rules);
  return repeat(group);
};
/**
 * One or more repetitions of the given rules taken as one sequence.
 *
 * @param {RuleOrLiteral[]} rules
 */
module.exports.rseq1 = (...rules) => {
  const group = seq(...rules);
  return repeat1(group);
};

@ -1,142 +0,0 @@
#pragma once
#include <ctype.h>
#include <tree_sitter/parser.h>
/// Token kinds produced by the external scanner.
///
/// The values are used both as `lexer->result_symbol` and as indices into the
/// `valid_symbols` array passed to the scan functions. The first three follow
/// the same order as the DTD grammar's `externals` list (PITarget,
/// _pi_content, Comment).
/// NOTE(review): the remaining entries are presumably the extra externals of
/// the XML grammar, in its declaration order - confirm against that grammar.
enum TokenType {
PI_TARGET,      // set by scan_pi_target
PI_CONTENT,     // set by scan_pi_content
COMMENT,        // set by scan_comment
CHAR_DATA,      // not produced by any function in this header
CDATA,          // not produced by any function in this header
XML_MODEL,      // only consulted in valid_symbols by scan_pi_target
XML_STYLESHEET, // only consulted in valid_symbols by scan_pi_target
};
/// Consume the lookahead character if it equals `chr`; otherwise make the
/// enclosing function return false.
///
/// The expansion contains a `return false`, so it may only be used inside
/// functions returning bool. It is wrapped in do { } while (0) so that it
/// expands to exactly one statement and stays correct when used as the body
/// of an unbraced if/else or loop. Invoke it with a trailing semicolon.
#define advance_if_not(lexer, chr) \
    do { \
        if ((lexer)->lookahead != (chr)) return false; \
        advance((lexer)); \
    } while (0)
/// Advance the lexer by one step, consuming the current lookahead.
/// Calls the lexer's `advance` callback with `false` as its second argument.
/// NOTE(review): per tree-sitter's scanner API that flag presumably means
/// "do not treat the character as skipped whitespace" - confirm.
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
/// Check if the character is valid in PITarget
///
/// Accepts alphanumerics (as classified by isalnum), `_`, `:`, `.`, `-`
/// and the middle dot U+00B7.
///
/// @param chr the code point to test; may be any value, including ones
///            outside the unsigned char range
/// @return true if the character may appear in a PI target
/// @private
static inline bool is_valid_pi_char(int32_t chr) {
    // isalnum() has undefined behaviour for arguments that are neither EOF
    // nor representable as unsigned char, and the lexer's lookahead can be
    // any code point, so only consult it inside that range.
    const bool in_ctype_range = chr >= 0 && chr <= 0xFF;
    if (in_ctype_range && isalnum(chr)) return true;
    // 0xB7 is the middle dot; spelled numerically so the result does not
    // depend on how the compiler decodes this source file.
    return chr == '_' || chr == ':' || chr == '.' || chr == '-' || chr == 0xB7;
}
/// Check if the upcoming characters spell out the given word.
///
/// Matching characters are consumed as they are compared, so on a mismatch
/// the lexer has already advanced past the matching prefix.
///
/// @return true if every character of `word` was matched and consumed
/// @private
static inline bool check_word(TSLexer *lexer, const char *const word) {
    const char *cursor = word;
    while (*cursor != '\0') {
        if (lexer->lookahead != *cursor) return false;
        advance(lexer);
        cursor++;
    }
    return true;
}
/// Scan for the target of a PI node
///
/// The target must start with a letter (per isalpha) or `_` and continues
/// while is_valid_pi_char() accepts the lookahead. On success the token end
/// is marked, result_symbol is set to PI_TARGET and true is returned.
///
/// Returns false when:
/// - the first character cannot start a target;
/// - the target is exactly x-m-l in any letter case (no valid character
///   follows the `l`);
/// - the target starts with x-m-l followed by `-`, and the rest spells
///   "model" or "stylesheet" while the matching XML_MODEL / XML_STYLESHEET
///   symbol is valid.
static bool scan_pi_target(TSLexer *lexer, const bool *valid_symbols) {
bool advanced_once = false, found_x_first = false;
// NOTE(review): isalpha() receives the raw lookahead, which may be a code
// point outside the unsigned char range - confirm this is safe on all targets.
if (isalpha(lexer->lookahead) || lexer->lookahead == '_') {
if (lexer->lookahead == 'x' || lexer->lookahead == 'X') {
// Remember a leading x/X so the reserved x-m-l prefix can be detected below.
found_x_first = true;
lexer->mark_end(lexer);
}
advanced_once = true;
advance(lexer);
}
if (advanced_once) {
while (is_valid_pi_char(lexer->lookahead)) {
// found_x_first is still set only on the first iteration, i.e. when the
// lookahead is the second character of the target.
if (found_x_first &&
(lexer->lookahead == 'm' || lexer->lookahead == 'M')) {
advance(lexer);
if (lexer->lookahead == 'l' || lexer->lookahead == 'L') {
advance(lexer);
if (is_valid_pi_char(lexer->lookahead)) {
// Something follows x-m-l, so this is a longer name; only the
// xml-model / xml-stylesheet names are rejected, and only when the
// corresponding symbol is valid.
found_x_first = false;
bool last_char_hyphen = lexer->lookahead == '-';
advance(lexer);
if (last_char_hyphen) {
// check_word consumes characters while matching, so the second check
// continues from wherever the first one stopped.
if (valid_symbols[XML_MODEL] && check_word(lexer, "model")) return false;
if (valid_symbols[XML_STYLESHEET] && check_word(lexer, "stylesheet")) return false;
}
} else {
// The target is exactly x-m-l: not accepted as a PI target.
return false;
}
}
}
found_x_first = false;
// NOTE(review): this advance also runs after the x-m-l branch above has
// already consumed characters, so the character skipped here was not
// re-checked with is_valid_pi_char - confirm this is intended.
advance(lexer);
}
lexer->mark_end(lexer);
lexer->result_symbol = PI_TARGET;
return true;
}
return false;
}
/// Scan for the content of a PI node.
///
/// Consumes characters up to the first `?`; reaching end of input or a
/// newline first is a failure. The token end is marked just before that `?`,
/// which must be followed by `>`, optional spaces and a newline for the scan
/// to succeed with result_symbol set to PI_CONTENT.
static bool scan_pi_content(TSLexer *lexer) {
    for (;;) {
        if (lexer->eof(lexer)) break;
        if (lexer->lookahead == '\n') break;
        if (lexer->lookahead == '?') break;
        advance(lexer);
    }
    if (lexer->lookahead != '?') return false;

    // The token ends here; everything consumed below is lookahead only.
    lexer->mark_end(lexer);
    advance(lexer);
    if (lexer->lookahead != '>') return false;
    advance(lexer);

    while (lexer->lookahead == ' ') {
        advance(lexer);
    }
    if (lexer->lookahead != '\n') return false;
    advance(lexer);

    lexer->result_symbol = PI_CONTENT;
    return true;
}
/// Scan for a Comment node.
///
/// Requires the opening `<!--`, then consumes input up to and including the
/// first `--`. The comment is accepted only if a `>` follows immediately;
/// on success the token end is marked after the `>` and result_symbol is
/// set to COMMENT.
static bool scan_comment(TSLexer *lexer) {
    if (!check_word(lexer, "<!--")) return false;

    while (!lexer->eof(lexer)) {
        bool was_hyphen = lexer->lookahead == '-';
        advance(lexer);
        if (was_hyphen && lexer->lookahead == '-') {
            // Second hyphen of the terminating pair.
            advance(lexer);
            break;
        }
    }

    if (lexer->lookahead != '>') return false;
    advance(lexer);
    lexer->mark_end(lexer);
    lexer->result_symbol = COMMENT;
    return true;
}
/// Define the boilerplate functions of the scanner
///
/// Expands to the create / destroy / reset / serialize / deserialize entry
/// points named `tree_sitter_<name>_external_scanner_*`. The scanner keeps no
/// state: create returns NULL, serialize writes nothing and returns 0, and
/// the other functions have empty bodies. The `scan` entry point is not
/// generated here and must be defined separately.
///
/// @param name the grammar name spliced into the function names (e.g. dtd)
#define SCANNER_BOILERPLATE(name) \
void *tree_sitter_##name##_external_scanner_create() { return NULL; } \
\
void tree_sitter_##name##_external_scanner_destroy(void *payload) {} \
\
void tree_sitter_##name##_external_scanner_reset(void *payload) {} \
\
unsigned tree_sitter_##name##_external_scanner_serialize(void *payload, char *buffer) { return 0; } \
\
void tree_sitter_##name##_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {}

@ -1,13 +0,0 @@
{
"files": [
"common/index.js",
"tree-sitter-xml/grammar.js",
"tree-sitter-dtd/grammar.js"
],
"exclude": ["node_modules"],
"compilerOptions": {
"checkJs": true,
"module": "Node16",
"types": ["tree-sitter-cli/dsl"]
}
}

@ -1,61 +0,0 @@
{
"name": "tree-sitter-xml",
"version": "0.2.0",
"license": "MIT",
"description": "XML & DTD grammars for tree-sitter",
"repository": "ObserverOfTime/tree-sitter-xml",
"author": {
"name": "ObserverOfTime",
"email": "chronobserver@disroot.org",
"url": "https://observeroftime.github.io/"
},
"keywords": [
"tree-sitter",
"parser",
"lexer",
"xml",
"dtd"
],
"main": "./bindings/node",
"dependencies": {
"nan": "^2.17.0"
},
"devDependencies": {
"tree-sitter-cli": "^0.20.8"
},
"scripts": {
"build": "npm run build-dtd && npm run build-xml",
"build-dtd": "cd tree-sitter-dtd && tree-sitter generate --no-bindings",
"build-xml": "cd tree-sitter-xml && tree-sitter generate --no-bindings",
"parse": "tree-sitter parse",
"test-load": "node -e \"console.log(require('./tree-sitter-dtd').name, require('./tree-sitter-xml').name)\"",
"test": "npm run test-dtd && npm run test-xml && npm run test-load",
"test-dtd": "cd tree-sitter-dtd && tree-sitter test",
"test-xml": "cd tree-sitter-xml && tree-sitter test",
"playground-xml": "cd tree-sitter-xml && tree-sitter build-wasm && tree-sitter playground",
"playground-dtd": "cd tree-sitter-dtd && tree-sitter build-wasm && tree-sitter playground"
},
"tree-sitter": [
{
"scope": "source.xml",
"file-types": [
"xml",
"svg",
"xsd",
"xslt"
],
"injection-regex": "^(xml|svg|xsd|xslt)$",
"highlights": "tree-sitter-xml/queries/highlights.scm",
"path": "tree-sitter-xml"
},
{
"scope": "source.dtd",
"file-types": [
"dtd"
],
"injection-regex": "^dtd$",
"highlights": "tree-sitter-dtd/queries/highlights.scm",
"path": "tree-sitter-dtd"
}
]
}

@ -1,348 +0,0 @@
/**
* @file Tree-sitter grammar definition for DTD
* @author ObserverOfTime
* @license MIT
* @see {@link https://www.w3.org/TR/xml/|XML standard}
*/
const c = require('../common');
const O = optional;
module.exports = grammar({
name: 'dtd',
externals: $ => [
$.PITarget,
$._pi_content,
$.Comment,
],
extras: _ => [],
supertypes: $ => [
$._markupdecl,
$._AttType,
$._EnumeratedType,
$._EntityDecl,
$._Reference,
],
word: $ => $.Name,
rules: {
// AKA: extSubset
document: $ => seq(
O($.XMLDecl),
repeat1($._extSubsetDecl)
),
_extSubsetDecl: $ => choice(
$._markupdecl,
$.conditionalSect,
$._DeclSep
),
conditionalSect: $ => seq(
'<![',
O($._S),
c.ref($, 'IGNORE', 'INCLUDE'),
O($._S),
'[',
repeat($._extSubsetDecl),
']]>'
),
_markupdecl: $ => choice(
$.elementdecl,
$.AttlistDecl,
$._EntityDecl,
$.NotationDecl,
$.PI,
$.Comment
),
_DeclSep: $ => choice($.PEReference, $._S),
elementdecl: $ => seq(
'<!',
'ELEMENT',
$._S,
c.ref($, $.Name),
$._S,
$.contentspec,
O($._S),
'>'
),
contentspec: $ => choice(
'EMPTY',
'ANY',
$.Mixed,
$.children,
$.PEReference
),
Mixed: $ => choice(
seq(
'(',
O($._S),
c.ref($, '#PCDATA'),
c.rseq(
O($._S),
'|',
O($._S),
c.ref($, $.Name),
O($._S),
),
')',
'*'
),
seq(
'(',
O($._S),
c.ref($, '#PCDATA'),
O($._S),
')'
)
),
children: $ => seq(
$._choice,
O(choice('?', '*', '+'))
),
_cp: $ => prec.right(seq(
c.ref($, $.Name, $._choice),
O(choice('?', '*', '+'))
)),
_choice: $ => seq(
'(',
O($._S),
$._cp,
c.rseq(
O($._S),
choice('|', ','),
O($._S),
$._cp
),
')'
),
AttlistDecl: $ => seq(
'<!',
'ATTLIST',
$._S,
c.ref($, $.Name),
repeat($.AttDef),
O($._S),
'>'
),
AttDef: $ => prec.right(seq(
$._S,
c.ref($, $.Name),
$._S,
$._AttType,
O(seq($._S, $.DefaultDecl))
)),
_AttType: $ => choice(
$.StringType,
$.TokenizedType,
$._EnumeratedType,
$.PEReference
),
StringType: _ => 'CDATA',
TokenizedType: _ => token(choice(
'ID',
'IDREF',
'IDREFS',
'ENTITY',
'ENTITIES',
'NMTOKEN',
'NMTOKENS',
)),
_EnumeratedType: $ => choice(
$.NotationType,
$.Enumeration
),
NotationType: $ => seq(
'NOTATION',
$._S,
'(',
O($._S),
c.ref($, $.Name),
c.rseq(
O($._S),
'|',
O($._S)
),
c.ref($, $.Name),
O($._S),
')'
),
Enumeration: $ => seq(
'(',
O($._S),
$.Nmtoken,
c.rseq(
O($._S),
'|',
O($._S),
$.Nmtoken
),
O($._S),
')'
),
DefaultDecl: $ => choice(
'#REQUIRED',
'#IMPLIED',
seq(
O(seq('#FIXED', $._S)),
$.AttValue
)
),
_EntityDecl: $ => choice(
$.GEDecl,
$.PEDecl
),
GEDecl: $ => seq(
'<!',
'ENTITY',
$._S,
c.ref($, $.Name),
$._S,
choice(
$.EntityValue,
seq(
$.ExternalID,
O($.NDataDecl)
)
),
O($._S),
'>'
),
PEDecl: $ => seq(
'<!',
'ENTITY',
$._S,
'%',
$._S,
$.Name,
$._S,
choice(
$.EntityValue,
$.ExternalID
),
O($._S),
'>'
),
EntityValue: $ => choice(
c.entity_value($, '"'),
c.entity_value($, "'")
),
NDataDecl: $ => seq($._S, 'NDATA', $._S, c.ref($, $.Name)),
NotationDecl: $ => seq(
'<!',
'NOTATION',
$._S,
c.ref($, $.Name),
$._S,
choice($.ExternalID, $.PublicID),
O($._S),
'>'
),
PEReference: $ => seq('%', $.Name, ';'),
_S: _ => /[ \t\r\n]+/,
Name: _ => /[a-zA-Z_][a-zA-Z0-9_:.·-]*/,
Nmtoken: _ => /[a-zA-Z0-9_:.·-]+/,
_Reference: $ => choice($.EntityRef, $.CharRef),
EntityRef: $ => seq('&', $.Name, ';'),
CharRef: _ => choice(
seq('&#', /[0-9]+/, ';'),
seq('&#x', /[0-9a-fA-F]+/, ';')
),
AttValue: $ => choice(
c.att_value($, '"'),
c.att_value($, "'")
),
ExternalID: $ => choice(
seq('SYSTEM', $._S, $.SystemLiteral),
seq('PUBLIC', $._S, $.PubidLiteral, $._S, $.SystemLiteral)
),
PublicID: $ => prec.right(
seq(c.ref($, 'PUBLIC'), $._S, $.PubidLiteral)
),
SystemLiteral: $ => choice(
seq('"', alias(/[^"]*/, $.URI), '"'),
seq("'", alias(/[^']*/, $.URI), "'")
),
PubidLiteral: _ => choice(
seq('"', c.pubid_char("'"), '"'),
seq("'", c.pubid_char(''), "'")
),
// AKA: TextDecl
XMLDecl: $ => seq(
'<?',
'xml',
$._VersionInfo,
O($._EncodingDecl),
O($._S),
'?>'
),
_VersionInfo: $ => seq(
$._S,
'version',
$._Eq,
c.str($.VersionNum)
),
VersionNum: _ => /1\.[0-9]+/,
_EncodingDecl: $ => seq(
$._S,
'encoding',
$._Eq,
c.str($.EncName)
),
EncName: _ => /[A-Za-z][A-Za-z0-9._\-]*/,
PI: $ => seq(
'<?',
$.PITarget,
O(seq($._S, $._pi_content)),
'?>'
),
_Eq: $ => seq(O($._S), '=', O($._S))
}
});

@ -1,3 +0,0 @@
{
"main": "../bindings/node/dtd"
}

@ -1,122 +0,0 @@
;; XML declaration
(XMLDecl "xml" @keyword)
(XMLDecl [ "version" "encoding" ] @property)
(XMLDecl (EncName) @string.special)
(XMLDecl (VersionNum) @number)
;; Processing instructions
(PI) @embedded
(PI (PITarget) @keyword)
;; Element declaration
(elementdecl
"ELEMENT" @keyword
(Name) @tag)
(contentspec
(_ (Name) @property))
"#PCDATA" @type.builtin
[ "EMPTY" "ANY" ] @string.special.symbol
[ "*" "?" "+" ] @operator
;; Entity declaration
(GEDecl
"ENTITY" @keyword
(Name) @constant)
(GEDecl (EntityValue) @string)
(NDataDecl
"NDATA" @keyword
(Name) @label)
;; Parameter entity declaration
(PEDecl
"ENTITY" @keyword
"%" @operator
(Name) @constant)
(PEDecl (EntityValue) @string)
;; Notation declaration
(NotationDecl
"NOTATION" @keyword
(Name) @constant)
(NotationDecl
(ExternalID
(SystemLiteral (URI) @string.special)))
;; Attlist declaration
(AttlistDecl
"ATTLIST" @keyword
(Name) @tag)
(AttDef (Name) @property)
(AttDef (Enumeration (Nmtoken) @string))
[
(StringType)
(TokenizedType)
] @type.builtin
(NotationType "NOTATION" @type.builtin)
[
"#REQUIRED"
"#IMPLIED"
"#FIXED"
] @attribute
;; Entities
(EntityRef) @constant
((EntityRef) @constant.builtin
(#any-of? @constant.builtin
"&amp;" "&lt;" "&gt;" "&quot;" "&apos;"))
(CharRef) @constant
(PEReference) @constant
;; External references
[ "PUBLIC" "SYSTEM" ] @keyword
(PubidLiteral) @string.special
(SystemLiteral (URI) @markup.link)
;; Delimiters & punctuation
[ "<?" "?>" "<!" ">" "]]>" ] @punctuation.delimiter
[ "(" ")" "[" ] @punctuation.bracket
[ "\"" "'" ] @punctuation.delimiter
[ "," "|" "=" ] @operator
;; Misc
[ "INCLUDE" "IGNORE" ] @keyword
(Comment) @comment
(ERROR) @error

File diff suppressed because it is too large Load Diff

@ -1,784 +0,0 @@
[
{
"type": "_AttType",
"named": true,
"subtypes": [
{
"type": "PEReference",
"named": true
},
{
"type": "StringType",
"named": true
},
{
"type": "TokenizedType",
"named": true
},
{
"type": "_EnumeratedType",
"named": true
}
]
},
{
"type": "_EntityDecl",
"named": true,
"subtypes": [
{
"type": "GEDecl",
"named": true
},
{
"type": "PEDecl",
"named": true
}
]
},
{
"type": "_EnumeratedType",
"named": true,
"subtypes": [
{
"type": "Enumeration",
"named": true
},
{
"type": "NotationType",
"named": true
}
]
},
{
"type": "_Reference",
"named": true,
"subtypes": [
{
"type": "CharRef",
"named": true
},
{
"type": "EntityRef",
"named": true
}
]
},
{
"type": "_markupdecl",
"named": true,
"subtypes": [
{
"type": "AttlistDecl",
"named": true
},
{
"type": "Comment",
"named": true
},
{
"type": "NotationDecl",
"named": true
},
{
"type": "PI",
"named": true
},
{
"type": "_EntityDecl",
"named": true
},
{
"type": "elementdecl",
"named": true
}
]
},
{
"type": "AttDef",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "DefaultDecl",
"named": true
},
{
"type": "Name",
"named": true
},
{
"type": "_AttType",
"named": true
}
]
}
},
{
"type": "AttValue",
"named": true,
"fields": {
"content": {
"multiple": true,
"required": false,
"types": [
{
"type": "_Reference",
"named": true
}
]
}
}
},
{
"type": "AttlistDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "AttDef",
"named": true
},
{
"type": "Name",
"named": true
},
{
"type": "PEReference",
"named": true
}
]
}
},
{
"type": "CharRef",
"named": true,
"fields": {}
},
{
"type": "DefaultDecl",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": false,
"types": [
{
"type": "AttValue",
"named": true
}
]
}
},
{
"type": "EntityRef",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "Name",
"named": true
}
]
}
},
{
"type": "EntityValue",
"named": true,
"fields": {
"content": {
"multiple": true,
"required": false,
"types": [
{
"type": "PEReference",
"named": true
},
{
"type": "_Reference",
"named": true
}
]
}
}
},
{
"type": "Enumeration",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "Nmtoken",
"named": true
}
]
}
},
{
"type": "ExternalID",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "PubidLiteral",
"named": true
},
{
"type": "SystemLiteral",
"named": true
}
]
}
},
{
"type": "GEDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "EntityValue",
"named": true
},
{
"type": "ExternalID",
"named": true
},
{
"type": "NDataDecl",
"named": true
},
{
"type": "Name",
"named": true
},
{
"type": "PEReference",
"named": true
}
]
}
},
{
"type": "Mixed",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "Name",
"named": true
},
{
"type": "PEReference",
"named": true
}
]
}
},
{
"type": "NDataDecl",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "Name",
"named": true
},
{
"type": "PEReference",
"named": true
}
]
}
},
{
"type": "NotationDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "ExternalID",
"named": true
},
{
"type": "Name",
"named": true
},
{
"type": "PEReference",
"named": true
},
{
"type": "PublicID",
"named": true
}
]
}
},
{
"type": "NotationType",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "Name",
"named": true
},
{
"type": "PEReference",
"named": true
}
]
}
},
{
"type": "PEDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "EntityValue",
"named": true
},
{
"type": "ExternalID",
"named": true
},
{
"type": "Name",
"named": true
}
]
}
},
{
"type": "PEReference",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "Name",
"named": true
}
]
}
},
{
"type": "PI",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "PITarget",
"named": true
}
]
}
},
{
"type": "PubidLiteral",
"named": true,
"fields": {}
},
{
"type": "PublicID",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "PEReference",
"named": true
},
{
"type": "PubidLiteral",
"named": true
}
]
}
},
{
"type": "SystemLiteral",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "URI",
"named": true
}
]
}
},
{
"type": "XMLDecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "EncName",
"named": true
},
{
"type": "VersionNum",
"named": true
}
]
}
},
{
"type": "children",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "Name",
"named": true
},
{
"type": "PEReference",
"named": true
}
]
}
},
{
"type": "conditionalSect",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "PEReference",
"named": true
},
{
"type": "_markupdecl",
"named": true
},
{
"type": "conditionalSect",
"named": true
}
]
}
},
{
"type": "contentspec",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": false,
"types": [
{
"type": "Mixed",
"named": true
},
{
"type": "PEReference",
"named": true
},
{
"type": "children",
"named": true
}
]
}
},
{
"type": "document",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "PEReference",
"named": true
},
{
"type": "XMLDecl",
"named": true
},
{
"type": "_markupdecl",
"named": true
},
{
"type": "conditionalSect",
"named": true
}
]
}
},
{
"type": "elementdecl",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "Name",
"named": true
},
{
"type": "PEReference",
"named": true
},
{
"type": "contentspec",
"named": true
}
]
}
},
{
"type": "\"",
"named": false
},
{
"type": "#FIXED",
"named": false
},
{
"type": "#IMPLIED",
"named": false
},
{
"type": "#PCDATA",
"named": false
},
{
"type": "#REQUIRED",
"named": false
},
{
"type": "%",
"named": false
},
{
"type": "&",
"named": false
},
{
"type": "&#",
"named": false
},
{
"type": "&#x",
"named": false
},
{
"type": "'",
"named": false
},
{
"type": "(",
"named": false
},
{
"type": ")",
"named": false
},
{
"type": "*",
"named": false
},
{
"type": "+",
"named": false
},
{
"type": ",",
"named": false
},
{
"type": ";",
"named": false
},
{
"type": "<!",
"named": false
},
{
"type": "<![",
"named": false
},
{
"type": "<?",
"named": false
},
{
"type": "=",
"named": false
},
{
"type": ">",
"named": false
},
{
"type": "?",
"named": false
},
{
"type": "?>",
"named": false
},
{
"type": "ANY",
"named": false
},
{
"type": "ATTLIST",
"named": false
},
{
"type": "Comment",
"named": true
},
{
"type": "ELEMENT",
"named": false
},
{
"type": "EMPTY",
"named": false
},
{
"type": "ENTITY",
"named": false
},
{
"type": "EncName",
"named": true
},
{
"type": "IGNORE",
"named": false
},
{
"type": "INCLUDE",
"named": false
},
{
"type": "NDATA",
"named": false
},
{
"type": "NOTATION",
"named": false
},
{
"type": "Name",
"named": true
},
{
"type": "Nmtoken",
"named": true
},
{
"type": "PITarget",
"named": true
},
{
"type": "PUBLIC",
"named": false
},
{
"type": "SYSTEM",
"named": false
},
{
"type": "StringType",
"named": true
},
{
"type": "TokenizedType",
"named": true
},
{
"type": "URI",
"named": true
},
{
"type": "VersionNum",
"named": true
},
{
"type": "[",
"named": false
},
{
"type": "]]>",
"named": false
},
{
"type": "encoding",
"named": false
},
{
"type": "version",
"named": false
},
{
"type": "xml",
"named": false
},
{
"type": "|",
"named": false
}
]

File diff suppressed because it is too large Load Diff

@ -1,20 +0,0 @@
#include "../../common/scanner.h"
/// Report whether every external token handled by this scanner (PI target,
/// PI content, comment) is marked valid at once; the scanner treats that
/// situation as error recovery mode.
static inline bool in_error_recovery(const bool *valid_symbols) {
    if (!valid_symbols[PI_TARGET]) {
        return false;
    }
    if (!valid_symbols[PI_CONTENT]) {
        return false;
    }
    return valid_symbols[COMMENT];
}
/// External scanner entry point for the DTD grammar.
///
/// Bails out in error recovery mode; otherwise runs the scanner for the first
/// valid token kind, checked in the order PI target, PI content, comment.
/// `payload` is not used. Returns whatever the selected scanner returns, or
/// false when no handled token kind is valid.
bool tree_sitter_dtd_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    if (in_error_recovery(valid_symbols)) {
        return false;
    }

    bool matched = false;
    if (valid_symbols[PI_TARGET]) {
        matched = scan_pi_target(lexer, valid_symbols);
    } else if (valid_symbols[PI_CONTENT]) {
        matched = scan_pi_content(lexer);
    } else if (valid_symbols[COMMENT]) {
        matched = scan_comment(lexer);
    }
    return matched;
}
// Expands SCANNER_BOILERPLATE (from ../../common/scanner.h) for the `dtd`
// grammar. NOTE(review): presumably this emits the remaining external-scanner
// entry points (create/destroy/serialize/deserialize) — confirm in that header.
SCANNER_BOILERPLATE(dtd)

@ -1,224 +0,0 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Reserved symbol ids: the error symbol is (TSSymbol)-1 and the end symbol is 0.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Buffer size constant. NOTE(review): presumably the capacity of the buffer
// handed to an external scanner's serialize callback — confirm against the runtime.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// Identifier of a parse state (16-bit).
typedef uint16_t TSStateId;
// The typedefs below are skipped when TREE_SITTER_API_H_ is already defined,
// presumably because that header declares the same names.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field-map record: a field id, the index of the child it refers to, and
// an `inherited` flag (the flag's exact meaning is not shown in this header).
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// An (index, length) pair. NOTE(review): presumably selects a run of
// TSFieldMapEntry records — confirm against the runtime.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Per-symbol boolean flags: visible, named, supertype.
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
// Lexer interface handed to generated lex functions and external scanners.
// The lexer macros in this header read `lookahead`, call `advance` with a
// skip flag, and on acceptance store the token in `result_symbol` and call
// `mark_end`. The remaining callbacks are query functions on the lexer.
typedef struct TSLexer TSLexer;
struct TSLexer {
// Current lookahead character.
int32_t lookahead;
// Symbol of the token being produced.
TSSymbol result_symbol;
// Move past the lookahead character; the bool is the `skip` flag.
void (*advance)(TSLexer *, bool);
// Mark the end of the token being produced.
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
// True at end of input.
bool (*eof)(const TSLexer *);
};
// Kinds of parse action; stored in the `type` member of TSParseAction.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. Both struct members begin with a uint8_t `type`,
// so the bare `type` member overlays that leading byte; the RECOVER and
// ACCEPT_INPUT macros in this header set only that member.
typedef union {
// Shift: target state plus `extra` and `repetition` flags.
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
// Reduce: number of children, resulting symbol, dynamic precedence and
// production id.
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
// Pair of lex-state ids: one for the generated lexer and one for the
// external scanner. NOTE(review): presumably one entry per parse state, as
// TSLanguage.lex_modes suggests — confirm.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Element of the parse-action table: either an action, or an `entry` header
// carrying a count and a `reusable` flag. NOTE(review): the count presumably
// gives the number of actions that follow the header — confirm.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
// Description of a generated language: counts, pointers to the generated
// tables, the lex functions and the external scanner callbacks. This header
// does not document the individual fields; the group notes below are read off
// the field names and types — confirm details against the tree-sitter runtime.
struct TSLanguage {
// Version number and table sizes.
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
// Parse tables and the action entries they refer to.
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
// Symbol and field names and metadata.
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
// Lexing: lex modes and the generated lex functions.
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// External scanner hooks; `scan` takes the scanner payload, the lexer and
// an array of per-symbol validity flags.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*
* These expand inside a function that has `lexer` (TSLexer *) and `state`
* in scope; they communicate through the locals and labels that
* START_LEXER introduces.
*/
// Declares the locals result/skip/eof/lookahead and the labels `next_state`
// and `start`. Jumping to `next_state` advances the lexer (passing the
// current `skip` flag) and falls through to `start`, which clears `skip` and
// reloads `lookahead`.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// Switch to `state_value` and consume the lookahead character.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Like ADVANCE, but sets `skip` so the lexer is advanced with skip = true.
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// Record a match: sets result, stores the symbol and marks the token end.
// Expands to three statements that are not wrapped in a block, so it must be
// used as a statement sequence, not as the unbraced body of an `if`.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// Return whether a token has been accepted so far.
#define END_STATE() return result;
/*
 * Parse Table Macros
 */
// SMALL_STATE rebases a state id by LARGE_STATE_COUNT (which must be defined
// by the including file before the macro is used). The expansions are fully
// parenthesized so they keep their meaning inside a larger expression, e.g.
// `2 * SMALL_STATE(5)`; without the parentheses that would expand to
// `2 * 5 - LARGE_STATE_COUNT`.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
// STATE and ACTIONS pass their argument through unchanged; they only label
// the role of a table entry.
#define STATE(id) (id)
#define ACTIONS(id) (id)
// Each macro below expands to a brace-enclosed initializer (doubly braced)
// that fills a TSParseAction member with designated initializers.
// Shift to `state_value`.
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
// Shift to `state_value` with the `repetition` flag set.
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
// Shift with the `extra` flag set and no explicit target state.
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
// Reduce `child_count_val` children to `symbol_val`; any further arguments
// are pasted in as additional designated initializers of the reduce struct.
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
// Recover action: only the `type` member is set.
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
// Accept action: only the `type` member is set.
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

@ -1,68 +0,0 @@
================================================================================
BookStore.dtd
================================================================================
<!ELEMENT bookstore (book*)>
<!ELEMENT book (title,author,genre?)>
<!ELEMENT title (#PCDATA)>
<!ELEMENT author (first-name, last-name)>
<!ELEMENT genre (#PCDATA)>
<!ELEMENT first-name (#PCDATA)>
<!ELEMENT last-name (#PCDATA)>
<!ATTLIST book price CDATA #REQUIRED>
<!ATTLIST book publicationdate CDATA>
<!ATTLIST book IBSN CDATA>
--------------------------------------------------------------------------------
(document
(elementdecl
(Name)
(contentspec
(children
(Name))))
(elementdecl
(Name)
(contentspec
(children
(Name)
(Name)
(Name))))
(elementdecl
(Name)
(contentspec
(Mixed)))
(elementdecl
(Name)
(contentspec
(children
(Name)
(Name))))
(elementdecl
(Name)
(contentspec
(Mixed)))
(elementdecl
(Name)
(contentspec
(Mixed)))
(elementdecl
(Name)
(contentspec
(Mixed)))
(AttlistDecl
(Name)
(AttDef
(Name)
(StringType)
(DefaultDecl)))
(AttlistDecl
(Name)
(AttDef
(Name)
(StringType)))
(AttlistDecl
(Name)
(AttDef
(Name)
(StringType))))

@ -1,214 +0,0 @@
================================================================================
Element Declarations
================================================================================
<!ELEMENT br EMPTY>
<!ELEMENT p (#PCDATA|emph)* >
<!ELEMENT %name.para; %content.para; >
<!ELEMENT container ANY>
<!ELEMENT spec (front, body, back?)>
<!ELEMENT div1 (head, (p | list | note)*, div2*)>
<!ELEMENT dictionary-body (%div.mix; | %dict.mix;)*>
<!ELEMENT p (#PCDATA|a|ul|b|i|em)*>
<!ELEMENT p (#PCDATA | %font; | %phrase; | %special; | %form;)* >
<!ELEMENT b (#PCDATA)>
--------------------------------------------------------------------------------
(document
(elementdecl
(Name)
(contentspec))
(elementdecl
(Name)
(contentspec
(Mixed
(Name))))
(elementdecl
(PEReference
(Name))
(contentspec
(PEReference
(Name))))
(elementdecl
(Name)
(contentspec))
(elementdecl
(Name)
(contentspec
(children
(Name)
(Name)
(Name))))
(elementdecl
(Name)
(contentspec
(children
(Name)
(Name)
(Name)
(Name)
(Name))))
(elementdecl
(Name)
(contentspec
(Mixed
(PEReference
(Name))
(PEReference
(Name)))))
(elementdecl
(Name)
(contentspec
(Mixed
(Name)
(Name)
(Name)
(Name)
(Name))))
(elementdecl
(Name)
(contentspec
(Mixed
(PEReference
(Name))
(PEReference
(Name))
(PEReference
(Name))
(PEReference
(Name)))))
(elementdecl
(Name)
(contentspec
(Mixed))))
================================================================================
Attribute-List Declarations
================================================================================
<!ATTLIST termdef
id ID #REQUIRED
name CDATA #IMPLIED>
<!ATTLIST list
type (bullets|ordered|glossary) "ordered">
<!ATTLIST form
method CDATA #FIXED "POST">
--------------------------------------------------------------------------------
(document
(AttlistDecl
(Name)
(AttDef
(Name)
(TokenizedType)
(DefaultDecl))
(AttDef
(Name)
(StringType)
(DefaultDecl)))
(AttlistDecl
(Name)
(AttDef
(Name)
(Enumeration
(Nmtoken)
(Nmtoken)
(Nmtoken))
(DefaultDecl
(AttValue))))
(AttlistDecl
(Name)
(AttDef
(Name)
(StringType)
(DefaultDecl
(AttValue)))))
================================================================================
Entity Declarations
================================================================================
<!ENTITY Pub-Status "This is a pre-release of the
specification.">
<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
<!ENTITY open-hatch
PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN"
"http://www.textuality.com/boilerplate/OpenHatch.xml">
<!ENTITY hatch-pic
SYSTEM "../grafix/OpenHatch.gif"
NDATA gif >
--------------------------------------------------------------------------------
(document
(GEDecl
(Name)
(EntityValue))
(GEDecl
(Name)
(ExternalID
(SystemLiteral
(URI))))
(GEDecl
(Name)
(ExternalID
(PubidLiteral)
(SystemLiteral
(URI))))
(GEDecl
(Name)
(ExternalID
(SystemLiteral
(URI)))
(NDataDecl
(Name))))
================================================================================
Conditional Sections
================================================================================
<!ENTITY % draft 'INCLUDE' >
<!ENTITY % final 'IGNORE' >
<![%draft;[
<!ELEMENT book (comments*, title, body, supplements?)>
]]>
<![%final;[
<!ELEMENT book (title, body, supplements?)>
]]>
--------------------------------------------------------------------------------
(document
(PEDecl
(Name)
(EntityValue))
(PEDecl
(Name)
(EntityValue))
(conditionalSect
(PEReference
(Name))
(elementdecl
(Name)
(contentspec
(children
(Name)
(Name)
(Name)
(Name)))))
(conditionalSect
(PEReference
(Name))
(elementdecl
(Name)
(contentspec
(children
(Name)
(Name)
(Name))))))

@ -1,166 +0,0 @@
/**
* @file Tree-sitter grammar definition for XML
* @author ObserverOfTime
* @license MIT
* @see {@link https://www.w3.org/TR/xml/|XML standard}
* @see {@link https://www.w3.org/TR/xml-model/|XML model}
* @see {@link https://www.w3.org/TR/xml-stylesheet/|XML stylesheet}
*/
// @ts-nocheck
const c = require('../common');
const DTD = require('../tree-sitter-dtd/grammar');
const O = optional;
// Tree-sitter grammar for XML documents, built on top of the imported DTD
// grammar (`DTD`). `O` is this file's alias for `optional`; `c` supplies the
// helpers rseq, rseq1, str and att_value from ../common (their definitions
// are not visible here). Rules referenced but not defined below (for example
// _S, _Eq, Name, AttValue, PI, Comment) come from outside this file.
module.exports = grammar(DTD, {
name: 'xml',
// Keep the base grammar's external tokens and append the four added here.
externals: ($, previous) => previous.concat([
$.CharData,
$.CData,
'xml-model',
'xml-stylesheet',
]),
// Rules to be inlined.
inline: $ => [
$._extSubsetDecl,
$.conditionalSect,
],
rules: {
// Whole document: optional _S, optional prolog, exactly one element
// (exposed as field `root`), then any number of _Misc items.
document: $ => prec(2, seq(
O($._S),
O($.prolog),
field('root', $.element),
repeat($._Misc),
)),
// Three alternatives: an XMLDecl followed by _Misc items; an optional
// XMLDecl, _Misc items, a doctypedecl and more _Misc items; or one or more
// _Misc items on their own.
prolog: $ => choice(
seq(
$.XMLDecl,
repeat($._Misc),
),
seq(
O($.XMLDecl),
repeat($._Misc),
$.doctypedecl,
repeat($._Misc),
),
repeat1($._Misc)
),
// Items allowed around the root element: processing instructions
// (generic, stylesheet, xml-model), comments and _S.
_Misc: $ => choice(
$.PI,
$.StyleSheetPI,
$.XmlModelPI,
$.Comment,
$._S
),
// `<?xml ... ?>`: version info is required; encoding and standalone
// declarations are optional.
XMLDecl: $ => seq(
'<?',
'xml',
$._VersionInfo,
O($._EncodingDecl),
O($._SDDecl),
O($._S),
'?>'
),
// Standalone declaration: `standalone`, _Eq, then 'yes' or 'no' wrapped
// by c.str (presumably in quotes — confirm in ../common).
_SDDecl: $ => seq(
$._S,
'standalone',
$._Eq,
c.str(choice('yes', 'no'))
),
// `<!DOCTYPE name ...>` with an optional ExternalID and an optional
// internal subset between '[' and ']'.
doctypedecl: $ => seq(
'<!',
'DOCTYPE',
$._S,
$.Name,
O(seq($._S, $.ExternalID)),
O($._S),
O(seq(
'[',
$._intSubset,
']',
O($._S)
)),
'>'
),
// Internal subset built from optional _S, _markupdecl and _DeclSep via
// c.rseq1 (presumably a repeat1 over a seq — confirm in ../common).
_intSubset: $ => c.rseq1(
O($._S),
$._markupdecl,
$._DeclSep
),
// Either an empty-element tag, or a start tag, optional content and an
// end tag.
element: $ => choice(
$.EmptyElemTag,
seq($.STag, O($.content), $.ETag)
),
// `<name attr... />`
EmptyElemTag: $ => seq(
'<',
$.Name,
c.rseq($._S, $.Attribute),
O($._S),
'/>'
),
// name, _Eq, value
Attribute: $ => seq($.Name, $._Eq, $.AttValue),
// `<name attr... >`
STag: $ => seq(
'<',
$.Name,
c.rseq($._S, $.Attribute),
O($._S),
'>'
),
// `</name>` with optional _S before '>'.
ETag: $ => seq('</', $.Name, O($._S), '>'),
// One or more of: character data, nested elements, references, CDATA
// sections, processing instructions and comments.
content: $ => repeat1(
choice(
$.CharData,
$.element,
$._Reference,
$.CDSect,
$.PI,
$.Comment
)
),
// CDATA section: CDStart, the external CData token, then ']]>'.
CDSect: $ => prec.left(
seq($.CDStart, $.CData, ']]>')
),
// The opening `<![CDATA[`, split into three tokens.
CDStart: _ => seq('<![', 'CDATA', '['),
// `<?xml-stylesheet pseudo-attributes ?>`
StyleSheetPI: $ => seq(
'<?',
'xml-stylesheet',
c.rseq($._S, $.PseudoAtt),
O($._S),
'?>'
),
// `<?xml-model pseudo-attributes ?>`
XmlModelPI: $ => seq(
'<?',
'xml-model',
c.rseq($._S, $.PseudoAtt),
O($._S),
'?>'
),
// name, _Eq, value inside the two processing instructions above.
PseudoAtt: $ => seq($.Name, $._Eq, $.PseudoAttValue),
// Value built by c.att_value for either quote character.
PseudoAttValue: $ => choice(
c.att_value($, '"'),
c.att_value($, "'")
)
}
});

@ -1,3 +0,0 @@
{
"main": "../bindings/node/xml"
}

@ -1,171 +0,0 @@
;; XML declaration
(XMLDecl "xml" @keyword)
(XMLDecl [ "version" "encoding" "standalone" ] @property)
(XMLDecl (EncName) @string.special)
(XMLDecl (VersionNum) @number)
(XMLDecl [ "yes" "no" ] @boolean)
;; Processing instructions
(PI) @embedded
(PI (PITarget) @keyword)
;; Element declaration
(elementdecl
"ELEMENT" @keyword
(Name) @tag)
(contentspec
(_ (Name) @property))
"#PCDATA" @type.builtin
[ "EMPTY" "ANY" ] @string.special.symbol
[ "*" "?" "+" ] @operator
;; Entity declaration
(GEDecl
"ENTITY" @keyword
(Name) @constant)
(GEDecl (EntityValue) @string)
(NDataDecl
"NDATA" @keyword
(Name) @label)
;; Parameter entity declaration
(PEDecl
"ENTITY" @keyword
"%" @operator
(Name) @constant)
(PEDecl (EntityValue) @string)
;; Notation declaration
(NotationDecl
"NOTATION" @keyword
(Name) @constant)
(NotationDecl
(ExternalID
(SystemLiteral (URI) @string.special)))
;; Attlist declaration
(AttlistDecl
"ATTLIST" @keyword
(Name) @tag)
(AttDef (Name) @property)
(AttDef (Enumeration (Nmtoken) @string))
[
(StringType)
(TokenizedType)
] @type.builtin
(NotationType "NOTATION" @type.builtin)
[
"#REQUIRED"
"#IMPLIED"
"#FIXED"
] @attribute
;; Entities
(EntityRef) @constant
((EntityRef) @constant.builtin
(#any-of? @constant.builtin
"&amp;" "&lt;" "&gt;" "&quot;" "&apos;"))
(CharRef) @constant
(PEReference) @constant
;; External references
[ "PUBLIC" "SYSTEM" ] @keyword
(PubidLiteral) @string.special
(SystemLiteral (URI) @markup.link)
;; xml-model and xml-stylesheet processing instructions
(XmlModelPI "xml-model" @keyword)
(StyleSheetPI "xml-stylesheet" @keyword)
(PseudoAtt (Name) @property)
(PseudoAtt (PseudoAttValue) @string)
;; Doctype declaration
(doctypedecl "DOCTYPE" @keyword)
(doctypedecl (Name) @type)
;; Tags
(STag (Name) @tag)
(ETag (Name) @tag)
(EmptyElemTag (Name) @tag)
;; Attributes
(Attribute (Name) @property)
(Attribute (AttValue) @string)
;; Text
(CharData) @markup
(CDSect
(CDStart) @markup.heading
(CData) @markup.raw
"]]>" @markup.heading)
;; Delimiters & punctuation
[
"<?" "?>"
"<!" "]]>"
"<" ">"
"</" "/>"
] @punctuation.delimiter
[
"(" ")"
"[" "]"
] @punctuation.bracket
[ "\"" "'" ] @punctuation.delimiter
[ "," "|" "=" ] @operator
;; Misc
[ "INCLUDE" "IGNORE" ] @keyword
(Comment) @comment
(ERROR) @error

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,81 +0,0 @@
#include "../../common/scanner.h"
/// Report whether every external token handled by this scanner (PI target,
/// PI content, comment, character data, CDATA) is marked valid at once; the
/// scanner treats that situation as error recovery mode.
static inline bool in_error_recovery(const bool *valid_symbols) {
    if (!valid_symbols[PI_TARGET]) {
        return false;
    }
    if (!valid_symbols[PI_CONTENT]) {
        return false;
    }
    if (!valid_symbols[COMMENT]) {
        return false;
    }
    if (!valid_symbols[CHAR_DATA]) {
        return false;
    }
    return valid_symbols[CDATA];
}
/// Scan for a CharData node
///
/// Consumes input until end of input, '<' or '&'. Returns true with
/// result_symbol set to CHAR_DATA when the loop body ran at least once;
/// returns false when nothing was consumed, or when "]]>" is met after
/// earlier characters. `advance` is defined outside this file (presumably it
/// consumes the lookahead character — confirm in ../../common/scanner.h).
static bool scan_char_data(TSLexer *lexer) {
// Set once the loop body has completed at least one iteration.
bool advanced_once = false;
while (!lexer->eof(lexer) &&
lexer->lookahead != '<' &&
lexer->lookahead != '&') {
// Look ahead for "]]>"; the token end is marked before the first ']'.
if (lexer->lookahead == ']') {
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == ']') {
advance(lexer);
if (lexer->lookahead == '>') {
advance(lexer);
if (advanced_once) {
// NOTE(review): result_symbol is assigned although false is returned;
// whether the caller reads it on failure is not visible here.
lexer->result_symbol = CHAR_DATA;
return false;
}
}
}
}
advanced_once = true;
// NOTE(review): this advance also runs after the ']' handling above without
// re-checking lookahead, so the character following a lone "]" or "]]" is
// consumed even when it is '<' or '&' — confirm this is intended.
advance(lexer);
}
if (advanced_once) {
lexer->mark_end(lexer);
lexer->result_symbol = CHAR_DATA;
return true;
}
return false;
}
/// Scan for a CData node
///
/// Consumes input until "]]>" is found. Returns true with result_symbol set
/// to CDATA when "]]>" is reached after at least one completed loop
/// iteration; the token end was marked before the first ']' of that
/// sequence. Returns false if end of input is reached first. `advance` is
/// defined outside this file (presumably it consumes the lookahead character
/// — confirm in ../../common/scanner.h).
static bool scan_cdata(TSLexer *lexer) {
// Set once the loop body has completed at least one iteration.
bool advanced_once = false;
while (!lexer->eof(lexer)) {
if (lexer->lookahead == ']') {
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == ']') {
advance(lexer);
// NOTE(review): because of the advanced_once condition, a "]]>" met on
// the very first iteration is not accepted here — confirm intended.
if (lexer->lookahead == '>' && advanced_once) {
lexer->result_symbol = CDATA;
return true;
}
}
}
advanced_once = true;
// NOTE(review): after the ']' handling above this consumes one more
// character without inspecting it — confirm sequences such as "]]]>"
// behave as intended.
advance(lexer);
}
return false;
}
/// External scanner entry point for the XML grammar.
///
/// Bails out in error recovery mode; otherwise runs the scanner for the first
/// applicable token kind, checked in the order PI target, PI content, comment
/// (only when the lookahead is '<'), character data, CDATA. `payload` is not
/// used. Returns whatever the selected scanner returns, or false when no
/// handled token kind applies.
bool tree_sitter_xml_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    if (in_error_recovery(valid_symbols)) {
        return false;
    }

    bool matched = false;
    if (valid_symbols[PI_TARGET]) {
        matched = scan_pi_target(lexer, valid_symbols);
    } else if (valid_symbols[PI_CONTENT]) {
        matched = scan_pi_content(lexer);
    } else if (valid_symbols[COMMENT] && lexer->lookahead == '<') {
        matched = scan_comment(lexer);
    } else if (valid_symbols[CHAR_DATA]) {
        matched = scan_char_data(lexer);
    } else if (valid_symbols[CDATA]) {
        matched = scan_cdata(lexer);
    }
    return matched;
}
// Expands SCANNER_BOILERPLATE (from ../../common/scanner.h) for the `xml`
// grammar. NOTE(review): presumably this emits the remaining external-scanner
// entry points (create/destroy/serialize/deserialize) — confirm in that header.
SCANNER_BOILERPLATE(xml)

@ -1,224 +0,0 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Reserved symbol ids: the error symbol is (TSSymbol)-1 and the end symbol is 0.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Buffer size constant. NOTE(review): presumably the capacity of the buffer
// handed to an external scanner's serialize callback — confirm against the runtime.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// Identifier of a parse state (16-bit).
typedef uint16_t TSStateId;
// The typedefs below are skipped when TREE_SITTER_API_H_ is already defined,
// presumably because that header declares the same names.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field-map record: a field id, the index of the child it refers to, and
// an `inherited` flag (the flag's exact meaning is not shown in this header).
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// An (index, length) pair. NOTE(review): presumably selects a run of
// TSFieldMapEntry records — confirm against the runtime.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Per-symbol boolean flags: visible, named, supertype.
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
// Lexer interface handed to generated lex functions and external scanners.
// The lexer macros in this header read `lookahead`, call `advance` with a
// skip flag, and on acceptance store the token in `result_symbol` and call
// `mark_end`. The remaining callbacks are query functions on the lexer.
typedef struct TSLexer TSLexer;
struct TSLexer {
// Current lookahead character.
int32_t lookahead;
// Symbol of the token being produced.
TSSymbol result_symbol;
// Move past the lookahead character; the bool is the `skip` flag.
void (*advance)(TSLexer *, bool);
// Mark the end of the token being produced.
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
// True at end of input.
bool (*eof)(const TSLexer *);
};
// Kinds of parse action; stored in the `type` member of TSParseAction.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. Both struct members begin with a uint8_t `type`,
// so the bare `type` member overlays that leading byte; the RECOVER and
// ACCEPT_INPUT macros in this header set only that member.
typedef union {
// Shift: target state plus `extra` and `repetition` flags.
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
// Reduce: number of children, resulting symbol, dynamic precedence and
// production id.
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
// Pair of lex-state ids: one for the generated lexer and one for the
// external scanner. NOTE(review): presumably one entry per parse state, as
// TSLanguage.lex_modes suggests — confirm.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Element of the parse-action table: either an action, or an `entry` header
// carrying a count and a `reusable` flag. NOTE(review): the count presumably
// gives the number of actions that follow the header — confirm.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
// Description of a generated language: counts, pointers to the generated
// tables, the lex functions and the external scanner callbacks. This header
// does not document the individual fields; the group notes below are read off
// the field names and types — confirm details against the tree-sitter runtime.
struct TSLanguage {
// Version number and table sizes.
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
// Parse tables and the action entries they refer to.
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
// Symbol and field names and metadata.
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
// Lexing: lex modes and the generated lex functions.
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// External scanner hooks; `scan` takes the scanner payload, the lexer and
// an array of per-symbol validity flags.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*
* These expand inside a function that has `lexer` (TSLexer *) and `state`
* in scope; they communicate through the locals and labels that
* START_LEXER introduces.
*/
// Declares the locals result/skip/eof/lookahead and the labels `next_state`
// and `start`. Jumping to `next_state` advances the lexer (passing the
// current `skip` flag) and falls through to `start`, which clears `skip` and
// reloads `lookahead`.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// Switch to `state_value` and consume the lookahead character.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Like ADVANCE, but sets `skip` so the lexer is advanced with skip = true.
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// Record a match: sets result, stores the symbol and marks the token end.
// Expands to three statements that are not wrapped in a block, so it must be
// used as a statement sequence, not as the unbraced body of an `if`.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// Return whether a token has been accepted so far.
#define END_STATE() return result;
/*
 * Parse Table Macros
 */
// SMALL_STATE rebases a state id by LARGE_STATE_COUNT (which must be defined
// by the including file before the macro is used). The expansions are fully
// parenthesized so they keep their meaning inside a larger expression, e.g.
// `2 * SMALL_STATE(5)`; without the parentheses that would expand to
// `2 * 5 - LARGE_STATE_COUNT`.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
// STATE and ACTIONS pass their argument through unchanged; they only label
// the role of a table entry.
#define STATE(id) (id)
#define ACTIONS(id) (id)
// Each macro below expands to a brace-enclosed initializer (doubly braced)
// that fills a TSParseAction member with designated initializers.
// Shift to `state_value`.
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
// Shift to `state_value` with the `repetition` flag set.
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
// Shift with the `extra` flag set and no explicit target state.
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
// Reduce `child_count_val` children to `symbol_val`; any further arguments
// are pasted in as additional designated initializers of the reduce struct.
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
// Recover action: only the `type` member is set.
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
// Accept action: only the `type` member is set.
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

@ -1,104 +0,0 @@
================================================================================
Invalid comment
================================================================================
<error>
<!-- invalid -- -->
</error>
--------------------------------------------------------------------------------
(ERROR
(STag
(Name))
(content
(CharData)
(element
(STag
(ERROR
(Nmtoken))
(Name)
(ERROR
(Nmtoken)
(Nmtoken)))
(content
(CharData))
(ETag
(Name)))
(CharData)))
================================================================================
Invalid processing instructions
================================================================================
<?xml is invalid?>
<error>
<?bar is ?> invalid?>
</error>
--------------------------------------------------------------------------------
(document
(ERROR
(Name)
(Name))
(element
(STag
(Name))
(content
(CharData))
(ERROR
(PITarget)
(Name)
(Name))
(ETag
(Name))))
================================================================================
Invalid character data
================================================================================
<error>
<raw>
This is not valid: ]]>
</raw>
<cdata>
<![CDATA[ ]]> is invalid ]]>
</cdata>
</error>
--------------------------------------------------------------------------------
(document
(element
(STag
(Name))
(content
(CharData)
(element
(STag
(Name))
(ERROR
(Name)
(Name)
(Name)
(Name))
(ETag
(Name)))
(CharData)
(element
(STag
(Name))
(content
(CharData)
(CDSect
(CDStart)
(CData)))
(ERROR
(Name)
(Name))
(ETag
(Name)))
(CharData))
(ETag
(Name))))

@ -1,180 +0,0 @@
================================================================================
note.xml
================================================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE note [
<!ELEMENT note (to,from,heading,body,footer?)>
<!ELEMENT to (#PCDATA)>
<!ELEMENT from (#PCDATA)>
<!ELEMENT heading (#PCDATA)>
<!ELEMENT body (#PCDATA)>
<!ELEMENT footer (#PCDATA)>
<!ENTITY nbsp "&#xA0;">
<!ENTITY writer "Writer: Donald Duck.">
<!ENTITY copyright "Copyright: W3Schools.">
]>
<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
<footer>&writer;&nbsp;&copyright;</footer>
</note>
--------------------------------------------------------------------------------
(document
(prolog
(XMLDecl
(VersionNum)
(EncName))
(doctypedecl
(Name)
(elementdecl
(Name)
(contentspec
(children
(Name)
(Name)
(Name)
(Name)
(Name))))
(elementdecl
(Name)
(contentspec
(Mixed)))
(elementdecl
(Name)
(contentspec
(Mixed)))
(elementdecl
(Name)
(contentspec
(Mixed)))
(elementdecl
(Name)
(contentspec
(Mixed)))
(elementdecl
(Name)
(contentspec
(Mixed)))
(GEDecl
(Name)
(EntityValue
(CharRef)))
(GEDecl
(Name)
(EntityValue))
(GEDecl
(Name)
(EntityValue))))
(element
(STag
(Name))
(content
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData)
(element
(STag
(Name))
(content
(EntityRef
(Name))
(EntityRef
(Name))
(EntityRef
(Name)))
(ETag
(Name)))
(CharData))
(ETag
(Name))))
================================================================================
address.xml
================================================================================
<?xml version = "1.0" encoding = "UTF-8" standalone = "no" ?>
<!DOCTYPE address SYSTEM "address.dtd">
<address>
<name>Tanmay Patil</name>
<company>TutorialsPoint</company>
<phone>(011) 123-4567</phone>
</address>
--------------------------------------------------------------------------------
(document
(prolog
(XMLDecl
(VersionNum)
(EncName))
(doctypedecl
(Name)
(ExternalID
(SystemLiteral
(URI)))))
(element
(STag
(Name))
(content
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData))
(ETag
(Name))))

@ -1,119 +0,0 @@
================================================================================
XML Model
================================================================================
<?xml version="1.0"?>
<?xml-model href="http://www.docbook.org/xml/5.0/rng/docbook.rng"?>
<?xml-model href="http://www.docbook.org/xml/5.0/xsd/docbook.xsd"?>
<book xmlns="http://docbook.org/ns/docbook">
</book>
--------------------------------------------------------------------------------
(document
(prolog
(XMLDecl
(VersionNum))
(XmlModelPI
(PseudoAtt
(Name)
(PseudoAttValue)))
(XmlModelPI
(PseudoAtt
(Name)
(PseudoAttValue))))
(element
(STag
(Name)
(Attribute
(Name)
(AttValue)))
(content
(CharData))
(ETag
(Name))))
================================================================================
XML Stylesheet
================================================================================
<?xml-stylesheet href="common.css"?>
<?xml-stylesheet href="default.css" title="Default style"?>
<?xml-stylesheet alternate="yes" href="alt.css" title="Alternative style"?>
<?xml-stylesheet href="single-col.css" media="all and (max-width: 30em)"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Example with xml-stylesheet processing instructions</title>
</head>
<body>
...
</body>
</html>
--------------------------------------------------------------------------------
(document
(prolog
(StyleSheetPI
(PseudoAtt
(Name)
(PseudoAttValue)))
(StyleSheetPI
(PseudoAtt
(Name)
(PseudoAttValue))
(PseudoAtt
(Name)
(PseudoAttValue)))
(StyleSheetPI
(PseudoAtt
(Name)
(PseudoAttValue))
(PseudoAtt
(Name)
(PseudoAttValue))
(PseudoAtt
(Name)
(PseudoAttValue)))
(StyleSheetPI
(PseudoAtt
(Name)
(PseudoAttValue))
(PseudoAtt
(Name)
(PseudoAttValue))))
(element
(STag
(Name)
(Attribute
(Name)
(AttValue)))
(content
(CharData)
(element
(STag
(Name))
(content
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData))
(ETag
(Name)))
(CharData)
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name)))
(CharData))
(ETag
(Name))))

@ -1,117 +0,0 @@
================================================================================
Prolog and Document Type
================================================================================
<?xml version="1.1" encoding="UTF-8" ?>
<!DOCTYPE greeting [
<!ELEMENT greeting (#PCDATA)>
]>
<greeting>Hello, world!</greeting>
--------------------------------------------------------------------------------
(document
(prolog
(XMLDecl
(VersionNum)
(EncName))
(doctypedecl
(Name)
(elementdecl
(Name)
(contentspec
(Mixed)))))
(element
(STag
(Name))
(content
(CharData))
(ETag
(Name))))
================================================================================
Comment
================================================================================
<!-- declarations for <head> & <body> -->
<greeting/>
--------------------------------------------------------------------------------
(document
(prolog
(Comment))
(element
(EmptyElemTag
(Name))))
================================================================================
CDATA Section
================================================================================
<greeting>
<![CDATA[<greeting>Hello, world!</greeting>]]>
</greeting>
--------------------------------------------------------------------------------
(document
(element
(STag
(Name))
(content
(CharData)
(CDSect
(CDStart)
(CData))
(CharData))
(ETag
(Name))))
================================================================================
Element Tags
================================================================================
<termdef id="dt-dog" term="dog">
<IMG align="left"
src="http://www.w3.org/Icons/WWW/w3c_home" />
<br></br>
<br/>
</termdef>
--------------------------------------------------------------------------------
(document
(element
(STag
(Name)
(Attribute
(Name)
(AttValue))
(Attribute
(Name)
(AttValue)))
(content
(CharData)
(element
(EmptyElemTag
(Name)
(Attribute
(Name)
(AttValue))
(Attribute
(Name)
(AttValue))))
(CharData)
(element
(STag
(Name))
(ETag
(Name)))
(CharData)
(element
(EmptyElemTag
(Name)))
(CharData))
(ETag
(Name))))