Use tree-sitter-html from crates.io

pull/795/head
Wilfred Hughes 2024-12-20 08:55:33 +07:00
parent 507b60bc45
commit 4873e87dac
35 changed files with 18 additions and 4652 deletions

@ -11,7 +11,7 @@ with YAML.
Improved language detection when one argument is a named pipe.
Updated to the latest tree-sitter parser for Bash, C, C++, C#, Go,
Haskell, Java, JavaScript, JSON, Julia, Objective-C, OCaml, PHP,
Haskell, HTML, Java, JavaScript, JSON, Julia, Objective-C, OCaml, PHP,
Python, Ruby, Scala and TypeScript.
### Syntax Highlighting

11
Cargo.lock generated

@ -255,6 +255,7 @@ dependencies = [
"tree-sitter-cpp",
"tree-sitter-go",
"tree-sitter-haskell",
"tree-sitter-html",
"tree-sitter-java",
"tree-sitter-javascript",
"tree-sitter-json",
@ -1078,6 +1079,16 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-html"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "261b708e5d92061ede329babaaa427b819329a9d427a1d710abb0f67bbef63ee"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-java"
version = "0.23.4"

@ -95,6 +95,7 @@ tree-sitter-go = "0.23.4"
tree-sitter-bash = "0.23.3"
tree-sitter-php = "0.23.11"
tree-sitter-json = "0.24.8"
tree-sitter-html = "0.23.2"
[dev-dependencies]
# assert_cmd 2.0.10 requires predicates 3.

@ -41,8 +41,6 @@ impl TreeSitterParser {
.flag_if_supported("-Wno-implicit-fallthrough")
.flag_if_supported("-Wno-unused-parameter")
.flag_if_supported("-Wno-ignored-qualifiers")
// Ignore warning from tree-sitter-html.
.flag_if_supported("-Wno-sign-compare")
.link_lib_modifier("+whole-archive");
for file in cpp_files {
@ -154,11 +152,6 @@ fn main() {
src_dir: "vendored_parsers/tree-sitter-hcl-src",
extra_files: vec!["scanner.cc"],
},
TreeSitterParser {
name: "tree-sitter-html",
src_dir: "vendored_parsers/tree-sitter-html-src",
extra_files: vec!["scanner.cc"],
},
TreeSitterParser {
name: "tree-sitter-janet-simple",
src_dir: "vendored_parsers/tree-sitter-janet-simple-src",

@ -79,7 +79,6 @@ extern "C" {
fn tree_sitter_hare() -> ts::Language;
fn tree_sitter_hack() -> ts::Language;
fn tree_sitter_hcl() -> ts::Language;
fn tree_sitter_html() -> ts::Language;
fn tree_sitter_janet_simple() -> ts::Language;
fn tree_sitter_kotlin() -> ts::Language;
fn tree_sitter_latex() -> ts::Language;
@ -507,7 +506,9 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
}
}
Html => {
let language = unsafe { tree_sitter_html() };
let language_fn = tree_sitter_html::LANGUAGE;
let language = tree_sitter::Language::new(language_fn);
TreeSitterConfig {
language: language.clone(),
atom_nodes: vec![
@ -522,11 +523,8 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
delimiter_tokens: vec![("<", ">"), ("<!", ">"), ("<!--", "-->")]
.into_iter()
.collect(),
highlight_query: ts::Query::new(
&language,
include_str!("../../vendored_parsers/highlights/html.scm"),
)
.unwrap(),
highlight_query: ts::Query::new(&language, tree_sitter_html::HIGHLIGHTS_QUERY)
.unwrap(),
sub_languages: vec![
TreeSitterSubLanguage {
query: ts::Query::new(&language, "(style_element (raw_text) @contents)")

@ -1 +0,0 @@
tree-sitter-html/src

@ -1,22 +0,0 @@
image: Visual Studio 2015
environment:
nodejs_version: "10"
platform:
- x64
install:
- ps: Install-Product node $env:nodejs_version
- node --version
- npm --version
- npm install
test_script:
- npm run test-windows
build: off
branches:
only:
- master

@ -1 +0,0 @@
/src/parser.c linguist-vendored

@ -1,12 +0,0 @@
node_modules
build
*.log
package-lock.json
target
Cargo.lock
*.a
*.dylib
*.so
*.o
bindings/c/*.h
bindings/c/*.pc

@ -1,6 +0,0 @@
corpus
examples
build
script
target
Cargo.lock

@ -1,10 +0,0 @@
language: node_js
node_js: 10
env:
- CXX=clang++
branches:
only:
- master

@ -1,25 +0,0 @@
[package]
name = "tree-sitter-html"
description = "html grammar for the tree-sitter parsing library"
version = "0.19.0"
keywords = ["incremental", "parsing", "html"]
categories = ["parsing", "text-editors"]
repository = "https://github.com/tree-sitter/tree-sitter-html"
edition = "2018"
build = "bindings/rust/build.rs"
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "0.19"
[build-dependencies]
cc = "1.0"

@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2014 Max Brunsfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -1,99 +0,0 @@
VERSION := 0.19.0
# Repository
SRC_DIR := src
PARSER_REPO_URL ?= $(shell git -C $(SRC_DIR) remote get-url origin )
# the # in the sed pattern has to be escaped or it will be interpreted as a comment
PARSER_NAME ?= $(shell basename $(PARSER_REPO_URL) | cut -d '-' -f3 | sed 's\#.git\#\#')
UPPER_PARSER_NAME := $(shell echo $(PARSER_NAME) | tr a-z A-Z )
# install directory layout
PREFIX ?= /usr/local
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# collect C++ sources, and link if necessary
CPPSRC := $(wildcard $(SRC_DIR)/*.cc)
ifeq (, $(CPPSRC))
ADDITIONALLIBS :=
else
ADDITIONALLIBS := -lc++
endif
# collect sources
SRC := $(wildcard $(SRC_DIR)/*.c)
SRC += $(CPPSRC)
OBJ := $(addsuffix .o,$(basename $(SRC)))
# ABI versioning
SONAME_MAJOR := 0
SONAME_MINOR := 0
CFLAGS ?= -O3 -Wall -Wextra -I$(SRC_DIR)
CXXFLAGS ?= -O3 -Wall -Wextra -I$(SRC_DIR)
override CFLAGS += -std=gnu99 -fPIC
override CXXFLAGS += -fPIC
# OS-specific bits
# Selects the shared-library file names and assembles LINKSHARED, the flags
# used to link the shared library. The linker options are collected into ONE
# comma-separated -Wl, argument, so every fragment appended before the final
# one must end with a comma.
ifeq ($(shell uname),Darwin)
SOEXT = dylib
SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
LINKSHARED := $(LINKSHARED)-dynamiclib -Wl,
ifneq ($(ADDITIONALLIBS),)
LINKSHARED := $(LINKSHARED)$(ADDITIONALLIBS),
endif
LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/libtree-sitter-$(PARSER_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks
else
SOEXT = so
SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
LINKSHARED := $(LINKSHARED)-shared -Wl,
ifneq ($(ADDITIONALLIBS),)
# The trailing comma keeps the library flag separate from -soname below;
# without it the two fuse into a single bogus "-lc++-soname" option.
LINKSHARED := $(LINKSHARED)$(ADDITIONALLIBS),
endif
LINKSHARED := $(LINKSHARED)-soname,libtree-sitter-$(PARSER_NAME).so.$(SONAME_MAJOR)
endif
ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif
# Default target: build the static archive, the versioned shared library and
# the generated C header.
all: libtree-sitter-$(PARSER_NAME).a libtree-sitter-$(PARSER_NAME).$(SOEXTVER) bindings/c/$(PARSER_NAME).h
# Static archive built from every object file in $(OBJ).
libtree-sitter-$(PARSER_NAME).a: $(OBJ)
$(AR) rcs $@ $^
# Versioned shared library, plus unversioned and major-version symlinks to it
# created in the build directory.
libtree-sitter-$(PARSER_NAME).$(SOEXTVER): $(OBJ)
$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
ln -sf $@ libtree-sitter-$(PARSER_NAME).$(SOEXT)
ln -sf $@ libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR)
# C header generated from the template by substituting the parser name
# placeholders.
bindings/c/$(PARSER_NAME).h:
sed -e 's|@UPPER_PARSERNAME@|$(UPPER_PARSER_NAME)|' \
-e 's|@PARSERNAME@|$(PARSER_NAME)|' \
bindings/c/tree-sitter.h.in > $@
# Install the libraries (with symlinks), the header and a pkg-config file
# generated from its template, all rooted at $(DESTDIR).
install: all
install -d '$(DESTDIR)$(LIBDIR)'
install -m755 libtree-sitter-$(PARSER_NAME).a '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).a
install -m755 libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXTVER)
ln -sf libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR)
ln -sf libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXT)
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
install -m644 bindings/c/$(PARSER_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/
install -d '$(DESTDIR)$(PCLIBDIR)'
sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
-e 's|=$(PREFIX)|=$${prefix}|' \
-e 's|@PREFIX@|$(PREFIX)|' \
-e 's|@ADDITIONALLIBS@|$(ADDITIONALLIBS)|' \
-e 's|@PARSERNAME@|$(PARSER_NAME)|' \
-e 's|@PARSERREPOURL@|$(PARSER_REPO_URL)|' \
bindings/c/tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter-$(PARSER_NAME).pc
# Remove the object files and every artifact produced by `all`.
clean:
rm -f $(OBJ) libtree-sitter-$(PARSER_NAME).a libtree-sitter-$(PARSER_NAME).$(SOEXT) libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR) libtree-sitter-$(PARSER_NAME).$(SOEXTVER) bindings/c/$(PARSER_NAME).h
# None of these targets name real files.
.PHONY: all install clean

@ -1,13 +0,0 @@
tree-sitter-html
================
[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter-html.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter-html)
[![Build status](https://ci.appveyor.com/api/projects/status/bv1i8f3yi2aoyonx/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter-html/branch/master)
HTML grammar for [tree-sitter][].
[tree-sitter]: https://github.com/tree-sitter/tree-sitter
References
* [The HTML5 Spec](https://www.w3.org/TR/html5/syntax.html)

@ -1,19 +0,0 @@
{
"targets": [
{
"target_name": "tree_sitter_html_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
],
"sources": [
"src/parser.c",
"src/scanner.cc",
"bindings/node/binding.cc"
],
"cflags_c": [
"-std=c99",
]
}
]
}

@ -1,16 +0,0 @@
#ifndef TREE_SITTER_@UPPER_PARSERNAME@_H_
#define TREE_SITTER_@UPPER_PARSERNAME@_H_
#include <tree_sitter/parser.h>
#ifdef __cplusplus
extern "C" {
#endif
extern TSLanguage *tree_sitter_@PARSERNAME@();
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_@UPPER_PARSERNAME@_H_

@ -1,11 +0,0 @@
prefix=@PREFIX@
libdir=@LIBDIR@
includedir=@INCLUDEDIR@
additionallibs=@ADDITIONALLIBS@
Name: tree-sitter-@PARSERNAME@
Description: A tree-sitter grammar for the @PARSERNAME@ programming language.
URL: @PARSERREPOURL@
Version: @VERSION@
Libs: -L${libdir} ${additionallibs} -ltree-sitter-@PARSERNAME@
Cflags: -I${includedir}

@ -1,28 +0,0 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
extern "C" TSLanguage * tree_sitter_html();
namespace {

// Constructor callback for the "Language" function template. Its body is
// empty: instances are only used as holders for the language pointer.
NAN_METHOD(New) {}

// Module initializer. Creates one object from a "Language" function template,
// stores the pointer returned by tree_sitter_html() in internal field 0,
// tags the object with name = "html", and installs it as `module.exports`.
void Init(Local<Object> exports, Local<Object> module) {
  Local<FunctionTemplate> language_template = Nan::New<FunctionTemplate>(New);
  language_template->SetClassName(Nan::New("Language").ToLocalChecked());
  // Reserve a single internal slot for the raw language pointer.
  language_template->InstanceTemplate()->SetInternalFieldCount(1);

  Local<Function> language_ctor =
      Nan::GetFunction(language_template).ToLocalChecked();
  Local<Object> language_object =
      language_ctor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();

  Nan::SetInternalFieldPointer(language_object, 0, tree_sitter_html());
  Nan::Set(language_object,
           Nan::New("name").ToLocalChecked(),
           Nan::New("html").ToLocalChecked());
  Nan::Set(module, Nan::New("exports").ToLocalChecked(), language_object);
}

NODE_MODULE(tree_sitter_html_binding, Init)

}  // namespace

@ -1,19 +0,0 @@
// Load the compiled native binding, preferring the Release build and falling
// back to the Debug build only when the Release artifact is missing.
function loadBinding() {
  let releaseError;
  try {
    return require("../../build/Release/tree_sitter_html_binding");
  } catch (err) {
    // Anything other than "module not found" is a real failure: surface it.
    if (err.code !== 'MODULE_NOT_FOUND') {
      throw err;
    }
    releaseError = err;
  }

  try {
    return require("../../build/Debug/tree_sitter_html_binding");
  } catch (err) {
    if (err.code !== 'MODULE_NOT_FOUND') {
      throw err;
    }
    // Neither build exists: report the original (Release) lookup failure.
    throw releaseError
  }
}

module.exports = loadBinding();

// Attach the static node type description when it is available; any failure
// to load it is ignored.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

@ -1,25 +0,0 @@
/// Build script entry point.
///
/// Compiles `src/parser.c` into a library named `parser` and `src/scanner.cc`
/// (as C++) into a library named `scanner`, printing a
/// `cargo:rerun-if-changed` directive for each of the two source files.
fn main() {
    let src_dir = std::path::Path::new("src");

    // C parser.
    let parser_path = src_dir.join("parser.c");
    let mut parser_build = cc::Build::new();
    parser_build
        .include(&src_dir)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs")
        .file(&parser_path);
    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
    parser_build.compile("parser");

    // C++ scanner, built with a separate configuration.
    let scanner_path = src_dir.join("scanner.cc");
    let mut scanner_build = cc::Build::new();
    scanner_build
        .cpp(true)
        .include(&src_dir)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .file(&scanner_path);
    println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
    scanner_build.compile("scanner");
}

@ -1,52 +0,0 @@
//! This crate provides HTML language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = "";
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(tree_sitter_html::language()).expect("Error loading html grammar");
//! let tree = parser.parse(code, None).unwrap();
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter::Language;
// Foreign symbol that returns the grammar's `Language`.
// NOTE(review): presumably defined by the C parser that this crate's build
// script compiles and links — confirm against the generated `src/parser.c`.
extern "C" {
fn tree_sitter_html() -> Language;
}
/// Get the tree-sitter [Language][] for this grammar.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language() -> Language {
// SAFETY: NOTE(review) — soundness relies on the linked native code exporting
// `tree_sitter_html` with exactly the signature declared in the `extern "C"`
// block; that cannot be verified from this file alone.
unsafe { tree_sitter_html() }
}
/// The content of the [`node-types.json`][] file for this grammar, embedded
/// into the crate at compile time.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
// `'static` is implied for references in `const` items, so it is not spelled out.
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
// Uncomment these to include any queries that this grammar contains
// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
    /// Smoke test: a fresh parser must accept the language returned by
    /// `language()`.
    #[test]
    fn test_can_load_grammar() {
        let html = super::language();
        let mut parser = tree_sitter::Parser::new();
        let outcome = parser.set_language(html);
        outcome.expect("Error loading html language");
    }
}

@ -1,312 +0,0 @@
===================================
Tags
===================================
<span>Hello</span>
---
(fragment
(element
(start_tag (tag_name))
(text)
(end_tag (tag_name))))
===================================
Tags with attributes
===================================
<input value=yes class="a" data-💩></input>
---
(fragment
(element
(start_tag
(tag_name)
(attribute
(attribute_name)
(attribute_value))
(attribute
(attribute_name)
(quoted_attribute_value (attribute_value)))
(attribute
(attribute_name)))
(end_tag (tag_name))))
===================================
Nested tags
===================================
<div>
<span>a</span>
b
<b>c</b>
Multi-line
text
</div>
---
(fragment
(element
(start_tag (tag_name))
(element
(start_tag (tag_name))
(text)
(end_tag (tag_name)))
(text)
(element
(start_tag (tag_name))
(text)
(end_tag (tag_name)))
(text)
(end_tag (tag_name))))
==================================
Void tags
==================================
<form><img src="something.png"><br><input type=submit value=Ok /></form>
---
(fragment
(element
(start_tag (tag_name))
(element
(start_tag
(tag_name)
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
(element (start_tag (tag_name)))
(element
(self_closing_tag
(tag_name)
(attribute (attribute_name) (attribute_value))
(attribute (attribute_name) (attribute_value))))
(end_tag (tag_name))))
==================================
Void tags at EOF
==================================
<img src="something.png">
---
(fragment
(element
(start_tag
(tag_name)
(attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
==================================
Custom tags
==================================
<something:different>
<atom-text-editor mini>
Hello
</atom-text-editor>
</something:different>
---
(fragment
(element
(start_tag (tag_name))
(element
(start_tag (tag_name) (attribute (attribute_name)))
(text)
(end_tag (tag_name)))
(end_tag (tag_name))))
==================================
Comments
==================================
<!-- hello -->
<!-- world ->-> -- > ->->->-- -> still comment -->
<div>
<!-- <span>something</span> -->
</div>
---
(fragment
(comment)
(comment)
(element
(start_tag (tag_name))
(comment)
(end_tag (tag_name))))
==================================
Raw text elements
==================================
<script>
</s
</sc
</scr
</scri
</scrip
</script>
<style>
</ </s </st </sty </styl
</style>
<script>
</SCRIPT>
---
(fragment
(script_element
(start_tag (tag_name))
(raw_text)
(end_tag (tag_name)))
(style_element
(start_tag (tag_name))
(raw_text)
(end_tag (tag_name)))
(script_element
(start_tag (tag_name))
(raw_text)
(end_tag (tag_name))))
==================================
All-caps doctype
==================================
<!DOCTYPE html PUBLIC
"-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
---
(fragment
(doctype))
==================================
Lowercase doctype
==================================
<!doctype html>
---
(fragment
(doctype))
==================================
LI elements without close tags
==================================
<ul>
<li>One
<li>Two
</ul>
---
(fragment
(element
(start_tag (tag_name))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))
(end_tag (tag_name))))
======================================
DT and DL elements without close tags
======================================
<dl>
<dt>Coffee
<dt>Café
<dd>Black hot drink
<dt>Milk
<dd>White cold drink
</dl>
---
(fragment
(element
(start_tag (tag_name))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))
(end_tag (tag_name))))
======================================
P elements without close tags
======================================
<p>One
<div>Two</div>
<p>Three
<p>Four
<h1>Five</h1>
---
(fragment
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text) (end_tag (tag_name))))
======================================
Ruby annotation elements without close tags
======================================
<ruby>東<rb>京<rt>とう<rt>きょう</ruby>
---
(fragment
(element
(start_tag (tag_name))
(text)
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text))
(end_tag (tag_name))))
=======================================
COLGROUP elements without end tags
=======================================
<table>
<colgroup>
<col style="background-color: #0f0">
<col span="2">
<tr>
<th>Lime</th>
<th>Lemon</th>
<th>Orange</th>
</tr>
</table>
---
(fragment
(element
(start_tag (tag_name))
(element
(start_tag (tag_name))
(element (start_tag
(tag_name)
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
(element (start_tag
(tag_name)
(attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
(element
(start_tag (tag_name))
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
(end_tag (tag_name)))
(end_tag (tag_name))))
=========================================
TR, TD, and TH elements without end tags
=========================================
<table>
<tr>
<th>One
<th>Two
<tr>
<td>Three
<td>Four
</table>
---
(fragment
(element
(start_tag (tag_name))
(element
(start_tag (tag_name))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text)))
(element
(start_tag (tag_name))
(element (start_tag (tag_name)) (text))
(element (start_tag (tag_name)) (text)))
(end_tag (tag_name))))

@ -1,62 +0,0 @@
<div>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
</div>

@ -1,62 +0,0 @@
<div>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
</div>

@ -1,125 +0,0 @@
// Tree-sitter grammar definition for HTML.
// Tag names, implicit end tags, raw text and comments are not described here;
// they are listed under `externals` and produced by the hand-written scanner.
module.exports = grammar({
name: 'html',
// Tokens listed as extras: comments and runs of whitespace.
extras: $ => [
$.comment,
/\s+/,
],
// Tokens supplied by the external scanner. The order of this list mirrors
// the scanner's TokenType enum, so entries must not be reordered on one
// side only.
externals: $ => [
$._start_tag_name,
$._script_start_tag_name,
$._style_start_tag_name,
$._end_tag_name,
$.erroneous_end_tag_name,
'/>',
$._implicit_end_tag,
$.raw_text,
$.comment,
],
rules: {
// Root rule: a document is any sequence of nodes.
fragment: $ => repeat($._node),
// `<!` + case-insensitive DOCTYPE keyword + anything up to the closing `>`.
doctype: $ => seq(
'<!',
alias($._doctype, 'doctype'),
/[^>]+/,
'>'
),
_doctype: $ => /[Dd][Oo][Cc][Tt][Yy][Pp][Ee]/,
// Everything that may appear at the top level or inside an element.
_node: $ => choice(
$.doctype,
$.text,
$.element,
$.script_element,
$.style_element,
$.erroneous_end_tag
),
// Either start tag + children + (explicit or scanner-generated implicit)
// end tag, or a single self-closing tag.
element: $ => choice(
seq(
$.start_tag,
repeat($._node),
choice($.end_tag, $._implicit_end_tag)
),
$.self_closing_tag
),
// <script> and <style> hold at most one raw_text child instead of nested
// nodes; their start tags are aliased so they appear as plain start_tag.
script_element: $ => seq(
alias($.script_start_tag, $.start_tag),
optional($.raw_text),
$.end_tag
),
style_element: $ => seq(
alias($.style_start_tag, $.start_tag),
optional($.raw_text),
$.end_tag
),
// The three start-tag rules differ only in which external tag-name token
// they accept; all of them expose it as tag_name.
start_tag: $ => seq(
'<',
alias($._start_tag_name, $.tag_name),
repeat($.attribute),
'>'
),
script_start_tag: $ => seq(
'<',
alias($._script_start_tag_name, $.tag_name),
repeat($.attribute),
'>'
),
style_start_tag: $ => seq(
'<',
alias($._style_start_tag_name, $.tag_name),
repeat($.attribute),
'>'
),
// Same shape as start_tag but terminated by the external '/>' token.
self_closing_tag: $ => seq(
'<',
alias($._start_tag_name, $.tag_name),
repeat($.attribute),
'/>'
),
end_tag: $ => seq(
'</',
alias($._end_tag_name, $.tag_name),
'>'
),
// An end tag whose name the scanner reported as erroneous_end_tag_name.
erroneous_end_tag: $ => seq(
'</',
$.erroneous_end_tag_name,
'>'
),
// name, optionally followed by `=` and a bare or quoted value.
attribute: $ => seq(
$.attribute_name,
optional(seq(
'=',
choice(
$.attribute_value,
$.quoted_attribute_value
)
))
),
// Names exclude < > " ' / = and whitespace; bare values additionally allow '/'.
attribute_name: $ => /[^<>"'/=\s]+/,
attribute_value: $ => /[^<>"'=\s]+/,
// Single- or double-quoted value; the content may be empty and is exposed
// as attribute_value.
quoted_attribute_value: $ => choice(
seq("'", optional(alias(/[^']+/, $.attribute_value)), "'"),
seq('"', optional(alias(/[^"]+/, $.attribute_value)), '"')
),
// A run containing no '<' or '>' that starts and ends with a
// non-whitespace character.
text: $ => /[^<>\s]([^<>]*[^<>\s])?/
}
});

@ -1,38 +0,0 @@
{
"name": "tree-sitter-html",
"version": "0.19.0",
"description": "HTML grammar for tree-sitter",
"main": "bindings/node",
"keywords": [
"parser",
"lexer"
],
"repository": {
"type": "git",
"url": "https://github.com/tree-sitter/tree-sitter-html.git"
},
"authors": [
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
"Ashi Krishnan <queerviolet@github.com>"
],
"license": "MIT",
"dependencies": {
"nan": "^2.14.0"
},
"devDependencies": {
"tree-sitter-cli": "^0.19.1"
},
"scripts": {
"test": "tree-sitter test && tree-sitter parse examples/*.html --quiet --time",
"test-windows": "tree-sitter test"
},
"tree-sitter": [
{
"scope": "text.html.basic",
"file-types": [
"html"
],
"injection-regex": "html"
}
]
}

@ -1,12 +0,0 @@
; Highlight captures for the HTML grammar: each pattern attaches a capture
; name to one node type.
(tag_name) @tag
; End-tag names that the grammar exposes as erroneous_end_tag_name.
(erroneous_end_tag_name) @tag.error
(doctype) @constant
(attribute_name) @attribute
(attribute_value) @string
(comment) @comment
; Tag delimiters. Note that "/>" is not part of this list.
[
"<"
">"
"</"
] @punctuation.bracket

@ -1,7 +0,0 @@
; Language injections: the raw_text child of a script_element is marked as
; injected content with language "javascript".
((script_element
(raw_text) @injection.content)
(#set! injection.language "javascript"))
; Likewise, the raw_text child of a style_element is injected as "css".
((style_element
(raw_text) @injection.content)
(#set! injection.language "css"))

@ -1,493 +0,0 @@
{
"name": "html",
"rules": {
"fragment": {
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_node"
}
},
"doctype": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "<!"
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_doctype"
},
"named": false,
"value": "doctype"
},
{
"type": "PATTERN",
"value": "[^>]+"
},
{
"type": "STRING",
"value": ">"
}
]
},
"_doctype": {
"type": "PATTERN",
"value": "[Dd][Oo][Cc][Tt][Yy][Pp][Ee]"
},
"_node": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "doctype"
},
{
"type": "SYMBOL",
"name": "text"
},
{
"type": "SYMBOL",
"name": "element"
},
{
"type": "SYMBOL",
"name": "script_element"
},
{
"type": "SYMBOL",
"name": "style_element"
},
{
"type": "SYMBOL",
"name": "erroneous_end_tag"
}
]
},
"element": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "start_tag"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_node"
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "end_tag"
},
{
"type": "SYMBOL",
"name": "_implicit_end_tag"
}
]
}
]
},
{
"type": "SYMBOL",
"name": "self_closing_tag"
}
]
},
"script_element": {
"type": "SEQ",
"members": [
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "script_start_tag"
},
"named": true,
"value": "start_tag"
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "raw_text"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "end_tag"
}
]
},
"style_element": {
"type": "SEQ",
"members": [
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "style_start_tag"
},
"named": true,
"value": "start_tag"
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "raw_text"
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "end_tag"
}
]
},
"start_tag": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "<"
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_start_tag_name"
},
"named": true,
"value": "tag_name"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "attribute"
}
},
{
"type": "STRING",
"value": ">"
}
]
},
"script_start_tag": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "<"
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_script_start_tag_name"
},
"named": true,
"value": "tag_name"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "attribute"
}
},
{
"type": "STRING",
"value": ">"
}
]
},
"style_start_tag": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "<"
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_style_start_tag_name"
},
"named": true,
"value": "tag_name"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "attribute"
}
},
{
"type": "STRING",
"value": ">"
}
]
},
"self_closing_tag": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "<"
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_start_tag_name"
},
"named": true,
"value": "tag_name"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "attribute"
}
},
{
"type": "STRING",
"value": "/>"
}
]
},
"end_tag": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "</"
},
{
"type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_end_tag_name"
},
"named": true,
"value": "tag_name"
},
{
"type": "STRING",
"value": ">"
}
]
},
"erroneous_end_tag": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "</"
},
{
"type": "SYMBOL",
"name": "erroneous_end_tag_name"
},
{
"type": "STRING",
"value": ">"
}
]
},
"attribute": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "attribute_name"
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "="
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "attribute_value"
},
{
"type": "SYMBOL",
"name": "quoted_attribute_value"
}
]
}
]
},
{
"type": "BLANK"
}
]
}
]
},
"attribute_name": {
"type": "PATTERN",
"value": "[^<>\"'/=\\s]+"
},
"attribute_value": {
"type": "PATTERN",
"value": "[^<>\"'=\\s]+"
},
"quoted_attribute_value": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "'"
},
{
"type": "CHOICE",
"members": [
{
"type": "ALIAS",
"content": {
"type": "PATTERN",
"value": "[^']+"
},
"named": true,
"value": "attribute_value"
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": "'"
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\""
},
{
"type": "CHOICE",
"members": [
{
"type": "ALIAS",
"content": {
"type": "PATTERN",
"value": "[^\"]+"
},
"named": true,
"value": "attribute_value"
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": "\""
}
]
}
]
},
"text": {
"type": "PATTERN",
"value": "[^<>\\s]([^<>]*[^<>\\s])?"
}
},
"extras": [
{
"type": "SYMBOL",
"name": "comment"
},
{
"type": "PATTERN",
"value": "\\s+"
}
],
"conflicts": [],
"precedences": [],
"externals": [
{
"type": "SYMBOL",
"name": "_start_tag_name"
},
{
"type": "SYMBOL",
"name": "_script_start_tag_name"
},
{
"type": "SYMBOL",
"name": "_style_start_tag_name"
},
{
"type": "SYMBOL",
"name": "_end_tag_name"
},
{
"type": "SYMBOL",
"name": "erroneous_end_tag_name"
},
{
"type": "STRING",
"value": "/>"
},
{
"type": "SYMBOL",
"name": "_implicit_end_tag"
},
{
"type": "SYMBOL",
"name": "raw_text"
},
{
"type": "SYMBOL",
"name": "comment"
}
],
"inline": [],
"supertypes": []
}

@ -1,305 +0,0 @@
[
{
"type": "attribute",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "attribute_name",
"named": true
},
{
"type": "attribute_value",
"named": true
},
{
"type": "quoted_attribute_value",
"named": true
}
]
}
},
{
"type": "doctype",
"named": true,
"fields": {}
},
{
"type": "element",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "doctype",
"named": true
},
{
"type": "element",
"named": true
},
{
"type": "end_tag",
"named": true
},
{
"type": "erroneous_end_tag",
"named": true
},
{
"type": "script_element",
"named": true
},
{
"type": "self_closing_tag",
"named": true
},
{
"type": "start_tag",
"named": true
},
{
"type": "style_element",
"named": true
},
{
"type": "text",
"named": true
}
]
}
},
{
"type": "end_tag",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "tag_name",
"named": true
}
]
}
},
{
"type": "erroneous_end_tag",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "erroneous_end_tag_name",
"named": true
}
]
}
},
{
"type": "fragment",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "doctype",
"named": true
},
{
"type": "element",
"named": true
},
{
"type": "erroneous_end_tag",
"named": true
},
{
"type": "script_element",
"named": true
},
{
"type": "style_element",
"named": true
},
{
"type": "text",
"named": true
}
]
}
},
{
"type": "quoted_attribute_value",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": false,
"types": [
{
"type": "attribute_value",
"named": true
}
]
}
},
{
"type": "script_element",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "end_tag",
"named": true
},
{
"type": "raw_text",
"named": true
},
{
"type": "start_tag",
"named": true
}
]
}
},
{
"type": "self_closing_tag",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "attribute",
"named": true
},
{
"type": "tag_name",
"named": true
}
]
}
},
{
"type": "start_tag",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "attribute",
"named": true
},
{
"type": "tag_name",
"named": true
}
]
}
},
{
"type": "style_element",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "end_tag",
"named": true
},
{
"type": "raw_text",
"named": true
},
{
"type": "start_tag",
"named": true
}
]
}
},
{
"type": "\"",
"named": false
},
{
"type": "'",
"named": false
},
{
"type": "/>",
"named": false
},
{
"type": "<",
"named": false
},
{
"type": "<!",
"named": false
},
{
"type": "</",
"named": false
},
{
"type": "=",
"named": false
},
{
"type": ">",
"named": false
},
{
"type": "attribute_name",
"named": true
},
{
"type": "attribute_value",
"named": true
},
{
"type": "comment",
"named": true
},
{
"type": "doctype",
"named": false
},
{
"type": "erroneous_end_tag_name",
"named": true
},
{
"type": "raw_text",
"named": true
},
{
"type": "tag_name",
"named": true
},
{
"type": "text",
"named": true
}
]

File diff suppressed because it is too large Load Diff

@ -1,310 +0,0 @@
#include <tree_sitter/parser.h>
#include <algorithm>
#include <vector>
#include <string>
#include <cwctype>
#include <cstring>
#include "tag.h"
namespace {
using std::vector;
using std::string;
// Tokens this external scanner can produce. The values are used both as
// indices into the `valid_symbols` array passed to scan() and as the value
// stored in `lexer->result_symbol`. The enumerators appear in the same order
// as the `externals` list of the grammar, so the two must be kept in sync.
enum TokenType {
START_TAG_NAME,
SCRIPT_START_TAG_NAME,
STYLE_START_TAG_NAME,
END_TAG_NAME,
ERRONEOUS_END_TAG_NAME,
SELF_CLOSING_TAG_DELIMITER,
IMPLICIT_END_TAG,
RAW_TEXT,
COMMENT
};
struct Scanner {
Scanner() {}
// Writes the open-element stack into `buffer` and returns the number of bytes
// used.
//
// Layout: [tags actually written : uint16][stack depth : uint16] followed by
// one record per written tag. A record is the tag type byte; CUSTOM tags are
// followed by a length byte and that many name bytes (names longer than
// UINT8_MAX are truncated). Writing stops early when the next record would
// not fit in TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
unsigned serialize(char *buffer) {
  uint16_t stack_depth = UINT16_MAX;
  if (tags.size() <= UINT16_MAX) stack_depth = tags.size();

  // The first uint16 slot is reserved for the written-tag count, which is
  // only known once the loop below has finished.
  unsigned offset = sizeof(stack_depth);
  std::memcpy(buffer + offset, &stack_depth, sizeof(stack_depth));
  offset += sizeof(stack_depth);

  uint16_t written = 0;
  while (written < stack_depth) {
    Tag &tag = tags[written];
    const bool is_custom = tag.type == CUSTOM;

    unsigned name_length = 0;
    if (is_custom) {
      name_length = tag.custom_tag_name.size();
      if (name_length > UINT8_MAX) name_length = UINT8_MAX;
    }

    // Type byte, plus length byte and name bytes for custom tags.
    const unsigned record_size = is_custom ? 2 + name_length : 1;
    if (offset + record_size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;

    buffer[offset++] = static_cast<char>(tag.type);
    if (is_custom) {
      buffer[offset++] = name_length;
      tag.custom_tag_name.copy(buffer + offset, name_length);
      offset += name_length;
    }
    written++;
  }

  std::memcpy(buffer, &written, sizeof(written));
  return offset;
}
// Rebuilds the open-element stack from a buffer produced by serialize().
//
// A zero `length` yields an empty stack. Otherwise the stack is resized to the
// recorded depth and only the first "written" entries are filled in from the
// buffer; any remaining entries keep the default-constructed Tag.
void deserialize(const char *buffer, unsigned length) {
  tags.clear();
  if (length == 0) return;

  uint16_t written = 0;
  uint16_t stack_depth = 0;
  unsigned offset = 0;
  std::memcpy(&written, buffer + offset, sizeof(written));
  offset += sizeof(written);
  std::memcpy(&stack_depth, buffer + offset, sizeof(stack_depth));
  offset += sizeof(stack_depth);

  tags.resize(stack_depth);
  for (unsigned index = 0; index < written; ++index) {
    Tag &tag = tags[index];
    tag.type = static_cast<TagType>(buffer[offset++]);
    if (tag.type != CUSTOM) continue;

    // Custom tags carry a one-byte length followed by the name bytes.
    const uint16_t name_length = static_cast<uint8_t>(buffer[offset++]);
    tag.custom_tag_name.assign(buffer + offset, name_length);
    offset += name_length;
  }
}
// Consumes the longest run of alphanumeric characters, '-' and ':' at the
// current position and returns it upper-cased. Returns an empty string (and
// consumes nothing) when the lookahead is not such a character.
string scan_tag_name(TSLexer *lexer) {
  string name;
  for (;;) {
    const int32_t c = lexer->lookahead;
    const bool is_name_char = iswalnum(c) || c == '-' || c == ':';
    if (!is_name_char) break;
    name += towupper(c);
    lexer->advance(lexer, false);
  }
  return name;
}
// Scans the remainder of a comment; the caller has already consumed "<!".
//
// Requires "--" next, then reads until a '>' that is directly preceded by at
// least two '-' characters. On success the token is COMMENT and ends just
// after that '>'. Returns false if the opening "--" is missing or the input
// ends first.
bool scan_comment(TSLexer *lexer) {
  for (int i = 0; i < 2; ++i) {
    if (lexer->lookahead != '-') return false;
    lexer->advance(lexer, false);
  }

  // Number of consecutive '-' seen immediately before the current character.
  unsigned trailing_dashes = 0;
  while (lexer->lookahead) {
    const int32_t c = lexer->lookahead;
    if (c == '-') {
      ++trailing_dashes;
    } else if (c == '>' && trailing_dashes >= 2) {
      lexer->result_symbol = COMMENT;
      lexer->advance(lexer, false);
      lexer->mark_end(lexer);
      return true;
    } else {
      // Any other character (including a '>' without "--") breaks the run.
      trailing_dashes = 0;
    }
    lexer->advance(lexer, false);
  }
  return false;
}
// Scans the raw text content of a <script> or <style> element.
//
// The token extends up to, but does not include, the first case-insensitive
// occurrence of "</SCRIPT" (when the innermost open tag is SCRIPT) or
// "</STYLE" (otherwise), or to the end of input when no delimiter is found.
// Returns false only when there is no open tag at all.
bool scan_raw_text(TSLexer *lexer) {
  if (tags.empty()) return false;

  // Start with an empty token; the end is only moved forward once characters
  // are known not to belong to the closing delimiter.
  lexer->mark_end(lexer);

  const string end_delimiter = tags.back().type == SCRIPT
    ? "</SCRIPT"
    : "</STYLE";

  unsigned delimiter_index = 0;
  while (lexer->lookahead) {
    if (towupper(lexer->lookahead) == end_delimiter[delimiter_index]) {
      delimiter_index++;
      if (delimiter_index == end_delimiter.size()) break;
      lexer->advance(lexer, false);
    } else if (delimiter_index > 0) {
      // A partial delimiter match just failed. The characters consumed so far
      // are ordinary raw text, but the current character has not been tested
      // as the start of a new delimiter yet (e.g. the second '<' in
      // "<</script>"), so restart matching without consuming it.
      delimiter_index = 0;
      lexer->mark_end(lexer);
    } else {
      lexer->advance(lexer, false);
      lexer->mark_end(lexer);
    }
  }

  lexer->result_symbol = RAW_TEXT;
  return true;
}
// Decides whether the innermost open element must be closed implicitly
// before the upcoming tag (or end of input) is processed.
//
// Called with the lexer positioned just after '<' (or at end of input). When
// an implicit end tag is needed, the innermost tag is popped, the token is
// IMPLICIT_END_TAG and true is returned; otherwise false.
bool scan_implicit_end_tag(TSLexer *lexer) {
  const bool has_parent = !tags.empty();
  const bool is_closing_tag = lexer->lookahead == '/';

  bool close_parent = false;
  if (is_closing_tag) {
    lexer->advance(lexer, false);
  } else {
    // A void element is closed by whatever follows it.
    close_parent = has_parent && tags.back().is_void();
  }

  if (!close_parent) {
    const string name = scan_tag_name(lexer);
    if (name.empty()) return false;
    const Tag upcoming = Tag::for_name(name);

    if (is_closing_tag) {
      // An end tag matching the innermost element is handled as a normal end
      // tag. If it matches an element further down the stack instead, close
      // the innermost element implicitly (lenient handling of malformed
      // HTML); this repeats until the matching element is on top.
      const bool closes_top = has_parent && tags.back() == upcoming;
      close_parent = !closes_top &&
        std::find(tags.begin(), tags.end(), upcoming) != tags.end();
    } else {
      // A start tag the innermost element cannot contain closes that element.
      close_parent = has_parent && !tags.back().can_contain(upcoming);
    }
  }

  if (!close_parent) return false;
  tags.pop_back();
  lexer->result_symbol = IMPLICIT_END_TAG;
  return true;
}
// Scans the name of a start tag, pushes the corresponding Tag onto the stack
// of open elements and reports SCRIPT_START_TAG_NAME, STYLE_START_TAG_NAME or
// START_TAG_NAME depending on the tag type. Returns false when no name
// characters are present.
bool scan_start_tag_name(TSLexer *lexer) {
  const string name = scan_tag_name(lexer);
  if (name.empty()) return false;

  const Tag opened = Tag::for_name(name);
  tags.push_back(opened);

  if (opened.type == SCRIPT) {
    lexer->result_symbol = SCRIPT_START_TAG_NAME;
  } else if (opened.type == STYLE) {
    lexer->result_symbol = STYLE_START_TAG_NAME;
  } else {
    lexer->result_symbol = START_TAG_NAME;
  }
  return true;
}
// Scans the name of an end tag. If it matches the innermost open element that
// element is popped and the token is END_TAG_NAME; otherwise the stack is
// left untouched and the token is ERRONEOUS_END_TAG_NAME. Returns false when
// no name characters are present.
bool scan_end_tag_name(TSLexer *lexer) {
  const string name = scan_tag_name(lexer);
  if (name.empty()) return false;

  const Tag closing = Tag::for_name(name);
  const bool matches_top = !tags.empty() && tags.back() == closing;
  if (!matches_top) {
    lexer->result_symbol = ERRONEOUS_END_TAG_NAME;
    return true;
  }

  tags.pop_back();
  lexer->result_symbol = END_TAG_NAME;
  return true;
}
// Scans the "/>" that ends a self-closing tag; called with the lookahead on
// '/'. On success the innermost open tag (the one just opened by this tag) is
// popped, the token is SELF_CLOSING_TAG_DELIMITER and true is returned.
// Returns false when '/' is not immediately followed by '>'.
bool scan_self_closing_tag_delimiter(TSLexer *lexer) {
  lexer->advance(lexer, false);  // consume '/'
  if (lexer->lookahead != '>') return false;
  lexer->advance(lexer, false);

  // Always report the token type when returning true. Previously an empty
  // tag stack caused a successful return without result_symbol being set,
  // leaving whatever value the lexer held before.
  lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER;
  if (!tags.empty()) tags.pop_back();
  return true;
}
// Entry point of the scanner: skips leading whitespace, then dispatches to
// one of the specialised scan_* helpers based on the next character and on
// which external tokens the parser currently accepts (`valid_symbols`,
// indexed by TokenType). Returns true when a token was recognised.
bool scan(TSLexer *lexer, const bool *valid_symbols) {
  while (iswspace(lexer->lookahead)) {
    lexer->advance(lexer, true);
  }

  const bool start_name_ok = valid_symbols[START_TAG_NAME];
  const bool end_name_ok = valid_symbols[END_TAG_NAME];
  const bool raw_text_ok = valid_symbols[RAW_TEXT];

  // Raw text is only scanned when no tag name is acceptable as well.
  if (raw_text_ok && !start_name_ok && !end_name_ok) {
    return scan_raw_text(lexer);
  }

  const int32_t first = lexer->lookahead;

  if (first == '<') {
    // Mark the token end before the '<' so that an implicit end tag has
    // zero width even though the lexer peeks past it.
    lexer->mark_end(lexer);
    lexer->advance(lexer, false);
    if (lexer->lookahead == '!') {
      lexer->advance(lexer, false);
      return scan_comment(lexer);
    }
    return valid_symbols[IMPLICIT_END_TAG] && scan_implicit_end_tag(lexer);
  }

  if (first == '\0') {
    return valid_symbols[IMPLICIT_END_TAG] && scan_implicit_end_tag(lexer);
  }

  if (first == '/') {
    return valid_symbols[SELF_CLOSING_TAG_DELIMITER] &&
      scan_self_closing_tag_delimiter(lexer);
  }

  if ((start_name_ok || end_name_ok) && !raw_text_ok) {
    return start_name_ok
      ? scan_start_tag_name(lexer)
      : scan_end_tag_name(lexer);
  }

  return false;
}
vector<Tag> tags;
};
}
extern "C" {
void *tree_sitter_html_external_scanner_create() {
return new Scanner();
}
bool tree_sitter_html_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->scan(lexer, valid_symbols);
}
unsigned tree_sitter_html_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->serialize(buffer);
}
void tree_sitter_html_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = static_cast<Scanner *>(payload);
scanner->deserialize(buffer, length);
}
void tree_sitter_html_external_scanner_destroy(void *payload) {
Scanner *scanner = static_cast<Scanner *>(payload);
delete scanner;
}
}

@ -1,380 +0,0 @@
#include <string>
#include <map>
using std::string;
using std::map;
// Identifies an HTML element by name.
//
// The order of the enumerators is significant: every value listed before
// END_OF_VOID_TAGS is treated as a void element (Tag::is_void() tests
// `type < END_OF_VOID_TAGS`), so new non-void tags must be added after that
// sentinel.
enum TagType {
AREA,
BASE,
BASEFONT,
BGSOUND,
BR,
COL,
COMMAND,
EMBED,
FRAME,
HR,
IMAGE,
IMG,
INPUT,
ISINDEX,
KEYGEN,
LINK,
MENUITEM,
META,
NEXTID,
PARAM,
SOURCE,
TRACK,
WBR,
// Sentinel separating void elements (above) from all other elements
// (below). Also the type given to a default-constructed Tag.
END_OF_VOID_TAGS,
A,
ABBR,
ADDRESS,
ARTICLE,
ASIDE,
AUDIO,
B,
BDI,
BDO,
BLOCKQUOTE,
BODY,
BUTTON,
CANVAS,
CAPTION,
CITE,
CODE,
COLGROUP,
DATA,
DATALIST,
DD,
DEL,
DETAILS,
DFN,
DIALOG,
DIV,
DL,
DT,
EM,
FIELDSET,
FIGCAPTION,
FIGURE,
FOOTER,
FORM,
H1,
H2,
H3,
H4,
H5,
H6,
HEAD,
HEADER,
HGROUP,
HTML,
I,
IFRAME,
INS,
KBD,
LABEL,
LEGEND,
LI,
MAIN,
MAP,
MARK,
MATH,
MENU,
METER,
NAV,
NOSCRIPT,
OBJECT,
OL,
OPTGROUP,
OPTION,
OUTPUT,
P,
PICTURE,
PRE,
PROGRESS,
Q,
RB,
RP,
RT,
RTC,
RUBY,
S,
SAMP,
SCRIPT,
SECTION,
SELECT,
SLOT,
SMALL,
SPAN,
STRONG,
STYLE,
SUB,
SUMMARY,
SUP,
SVG,
TABLE,
TBODY,
TD,
TEMPLATE,
TEXTAREA,
TFOOT,
TH,
THEAD,
TIME,
TITLE,
TR,
U,
UL,
VAR,
VIDEO,
// Any element whose name is not in the lookup table; the actual name is
// kept in Tag::custom_tag_name.
CUSTOM,
};
// Builds the lookup table from tag name to TagType. Each key is the
// enumerator's own spelling (produced by stringification), so lookups must
// use that exact spelling. END_OF_VOID_TAGS and CUSTOM are intentionally not
// registered.
static const map<string, TagType> get_tag_map() {
  typedef map<string, TagType> TagMap;
  TagMap tag_types;
#define REGISTER_TAG(name) tag_types.insert(TagMap::value_type(#name, name))
  // Void elements.
  REGISTER_TAG(AREA);
  REGISTER_TAG(BASE);
  REGISTER_TAG(BASEFONT);
  REGISTER_TAG(BGSOUND);
  REGISTER_TAG(BR);
  REGISTER_TAG(COL);
  REGISTER_TAG(COMMAND);
  REGISTER_TAG(EMBED);
  REGISTER_TAG(FRAME);
  REGISTER_TAG(HR);
  REGISTER_TAG(IMAGE);
  REGISTER_TAG(IMG);
  REGISTER_TAG(INPUT);
  REGISTER_TAG(ISINDEX);
  REGISTER_TAG(KEYGEN);
  REGISTER_TAG(LINK);
  REGISTER_TAG(MENUITEM);
  REGISTER_TAG(META);
  REGISTER_TAG(NEXTID);
  REGISTER_TAG(PARAM);
  REGISTER_TAG(SOURCE);
  REGISTER_TAG(TRACK);
  REGISTER_TAG(WBR);
  // All other known elements.
  REGISTER_TAG(A);
  REGISTER_TAG(ABBR);
  REGISTER_TAG(ADDRESS);
  REGISTER_TAG(ARTICLE);
  REGISTER_TAG(ASIDE);
  REGISTER_TAG(AUDIO);
  REGISTER_TAG(B);
  REGISTER_TAG(BDI);
  REGISTER_TAG(BDO);
  REGISTER_TAG(BLOCKQUOTE);
  REGISTER_TAG(BODY);
  REGISTER_TAG(BUTTON);
  REGISTER_TAG(CANVAS);
  REGISTER_TAG(CAPTION);
  REGISTER_TAG(CITE);
  REGISTER_TAG(CODE);
  REGISTER_TAG(COLGROUP);
  REGISTER_TAG(DATA);
  REGISTER_TAG(DATALIST);
  REGISTER_TAG(DD);
  REGISTER_TAG(DEL);
  REGISTER_TAG(DETAILS);
  REGISTER_TAG(DFN);
  REGISTER_TAG(DIALOG);
  REGISTER_TAG(DIV);
  REGISTER_TAG(DL);
  REGISTER_TAG(DT);
  REGISTER_TAG(EM);
  REGISTER_TAG(FIELDSET);
  REGISTER_TAG(FIGCAPTION);
  REGISTER_TAG(FIGURE);
  REGISTER_TAG(FOOTER);
  REGISTER_TAG(FORM);
  REGISTER_TAG(H1);
  REGISTER_TAG(H2);
  REGISTER_TAG(H3);
  REGISTER_TAG(H4);
  REGISTER_TAG(H5);
  REGISTER_TAG(H6);
  REGISTER_TAG(HEAD);
  REGISTER_TAG(HEADER);
  REGISTER_TAG(HGROUP);
  REGISTER_TAG(HTML);
  REGISTER_TAG(I);
  REGISTER_TAG(IFRAME);
  REGISTER_TAG(INS);
  REGISTER_TAG(KBD);
  REGISTER_TAG(LABEL);
  REGISTER_TAG(LEGEND);
  REGISTER_TAG(LI);
  REGISTER_TAG(MAIN);
  REGISTER_TAG(MAP);
  REGISTER_TAG(MARK);
  REGISTER_TAG(MATH);
  REGISTER_TAG(MENU);
  REGISTER_TAG(METER);
  REGISTER_TAG(NAV);
  REGISTER_TAG(NOSCRIPT);
  REGISTER_TAG(OBJECT);
  REGISTER_TAG(OL);
  REGISTER_TAG(OPTGROUP);
  REGISTER_TAG(OPTION);
  REGISTER_TAG(OUTPUT);
  REGISTER_TAG(P);
  REGISTER_TAG(PICTURE);
  REGISTER_TAG(PRE);
  REGISTER_TAG(PROGRESS);
  REGISTER_TAG(Q);
  REGISTER_TAG(RB);
  REGISTER_TAG(RP);
  REGISTER_TAG(RT);
  REGISTER_TAG(RTC);
  REGISTER_TAG(RUBY);
  REGISTER_TAG(S);
  REGISTER_TAG(SAMP);
  REGISTER_TAG(SCRIPT);
  REGISTER_TAG(SECTION);
  REGISTER_TAG(SELECT);
  REGISTER_TAG(SLOT);
  REGISTER_TAG(SMALL);
  REGISTER_TAG(SPAN);
  REGISTER_TAG(STRONG);
  REGISTER_TAG(STYLE);
  REGISTER_TAG(SUB);
  REGISTER_TAG(SUMMARY);
  REGISTER_TAG(SUP);
  REGISTER_TAG(SVG);
  REGISTER_TAG(TABLE);
  REGISTER_TAG(TBODY);
  REGISTER_TAG(TD);
  REGISTER_TAG(TEMPLATE);
  REGISTER_TAG(TEXTAREA);
  REGISTER_TAG(TFOOT);
  REGISTER_TAG(TH);
  REGISTER_TAG(THEAD);
  REGISTER_TAG(TIME);
  REGISTER_TAG(TITLE);
  REGISTER_TAG(TR);
  REGISTER_TAG(U);
  REGISTER_TAG(UL);
  REGISTER_TAG(VAR);
  REGISTER_TAG(VIDEO);
#undef REGISTER_TAG
  return tag_types;
}
// Name -> TagType lookup table, built once from get_tag_map() and consulted
// by Tag::for_name().
static const map<string, TagType> TAG_TYPES_BY_TAG_NAME = get_tag_map();
// Element types that a P element may not contain; Tag::can_contain() searches
// this list when the parent is a P.
static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
ADDRESS,
ARTICLE,
ASIDE,
BLOCKQUOTE,
DETAILS,
DIV,
DL,
FIELDSET,
FIGCAPTION,
FIGURE,
FOOTER,
FORM,
H1,
H2,
H3,
H4,
H5,
H6,
HEADER,
HR,
MAIN,
NAV,
OL,
P,
PRE,
SECTION,
};
// One-past-the-end pointer of the array above (begin + element count), used
// as the end iterator for std::find.
static const TagType *TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END = (
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS +
sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) /
sizeof(TagType)
);
// An HTML element as tracked on the scanner's stack of open tags: a known
// TagType, or CUSTOM together with the element's name.
struct Tag {
  TagType type;
  // Name of the element; only set (and only compared) when type == CUSTOM.
  string custom_tag_name;

  // This default constructor is used in the case where there is not enough
  // space in the serialization buffer to store all of the tags. In that case,
  // tags that cannot be serialized will be treated as having an unknown type.
  // These tags will be closed via implicit end tags regardless of which
  // closing tag is encountered next.
  Tag() : type(END_OF_VOID_TAGS) {}

  Tag(TagType type, const string &name) : type(type), custom_tag_name(name) {}

  // Two tags are equal when their types match and, for CUSTOM tags, their
  // names match as well.
  bool operator==(const Tag &other) const {
    if (type != other.type) return false;
    if (type == CUSTOM && custom_tag_name != other.custom_tag_name) return false;
    return true;
  }

  // True for element types listed before END_OF_VOID_TAGS in TagType.
  inline bool is_void() const {
    return type < END_OF_VOID_TAGS;
  }

  // Returns whether an element of this type may directly contain `tag`.
  // A false result makes the scanner close this element implicitly before
  // opening `tag`. Const-qualified because it only inspects the two tags.
  inline bool can_contain(const Tag &tag) const {
    TagType child = tag.type;

    switch (type) {
      case LI: return child != LI;

      case DT:
      case DD:
        return child != DT && child != DD;

      case P:
        return std::find(
          TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS,
          TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END,
          tag.type
        ) == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END;

      case COLGROUP:
        return child == COL;

      case RB:
      case RT:
      case RP:
        return child != RB && child != RT && child != RP;

      case OPTGROUP:
        return child != OPTGROUP;

      case TR:
        return child != TR;

      case TD:
      case TH:
        return child != TD && child != TH && child != TR;

      default:
        return true;
    }
  }

  // Maps a tag name to a Tag: names found in TAG_TYPES_BY_TAG_NAME yield that
  // type with an empty custom name; any other name yields a CUSTOM tag that
  // remembers the name. The lookup is an exact string match.
  static inline Tag for_name(const string &name) {
    map<string, TagType>::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name);
    if (type != TAG_TYPES_BY_TAG_NAME.end()) {
      return Tag(type->second, string());
    } else {
      return Tag(CUSTOM, name);
    }
  }
};

@ -1,223 +0,0 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
/* Reserved symbol ids: the error symbol is the largest TSSymbol value and
 * the end symbol is 0. */
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
/* Size limit, in bytes, for the buffer an external scanner serializes its
 * state into. */
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
/* These three are skipped when TREE_SITTER_API_H_ is already defined --
 * presumably because that header declares them itself. */
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
/* One field-map record: a field id, a child index and an `inherited` flag. */
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
/* An (index, length) pair -- presumably a range into the array of
 * TSFieldMapEntry records; confirm against the runtime. */
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
/* Per-symbol flags. */
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
/* Interface through which generated lexers and external scanners read input
 * and report tokens. */
typedef struct TSLexer TSLexer;
struct TSLexer {
/* The next input character, not yet consumed. */
int32_t lookahead;
/* Set by the lexer/scanner to the symbol of the recognised token. */
TSSymbol result_symbol;
/* Consumes the lookahead character; the bool is the `skip` flag (the lexer
 * macros pass true from SKIP, false from ADVANCE). */
void (*advance)(TSLexer *, bool);
/* Marks the current position as the end of the token being recognised. */
void (*mark_end)(TSLexer *);
/* NOTE(review): the remaining callbacks are not exercised anywhere in this
 * header; consult the tree-sitter documentation for their exact contracts. */
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
/* Discriminator stored in the `type` member of every TSParseAction variant. */
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
/* A single parse action. All variants begin with a uint8_t `type`, so the
 * bare `type` member can be read to find out which variant is active. */
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
/* Pair of lexer state ids: one for the generated lexer and one for the
 * external scanner. */
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
/* Element of the parse-actions array: either an action or an `entry` header
 * holding a count and a `reusable` flag. */
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
/* Static description of a generated parser: size counters, parse and lex
 * tables, symbol/field metadata, the lexer entry points and the hooks of an
 * optional external scanner. */
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
/* Generated lexer functions; both take the lexer and a lex state id. */
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
/* External scanner hooks. The void * argument is the scanner object
 * returned by `create`. */
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
};
/*
* Lexer Macros
*
* Building blocks for a generated lex function. They expect variables named
* `lexer` (TSLexer *) and `state` to be in scope at the point of use.
*/
/* Declares the lexer's locals and the two labels of its main loop:
 * `next_state` consumes one character (honouring `skip`) and falls through to
 * `start`, which clears `skip` and reloads `lookahead`. */
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
/* Consume the current character and continue in `state_value`. */
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
/* Like ADVANCE, but the character is consumed with the skip flag set. */
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
/* Record `symbol_value` as the recognised token ending at the current
 * position. Expands to three statements, so it is not safe as the sole body
 * of an unbraced if/else. */
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
/* Leave the lex function, reporting whether a token was accepted. */
#define END_STATE() return result;
/*
* Parse Table Macros
*/
/* Index of state `id` relative to the first small state. LARGE_STATE_COUNT
 * must be defined by the including file. The expansion is fully
 * parenthesised so it keeps its meaning inside a larger expression and with
 * a compound argument. */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
/* Identity wrappers that label table entries in generated code. */
#define STATE(id) id
#define ACTIONS(id) id
/* The macros below each expand to a doubly-braced initializer that fills one
 * parse-action variant via designated initializers; members that are not
 * named are left zero-initialised. */
/* Shift action moving to `state_value`. */
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
/* Shift action with the `repetition` flag set. */
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
/* Shift action with the `extra` flag set and no target state. */
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
/* Reduce action producing `symbol_val` from `child_count_val` children; any
 * further designated initializers (e.g. for dynamic_precedence or
 * production_id) are passed through the variadic arguments. */
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
/* Action carrying only the Recover type tag. */
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
/* Action carrying only the Accept type tag. */
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_