mirror of https://github.com/Wilfred/difftastic/
Use tree-sitter-html from crates.io
parent
507b60bc45
commit
4873e87dac
@ -1 +0,0 @@
|
||||
tree-sitter-html/src
|
||||
@ -1,22 +0,0 @@
|
||||
image: Visual Studio 2015
|
||||
|
||||
environment:
|
||||
nodejs_version: "10"
|
||||
|
||||
platform:
|
||||
- x64
|
||||
|
||||
install:
|
||||
- ps: Install-Product node $env:nodejs_version
|
||||
- node --version
|
||||
- npm --version
|
||||
- npm install
|
||||
|
||||
test_script:
|
||||
- npm run test-windows
|
||||
|
||||
build: off
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
@ -1 +0,0 @@
|
||||
/src/parser.c linguist-vendored
|
||||
@ -1,12 +0,0 @@
|
||||
node_modules
|
||||
build
|
||||
*.log
|
||||
package-lock.json
|
||||
target
|
||||
Cargo.lock
|
||||
*.a
|
||||
*.dylib
|
||||
*.so
|
||||
*.o
|
||||
bindings/c/*.h
|
||||
bindings/c/*.pc
|
||||
@ -1,6 +0,0 @@
|
||||
corpus
|
||||
examples
|
||||
build
|
||||
script
|
||||
target
|
||||
Cargo.lock
|
||||
@ -1,10 +0,0 @@
|
||||
language: node_js
|
||||
|
||||
node_js: 10
|
||||
|
||||
env:
|
||||
- CXX=clang++
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
@ -1,25 +0,0 @@
|
||||
[package]
|
||||
name = "tree-sitter-html"
|
||||
description = "html grammar for the tree-sitter parsing library"
|
||||
version = "0.19.0"
|
||||
keywords = ["incremental", "parsing", "html"]
|
||||
categories = ["parsing", "text-editors"]
|
||||
repository = "https://github.com/tree-sitter/tree-sitter-html"
|
||||
edition = "2018"
|
||||
|
||||
build = "bindings/rust/build.rs"
|
||||
include = [
|
||||
"bindings/rust/*",
|
||||
"grammar.js",
|
||||
"queries/*",
|
||||
"src/*",
|
||||
]
|
||||
|
||||
[lib]
|
||||
path = "bindings/rust/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
tree-sitter = "0.19"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0"
|
||||
@ -1,21 +0,0 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014 Max Brunsfeld
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@ -1,99 +0,0 @@
|
||||
# Builds the tree-sitter parser in $(SRC_DIR) as a static archive and a
# versioned shared library, generates the C header from its template, and
# installs all of them together with a pkg-config file.
#
# User-tunable knobs: PARSER_REPO_URL, PARSER_NAME, PREFIX, INCLUDEDIR, LIBDIR,
# PCLIBDIR, DESTDIR, CFLAGS, CXXFLAGS, LDFLAGS, LDLIBS.

VERSION := 0.19.0

# Remove a half-written target (e.g. the header produced through a shell
# redirect) when its recipe fails, so it is not mistaken for up to date.
.DELETE_ON_ERROR:

# Repository
SRC_DIR := src

PARSER_REPO_URL ?= $(shell git -C $(SRC_DIR) remote get-url origin )
# The parser name is the third dash-separated field of the repository basename
# with a trailing ".git" removed. The sed expression uses '/' as its delimiter
# so that it contains no number sign: GNU Make 4.3 changed how an escaped
# number sign inside $(shell ...) is handled, which made the previous
# number-sign-delimited expression version-dependent. The dot is escaped and
# the match is anchored so only a literal ".git" suffix is stripped.
PARSER_NAME ?= $(shell basename $(PARSER_REPO_URL) | cut -d '-' -f3 | sed 's/\.git$$//')
UPPER_PARSER_NAME := $(shell echo $(PARSER_NAME) | tr a-z A-Z )

# install directory layout
PREFIX ?= /usr/local
INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig

# collect C++ sources, and link the C++ runtime only if there are any
CPPSRC := $(wildcard $(SRC_DIR)/*.cc)

ifeq (, $(CPPSRC))
ADDITIONALLIBS :=
else
ADDITIONALLIBS := -lc++
endif

# collect sources
SRC := $(wildcard $(SRC_DIR)/*.c)
SRC += $(CPPSRC)
OBJ := $(addsuffix .o,$(basename $(SRC)))

# ABI versioning
SONAME_MAJOR := 0
SONAME_MINOR := 0

CFLAGS ?= -O3 -Wall -Wextra -I$(SRC_DIR)
CXXFLAGS ?= -O3 -Wall -Wextra -I$(SRC_DIR)
# Flags the build cannot work without are appended even when the user
# overrides CFLAGS/CXXFLAGS on the command line.
override CFLAGS += -std=gnu99 -fPIC
override CXXFLAGS += -fPIC

# OS-specific bits
# LINKSHARED is assembled as one comma-separated "-Wl," option list, so every
# fragment appended before the last one must end with a comma.
ifeq ($(shell uname),Darwin)
  SOEXT = dylib
  SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib
  SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib
  LINKSHARED := $(LINKSHARED)-dynamiclib -Wl,
  ifneq ($(ADDITIONALLIBS),)
    LINKSHARED := $(LINKSHARED)$(ADDITIONALLIBS),
  endif
  LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/libtree-sitter-$(PARSER_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks
else
  SOEXT = so
  SOEXTVER_MAJOR = so.$(SONAME_MAJOR)
  SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR)
  LINKSHARED := $(LINKSHARED)-shared -Wl,
  ifneq ($(ADDITIONALLIBS),)
    # The trailing comma separates the library from the following -soname
    # option; without it the two fuse into a single malformed linker argument.
    LINKSHARED := $(LINKSHARED)$(ADDITIONALLIBS),
  endif
  LINKSHARED := $(LINKSHARED)-soname,libtree-sitter-$(PARSER_NAME).so.$(SONAME_MAJOR)
endif
ifneq (,$(filter $(shell uname),FreeBSD NetBSD DragonFly))
  PCLIBDIR := $(PREFIX)/libdata/pkgconfig
endif

all: libtree-sitter-$(PARSER_NAME).a libtree-sitter-$(PARSER_NAME).$(SOEXTVER) bindings/c/$(PARSER_NAME).h

# Static archive of all parser objects.
libtree-sitter-$(PARSER_NAME).a: $(OBJ)
	$(AR) rcs $@ $^

# Fully versioned shared library, plus the unversioned and major-version
# symlinks that point at it.
libtree-sitter-$(PARSER_NAME).$(SOEXTVER): $(OBJ)
	$(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@
	ln -sf $@ libtree-sitter-$(PARSER_NAME).$(SOEXT)
	ln -sf $@ libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR)

# C header generated from its template; depending on the template makes the
# header regenerate whenever the template changes.
bindings/c/$(PARSER_NAME).h: bindings/c/tree-sitter.h.in
	sed -e 's|@UPPER_PARSERNAME@|$(UPPER_PARSER_NAME)|' \
		-e 's|@PARSERNAME@|$(PARSER_NAME)|' \
		$< > $@

install: all
	install -d '$(DESTDIR)$(LIBDIR)'
	install -m644 libtree-sitter-$(PARSER_NAME).a '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).a
	install -m755 libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXTVER)
	ln -sf libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR)
	ln -sf libtree-sitter-$(PARSER_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter-$(PARSER_NAME).$(SOEXT)
	install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter
	install -m644 bindings/c/$(PARSER_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/
	install -d '$(DESTDIR)$(PCLIBDIR)'
	sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \
		-e 's|=$(PREFIX)|=$${prefix}|' \
		-e 's|@PREFIX@|$(PREFIX)|' \
		-e 's|@ADDITIONALLIBS@|$(ADDITIONALLIBS)|' \
		-e 's|@PARSERNAME@|$(PARSER_NAME)|' \
		-e 's|@PARSERREPOURL@|$(PARSER_REPO_URL)|' \
		bindings/c/tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter-$(PARSER_NAME).pc

clean:
	rm -f $(OBJ) libtree-sitter-$(PARSER_NAME).a libtree-sitter-$(PARSER_NAME).$(SOEXT) libtree-sitter-$(PARSER_NAME).$(SOEXTVER_MAJOR) libtree-sitter-$(PARSER_NAME).$(SOEXTVER) bindings/c/$(PARSER_NAME).h

.PHONY: all install clean
|
||||
@ -1,13 +0,0 @@
|
||||
tree-sitter-html
|
||||
================
|
||||
|
||||
[Travis CI build status](https://travis-ci.org/tree-sitter/tree-sitter-html)
|
||||
[AppVeyor build status](https://ci.appveyor.com/project/maxbrunsfeld/tree-sitter-html/branch/master)
|
||||
|
||||
HTML grammar for [tree-sitter][].
|
||||
|
||||
[tree-sitter]: https://github.com/tree-sitter/tree-sitter
|
||||
|
||||
References
|
||||
|
||||
* [The HTML5 Spec](https://www.w3.org/TR/html5/syntax.html)
|
||||
@ -1,19 +0,0 @@
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "tree_sitter_html_binding",
|
||||
"include_dirs": [
|
||||
"<!(node -e \"require('nan')\")",
|
||||
"src"
|
||||
],
|
||||
"sources": [
|
||||
"src/parser.c",
|
||||
"src/scanner.cc",
|
||||
"bindings/node/binding.cc"
|
||||
],
|
||||
"cflags_c": [
|
||||
"-std=c99",
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1,16 +0,0 @@
|
||||
#ifndef TREE_SITTER_@UPPER_PARSERNAME@_H_
|
||||
#define TREE_SITTER_@UPPER_PARSERNAME@_H_
|
||||
|
||||
#include <tree_sitter/parser.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern TSLanguage *tree_sitter_@PARSERNAME@();
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_@UPPER_PARSERNAME@_H_
|
||||
@ -1,11 +0,0 @@
|
||||
prefix=@PREFIX@
|
||||
libdir=@LIBDIR@
|
||||
includedir=@INCLUDEDIR@
|
||||
additionallibs=@ADDITIONALLIBS@
|
||||
|
||||
Name: tree-sitter-@PARSERNAME@
|
||||
Description: A tree-sitter grammar for the @PARSERNAME@ programming language.
|
||||
URL: @PARSERREPOURL@
|
||||
Version: @VERSION@
|
||||
Libs: -L${libdir} ${additionallibs} -ltree-sitter-@PARSERNAME@
|
||||
Cflags: -I${includedir}
|
||||
@ -1,28 +0,0 @@
|
||||
#include "tree_sitter/parser.h"
|
||||
#include <node.h>
|
||||
#include "nan.h"
|
||||
|
||||
using namespace v8;
|
||||
|
||||
extern "C" TSLanguage * tree_sitter_html();
|
||||
|
||||
namespace {
|
||||
|
||||
NAN_METHOD(New) {}
|
||||
|
||||
void Init(Local<Object> exports, Local<Object> module) {
|
||||
Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
|
||||
tpl->SetClassName(Nan::New("Language").ToLocalChecked());
|
||||
tpl->InstanceTemplate()->SetInternalFieldCount(1);
|
||||
|
||||
Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
|
||||
Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
|
||||
Nan::SetInternalFieldPointer(instance, 0, tree_sitter_html());
|
||||
|
||||
Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("html").ToLocalChecked());
|
||||
Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
|
||||
}
|
||||
|
||||
NODE_MODULE(tree_sitter_html_binding, Init)
|
||||
|
||||
} // namespace
|
||||
@ -1,19 +0,0 @@
|
||||
/**
 * Loads the compiled native binding.
 *
 * The Release build is tried first, then the Debug build. Any failure other
 * than MODULE_NOT_FOUND is rethrown as-is. When both builds are missing, the
 * error from the Release attempt is the one that is thrown.
 */
function loadBinding() {
  try {
    return require("../../build/Release/tree_sitter_html_binding");
  } catch (releaseError) {
    if (releaseError.code !== 'MODULE_NOT_FOUND') {
      throw releaseError;
    }
    try {
      return require("../../build/Debug/tree_sitter_html_binding");
    } catch (debugError) {
      if (debugError.code !== 'MODULE_NOT_FOUND') {
        throw debugError;
      }
      throw releaseError;
    }
  }
}

module.exports = loadBinding();

// Attaching the node type information is best-effort: any failure while
// loading the JSON file is deliberately ignored.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}
|
||||
@ -1,25 +0,0 @@
|
||||
fn main() {
|
||||
let src_dir = std::path::Path::new("src");
|
||||
|
||||
let mut c_config = cc::Build::new();
|
||||
c_config.include(&src_dir);
|
||||
c_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable")
|
||||
.flag_if_supported("-Wno-trigraphs");
|
||||
let parser_path = src_dir.join("parser.c");
|
||||
c_config.file(&parser_path);
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
c_config.compile("parser");
|
||||
|
||||
let mut cpp_config = cc::Build::new();
|
||||
cpp_config.cpp(true);
|
||||
cpp_config.include(&src_dir);
|
||||
cpp_config
|
||||
.flag_if_supported("-Wno-unused-parameter")
|
||||
.flag_if_supported("-Wno-unused-but-set-variable");
|
||||
let scanner_path = src_dir.join("scanner.cc");
|
||||
cpp_config.file(&scanner_path);
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
cpp_config.compile("scanner");
|
||||
}
|
||||
@ -1,52 +0,0 @@
|
||||
//! This crate provides html language support for the [tree-sitter][] parsing library.
|
||||
//!
|
||||
//! Typically, you will use the [language][language func] function to add this language to a
|
||||
//! tree-sitter [Parser][], and then use the parser to parse some code:
|
||||
//!
|
||||
//! ```
|
||||
//! let code = "";
|
||||
//! let mut parser = tree_sitter::Parser::new();
|
||||
//! parser.set_language(tree_sitter_html::language()).expect("Error loading html grammar");
|
||||
//! let tree = parser.parse(code, None).unwrap();
|
||||
//! ```
|
||||
//!
|
||||
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
|
||||
//! [language func]: fn.language.html
|
||||
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
|
||||
//! [tree-sitter]: https://tree-sitter.github.io/
|
||||
|
||||
use tree_sitter::Language;
|
||||
|
||||
extern "C" {
|
||||
fn tree_sitter_html() -> Language;
|
||||
}
|
||||
|
||||
/// Get the tree-sitter [Language][] for this grammar.
|
||||
///
|
||||
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
|
||||
pub fn language() -> Language {
|
||||
unsafe { tree_sitter_html() }
|
||||
}
|
||||
|
||||
/// The content of the [`node-types.json`][] file for this grammar.
|
||||
///
|
||||
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
|
||||
pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json");
|
||||
|
||||
// Uncomment these to include any queries that this grammar contains
|
||||
|
||||
// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
|
||||
// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
|
||||
// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
|
||||
// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::language;

    /// Smoke test: a fresh parser must accept the language returned by
    /// `language()`.
    #[test]
    fn test_can_load_grammar() {
        let mut parser = tree_sitter::Parser::new();
        let load_result = parser.set_language(language());
        load_result.expect("Error loading html language");
    }
}
|
||||
@ -1,312 +0,0 @@
|
||||
===================================
|
||||
Tags
|
||||
===================================
|
||||
<span>Hello</span>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
===================================
|
||||
Tags with attributes
|
||||
===================================
|
||||
<input value=yes class="a" data-💩></input>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag
|
||||
(tag_name)
|
||||
(attribute
|
||||
(attribute_name)
|
||||
(attribute_value))
|
||||
(attribute
|
||||
(attribute_name)
|
||||
(quoted_attribute_value (attribute_value)))
|
||||
(attribute
|
||||
(attribute_name)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
===================================
|
||||
Nested tags
|
||||
===================================
|
||||
<div>
|
||||
<span>a</span>
|
||||
b
|
||||
<b>c</b>
|
||||
Multi-line
|
||||
text
|
||||
</div>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag (tag_name)))
|
||||
(text)
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(end_tag (tag_name)))
|
||||
(text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Void tags
|
||||
==================================
|
||||
<form><img src="something.png"><br><input type=submit value=Ok /></form>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
|
||||
(element (start_tag (tag_name)))
|
||||
(element
|
||||
(self_closing_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (attribute_value))
|
||||
(attribute (attribute_name) (attribute_value))))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Void tags at EOF
|
||||
==================================
|
||||
<img src="something.png">
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
|
||||
|
||||
==================================
|
||||
Custom tags
|
||||
==================================
|
||||
<something:different>
|
||||
<atom-text-editor mini>
|
||||
Hello
|
||||
</atom-text-editor>
|
||||
</something:different>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag (tag_name) (attribute (attribute_name)))
|
||||
(text)
|
||||
(end_tag (tag_name)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Comments
|
||||
==================================
|
||||
<!-- hello -->
|
||||
<!-- world ->-> -- > ->->->-- -> still comment -->
|
||||
<div>
|
||||
<!-- <span>something</span> -->
|
||||
</div>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(comment)
|
||||
(comment)
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(comment)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
Raw text elements
|
||||
==================================
|
||||
<script>
|
||||
</s
|
||||
</sc
|
||||
</scr
|
||||
</scri
|
||||
</scrip
|
||||
</script>
|
||||
|
||||
<style>
|
||||
</ </s </st </sty </styl
|
||||
</style>
|
||||
|
||||
<script>
|
||||
</SCRIPT>
|
||||
|
||||
---
|
||||
|
||||
(fragment
|
||||
(script_element
|
||||
(start_tag (tag_name))
|
||||
(raw_text)
|
||||
(end_tag (tag_name)))
|
||||
(style_element
|
||||
(start_tag (tag_name))
|
||||
(raw_text)
|
||||
(end_tag (tag_name)))
|
||||
(script_element
|
||||
(start_tag (tag_name))
|
||||
(raw_text)
|
||||
(end_tag (tag_name))))
|
||||
|
||||
==================================
|
||||
All-caps doctype
|
||||
==================================
|
||||
<!DOCTYPE html PUBLIC
|
||||
"-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
---
|
||||
|
||||
(fragment
|
||||
(doctype))
|
||||
|
||||
==================================
|
||||
Lowercase doctype
|
||||
==================================
|
||||
<!doctype html>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(doctype))
|
||||
|
||||
==================================
|
||||
LI elements without close tags
|
||||
==================================
|
||||
<ul>
|
||||
<li>One
|
||||
<li>Two
|
||||
</ul>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
DT and DL elements without close tags
|
||||
======================================
|
||||
<dl>
|
||||
<dt>Coffee
|
||||
<dt>Café
|
||||
<dd>Black hot drink
|
||||
<dt>Milk
|
||||
<dd>White cold drink
|
||||
</dl>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
P elements without close tags
|
||||
======================================
|
||||
<p>One
|
||||
<div>Two</div>
|
||||
<p>Three
|
||||
<p>Four
|
||||
<h1>Five</h1>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name))))
|
||||
|
||||
======================================
|
||||
Ruby annotation elements without close tags
|
||||
======================================
|
||||
<ruby>東<rb>京<rt>とう<rt>きょう</ruby>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(text)
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
=======================================
|
||||
COLGROUP elements without end tags
|
||||
=======================================
|
||||
<table>
|
||||
<colgroup>
|
||||
<col style="background-color: #0f0">
|
||||
<col span="2">
|
||||
<tr>
|
||||
<th>Lime</th>
|
||||
<th>Lemon</th>
|
||||
<th>Orange</th>
|
||||
</tr>
|
||||
</table>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value)))))
|
||||
(element (start_tag
|
||||
(tag_name)
|
||||
(attribute (attribute_name) (quoted_attribute_value (attribute_value))))))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(element (start_tag (tag_name)) (text) (end_tag (tag_name)))
|
||||
(end_tag (tag_name)))
|
||||
(end_tag (tag_name))))
|
||||
|
||||
=========================================
|
||||
TR, TD, and TH elements without end tags
|
||||
=========================================
|
||||
<table>
|
||||
<tr>
|
||||
<th>One
|
||||
<th>Two
|
||||
<tr>
|
||||
<td>Three
|
||||
<td>Four
|
||||
</table>
|
||||
---
|
||||
|
||||
(fragment
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text)))
|
||||
(element
|
||||
(start_tag (tag_name))
|
||||
(element (start_tag (tag_name)) (text))
|
||||
(element (start_tag (tag_name)) (text)))
|
||||
(end_tag (tag_name))))
|
||||
@ -1,62 +0,0 @@
|
||||
<div>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
<xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz><xyz>
|
||||
</div>
|
||||
@ -1,62 +0,0 @@
|
||||
<div>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
<a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a><a>
|
||||
</div>
|
||||
@ -1,125 +0,0 @@
|
||||
module.exports = grammar({
|
||||
name: 'html',
|
||||
|
||||
extras: $ => [
|
||||
$.comment,
|
||||
/\s+/,
|
||||
],
|
||||
|
||||
externals: $ => [
|
||||
$._start_tag_name,
|
||||
$._script_start_tag_name,
|
||||
$._style_start_tag_name,
|
||||
$._end_tag_name,
|
||||
$.erroneous_end_tag_name,
|
||||
'/>',
|
||||
$._implicit_end_tag,
|
||||
$.raw_text,
|
||||
$.comment,
|
||||
],
|
||||
|
||||
rules: {
|
||||
fragment: $ => repeat($._node),
|
||||
|
||||
doctype: $ => seq(
|
||||
'<!',
|
||||
alias($._doctype, 'doctype'),
|
||||
/[^>]+/,
|
||||
'>'
|
||||
),
|
||||
|
||||
_doctype: $ => /[Dd][Oo][Cc][Tt][Yy][Pp][Ee]/,
|
||||
|
||||
_node: $ => choice(
|
||||
$.doctype,
|
||||
$.text,
|
||||
$.element,
|
||||
$.script_element,
|
||||
$.style_element,
|
||||
$.erroneous_end_tag
|
||||
),
|
||||
|
||||
element: $ => choice(
|
||||
seq(
|
||||
$.start_tag,
|
||||
repeat($._node),
|
||||
choice($.end_tag, $._implicit_end_tag)
|
||||
),
|
||||
$.self_closing_tag
|
||||
),
|
||||
|
||||
script_element: $ => seq(
|
||||
alias($.script_start_tag, $.start_tag),
|
||||
optional($.raw_text),
|
||||
$.end_tag
|
||||
),
|
||||
|
||||
style_element: $ => seq(
|
||||
alias($.style_start_tag, $.start_tag),
|
||||
optional($.raw_text),
|
||||
$.end_tag
|
||||
),
|
||||
|
||||
start_tag: $ => seq(
|
||||
'<',
|
||||
alias($._start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
'>'
|
||||
),
|
||||
|
||||
script_start_tag: $ => seq(
|
||||
'<',
|
||||
alias($._script_start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
'>'
|
||||
),
|
||||
|
||||
style_start_tag: $ => seq(
|
||||
'<',
|
||||
alias($._style_start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
'>'
|
||||
),
|
||||
|
||||
self_closing_tag: $ => seq(
|
||||
'<',
|
||||
alias($._start_tag_name, $.tag_name),
|
||||
repeat($.attribute),
|
||||
'/>'
|
||||
),
|
||||
|
||||
end_tag: $ => seq(
|
||||
'</',
|
||||
alias($._end_tag_name, $.tag_name),
|
||||
'>'
|
||||
),
|
||||
|
||||
erroneous_end_tag: $ => seq(
|
||||
'</',
|
||||
$.erroneous_end_tag_name,
|
||||
'>'
|
||||
),
|
||||
|
||||
attribute: $ => seq(
|
||||
$.attribute_name,
|
||||
optional(seq(
|
||||
'=',
|
||||
choice(
|
||||
$.attribute_value,
|
||||
$.quoted_attribute_value
|
||||
)
|
||||
))
|
||||
),
|
||||
|
||||
attribute_name: $ => /[^<>"'/=\s]+/,
|
||||
|
||||
attribute_value: $ => /[^<>"'=\s]+/,
|
||||
|
||||
quoted_attribute_value: $ => choice(
|
||||
seq("'", optional(alias(/[^']+/, $.attribute_value)), "'"),
|
||||
seq('"', optional(alias(/[^"]+/, $.attribute_value)), '"')
|
||||
),
|
||||
|
||||
text: $ => /[^<>\s]([^<>]*[^<>\s])?/
|
||||
}
|
||||
});
|
||||
@ -1,38 +0,0 @@
|
||||
{
|
||||
"name": "tree-sitter-html",
|
||||
"version": "0.19.0",
|
||||
"description": "HTML grammar for tree-sitter",
|
||||
"main": "bindings/node",
|
||||
"keywords": [
|
||||
"parser",
|
||||
"lexer"
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/tree-sitter/tree-sitter-html.git"
|
||||
},
|
||||
"authors": [
|
||||
"Max Brunsfeld <maxbrunsfeld@gmail.com>",
|
||||
"Ashi Krishnan <queerviolet@github.com>"
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"nan": "^2.14.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"tree-sitter-cli": "^0.19.1"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "tree-sitter test && tree-sitter parse examples/*.html --quiet --time",
|
||||
"test-windows": "tree-sitter test"
|
||||
},
|
||||
"tree-sitter": [
|
||||
{
|
||||
"scope": "text.html.basic",
|
||||
"file-types": [
|
||||
"html"
|
||||
],
|
||||
"injection-regex": "html"
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1,12 +0,0 @@
|
||||
; Highlighting queries for the HTML grammar.

; Names inside start, end and self-closing tags.
(tag_name) @tag

; End-tag names that the scanner reported as erroneous.
(erroneous_end_tag_name) @tag.error

(doctype) @constant
(attribute_name) @attribute
(attribute_value) @string
(comment) @comment

; Tag delimiters. "/>" closes a self-closing tag and must be listed here too,
; otherwise it is the only delimiter left without a bracket highlight.
[
  "<"
  ">"
  "</"
  "/>"
] @punctuation.bracket
|
||||
@ -1,7 +0,0 @@
|
||||
; Language injections for raw-text elements.

; The body of a <script> element is parsed as JavaScript.
((script_element (raw_text) @injection.content)
 (#set! injection.language "javascript"))

; The body of a <style> element is parsed as CSS.
((style_element (raw_text) @injection.content)
 (#set! injection.language "css"))
|
||||
@ -1,493 +0,0 @@
|
||||
{
|
||||
"name": "html",
|
||||
"rules": {
|
||||
"fragment": {
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_node"
|
||||
}
|
||||
},
|
||||
"doctype": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<!"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_doctype"
|
||||
},
|
||||
"named": false,
|
||||
"value": "doctype"
|
||||
},
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "[^>]+"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
"_doctype": {
|
||||
"type": "PATTERN",
|
||||
"value": "[Dd][Oo][Cc][Tt][Yy][Pp][Ee]"
|
||||
},
|
||||
"_node": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "doctype"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "text"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "element"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "script_element"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "style_element"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "erroneous_end_tag"
|
||||
}
|
||||
]
|
||||
},
|
||||
"element": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "start_tag"
|
||||
},
|
||||
{
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_node"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "end_tag"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_implicit_end_tag"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "self_closing_tag"
|
||||
}
|
||||
]
|
||||
},
|
||||
"script_element": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "script_start_tag"
|
||||
},
|
||||
"named": true,
|
||||
"value": "start_tag"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "raw_text"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "end_tag"
|
||||
}
|
||||
]
|
||||
},
|
||||
"style_element": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "style_start_tag"
|
||||
},
|
||||
"named": true,
|
||||
"value": "start_tag"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "raw_text"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "end_tag"
|
||||
}
|
||||
]
|
||||
},
|
||||
"start_tag": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_start_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "attribute"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
"script_start_tag": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_script_start_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "attribute"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
"style_start_tag": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_style_start_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "attribute"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
"self_closing_tag": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "<"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_start_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "attribute"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "/>"
|
||||
}
|
||||
]
|
||||
},
|
||||
"end_tag": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "</"
|
||||
},
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_end_tag_name"
|
||||
},
|
||||
"named": true,
|
||||
"value": "tag_name"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
"erroneous_end_tag": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "</"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "erroneous_end_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ">"
|
||||
}
|
||||
]
|
||||
},
|
||||
"attribute": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "attribute_name"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "="
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "attribute_value"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "quoted_attribute_value"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"attribute_name": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^<>\"'/=\\s]+"
|
||||
},
|
||||
"attribute_value": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^<>\"'=\\s]+"
|
||||
},
|
||||
"quoted_attribute_value": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "'"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^']+"
|
||||
},
|
||||
"named": true,
|
||||
"value": "attribute_value"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "'"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "\""
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\"]+"
|
||||
},
|
||||
"named": true,
|
||||
"value": "attribute_value"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "\""
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"text": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^<>\\s]([^<>]*[^<>\\s])?"
|
||||
}
|
||||
},
|
||||
"extras": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "comment"
|
||||
},
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "\\s+"
|
||||
}
|
||||
],
|
||||
"conflicts": [],
|
||||
"precedences": [],
|
||||
"externals": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_start_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_script_start_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_style_start_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_end_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "erroneous_end_tag_name"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "/>"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_implicit_end_tag"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "raw_text"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "comment"
|
||||
}
|
||||
],
|
||||
"inline": [],
|
||||
"supertypes": []
|
||||
}
|
||||
|
||||
@ -1,305 +0,0 @@
|
||||
[
|
||||
{
|
||||
"type": "attribute",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "attribute_name",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "attribute_value",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "quoted_attribute_value",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "doctype",
|
||||
"named": true,
|
||||
"fields": {}
|
||||
},
|
||||
{
|
||||
"type": "element",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "doctype",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "element",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "end_tag",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "erroneous_end_tag",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "script_element",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "self_closing_tag",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "start_tag",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "style_element",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "end_tag",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "tag_name",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "erroneous_end_tag",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "erroneous_end_tag_name",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "fragment",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": false,
|
||||
"types": [
|
||||
{
|
||||
"type": "doctype",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "element",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "erroneous_end_tag",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "script_element",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "style_element",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "quoted_attribute_value",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": false,
|
||||
"required": false,
|
||||
"types": [
|
||||
{
|
||||
"type": "attribute_value",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "script_element",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "end_tag",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "raw_text",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "start_tag",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "self_closing_tag",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "attribute",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "tag_name",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "start_tag",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "attribute",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "tag_name",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "style_element",
|
||||
"named": true,
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "end_tag",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "raw_text",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "start_tag",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "\"",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "'",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "/>",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "<",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "<!",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "</",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "=",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": ">",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "attribute_name",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "attribute_value",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "comment",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "doctype",
|
||||
"named": false
|
||||
},
|
||||
{
|
||||
"type": "erroneous_end_tag_name",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "raw_text",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "tag_name",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,310 +0,0 @@
|
||||
#include <tree_sitter/parser.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cwctype>
|
||||
#include <cstring>
|
||||
#include "tag.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using std::vector;
|
||||
using std::string;
|
||||
|
||||
enum TokenType {
|
||||
START_TAG_NAME,
|
||||
SCRIPT_START_TAG_NAME,
|
||||
STYLE_START_TAG_NAME,
|
||||
END_TAG_NAME,
|
||||
ERRONEOUS_END_TAG_NAME,
|
||||
SELF_CLOSING_TAG_DELIMITER,
|
||||
IMPLICIT_END_TAG,
|
||||
RAW_TEXT,
|
||||
COMMENT
|
||||
};
|
||||
|
||||
struct Scanner {
|
||||
Scanner() {}
|
||||
|
||||
unsigned serialize(char *buffer) {
|
||||
uint16_t tag_count = tags.size() > UINT16_MAX ? UINT16_MAX : tags.size();
|
||||
uint16_t serialized_tag_count = 0;
|
||||
|
||||
unsigned i = sizeof(tag_count);
|
||||
std::memcpy(&buffer[i], &tag_count, sizeof(tag_count));
|
||||
i += sizeof(tag_count);
|
||||
|
||||
for (; serialized_tag_count < tag_count; serialized_tag_count++) {
|
||||
Tag &tag = tags[serialized_tag_count];
|
||||
if (tag.type == CUSTOM) {
|
||||
unsigned name_length = tag.custom_tag_name.size();
|
||||
if (name_length > UINT8_MAX) name_length = UINT8_MAX;
|
||||
if (i + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;
|
||||
buffer[i++] = static_cast<char>(tag.type);
|
||||
buffer[i++] = name_length;
|
||||
tag.custom_tag_name.copy(&buffer[i], name_length);
|
||||
i += name_length;
|
||||
} else {
|
||||
if (i + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) break;
|
||||
buffer[i++] = static_cast<char>(tag.type);
|
||||
}
|
||||
}
|
||||
|
||||
std::memcpy(&buffer[0], &serialized_tag_count, sizeof(serialized_tag_count));
|
||||
return i;
|
||||
}
|
||||
|
||||
void deserialize(const char *buffer, unsigned length) {
|
||||
tags.clear();
|
||||
if (length > 0) {
|
||||
unsigned i = 0;
|
||||
uint16_t tag_count, serialized_tag_count;
|
||||
|
||||
std::memcpy(&serialized_tag_count, &buffer[i], sizeof(serialized_tag_count));
|
||||
i += sizeof(serialized_tag_count);
|
||||
|
||||
std::memcpy(&tag_count, &buffer[i], sizeof(tag_count));
|
||||
i += sizeof(tag_count);
|
||||
|
||||
tags.resize(tag_count);
|
||||
for (unsigned j = 0; j < serialized_tag_count; j++) {
|
||||
Tag &tag = tags[j];
|
||||
tag.type = static_cast<TagType>(buffer[i++]);
|
||||
if (tag.type == CUSTOM) {
|
||||
uint16_t name_length = static_cast<uint8_t>(buffer[i++]);
|
||||
tag.custom_tag_name.assign(&buffer[i], &buffer[i + name_length]);
|
||||
i += name_length;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
string scan_tag_name(TSLexer *lexer) {
|
||||
string tag_name;
|
||||
while (iswalnum(lexer->lookahead) ||
|
||||
lexer->lookahead == '-' ||
|
||||
lexer->lookahead == ':') {
|
||||
tag_name += towupper(lexer->lookahead);
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
return tag_name;
|
||||
}
|
||||
|
||||
bool scan_comment(TSLexer *lexer) {
|
||||
if (lexer->lookahead != '-') return false;
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead != '-') return false;
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
unsigned dashes = 0;
|
||||
while (lexer->lookahead) {
|
||||
switch (lexer->lookahead) {
|
||||
case '-':
|
||||
++dashes;
|
||||
break;
|
||||
case '>':
|
||||
if (dashes >= 2) {
|
||||
lexer->result_symbol = COMMENT;
|
||||
lexer->advance(lexer, false);
|
||||
lexer->mark_end(lexer);
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
dashes = 0;
|
||||
}
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool scan_raw_text(TSLexer *lexer) {
|
||||
if (!tags.size()) return false;
|
||||
|
||||
lexer->mark_end(lexer);
|
||||
|
||||
const string &end_delimiter = tags.back().type == SCRIPT
|
||||
? "</SCRIPT"
|
||||
: "</STYLE";
|
||||
|
||||
unsigned delimiter_index = 0;
|
||||
while (lexer->lookahead) {
|
||||
if (towupper(lexer->lookahead) == end_delimiter[delimiter_index]) {
|
||||
delimiter_index++;
|
||||
if (delimiter_index == end_delimiter.size()) break;
|
||||
lexer->advance(lexer, false);
|
||||
} else {
|
||||
delimiter_index = 0;
|
||||
lexer->advance(lexer, false);
|
||||
lexer->mark_end(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
lexer->result_symbol = RAW_TEXT;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool scan_implicit_end_tag(TSLexer *lexer) {
|
||||
Tag *parent = tags.empty() ? NULL : &tags.back();
|
||||
|
||||
bool is_closing_tag = false;
|
||||
if (lexer->lookahead == '/') {
|
||||
is_closing_tag = true;
|
||||
lexer->advance(lexer, false);
|
||||
} else {
|
||||
if (parent && parent->is_void()) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = IMPLICIT_END_TAG;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
string tag_name = scan_tag_name(lexer);
|
||||
if (tag_name.empty()) return false;
|
||||
|
||||
Tag next_tag = Tag::for_name(tag_name);
|
||||
|
||||
if (is_closing_tag) {
|
||||
// The tag correctly closes the topmost element on the stack
|
||||
if (!tags.empty() && tags.back() == next_tag) return false;
|
||||
|
||||
// Otherwise, dig deeper and queue implicit end tags (to be nice in
|
||||
// the case of malformed HTML)
|
||||
if (std::find(tags.begin(), tags.end(), next_tag) != tags.end()) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = IMPLICIT_END_TAG;
|
||||
return true;
|
||||
}
|
||||
} else if (parent && !parent->can_contain(next_tag)) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = IMPLICIT_END_TAG;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool scan_start_tag_name(TSLexer *lexer) {
|
||||
string tag_name = scan_tag_name(lexer);
|
||||
if (tag_name.empty()) return false;
|
||||
Tag tag = Tag::for_name(tag_name);
|
||||
tags.push_back(tag);
|
||||
switch (tag.type) {
|
||||
case SCRIPT:
|
||||
lexer->result_symbol = SCRIPT_START_TAG_NAME;
|
||||
break;
|
||||
case STYLE:
|
||||
lexer->result_symbol = STYLE_START_TAG_NAME;
|
||||
break;
|
||||
default:
|
||||
lexer->result_symbol = START_TAG_NAME;
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool scan_end_tag_name(TSLexer *lexer) {
|
||||
string tag_name = scan_tag_name(lexer);
|
||||
if (tag_name.empty()) return false;
|
||||
Tag tag = Tag::for_name(tag_name);
|
||||
if (!tags.empty() && tags.back() == tag) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = END_TAG_NAME;
|
||||
} else {
|
||||
lexer->result_symbol = ERRONEOUS_END_TAG_NAME;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool scan_self_closing_tag_delimiter(TSLexer *lexer) {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '>') {
|
||||
lexer->advance(lexer, false);
|
||||
if (!tags.empty()) {
|
||||
tags.pop_back();
|
||||
lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool scan(TSLexer *lexer, const bool *valid_symbols) {
|
||||
while (iswspace(lexer->lookahead)) {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (valid_symbols[RAW_TEXT] && !valid_symbols[START_TAG_NAME] && !valid_symbols[END_TAG_NAME]) {
|
||||
return scan_raw_text(lexer);
|
||||
}
|
||||
|
||||
switch (lexer->lookahead) {
|
||||
case '<':
|
||||
lexer->mark_end(lexer);
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
if (lexer->lookahead == '!') {
|
||||
lexer->advance(lexer, false);
|
||||
return scan_comment(lexer);
|
||||
}
|
||||
|
||||
if (valid_symbols[IMPLICIT_END_TAG]) {
|
||||
return scan_implicit_end_tag(lexer);
|
||||
}
|
||||
break;
|
||||
|
||||
case '\0':
|
||||
if (valid_symbols[IMPLICIT_END_TAG]) {
|
||||
return scan_implicit_end_tag(lexer);
|
||||
}
|
||||
break;
|
||||
|
||||
case '/':
|
||||
if (valid_symbols[SELF_CLOSING_TAG_DELIMITER]) {
|
||||
return scan_self_closing_tag_delimiter(lexer);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((valid_symbols[START_TAG_NAME] || valid_symbols[END_TAG_NAME]) && !valid_symbols[RAW_TEXT]) {
|
||||
return valid_symbols[START_TAG_NAME]
|
||||
? scan_start_tag_name(lexer)
|
||||
: scan_end_tag_name(lexer);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<Tag> tags;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
void *tree_sitter_html_external_scanner_create() {
|
||||
return new Scanner();
|
||||
}
|
||||
|
||||
bool tree_sitter_html_external_scanner_scan(void *payload, TSLexer *lexer,
|
||||
const bool *valid_symbols) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->scan(lexer, valid_symbols);
|
||||
}
|
||||
|
||||
unsigned tree_sitter_html_external_scanner_serialize(void *payload, char *buffer) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
return scanner->serialize(buffer);
|
||||
}
|
||||
|
||||
void tree_sitter_html_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
scanner->deserialize(buffer, length);
|
||||
}
|
||||
|
||||
void tree_sitter_html_external_scanner_destroy(void *payload) {
|
||||
Scanner *scanner = static_cast<Scanner *>(payload);
|
||||
delete scanner;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,380 +0,0 @@
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
using std::string;
|
||||
using std::map;
|
||||
|
||||
// Every element type the scanner knows by name.
//
// The order is significant: all enumerators before END_OF_VOID_TAGS are void
// elements (Tag::is_void tests `type < END_OF_VOID_TAGS`), and the scanner
// serializes the value as a single byte. New entries must therefore go into
// the correct group, and CUSTOM must stay last.
enum TagType {
  // Void elements.
  AREA, BASE, BASEFONT, BGSOUND, BR, COL, COMMAND, EMBED,
  FRAME, HR, IMAGE, IMG, INPUT, ISINDEX, KEYGEN, LINK,
  MENUITEM, META, NEXTID, PARAM, SOURCE, TRACK, WBR,

  // Marker separating void elements from the rest; also the type of a
  // default-constructed Tag.
  END_OF_VOID_TAGS,

  // Non-void elements.
  A, ABBR, ADDRESS, ARTICLE, ASIDE, AUDIO,
  B, BDI, BDO, BLOCKQUOTE, BODY, BUTTON,
  CANVAS, CAPTION, CITE, CODE, COLGROUP,
  DATA, DATALIST, DD, DEL, DETAILS, DFN, DIALOG, DIV, DL, DT,
  EM,
  FIELDSET, FIGCAPTION, FIGURE, FOOTER, FORM,
  H1, H2, H3, H4, H5, H6, HEAD, HEADER, HGROUP, HTML,
  I, IFRAME, INS,
  KBD,
  LABEL, LEGEND, LI,
  MAIN, MAP, MARK, MATH, MENU, METER,
  NAV, NOSCRIPT,
  OBJECT, OL, OPTGROUP, OPTION, OUTPUT,
  P, PICTURE, PRE, PROGRESS,
  Q,
  RB, RP, RT, RTC, RUBY,
  S, SAMP, SCRIPT, SECTION, SELECT, SLOT, SMALL, SPAN, STRONG, STYLE,
  SUB, SUMMARY, SUP, SVG,
  TABLE, TBODY, TD, TEMPLATE, TEXTAREA, TFOOT, TH, THEAD, TIME, TITLE, TR,
  U, UL,
  VAR, VIDEO,

  // Any tag name not listed above; the name itself is kept in
  // Tag::custom_tag_name.
  CUSTOM,
};
|
||||
|
||||
|
||||
// Builds the table that maps a tag name to its TagType. Each key is the
// enumerator's own spelling (for example "DIV" -> DIV). END_OF_VOID_TAGS and
// CUSTOM are not real tag names and have no entry.
static const map<string, TagType> get_tag_map() {
  // Pair every enumerator with its stringified name so the two can never
  // drift apart.
  #define TAG_ENTRY(name) { #name, name }
  static const struct {
    const char *name;
    TagType type;
  } known_tags[] = {
    TAG_ENTRY(AREA), TAG_ENTRY(BASE), TAG_ENTRY(BASEFONT), TAG_ENTRY(BGSOUND),
    TAG_ENTRY(BR), TAG_ENTRY(COL), TAG_ENTRY(COMMAND), TAG_ENTRY(EMBED),
    TAG_ENTRY(FRAME), TAG_ENTRY(HR), TAG_ENTRY(IMAGE), TAG_ENTRY(IMG),
    TAG_ENTRY(INPUT), TAG_ENTRY(ISINDEX), TAG_ENTRY(KEYGEN), TAG_ENTRY(LINK),
    TAG_ENTRY(MENUITEM), TAG_ENTRY(META), TAG_ENTRY(NEXTID), TAG_ENTRY(PARAM),
    TAG_ENTRY(SOURCE), TAG_ENTRY(TRACK), TAG_ENTRY(WBR),
    TAG_ENTRY(A), TAG_ENTRY(ABBR), TAG_ENTRY(ADDRESS), TAG_ENTRY(ARTICLE),
    TAG_ENTRY(ASIDE), TAG_ENTRY(AUDIO), TAG_ENTRY(B), TAG_ENTRY(BDI),
    TAG_ENTRY(BDO), TAG_ENTRY(BLOCKQUOTE), TAG_ENTRY(BODY), TAG_ENTRY(BUTTON),
    TAG_ENTRY(CANVAS), TAG_ENTRY(CAPTION), TAG_ENTRY(CITE), TAG_ENTRY(CODE),
    TAG_ENTRY(COLGROUP), TAG_ENTRY(DATA), TAG_ENTRY(DATALIST), TAG_ENTRY(DD),
    TAG_ENTRY(DEL), TAG_ENTRY(DETAILS), TAG_ENTRY(DFN), TAG_ENTRY(DIALOG),
    TAG_ENTRY(DIV), TAG_ENTRY(DL), TAG_ENTRY(DT), TAG_ENTRY(EM),
    TAG_ENTRY(FIELDSET), TAG_ENTRY(FIGCAPTION), TAG_ENTRY(FIGURE),
    TAG_ENTRY(FOOTER), TAG_ENTRY(FORM), TAG_ENTRY(H1), TAG_ENTRY(H2),
    TAG_ENTRY(H3), TAG_ENTRY(H4), TAG_ENTRY(H5), TAG_ENTRY(H6),
    TAG_ENTRY(HEAD), TAG_ENTRY(HEADER), TAG_ENTRY(HGROUP), TAG_ENTRY(HTML),
    TAG_ENTRY(I), TAG_ENTRY(IFRAME), TAG_ENTRY(INS), TAG_ENTRY(KBD),
    TAG_ENTRY(LABEL), TAG_ENTRY(LEGEND), TAG_ENTRY(LI), TAG_ENTRY(MAIN),
    TAG_ENTRY(MAP), TAG_ENTRY(MARK), TAG_ENTRY(MATH), TAG_ENTRY(MENU),
    TAG_ENTRY(METER), TAG_ENTRY(NAV), TAG_ENTRY(NOSCRIPT), TAG_ENTRY(OBJECT),
    TAG_ENTRY(OL), TAG_ENTRY(OPTGROUP), TAG_ENTRY(OPTION), TAG_ENTRY(OUTPUT),
    TAG_ENTRY(P), TAG_ENTRY(PICTURE), TAG_ENTRY(PRE), TAG_ENTRY(PROGRESS),
    TAG_ENTRY(Q), TAG_ENTRY(RB), TAG_ENTRY(RP), TAG_ENTRY(RT),
    TAG_ENTRY(RTC), TAG_ENTRY(RUBY), TAG_ENTRY(S), TAG_ENTRY(SAMP),
    TAG_ENTRY(SCRIPT), TAG_ENTRY(SECTION), TAG_ENTRY(SELECT), TAG_ENTRY(SLOT),
    TAG_ENTRY(SMALL), TAG_ENTRY(SPAN), TAG_ENTRY(STRONG), TAG_ENTRY(STYLE),
    TAG_ENTRY(SUB), TAG_ENTRY(SUMMARY), TAG_ENTRY(SUP), TAG_ENTRY(SVG),
    TAG_ENTRY(TABLE), TAG_ENTRY(TBODY), TAG_ENTRY(TD), TAG_ENTRY(TEMPLATE),
    TAG_ENTRY(TEXTAREA), TAG_ENTRY(TFOOT), TAG_ENTRY(TH), TAG_ENTRY(THEAD),
    TAG_ENTRY(TIME), TAG_ENTRY(TITLE), TAG_ENTRY(TR), TAG_ENTRY(U),
    TAG_ENTRY(UL), TAG_ENTRY(VAR), TAG_ENTRY(VIDEO),
  };
  #undef TAG_ENTRY

  map<string, TagType> result;
  const unsigned known_tag_count = sizeof(known_tags) / sizeof(known_tags[0]);
  for (unsigned k = 0; k < known_tag_count; k++) {
    result[known_tags[k].name] = known_tags[k].type;
  }
  return result;
}

// Lookup table built once during static initialisation.
static const map<string, TagType> TAG_TYPES_BY_TAG_NAME = get_tag_map();
|
||||
|
||||
// Tag types that may not appear inside a <p> element. Tag::can_contain()
// searches this list when the parent is P; a hit means the child cannot be
// nested in the paragraph.
//
// The entries follow the HTML Standard's list of elements after which a <p>
// end tag may be omitted. DIALOG, HGROUP, MENU, TABLE and UL are part of that
// list and were previously missing here, so e.g. `<p>text<ul>` wrongly kept
// the list inside the paragraph.
static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
  ADDRESS,
  ARTICLE,
  ASIDE,
  BLOCKQUOTE,
  DETAILS,
  DIALOG,
  DIV,
  DL,
  FIELDSET,
  FIGCAPTION,
  FIGURE,
  FOOTER,
  FORM,
  H1,
  H2,
  H3,
  H4,
  H5,
  H6,
  HEADER,
  HGROUP,
  HR,
  MAIN,
  MENU,
  NAV,
  OL,
  P,
  PRE,
  SECTION,
  TABLE,
  UL,
};
|
||||
|
||||
// One-past-the-end pointer for TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS, so the
// array can be handed to iterator-pair algorithms such as std::find.
static const TagType *TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END =
  TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS +
  sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS) /
    sizeof(TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[0]);
|
||||
|
||||
struct Tag {
|
||||
TagType type;
|
||||
string custom_tag_name;
|
||||
|
||||
// This default constructor is used in the case where there is not enough space
|
||||
// in the serialization buffer to store all of the tags. In that case, tags
|
||||
// that cannot be serialized will be treated as having an unknown type. These
|
||||
// tags will be closed via implicit end tags regardless of the next closing
|
||||
// tag is encountered.
|
||||
Tag() : type(END_OF_VOID_TAGS) {}
|
||||
|
||||
Tag(TagType type, const string &name) : type(type), custom_tag_name(name) {}
|
||||
|
||||
bool operator==(const Tag &other) const {
|
||||
if (type != other.type) return false;
|
||||
if (type == CUSTOM && custom_tag_name != other.custom_tag_name) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool is_void() const {
|
||||
return type < END_OF_VOID_TAGS;
|
||||
}
|
||||
|
||||
inline bool can_contain(const Tag &tag) {
|
||||
TagType child = tag.type;
|
||||
|
||||
switch (type) {
|
||||
case LI: return child != LI;
|
||||
|
||||
case DT:
|
||||
case DD:
|
||||
return child != DT && child != DD;
|
||||
|
||||
case P:
|
||||
return std::find(
|
||||
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS,
|
||||
TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END,
|
||||
tag.type
|
||||
) == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS_END;
|
||||
|
||||
case COLGROUP:
|
||||
return child == COL;
|
||||
|
||||
case RB:
|
||||
case RT:
|
||||
case RP:
|
||||
return child != RB && child != RT && child != RP;
|
||||
|
||||
case OPTGROUP:
|
||||
return child != OPTGROUP;
|
||||
|
||||
case TR:
|
||||
return child != TR;
|
||||
|
||||
case TD:
|
||||
case TH:
|
||||
return child != TD && child != TH && child != TR;
|
||||
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static inline Tag for_name(const string &name) {
|
||||
map<string, TagType>::const_iterator type = TAG_TYPES_BY_TAG_NAME.find(name);
|
||||
if (type != TAG_TYPES_BY_TAG_NAME.end()) {
|
||||
return Tag(type->second, string());
|
||||
} else {
|
||||
return Tag(CUSTOM, name);
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -1,223 +0,0 @@
|
||||
#ifndef TREE_SITTER_PARSER_H_
|
||||
#define TREE_SITTER_PARSER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Reserved symbol ids: the error symbol is -1 converted to TSSymbol, the
 * end symbol is 0. */
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0

/* Size constant for the serialization buffer. */
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

typedef uint16_t TSStateId;

/* These are declared here only when TREE_SITTER_API_H_ is not defined.
 * NOTE(review): presumably the public API header declares the same types
 * under that guard -- confirm. */
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
|
||||
|
||||
typedef struct {
|
||||
TSFieldId field_id;
|
||||
uint8_t child_index;
|
||||
bool inherited;
|
||||
} TSFieldMapEntry;
|
||||
|
||||
/* An (index, length) pair (see TSLanguage.field_map_slices).
 * NOTE(review): presumably addresses a run of TSFieldMapEntry values --
 * confirm against the runtime. */
typedef struct {
  uint16_t index, length;
} TSFieldMapSlice;
|
||||
|
||||
/* Per-symbol boolean flags (see TSLanguage.symbol_metadata). */
typedef struct {
  bool visible, named, supertype;
} TSSymbolMetadata;
|
||||
|
||||
typedef struct TSLexer TSLexer;
|
||||
|
||||
struct TSLexer {
|
||||
int32_t lookahead;
|
||||
TSSymbol result_symbol;
|
||||
void (*advance)(TSLexer *, bool);
|
||||
void (*mark_end)(TSLexer *);
|
||||
uint32_t (*get_column)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(const TSLexer *);
|
||||
bool (*eof)(const TSLexer *);
|
||||
};
|
||||
|
||||
/* Discriminator values stored in the `type` member of TSParseAction.
 * Values are implicit and therefore number 0..3 in declaration order. */
typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
uint8_t type;
|
||||
TSStateId state;
|
||||
bool extra;
|
||||
bool repetition;
|
||||
} shift;
|
||||
struct {
|
||||
uint8_t type;
|
||||
uint8_t child_count;
|
||||
TSSymbol symbol;
|
||||
int16_t dynamic_precedence;
|
||||
uint16_t production_id;
|
||||
} reduce;
|
||||
uint8_t type;
|
||||
} TSParseAction;
|
||||
|
||||
/* Pair of lex-state ids: one for the generated lexer and one for the
 * external lexer (see TSLanguage.lex_modes). */
typedef struct {
  uint16_t lex_state, external_lex_state;
} TSLexMode;
|
||||
|
||||
typedef union {
|
||||
TSParseAction action;
|
||||
struct {
|
||||
uint8_t count;
|
||||
bool reusable;
|
||||
} entry;
|
||||
} TSParseActionEntry;
|
||||
|
||||
struct TSLanguage {
|
||||
uint32_t version;
|
||||
uint32_t symbol_count;
|
||||
uint32_t alias_count;
|
||||
uint32_t token_count;
|
||||
uint32_t external_token_count;
|
||||
uint32_t state_count;
|
||||
uint32_t large_state_count;
|
||||
uint32_t production_id_count;
|
||||
uint32_t field_count;
|
||||
uint16_t max_alias_sequence_length;
|
||||
const uint16_t *parse_table;
|
||||
const uint16_t *small_parse_table;
|
||||
const uint32_t *small_parse_table_map;
|
||||
const TSParseActionEntry *parse_actions;
|
||||
const char * const *symbol_names;
|
||||
const char * const *field_names;
|
||||
const TSFieldMapSlice *field_map_slices;
|
||||
const TSFieldMapEntry *field_map_entries;
|
||||
const TSSymbolMetadata *symbol_metadata;
|
||||
const TSSymbol *public_symbol_map;
|
||||
const uint16_t *alias_map;
|
||||
const TSSymbol *alias_sequences;
|
||||
const TSLexMode *lex_modes;
|
||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
||||
TSSymbol keyword_capture_token;
|
||||
struct {
|
||||
const bool *states;
|
||||
const TSSymbol *symbol_map;
|
||||
void *(*create)(void);
|
||||
void (*destroy)(void *);
|
||||
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
||||
unsigned (*serialize)(void *, char *);
|
||||
void (*deserialize)(void *, const char *, unsigned);
|
||||
} external_scanner;
|
||||
};
|
||||
|
||||
/*
|
||||
* Lexer Macros
|
||||
*/
|
||||
|
||||
/* Opens a lex function body: declares the locals the other lexer macros use,
 * then jumps to `start`, skipping the first advance. Every later jump to
 * `next_state` advances the lexer (passing `skip`) before falling through to
 * `start`, which clears `skip` and reloads `lookahead`.
 * Expects a `lexer` pointer in scope. */
#define START_LEXER()            \
  bool result = false;           \
  bool skip = false;             \
  bool eof = false;              \
  int32_t lookahead;             \
  goto start;                    \
  next_state:                    \
  lexer->advance(lexer, skip);   \
  start:                         \
  skip = false;                  \
  lookahead = lexer->lookahead;

/* Moves to `state_value` and consumes the current lookahead.
 * Expects a `state` variable in scope. */
#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }

/* Same as ADVANCE, but sets `skip` so the advance callback receives true. */
#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }

/* Records `symbol_value` as the result and marks the token end here. This
 * expands to three separate statements, not a single block. */
#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);

/* Leaves the lex function, returning whether a token was accepted. */
#define END_STATE() return result;
|
||||
|
||||
/*
|
||||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
/* Converts a state id into an offset relative to LARGE_STATE_COUNT.
 * LARGE_STATE_COUNT is not defined in this header and must be supplied by
 * the including file. Argument and result are parenthesised so the macro
 * stays correct inside larger expressions (previously `2 * SMALL_STATE(5)`
 * expanded to `2 * 5 - LARGE_STATE_COUNT`). */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

/* Identity wrappers that label a state id / an actions index in the generated
 * tables. Parenthesised so an expression argument keeps its grouping. */
#define STATE(id) (id)

#define ACTIONS(id) (id)
|
||||
|
||||
/* Initialisers for parse-action entries. Each expands to a doubly-braced
 * initialiser that fills one TSParseAction variant through designated
 * initialisers. */

/* Shift to `state_value`. */
#define SHIFT(state_value)            \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = state_value            \
    }                                 \
  }}

/* Shift to `state_value` with the `repetition` flag set. */
#define SHIFT_REPEAT(state_value)     \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .state = state_value,           \
      .repetition = true              \
    }                                 \
  }}

/* Shift with the `extra` flag set and no explicit target state. */
#define SHIFT_EXTRA()                 \
  {{                                  \
    .shift = {                        \
      .type = TSParseActionTypeShift, \
      .extra = true                   \
    }                                 \
  }}

/* Reduce `child_count_val` children to `symbol_val`; any further arguments
 * are pasted in as extra designated initialisers of the reduce variant. */
#define REDUCE(symbol_val, child_count_val, ...) \
  {{                                             \
    .reduce = {                                  \
      .type = TSParseActionTypeReduce,           \
      .symbol = symbol_val,                      \
      .child_count = child_count_val,            \
      __VA_ARGS__                                \
    },                                           \
  }}

/* Action carrying only the Recover type. */
#define RECOVER()                    \
  {{                                 \
    .type = TSParseActionTypeRecover \
  }}

/* Action carrying only the Accept type. */
#define ACCEPT_INPUT()              \
  {{                                \
    .type = TSParseActionTypeAccept \
  }}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_PARSER_H_
|
||||
Loading…
Reference in New Issue