Initial commit

pull/511/head
Patrick Förster 2020-11-25 14:53:14 +07:00
commit d80c3da4f8
11 changed files with 90627 additions and 0 deletions

202
.gitignore vendored

@ -0,0 +1,202 @@
# Created by https://www.toptal.com/developers/gitignore/api/c,c++,node,visualstudiocode
# Edit at https://www.toptal.com/developers/gitignore?templates=c,c++,node,visualstudiocode
### C ###
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
### C++ ###
# Prerequisites
# Compiled Object files
*.slo
# Precompiled Headers
# Compiled Dynamic libraries
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
# Executables
### Node ###
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
.env*.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Uncomment the "public" line below if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
### VisualStudioCode ###
.vscode/*
!.vscode/tasks.json
!.vscode/launch.json
*.code-workspace
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
# End of https://www.toptal.com/developers/gitignore/api/c,c++,node,visualstudiocode
/binding.gyp
/index.js

@ -0,0 +1,4 @@
{
"singleQuote": true,
"arrowParens": "avoid"
}

@ -0,0 +1,190 @@
{
"citation": [
"\\cite",
"\\cite*",
"\\Cite",
"\\nocite",
"\\citet",
"\\citep",
"\\citet*",
"\\citep*",
"\\citeauthor",
"\\citeauthor*",
"\\Citeauthor",
"\\Citeauthor*",
"\\citetitle",
"\\citetitle*",
"\\citeyear",
"\\citeyear*",
"\\citedate",
"\\citedate*",
"\\citeurl",
"\\fullcite",
"\\citeyearpar",
"\\citealt",
"\\citealp",
"\\citetext",
"\\parencite",
"\\parencite*",
"\\Parencite",
"\\footcite",
"\\footfullcite",
"\\footcitetext",
"\\textcite",
"\\Textcite",
"\\smartcite",
"\\Smartcite",
"\\supercite",
"\\autocite",
"\\Autocite",
"\\autocite*",
"\\Autocite*",
"\\volcite",
"\\Volcite",
"\\pvolcite",
"\\Pvolcite",
"\\fvolcite",
"\\ftvolcite",
"\\svolcite",
"\\Svolcite",
"\\tvolcite",
"\\Tvolcite",
"\\avolcite",
"\\Avolcite",
"\\notecite",
"\\Notecite",
"\\pnotecite",
"\\Pnotecite",
"\\fnotecite"
],
"include": [
"\\include",
"\\input",
"\\bibliography",
"\\verbatiminput",
"\\VerbatimInput",
"\\subfile",
"\\subfileinclude"
],
"labelReference": [
"\\ref",
"\\vref",
"\\Vref",
"\\autoref",
"\\pageref",
"\\eqref",
"\\cref",
"\\Cref",
"\\cref*",
"\\Cref*",
"\\namecref",
"\\nameCref",
"\\lcnamecref",
"\\namecrefs",
"\\nameCrefs",
"\\lcnamecrefs",
"\\labelcref",
"\\labelcpageref"
],
"labelRangeReference": [
"\\crefrange",
"\\crefrange",
"\\Crefrange",
"\\Crefrange",
"\\crefrange*",
"\\crefrange*",
"\\Crefrange*",
"\\Crefrange*"
],
"glossaryEntryReference": [
"\\gls",
"\\Gls",
"\\GLS",
"\\glspl",
"\\Glspl",
"\\GLSpl",
"\\glsdisp",
"\\glslink",
"\\glstext",
"\\Glstext",
"\\GLStext",
"\\glsfirst",
"\\Glsfirst",
"\\GLSfirst",
"\\glsplural",
"\\Glsplural",
"\\GLSplural",
"\\glsfirstplural",
"\\Glsfirstplural",
"\\GLSfirstplural",
"\\glsname",
"\\Glsname",
"\\GLSname",
"\\glssymbol",
"\\Glssymbol",
"\\glsdesc",
"\\Glsdesc",
"\\GLSdesc",
"\\glsuseri",
"\\Glsuseri",
"\\GLSuseri",
"\\glsuserii",
"\\Glsuserii",
"\\GLSuserii",
"\\glsuseriii",
"\\Glsuseriii",
"\\GLSuseriii",
"\\glsuseriv",
"\\Glsuseriv",
"\\GLSuseriv",
"\\glsuserv",
"\\Glsuserv",
"\\GLSuserv",
"\\glsuservi",
"\\Glsuservi",
"\\GLSuservi",
"\\acrshort",
"\\Acrshort",
"\\ACRshort",
"\\acrshortpl",
"\\Acrshortpl",
"\\ACRshortpl",
"\\acrlong",
"\\Acrlong",
"\\ACRlong",
"\\acrlongpl",
"\\Acrlongpl",
"\\ACRlongpl",
"\\acrfull",
"\\Acrfull",
"\\ACRfull",
"\\acrfullpl",
"\\Acrfullpl",
"\\ACRfullpl",
"\\acs",
"\\Acs",
"\\acsp",
"\\Acsp",
"\\acl",
"\\Acl",
"\\aclp",
"\\Aclp",
"\\acf",
"\\Acf",
"\\acfp",
"\\Acfp",
"\\ac",
"\\Ac",
"\\acp",
"\\glsentrylong",
"\\Glsentrylong",
"\\glsentrylongpl",
"\\Glsentrylongpl",
"\\glsentryshort",
"\\Glsentryshort",
"\\glsentryshortpl",
"\\Glsentryshortpl",
"\\glsentryfullpl",
"\\Glsentryfullpl"
]
}

@ -0,0 +1,374 @@
const commands = require('./commands.json');
/**
 * Builds a rule that matches one or more occurrences of `rule`, with `sep`
 * between consecutive occurrences.
 *
 * @param rule - Rule for a single list element.
 * @param sep - Rule for the separator between two elements.
 * @returns A sequence of `rule` followed by any number of `sep rule` pairs.
 */
const sepBy1 = (rule, sep) => {
  const separatedTail = repeat(seq(sep, rule));
  return seq(rule, separatedTail);
};
/**
 * Builds a rule that matches zero or more occurrences of `rule`, with `sep`
 * between consecutive occurrences.
 *
 * @param rule - Rule for a single list element.
 * @param sep - Rule for the separator between two elements.
 * @returns The non-empty list rule from `sepBy1`, made optional.
 */
const sepBy = (rule, sep) => {
  const nonEmptyList = sepBy1(rule, sep);
  return optional(nonEmptyList);
};
module.exports = grammar({
name: 'latex',
extras: $ => [$.whitespace, $.comment],
word: $ => $._generic_command_name,
rules: {
document: $ => repeat($._content),
//--- Trivia ---//
whitespace: $ => /\s+/,
comment: $ => /%[^\r\n]+/,
//--- Content ---//
_simple_content: $ =>
choice(
$.brace_group,
$.mixed_group,
$.param,
$.text,
$.displayed_equation,
$.inline_formula,
$.environment,
$.caption,
$.citation,
$.package_include,
$.class_include,
$.biblatex_include,
$.graphics_include,
$.import,
$.label_definition,
$.label_reference,
$.label_reference_range,
$.label_number,
$.command_definition,
$.math_operator,
$.glossary_entry_definition,
$.glossary_entry_reference,
$.acronym_definition,
$.theorem_definition,
$.generic_command
),
_content: $ =>
choice(
$.part,
$.chapter,
$.section,
$.subsection,
$.subsubsection,
$.paragraph,
$.subparagraph,
$.enum_item,
$._simple_content
),
//--- Structure ---//
part: $ =>
prec.right(
seq(
/\\part\*?/,
$.brace_group,
repeat(
choice(
$.chapter,
$.section,
$.subsection,
$.subsubsection,
$.paragraph,
$.subparagraph,
$.enum_item,
$._simple_content
)
)
)
),
chapter: $ =>
prec.right(
seq(
/\\chapter\*?/,
$.brace_group,
repeat(
choice(
$.section,
$.subsection,
$.subsubsection,
$.paragraph,
$.subparagraph,
$.enum_item,
$._simple_content
)
)
)
),
section: $ =>
prec.right(
seq(
/\\section\*?/,
$.brace_group,
repeat(
choice(
$.subsection,
$.subsubsection,
$.paragraph,
$.subparagraph,
$.enum_item,
$._simple_content
)
)
)
),
subsection: $ =>
prec.right(
seq(
/\\subsection\*?/,
$.brace_group,
repeat(
choice(
$.subsubsection,
$.paragraph,
$.subparagraph,
$.enum_item,
$._simple_content
)
)
)
),
subsubsection: $ =>
prec.right(
seq(
/\\subsubsection\*?/,
$.brace_group,
repeat(
choice($.paragraph, $.subparagraph, $.enum_item, $._simple_content)
)
)
),
paragraph: $ =>
prec.right(
seq(
/\\paragraph\*?/,
$.brace_group,
repeat(choice($.subparagraph, $.enum_item, $._simple_content))
)
),
subparagraph: $ =>
prec.right(
seq(
/\\subparagraph\*?/,
$.brace_group,
repeat(choice($.enum_item, $._simple_content))
)
),
enum_item: $ =>
prec.right(
seq(
'\\item',
optional(seq('[', $.word, ']')),
repeat(choice($._simple_content))
)
),
//--- Groups ---//
brace_group: $ => seq('{', repeat($._content), '}'),
bracket_group: $ => seq('[', repeat($._content), ']'),
paren_group: $ => seq('(', repeat($._content), ')'),
mixed_group: $ =>
seq(choice('(', '['), repeat($._content), choice(')', ']')),
key_val_options: $ => seq('[', sepBy($.key_val_pair, ','), ']'),
key_val_pair: $ => seq($.key, optional(seq('=', $._content))),
key: $ => repeat1($.word),
_word_group: $ => seq('{', $.word, '}'),
_comma_sep_word_group: $ => seq('{', sepBy($.word, ','), '}'),
//--- Text ---//
// Performance optimization: store text as a binary tree instead of a list
text: $ => prec.right(seq($._text_fragment, optional($.text))),
_text_fragment: $ => prec.right(choice($.word, ',', '=')),
word: $ => /[^\s\\%\{\},\$\[\]\(\)=\#]+/,
param: $ => /#\d/,
//--- Math ---//
displayed_equation: $ =>
prec.left(
seq(choice('$$', '\\['), repeat($._content), choice('$$', '\\]'))
),
inline_formula: $ =>
prec.left(
seq(choice('$', '\\('), repeat($._content), choice('$', '\\)'))
),
//--- Environment ---//
begin: $ =>
prec.right(seq('\\begin', '{', $.word, '}', repeat($.bracket_group))),
end: $ => seq('\\end', '{', $.word, '}'),
environment: $ => prec.right(seq($.begin, repeat($._content), $.end)),
//--- Special Commands ---//
caption: $ => seq('\\caption', optional($.bracket_group), $.brace_group),
citation: $ =>
seq(
token(choice(...commands.citation)),
optional($.bracket_group),
optional($.bracket_group),
$._comma_sep_word_group
),
package_include: $ =>
seq(
token(choice('\\usepackage', '\\RequirePackage')),
optional($.key_val_options),
$._comma_sep_word_group
),
class_include: $ =>
seq(
'\\documentclass',
optional($.key_val_options),
$._comma_sep_word_group
),
biblatex_include: $ =>
seq(
'\\addbibresource',
optional($.key_val_options),
$._comma_sep_word_group
),
graphics_include: $ =>
seq(
token(choice('\\includegraphics', '\\includesvg', '\\includeinkscape')),
optional($.key_val_options),
$._comma_sep_word_group
),
generic_include: $ =>
seq(token(choice(...commands.include)), $._comma_sep_word_group),
import: $ =>
seq(
token(
choice(
'\\import',
'\\subimport',
'\\inputfrom',
'\\subimportfrom',
'\\includefrom',
'\\subincludefrom'
)
),
$._word_group,
$._word_group
),
label_definition: $ => seq('\\label', $._word_group),
label_reference: $ =>
seq(token(choice(...commands.labelReference)), $._comma_sep_word_group),
label_reference_range: $ =>
seq(
token(choice(...commands.labelRangeReference)),
$._word_group,
$._word_group
),
label_number: $ => seq('\\newlabel', $._word_group, $.brace_group),
command_definition: $ =>
seq(
token(
choice('\\newcommand', '\\renewcommand', '\\DeclareRobustCommand')
),
optional(seq('[', /\d/, ']')),
'{',
$._generic_command_name,
'}',
$.brace_group
),
math_operator: $ =>
seq(
token(choice('\\DeclareMathOperator', '\\DeclareMathOperator*')),
'{',
$._generic_command_name,
'}',
$.brace_group
),
glossary_entry_definition: $ =>
seq(
'\\newglossaryentry',
$._word_group,
'{',
sepBy($.key_val_pair, ','),
'}'
),
glossary_entry_reference: $ =>
seq(
token(choice(...commands.glossaryEntryReference)),
optional($.key_val_options),
$._word_group
),
acronym_definition: $ =>
seq(
'\\newacronym',
optional($.key_val_options),
$._word_group,
$.brace_group,
$.brace_group
),
theorem_definition: $ =>
prec.right(
seq(
token(choice('\\newtheorem', '\\declaretheorem')),
$._word_group,
choice(
seq($.brace_group, optional(seq('[', $.word, ']'))),
seq('[', $.word, ']', $.brace_group)
)
)
),
//--- Generic commands ---//
generic_command: $ =>
prec.right(
seq(
$._generic_command_name,
repeat(choice($.brace_group, $.bracket_group, $.paren_group))
)
),
_generic_command_name: $ => /\\([^\r\n]|[@a-zA-Z]+\*?)?/,
},
});

19
package-lock.json generated

@ -0,0 +1,19 @@
{
"name": "tree-sitter-latex",
"version": "0.1.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"nan": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
"integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ=="
},
"tree-sitter-cli": {
"version": "0.17.3",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.17.3.tgz",
"integrity": "sha512-AsQhjwRwWK5wtymwVc2H5E8/Q7yzMebSj7CQyeSg50k4h7m8HHwao1i/eKlh8aGTJ3IWbGjSwBAUZTSbzcSW6Q==",
"dev": true
}
}
}

@ -0,0 +1,30 @@
{
"name": "tree-sitter-latex",
"version": "0.1.0",
"description": "Tree-sitter Grammar for LaTeX",
"scripts": {
"build": "tree-sitter generate",
"test": "tree-sitter test"
},
"repository": {
"type": "git",
"url": "git+https://github.com/latex-lsp/tree-sitter-latex.git"
},
"keywords": [
"tree-sitter",
"latex",
"parser"
],
"author": "Patrick Förster <pfoerster@users.noreply.github.com>",
"license": "MIT",
"bugs": {
"url": "https://github.com/latex-lsp/tree-sitter-latex/issues"
},
"homepage": "https://github.com/latex-lsp/tree-sitter-latex#readme",
"dependencies": {
"nan": "^2.14.2"
},
"devDependencies": {
"tree-sitter-cli": "^0.17.3"
}
}

@ -0,0 +1,28 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
// Accessor for the LaTeX language object, declared with C linkage. It is only
// declared here; the definition is not part of this file (presumably it lives
// in the generated parser source — confirm).
extern "C" TSLanguage * tree_sitter_latex();
namespace {

// Constructor callback for the `Language` function template. It has an empty
// body: the single instance is configured by Init() below.
NAN_METHOD(New) {}

// Module initializer: creates one `Language` object, stores the
// tree_sitter_latex() pointer in its internal field 0, tags it with
// name = "latex" and installs it as `module.exports`.
void Init(Local<Object> exports, Local<Object> module) {
  Local<FunctionTemplate> language_template = Nan::New<FunctionTemplate>(New);
  language_template->SetClassName(Nan::New("Language").ToLocalChecked());
  // One internal field is reserved; it receives the language pointer below.
  language_template->InstanceTemplate()->SetInternalFieldCount(1);

  Local<Function> language_ctor =
      Nan::GetFunction(language_template).ToLocalChecked();
  Local<Object> language =
      language_ctor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
  Nan::SetInternalFieldPointer(language, 0, tree_sitter_latex());

  Nan::Set(language,
           Nan::New("name").ToLocalChecked(),
           Nan::New("latex").ToLocalChecked());
  Nan::Set(module, Nan::New("exports").ToLocalChecked(), language);
}

NODE_MODULE(tree_sitter_latex_binding, Init)

}  // namespace

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,238 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Reserved symbol values: the error symbol is -1 cast to TSSymbol, the end
// symbol is 0.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Buffer size constant; it is not referenced anywhere else in this header
// (presumably used by external scanner serialization — confirm).
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// The basic typedefs are only emitted when TREE_SITTER_API_H_ is not defined
// (presumably that header provides the same declarations — confirm).
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field-map entry: associates a field id with a child index. The meaning
// of `inherited` is not visible in this header.
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// An (index, length) pair; presumably selects a run of TSFieldMapEntry values
// in TSLanguage.field_map_entries — confirm.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Identifier of a state; used by the shift action and as the second argument
// of the lex functions declared in TSLanguage.
typedef uint16_t TSStateId;
// Three one-bit flags describing a symbol (one entry per symbol is referenced
// from TSLanguage.symbol_metadata).
typedef struct {
bool visible : 1;
bool named : 1;
bool supertype: 1;
} TSSymbolMetadata;
// Lexer interface used by the lexer macros in this header: START_LEXER reads
// `lookahead` and calls `advance`, ACCEPT_TOKEN writes `result_symbol` and
// calls `mark_end`.
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
// START_LEXER passes its `skip` flag as the second argument.
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
// Discriminator stored in TSParseAction.type; each value is set by one of the
// parse table macros in this header (SHIFT*/REDUCE/ACCEPT_INPUT/RECOVER).
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action: `type` tells which kind of action this is, `params`
// carries the data for shift actions (filled by the SHIFT* macros) or reduce
// actions (filled by REDUCE).
typedef struct {
union {
struct {
TSStateId state;
bool extra : 1;
bool repetition : 1;
} shift;
struct {
TSSymbol symbol;
int16_t dynamic_precedence;
uint8_t child_count;
uint8_t production_id;
} reduce;
} params;
TSParseActionType type : 4;
} TSParseAction;
// Pair of lex state ids, one for the regular lexer and one for the external
// lexer (an array of these is referenced from TSLanguage.lex_modes).
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Element of the parse action table: either a parse action or an `entry`
// header holding a count and a `reusable` flag (presumably the number of
// actions that follow the header — confirm).
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable : 1;
} entry;
} TSParseActionEntry;
// Description of a language: counts, symbol name/metadata tables, parse and
// lex tables, the lex functions, external scanner callbacks and field maps.
// The members are only declared here; their exact semantics are defined by
// the tree-sitter runtime and the generated parser, neither of which is
// visible in this header.
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
const char **symbol_names;
const TSSymbolMetadata *symbol_metadata;
const uint16_t *parse_table;
const TSParseActionEntry *parse_actions;
const TSLexMode *lex_modes;
const TSSymbol *alias_sequences;
uint16_t max_alias_sequence_length;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// Callbacks and tables of an external scanner.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
uint32_t field_count;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const char **field_names;
uint32_t large_state_count;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
uint32_t state_count;
};
/*
* Lexer Macros
*/
// Prologue of a lex function. Declares the locals `result`, `skip`, `eof` and
// `lookahead`, then jumps to `start`, which clears `skip` and loads the
// current lookahead from `lexer`. The `next_state` label (the jump target of
// ADVANCE and SKIP) first calls lexer->advance(lexer, skip) and then falls
// through to `start`. Requires a `TSLexer *lexer` in the enclosing scope.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// Stores `state_value` in the enclosing function's `state` variable and jumps
// to the `next_state` label introduced by START_LEXER.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Like ADVANCE, but sets `skip` to true first; START_LEXER passes that flag to
// lexer->advance at `next_state` and resets it afterwards.
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// Records `symbol_value` as the lexer's result symbol, sets `result` to true
// and calls lexer->mark_end.
// NOTE(review): expands to three statements without enclosing braces, so it
// must not be used as the unbraced body of an `if`/`else`/loop.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// Returns the `result` flag declared by START_LEXER (true once ACCEPT_TOKEN
// has run).
#define END_STATE() return result;
/*
* Parse Table Macros
*/
// Converts a state id into an offset relative to LARGE_STATE_COUNT (which must
// be defined by the including file). Argument and result are parenthesized so
// the macro expands correctly inside larger expressions and with compound
// arguments.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
// Identity macros: both expand to their argument unchanged (presumably they
// only label table indices in the generated parser — confirm).
#define STATE(id) id
#define ACTIONS(id) id
// Brace-enclosed initializer for a shift action: sets params.shift.state to
// `state_value` and type to TSParseActionTypeShift. The outer brace pair
// presumably initializes the enclosing TSParseActionEntry — confirm.
#define SHIFT(state_value) \
{ \
{ \
.params = { \
.shift = { \
.state = state_value \
} \
}, \
.type = TSParseActionTypeShift \
} \
}
// Same initializer as SHIFT, with params.shift.repetition additionally set to
// true.
#define SHIFT_REPEAT(state_value) \
{ \
{ \
.params = { \
.shift = { \
.state = state_value, \
.repetition = true \
} \
}, \
.type = TSParseActionTypeShift \
} \
}
// Initializer for an action whose type is TSParseActionTypeRecover; no params
// are set.
#define RECOVER() \
{ \
{ .type = TSParseActionTypeRecover } \
}
// Initializer for a shift action with params.shift.extra set to true; no
// target state is given.
#define SHIFT_EXTRA() \
{ \
{ \
.params = { \
.shift = { \
.extra = true \
} \
}, \
.type = TSParseActionTypeShift \
} \
}
// Initializer for a reduce action: sets params.reduce.symbol and
// params.reduce.child_count, then appends the variadic arguments verbatim as
// further designated initializers of `reduce` (its remaining members are
// dynamic_precedence and production_id).
#define REDUCE(symbol_val, child_count_val, ...) \
{ \
{ \
.params = { \
.reduce = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}, \
.type = TSParseActionTypeReduce \
} \
}
// Initializer for an action whose type is TSParseActionTypeAccept; no params
// are set.
#define ACCEPT_INPUT() \
{ \
{ .type = TSParseActionTypeAccept } \
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_