Merge branch 'delehef/master'

pull/492/head
Wilfred Hughes 2023-02-21 08:46:07 +07:00
commit 9556cd978e
30 changed files with 3841 additions and 0 deletions

@ -235,6 +235,11 @@ fn main() {
src_dir: "vendored_parsers/tree-sitter-make-src",
extra_files: vec![],
},
TreeSitterParser {
name: "tree-sitter-newick",
src_dir: "vendored_parsers/tree-sitter-newick-src",
extra_files: vec![],
},
TreeSitterParser {
name: "tree-sitter-nix",
src_dir: "vendored_parsers/tree-sitter-nix-src",

@ -33,6 +33,7 @@ with `difft --list-languages`.
| Kotlin | [fwcd/tree-sitter-kotlin](https://github.com/fwcd/tree-sitter-kotlin) |
| Lua | [nvim-treesitter/tree-sitter-lua](https://github.com/nvim-treesitter/tree-sitter-lua) |
| Make | [alemuller/tree-sitter-make](https://github.com/alemuller/tree-sitter-make) |
| Newick | [delehef/tree-sitter-newick](https://github.com/delehef/tree-sitter-newick) |
| Nix | [cstrahan/tree-sitter-nix](https://github.com/cstrahan/tree-sitter-nix) |
| OCaml | [tree-sitter/tree-sitter-ocaml](https://github.com/tree-sitter/tree-sitter-ocaml) |
| Perl | [ganezdragon/tree-sitter-perl](https://github.com/ganezdragon/tree-sitter-perl) |

@ -130,6 +130,9 @@ sample_files/nested_slider_before.rs sample_files/nested_slider_after.rs
sample_files/nesting_before.el sample_files/nesting_after.el
9fcf6b8adf148cdae47bc3a5f8068e41 -
sample_files/newick_before.nwk sample_files/newick_after.nwk
4778aef7b901b2cc7f9ad3601f87864d -
sample_files/nix_before.nix sample_files/nix_after.nix
09a56752c1eb7f3f5c10d631a01973fc -

@ -0,0 +1,903 @@
(
(
(
Drosophila_melanogaster,
Caenorhabditis_elegans
)
,
(
(
Ciona_intestinalis,
Ciona_savignyi
)
,
(
(
Eptatretus_burgeri,
Petromyzon_marinus
)
,
(
(
(
(
(
(
(
(
(
(
(
Astyanax_mexicanus,
Pygocentrus_nattereri
)
,
Electrophorus_electricus
)
,
Ictalurus_punctatus
)
,
(
(
(
Cyprinus_carpio_carpio,
Sinocyclocheilus_grahami
)
,
Carassius_auratus
)
,
Danio_rerio
)
)
,
(
Clupea_harengus,
Denticeps_clupeoides
)
)
,
(
(
(
(
(
(
(
(
(
(
Kryptolebias_marmoratus,
Nothobranchius_furzeri
)
,
(
(
Fundulus_heteroclitus,
Cyprinodon_variegatus
)
,
(
(
(
Poecilia_latipinna,
Poecilia_formosa
)
,
Poecilia_reticulata
)
,
Xiphophorus_maculatus
)
)
)
,
(
(
Oryzias_javanicus,
Oryzias_melastigma
)
,
(
Oryzias_latipes,
Oryzias_sinensis
)
)
)
,
(
(
(
(
(
(
(
Maylandia_zebra,
Astatotilapia_calliptera
)
,
Pundamilia_nyererei
)
,
Haplochromis_burtoni
)
,
Neolamprologus_brichardi
)
,
Oreochromis_niloticus
)
,
Amphilophus_citrinellus
)
,
(
(
(
Amphiprion_percula,
Amphiprion_ocellaris
)
,
Acanthochromis_polyacanthus
)
,
Stegastes_partitus
)
)
)
,
(
(
(
Cynoglossus_semilaevis,
Scophthalmus_maximus
)
,
(
(
Seriola_dumerili,
Seriola_lalandi_dorsalis
)
,
Lates_calcarifer
)
)
,
(
(
Anabas_testudineus,
Betta_splendens
)
,
Mastacembelus_armatus
)
)
)
,
(
(
(
(
(
Cyclopterus_lumpus,
Gasterosteus_aculeatus
)
,
(
Cottoperca_gobio,
Sander_lucioperca
)
)
,
(
(
Larimichthys_crocea,
Dicentrarchus_labrax
)
,
Sparus_aurata
)
)
,
Labrus_bergylta
)
,
(
Tetraodon_nigroviridis,
Takifugu_rubripes
)
)
)
,
Hippocampus_comes
)
,
Myripristis_murdjan
)
,
Gadus_morhua
)
,
(
(
(
(
(
Oncorhynchus_kisutch,
Oncorhynchus_tshawytscha
)
,
Oncorhynchus_mykiss
)
,
(
Salmo_salar,
Salmo_trutta
)
)
,
Hucho_hucho
)
,
Esox_lucius
)
)
)
,
(
Scleropages_formosus,
Paramormyrops_kingsleyae
)
)
,
Lepisosteus_oculatus
)
,
Erpetoichthys_calabaricus
)
,
(
(
(
Leptobrachium_leishanense,
Xenopus_tropicalis
)
,
(
(
(
(
(
(
(
(
Erinaceus_europaeus,
Sorex_araneus
)
,
(
(
(
(
(
Rhinolophus_ferrumequinum,
Myotis_lucifugus
)
,
Pteropus_vampyrus
)
,
(
Equus_caballus,
Equus_asinus_asinus
)
)
,
(
(
(
(
(
(
(
Ovis_aries_reference_breed,
Capra_hircus_reference_breed
)
,
(
(
(
Bos_indicus_x_Bos_taurus,
Bos_taurus
)
,
(
Bos_mutus,
Bos_grunniens
)
)
,
Bison_bison_bison
)
)
,
Moschus_moschiferus
)
,
Cervus_hanglu_yarkandensis
)
,
(
(
(
(
(
Monodon_monoceros,
Delphinapterus_leucas
)
,
Phocoena_sinus
)
,
Tursiops_truncatus
)
,
Physeter_catodon
)
,
Balaenoptera_musculus
)
)
,
(
(
(
(
(
(
(
Sus_scrofa_breed_jinhua,
Sus_scrofa_breed_meishan
)
,
Sus_scrofa_breed_rongchang
)
,
Sus_scrofa_breed_tibetan
)
,
Sus_scrofa_breed_bamei
)
,
Sus_scrofa_breed_wuzhishan
)
,
(
(
Sus_scrofa_reference_breed,
Sus_scrofa_breed_usmarc
)
,
(
(
(
Sus_scrofa_breed_hampshire,
Sus_scrofa_breed_berkshire
)
,
Sus_scrofa_breed_landrace
)
,
(
Sus_scrofa_breed_largewhite,
Sus_scrofa_breed_pietrain
)
)
)
)
,
Catagonus_wagneri
)
)
,
(
Camelus_dromedarius,
Vicugna_pacos
)
)
)
,
(
(
(
(
(
Ursus_maritimus,
Ursus_americanus
)
,
Ailuropoda_melanoleuca_reference_isolate
)
,
(
Neogale_vison,
Mustela_putorius_furo
)
)
,
(
(
Canis_lupus_familiaris_breed_Labrador_retriever,
Canis_lupus_dingo
)
,
Vulpes_vulpes
)
)
,
(
(
(
Panthera_leo,
Panthera_pardus
)
,
Panthera_tigris_altaica
)
,
Felis_catus
)
)
)
)
,
(
(
(
(
(
(
(
(
(
(
(
Cricetulus_griseus,
Mesocricetus_auratus
)
,
Peromyscus_maniculatus_bairdii
)
,
Microtus_ochrogaster
)
,
(
(
(
(
(
Mus_spicilegus,
Mus_spretus_strain_SPRET/EiJ
)
,
(
(
(
(
(
(
(
Mus_musculus_reference_CL57BL6_strain,
Mus_musculus_strain_C57BL/6NJ
)
,
Mus_musculus_strain_NZO/HlLtJ
)
,
(
(
(
(
Mus_musculus_strain_A/J,
Mus_musculus_strain_BALB/cJ
)
,
(
(
Mus_musculus_strain_C3H/HeJ,
Mus_musculus_strain_CBA/J
)
,
Mus_musculus_strain_DBA/2J
)
)
,
Mus_musculus_strain_AKR/J
)
,
(
Mus_musculus_strain_FVB/NJ,
Mus_musculus_strain_NOD/ShiLtJ
)
)
)
,
(
Mus_musculus_strain_129S1/SvImJ,
Mus_musculus_strain_LP/J
)
)
,
Mus_musculus_domesticus_strain_WSB/EiJ
)
,
Mus_musculus_musculus_strain_PWK/PhJ
)
,
Mus_musculus_castaneus_strain_CAST/EiJ
)
)
,
Mus_caroli_strain_CAROLI_EIJ
)
,
Mus_pahari_strain_PAHARI_EIJ
)
,
Rattus_norvegicus_strain_BN/NHsdMcwi
)
)
,
Nannospalax_galili
)
,
Jaculus_jaculus
)
,
Dipodomys_ordii
)
,
(
(
(
(
Chinchilla_lanigera,
Cavia_porcellus
)
,
Heterocephalus_glaber
)
,
Octodon_degus
)
,
(
(
(
Urocitellus_parryii,
Ictidomys_tridecemlineatus
)
,
Marmota_marmota_marmota
)
,
Sciurus_vulgaris
)
)
)
,
(
Oryctolagus_cuniculus,
Ochotona_princeps
)
)
,
Tupaia_belangeri
)
,
(
(
(
(
(
(
(
(
(
Pan_troglodytes,
Pan_paniscus
)
,
Homo_sapiens
)
,
Gorilla_gorilla_gorilla
)
,
Pongo_abelii
)
,
Nomascus_leucogenys
)
,
(
(
(
(
(
Cercocebus_atys,
Mandrillus_leucophaeus
)
,
Papio_anubis
)
,
(
(
Macaca_mulatta,
Macaca_fascicularis
)
,
Macaca_nemestrina
)
)
,
Chlorocebus_sabaeus
)
,
(
Rhinopithecus_roxellana,
Rhinopithecus_bieti
)
)
)
,
(
(
(
Cebus_imitator,
Saimiri_boliviensis_boliviensis
)
,
Callithrix_jacchus
)
,
Aotus_nancymaae
)
)
,
Carlito_syrichta
)
,
(
(
(
Prolemur_simus,
Propithecus_coquereli
)
,
Microcebus_murinus
)
,
Otolemur_garnettii
)
)
)
)
,
(
Dasypus_novemcinctus,
Choloepus_hoffmanni
)
)
,
(
(
Loxodonta_africana,
Procavia_capensis
)
,
Echinops_telfairi
)
)
,
(
(
(
(
Vombatus_ursinus,
Phascolarctos_cinereus
)
,
Notamacropus_eugenii
)
,
Sarcophilus_harrisii
)
,
Monodelphis_domestica
)
)
,
Ornithorhynchus_anatinus
)
,
(
(
(
(
(
(
(
Notechis_scutatus,
Pseudonaja_textilis
)
,
Laticauda_laticaudata
)
,
Naja_naja
)
,
Anolis_carolinensis_reference_strain
)
,
(
Salvator_merianae,
Podarcis_muralis
)
)
,
Sphenodon_punctatus
)
,
(
(
(
(
(
(
(
Gallus_gallus_reference_breed,
Meleagris_gallopavo_reference_strain
)
,
Coturnix_japonica
)
,
(
Anas_platyrhynchos_platyrhynchos,
Anser_brachyrhynchus
)
)
,
(
(
Aquila_chrysaetos_chrysaetos,
Strigops_habroptila
)
,
(
(
(
(
Serinus_canaria,
Taeniopygia_guttata
)
,
Geospiza_fortis
)
,
Parus_major
)
,
Ficedula_albicollis
)
)
)
,
Struthio_camelus_australis
)
,
Crocodylus_porosus
)
,
(
(
(
Gopherus_evgoodei,
Chelonoidis_abingdonii
)
,
(
Terrapene_carolina_triunguis,
Chrysemys_picta_bellii
)
)
,
Pelodiscus_sinensis
)
)
)
)
)
,
Latimeria_chalumnae
)
)
,
Callorhinchus_milii
)
)
)
)
,
Saccharomyces_cerevisiae;

@ -0,0 +1,461 @@
(
(
(
Drosophila_melanogaster,
Caenorhabditis_elegans
)
,
(
(
Ciona_intestinalis,
Ciona_savignyi
)
,
(
(
Eptatretus_burgeri,
Petromyzon_marinus
)
,
(
(
(
(
Danio_rerio,
Astyanax_mexicanus
)
,
(
(
(
(
(
Poecilia_formosa,
Xiphophorus_maculatus
)
,
Oryzias_latipes
)
,
Oreochromis_niloticus
)
,
(
(
Tetraodon_nigroviridis,
Takifugu_rubripes
)
,
Gasterosteus_aculeatus
)
)
,
Gadus_morhua
)
)
,
Lepisosteus_oculatus
)
,
(
(
(
(
(
(
(
(
(
(
(
(
(
(
(
(
(
(
(
(
Cricetulus_griseus_chok1gshd,
Cricetulus_griseus_crigri
)
,
Mesocricetus_auratus
)
,
Peromyscus_maniculatus_bairdii
)
,
Microtus_ochrogaster
)
,
(
(
(
(
(
(
(
(
(
(
(
(
(
Mus_musculus,
Mus_musculus_BALB/cJ
)
,
(
(
Mus_musculus_C3H/HeJ,
Mus_musculus_CBA/J
)
,
Mus_musculus_DBA/2J
)
)
,
Mus_musculus_AKR/J
)
,
(
(
Mus_musculus_FVB/NJ,
Mus_musculus_NOD/ShiLtJ
)
,
Mus_musculus_domesticus_WSB/EiJ
)
)
,
(
Mus_musculus_129S1/SvImJ,
Mus_musculus_LP/J
)
)
,
(
Mus_musculus_reference_CL57BL6,
Mus_musculus_C57BL/6NJ
)
)
,
Mus_musculus_NZO/HlLtJ
)
,
Mus_musculus_musculus_PWK/PhJ
)
,
Mus_musculus_castaneus_CAST/EiJ
)
,
Mus_spretus
)
,
Mus_caroli
)
,
Mus_pahari
)
,
Rattus_norvegicus
)
)
,
Nannospalax_galili
)
,
Jaculus_jaculus
)
,
Dipodomys_ordii
)
,
(
(
(
(
(
Cavia_porcellus,
Cavia_aperea
)
,
Chinchilla_lanigera
)
,
Octodon_degus
)
,
(
(
Heterocephalus_glaber_female,
Heterocephalus_glaber_male
)
,
Fukomys_damarensis
)
)
,
Ictidomys_tridecemlineatus
)
)
,
(
Oryctolagus_cuniculus,
Ochotona_princeps
)
)
,
Tupaia_belangeri
)
,
(
(
(
(
(
(
(
(
(
Pan_troglodytes,
Pan_paniscus
)
,
Homo_sapiens
)
,
Gorilla_gorilla
)
,
Pongo_abelii
)
,
Nomascus_leucogenys
)
,
(
(
(
(
(
Cercocebus_atys,
Mandrillus_leucophaeus
)
,
Papio_anubis
)
,
(
(
Macaca_fascicularis,
Macaca_mulatta
)
,
Macaca_nemestrina
)
)
,
Chlorocebus_sabaeus
)
,
(
(
Rhinopithecus_roxellana,
Rhinopithecus_bieti
)
,
Colobus_angolensis_palliatus
)
)
)
,
(
(
(
Cebus_capucinus,
Saimiri_boliviensis_boliviensis
)
,
Callithrix_jacchus
)
,
Aotus_nancymaae
)
)
,
Carlito_syrichta
)
,
(
(
Microcebus_murinus,
Propithecus_coquereli
)
,
Otolemur_garnettii
)
)
)
,
(
(
Erinaceus_europaeus,
Sorex_araneus
)
,
(
(
(
(
(
(
Sus_scrofa,
Vicugna_pacos
)
,
(
(
Ovis_aries,
Capra_hircus
)
,
Bos_taurus
)
)
,
Tursiops_truncatus
)
,
Equus_caballus
)
,
(
(
(
Ailuropoda_melanoleuca,
Mustela_putorius_furo
)
,
Canis_familiaris
)
,
(
(
Panthera_pardus,
Panthera_tigris_altaica
)
,
Felis_catus
)
)
)
,
(
Pteropus_vampyrus,
Myotis_lucifugus
)
)
)
)
,
(
Dasypus_novemcinctus,
Choloepus_hoffmanni
)
)
,
(
(
Loxodonta_africana,
Procavia_capensis
)
,
Echinops_telfairi
)
)
,
(
(
Sarcophilus_harrisii,
Notamacropus_eugenii
)
,
Monodelphis_domestica
)
)
,
Ornithorhynchus_anatinus
)
,
(
(
(
(
(
Gallus_gallus,
Meleagris_gallopavo
)
,
Anas_platyrhynchos
)
,
(
Ficedula_albicollis,
Taeniopygia_guttata
)
)
,
Pelodiscus_sinensis
)
,
Anolis_carolinensis
)
)
,
Xenopus_tropicalis
)
,
Latimeria_chalumnae
)
)
)
)
)
,
Saccharomyces_cerevisiae
)
;

@ -48,6 +48,7 @@ pub enum Language {
Kotlin,
Lua,
Make,
Newick,
Nix,
OCaml,
OCamlInterface,
@ -103,6 +104,7 @@ pub fn language_name(language: Language) -> &'static str {
Kotlin => "Kotlin",
Lua => "Lua",
Make => "Make",
Newick => "Newick",
Nix => "Nix",
OCaml => "OCaml",
OCamlInterface => "OCaml Interface",
@ -197,6 +199,7 @@ pub const LANG_EXTENSIONS: &[(Language, &[&str])] = &[
(Kotlin, &["kt", "ktm", "kts"]),
(Lua, &["lua"]),
(Make, &["mak", "d", "make", "makefile", "mk", "mkfile"]),
(Newick, &["nhx", "nwk", "nh"]),
(Nix, &["nix"]),
(OCaml, &["ml"]),
(OCamlInterface, &["mli"]),

@ -91,6 +91,7 @@ extern "C" {
fn tree_sitter_kotlin() -> ts::Language;
fn tree_sitter_lua() -> ts::Language;
fn tree_sitter_make() -> ts::Language;
fn tree_sitter_newick() -> ts::Language;
fn tree_sitter_nix() -> ts::Language;
fn tree_sitter_ocaml() -> ts::Language;
fn tree_sitter_ocaml_interface() -> ts::Language;
@ -620,6 +621,20 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig {
}],
}
}
Newick => {
let language = unsafe { tree_sitter_newick() };
TreeSitterConfig {
language,
atom_nodes: vec![].into_iter().collect(),
delimiter_tokens: vec![("(", ")")],
highlight_query: ts::Query::new(
language,
include_str!("../../vendored_parsers/highlights/newick.scm"),
)
.unwrap(),
sub_languages: vec![],
}
}
Nix => {
let language = unsafe { tree_sitter_nix() };
TreeSitterConfig {

@ -0,0 +1 @@
../tree-sitter-newick/queries/highlights.scm

@ -0,0 +1 @@
tree-sitter-newick/src

@ -0,0 +1,4 @@
.direnv/
node_modules/
.envrc

@ -0,0 +1,26 @@
[package]
name = "tree-sitter-newick"
description = "newick grammar for the tree-sitter parsing library"
version = "0.0.1"
keywords = ["incremental", "parsing", "newick"]
categories = ["parsing", "text-editors"]
# The grammar is hosted in delehef's repository (the same URL package.json
# uses), not under the tree-sitter organisation.
repository = "https://github.com/delehef/tree-sitter-newick"
edition = "2018"
license = "MIT"
build = "bindings/rust/build.rs"
# Files shipped in the published crate.
include = [
"bindings/rust/*",
"grammar.js",
"queries/*",
"src/*",
]
[lib]
path = "bindings/rust/lib.rs"
[dependencies]
tree-sitter = "~0.20.3"
[build-dependencies]
cc = "1.0"

@ -0,0 +1,65 @@
# tree-sitter-newick
A [tree-sitter](https://tree-sitter.github.io/tree-sitter/) grammar for the [Newick (nh, nwk)](https://en.wikipedia.org/wiki/Newick_format) and [New Hampshire X (nhx)](https://www.cs.mcgill.ca/~birch/birchhomedir/java/forester/NHX.pdf) formats for representing trees.
# Example
## NHX source
```
(
(
(
ADH2:0.1[&&NHX:S=human:E=1.1.1.1],
ADH1:0.11[&&NHX:S=human:E=1.1.1.1]
):0.05[&&NHX:S=Primates:E=1.1.1.1:D=Y:B=100],
ADHY:0.1[&&NHX:S=nematode:E=1.1.1.1],
ADHX:0.12[&&NHX:S=insect:E=1.1.1.1]
):0.1[&&NHX:S=Metazoa:E=1.1.1.1:D=N],
(
ADH4:0.09[&&NHX:S=yeast:E=1.1.1.1],
ADH3:0.13[&&NHX:S=yeast:E=1.1.1.1],
ADH2:0.12[&&NHX:S=yeast:E=1.1.1.1],
ADH1:0.11[&&NHX:S=yeast:E=1.1.1.1]
):0.1[&&NHX:S=Fungi]
)[&&NHX:E=1.1.1.1:D=N];
```
## tree-sitter-newick highlighting
![NHX file highlighted by tree-sitter-newick](./example.png)
## Syntax tree
```
(source_file [0, 0] - [16, 0]
(tree [0, 0] - [15, 23]
clade: (clade [0, 0] - [15, 22]
clade: (clade [1, 4] - [8, 40]
clade: (clade [2, 8] - [5, 52]
leaf: (leaf [3, 12] - [3, 45]
name: (name [3, 12] - [3, 16])
attributes: (attributes [3, 16] - [3, 45]
length: (length [3, 17] - [3, 20]
(float [3, 17] - [3, 20]))
data: (data [3, 20] - [3, 45]
entry: (nhx_entry [3, 26] - [3, 34]
key: (nhx_val [3, 27] - [3, 28])
value: (nhx_val [3, 29] - [3, 34]))
entry: (nhx_entry [3, 34] - [3, 44]
key: (nhx_val [3, 35] - [3, 36])
value: (nhx_val [3, 37] - [3, 44])))))
leaf: (leaf [4, 12] - [4, 46]
name: (name [4, 12] - [4, 16])
attributes: (attributes [4, 16] - [4, 46]
length: (length [4, 17] - [4, 21]
(float [4, 17] - [4, 21]))
data: (data [4, 21] - [4, 46]
entry: (nhx_entry [4, 27] - [4, 35]
key: (nhx_val [4, 28] - [4, 29])
value: (nhx_val [4, 30] - [4, 35]))
entry: (nhx_entry [4, 35] - [4, 45]
key: (nhx_val [4, 36] - [4, 37])
value: (nhx_val [4, 38] - [4, 45])))))
...
```

@ -0,0 +1,19 @@
{
"targets": [
{
"target_name": "tree_sitter_newick_binding",
"include_dirs": [
"<!(node -e \"require('nan')\")",
"src"
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
# If your language uses an external scanner, add it here.
],
"cflags_c": [
"-std=c99",
]
}
]
}

@ -0,0 +1,28 @@
#include "tree_sitter/parser.h"
#include <node.h>
#include "nan.h"
using namespace v8;
extern "C" TSLanguage * tree_sitter_newick();
namespace {

// Constructor callback backing the "Language" function template; it has an
// empty body.
NAN_METHOD(New) {}

// Module initializer: creates an instance of a "Language" class, stores the
// newick TSLanguage pointer in its internal field 0, tags it with
// name = "newick", and installs it as `module.exports`.
void Init(Local<Object> exports, Local<Object> module) {
  Local<FunctionTemplate> language_tpl = Nan::New<FunctionTemplate>(New);
  language_tpl->SetClassName(Nan::New("Language").ToLocalChecked());
  // Reserve one internal slot to carry the native pointer.
  language_tpl->InstanceTemplate()->SetInternalFieldCount(1);

  Local<Function> ctor = Nan::GetFunction(language_tpl).ToLocalChecked();
  Local<Object> language_obj =
      ctor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();

  Nan::SetInternalFieldPointer(language_obj, 0, tree_sitter_newick());
  Nan::Set(language_obj,
           Nan::New("name").ToLocalChecked(),
           Nan::New("newick").ToLocalChecked());
  Nan::Set(module, Nan::New("exports").ToLocalChecked(), language_obj);
}

NODE_MODULE(tree_sitter_newick_binding, Init)

}  // namespace

@ -0,0 +1,19 @@
// Load the compiled native binding, trying the Release build first.
try {
module.exports = require("../../build/Release/tree_sitter_newick_binding");
} catch (error1) {
// Anything other than "module not found" is a genuine failure: rethrow it.
if (error1.code !== 'MODULE_NOT_FOUND') {
throw error1;
}
// No Release build was found: fall back to the Debug build.
try {
module.exports = require("../../build/Debug/tree_sitter_newick_binding");
} catch (error2) {
if (error2.code !== 'MODULE_NOT_FOUND') {
throw error2;
}
// Neither build exists: report the first (Release) lookup failure.
throw error1
}
}
// Attach the node type description when it can be loaded; any error while
// loading it is deliberately ignored, leaving `nodeTypeInfo` unset.
try {
module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}

@ -0,0 +1,40 @@
/// Build script: compiles `src/parser.c` into a static library named
/// `parser` and tells Cargo to re-run the script when that file changes.
fn main() {
    let source_root = std::path::Path::new("src");
    let generated_parser = source_root.join("parser.c");

    let mut build = cc::Build::new();
    build
        .include(&source_root)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs")
        .file(&generated_parser);

    // If the language uses an external scanner written in C, add it to the
    // same build before `compile`:
    //
    //     let scanner_path = source_root.join("scanner.c");
    //     build.file(&scanner_path);
    //     println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());

    build.compile("parser");
    println!(
        "cargo:rerun-if-changed={}",
        generated_parser.to_str().unwrap()
    );

    // If the language uses an external scanner written in C++, give it its
    // own build:
    //
    //     let mut cpp_build = cc::Build::new();
    //     cpp_build.cpp(true);
    //     cpp_build.include(&source_root);
    //     cpp_build
    //         .flag_if_supported("-Wno-unused-parameter")
    //         .flag_if_supported("-Wno-unused-but-set-variable");
    //     let scanner_path = source_root.join("scanner.cc");
    //     cpp_build.file(&scanner_path);
    //     cpp_build.compile("scanner");
    //     println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
}

@ -0,0 +1,52 @@
//! This crate provides newick language support for the [tree-sitter][] parsing library.
//!
//! Typically, you will use the [language][language func] function to add this language to a
//! tree-sitter [Parser][], and then use the parser to parse some code:
//!
//! ```
//! let code = "";
//! let mut parser = tree_sitter::Parser::new();
//! parser.set_language(tree_sitter_newick::language()).expect("Error loading newick grammar");
//! let tree = parser.parse(code, None).unwrap();
//! ```
//!
//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
//! [language func]: fn.language.html
//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
//! [tree-sitter]: https://tree-sitter.github.io/
use tree_sitter::Language;
extern "C" {
// C symbol that hands back the grammar's `Language`.
// NOTE(review): presumably defined by the generated `src/parser.c` that the
// build script compiles — confirm.
fn tree_sitter_newick() -> Language;
}
/// Get the tree-sitter [Language][] for this grammar.
///
/// This is a safe wrapper around the `tree_sitter_newick` C entry point.
///
/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
pub fn language() -> Language {
// The foreign function takes no arguments; the call is `unsafe` only because
// it crosses the FFI boundary.
unsafe { tree_sitter_newick() }
}
/// The content of the [`node-types.json`][] file for this grammar.
///
/// The JSON text is embedded into the crate at compile time via
/// `include_str!`.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
// Uncomment these to include any queries that this grammar contains
// (the `'static` lifetime is implied for constants, so it is not spelled out).
// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm");
// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm");
// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
    /// Smoke test: a fresh parser must accept the newick language.
    #[test]
    fn test_can_load_grammar() {
        let mut parser = tree_sitter::Parser::new();
        let load_result = parser.set_language(super::language());
        load_result.expect("Error loading newick language");
    }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

@ -0,0 +1,16 @@
(
(
(
ADH2:0.1[&&NHX:S=human:E=1.1.1.1],
ADH1:0.11[&&NHX:S=human:E=1.1.1.1]
):0.05[&&NHX:S=Primates:E=1.1.1.1:D=Y:B=100],
ADHY:0.1[&&NHX:S=nematode:E=1.1.1.1],
ADHX:0.12[&&NHX:S=insect:E=1.1.1.1]
):0.1[&&NHX:S=Metazoa:E=1.1.1.1:D=N],
(
ADH4:0.09[&&NHX:S=yeast:E=1.1.1.1],
ADH3:0.13[&&NHX:S=yeast:E=1.1.1.1],
ADH2:0.12[&&NHX:S=yeast:E=1.1.1.1],
ADH1:0.11[&&NHX:S=yeast:E=1.1.1.1]
):0.1[&&NHX:S=Fungi]
)[&&NHX:E=1.1.1.1:D=N];

@ -0,0 +1,9 @@
(A);
(,,(,));
(A,B,(C,D));
(A,B,(C,D)E)F;
(:0.1,:0.2,(:0.3,:0.4):0.5);
(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;
(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);
(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;
((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;

@ -0,0 +1,31 @@
// Tree-sitter grammar for Newick trees with optional NHX ("[&&NHX...]")
// annotations.
module.exports = grammar({
name: "newick",
rules: {
// A file is zero or more trees.
source_file: $ => repeat($.tree),
// A tree is a single node terminated by ";".
tree: $ => seq($._node, ";"),
// Hidden rule: a node is either a leaf or a clade, exposed to the parent
// through the "leaf" / "clade" fields.
_node: $ => choice(field("leaf", $.leaf), field("clade", $.clade)),
// A leaf is either attributes alone (an unnamed leaf) or a name optionally
// followed by attributes.
leaf: $ => choice(
field("attributes", $.attributes),
seq(field("name", $.name), optional(field("attributes", $.attributes))),
),
// A clade is a parenthesised, comma-separated list of child nodes, followed
// by an optional name and optional attributes. Every child slot is optional,
// so inputs such as "(,)" are accepted.
// NOTE(review): unlike `leaf`, the trailing name/attributes here are not
// wrapped in field(), so they appear as plain children of the clade node.
clade: $ => seq(
"(",
optional($._node),
repeat(seq(",", optional($._node))),
")", optional($.name), optional($.attributes)
),
// Attributes are ":" + length, or an optional ":" + length followed by an
// NHX data block.
attributes: $ => choice(
seq(":", field("length", $.length)),
seq(optional(seq(":", field("length", $.length))), field("data", $.data))
),
// NHX data block: "[&&NHX", one or more entries, then "]".
data: $ => seq("[&&NHX", repeat1(field("entry", $.nhx_entry)), "]"),
// One NHX entry: ":" key "=" with an optional value.
nhx_entry: $ => seq(":", field("key", $.nhx_val), "=", optional(field("value", $.nhx_val))),
// A branch length is a single float token.
length: $ => $.float,
// Hidden token: a run of characters excluding ":", ",", ";", "(", ")", "[",
// "]", "=" and whitespace.
_data_safe: $ => /[^:,;()\[\]=\s]+/,
// An NHX key or value: one or more `_data_safe` tokens.
nhx_val: $ => repeat1($._data_safe),
// Digits with optional "_" separators, an optional fractional part and an
// optional exponent.
// NOTE(review): the pattern must start with a digit, so a leading sign
// (e.g. "-0.1") or a leading "." is not matched — confirm this is intended.
float: $ => /\d(_?\d)*(\.\d)?(_?\d)*([eE][\+-]?\d(_?\d)*)?/,
// A node name: a run of characters excluding ":", ",", ";", "(", ")", "[",
// "]" and whitespace ("=" is allowed here, unlike in `_data_safe`).
name: $ => /[^:,;()\[\]\s]+/,
}
});

@ -0,0 +1,47 @@
{
"name": "tree-sitter-newick",
"version": "1.0.0",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "tree-sitter-newick",
"version": "1.0.0",
"license": "MIT",
"dependencies": {
"nan": "^2.17.0"
},
"devDependencies": {
"tree-sitter-cli": "^0.20.7"
}
},
"node_modules/nan": {
"version": "2.17.0",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.17.0.tgz",
"integrity": "sha512-2ZTgtl0nJsO0KQCjEpxcIr5D+Yv90plTitZt9JBfQvVJDS5seMl3FOvsh3+9CoYWXf/1l5OaZzzF6nDm4cagaQ=="
},
"node_modules/tree-sitter-cli": {
"version": "0.20.7",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.7.tgz",
"integrity": "sha512-MHABT8oCPr4D0fatsPo6ATQ9H4h9vHpPRjlxkxJs80tpfAEKGn6A1zU3eqfCKBcgmfZDe9CiL3rKOGMzYHwA3w==",
"dev": true,
"hasInstallScript": true,
"bin": {
"tree-sitter": "cli.js"
}
}
},
"dependencies": {
"nan": {
"version": "2.17.0",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.17.0.tgz",
"integrity": "sha512-2ZTgtl0nJsO0KQCjEpxcIr5D+Yv90plTitZt9JBfQvVJDS5seMl3FOvsh3+9CoYWXf/1l5OaZzzF6nDm4cagaQ=="
},
"tree-sitter-cli": {
"version": "0.20.7",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.20.7.tgz",
"integrity": "sha512-MHABT8oCPr4D0fatsPo6ATQ9H4h9vHpPRjlxkxJs80tpfAEKGn6A1zU3eqfCKBcgmfZDe9CiL3rKOGMzYHwA3w==",
"dev": true
}
}
}

@ -0,0 +1,31 @@
{
"name": "tree-sitter-newick",
"version": "1.0.0",
"description": "A tree-sitter grammar for newick (extended) files",
"main": "grammar.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/delehef/tree-sitter-newick.git"
},
"author": "Franklin Delehelle",
"license": "MIT",
"bugs": {
"url": "https://github.com/delehef/tree-sitter-newick/issues"
},
"homepage": "https://github.com/delehef/tree-sitter-newick#readme",
"dependencies": {
"nan": "^2.17.0"
},
"devDependencies": {
"tree-sitter-cli": "^0.20.7"
},
"tree-sitter": [
{
"scope": "source.newick",
"file-types": ["nh", "nhx", "nwk"]
}
]
}

@ -0,0 +1,8 @@
; Delimiters of clades and NHX data blocks.
"(" @punctuation.bracket
")" @punctuation.bracket
"[&&NHX" @punctuation.bracket
"]" @punctuation.bracket

; Node names and branch lengths.
(name) @function
(length) @number

; NHX entries. The key and the value are captured by separate patterns because
; the grammar makes the value optional (":key="). A single pattern requiring
; both fields would never match a value-less entry, leaving its key
; unhighlighted.
(nhx_entry key: (nhx_val) @keyword)
(nhx_entry value: (nhx_val) @string.special)

@ -0,0 +1,9 @@
# Development shell providing the Rust and Node/tree-sitter tooling listed
# below. `pkgs` defaults to the nixpkgs found on NIX_PATH.
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
  buildInputs = with pkgs; [
    # Rust toolchain
    cargo
    rust-analyzer
    rustc
    rustfmt
    clippy
    # Node, tree-sitter and WebAssembly tooling
    nodejs
    tree-sitter
    emscripten
    # TypeScript tooling
    nodePackages.typescript
    nodePackages.typescript-language-server
  ];
}

@ -0,0 +1,317 @@
{
"name": "newick",
"rules": {
"source_file": {
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "tree"
}
},
"tree": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_node"
},
{
"type": "STRING",
"value": ";"
}
]
},
"_node": {
"type": "CHOICE",
"members": [
{
"type": "FIELD",
"name": "leaf",
"content": {
"type": "SYMBOL",
"name": "leaf"
}
},
{
"type": "FIELD",
"name": "clade",
"content": {
"type": "SYMBOL",
"name": "clade"
}
}
]
},
"leaf": {
"type": "CHOICE",
"members": [
{
"type": "FIELD",
"name": "attributes",
"content": {
"type": "SYMBOL",
"name": "attributes"
}
},
{
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "name",
"content": {
"type": "SYMBOL",
"name": "name"
}
},
{
"type": "CHOICE",
"members": [
{
"type": "FIELD",
"name": "attributes",
"content": {
"type": "SYMBOL",
"name": "attributes"
}
},
{
"type": "BLANK"
}
]
}
]
}
]
},
"clade": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "("
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_node"
},
{
"type": "BLANK"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ","
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_node"
},
{
"type": "BLANK"
}
]
}
]
}
},
{
"type": "STRING",
"value": ")"
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "name"
},
{
"type": "BLANK"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "attributes"
},
{
"type": "BLANK"
}
]
}
]
},
"attributes": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ":"
},
{
"type": "FIELD",
"name": "length",
"content": {
"type": "SYMBOL",
"name": "length"
}
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ":"
},
{
"type": "FIELD",
"name": "length",
"content": {
"type": "SYMBOL",
"name": "length"
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "FIELD",
"name": "data",
"content": {
"type": "SYMBOL",
"name": "data"
}
}
]
}
]
},
"data": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "[&&NHX"
},
{
"type": "REPEAT1",
"content": {
"type": "FIELD",
"name": "entry",
"content": {
"type": "SYMBOL",
"name": "nhx_entry"
}
}
},
{
"type": "STRING",
"value": "]"
}
]
},
"nhx_entry": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ":"
},
{
"type": "FIELD",
"name": "key",
"content": {
"type": "SYMBOL",
"name": "nhx_val"
}
},
{
"type": "STRING",
"value": "="
},
{
"type": "CHOICE",
"members": [
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "nhx_val"
}
},
{
"type": "BLANK"
}
]
}
]
},
"length": {
"type": "SYMBOL",
"name": "float"
},
"_data_safe": {
"type": "PATTERN",
"value": "[^:,;()\\[\\]=\\s]+"
},
"nhx_val": {
"type": "REPEAT1",
"content": {
"type": "SYMBOL",
"name": "_data_safe"
}
},
"float": {
"type": "PATTERN",
"value": "\\d(_?\\d)*(\\.\\d)?(_?\\d)*([eE][\\+-]?\\d(_?\\d)*)?"
},
"name": {
"type": "PATTERN",
"value": "[^:,;()\\[\\]\\s]+"
}
},
"extras": [
{
"type": "PATTERN",
"value": "\\s"
}
],
"conflicts": [],
"precedences": [],
"externals": [],
"inline": [],
"supertypes": []
}

@ -0,0 +1,237 @@
[
{
"type": "attributes",
"named": true,
"fields": {
"data": {
"multiple": false,
"required": false,
"types": [
{
"type": "data",
"named": true
}
]
},
"length": {
"multiple": false,
"required": false,
"types": [
{
"type": "length",
"named": true
}
]
}
}
},
{
"type": "clade",
"named": true,
"fields": {
"clade": {
"multiple": true,
"required": false,
"types": [
{
"type": "clade",
"named": true
}
]
},
"leaf": {
"multiple": true,
"required": false,
"types": [
{
"type": "leaf",
"named": true
}
]
}
},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "attributes",
"named": true
},
{
"type": "name",
"named": true
}
]
}
},
{
"type": "data",
"named": true,
"fields": {
"entry": {
"multiple": true,
"required": true,
"types": [
{
"type": "nhx_entry",
"named": true
}
]
}
}
},
{
"type": "leaf",
"named": true,
"fields": {
"attributes": {
"multiple": false,
"required": false,
"types": [
{
"type": "attributes",
"named": true
}
]
},
"name": {
"multiple": false,
"required": false,
"types": [
{
"type": "name",
"named": true
}
]
}
}
},
{
"type": "length",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "float",
"named": true
}
]
}
},
{
"type": "nhx_entry",
"named": true,
"fields": {
"key": {
"multiple": false,
"required": true,
"types": [
{
"type": "nhx_val",
"named": true
}
]
},
"value": {
"multiple": false,
"required": false,
"types": [
{
"type": "nhx_val",
"named": true
}
]
}
}
},
{
"type": "nhx_val",
"named": true,
"fields": {}
},
{
"type": "source_file",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "tree",
"named": true
}
]
}
},
{
"type": "tree",
"named": true,
"fields": {
"clade": {
"multiple": false,
"required": false,
"types": [
{
"type": "clade",
"named": true
}
]
},
"leaf": {
"multiple": false,
"required": false,
"types": [
{
"type": "leaf",
"named": true
}
]
}
}
},
{
"type": "(",
"named": false
},
{
"type": ")",
"named": false
},
{
"type": ",",
"named": false
},
{
"type": ":",
"named": false
},
{
"type": ";",
"named": false
},
{
"type": "=",
"named": false
},
{
"type": "[&&NHX",
"named": false
},
{
"type": "]",
"named": false
},
{
"type": "float",
"named": true
},
{
"type": "name",
"named": true
}
]

File diff suppressed because it is too large Load Diff

@ -0,0 +1,224 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
/* Reserved symbol value for errors: -1 converted to TSSymbol. */
#define ts_builtin_sym_error ((TSSymbol)-1)
/* Reserved symbol value 0 for the end of input. */
#define ts_builtin_sym_end 0
/* NOTE(review): presumably the size in bytes of the buffer handed to an
 * external scanner's serialize callback — confirm against the runtime. */
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
/* Identifier of a parse state. */
typedef uint16_t TSStateId;
/* These types are declared here only when the TREE_SITTER_API_H_ guard is not
 * already defined, so the two headers cannot declare them twice. */
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
/* One element of the table referenced by TSLanguage.field_map_entries:
 * a field id paired with a child index and an `inherited` flag. */
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
/* One element of the table referenced by TSLanguage.field_map_slices.
 * NOTE(review): presumably an (index, length) range into the field map
 * entries — confirm against the runtime. */
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
/* Per-symbol flags; one element of the table referenced by
 * TSLanguage.symbol_metadata. */
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
/* Lexer interface used by the lexer macros in this header and passed to the
 * lex functions and the external scanner's scan callback. */
typedef struct TSLexer TSLexer;
struct TSLexer {
/* Current lookahead value; START_LEXER copies it into a local. */
int32_t lookahead;
/* Symbol of the recognized token; set by ACCEPT_TOKEN. */
TSSymbol result_symbol;
/* Moves the lexer forward; START_LEXER passes its `skip` flag as the bool. */
void (*advance)(TSLexer *, bool);
/* Called by ACCEPT_TOKEN after the result symbol is recorded. */
void (*mark_end)(TSLexer *);
/* Remaining callbacks are not used by the macros in this header. */
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
/* Kinds of parse action; stored in the `type` byte of TSParseAction. */
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
/* A single parse action. Every member starts with a uint8_t `type`, so the
 * bare `type` member overlays the tag of both the `shift` and the `reduce`
 * layouts. */
typedef union {
/* Layout filled in by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA. */
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
/* Layout filled in by REDUCE. */
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
/* Tag only; this is what RECOVER and ACCEPT_INPUT set. */
uint8_t type;
} TSParseAction;
/* Pair of lex state ids (regular and external); one element of the table
 * referenced by TSLanguage.lex_modes. */
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
/* One slot of the table referenced by TSLanguage.parse_actions: either a
 * parse action or an `entry` header holding a count and a `reusable` flag.
 * NOTE(review): presumably a header is followed by `count` action slots —
 * confirm against the generated parser. */
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
/* Description of a grammar: counts, pointers to the parse/lex tables and the
 * lexing callbacks. The field order is part of the binary layout and must not
 * be changed.
 * NOTE(review): presumably filled in by the generated parser.c and consumed
 * by the tree-sitter runtime — confirm. */
struct TSLanguage {
/* Version number and table sizes. */
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
/* Parse tables and the actions they refer to. */
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
/* Name tables for symbols and fields. */
const char * const *symbol_names;
const char * const *field_names;
/* Field map tables (see TSFieldMapSlice / TSFieldMapEntry). */
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
/* Symbol metadata and alias tables. */
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
/* Lexing: lex mode table plus the main and keyword lex functions, each taking
 * the lexer and a state id. */
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
/* Tables and callbacks of an optional external scanner. */
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*
* These macros expect a `lexer` pointer (TSLexer *) and a `state` variable to
* be in scope at the point of use.
*/
/*
* START_LEXER: declares the locals `result`, `skip`, `eof` and `lookahead`,
* then jumps to `start`. The `next_state` label advances the lexer (passing
* the current `skip` flag) and falls through to `start`, which clears `skip`
* and reloads `lookahead` from the lexer.
*/
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
/* ADVANCE: switch to `state_value` and consume the current lookahead. */
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
/* SKIP: like ADVANCE, but sets `skip` so the lexer is advanced with
 * skip == true. */
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
/*
* ACCEPT_TOKEN: mark the lex as successful, record the token symbol and call
* mark_end on the lexer.
* NOTE(review): this expands to three separate statements that are not
* wrapped in a block, so it must not be the sole body of an unbraced
* if/else/loop.
*/
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
/* END_STATE: return the accumulated `result`. */
#define END_STATE() return result;
/*
* Parse Table Macros
*/
/*
* SMALL_STATE: offset of state `id` relative to LARGE_STATE_COUNT, which must
* be defined by the including file. The argument and the whole expansion are
* parenthesized so the result stays a single operand inside larger
* expressions (e.g. `2 * SMALL_STATE(5)`).
*/
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
/* STATE / ACTIONS: expand to their argument unchanged. */
#define STATE(id) id
#define ACTIONS(id) id
/*
* The macros below each expand to a doubly-braced initializer. The nesting
* matches a TSParseActionEntry whose first member is a TSParseAction.
*/
/* SHIFT: shift action targeting `state_value`. */
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
/* SHIFT_REPEAT: shift action with the `repetition` flag set. */
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
/* SHIFT_EXTRA: shift action with the `extra` flag set and no target state
 * given. */
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
/* REDUCE: reduce action producing `symbol_val` from `child_count_val`
 * children; any further arguments are pasted in as additional initializers of
 * the `reduce` struct. */
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
/* RECOVER: action whose tag is TSParseActionTypeRecover. */
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
/* ACCEPT_INPUT: action whose tag is TSParseActionTypeAccept. */
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_