Initial tree-sitter prototype

ida_star
Wilfred Hughes 2021-08-12 17:58:43 +07:00
parent 70ee1b736e
commit e7a6c3b11e
4 changed files with 116 additions and 4 deletions

28
Cargo.lock generated

@ -37,6 +37,12 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "cc"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
[[package]]
name = "clap"
version = "2.33.3"
@ -102,6 +108,8 @@ dependencies = [
"strsim 0.10.0",
"term_size",
"toml",
"tree-sitter",
"tree-sitter-rust",
"typed-arena",
]
@ -308,6 +316,26 @@ dependencies = [
"serde",
]
[[package]]
name = "tree-sitter"
version = "0.19.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad726ec26496bf4c083fff0f43d4eb3a2ad1bba305323af5ff91383c0b6ecac0"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-rust"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784f7ef9cdbd4c895dc2d4bb785e95b4a5364a602eec803681db83d1927ddf15"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "typed-arena"
version = "2.0.1"

@ -22,6 +22,8 @@ rustc-hash = "1.1.0"
strsim = "0.10.0"
lazy_static = "1.4.0"
atty = "0.2.14"
tree-sitter = "0.19.5"
tree-sitter-rust = "0.19.0"
[dev-dependencies]
pretty_assertions = "0.6.1"

@ -5,6 +5,7 @@ mod lines;
mod parse;
mod positions;
mod side_by_side;
mod sitter;
mod style;
mod syntax;
use clap::{App, AppSettings, Arg};
@ -81,7 +82,8 @@ fn main() {
};
let syntax_toml = ConfigDir::read_default_toml();
let lang = match Path::new(&display_path).extension() {
let extension = Path::new(&display_path).extension();
let lang = match extension {
Some(extension) => find_lang(syntax_toml, &OsStr::to_string_lossy(extension)),
None => None,
};
@ -112,9 +114,17 @@ fn main() {
let arena = Arena::new();
let (lhs, rhs) = match &lang {
Some(lang) => (parse(&arena, &lhs_src, lang), parse(&arena, &rhs_src, lang)),
None => (parse_lines(&arena, &lhs_src), parse_lines(&arena, &rhs_src)),
let prefer_legacy_parser = env::var("DFT_LEGACY").is_ok();
let (lhs, rhs) = if sitter::supported(extension.unwrap_or_else(|| OsStr::new(""))) && !prefer_legacy_parser {
(
sitter::parse(&arena, &lhs_src),
sitter::parse(&arena, &rhs_src),
)
} else {
match &lang {
Some(lang) => (parse(&arena, &lhs_src, lang), parse(&arena, &rhs_src, lang)),
None => (parse_lines(&arena, &lhs_src), parse_lines(&arena, &rhs_src)),
}
};
init_info(&lhs);

@ -0,0 +1,72 @@
use std::ffi::OsStr;
use tree_sitter::{Parser, TreeCursor};
use typed_arena::Arena;
use crate::{lines::NewlinePositions, syntax::Syntax};
/// Report whether the tree-sitter based parser handles files with the
/// given `extension` (the extension without a leading dot).
///
/// Only the exact, case-sensitive extension `rs` is accepted.
pub fn supported(extension: &OsStr) -> bool {
    // An extension that is not valid Unicode can never be "rs".
    matches!(extension.to_str(), Some("rs"))
}
/// Parse `src` as Rust with tree-sitter and convert the resulting tree
/// into `Syntax` nodes allocated in `arena`.
///
/// Returns one `Syntax` node per top-level child of the tree-sitter root
/// node, in source order. If the root has no children (expected when
/// `src` contains no syntax items, e.g. empty input), an empty vector is
/// returned rather than a zero-width node for the root itself.
///
/// # Panics
///
/// Panics if the Rust grammar cannot be loaded into the parser, or if
/// tree-sitter fails to produce a tree.
pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, src: &str) -> Vec<&'a Syntax<'a>> {
    let mut parser = Parser::new();
    parser
        .set_language(tree_sitter_rust::language())
        .expect("Error loading Rust grammar");

    // A language is set and no timeout or cancellation flag is configured,
    // so a missing tree here would indicate a bug rather than bad input.
    let tree = parser
        .parse(src, None)
        .expect("tree-sitter should produce a tree once a language is set");

    let mut cursor = tree.walk();

    // The tree always has a single root, whereas we want nodes for
    // each top level syntax item. If the root is childless the cursor
    // would stay on the root, so bail out instead of converting it.
    if !cursor.goto_first_child() {
        return Vec::new();
    }

    let nl_pos = NewlinePositions::from(src);
    syntax_from_cursor(arena, src, &nl_pos, &mut cursor)
}
/// Convert the node under `cursor` and all of its following siblings into
/// `Syntax` nodes allocated in `arena`, returned in sibling order.
///
/// * A node with children becomes a list with empty open and close
///   delimiters, positioned at the node's start and end byte offsets.
/// * A childless node that tree-sitter marks as "extra" becomes a comment.
/// * Any other childless node becomes an atom holding its source text.
///
/// On return the cursor is left on the last sibling at this level.
fn syntax_from_cursor<'a>(
    arena: &'a Arena<Syntax<'a>>,
    src: &str,
    nl_pos: &NewlinePositions,
    cursor: &mut TreeCursor,
) -> Vec<&'a Syntax<'a>> {
    let mut siblings = Vec::new();

    loop {
        let current = cursor.node();
        let (start, end) = (current.start_byte(), current.end_byte());

        let syntax = if cursor.goto_first_child() {
            // Interior node: descend, convert the children, then climb
            // back so that sibling iteration continues at this level.
            let children = syntax_from_cursor(arena, src, nl_pos, cursor);
            cursor.goto_parent();

            let open_position = nl_pos.from_offsets(start, start);
            let close_position = nl_pos.from_offsets(end, end);
            Syntax::new_list(arena, "", open_position, children, "", close_position)
        } else {
            // Leaf node: keep its exact source text.
            let position = nl_pos.from_offsets(start, end);
            let content = &src[start..end];

            if current.is_extra() {
                Syntax::new_comment(arena, position, content)
            } else {
                Syntax::new_atom(arena, position, content)
            }
        };
        siblings.push(syntax);

        if !cursor.goto_next_sibling() {
            return siblings;
        }
    }
}