diff --git a/Cargo.lock b/Cargo.lock
index 73ea2185f..2b3986a6f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -37,6 +37,12 @@ version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
 
+[[package]]
+name = "cc"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
+
 [[package]]
 name = "clap"
 version = "2.33.3"
@@ -102,6 +108,8 @@ dependencies = [
  "strsim 0.10.0",
  "term_size",
  "toml",
+ "tree-sitter",
+ "tree-sitter-rust",
  "typed-arena",
 ]
 
@@ -308,6 +316,26 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "tree-sitter"
+version = "0.19.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad726ec26496bf4c083fff0f43d4eb3a2ad1bba305323af5ff91383c0b6ecac0"
+dependencies = [
+ "cc",
+ "regex",
+]
+
+[[package]]
+name = "tree-sitter-rust"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "784f7ef9cdbd4c895dc2d4bb785e95b4a5364a602eec803681db83d1927ddf15"
+dependencies = [
+ "cc",
+ "tree-sitter",
+]
+
 [[package]]
 name = "typed-arena"
 version = "2.0.1"
diff --git a/Cargo.toml b/Cargo.toml
index c0fab601f..7dd841d07 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,8 @@ rustc-hash = "1.1.0"
 strsim = "0.10.0"
 lazy_static = "1.4.0"
 atty = "0.2.14"
+tree-sitter = "0.19.5"
+tree-sitter-rust = "0.19.0"
 
 [dev-dependencies]
 pretty_assertions = "0.6.1"
diff --git a/src/main.rs b/src/main.rs
index ca46ad3f8..0eb00f357 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,6 +5,7 @@ mod lines;
 mod parse;
 mod positions;
 mod side_by_side;
+mod sitter;
 mod style;
 mod syntax;
 use clap::{App, AppSettings, Arg};
@@ -81,7 +82,8 @@ fn main() {
     };
 
     let syntax_toml = ConfigDir::read_default_toml();
-    let lang = match Path::new(&display_path).extension() {
+    let extension = Path::new(&display_path).extension();
+    let lang = match extension {
         Some(extension) => find_lang(syntax_toml, &OsStr::to_string_lossy(extension)),
         None => None,
     };
@@ -112,9 +114,17 @@ fn main() {
 
     let arena = Arena::new();
 
-    let (lhs, rhs) = match &lang {
-        Some(lang) => (parse(&arena, &lhs_src, lang), parse(&arena, &rhs_src, lang)),
-        None => (parse_lines(&arena, &lhs_src), parse_lines(&arena, &rhs_src)),
+    let prefer_legacy_parser = env::var("DFT_LEGACY").is_ok();
+    let (lhs, rhs) = if !prefer_legacy_parser && extension.map_or(false, sitter::supported) {
+        (
+            sitter::parse(&arena, &lhs_src),
+            sitter::parse(&arena, &rhs_src),
+        )
+    } else {
+        match &lang {
+            Some(lang) => (parse(&arena, &lhs_src, lang), parse(&arena, &rhs_src, lang)),
+            None => (parse_lines(&arena, &lhs_src), parse_lines(&arena, &rhs_src)),
+        }
     };
 
     init_info(&lhs);
diff --git a/src/sitter.rs b/src/sitter.rs
new file mode 100644
index 000000000..6ea7beb40
--- /dev/null
+++ b/src/sitter.rs
@@ -0,0 +1,100 @@
+//! Convert tree-sitter parse trees into difftastic `Syntax` nodes.
+
+use std::ffi::OsStr;
+
+use tree_sitter::{Parser, TreeCursor};
+use typed_arena::Arena;
+
+use crate::{lines::NewlinePositions, syntax::Syntax};
+
+/// Return true if files with this `extension` are parsed with
+/// tree-sitter. Only Rust (`rs`) is supported so far.
+pub fn supported(extension: &OsStr) -> bool {
+    extension == "rs"
+}
+
+/// Parse `src` with the tree-sitter Rust grammar and return one
+/// `Syntax` node, allocated in `arena`, per top-level syntax item.
+///
+/// Returns an empty vec when the parse tree has no top-level items,
+/// e.g. for an empty `src`.
+///
+/// # Panics
+///
+/// Panics if the Rust grammar cannot be loaded, or if tree-sitter
+/// does not produce a tree.
+pub fn parse<'a>(arena: &'a Arena<Syntax<'a>>, src: &str) -> Vec<&'a Syntax<'a>> {
+    let mut parser = Parser::new();
+    parser
+        .set_language(tree_sitter_rust::language())
+        .expect("Error loading Rust grammar");
+    let tree = parser
+        .parse(src, None)
+        .expect("Parser has a language and no timeout, so it should return a tree");
+
+    let nl_pos = NewlinePositions::from(src);
+    let mut cursor = tree.walk();
+
+    // The tree always has a single root, whereas we want nodes for
+    // each top level syntax item. If the root has no children, the
+    // cursor stays on the root, so return early rather than
+    // converting the root itself.
+    if !cursor.goto_first_child() {
+        return vec![];
+    }
+
+    syntax_from_cursor(arena, src, &nl_pos, &mut cursor)
+}
+
+/// Convert the node under `cursor`, and all of its following
+/// siblings, to `Syntax` nodes allocated in `arena`.
+///
+/// Nodes with children become lists (with empty delimiters), and
+/// are converted recursively. Leaf nodes become comments if
+/// tree-sitter marks them as extra, and atoms otherwise.
+///
+/// On return, `cursor` points at the last sibling visited.
+fn syntax_from_cursor<'a>(
+    arena: &'a Arena<Syntax<'a>>,
+    src: &str,
+    nl_pos: &NewlinePositions,
+    cursor: &mut TreeCursor,
+) -> Vec<&'a Syntax<'a>> {
+    let mut result: Vec<&Syntax> = vec![];
+
+    loop {
+        let node = cursor.node();
+
+        if cursor.goto_first_child() {
+            // This node has children, so treat it as a list.
+            let children = syntax_from_cursor(arena, src, nl_pos, cursor);
+            cursor.goto_parent();
+
+            let open_position = nl_pos.from_offsets(node.start_byte(), node.start_byte());
+            let close_position = nl_pos.from_offsets(node.end_byte(), node.end_byte());
+            result.push(Syntax::new_list(
+                arena,
+                "",
+                open_position,
+                children,
+                "",
+                close_position,
+            ));
+        } else {
+            let position = nl_pos.from_offsets(node.start_byte(), node.end_byte());
+            let content = &src[node.start_byte()..node.end_byte()];
+
+            if node.is_extra() {
+                result.push(Syntax::new_comment(arena, position, content));
+            } else {
+                result.push(Syntax::new_atom(arena, position, content));
+            }
+        }
+
+        if !cursor.goto_next_sibling() {
+            break;
+        }
+    }
+
+    result
+}