From c5fe152f259bb33f2a587fec6f8ad1c78f27c49f Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Tue, 24 May 2022 09:37:22 -0700 Subject: [PATCH] Define a parse submodule --- src/changes.rs | 2 +- src/diff/dijkstra.rs | 2 +- src/diff/graph.rs | 2 +- src/diff/sliders.rs | 10 ++-- src/diff/unchanged.rs | 6 +- src/display/context.rs | 2 +- src/display/inline.rs | 2 +- src/display/side_by_side.rs | 2 +- src/display/style.rs | 2 +- src/hunks.rs | 2 +- src/line_parser.rs | 2 +- src/main.rs | 16 +++--- src/options.rs | 2 +- src/{ => parse}/guess_language.rs | 0 src/parse/mod.rs | 3 + src/{ => parse}/syntax.rs | 0 src/{ => parse}/tree_sitter_parser.rs | 82 +++++++++++++-------------- src/summary.rs | 2 +- 18 files changed, 71 insertions(+), 68 deletions(-) rename src/{ => parse}/guess_language.rs (100%) create mode 100644 src/parse/mod.rs rename src/{ => parse}/syntax.rs (100%) rename src/{ => parse}/tree_sitter_parser.rs (92%) diff --git a/src/changes.rs b/src/changes.rs index c4ab04c17..5844b9c77 100644 --- a/src/changes.rs +++ b/src/changes.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU32; use rustc_hash::FxHashMap; -use crate::syntax::Syntax; +use crate::parse::syntax::Syntax; #[derive(PartialEq, Eq, Clone, Copy)] pub enum ChangeKind<'a> { diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index ceb2118f1..53a1304c4 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -6,7 +6,7 @@ use std::{cmp::Reverse, env, rc::Rc}; use crate::{ changes::ChangeMap, diff::graph::{neighbours, populate_change_map, Edge, Vertex}, - syntax::Syntax, + parse::syntax::Syntax, }; use bumpalo::Bump; use itertools::Itertools; diff --git a/src/diff/graph.rs b/src/diff/graph.rs index aee2f1c37..59ec06a82 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -11,7 +11,7 @@ use strsim::normalized_levenshtein; use crate::{ changes::{insert_deep_unchanged, ChangeKind, ChangeMap}, - syntax::{AtomKind, Syntax, SyntaxId}, + parse::syntax::{AtomKind, Syntax, SyntaxId}, }; use Edge::*; 
diff --git a/src/diff/sliders.rs b/src/diff/sliders.rs index bc1393e35..acb77fb69 100644 --- a/src/diff/sliders.rs +++ b/src/diff/sliders.rs @@ -31,9 +31,9 @@ use crate::{ changes::{insert_deep_novel, insert_deep_unchanged, ChangeKind::*, ChangeMap}, - guess_language, + parse::guess_language, positions::SingleLineSpan, - syntax::Syntax, + parse::syntax::Syntax, }; use Syntax::*; @@ -52,7 +52,7 @@ pub fn fix_all_sliders<'a>( /// Should nester slider correction prefer the inner or outer /// delimiter? fn prefer_outer_delimiter(language: guess_language::Language) -> bool { - use guess_language::Language::*; + use crate::parse::guess_language::Language::*; match language { // For Lisp family languages, we get the best result with the // outer delimiter. @@ -605,9 +605,9 @@ impl<'a> Syntax<'a> { mod tests { use super::*; use crate::{ - guess_language, + parse::guess_language, syntax::{init_all_info, AtomKind}, - tree_sitter_parser::{from_language, parse}, + parse::tree_sitter_parser::{from_language, parse}, }; use pretty_assertions::assert_eq; use typed_arena::Arena; diff --git a/src/diff/unchanged.rs b/src/diff/unchanged.rs index 5b8c011fe..acbb24c17 100644 --- a/src/diff/unchanged.rs +++ b/src/diff/unchanged.rs @@ -4,7 +4,7 @@ use crate::changes::{insert_deep_unchanged, ChangeKind, ChangeMap}; use crate::diff::myers_diff; -use crate::syntax::Syntax; +use crate::parse::syntax::Syntax; const TINY_TREE_THRESHOLD: u32 = 10; const MOSTLY_UNCHANGED_MIN_NODES: usize = 4; @@ -432,9 +432,9 @@ fn shrink_unchanged_at_ends<'a>( mod tests { use super::*; use crate::{ - guess_language, + parse::guess_language, syntax::init_all_info, - tree_sitter_parser::{from_language, parse}, + parse::tree_sitter_parser::{from_language, parse}, }; use typed_arena::Arena; diff --git a/src/display/context.rs b/src/display/context.rs index 2f791d44e..c7d08065f 100644 --- a/src/display/context.rs +++ b/src/display/context.rs @@ -7,7 +7,7 @@ use rustc_hash::{FxHashMap, FxHashSet}; use crate::{ 
lines::LineNumber, - syntax::{zip_repeat_shorter, MatchKind, MatchedPos}, + parse::syntax::{zip_repeat_shorter, MatchKind, MatchedPos}, }; /// The maximum number of lines that may be displayed above and below diff --git a/src/display/inline.rs b/src/display/inline.rs index dd83aecfe..b1ce981c5 100644 --- a/src/display/inline.rs +++ b/src/display/inline.rs @@ -6,7 +6,7 @@ use crate::{ hunks::Hunk, lines::{format_line_num, MaxLine}, options::DisplayOptions, - syntax::MatchedPos, + parse::syntax::MatchedPos, }; use owo_colors::colored::*; diff --git a/src/display/side_by_side.rs b/src/display/side_by_side.rs index f2afe4f22..564fafdfa 100644 --- a/src/display/side_by_side.rs +++ b/src/display/side_by_side.rs @@ -14,7 +14,7 @@ use crate::{ lines::{codepoint_len, format_line_num, LineNumber}, options::{DisplayMode, DisplayOptions}, positions::SingleLineSpan, - syntax::{zip_pad_shorter, MatchedPos}, + parse::syntax::{zip_pad_shorter, MatchedPos}, }; const SPACER: &str = " "; diff --git a/src/display/style.rs b/src/display/style.rs index f5a4c243a..01b76ad2d 100644 --- a/src/display/style.rs +++ b/src/display/style.rs @@ -5,7 +5,7 @@ use crate::{ lines::{byte_len, codepoint_len, LineNumber}, options::DisplayOptions, positions::SingleLineSpan, - syntax::{AtomKind, MatchKind, MatchedPos, TokenKind}, + parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind}, }; use owo_colors::{OwoColorize, Style}; use rustc_hash::FxHashMap; diff --git a/src/hunks.rs b/src/hunks.rs index a29a284d0..2a8f101ab 100644 --- a/src/hunks.rs +++ b/src/hunks.rs @@ -14,7 +14,7 @@ use crate::{ display::context::{add_context, opposite_positions, MAX_PADDING}, display::side_by_side::lines_with_novel, lines::LineNumber, - syntax::{zip_pad_shorter, MatchedPos}, + parse::syntax::{zip_pad_shorter, MatchedPos}, }; /// A hunk represents a series of modified lines that are displayed diff --git a/src/line_parser.rs b/src/line_parser.rs index 8b0244fb9..2be015017 100644 --- a/src/line_parser.rs +++ 
b/src/line_parser.rs @@ -9,7 +9,7 @@ use rustc_hash::FxHashMap; use crate::{ diff::myers_diff, lines::NewlinePositions, - syntax::{split_words, AtomKind, MatchKind, MatchedPos, TokenKind}, + parse::syntax::{split_words, AtomKind, MatchKind, MatchedPos, TokenKind}, }; fn split_lines_keep_newline(s: &str) -> Vec<&str> { diff --git a/src/main.rs b/src/main.rs index 9a07a340f..979a26cd6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,27 +18,26 @@ mod constants; mod diff; mod display; mod files; -mod guess_language; mod hunks; mod line_parser; mod lines; mod options; +mod parse; mod positions; mod summary; -mod syntax; -mod tree_sitter_parser; #[macro_use] extern crate log; use crate::diff::{dijkstra, unchanged}; use crate::hunks::{matched_pos_to_hunks, merge_adjacent}; +use crate::parse::syntax; use changes::ChangeMap; use display::context::opposite_positions; use files::{is_probably_binary, read_files_or_die, read_or_die, relative_paths_in_either}; -use guess_language::guess; use log::info; use mimalloc::MiMalloc; +use parse::guess_language::guess; /// The global allocator used by difftastic. 
/// @@ -56,7 +55,8 @@ use syntax::init_next_prev; use typed_arena::Arena; use crate::{ - dijkstra::mark_syntax, lines::MaxLine, syntax::init_all_info, tree_sitter_parser as tsp, + dijkstra::mark_syntax, lines::MaxLine, parse::syntax::init_all_info, + parse::tree_sitter_parser as tsp, }; extern crate pretty_env_logger; @@ -187,7 +187,7 @@ fn diff_file( missing_as_empty: bool, node_limit: u32, byte_limit: usize, - language_override: Option<guess_language::Language>, + language_override: Option<parse::guess_language::Language>, ) -> DiffResult { let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty); diff_file_content( @@ -210,7 +210,7 @@ fn diff_file_content( tab_width: usize, node_limit: u32, byte_limit: usize, - language_override: Option<guess_language::Language>, + language_override: Option<parse::guess_language::Language>, ) -> DiffResult { if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) { return DiffResult { @@ -360,7 +360,7 @@ fn diff_directories<'a>( display_options: &DisplayOptions, node_limit: u32, byte_limit: usize, - language_override: Option<guess_language::Language>, + language_override: Option<parse::guess_language::Language>, ) -> impl ParallelIterator<Item = DiffResult> + 'a { let display_options = display_options.clone(); diff --git a/src/options.rs b/src/options.rs index e30b6d967..c59e087f0 100644 --- a/src/options.rs +++ b/src/options.rs @@ -6,7 +6,7 @@ use atty::Stream; use clap::{crate_authors, crate_description, crate_version, Arg, Command}; use const_format::formatcp; -use crate::{display::style::BackgroundColor, guess_language}; +use crate::{display::style::BackgroundColor, parse::guess_language}; pub const DEFAULT_NODE_LIMIT: u32 = 30_000; pub const DEFAULT_BYTE_LIMIT: usize = 1_000_000; diff --git a/src/guess_language.rs b/src/parse/guess_language.rs similarity index 100% rename from src/guess_language.rs rename to src/parse/guess_language.rs diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 000000000..a5f352511 --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,3 @@ +pub mod guess_language; +pub mod tree_sitter_parser; +pub mod syntax; diff --git a/src/syntax.rs
b/src/parse/syntax.rs similarity index 100% rename from src/syntax.rs rename to src/parse/syntax.rs diff --git a/src/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs similarity index 92% rename from src/tree_sitter_parser.rs rename to src/parse/tree_sitter_parser.rs index 2360028bf..a0254c157 100644 --- a/src/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -2,13 +2,13 @@ use std::collections::HashSet; -use crate::guess_language as guess; +use crate::parse::guess_language as guess; use tree_sitter as ts; use typed_arena::Arena; use crate::{ lines::NewlinePositions, - syntax::{AtomKind, Syntax}, + parse::syntax::{AtomKind, Syntax}, }; /// Configuration for a tree-sitter parser. @@ -106,7 +106,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("{", "}"), ("[", "]")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/bash.scm"), + include_str!("../../vendor/highlights/bash.scm"), ) .unwrap(), } @@ -120,7 +120,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("{", "}"), ("[", "]")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/c.scm"), + include_str!("../../vendor/highlights/c.scm"), ) .unwrap(), } @@ -137,8 +137,8 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new( language, concat!( - include_str!("../vendor/highlights/c.scm"), - include_str!("../vendor/highlights/cpp.scm") + include_str!("../../vendor/highlights/c.scm"), + include_str!("../../vendor/highlights/cpp.scm") ), ) .unwrap(), @@ -155,7 +155,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/clojure.scm"), + include_str!("../../vendor/highlights/clojure.scm"), ) .unwrap(), } @@ -185,7 +185,7 @@ pub fn from_language(language: 
guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/c-sharp.scm"), + include_str!("../../vendor/highlights/c-sharp.scm"), ) .unwrap(), } @@ -199,7 +199,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/css.scm"), + include_str!("../../vendor/highlights/css.scm"), ) .unwrap(), } @@ -213,7 +213,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/dart.scm"), + include_str!("../../vendor/highlights/dart.scm"), ) .unwrap(), } @@ -229,7 +229,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/elisp.scm"), + include_str!("../../vendor/highlights/elisp.scm"), ) .unwrap(), } @@ -245,7 +245,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/elixir.scm"), + include_str!("../../vendor/highlights/elixir.scm"), ) .unwrap(), } @@ -259,7 +259,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("[", "]"), ("(", ")")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/elm.scm"), + include_str!("../../vendor/highlights/elm.scm"), ) .unwrap(), } @@ -273,7 +273,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("|", "|")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/elvish.scm"), + 
include_str!("../../vendor/highlights/elvish.scm"), ) .unwrap(), } @@ -287,7 +287,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/gleam.scm"), + include_str!("../../vendor/highlights/gleam.scm"), ) .unwrap(), } @@ -305,7 +305,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/go.scm"), + include_str!("../../vendor/highlights/go.scm"), ) .unwrap(), } @@ -319,7 +319,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("[", "]"), ("(", ")")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/haskell.scm"), + include_str!("../../vendor/highlights/haskell.scm"), ) .unwrap(), } @@ -340,7 +340,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { ], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/hcl.scm"), + include_str!("../../vendor/highlights/hcl.scm"), ) .unwrap(), } @@ -363,7 +363,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/janet_simple.scm"), + include_str!("../../vendor/highlights/janet_simple.scm"), ) .unwrap(), } @@ -377,7 +377,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("{", "}"), ("[", "]")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/java.scm"), + include_str!("../../vendor/highlights/java.scm"), ) .unwrap(), } @@ -401,7 +401,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { ], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/javascript.scm"), + 
include_str!("../../vendor/highlights/javascript.scm"), ) .unwrap(), } @@ -415,7 +415,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("[", "]")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/json.scm"), + include_str!("../../vendor/highlights/json.scm"), ) .unwrap(), } @@ -433,7 +433,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/kotlin.scm"), + include_str!("../../vendor/highlights/kotlin.scm"), ) .unwrap(), } @@ -449,7 +449,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/lua.scm"), + include_str!("../../vendor/highlights/lua.scm"), ) .unwrap(), } @@ -465,7 +465,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("[", "]")].into_iter().collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/nix.scm"), + include_str!("../../vendor/highlights/nix.scm"), ) .unwrap(), } @@ -479,7 +479,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/ocaml.scm"), + include_str!("../../vendor/highlights/ocaml.scm"), ) .unwrap(), } @@ -493,7 +493,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/ocaml.scm"), + include_str!("../../vendor/highlights/ocaml.scm"), ) .unwrap(), } @@ -507,7 +507,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")], highlight_query: 
ts::Query::new( language, - include_str!("../vendor/highlights/php.scm"), + include_str!("../../vendor/highlights/php.scm"), ) .unwrap(), } @@ -532,7 +532,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("{", "}"), ("[", "]")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/perl.scm"), + include_str!("../../vendor/highlights/perl.scm"), ) .unwrap(), } @@ -546,7 +546,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/python.scm"), + include_str!("../../vendor/highlights/python.scm"), ) .unwrap(), } @@ -570,7 +570,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { ], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/ruby.scm"), + include_str!("../../vendor/highlights/ruby.scm"), ) .unwrap(), } @@ -584,7 +584,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("|", "|"), ("<", ">")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/rust.scm"), + include_str!("../../vendor/highlights/rust.scm"), ) .unwrap(), } @@ -598,7 +598,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/scala.scm"), + include_str!("../../vendor/highlights/scala.scm"), ) .unwrap(), } @@ -612,7 +612,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/swift.scm"), + include_str!("../../vendor/highlights/swift.scm"), ) .unwrap(), } @@ -626,7 +626,7 
@@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: vec![("{", "}"), ("[", "]")], highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/toml.scm"), + include_str!("../../vendor/highlights/toml.scm"), ) .unwrap(), } @@ -641,8 +641,8 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new( language, concat!( - include_str!("../vendor/highlights/javascript.scm"), - include_str!("../vendor/highlights/typescript.scm"), + include_str!("../../vendor/highlights/javascript.scm"), + include_str!("../../vendor/highlights/typescript.scm"), ), ) .unwrap(), @@ -660,8 +660,8 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new( language, concat!( - include_str!("../vendor/highlights/javascript.scm"), - include_str!("../vendor/highlights/typescript.scm"), + include_str!("../../vendor/highlights/javascript.scm"), + include_str!("../../vendor/highlights/typescript.scm"), ), ) .unwrap(), @@ -683,7 +683,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { delimiter_tokens: (vec![("{", "}"), ("(", ")"), ("[", "]")]), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/yaml.scm"), + include_str!("../../vendor/highlights/yaml.scm"), ) .unwrap(), } @@ -701,7 +701,7 @@ pub fn from_language(language: guess::Language) -> TreeSitterConfig { .collect(), highlight_query: ts::Query::new( language, - include_str!("../vendor/highlights/zig.scm"), + include_str!("../../vendor/highlights/zig.scm"), ) .unwrap(), } diff --git a/src/summary.rs b/src/summary.rs index 8ae1c35c3..f3be4dfc8 100644 --- a/src/summary.rs +++ b/src/summary.rs @@ -1,6 +1,6 @@ //! Data types summarising the result of diffing content. -use crate::syntax::MatchedPos; +use crate::parse::syntax::MatchedPos; #[derive(Debug, PartialEq, Eq)] pub enum FileContent {