diff --git a/src/diff/myers_diff.rs b/src/diff/myers_diff.rs
index d404a3eed..3ea18e9f4 100644
--- a/src/diff/myers_diff.rs
+++ b/src/diff/myers_diff.rs
@@ -2,9 +2,7 @@
 use std::hash::Hash;
 
-use rustc_hash::FxHashSet;
-
-use crate::hash::DftHashMap;
+use crate::hash::{DftHashMap, DftHashSet};
 
 #[derive(Debug, PartialEq)]
 pub(crate) enum DiffResult<T> {
@@ -105,11 +103,11 @@ pub(crate) fn slice_unique_by_hash<'a, T: Eq + Clone + Hash>(
     lhs: &'a [T],
     rhs: &'a [T],
 ) -> Vec<DiffResult<&'a T>> {
-    let mut lhs_set = FxHashSet::default();
+    let mut lhs_set = DftHashSet::default();
     for item in lhs {
         lhs_set.insert(item);
     }
-    let mut rhs_set = FxHashSet::default();
+    let mut rhs_set = DftHashSet::default();
     for item in rhs {
         rhs_set.insert(item);
     }
diff --git a/src/diff/unchanged.rs b/src/diff/unchanged.rs
index 02198d9b7..83ac240a5 100644
--- a/src/diff/unchanged.rs
+++ b/src/diff/unchanged.rs
@@ -1,11 +1,11 @@
 //! Find nodes that are obviously unchanged, so we can run the main
 //! diff on smaller inputs.
 
-use std::collections::HashSet;
 use std::hash::Hash;
 
 use crate::diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap};
 use crate::diff::myers_diff;
+use crate::hash::DftHashSet;
 use crate::parse::syntax::Syntax;
 
 const TINY_TREE_THRESHOLD: u32 = 10;
@@ -123,7 +123,7 @@ fn split_unchanged_singleton_list<'a>(
     res
 }
 
-fn find_unique_content_ids(node: &Syntax, unique_ids: &mut HashSet<u32>) {
+fn find_unique_content_ids(node: &Syntax, unique_ids: &mut DftHashSet<u32>) {
     if node.content_is_unique() {
         unique_ids.insert(node.content_id());
     }
@@ -134,13 +134,13 @@ fn find_unique_content_ids(node: &Syntax, unique_ids: &mut HashSet<u32>) {
     }
 }
 
-fn find_all_unique_content_ids(node: &Syntax) -> HashSet<u32> {
-    let mut unique_ids = HashSet::new();
+fn find_all_unique_content_ids(node: &Syntax) -> DftHashSet<u32> {
+    let mut unique_ids = DftHashSet::default();
     find_unique_content_ids(node, &mut unique_ids);
     unique_ids
 }
 
-fn count_unique_subtrees(node: &Syntax, opposite_unique_ids: &HashSet<u32>) -> usize {
+fn count_unique_subtrees(node: &Syntax, opposite_unique_ids: &DftHashSet<u32>) -> usize {
     if node.content_is_unique() && opposite_unique_ids.contains(&node.content_id()) {
         // Ignore children as soon as find a unique node, to avoid
         // overcounting.
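The sets swapped in `myers_diff.rs` and `unchanged.rs` are only used for membership tests on items seen on one side of the diff. A minimal sketch of that pattern, using the same `FxHashSet` that backs `DftHashSet`; the helper name `unique_to_lhs` is made up for illustration and is not part of difftastic:

```rust
use rustc_hash::FxHashSet;
use std::hash::Hash;

// Sketch: collect one side into a fast, non-DoS-resistant set, then keep the
// items from the other side that it does not contain.
fn unique_to_lhs<'a, T: Eq + Hash>(lhs: &'a [T], rhs: &'a [T]) -> Vec<&'a T> {
    let rhs_set: FxHashSet<&T> = rhs.iter().collect();
    lhs.iter().filter(|item| !rhs_set.contains(*item)).collect()
}
```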
diff --git a/src/display/context.rs b/src/display/context.rs
index b8472e2fb..61128ec83 100644
--- a/src/display/context.rs
+++ b/src/display/context.rs
@@ -1,13 +1,11 @@
 //! Calculate which nearby lines should also be displayed.
 
 use std::cmp::Ordering;
-use std::collections::HashSet;
 
 use line_numbers::LineNumber;
-use rustc_hash::FxHashSet;
 
 use crate::{
-    hash::DftHashMap,
+    hash::{DftHashMap, DftHashSet},
     parse::syntax::{zip_repeat_shorter, MatchKind, MatchedPos},
 };
@@ -124,7 +122,7 @@ fn all_matched_lines(
 }
 
 fn all_lines(mps: &[MatchedPos]) -> Vec<LineNumber> {
-    let mut lines = FxHashSet::default();
+    let mut lines = DftHashSet::default();
     for mp in mps {
         lines.insert(mp.pos.line);
     }
@@ -329,8 +327,8 @@ fn match_preceding_blanks(
 
 pub(crate) fn opposite_positions(
     mps: &[MatchedPos],
-) -> DftHashMap<LineNumber, HashSet<LineNumber>> {
-    let mut res: DftHashMap<LineNumber, HashSet<LineNumber>> = DftHashMap::default();
+) -> DftHashMap<LineNumber, DftHashSet<LineNumber>> {
+    let mut res: DftHashMap<LineNumber, DftHashSet<LineNumber>> = DftHashMap::default();
 
     for mp in mps {
         match &mp.kind {
@@ -340,7 +338,9 @@ pub(crate) fn opposite_positions(
                 ..
             } => {
                 for (self_span, opposite_span) in zip_repeat_shorter(self_pos, opposite_pos) {
-                    let opposite_lines = res.entry(self_span.line).or_insert_with(HashSet::new);
+                    let opposite_lines = res
+                        .entry(self_span.line)
+                        .or_insert_with(DftHashSet::default);
                     opposite_lines.insert(opposite_span.line);
                 }
             }
@@ -349,7 +349,7 @@
                 self_pos,
                 ..
             } => {
-                let opposite_lines = res.entry(self_pos.line).or_insert_with(HashSet::new);
+                let opposite_lines = res.entry(self_pos.line).or_insert_with(DftHashSet::default);
                 for opposite_span in opposite_pos {
                     opposite_lines.insert(opposite_span.line);
                 }
@@ -378,7 +378,7 @@
 /// ```
 fn before_with_opposites(
     before_lines: &[LineNumber],
-    opposite_lines: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposite_lines: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
 ) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
     let mut lines = before_lines.to_vec();
     lines.reverse();
@@ -474,7 +474,7 @@ pub(crate) fn flip_tuples(items: &[(Tx, Ty)]) -> Vec<(Ty, Tx
 /// 122 91 (closest match)
 fn after_with_opposites(
     after_lines: &[LineNumber],
-    opposite_lines: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposite_lines: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
     prev_max_opposite: Option<LineNumber>,
     max_opposite: LineNumber,
 ) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
@@ -517,8 +517,8 @@ fn after_with_opposites(
 
 pub(crate) fn calculate_before_context(
     lines: &[(Option<LineNumber>, Option<LineNumber>)],
-    opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
-    opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
+    opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
     num_context_lines: usize,
 ) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
     match lines.first() {
@@ -539,8 +539,8 @@
 
 pub(crate) fn calculate_after_context(
     lines: &[(Option<LineNumber>, Option<LineNumber>)],
-    opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
-    opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
+    opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
     max_lhs_src_line: LineNumber,
     max_rhs_src_line: LineNumber,
     num_context_lines: usize,
@@ -589,8 +589,8 @@ pub(crate) fn calculate_after_context(
 
 pub(crate) fn add_context(
     lines: &[(Option<LineNumber>, Option<LineNumber>)],
-    opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
-    opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
+    opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
     max_lhs_src_line: LineNumber,
     max_rhs_src_line: LineNumber,
     num_context_lines: usize,
@@ -743,10 +743,10 @@ mod tests {
         let lines = vec![(Some(1.into()), Some(1.into()))];
 
         let mut opposite_to_lhs = DftHashMap::default();
-        opposite_to_lhs.insert(0.into(), HashSet::from_iter([0.into()]));
+        opposite_to_lhs.insert(0.into(), DftHashSet::from_iter([0.into()]));
 
         let mut opposite_to_rhs = DftHashMap::default();
-        opposite_to_rhs.insert(0.into(), HashSet::from_iter([0.into()]));
+        opposite_to_rhs.insert(0.into(), DftHashSet::from_iter([0.into()]));
 
         let res = calculate_before_context(
             &lines,
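`opposite_positions` now maps each line to a `DftHashSet` of the lines it matches on the other side. Because `HashSet::new` is only provided for the default `RandomState` hasher, the map entries are created with `or_insert_with(DftHashSet::default)`. A self-contained sketch of the same shape, assuming the `hashbrown` and `rustc-hash` crates used by the diff, with `u32` standing in for `LineNumber` and a made-up `record_match` helper:

```rust
use std::hash::BuildHasherDefault;
use rustc_hash::{FxHashSet, FxHasher};

type DftHashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
type DftHashSet<V> = FxHashSet<V>;

// Each line number maps to the set of line numbers it matches on the other side.
fn record_match(res: &mut DftHashMap<u32, DftHashSet<u32>>, self_line: u32, opposite_line: u32) {
    // `HashSet::new()` only exists for the RandomState hasher, so the
    // FxHasher-backed set is created through `Default` instead.
    res.entry(self_line)
        .or_insert_with(DftHashSet::default)
        .insert(opposite_line);
}
```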
diff --git a/src/display/hunks.rs b/src/display/hunks.rs
index 373a5cfea..06187393a 100644
--- a/src/display/hunks.rs
+++ b/src/display/hunks.rs
@@ -5,15 +5,15 @@
 /// If we exceed this, the lines are stored in separate hunks.
 const MAX_DISTANCE: u32 = 4;
 
-use std::collections::HashSet;
-
 use line_numbers::LineNumber;
 
 use crate::{
     constants::Side,
-    display::context::{add_context, opposite_positions},
-    display::side_by_side::lines_with_novel,
-    hash::DftHashMap,
+    display::{
+        context::{add_context, opposite_positions},
+        side_by_side::lines_with_novel,
+    },
+    hash::{DftHashMap, DftHashSet},
     parse::syntax::{zip_pad_shorter, MatchKind, MatchedPos},
 };
@@ -22,9 +22,9 @@ use crate::{
 #[derive(Debug, Clone)]
 pub(crate) struct Hunk {
     /// The LHS line numbers that contain novel content.
-    pub(crate) novel_lhs: HashSet<LineNumber>,
+    pub(crate) novel_lhs: DftHashSet<LineNumber>,
     /// The RHS line numbers that contain novel content.
-    pub(crate) novel_rhs: HashSet<LineNumber>,
+    pub(crate) novel_rhs: DftHashSet<LineNumber>,
     /// Line pairs that contain modified lines. This does not include
     /// padding, so at least one of the two lines has novel content.
     pub(crate) lines: Vec<(Option<LineNumber>, Option<LineNumber>)>,
@@ -35,8 +35,8 @@ impl Hunk {
         let mut lines = self.lines;
         lines.extend(other.lines.iter());
 
-        let mut lhs_seen: HashSet<LineNumber> = HashSet::new();
-        let mut rhs_seen: HashSet<LineNumber> = HashSet::new();
+        let mut lhs_seen: DftHashSet<LineNumber> = DftHashSet::default();
+        let mut rhs_seen: DftHashSet<LineNumber> = DftHashSet::default();
 
         let mut deduped_lines = vec![];
         for (lhs_line, rhs_line) in lines {
@@ -132,8 +132,8 @@ fn extract_lines(hunk: &Hunk) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
 
 pub(crate) fn merge_adjacent(
     hunks: &[Hunk],
-    opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
-    opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
+    opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
     max_lhs_src_line: LineNumber,
     max_rhs_src_line: LineNumber,
     num_context_lines: usize,
@@ -141,12 +141,12 @@ pub(crate) fn merge_adjacent(
     let mut merged_hunks: Vec<Hunk> = vec![];
     let mut prev_hunk: Option<Hunk> = None;
 
-    let mut prev_lhs_lines: HashSet<LineNumber> = HashSet::new();
-    let mut prev_rhs_lines: HashSet<LineNumber> = HashSet::new();
+    let mut prev_lhs_lines: DftHashSet<LineNumber> = DftHashSet::default();
+    let mut prev_rhs_lines: DftHashSet<LineNumber> = DftHashSet::default();
 
     for hunk in hunks {
-        let mut lhs_lines: HashSet<LineNumber> = HashSet::new();
-        let mut rhs_lines: HashSet<LineNumber> = HashSet::new();
+        let mut lhs_lines: DftHashSet<LineNumber> = DftHashSet::default();
+        let mut rhs_lines: DftHashSet<LineNumber> = DftHashSet::default();
 
         let lines = extract_lines(hunk);
         let contextual_lines = add_context(
@@ -273,11 +273,11 @@ fn enforce_increasing(
 
 fn find_novel_lines(
     lines: &[(Option<LineNumber>, Option<LineNumber>)],
-    all_lhs_novel: &HashSet<LineNumber>,
-    all_rhs_novel: &HashSet<LineNumber>,
-) -> (HashSet<LineNumber>, HashSet<LineNumber>) {
-    let mut lhs_novel = HashSet::new();
-    let mut rhs_novel = HashSet::new();
+    all_lhs_novel: &DftHashSet<LineNumber>,
+    all_rhs_novel: &DftHashSet<LineNumber>,
+) -> (DftHashSet<LineNumber>, DftHashSet<LineNumber>) {
+    let mut lhs_novel = DftHashSet::default();
+    let mut rhs_novel = DftHashSet::default();
 
     for (lhs_line, rhs_line) in lines {
         if let Some(lhs_line) = lhs_line {
@@ -364,8 +364,8 @@ fn novel_section_in_order(
     rhs_novel_mps: &[&MatchedPos],
     lhs_prev_matched_line: Option<LineNumber>,
     rhs_prev_matched_line: Option<LineNumber>,
-    opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
-    opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
+    opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
 ) -> Vec<(Side, MatchedPos)> {
     let mut res: Vec<(Side, MatchedPos)> = vec![];
@@ -439,8 +439,8 @@ fn novel_section_in_order(
 fn sorted_novel_positions(
     lhs_mps: &[MatchedPos],
     rhs_mps: &[MatchedPos],
-    opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
-    opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
+    opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
 ) -> Vec<(Side, MatchedPos)> {
     let mut lhs_mps: Vec<MatchedPos> = lhs_mps.to_vec();
     lhs_mps.sort_unstable_by_key(|mp| mp.pos);
@@ -533,7 +533,7 @@ fn sorted_novel_positions(
 
 fn next_opposite(
     line: LineNumber,
-    opposites: &DftHashMap<LineNumber, HashSet<LineNumber>>,
+    opposites: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
     prev_opposite: Option<LineNumber>,
 ) -> Option<LineNumber> {
     opposites.get(&line).and_then(|lines_set| {
@@ -805,8 +805,8 @@ mod tests {
             (Some(2.into()), Some(2.into())),
         ];
 
-        let novel_lhs = HashSet::from_iter([1.into()]);
-        let novel_rhs = HashSet::from_iter([1.into()]);
+        let novel_lhs = DftHashSet::from_iter([1.into()]);
+        let novel_rhs = DftHashSet::from_iter([1.into()]);
         let hunk = Hunk {
             novel_lhs,
             novel_rhs,
@@ -835,8 +835,8 @@ mod tests {
             (Some(5.into()), Some(5.into())),
         ];
 
-        let novel_lhs = HashSet::from_iter([1.into()]);
-        let novel_rhs = HashSet::from_iter([2.into()]);
+        let novel_lhs = DftHashSet::from_iter([1.into()]);
+        let novel_rhs = DftHashSet::from_iter([2.into()]);
         let hunk = Hunk {
             novel_lhs,
             novel_rhs,
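`Hunk::merge` dedups the combined line pairs using one seen-set per side. The sketch below shows roughly that idea, with `u32` in place of `LineNumber` and a hypothetical `dedup_pairs` helper; the real merge logic in `hunks.rs` differs in detail:

```rust
use rustc_hash::FxHashSet;

fn dedup_pairs(lines: Vec<(Option<u32>, Option<u32>)>) -> Vec<(Option<u32>, Option<u32>)> {
    let mut lhs_seen: FxHashSet<u32> = FxHashSet::default();
    let mut rhs_seen: FxHashSet<u32> = FxHashSet::default();

    let mut deduped_lines = vec![];
    for (lhs_line, rhs_line) in lines {
        // `insert` returns true when the value was not already present, so a
        // pair is kept only if it contributes a new line on at least one side.
        let lhs_new = lhs_line.map_or(false, |line| lhs_seen.insert(line));
        let rhs_new = rhs_line.map_or(false, |line| rhs_seen.insert(line));
        if lhs_new || rhs_new {
            deduped_lines.push((lhs_line, rhs_line));
        }
    }
    deduped_lines
}
```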
diff --git a/src/display/side_by_side.rs b/src/display/side_by_side.rs
index 888fa2d75..5961af2d1 100644
--- a/src/display/side_by_side.rs
+++ b/src/display/side_by_side.rs
@@ -1,9 +1,6 @@
 //! Side-by-side (two column) display of diffs.
 
-use std::{
-    cmp::{max, min},
-    collections::HashSet,
-};
+use std::cmp::{max, min};
 
 use line_numbers::LineNumber;
 use line_numbers::SingleLineSpan;
@@ -19,7 +16,7 @@ use crate::{
             replace_tabs, split_and_apply, BackgroundColor,
         },
     },
-    hash::DftHashMap,
+    hash::{DftHashMap, DftHashSet},
     lines::{format_line_num, split_on_newlines},
     options::{DisplayMode, DisplayOptions},
     parse::syntax::{zip_pad_shorter, MatchedPos},
@@ -248,13 +245,13 @@ impl SourceDimensions {
 pub(crate) fn lines_with_novel(
     lhs_mps: &[MatchedPos],
     rhs_mps: &[MatchedPos],
-) -> (HashSet<LineNumber>, HashSet<LineNumber>) {
-    let lhs_lines_with_novel: HashSet<LineNumber> = lhs_mps
+) -> (DftHashSet<LineNumber>, DftHashSet<LineNumber>) {
+    let lhs_lines_with_novel: DftHashSet<LineNumber> = lhs_mps
         .iter()
         .filter(|mp| mp.kind.is_novel())
         .map(|mp| mp.pos.line)
         .collect();
-    let rhs_lines_with_novel: HashSet<LineNumber> = rhs_mps
+    let rhs_lines_with_novel: DftHashSet<LineNumber> = rhs_mps
         .iter()
         .filter(|mp| mp.kind.is_novel())
         .map(|mp| mp.pos.line)
         .collect();
@@ -311,7 +308,7 @@ fn highlight_as_novel(
     line_num: Option<LineNumber>,
     lines: &[&str],
     opposite_line_num: Option<LineNumber>,
-    lines_with_novel: &HashSet<LineNumber>,
+    lines_with_novel: &DftHashSet<LineNumber>,
 ) -> bool {
     if let Some(line_num) = line_num {
         // If this line contains any novel tokens, highlight it.
@@ -745,9 +742,9 @@ mod tests {
             },
         }];
 
-        let mut novel_lhs = HashSet::new();
+        let mut novel_lhs = DftHashSet::default();
         novel_lhs.insert(0.into());
-        let mut novel_rhs = HashSet::new();
+        let mut novel_rhs = DftHashSet::default();
         novel_rhs.insert(0.into());
 
         let hunks = [Hunk {
diff --git a/src/files.rs b/src/files.rs
index 7c9105ef1..5bd87afa7 100644
--- a/src/files.rs
+++ b/src/files.rs
@@ -8,9 +8,9 @@ use std::{
 };
 
 use ignore::WalkBuilder;
-use rustc_hash::FxHashSet;
 
 use crate::exit_codes::EXIT_BAD_ARGUMENTS;
+use crate::hash::DftHashSet;
 use crate::options::FileArgument;
 
 pub(crate) fn read_file_or_die(path: &FileArgument) -> Vec<u8> {
@@ -268,7 +268,7 @@ pub(crate) fn relative_paths_in_either(lhs_dir: &Path, rhs_dir: &Path) -> Vec<PathBuf> {
-    let mut seen = FxHashSet::default();
+    let mut seen = DftHashSet::default();
     let mut res: Vec<PathBuf> = vec![];
     let mut i = 0;
diff --git a/src/hash.rs b/src/hash.rs
index b38cf81b5..f79ac6389 100644
--- a/src/hash.rs
+++ b/src/hash.rs
@@ -1,6 +1,6 @@
 use std::hash::BuildHasherDefault;
 
-use rustc_hash::FxHasher;
+use rustc_hash::{FxHashSet, FxHasher};
 
 /// A fast hashmap with no hash DoS protection. This is used in
 /// extremely hot code.
@@ -10,3 +10,7 @@ use rustc_hash::FxHasher;
 /// little faster, and it also allows us to use the entry_ref API
 /// which is unavailable in stable Rust.
 pub(crate) type DftHashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
+
+/// A fast hash set with no hash DoS protection. This is a simple
+/// alias, but added for consistency with `DftHashMap`.
+pub(crate) type DftHashSet<V> = FxHashSet<V>;
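The new `DftHashSet` alias simply re-exports `rustc_hash::FxHashSet`, which is itself the standard `HashSet` parameterised with an `FxHasher`-based `BuildHasher`, so the familiar set API (`insert`, `contains`, `FromIterator`, `Extend`) is unchanged. A rough sketch of what the alias boils down to:

```rust
use std::collections::HashSet;
use std::hash::BuildHasherDefault;
use rustc_hash::FxHasher;

// Approximately what DftHashSet expands to; rustc_hash may spell the
// BuildHasher slightly differently depending on the crate version.
type DftHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;

fn main() {
    let mut lines: DftHashSet<u32> = DftHashSet::default();
    lines.insert(1);
    assert!(lines.contains(&1));
}
```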
diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs
index 0d471b1ea..b5b60baed 100644
--- a/src/parse/tree_sitter_parser.rs
+++ b/src/parse/tree_sitter_parser.rs
@@ -1,7 +1,5 @@
 //! Load and configure parsers written with tree-sitter.
 
-use std::collections::HashSet;
-
 use line_numbers::LinePositions;
 use streaming_iterator::StreamingIterator as _;
 use tree_sitter as ts;
@@ -9,7 +7,7 @@ use typed_arena::Arena;
 
 use super::syntax::MatchedPos;
 use super::syntax::{self, StringKind};
-use crate::hash::DftHashMap;
+use crate::hash::{DftHashMap, DftHashSet};
 use crate::options::DiffOptions;
 use crate::parse::guess_language as guess;
 use crate::parse::syntax::{AtomKind, Syntax};
@@ -45,7 +43,7 @@ pub(crate) struct TreeSitterConfig {
     /// all the children in the source. This is known limitation of
     /// tree-sitter, and occurs more often for complex string syntax.
     ///
-    atom_nodes: HashSet<&'static str>,
+    atom_nodes: DftHashSet<&'static str>,
 
     /// We want to consider delimiter tokens as part of lists, not
     /// standalone atoms. Tree-sitter includes delimiter tokens, so
@@ -356,7 +354,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
             let language = unsafe { tree_sitter_elvish() };
             TreeSitterConfig {
                 language: language.clone(),
-                atom_nodes: [].into(),
+                atom_nodes: [].into_iter().collect(),
                 delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("|", "|")],
                 highlight_query: ts::Query::new(
                     &language,
@@ -370,7 +368,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
             let language = unsafe { tree_sitter_erlang() };
             TreeSitterConfig {
                 language: language.clone(),
-                atom_nodes: [].into(),
+                atom_nodes: [].into_iter().collect(),
                 delimiter_tokens: vec![("(", ")"), ("{", "}"), ("[", "]")],
                 highlight_query: ts::Query::new(
                     &language,
@@ -384,7 +382,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
             let language = unsafe { tree_sitter_fsharp() };
             TreeSitterConfig {
                 language: language.clone(),
-                atom_nodes: ["string", "triple_quoted_string"].into(),
+                atom_nodes: ["string", "triple_quoted_string"].into_iter().collect(),
                 delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")],
                 highlight_query: ts::Query::new(
                     &language,
@@ -398,7 +396,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
             let language = unsafe { tree_sitter_gleam() };
             TreeSitterConfig {
                 language: language.clone(),
-                atom_nodes: ["string"].into(),
+                atom_nodes: ["string"].into_iter().collect(),
                 delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")],
                 highlight_query: ts::Query::new(
                     &language,
@@ -974,7 +972,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
             let language = unsafe { tree_sitter_smali() };
             TreeSitterConfig {
                 language: language.clone(),
-                atom_nodes: HashSet::from(["string"]),
+                atom_nodes: vec!["string"].into_iter().collect(),
                 delimiter_tokens: Vec::new(),
                 highlight_query: ts::Query::new(
                     &language,
@@ -1018,7 +1016,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
             let language = unsafe { tree_sitter_swift() };
             TreeSitterConfig {
                 language: language.clone(),
-                atom_nodes: ["line_string_literal"].into(),
+                atom_nodes: ["line_string_literal"].into_iter().collect(),
                 delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")],
                 highlight_query: ts::Query::new(
                     &language,
@@ -1278,10 +1276,10 @@ fn tree_highlights(
     let mut qc = ts::QueryCursor::new();
     let mut q_matches = qc.matches(&config.highlight_query, tree.root_node(), src.as_bytes());
 
-    let mut comment_ids = HashSet::new();
-    let mut keyword_ids = HashSet::new();
-    let mut string_ids = HashSet::new();
-    let mut type_ids = HashSet::new();
+    let mut comment_ids = DftHashSet::default();
+    let mut keyword_ids = DftHashSet::default();
+    let mut string_ids = DftHashSet::default();
+    let mut type_ids = DftHashSet::default();
 
     while let Some(m) = q_matches.next() {
         for c in m.captures {
@@ -1505,10 +1503,10 @@ fn find_delim_positions(
 
 #[derive(Debug)]
 pub(crate) struct HighlightedNodeIds {
-    keyword_ids: HashSet<u16>,
-    comment_ids: HashSet<u16>,
-    string_ids: HashSet<u16>,
-    type_ids: HashSet<u16>,
+    keyword_ids: DftHashSet<u16>,
+    comment_ids: DftHashSet<u16>,
+    string_ids: DftHashSet<u16>,
+    type_ids: DftHashSet<u16>,
 }
 
 /// Convert all the tree-sitter nodes at this level to difftastic
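The `atom_nodes: [...].into()` initialisers become `[...].into_iter().collect()` because `HashSet::from` / `From<[T; N]>` is only implemented for the default `RandomState` hasher, so an array `.into()` cannot produce an `FxHasher`-backed set; `FromIterator` works for any hasher that implements `Default`. A small standalone illustration:

```rust
use std::collections::HashSet;
use rustc_hash::FxHashSet;

fn main() {
    // Array `.into()` only produces a HashSet with the default RandomState hasher.
    let _std_set: HashSet<&str> = ["string"].into();

    // For an FxHasher-backed set, collect the elements instead; FromIterator is
    // implemented for any hasher that implements Default.
    let fx_set: FxHashSet<&'static str> =
        ["string", "triple_quoted_string"].into_iter().collect();
    assert!(fx_set.contains("string"));
}
```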