Use FxHashSet everywhere

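Replace the remaining uses of std's `HashSet`, and the direct uses of `FxHashSet`, with a single `DftHashSet` alias for `FxHashSet`. This is a performance improvement, and makes the code more consistent.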

Instruction counts before:

3,137M instructions typing_*.ml
2,210M instructions slow_*.rs

Instruction counts after:

2,994M instructions typing_*.ml
2,180M instructions slow_*.rs

So almost a 5% reduction in instruction count for typing_*.ml (about 4.6%), and a smaller reduction of about 1.4% for slow_*.rs.
pull/813/head
Wilfred Hughes 2025-02-03 22:17:04 +07:00
parent 2218c88fb0
commit c14f567f81
8 changed files with 86 additions and 89 deletions

@ -2,9 +2,7 @@
use std::hash::Hash;
use rustc_hash::FxHashSet;
use crate::hash::DftHashMap;
use crate::hash::{DftHashMap, DftHashSet};
#[derive(Debug, PartialEq)]
pub(crate) enum DiffResult<T> {
@ -105,11 +103,11 @@ pub(crate) fn slice_unique_by_hash<'a, T: Eq + Clone + Hash>(
lhs: &'a [T],
rhs: &'a [T],
) -> Vec<DiffResult<&'a T>> {
let mut lhs_set = FxHashSet::default();
let mut lhs_set = DftHashSet::default();
for item in lhs {
lhs_set.insert(item);
}
let mut rhs_set = FxHashSet::default();
let mut rhs_set = DftHashSet::default();
for item in rhs {
rhs_set.insert(item);
}

@ -1,11 +1,11 @@
//! Find nodes that are obviously unchanged, so we can run the main
//! diff on smaller inputs.
use std::collections::HashSet;
use std::hash::Hash;
use crate::diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap};
use crate::diff::myers_diff;
use crate::hash::DftHashSet;
use crate::parse::syntax::Syntax;
const TINY_TREE_THRESHOLD: u32 = 10;
@ -123,7 +123,7 @@ fn split_unchanged_singleton_list<'a>(
res
}
fn find_unique_content_ids(node: &Syntax, unique_ids: &mut HashSet<u32>) {
fn find_unique_content_ids(node: &Syntax, unique_ids: &mut DftHashSet<u32>) {
if node.content_is_unique() {
unique_ids.insert(node.content_id());
}
@ -134,13 +134,13 @@ fn find_unique_content_ids(node: &Syntax, unique_ids: &mut HashSet<u32>) {
}
}
fn find_all_unique_content_ids(node: &Syntax) -> HashSet<u32> {
let mut unique_ids = HashSet::new();
fn find_all_unique_content_ids(node: &Syntax) -> DftHashSet<u32> {
let mut unique_ids = DftHashSet::default();
find_unique_content_ids(node, &mut unique_ids);
unique_ids
}
fn count_unique_subtrees(node: &Syntax, opposite_unique_ids: &HashSet<u32>) -> usize {
fn count_unique_subtrees(node: &Syntax, opposite_unique_ids: &DftHashSet<u32>) -> usize {
if node.content_is_unique() && opposite_unique_ids.contains(&node.content_id()) {
// Ignore children as soon as find a unique node, to avoid
// overcounting.

@ -1,13 +1,11 @@
//! Calculate which nearby lines should also be displayed.
use std::cmp::Ordering;
use std::collections::HashSet;
use line_numbers::LineNumber;
use rustc_hash::FxHashSet;
use crate::{
hash::DftHashMap,
hash::{DftHashMap, DftHashSet},
parse::syntax::{zip_repeat_shorter, MatchKind, MatchedPos},
};
@ -124,7 +122,7 @@ fn all_matched_lines(
}
fn all_lines(mps: &[MatchedPos]) -> Vec<LineNumber> {
let mut lines = FxHashSet::default();
let mut lines = DftHashSet::default();
for mp in mps {
lines.insert(mp.pos.line);
}
@ -329,8 +327,8 @@ fn match_preceding_blanks(
pub(crate) fn opposite_positions(
mps: &[MatchedPos],
) -> DftHashMap<LineNumber, HashSet<LineNumber>> {
let mut res: DftHashMap<LineNumber, HashSet<LineNumber>> = DftHashMap::default();
) -> DftHashMap<LineNumber, DftHashSet<LineNumber>> {
let mut res: DftHashMap<LineNumber, DftHashSet<LineNumber>> = DftHashMap::default();
for mp in mps {
match &mp.kind {
@ -340,7 +338,9 @@ pub(crate) fn opposite_positions(
..
} => {
for (self_span, opposite_span) in zip_repeat_shorter(self_pos, opposite_pos) {
let opposite_lines = res.entry(self_span.line).or_insert_with(HashSet::new);
let opposite_lines = res
.entry(self_span.line)
.or_insert_with(DftHashSet::default);
opposite_lines.insert(opposite_span.line);
}
}
@ -349,7 +349,7 @@ pub(crate) fn opposite_positions(
self_pos,
..
} => {
let opposite_lines = res.entry(self_pos.line).or_insert_with(HashSet::new);
let opposite_lines = res.entry(self_pos.line).or_insert_with(DftHashSet::default);
for opposite_span in opposite_pos {
opposite_lines.insert(opposite_span.line);
}
@ -378,7 +378,7 @@ pub(crate) fn opposite_positions(
/// ```
fn before_with_opposites(
before_lines: &[LineNumber],
opposite_lines: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_lines: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
let mut lines = before_lines.to_vec();
lines.reverse();
@ -474,7 +474,7 @@ pub(crate) fn flip_tuples<Tx: Copy, Ty: Copy>(items: &[(Tx, Ty)]) -> Vec<(Ty, Tx
/// 122 91 (closest match)
fn after_with_opposites(
after_lines: &[LineNumber],
opposite_lines: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_lines: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
prev_max_opposite: Option<LineNumber>,
max_opposite: LineNumber,
) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
@ -517,8 +517,8 @@ fn after_with_opposites(
pub(crate) fn calculate_before_context(
lines: &[(Option<LineNumber>, Option<LineNumber>)],
opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
num_context_lines: usize,
) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
match lines.first() {
@ -539,8 +539,8 @@ pub(crate) fn calculate_before_context(
pub(crate) fn calculate_after_context(
lines: &[(Option<LineNumber>, Option<LineNumber>)],
opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
max_lhs_src_line: LineNumber,
max_rhs_src_line: LineNumber,
num_context_lines: usize,
@ -589,8 +589,8 @@ pub(crate) fn calculate_after_context(
pub(crate) fn add_context(
lines: &[(Option<LineNumber>, Option<LineNumber>)],
opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
max_lhs_src_line: LineNumber,
max_rhs_src_line: LineNumber,
num_context_lines: usize,
@ -743,10 +743,10 @@ mod tests {
let lines = vec![(Some(1.into()), Some(1.into()))];
let mut opposite_to_lhs = DftHashMap::default();
opposite_to_lhs.insert(0.into(), HashSet::from_iter([0.into()]));
opposite_to_lhs.insert(0.into(), DftHashSet::from_iter([0.into()]));
let mut opposite_to_rhs = DftHashMap::default();
opposite_to_rhs.insert(0.into(), HashSet::from_iter([0.into()]));
opposite_to_rhs.insert(0.into(), DftHashSet::from_iter([0.into()]));
let res = calculate_before_context(
&lines,

@ -5,15 +5,15 @@
/// If we exceed this, the lines are stored in separate hunks.
const MAX_DISTANCE: u32 = 4;
use std::collections::HashSet;
use line_numbers::LineNumber;
use crate::{
constants::Side,
display::context::{add_context, opposite_positions},
display::side_by_side::lines_with_novel,
hash::DftHashMap,
display::{
context::{add_context, opposite_positions},
side_by_side::lines_with_novel,
},
hash::{DftHashMap, DftHashSet},
parse::syntax::{zip_pad_shorter, MatchKind, MatchedPos},
};
@ -22,9 +22,9 @@ use crate::{
#[derive(Debug, Clone)]
pub(crate) struct Hunk {
/// The LHS line numbers that contain novel content.
pub(crate) novel_lhs: HashSet<LineNumber>,
pub(crate) novel_lhs: DftHashSet<LineNumber>,
/// The RHS line numbers that contain novel content.
pub(crate) novel_rhs: HashSet<LineNumber>,
pub(crate) novel_rhs: DftHashSet<LineNumber>,
/// Line pairs that contain modified lines. This does not include
/// padding, so at least one of the two lines has novel content.
pub(crate) lines: Vec<(Option<LineNumber>, Option<LineNumber>)>,
@ -35,8 +35,8 @@ impl Hunk {
let mut lines = self.lines;
lines.extend(other.lines.iter());
let mut lhs_seen: HashSet<LineNumber> = HashSet::new();
let mut rhs_seen: HashSet<LineNumber> = HashSet::new();
let mut lhs_seen: DftHashSet<LineNumber> = DftHashSet::default();
let mut rhs_seen: DftHashSet<LineNumber> = DftHashSet::default();
let mut deduped_lines = vec![];
for (lhs_line, rhs_line) in lines {
@ -132,8 +132,8 @@ fn extract_lines(hunk: &Hunk) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
pub(crate) fn merge_adjacent(
hunks: &[Hunk],
opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
max_lhs_src_line: LineNumber,
max_rhs_src_line: LineNumber,
num_context_lines: usize,
@ -141,12 +141,12 @@ pub(crate) fn merge_adjacent(
let mut merged_hunks: Vec<Hunk> = vec![];
let mut prev_hunk: Option<Hunk> = None;
let mut prev_lhs_lines: HashSet<LineNumber> = HashSet::new();
let mut prev_rhs_lines: HashSet<LineNumber> = HashSet::new();
let mut prev_lhs_lines: DftHashSet<LineNumber> = DftHashSet::default();
let mut prev_rhs_lines: DftHashSet<LineNumber> = DftHashSet::default();
for hunk in hunks {
let mut lhs_lines: HashSet<LineNumber> = HashSet::new();
let mut rhs_lines: HashSet<LineNumber> = HashSet::new();
let mut lhs_lines: DftHashSet<LineNumber> = DftHashSet::default();
let mut rhs_lines: DftHashSet<LineNumber> = DftHashSet::default();
let lines = extract_lines(hunk);
let contextual_lines = add_context(
@ -273,11 +273,11 @@ fn enforce_increasing(
fn find_novel_lines(
lines: &[(Option<LineNumber>, Option<LineNumber>)],
all_lhs_novel: &HashSet<LineNumber>,
all_rhs_novel: &HashSet<LineNumber>,
) -> (HashSet<LineNumber>, HashSet<LineNumber>) {
let mut lhs_novel = HashSet::new();
let mut rhs_novel = HashSet::new();
all_lhs_novel: &DftHashSet<LineNumber>,
all_rhs_novel: &DftHashSet<LineNumber>,
) -> (DftHashSet<LineNumber>, DftHashSet<LineNumber>) {
let mut lhs_novel = DftHashSet::default();
let mut rhs_novel = DftHashSet::default();
for (lhs_line, rhs_line) in lines {
if let Some(lhs_line) = lhs_line {
@ -364,8 +364,8 @@ fn novel_section_in_order(
rhs_novel_mps: &[&MatchedPos],
lhs_prev_matched_line: Option<LineNumber>,
rhs_prev_matched_line: Option<LineNumber>,
opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
) -> Vec<(Side, MatchedPos)> {
let mut res: Vec<(Side, MatchedPos)> = vec![];
@ -439,8 +439,8 @@ fn novel_section_in_order(
fn sorted_novel_positions(
lhs_mps: &[MatchedPos],
rhs_mps: &[MatchedPos],
opposite_to_lhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposite_to_lhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
opposite_to_rhs: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
) -> Vec<(Side, MatchedPos)> {
let mut lhs_mps: Vec<MatchedPos> = lhs_mps.to_vec();
lhs_mps.sort_unstable_by_key(|mp| mp.pos);
@ -533,7 +533,7 @@ fn sorted_novel_positions(
fn next_opposite(
line: LineNumber,
opposites: &DftHashMap<LineNumber, HashSet<LineNumber>>,
opposites: &DftHashMap<LineNumber, DftHashSet<LineNumber>>,
prev_opposite: Option<LineNumber>,
) -> Option<LineNumber> {
opposites.get(&line).and_then(|lines_set| {
@ -805,8 +805,8 @@ mod tests {
(Some(2.into()), Some(2.into())),
];
let novel_lhs = HashSet::from_iter([1.into()]);
let novel_rhs = HashSet::from_iter([1.into()]);
let novel_lhs = DftHashSet::from_iter([1.into()]);
let novel_rhs = DftHashSet::from_iter([1.into()]);
let hunk = Hunk {
novel_lhs,
novel_rhs,
@ -835,8 +835,8 @@ mod tests {
(Some(5.into()), Some(5.into())),
];
let novel_lhs = HashSet::from_iter([1.into()]);
let novel_rhs = HashSet::from_iter([2.into()]);
let novel_lhs = DftHashSet::from_iter([1.into()]);
let novel_rhs = DftHashSet::from_iter([2.into()]);
let hunk = Hunk {
novel_lhs,
novel_rhs,

@ -1,9 +1,6 @@
//! Side-by-side (two column) display of diffs.
use std::{
cmp::{max, min},
collections::HashSet,
};
use std::cmp::{max, min};
use line_numbers::LineNumber;
use line_numbers::SingleLineSpan;
@ -19,7 +16,7 @@ use crate::{
replace_tabs, split_and_apply, BackgroundColor,
},
},
hash::DftHashMap,
hash::{DftHashMap, DftHashSet},
lines::{format_line_num, split_on_newlines},
options::{DisplayMode, DisplayOptions},
parse::syntax::{zip_pad_shorter, MatchedPos},
@ -248,13 +245,13 @@ impl SourceDimensions {
pub(crate) fn lines_with_novel(
lhs_mps: &[MatchedPos],
rhs_mps: &[MatchedPos],
) -> (HashSet<LineNumber>, HashSet<LineNumber>) {
let lhs_lines_with_novel: HashSet<LineNumber> = lhs_mps
) -> (DftHashSet<LineNumber>, DftHashSet<LineNumber>) {
let lhs_lines_with_novel: DftHashSet<LineNumber> = lhs_mps
.iter()
.filter(|mp| mp.kind.is_novel())
.map(|mp| mp.pos.line)
.collect();
let rhs_lines_with_novel: HashSet<LineNumber> = rhs_mps
let rhs_lines_with_novel: DftHashSet<LineNumber> = rhs_mps
.iter()
.filter(|mp| mp.kind.is_novel())
.map(|mp| mp.pos.line)
@ -311,7 +308,7 @@ fn highlight_as_novel(
line_num: Option<LineNumber>,
lines: &[&str],
opposite_line_num: Option<LineNumber>,
lines_with_novel: &HashSet<LineNumber>,
lines_with_novel: &DftHashSet<LineNumber>,
) -> bool {
if let Some(line_num) = line_num {
// If this line contains any novel tokens, highlight it.
@ -745,9 +742,9 @@ mod tests {
},
}];
let mut novel_lhs = HashSet::new();
let mut novel_lhs = DftHashSet::default();
novel_lhs.insert(0.into());
let mut novel_rhs = HashSet::new();
let mut novel_rhs = DftHashSet::default();
novel_rhs.insert(0.into());
let hunks = [Hunk {

@ -8,9 +8,9 @@ use std::{
};
use ignore::WalkBuilder;
use rustc_hash::FxHashSet;
use crate::exit_codes::EXIT_BAD_ARGUMENTS;
use crate::hash::DftHashSet;
use crate::options::FileArgument;
pub(crate) fn read_file_or_die(path: &FileArgument) -> Vec<u8> {
@ -268,7 +268,7 @@ pub(crate) fn relative_paths_in_either(lhs_dir: &Path, rhs_dir: &Path) -> Vec<Pa
let lhs_paths = relative_file_paths_in_dir(lhs_dir);
let rhs_paths = relative_file_paths_in_dir(rhs_dir);
let mut seen = FxHashSet::default();
let mut seen = DftHashSet::default();
let mut paths: Vec<PathBuf> = vec![];
let mut i = 0;

@ -1,6 +1,6 @@
use std::hash::BuildHasherDefault;
use rustc_hash::FxHasher;
use rustc_hash::{FxHashSet, FxHasher};
/// A fast hashmap with no hash DoS protection. This is used in
/// extremely hot code.
@ -10,3 +10,7 @@ use rustc_hash::FxHasher;
/// little faster, and it also allows us to use the entry_ref API
/// which is unavailable in stable Rust.
pub(crate) type DftHashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
/// A fast hash set with no hash DoS protection. This is a simple
/// alias for `rustc_hash::FxHashSet`, added for consistency with
/// `DftHashMap`.
///
/// Note that, unlike `DftHashMap`, this alias is not declared in
/// terms of a `hashbrown` type: it is whatever set type `FxHashSet`
/// names.
pub(crate) type DftHashSet<V> = FxHashSet<V>;

@ -1,7 +1,5 @@
//! Load and configure parsers written with tree-sitter.
use std::collections::HashSet;
use line_numbers::LinePositions;
use streaming_iterator::StreamingIterator as _;
use tree_sitter as ts;
@ -9,7 +7,7 @@ use typed_arena::Arena;
use super::syntax::MatchedPos;
use super::syntax::{self, StringKind};
use crate::hash::DftHashMap;
use crate::hash::{DftHashMap, DftHashSet};
use crate::options::DiffOptions;
use crate::parse::guess_language as guess;
use crate::parse::syntax::{AtomKind, Syntax};
@ -45,7 +43,7 @@ pub(crate) struct TreeSitterConfig {
/// all the children in the source. This is known limitation of
/// tree-sitter, and occurs more often for complex string syntax.
/// <https://github.com/tree-sitter/tree-sitter/issues/1156>
atom_nodes: HashSet<&'static str>,
atom_nodes: DftHashSet<&'static str>,
/// We want to consider delimiter tokens as part of lists, not
/// standalone atoms. Tree-sitter includes delimiter tokens, so
@ -356,7 +354,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_elvish() };
TreeSitterConfig {
language: language.clone(),
atom_nodes: [].into(),
atom_nodes: [].into_iter().collect(),
delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("|", "|")],
highlight_query: ts::Query::new(
&language,
@ -370,7 +368,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_erlang() };
TreeSitterConfig {
language: language.clone(),
atom_nodes: [].into(),
atom_nodes: [].into_iter().collect(),
delimiter_tokens: vec![("(", ")"), ("{", "}"), ("[", "]")],
highlight_query: ts::Query::new(
&language,
@ -384,7 +382,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_fsharp() };
TreeSitterConfig {
language: language.clone(),
atom_nodes: ["string", "triple_quoted_string"].into(),
atom_nodes: ["string", "triple_quoted_string"].into_iter().collect(),
delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")],
highlight_query: ts::Query::new(
&language,
@ -398,7 +396,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_gleam() };
TreeSitterConfig {
language: language.clone(),
atom_nodes: ["string"].into(),
atom_nodes: ["string"].into_iter().collect(),
delimiter_tokens: vec![("(", ")"), ("[", "]"), ("{", "}")],
highlight_query: ts::Query::new(
&language,
@ -974,7 +972,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_smali() };
TreeSitterConfig {
language: language.clone(),
atom_nodes: HashSet::from(["string"]),
atom_nodes: vec!["string"].into_iter().collect(),
delimiter_tokens: Vec::new(),
highlight_query: ts::Query::new(
&language,
@ -1018,7 +1016,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig {
let language = unsafe { tree_sitter_swift() };
TreeSitterConfig {
language: language.clone(),
atom_nodes: ["line_string_literal"].into(),
atom_nodes: ["line_string_literal"].into_iter().collect(),
delimiter_tokens: vec![("{", "}"), ("(", ")"), ("[", "]"), ("<", ">")],
highlight_query: ts::Query::new(
&language,
@ -1278,10 +1276,10 @@ fn tree_highlights(
let mut qc = ts::QueryCursor::new();
let mut q_matches = qc.matches(&config.highlight_query, tree.root_node(), src.as_bytes());
let mut comment_ids = HashSet::new();
let mut keyword_ids = HashSet::new();
let mut string_ids = HashSet::new();
let mut type_ids = HashSet::new();
let mut comment_ids = DftHashSet::default();
let mut keyword_ids = DftHashSet::default();
let mut string_ids = DftHashSet::default();
let mut type_ids = DftHashSet::default();
while let Some(m) = q_matches.next() {
for c in m.captures {
@ -1505,10 +1503,10 @@ fn find_delim_positions(
#[derive(Debug)]
pub(crate) struct HighlightedNodeIds {
keyword_ids: HashSet<usize>,
comment_ids: HashSet<usize>,
string_ids: HashSet<usize>,
type_ids: HashSet<usize>,
keyword_ids: DftHashSet<usize>,
comment_ids: DftHashSet<usize>,
string_ids: DftHashSet<usize>,
type_ids: DftHashSet<usize>,
}
/// Convert all the tree-sitter nodes at this level to difftastic