Prefer marking large subtrees as novel rather than largely novel

This solves the "replace function A with completely different function
B" problem, even if they start with the same function keyword.
pull/25/head
Wilfred Hughes 2021-07-21 01:02:20 +07:00
parent 4c33f34025
commit 61446c916a
4 changed files with 156 additions and 6 deletions

@ -32,6 +32,12 @@ Reduced memory usage when diffing.
Difftastic now highlights word-level changes between comments. Difftastic now highlights word-level changes between comments.
Large AST trees with very few common nodes are now considered wholly
novel, rather than trying to match up the few common nodes. This
avoids nonsensical diffs when toplevel function A is completely
replaced with function B and they only have something trivial in
common (e.g. the `function` keyword).
### Docs ### Docs
Improved `--help`. Improved `--help`.

@ -47,10 +47,6 @@ Performance. Difftastic scales relatively poorly on files with a large
number of changes, and can use a lot of memory. This might be solved number of changes, and can use a lot of memory. This might be solved
by A* search. by A* search.
Replacing top-level expressions. If you delete a function and write a
completely different new one, difftastic will show the small number of
common tokens between them.
Comments. Small changes can show big diffs. Comments. Small changes can show big diffs.
## Non-goals ## Non-goals

@ -82,6 +82,8 @@ enum Edge {
NovelAtomRHS { contiguous: bool }, NovelAtomRHS { contiguous: bool },
NovelDelimiterLHS { contiguous: bool }, NovelDelimiterLHS { contiguous: bool },
NovelDelimiterRHS { contiguous: bool }, NovelDelimiterRHS { contiguous: bool },
NovelTreeLHS { num_descendants: u64 },
NovelTreeRHS { num_descendants: u64 },
} }
impl Edge { impl Edge {
@ -110,6 +112,13 @@ impl Edge {
201 201
} }
} }
// For large trees, it's better to mark the whole tree as
// novel rather than marking 90% of the children as
// novel. This stops us matching up completely unrelated trees.
NovelTreeLHS { num_descendants } | NovelTreeRHS { num_descendants } => {
200 + (*num_descendants - 10) * NovelDelimiterLHS { contiguous: false }.cost()
}
} }
} }
} }
@ -172,6 +181,8 @@ fn shortest_path(start: Vertex) -> Vec<(Edge, Vertex)> {
res res
} }
const NOVEL_TREE_THRESHOLD: usize = 20;
fn neighbours<'a>(v: &Vertex<'a>) -> Vec<(Edge, Vertex<'a>)> { fn neighbours<'a>(v: &Vertex<'a>) -> Vec<(Edge, Vertex<'a>)> {
let mut res = vec![]; let mut res = vec![];
@ -279,7 +290,11 @@ fn neighbours<'a>(v: &Vertex<'a>) -> Vec<(Edge, Vertex<'a>)> {
)); ));
} }
// Step into this partially/fully novel list. // Step into this partially/fully novel list.
Syntax::List { children, .. } => { Syntax::List {
children,
num_descendants,
..
} => {
let lhs_next = if children.is_empty() { let lhs_next = if children.is_empty() {
lhs_syntax.next() lhs_syntax.next()
} else { } else {
@ -298,6 +313,20 @@ fn neighbours<'a>(v: &Vertex<'a>) -> Vec<(Edge, Vertex<'a>)> {
rhs_prev_novel: v.rhs_prev_novel, rhs_prev_novel: v.rhs_prev_novel,
}, },
)); ));
if *num_descendants > NOVEL_TREE_THRESHOLD {
res.push((
NovelTreeLHS {
num_descendants: *num_descendants as u64,
},
Vertex {
lhs_syntax: lhs_syntax.next(),
lhs_prev_novel: v.lhs_prev_novel,
rhs_syntax: v.rhs_syntax,
rhs_prev_novel: v.rhs_prev_novel,
},
));
}
} }
} }
} }
@ -319,7 +348,11 @@ fn neighbours<'a>(v: &Vertex<'a>) -> Vec<(Edge, Vertex<'a>)> {
)); ));
} }
// Step into this partially/fully novel list. // Step into this partially/fully novel list.
Syntax::List { children, .. } => { Syntax::List {
children,
num_descendants,
..
} => {
let rhs_next = if children.is_empty() { let rhs_next = if children.is_empty() {
rhs_syntax.next() rhs_syntax.next()
} else { } else {
@ -337,6 +370,20 @@ fn neighbours<'a>(v: &Vertex<'a>) -> Vec<(Edge, Vertex<'a>)> {
rhs_prev_novel: v.rhs_prev_novel, rhs_prev_novel: v.rhs_prev_novel,
}, },
)); ));
if *num_descendants > NOVEL_TREE_THRESHOLD {
res.push((
NovelTreeRHS {
num_descendants: *num_descendants as u64,
},
Vertex {
lhs_syntax: v.lhs_syntax,
lhs_prev_novel: v.lhs_prev_novel,
rhs_syntax: rhs_syntax.next(),
rhs_prev_novel: v.rhs_prev_novel,
},
));
}
} }
} }
} }
@ -387,6 +434,14 @@ fn mark_route(route: &[(Edge, Vertex)]) {
let rhs = v.rhs_syntax.unwrap(); let rhs = v.rhs_syntax.unwrap();
rhs.set_change(ChangeKind::Novel); rhs.set_change(ChangeKind::Novel);
} }
NovelTreeLHS { .. } => {
let lhs = v.lhs_syntax.unwrap();
lhs.set_change_deep(ChangeKind::Novel);
}
NovelTreeRHS { .. } => {
let rhs = v.rhs_syntax.unwrap();
rhs.set_change_deep(ChangeKind::Novel);
}
} }
} }
} }
@ -642,4 +697,95 @@ mod tests {
] ]
); );
} }
#[test]
fn test_novel_tree() {
let arena = Arena::new();
let lhs: Vec<&Syntax> = vec![Syntax::new_list(
&arena,
"[".into(),
pos_helper(0),
vec![
Syntax::new_atom(&arena, pos_helper(1), "1"),
Syntax::new_atom(&arena, pos_helper(2), "2"),
Syntax::new_atom(&arena, pos_helper(3), "3"),
Syntax::new_atom(&arena, pos_helper(4), "4"),
Syntax::new_atom(&arena, pos_helper(5), "5"),
Syntax::new_atom(&arena, pos_helper(6), "6"),
Syntax::new_atom(&arena, pos_helper(7), "7"),
Syntax::new_atom(&arena, pos_helper(8), "8"),
Syntax::new_atom(&arena, pos_helper(9), "9"),
Syntax::new_atom(&arena, pos_helper(10), "10"),
Syntax::new_atom(&arena, pos_helper(11), "11"),
Syntax::new_atom(&arena, pos_helper(12), "12"),
Syntax::new_atom(&arena, pos_helper(13), "13"),
Syntax::new_atom(&arena, pos_helper(14), "14"),
Syntax::new_atom(&arena, pos_helper(15), "15"),
Syntax::new_atom(&arena, pos_helper(16), "16"),
Syntax::new_atom(&arena, pos_helper(17), "17"),
Syntax::new_atom(&arena, pos_helper(18), "18"),
Syntax::new_atom(&arena, pos_helper(19), "19"),
Syntax::new_atom(&arena, pos_helper(20), "20"),
Syntax::new_atom(&arena, pos_helper(21), "21"),
],
"]".into(),
pos_helper(100),
)];
init_info(&lhs);
let rhs: Vec<&Syntax> = vec![Syntax::new_list(
&arena,
"[".into(),
pos_helper(0),
vec![
Syntax::new_atom(&arena, pos_helper(1), "d1"),
Syntax::new_atom(&arena, pos_helper(2), "d2"),
Syntax::new_atom(&arena, pos_helper(3), "d3"),
Syntax::new_atom(&arena, pos_helper(4), "d4"),
Syntax::new_atom(&arena, pos_helper(5), "d5"),
Syntax::new_atom(&arena, pos_helper(6), "d6"),
Syntax::new_atom(&arena, pos_helper(7), "d7"),
Syntax::new_atom(&arena, pos_helper(8), "d8"),
Syntax::new_atom(&arena, pos_helper(9), "d9"),
// This is the only common atom:
Syntax::new_atom(&arena, pos_helper(10), "10"),
Syntax::new_atom(&arena, pos_helper(11), "d11"),
Syntax::new_atom(&arena, pos_helper(12), "d12"),
Syntax::new_atom(&arena, pos_helper(13), "d13"),
Syntax::new_atom(&arena, pos_helper(14), "d14"),
Syntax::new_atom(&arena, pos_helper(15), "d15"),
Syntax::new_atom(&arena, pos_helper(16), "d16"),
Syntax::new_atom(&arena, pos_helper(17), "d17"),
Syntax::new_atom(&arena, pos_helper(18), "d18"),
Syntax::new_atom(&arena, pos_helper(19), "d19"),
Syntax::new_atom(&arena, pos_helper(20), "d20"),
Syntax::new_atom(&arena, pos_helper(21), "d21"),
],
"]".into(),
pos_helper(100),
)];
init_info(&rhs);
let start = Vertex {
lhs_syntax: lhs.get(0).map(|n| *n),
lhs_prev_novel: None,
rhs_syntax: rhs.get(0).map(|n| *n),
rhs_prev_novel: None,
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
actions,
vec![
NovelTreeLHS {
num_descendants: 21
},
NovelTreeRHS {
num_descendants: 21
},
]
);
}
} }

@ -53,6 +53,8 @@ pub enum Syntax<'a> {
children: Vec<&'a Syntax<'a>>, children: Vec<&'a Syntax<'a>>,
close_position: Vec<SingleLineSpan>, close_position: Vec<SingleLineSpan>,
close_content: String, close_content: String,
// TODO: this probably makes more sense as a u64, since we
// don't use it for indexing.
num_descendants: usize, num_descendants: usize,
}, },
Atom { Atom {