Use depth difference as a heuristic when comparing equal nodes

This reverts commit 7544874a55. It turns
out there are cases where this is still necessary (see new sample
file). It's also performance neutral.

This bug became more obvious with the recent 'skip unchanged'
optimisations. The optimisation changed the number of preceeding nodes and
exposed this bug more often.
pull/297/head
Wilfred Hughes 2022-03-08 09:26:09 +07:00
parent 6f65bbbbb0
commit 88ae00bd88
6 changed files with 54 additions and 16 deletions

@ -4,6 +4,10 @@ Difftastic now requires Rust 1.56 to build.
### Diffing
Improved diff results when choosing between syntax nodes at different
nesting levels. This is restoring a heuristic that was removed in
0.20.
Improved diff results when lists have unequal sizes.
Improved diff results when the language parser thinks that names occur

@ -55,6 +55,9 @@ c7a5800de30556a6a2cf795744fc28b9 -
sample_files/nest_before.rs sample_files/nest_after.rs
791901de922abc7f24e5b9068706417d -
sample_files/nesting_before.el sample_files/nesting_after.el
c9b74f137aada068b0a317c09966e705 -
sample_files/ocaml_before.ml sample_files/ocaml_after.ml
10f583899eee701776e2b25d96e78b56 -
@ -71,7 +74,7 @@ sample_files/simple_before.txt sample_files/simple_after.txt
4b653ebe89321835c35722dd065cf6a2 -
sample_files/slider_before.rs sample_files/slider_after.rs
170b6e249f414881f205c60aac6796d1 -
78de439db7fba270a0d515ee9b786d25 -
sample_files/slow_before.rs sample_files/slow_after.rs
367ef5db3827a8945b16cad2a8cfad47 -

@ -0,0 +1,3 @@
a
b
foo

@ -0,0 +1,3 @@
b
(foo)
foo

@ -150,7 +150,12 @@ mod tests {
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(actions, vec![UnchangedNode]);
assert_eq!(
actions,
vec![UnchangedNode {
depth_difference: 0
}]
);
}
#[test]
@ -188,7 +193,9 @@ mod tests {
assert_eq!(
actions,
vec![
EnterUnchangedDelimiter,
EnterUnchangedDelimiter {
depth_difference: 0
},
NovelAtomLHS { contiguous: false },
ExitDelimiterBoth,
]
@ -228,7 +235,9 @@ mod tests {
assert_eq!(
actions,
vec![
EnterUnchangedDelimiter,
EnterUnchangedDelimiter {
depth_difference: 0
},
NovelAtomRHS { contiguous: false },
NovelAtomRHS { contiguous: false },
ExitDelimiterBoth,
@ -274,8 +283,12 @@ mod tests {
vec![
EnterNovelDelimiterRHS { contiguous: false },
EnterNovelDelimiterLHS { contiguous: false },
UnchangedNode,
UnchangedNode,
UnchangedNode {
depth_difference: 0
},
UnchangedNode {
depth_difference: 0
},
ExitDelimiterRHS,
ExitDelimiterLHS,
],
@ -307,7 +320,9 @@ mod tests {
assert_eq!(
actions,
vec![
UnchangedNode,
UnchangedNode {
depth_difference: 0
},
NovelAtomLHS { contiguous: false },
NovelAtomLHS { contiguous: true },
]

@ -2,6 +2,7 @@
use rpds::Stack;
use std::{
cmp::min,
fmt,
hash::{Hash, Hasher},
};
@ -250,8 +251,8 @@ impl<'a> Vertex<'a> {
/// See [`neighbours`] for all the edges available for a given `Vertex`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Edge {
UnchangedNode,
EnterUnchangedDelimiter,
UnchangedNode { depth_difference: u32 },
EnterUnchangedDelimiter { depth_difference: u32 },
ReplacedComment { levenshtein_pct: u8 },
NovelAtomLHS { contiguous: bool },
NovelAtomRHS { contiguous: bool },
@ -273,9 +274,10 @@ impl Edge {
ExitDelimiterLHS | ExitDelimiterRHS => 2,
// Matching nodes is always best.
UnchangedNode => 1,
// TODO: now we model parents correctly, do we need to track depth difference?
UnchangedNode { depth_difference } => min(40, *depth_difference as u64 + 1),
// Matching an outer delimiter is good.
EnterUnchangedDelimiter => 100,
EnterUnchangedDelimiter { depth_difference } => 100 + min(40, *depth_difference as u64),
// Replacing a comment is better than treating it as novel.
ReplacedComment { levenshtein_pct } => 150 + u64::from(100 - levenshtein_pct),
@ -378,9 +380,13 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) {
if lhs_syntax == rhs_syntax {
let depth_difference = (lhs_syntax.num_ancestors() as i32
- rhs_syntax.num_ancestors() as i32)
.abs() as u32;
// Both nodes are equal, the happy case.
buf[i] = Some((
UnchangedNode,
UnchangedNode { depth_difference },
Vertex {
lhs_syntax: lhs_syntax.next_sibling(),
rhs_syntax: rhs_syntax.next_sibling(),
@ -389,7 +395,7 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
rhs_parent_id: v.rhs_parent_id,
},
));
return;
i += 1;
}
if let (
@ -415,8 +421,12 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
// TODO: be consistent between parents_next and next_parents.
let parents_next = push_both_delimiters(&v.parents, lhs_syntax, rhs_syntax);
let depth_difference = (lhs_syntax.num_ancestors() as i32
- rhs_syntax.num_ancestors() as i32)
.abs() as u32;
buf[i] = Some((
EnterUnchangedDelimiter,
EnterUnchangedDelimiter { depth_difference },
Vertex {
lhs_syntax: lhs_next,
rhs_syntax: rhs_next,
@ -597,14 +607,14 @@ pub fn mark_route(route: &[(Edge, Vertex)]) {
ExitDelimiterBoth | ExitDelimiterLHS | ExitDelimiterRHS => {
// Nothing to do: we have already marked this node when we entered it.
}
UnchangedNode => {
UnchangedNode { .. } => {
// No change on this node or its children.
let lhs = v.lhs_syntax.unwrap();
let rhs = v.rhs_syntax.unwrap();
lhs.set_change_deep(ChangeKind::Unchanged(rhs));
rhs.set_change_deep(ChangeKind::Unchanged(lhs));
}
EnterUnchangedDelimiter => {
EnterUnchangedDelimiter { .. } => {
// No change on the outer delimiter, but children may
// have changed.
let lhs = v.lhs_syntax.unwrap();