Prefer matching comments that are similar

Previously, we'd match up any pair of comments with a levenstein
distance of 0.4 or more. This was reasonably effective, but misssed
opportunities even more precise diffs.

Instead, prefer the comment matching with the highest levenshtein
distance.

We still only highlight word-level changes for comments with a
levenshtein of 0.4 or more.

Closes #27
ida_star
Wilfred Hughes 2021-07-30 22:54:30 +07:00
parent 76c7f35d56
commit 573cd32a01
2 changed files with 60 additions and 17 deletions

@ -1,5 +1,10 @@
## 0.7 (unreleased) ## 0.7 (unreleased)
### Diffing
Difftastic will now prefer matching up comments that are similar
(according to levenshtein distance).
## 0.6 ## 0.6
### Parsing ### Parsing

@ -76,7 +76,7 @@ impl<'a> Eq for OrdVertex<'a> {}
enum Edge { enum Edge {
UnchangedNode(u64), UnchangedNode(u64),
UnchangedDelimiter(u64), UnchangedDelimiter(u64),
ReplacedComment, ReplacedComment { levenshtein_pct: u8 },
NovelAtomLHS { contiguous: bool }, NovelAtomLHS { contiguous: bool },
NovelAtomRHS { contiguous: bool }, NovelAtomRHS { contiguous: bool },
NovelDelimiterLHS { contiguous: bool }, NovelDelimiterLHS { contiguous: bool },
@ -94,21 +94,21 @@ impl Edge {
UnchangedDelimiter(depth_difference) => 100 + min(40, *depth_difference), UnchangedDelimiter(depth_difference) => 100 + min(40, *depth_difference),
// Replacing a comment is better than treating it as novel. // Replacing a comment is better than treating it as novel.
ReplacedComment => 150, ReplacedComment { levenshtein_pct } => 150 + ((100 - levenshtein_pct) as u64),
// Otherwise, we've added/removed a node. // Otherwise, we've added/removed a node.
NovelAtomLHS { contiguous } | NovelAtomRHS { contiguous } => { NovelAtomLHS { contiguous } | NovelAtomRHS { contiguous } => {
if *contiguous { if *contiguous {
200 300
} else { } else {
201 301
} }
} }
NovelDelimiterLHS { contiguous } | NovelDelimiterRHS { contiguous } => { NovelDelimiterLHS { contiguous } | NovelDelimiterRHS { contiguous } => {
if *contiguous { if *contiguous {
200 300
} else { } else {
201 301
} }
} }
@ -262,12 +262,11 @@ fn neighbours<'a>(v: &Vertex<'a>) -> Vec<(Edge, Vertex<'a>)> {
{ {
// Both sides are comments and their content is reasonably // Both sides are comments and their content is reasonably
// similar. // similar.
if *lhs_is_comment if *lhs_is_comment && *rhs_is_comment {
&& *rhs_is_comment let levenshtein_pct =
&& normalized_levenshtein(lhs_content, rhs_content) > 0.4 (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8;
{
res.push(( res.push((
ReplacedComment, ReplacedComment { levenshtein_pct },
Vertex { Vertex {
lhs_syntax: lhs_syntax.next(), lhs_syntax: lhs_syntax.next(),
lhs_prev_novel: None, lhs_prev_novel: None,
@ -428,11 +427,18 @@ fn mark_route(route: &[(Edge, Vertex)]) {
lhs.set_change(ChangeKind::Unchanged(rhs)); lhs.set_change(ChangeKind::Unchanged(rhs));
rhs.set_change(ChangeKind::Unchanged(lhs)); rhs.set_change(ChangeKind::Unchanged(lhs));
} }
ReplacedComment => { ReplacedComment { levenshtein_pct } => {
let lhs = v.lhs_syntax.unwrap(); let lhs = v.lhs_syntax.unwrap();
let rhs = v.rhs_syntax.unwrap(); let rhs = v.rhs_syntax.unwrap();
lhs.set_change(ChangeKind::ReplacedComment(lhs, rhs));
rhs.set_change(ChangeKind::ReplacedComment(rhs, lhs)); if *levenshtein_pct > 40 {
lhs.set_change(ChangeKind::ReplacedComment(lhs, rhs));
rhs.set_change(ChangeKind::ReplacedComment(rhs, lhs));
} else {
lhs.set_change(ChangeKind::Novel);
rhs.set_change(ChangeKind::Novel);
}
} }
NovelAtomLHS { .. } | NovelDelimiterLHS { .. } => { NovelAtomLHS { .. } | NovelDelimiterLHS { .. } => {
let lhs = v.lhs_syntax.unwrap(); let lhs = v.lhs_syntax.unwrap();
@ -825,6 +831,7 @@ mod tests {
] ]
); );
} }
#[test] #[test]
fn replace_similar_comment() { fn replace_similar_comment() {
let arena = Arena::new(); let arena = Arena::new();
@ -852,7 +859,7 @@ mod tests {
let route = shortest_path(start); let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec(); let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(actions, vec![ReplacedComment]); assert_eq!(actions, vec![ReplacedComment { levenshtein_pct: 84 }]);
} }
#[test] #[test]
@ -877,12 +884,43 @@ mod tests {
}; };
let route = shortest_path(start); let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(actions, vec![ReplacedComment { levenshtein_pct: 11 }]);
}
#[test]
fn replace_comment_prefer_most_similar() {
let arena = Arena::new();
let lhs: Vec<&Syntax> = vec![
Syntax::new_comment(&arena, pos_helper(1), "the quick brown fox"),
Syntax::new_comment(&arena, pos_helper(2), "the quick brown thing"),
];
init_info(&lhs);
let rhs: Vec<&Syntax> = vec![Syntax::new_comment(
&arena,
pos_helper(1),
"the quick brown fox.",
)];
init_info(&rhs);
let start = Vertex {
lhs_syntax: lhs.get(0).map(|n| *n),
lhs_prev_novel: None,
rhs_syntax: rhs.get(0).map(|n| *n),
rhs_prev_novel: None,
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec(); let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!( assert_eq!(
actions, actions,
vec![ vec![
NovelAtomLHS { contiguous: false }, ReplacedComment {
NovelAtomRHS { contiguous: false } levenshtein_pct: 95
},
NovelAtomLHS { contiguous: false }
] ]
); );
} }