Preserve the outer delimiter when shrinking

Previously, we'd always discard the outer delimiter if it matched on
both sides. This prevented the tree diff finding optimal diffs.

Fixes #124
pull/166/head
Wilfred Hughes 2022-03-15 22:46:42 +07:00
parent f4f12003cb
commit 6d58247465
5 changed files with 130 additions and 127 deletions

@ -10,6 +10,9 @@ incorrect diffs.
Fixed an issue where lines were not aligned correctly after correcting
sliders.
Fixed an issue the outermost delimiter in lists was sometimes
incorrectly marked as unchanged, producing non-optimal diffs.
### Display
Fixed an issue where some lines in a hunk were not displayed.

@ -73,6 +73,9 @@ c9b74f137aada068b0a317c09966e705 -
sample_files/ocaml_before.ml sample_files/ocaml_after.ml
10f583899eee701776e2b25d96e78b56 -
sample_files/outer_delimiter_before.el sample_files/outer_delimiter_after.el
7b7f8b78e7b9544473b1a5c55abc3372 -
sample_files/ruby_before.rb sample_files/ruby_after.rb
4a9847a91e32ec6afdc2f0b01e28d2d6 -

@ -0,0 +1,3 @@
(foo
(read)
)

@ -9,8 +9,19 @@ pub fn mark_unchanged<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
) -> Vec<(Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)> {
let (lhs_nodes, rhs_nodes) = shrink_unchanged_at_ends(lhs_nodes, rhs_nodes);
let (_, lhs_nodes, rhs_nodes) = shrink_unchanged_at_ends(lhs_nodes, rhs_nodes);
split_unchanged(&lhs_nodes, &rhs_nodes)
}
enum ChangeState {
Unchanged,
PossiblyChanged,
}
fn split_unchanged<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
) -> Vec<(Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)> {
let size_threshold = if let Ok(env_threshold) = std::env::var("DFT_TINY_THRESHOLD") {
env_threshold
.parse::<u32>()
@ -20,25 +31,36 @@ pub fn mark_unchanged<'a>(
TINY_TREE_THRESHOLD
};
let mut possibly_changed = vec![];
mark_unchanged_toplevel(
&lhs_nodes,
&rhs_nodes,
&mut possibly_changed,
size_threshold,
);
possibly_changed
let mut res: Vec<(Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)> = vec![];
for (cs, lhs_section_nodes, rhs_section_nodes) in
split_unchanged_toplevel(lhs_nodes, rhs_nodes, size_threshold)
{
match cs {
ChangeState::Unchanged => {
assert_eq!(lhs_section_nodes.len(), rhs_section_nodes.len());
for (lhs_section_node, rhs_section_node) in
lhs_section_nodes.iter().zip(rhs_section_nodes.iter())
{
lhs_section_node.set_change_deep(ChangeKind::Unchanged(rhs_section_node));
rhs_section_node.set_change_deep(ChangeKind::Unchanged(lhs_section_node));
}
}
ChangeState::PossiblyChanged => {
res.push((lhs_section_nodes, rhs_section_nodes));
}
}
}
res
}
/// Mark top-level nodes as unchanged if they have exactly the same
/// content on both sides. Write possibly changed sequences of nodes
/// to `possibly_changed`.
fn mark_unchanged_toplevel<'a>(
/// content on both sides.
fn split_unchanged_toplevel<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
possibly_changed: &mut Vec<(Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)>,
size_threshold: u32,
) {
) -> Vec<(ChangeState, Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)> {
let lhs_node_ids = lhs_nodes
.iter()
.map(|n| EqOnFirstItem(n.content_id(), *n))
@ -48,6 +70,7 @@ fn mark_unchanged_toplevel<'a>(
.map(|n| EqOnFirstItem(n.content_id(), *n))
.collect::<Vec<_>>();
let mut res: Vec<(ChangeState, Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)> = vec![];
let mut section_lhs_nodes = vec![];
let mut section_rhs_nodes = vec![];
@ -77,19 +100,40 @@ fn mark_unchanged_toplevel<'a>(
section_rhs_nodes.push(rhs_node);
} else {
if !section_lhs_nodes.is_empty() || !section_rhs_nodes.is_empty() {
mark_unchanged_outer_list(
&section_lhs_nodes,
&section_rhs_nodes,
possibly_changed,
size_threshold,
);
match as_singleton_list_children(&section_lhs_nodes, &section_rhs_nodes) {
Some((lhs_children, rhs_children)) => {
let mut split_children = split_unchanged_toplevel(
&lhs_children,
&rhs_children,
size_threshold,
);
if split_children.len() > 1 {
// Managed to further split.
res.append(&mut split_children);
} else {
// Did not split further. Keep the outer list, so we can use
// its delimiter when doing the tree diff.
res.push((
ChangeState::PossiblyChanged,
section_lhs_nodes,
section_rhs_nodes,
));
}
}
None => {
res.push((
ChangeState::PossiblyChanged,
section_lhs_nodes,
section_rhs_nodes,
));
}
}
section_lhs_nodes = vec![];
section_rhs_nodes = vec![];
}
lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node));
rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node));
res.push((ChangeState::Unchanged, vec![lhs_node], vec![rhs_node]));
}
}
myers_diff::DiffResult::Left(lhs) => {
@ -102,25 +146,30 @@ fn mark_unchanged_toplevel<'a>(
}
if !section_lhs_nodes.is_empty() || !section_rhs_nodes.is_empty() {
mark_unchanged_outer_list(
&section_lhs_nodes,
&section_rhs_nodes,
possibly_changed,
size_threshold,
);
res.push((
ChangeState::PossiblyChanged,
section_lhs_nodes,
section_rhs_nodes,
));
}
res
}
/// If `lhs_nodes` and `rhs_nodes` are both a single list with the
/// same delimiter, mark the outer list as unchanged.
fn mark_unchanged_outer_list<'a>(
#[derive(Debug, Clone)]
struct EqOnFirstItem<X, Y>(X, Y);
impl<X: Eq, Y> PartialEq for EqOnFirstItem<X, Y> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
impl<X: Eq, Y> Eq for EqOnFirstItem<X, Y> {}
fn as_singleton_list_children<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
possibly_changed: &mut Vec<(Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)>,
size_threshold: u32,
) {
assert!(!lhs_nodes.is_empty() || !rhs_nodes.is_empty());
) -> Option<(Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>)> {
if let (
[Syntax::List {
open_content: lhs_open,
@ -137,33 +186,19 @@ fn mark_unchanged_outer_list<'a>(
) = (lhs_nodes, rhs_nodes)
{
if lhs_open == rhs_open && lhs_close == rhs_close {
lhs_nodes[0].set_change(ChangeKind::Unchanged(rhs_nodes[0]));
rhs_nodes[0].set_change(ChangeKind::Unchanged(lhs_nodes[0]));
mark_unchanged_toplevel(lhs_children, rhs_children, possibly_changed, size_threshold);
} else {
possibly_changed.push((lhs_nodes.into(), rhs_nodes.into()));
return Some((lhs_children.to_vec(), rhs_children.to_vec()));
}
} else {
possibly_changed.push((lhs_nodes.into(), rhs_nodes.into()));
}
}
#[derive(Debug, Clone)]
struct EqOnFirstItem<X, Y>(X, Y);
impl<X: Eq, Y> PartialEq for EqOnFirstItem<X, Y> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
None
}
impl<X: Eq, Y> Eq for EqOnFirstItem<X, Y> {}
/// If we're comparing two lists that have the same delimiters, mark
/// the delimiters as unchanged and return the children.
fn shrink_unchanged_delimiters<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) {
) -> (bool, Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) {
if let (
[Syntax::List {
open_content: lhs_open,
@ -180,14 +215,17 @@ fn shrink_unchanged_delimiters<'a>(
) = (lhs_nodes, rhs_nodes)
{
if lhs_open == rhs_open && lhs_close == rhs_close {
lhs_nodes[0].set_change(ChangeKind::Unchanged(rhs_nodes[0]));
rhs_nodes[0].set_change(ChangeKind::Unchanged(lhs_nodes[0]));
return shrink_unchanged_at_ends(lhs_children, rhs_children);
let (changed_later, lhs_shrunk_nodes, rhs_shrunk_nodes) =
shrink_unchanged_at_ends(lhs_children, rhs_children);
if changed_later {
lhs_nodes[0].set_change(ChangeKind::Unchanged(rhs_nodes[0]));
rhs_nodes[0].set_change(ChangeKind::Unchanged(lhs_nodes[0]));
return (true, lhs_shrunk_nodes, rhs_shrunk_nodes);
}
}
}
(Vec::from(lhs_nodes), Vec::from(rhs_nodes))
(false, Vec::from(lhs_nodes), Vec::from(rhs_nodes))
}
/// Skip syntax nodes at the beginning or end that are obviously
@ -198,10 +236,11 @@ fn shrink_unchanged_delimiters<'a>(
fn shrink_unchanged_at_ends<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) {
) -> (bool, Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) {
let mut lhs_nodes = lhs_nodes;
let mut rhs_nodes = rhs_nodes;
let mut changed = false;
while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.first(), rhs_nodes.first()) {
// We don't consider TINY_TREE_THRESHOLD here because we are
// only considering equal nodes at the beginning or end of the
@ -211,6 +250,7 @@ fn shrink_unchanged_at_ends<'a>(
lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node));
rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node));
changed = true;
lhs_nodes = &lhs_nodes[1..];
rhs_nodes = &rhs_nodes[1..];
} else {
@ -223,6 +263,7 @@ fn shrink_unchanged_at_ends<'a>(
lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node));
rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node));
changed = true;
lhs_nodes = &lhs_nodes[..lhs_nodes.len() - 1];
rhs_nodes = &rhs_nodes[..rhs_nodes.len() - 1];
} else {
@ -231,9 +272,11 @@ fn shrink_unchanged_at_ends<'a>(
}
if lhs_nodes.len() == 1 && rhs_nodes.len() == 1 {
shrink_unchanged_delimiters(lhs_nodes, rhs_nodes)
let (changed_later, lhs_nodes, rhs_nodes) =
shrink_unchanged_delimiters(lhs_nodes, rhs_nodes);
(changed || changed_later, lhs_nodes, rhs_nodes)
} else {
(Vec::from(lhs_nodes), Vec::from(rhs_nodes))
(changed, Vec::from(lhs_nodes), Vec::from(rhs_nodes))
}
}
@ -256,7 +299,7 @@ mod tests {
let rhs_nodes = parse(&arena, "unchanged X", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let (lhs_after_skip, rhs_after_skip) = shrink_unchanged_at_ends(&lhs_nodes, &rhs_nodes);
let (_, lhs_after_skip, rhs_after_skip) = shrink_unchanged_at_ends(&lhs_nodes, &rhs_nodes);
assert_eq!(
lhs_nodes[0].change(),
@ -280,7 +323,7 @@ mod tests {
let rhs_nodes = parse(&arena, "X unchanged", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let (lhs_after_skip, rhs_after_skip) = shrink_unchanged_at_ends(&lhs_nodes, &rhs_nodes);
let (_, lhs_after_skip, rhs_after_skip) = shrink_unchanged_at_ends(&lhs_nodes, &rhs_nodes);
assert_eq!(
lhs_nodes[2].change(),
@ -295,39 +338,6 @@ mod tests {
assert_eq!(rhs_after_skip.len(), 1);
}
#[test]
fn test_shrink_unchanged_delimiters() {
let arena = Arena::new();
let config = from_language(guess_language::Language::EmacsLisp);
let lhs_nodes = parse(&arena, "(A)", &config);
let rhs_nodes = parse(&arena, "(B)", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let (lhs_after_skip, rhs_after_skip) = shrink_unchanged_delimiters(&lhs_nodes, &rhs_nodes);
// The only possibly changed nodes are inside the lists.
assert_eq!(lhs_after_skip.len(), 1);
assert!(matches!(lhs_after_skip[0], Syntax::Atom { .. }));
assert_eq!(rhs_after_skip.len(), 1);
assert!(matches!(rhs_after_skip[0], Syntax::Atom { .. }));
// The outer list delimiters are unchanged.
assert_eq!(
lhs_nodes[0].change(),
Some(ChangeKind::Unchanged(rhs_nodes[0]))
);
assert_eq!(
rhs_nodes[0].change(),
Some(ChangeKind::Unchanged(lhs_nodes[0]))
);
// The inner items haven't had their change set yet.
assert_eq!(lhs_after_skip[0].change(), None);
assert_eq!(rhs_after_skip[0].change(), None);
}
#[test]
fn test_shrink_unchanged_nested() {
let arena = Arena::new();
@ -337,14 +347,14 @@ mod tests {
let rhs_nodes = parse(&arena, "unchanged-before (more-unchanged (B))", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let (lhs_after_skip, rhs_after_skip) = shrink_unchanged_at_ends(&lhs_nodes, &rhs_nodes);
let (_, lhs_after_skip, rhs_after_skip) = shrink_unchanged_at_ends(&lhs_nodes, &rhs_nodes);
// The only possibly changed nodes are inside the lists.
assert_eq!(lhs_after_skip.len(), 1);
assert!(matches!(lhs_after_skip[0], Syntax::Atom { .. }));
assert!(matches!(lhs_after_skip[0], Syntax::List { .. }));
assert_eq!(rhs_after_skip.len(), 1);
assert!(matches!(rhs_after_skip[0], Syntax::Atom { .. }));
assert!(matches!(rhs_after_skip[0], Syntax::List { .. }));
// The inner items haven't had their change set yet.
assert_eq!(lhs_after_skip[0].change(), None);
@ -361,8 +371,7 @@ mod tests {
let rhs_nodes = parse(&arena, "(unchanged (1 2 3 4 5 6 7 8 9 10)) X", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let mut res = vec![];
mark_unchanged_toplevel(&lhs_nodes, &rhs_nodes, &mut res, TINY_TREE_THRESHOLD);
let res = split_unchanged(&lhs_nodes, &rhs_nodes);
assert_eq!(res.len(), 1);
let (lhs_after_skip, rhs_after_skip) = &res[0];
@ -389,8 +398,7 @@ mod tests {
let rhs_nodes = parse(&arena, "X (unchanged (1 2 3 4 5 6 7 8 9 10))", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let mut res = vec![];
mark_unchanged_toplevel(&lhs_nodes, &rhs_nodes, &mut res, TINY_TREE_THRESHOLD);
let res = split_unchanged(&lhs_nodes, &rhs_nodes);
assert_eq!(res.len(), 1);
let (lhs_after_skip, rhs_after_skip) = &res[0];
@ -409,7 +417,7 @@ mod tests {
}
#[test]
fn test_mark_unchanged_outer_delimiters() {
fn test_mark_preserves_outer_delimiters() {
let arena = Arena::new();
let config = from_language(guess_language::Language::EmacsLisp);
@ -417,32 +425,21 @@ mod tests {
let rhs_nodes = parse(&arena, "(B)", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let mut res = vec![];
mark_unchanged_toplevel(&lhs_nodes, &rhs_nodes, &mut res, TINY_TREE_THRESHOLD);
let res = split_unchanged(&lhs_nodes, &rhs_nodes);
assert_eq!(res.len(), 1);
let (lhs_after_skip, rhs_after_skip) = &res[0];
// The only possibly changed nodes are inside the lists.
assert_eq!(lhs_after_skip.len(), 1);
assert!(matches!(lhs_after_skip[0], Syntax::Atom { .. }));
assert!(matches!(lhs_after_skip[0], Syntax::List { .. }));
assert_eq!(rhs_after_skip.len(), 1);
assert!(matches!(rhs_after_skip[0], Syntax::Atom { .. }));
// The outer list delimiters are unchanged.
assert_eq!(
lhs_nodes[0].change(),
Some(ChangeKind::Unchanged(rhs_nodes[0]))
);
assert_eq!(
rhs_nodes[0].change(),
Some(ChangeKind::Unchanged(lhs_nodes[0]))
);
assert!(matches!(rhs_after_skip[0], Syntax::List { .. }));
// The inner items haven't had their change set yet.
assert_eq!(lhs_after_skip[0].change(), None);
assert_eq!(rhs_after_skip[0].change(), None);
// The outer list delimiters don't have their change set yet.
assert_eq!(lhs_nodes[0].change(), None);
assert_eq!(rhs_nodes[0].change(), None);
}
#[test]
@ -462,9 +459,7 @@ mod tests {
);
init_all_info(&lhs_nodes, &rhs_nodes);
let mut res = vec![];
mark_unchanged_toplevel(&lhs_nodes, &rhs_nodes, &mut res, TINY_TREE_THRESHOLD);
let res = split_unchanged(&lhs_nodes, &rhs_nodes);
assert_eq!(
res,
vec![
@ -500,9 +495,7 @@ mod tests {
);
init_all_info(&lhs_nodes, &rhs_nodes);
let mut res = vec![];
mark_unchanged_toplevel(&lhs_nodes, &rhs_nodes, &mut res, TINY_TREE_THRESHOLD);
let res = split_unchanged(&lhs_nodes, &rhs_nodes);
assert_eq!(
res,
vec![