Track entering/leaving parents explicitly

This produces significantly better diffs, and fixes some cases that
were outright wrong before.

Fixes #30
html_output
Wilfred Hughes 2022-02-16 22:55:26 +07:00
parent 692f08f1f1
commit 82890923cd
8 changed files with 311 additions and 99 deletions
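The core idea of the change: a search vertex no longer relies on a node's `next()` pointer to walk back out of a list implicitly. Instead, each vertex carries an explicit stack of the delimiters it has entered, and the search only reaches the end state once that stack has been popped again via the new MoveParent edges. A minimal standalone sketch of that invariant, using illustrative types rather than difftastic's real `Vertex`/`Syntax`:

```rust
// Minimal sketch of the vertex invariant this commit introduces, with
// illustrative types (difftastic's real Vertex stores Syntax references and an
// im_rc::Vector of parents). Entering a delimiter pushes onto `parents`; a
// MoveParent-style step pops it and resumes at the parent's next sibling.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct SketchVertex {
    lhs_position: Option<usize>,
    rhs_position: Option<usize>,
    // Stack of (lhs_parent_id, rhs_parent_id) for delimiters entered so far.
    parents: Vec<(Option<u32>, Option<u32>)>,
}

impl SketchVertex {
    fn is_end(&self) -> bool {
        // The end vertex must also have left every delimiter it entered;
        // previously "both sides exhausted" alone looked like the end.
        self.lhs_position.is_none() && self.rhs_position.is_none() && self.parents.is_empty()
    }
}

fn main() {
    let inside_unmatched_delimiter = SketchVertex {
        lhs_position: None,
        rhs_position: None,
        parents: vec![(Some(1), None)],
    };
    // Not the end: a MoveParentLHS-style step still has to pop the parent.
    assert!(!inside_unmatched_delimiter.is_end());
}
```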

@ -2,6 +2,12 @@
### Diffing
Diffing now correctly handles nodes being moved to parent
lists. Previously this would be ignored, leading to difftastic
incorrectly claiming things were unchanged. This also leads to better
diffing results in general, although it is somewhat slower (2x in
testing).
Improved slider logic in larger expressions.
Increased the default value DFT_NODE_LIMIT to 100,000 (from

Cargo.lock generated

@ -43,6 +43,15 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitmaps"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2"
dependencies = [
"typenum",
]
[[package]]
name = "cc"
version = "1.0.72"
@ -160,6 +169,7 @@ dependencies = [
"clap", "clap",
"const_format", "const_format",
"diff", "diff",
"im-rc",
"itertools", "itertools",
"lazy_static", "lazy_static",
"libc", "libc",
@ -222,6 +232,20 @@ dependencies = [
"quick-error", "quick-error",
] ]
[[package]]
name = "im-rc"
version = "15.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ca8957e71f04a205cb162508f9326aea04676c8dfd0711220190d6b83664f3f"
dependencies = [
"bitmaps",
"rand_core",
"rand_xoshiro",
"sized-chunks",
"typenum",
"version_check",
]
[[package]]
name = "indexmap"
version = "1.8.0"
@ -381,6 +405,21 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bde9110b6926b8aaf55623dd7a0e2545b83b9b01cd9f47d36275fc2bc69a0bda"
[[package]]
name = "rand_core"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
[[package]]
name = "rand_xoshiro"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9fcdd2e881d02f1d9390ae47ad8e5696a9e4be7b547a1da2afbc61973217004"
dependencies = [
"rand_core",
]
[[package]]
name = "rayon"
version = "1.5.1"
@ -444,6 +483,16 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "sized-chunks"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16d69225bde7a69b235da73377861095455d298f2b970996eec25ddbb42b3d1e"
dependencies = [
"bitmaps",
"typenum",
]
[[package]]
name = "strsim"
version = "0.10.0"
@ -502,12 +551,24 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0685c84d5d54d1c26f7d3eb96cd41550adb97baed141a761cf335d3d33bcd0ae"
[[package]]
name = "typenum"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"

@ -40,6 +40,7 @@ walkdir = "2.3.2"
term_size = "0.3.2" term_size = "0.3.2"
const_format = "0.2.22" const_format = "0.2.22"
owo-colors = "3.2.0" owo-colors = "3.2.0"
im-rc = "15.0.0"
[dev-dependencies] [dev-dependencies]
pretty_assertions = "1.0.0" pretty_assertions = "1.0.0"

@ -89,9 +89,6 @@ be confusing.
Robustness. Difftastic regularly has releases that fix crashes.
Diff accuracy. Some delimiter moves are currently ignored (see
[#30](https://github.com/Wilfred/difftastic/issues/30)).
## Non-goals
Patch files. If you want to create a patch that you can later apply,

@ -1,10 +1,10 @@
==> Building difftastic
==> Check outputs
sample_files/bad_combine_before.rs sample_files/bad_combine_after.rs
1390d71291b8221e7ced6078250fbd7b - 5d0d52b7e57900da78ba1f96634901fe -
sample_files/clojure_before.clj sample_files/clojure_after.clj
5d8238346885f21b1075debaf97b4aa9 - 6c82d4f44e0b2474b2dd82682940da7e -
sample_files/comments_before.rs sample_files/comments_after.rs
c42c13edc47f4d6a74e3038835d97505 -
@ -34,28 +34,28 @@ sample_files/helpful-unit-test-before.el sample_files/helpful-unit-test-after.el
14616503c934f36972a3498bd63085c7 -
sample_files/if_before.py sample_files/if_after.py
5b53818f229e4232c29466ca82bf402b - 942363607f41d9da834350023afd3a1f -
sample_files/java_before.java sample_files/java_after.java
4b57e58be24d5ccbc2568810889ced8e -
sample_files/javascript_before.js sample_files/javascript_after.js
81d8ae1fdb7847729b19fa615b871e04 - 49afda10b2ba4177cc7cab307f48fc1a -
sample_files/javascript_simple_before.js sample_files/javascript_simple_after.js
58208dbd2347671a5eb068d775a5f4f9 -
sample_files/json_before.json sample_files/json_after.json
e0138d5ed144ccf30e64c6f58c744edd - 43a1d679b7c0dc9794cf343c1d1dc6fd -
sample_files/jsx_before.jsx sample_files/jsx_after.jsx
3c21b73a5d784bca89e76342eb47a422 -
sample_files/load_before.js sample_files/load_after.js
8506aca66358fb17d06702b05bf0693d - 08a68dc60b213ade4eec37a9f8a12f66 -
sample_files/nest_before.rs sample_files/nest_after.rs
9e561ba227c1fee426820982ac4d10c6 - d02776678882e861a40fee2152e0935a -
sample_files/ocaml_before.ml sample_files/ocaml_after.ml
9b6f28e64c94a7d615012d72b08265da -
@ -64,19 +64,19 @@ sample_files/ruby_before.rb sample_files/ruby_after.rb
4a9847a91e32ec6afdc2f0b01e28d2d6 -
sample_files/scala_before.scala sample_files/scala_after.scala
9d22090a6e439d5cd0057ed4d6d2702f - 3969ef383bce9bd20587eb467fa34934 -
sample_files/simple_before.js sample_files/simple_after.js
71dd2e5c7beece67e5e49b5b1b48832c - 1ddd283a18d06008f1d1741e9ff4ca70 -
sample_files/simple_before.txt sample_files/simple_after.txt
4b653ebe89321835c35722dd065cf6a2 -
sample_files/slider_before.rs sample_files/slider_after.rs
aa8764aeaa628231e9c16dd2476a17c8 - df5572916163b4f2b36aacddb933310b -
sample_files/slow_before.rs sample_files/slow_after.rs
2f6efdf8c27f87d65a5fb226caf7215c - e93bf01abcd97ea2fe4bc141e279a57e -
sample_files/small_before.js sample_files/small_after.js
eb0d76aeb39e33b5ef539812d80c78d7 -
@ -88,5 +88,5 @@ sample_files/text_before.txt sample_files/text_after.txt
f61b57cb275332d01b9539efa88aa2f6 -
sample_files/typing_before.ml sample_files/typing_after.ml
3941fd44b0bf744da834a0b3eda1ba76 - 8457b5ead7a12be8ee3f1ad0c11349de -

@ -25,7 +25,9 @@ fn shortest_path(start: Vertex) -> Vec<(Edge, Vertex)> {
// usage.
let mut predecessors: FxHashMap<Vertex, PredecessorInfo> = FxHashMap::default();
let mut neighbour_buf = [None, None, None, None, None, None, None, None, None, None]; let mut neighbour_buf = [
None, None, None, None, None, None, None, None, None, None, None, None,
];
let end = loop {
match heap.pop() {
Some((Reverse(distance), current)) => {
@ -89,10 +91,7 @@ fn shortest_path(start: Vertex) -> Vec<(Edge, Vertex)> {
}
pub fn mark_syntax<'a>(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) {
let start = Vertex { let start = Vertex::new(lhs_syntax, rhs_syntax);
lhs_syntax,
rhs_syntax,
};
let route = shortest_path(start);
mark_route(&route);
}
@ -134,10 +133,7 @@ mod tests {
let rhs = Syntax::new_atom(&arena, pos_helper(0), "foo", AtomKind::Normal);
init_all_info(&[lhs], &[rhs]);
let start = Vertex { let start = Vertex::new(Some(lhs), Some(rhs));
lhs_syntax: Some(lhs),
rhs_syntax: Some(rhs),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -177,10 +173,7 @@ mod tests {
)];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -190,7 +183,8 @@ mod tests {
UnchangedDelimiter {
depth_difference: 0
},
NovelAtomLHS { contiguous: false } NovelAtomLHS { contiguous: false },
MoveParentBoth,
]
);
}
@ -221,10 +215,7 @@ mod tests {
)];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -235,7 +226,8 @@ mod tests {
depth_difference: 0
},
NovelAtomRHS { contiguous: false },
NovelAtomRHS { contiguous: false } NovelAtomRHS { contiguous: false },
MoveParentBoth,
]
);
}
@ -269,10 +261,7 @@ mod tests {
)];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -286,7 +275,9 @@ mod tests {
},
UnchangedNode {
depth_difference: 0
} },
MoveParentLHS,
MoveParentRHS,
],
);
}
@ -309,10 +300,7 @@ mod tests {
)];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -349,10 +337,7 @@ mod tests {
let rhs = vec![];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -361,6 +346,7 @@ mod tests {
vec![
NovelDelimiterLHS { contiguous: false },
NovelAtomLHS { contiguous: true },
MoveParentLHS,
]
);
}
@ -389,10 +375,7 @@ mod tests {
let rhs = vec![];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -401,7 +384,8 @@ mod tests {
vec![
NovelDelimiterLHS { contiguous: false },
NovelAtomLHS { contiguous: true },
NovelAtomLHS { contiguous: true } MoveParentLHS,
NovelAtomLHS { contiguous: true },
]
);
}
@ -474,10 +458,7 @@ mod tests {
)];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -513,10 +494,7 @@ mod tests {
)];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -547,10 +525,7 @@ mod tests {
)];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
@ -589,10 +564,7 @@ mod tests {
)];
init_all_info(&lhs, &rhs);
let start = Vertex { let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
lhs_syntax: lhs.get(0).copied(),
rhs_syntax: rhs.get(0).copied(),
};
let route = shortest_path(start);
let actions = route.iter().map(|(action, _)| *action).collect_vec();
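The expected routes in these tests now end with `MoveParentLHS`/`MoveParentRHS`/`MoveParentBoth` actions: any delimiter the route enters, whether matched or novel, must be left again before the end vertex is reachable. A rough sketch of that balancing property with stand-in action names (not difftastic's real `Edge` enum):

```rust
// Illustrative only: the expected routes above now balance every delimiter
// they enter with a trailing MoveParent* action. Stand-in action names, not
// difftastic's real Edge enum.
#[derive(Clone, Copy, Debug)]
enum Action {
    UnchangedDelimiter, // enters a matched delimiter pair
    NovelDelimiterLhs,  // enters an unmatched LHS delimiter
    NovelAtomLhs,
    MoveParentLhs,
    MoveParentBoth,
}

fn delimiters_balance(route: &[Action]) -> bool {
    let mut depth: i32 = 0;
    for action in route {
        match action {
            Action::UnchangedDelimiter | Action::NovelDelimiterLhs => depth += 1,
            Action::MoveParentLhs | Action::MoveParentBoth => depth -= 1,
            Action::NovelAtomLhs => {}
        }
    }
    depth == 0
}

fn main() {
    // Mirrors the shape of the novel-LHS route in the tests above.
    let route = [
        Action::NovelDelimiterLhs,
        Action::NovelAtomLhs,
        Action::MoveParentLhs,
    ];
    assert!(delimiters_balance(&route));
}
```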

@ -1,5 +1,6 @@
//! A graph representation for computing tree diffs.
use rustc_hash::FxHasher;
use std::{
cmp::min,
hash::{Hash, Hasher},
@ -41,11 +42,18 @@ use Edge::*;
pub struct Vertex<'a> {
pub lhs_syntax: Option<&'a Syntax<'a>>,
pub rhs_syntax: Option<&'a Syntax<'a>>,
// Instead, store parent pointers for both sides and a hashset of
// novel delimiter parent IDs.
parents: im_rc::Vector<(Option<&'a Syntax<'a>>, Option<&'a Syntax<'a>>)>,
parents_hash: u64,
}
impl<'a> PartialEq for Vertex<'a> {
fn eq(&self, other: &Self) -> bool {
self.lhs_syntax.map(|node| node.id()) == other.lhs_syntax.map(|node| node.id())
&& self.rhs_syntax.map(|node| node.id()) == other.rhs_syntax.map(|node| node.id())
&& self.parents_hash == other.parents_hash
&& self.eq_parents_each_side(other)
}
}
impl<'a> Eq for Vertex<'a> {}
@ -54,12 +62,68 @@ impl<'a> Hash for Vertex<'a> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.lhs_syntax.map(|node| node.id()).hash(state);
self.rhs_syntax.map(|node| node.id()).hash(state);
self.parents_hash.hash(state);
}
}
fn hash_parents(parents: &im_rc::Vector<(Option<&Syntax>, Option<&Syntax>)>) -> u64 {
let mut hasher = FxHasher::default();
for (parent_id, _) in parents.iter() {
if let Some(parent_id) = parent_id {
// FxHasher finishes with 0 if called with
// .write_u32(0). Ensure the u32 written is always
// non-zero.
hasher.write_u32(parent_id.id() + 1);
}
}
for (_, parent_id) in parents.iter() {
if let Some(parent_id) = parent_id {
// FxHasher finishes with 0 if called with
// .write_u32(0). Ensure the u32 written is always
// non-zero.
hasher.write_u32(parent_id.id() + 1);
}
}
hasher.finish()
}
impl<'a> Vertex<'a> {
pub fn is_end(&self) -> bool {
self.lhs_syntax.is_none() && self.rhs_syntax.is_none() self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty()
}
fn eq_parents_each_side(&self, other: &Self) -> bool {
let self_lhs_parents = self.parents.iter().filter_map(|(lhs, _)| *lhs);
let other_lhs_parents = other.parents.iter().filter_map(|(lhs, _)| *lhs);
for (self_lhs_parent, other_lhs_parent) in self_lhs_parents.zip(other_lhs_parents) {
if self_lhs_parent.id() != other_lhs_parent.id() {
return false;
}
}
let self_rhs_parents = self.parents.iter().filter_map(|(_, rhs)| *rhs);
let other_rhs_parents = other.parents.iter().filter_map(|(_, rhs)| *rhs);
for (self_rhs_parent, other_rhs_parent) in self_rhs_parents.zip(other_rhs_parents) {
if self_rhs_parent.id() != other_rhs_parent.id() {
return false;
}
}
true
}
pub fn new(lhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>) -> Self {
let parents = im_rc::Vector::new();
let parents_hash = hash_parents(&parents);
Vertex {
lhs_syntax,
rhs_syntax,
parents,
parents_hash,
}
}
}
@ -77,16 +141,26 @@ pub enum Edge {
ReplacedComment { levenshtein_pct: u8 },
NovelAtomLHS { contiguous: bool },
NovelAtomRHS { contiguous: bool },
// TODO: A NovelDelimiterBoth node might help performance rather
// than doing LHS and RHS separately.
NovelDelimiterLHS { contiguous: bool },
NovelDelimiterRHS { contiguous: bool },
NovelTreeLHS { num_descendants: u32 },
NovelTreeRHS { num_descendants: u32 },
// TODO: rename to EnterNovelDelimiter and LeaveDelimiter?
MoveParentLHS,
MoveParentRHS,
MoveParentBoth,
}
impl Edge {
pub fn cost(&self) -> u64 {
match self {
MoveParentBoth => 1,
MoveParentLHS | MoveParentRHS => 2,
// Matching nodes is always best.
// TODO: now we model parents correctly, do we need to track depth difference?
UnchangedNode { depth_difference } => min(40, *depth_difference as u64 + 1),
// Matching an outer delimiter is good.
UnchangedDelimiter { depth_difference } => 100 + min(40, *depth_difference as u64),
@ -131,6 +205,70 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
}
let mut i = 0;
if let Some((lhs_parent, rhs_parent)) = v.parents.back() {
match (lhs_parent, rhs_parent) {
(Some(lhs_parent), Some(rhs_parent))
if v.lhs_syntax.is_none() && v.rhs_syntax.is_none() =>
{
// We have exhausted all the nodes on both lists, so we can
// move up to the parent node.
let mut next_parents = v.parents.clone();
next_parents.pop_back();
let parents_hash = hash_parents(&next_parents);
// Continue from sibling of parent.
buf[i] = Some((
MoveParentBoth,
Vertex {
lhs_syntax: lhs_parent.next_if_same_layer(),
rhs_syntax: rhs_parent.next_if_same_layer(),
parents: next_parents,
parents_hash,
},
));
i += 1;
}
(Some(lhs_parent), None) if v.lhs_syntax.is_none() => {
// Move to next after LHS parent.
let mut next_parents = v.parents.clone();
next_parents.pop_back();
let parents_hash = hash_parents(&next_parents);
// Continue from sibling of parent.
buf[i] = Some((
MoveParentLHS,
Vertex {
lhs_syntax: lhs_parent.next_if_same_layer(),
rhs_syntax: v.rhs_syntax,
parents: next_parents,
parents_hash,
},
));
i += 1;
}
(None, Some(rhs_parent)) if v.rhs_syntax.is_none() => {
// Move to next after RHS parent.
let mut next_parents = v.parents.clone();
next_parents.pop_back();
let parents_hash = hash_parents(&next_parents);
// Continue from sibling of parent.
buf[i] = Some((
MoveParentRHS,
Vertex {
lhs_syntax: v.lhs_syntax,
rhs_syntax: rhs_parent.next_if_same_layer(),
parents: next_parents,
parents_hash,
},
));
i += 1;
}
_ => {}
}
}
if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) {
if lhs_syntax == rhs_syntax {
let depth_difference = (lhs_syntax.num_ancestors() as i32
@ -138,12 +276,13 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
.abs() as u32;
// Both nodes are equal, the happy case.
// TODO: this is only OK if we've not changed depth.
buf[i] = Some((
UnchangedNode { depth_difference },
Vertex {
lhs_syntax: lhs_syntax.next(), lhs_syntax: lhs_syntax.next_if_same_layer(),
rhs_syntax: rhs_syntax.next(), rhs_syntax: rhs_syntax.next_if_same_layer(),
parents: v.parents.clone(),
parents_hash: v.parents_hash,
},
));
return;
@ -166,16 +305,13 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
{
// The list delimiters are equal, but children may not be.
if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content {
let lhs_next = if lhs_children.is_empty() { let lhs_next = lhs_children.get(0).copied();
lhs_syntax.next() let rhs_next = rhs_children.get(0).copied();
} else {
Some(lhs_children[0]) // TODO: be consistent between parents_next and next_parents.
}; let mut parents_next = v.parents.clone();
let rhs_next = if rhs_children.is_empty() { parents_next.push_back((Some(lhs_syntax), Some(rhs_syntax)));
rhs_syntax.next() let parents_hash = hash_parents(&parents_next);
} else {
Some(rhs_children[0])
};
let depth_difference = (lhs_syntax.num_ancestors() as i32
- rhs_syntax.num_ancestors() as i32)
@ -186,6 +322,8 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
Vertex {
lhs_syntax: lhs_next,
rhs_syntax: rhs_next,
parents: parents_next,
parents_hash,
},
));
i += 1;
@ -213,8 +351,10 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
buf[i] = Some((
ReplacedComment { levenshtein_pct },
Vertex {
lhs_syntax: lhs_syntax.next(), lhs_syntax: lhs_syntax.next_if_same_layer(),
rhs_syntax: rhs_syntax.next(), rhs_syntax: rhs_syntax.next_if_same_layer(),
parents: v.parents.clone(),
parents_hash: v.parents_hash,
},
));
i += 1;
@ -228,11 +368,15 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
Syntax::Atom { .. } => {
buf[i] = Some((
NovelAtomLHS {
// TODO: should this apply if prev is a parent
// node rather than a sibling?
contiguous: lhs_syntax.prev_is_contiguous(),
},
Vertex {
lhs_syntax: lhs_syntax.next(), lhs_syntax: lhs_syntax.next_if_same_layer(),
rhs_syntax: v.rhs_syntax,
parents: v.parents.clone(),
parents_hash: v.parents_hash,
},
));
i += 1;
@ -243,11 +387,11 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
num_descendants,
..
} => {
let lhs_next = if children.is_empty() { let lhs_next = children.get(0).copied();
lhs_syntax.next()
} else { let mut parents_next = v.parents.clone();
Some(children[0]) parents_next.push_back((Some(lhs_syntax), None));
}; let parents_hash = hash_parents(&parents_next);
buf[i] = Some((
NovelDelimiterLHS {
@ -256,6 +400,8 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
Vertex {
lhs_syntax: lhs_next,
rhs_syntax: v.rhs_syntax,
parents: parents_next,
parents_hash,
},
));
i += 1;
@ -266,8 +412,10 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
num_descendants: *num_descendants,
},
Vertex {
lhs_syntax: lhs_syntax.next(), lhs_syntax: lhs_syntax.next_if_same_layer(),
rhs_syntax: v.rhs_syntax,
parents: v.parents.clone(),
parents_hash: v.parents_hash,
},
));
i += 1;
@ -286,7 +434,9 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
},
Vertex {
lhs_syntax: v.lhs_syntax,
rhs_syntax: rhs_syntax.next(), rhs_syntax: rhs_syntax.next_if_same_layer(),
parents: v.parents.clone(),
parents_hash: v.parents_hash,
},
));
i += 1;
@ -297,11 +447,11 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
num_descendants,
..
} => {
let rhs_next = if children.is_empty() { let rhs_next = children.get(0).copied();
rhs_syntax.next()
} else { let mut parents_next = v.parents.clone();
Some(children[0]) parents_next.push_back((None, Some(rhs_syntax)));
}; let parents_hash = hash_parents(&parents_next);
buf[i] = Some((
NovelDelimiterRHS {
@ -310,6 +460,8 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
Vertex {
lhs_syntax: v.lhs_syntax,
rhs_syntax: rhs_next,
parents: parents_next,
parents_hash,
},
));
i += 1;
@ -321,7 +473,9 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
},
Vertex {
lhs_syntax: v.lhs_syntax,
rhs_syntax: rhs_syntax.next(), rhs_syntax: rhs_syntax.next_if_same_layer(),
parents: v.parents.clone(),
parents_hash: v.parents_hash,
},
));
i += 1;
@ -338,6 +492,9 @@ pub fn neighbours<'a>(v: &Vertex<'a>, buf: &mut [Option<(Edge, Vertex<'a>)>]) {
pub fn mark_route(route: &[(Edge, Vertex)]) {
for (e, v) in route {
match e {
MoveParentBoth | MoveParentLHS | MoveParentRHS => {
// Nothing to do: we have already marked this node when we entered it.
}
UnchangedNode { .. } => {
// No change on this node or its children.
let lhs = v.lhs_syntax.unwrap();
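A note on the `hash_parents` helper above: it caches an `FxHasher` digest of the parent stack so that `Hash`, and the fast path of `PartialEq`, stay cheap per vertex. The `+ 1` exists because, as the diff's comment says, `FxHasher` finishes at 0 after `write_u32(0)`, so a lone id-0 parent would otherwise hash like an empty stack. A self-contained sketch of that detail (assumes the `rustc-hash` crate, which the `use rustc_hash::FxHasher;` line above already relies on):

```rust
// Self-contained sketch of the hash_parents detail: keep the value written to
// FxHasher non-zero so a single parent with id 0 does not hash like an empty
// parent stack. Assumes the rustc-hash crate as a dependency.
use std::hash::Hasher;

use rustc_hash::FxHasher;

fn hash_parent_ids(parent_ids: &[u32]) -> u64 {
    let mut hasher = FxHasher::default();
    for id in parent_ids {
        // write_u32(0) leaves FxHasher's state at 0, the same as writing
        // nothing, so shift every id up by one.
        hasher.write_u32(*id + 1);
    }
    hasher.finish()
}

fn main() {
    assert_ne!(hash_parent_ids(&[0]), hash_parent_ids(&[]));
    assert_ne!(hash_parent_ids(&[0, 1]), hash_parent_ids(&[1]));
}
```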

@ -248,10 +248,28 @@ impl<'a> Syntax<'a> {
}
}
pub fn next(&self) -> Option<&'a Syntax<'a>> { fn next(&self) -> Option<&'a Syntax<'a>> {
self.info().next.get()
}
pub fn parent(&self) -> Option<&'a Syntax<'a>> {
self.info().parent.get()
}
// TODO: Replace next() with this logic, maybe even during
// SyntaxInfo init.
pub fn next_if_same_layer(&self) -> Option<&'a Syntax<'a>> {
if let Some(next) = self.next() {
if self.parent().map(|n| n.id()) == next.parent().map(|n| n.id()) {
Some(next)
} else {
None
}
} else {
None
}
}
pub fn prev_is_contiguous(&self) -> bool {
self.info().prev_is_contiguous.get()
}
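`next_if_same_layer` is the other half of the change: a plain `next()` pointer can step from the last child of a list to a node in an enclosing layer, which is exactly the implicit parent move this commit removes. The new helper only advances when the next node shares the same parent, leaving layer changes to the explicit MoveParent edges. An illustrative stand-in over a toy node table (not difftastic's real `Syntax` type):

```rust
// Illustrative stand-in for next_if_same_layer() over a toy node table: only
// follow the `next` pointer when the next node has the same parent, i.e. it is
// a sibling in the current layer. Not difftastic's real Syntax type.
#[derive(Debug)]
struct Node {
    id: u32,
    parent: Option<u32>,
    next: Option<u32>,
}

fn next_if_same_layer(nodes: &[Node], current: &Node) -> Option<u32> {
    let next_id = current.next?;
    let next = nodes.iter().find(|n| n.id == next_id)?;
    if next.parent == current.parent {
        Some(next.id)
    } else {
        // Crossing into a different layer is now an explicit MoveParent edge,
        // not an implicit side effect of walking `next`.
        None
    }
}

fn main() {
    // Nodes 1 and 2 are siblings inside parent 10; node 3 is one layer up.
    let nodes = [
        Node { id: 1, parent: Some(10), next: Some(2) },
        Node { id: 2, parent: Some(10), next: Some(3) },
        Node { id: 3, parent: None, next: None },
    ];
    assert_eq!(next_if_same_layer(&nodes, &nodes[0]), Some(2));
    assert_eq!(next_if_same_layer(&nodes, &nodes[1]), None);
}
```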