Skip unchanged delim and find a fixpoint

This lets us discard more nodes before diffing, improving performance.

Fixes #84
html_output
Wilfred Hughes 2022-01-30 14:45:38 +07:00
parent ad6898fdde
commit ea24dd8784
3 changed files with 139 additions and 10 deletions

@ -7,6 +7,10 @@ Fixed an issue with missing positions in OCaml attribute syntax.
Fixed parsing issues in Common Lisp: character literals, `loop` macro
usage with `maximizing`.
### Diffing
Improved performance when diffing a single large expression.
### Display
Fixed display issues where lines were printed more than once.

@ -51,7 +51,7 @@ use style::BackgroundColor;
use summary::DiffResult;
use syntax::init_next_prev;
use typed_arena::Arena;
use unchanged::skip_unchanged_at_ends;
use unchanged::skip_unchanged;
use walkdir::WalkDir;
use crate::{
@ -435,7 +435,7 @@ fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) ->
init_all_info(&lhs, &rhs);
let (possibly_changed_lhs, possibly_changed_rhs) = skip_unchanged_at_ends(&lhs, &rhs);
let (possibly_changed_lhs, possibly_changed_rhs) = skip_unchanged(&lhs, &rhs);
init_next_prev(&possibly_changed_lhs);
init_next_prev(&possibly_changed_rhs);

@ -1,6 +1,76 @@
use crate::syntax::{ChangeKind, Syntax};
pub fn skip_unchanged_at_ends<'a>(
/// Discard nodes that are obviously unchanged, so we have a smaller
/// number of nodes to run the full diffing algorithm on.
pub fn skip_unchanged<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) {
let mut lhs_nodes = lhs_nodes.to_vec();
let mut rhs_nodes = rhs_nodes.to_vec();
// Repeatedly skip outer/leading/trailing nodes until we can't any
// more (i.e. find a fixpoint).
let mut keep_trying = true;
while keep_trying {
keep_trying = false;
let (lhs_after_skip, rhs_after_skip) = skip_unchanged_at_ends(&lhs_nodes, &rhs_nodes);
if lhs_after_skip != lhs_nodes {
keep_trying = true;
lhs_nodes = lhs_after_skip;
rhs_nodes = rhs_after_skip;
}
let (lhs_after_skip, rhs_after_skip) = skip_unchanged_delimiters(&lhs_nodes, &rhs_nodes);
if lhs_after_skip != lhs_nodes {
keep_trying = true;
lhs_nodes = lhs_after_skip;
rhs_nodes = rhs_after_skip;
}
}
(lhs_nodes, rhs_nodes)
}
/// If we're comparing two lists that have the same delimiters, mark
/// the delimiters as unchanged and return the children.
fn skip_unchanged_delimiters<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) {
if let (
[lhs_node @ Syntax::List {
open_content: lhs_open,
children: lhs_children,
close_content: lhs_close,
..
}],
[rhs_node @ Syntax::List {
open_content: rhs_open,
children: rhs_children,
close_content: rhs_close,
..
}],
) = (lhs_nodes, rhs_nodes)
{
if lhs_open == rhs_open && lhs_close == rhs_close {
lhs_node.set_change(ChangeKind::Unchanged(rhs_node));
rhs_node.set_change(ChangeKind::Unchanged(lhs_node));
return (lhs_children.to_vec(), rhs_children.to_vec());
}
}
(lhs_nodes.into(), rhs_nodes.into())
}
/// Skip syntax nodes at the beginning or end that are obviously
/// unchanged.
///
/// Set the ChangeKind on the definitely changed nodes, and return the
/// nodes that may contain changes.
fn skip_unchanged_at_ends<'a>(
lhs_nodes: &[&'a Syntax<'a>],
rhs_nodes: &[&'a Syntax<'a>],
) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) {
@ -9,7 +79,10 @@ pub fn skip_unchanged_at_ends<'a>(
while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.first(), rhs_nodes.first()) {
if lhs_node.content_id() == rhs_node.content_id() {
skip_pair(lhs_node, rhs_node);
{
lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node));
rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node));
};
lhs_nodes = &lhs_nodes[1..];
rhs_nodes = &rhs_nodes[1..];
@ -20,7 +93,8 @@ pub fn skip_unchanged_at_ends<'a>(
while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.last(), rhs_nodes.last()) {
if lhs_node.content_id() == rhs_node.content_id() {
skip_pair(lhs_node, rhs_node);
lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node));
rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node));
lhs_nodes = &lhs_nodes[..lhs_nodes.len() - 1];
rhs_nodes = &rhs_nodes[..rhs_nodes.len() - 1];
@ -32,11 +106,6 @@ pub fn skip_unchanged_at_ends<'a>(
(Vec::from(lhs_nodes), Vec::from(rhs_nodes))
}
fn skip_pair<'a>(lhs_node: &'a Syntax<'a>, rhs_node: &'a Syntax<'a>) {
lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node));
rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node));
}
#[cfg(test)]
mod tests {
use super::*;
@ -94,4 +163,60 @@ mod tests {
assert_eq!(lhs_after_skip.len(), 2);
assert_eq!(rhs_after_skip.len(), 1);
}
#[test]
fn test_unchanged_outer_delimiters() {
let arena = Arena::new();
let config = from_language(guess_language::Language::EmacsLisp);
let lhs_nodes = parse(&arena, "(A)", &config);
let rhs_nodes = parse(&arena, "(B)", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let (lhs_after_skip, rhs_after_skip) = skip_unchanged_delimiters(&lhs_nodes, &rhs_nodes);
// The only possibly changed nodes are inside the lists.
assert_eq!(lhs_after_skip.len(), 1);
assert!(matches!(lhs_after_skip[0], Syntax::Atom { .. }));
assert_eq!(rhs_after_skip.len(), 1);
assert!(matches!(rhs_after_skip[0], Syntax::Atom { .. }));
// The outer list delimiters are unchanged.
assert_eq!(
lhs_nodes[0].change(),
Some(ChangeKind::Unchanged(rhs_nodes[0]))
);
assert_eq!(
rhs_nodes[0].change(),
Some(ChangeKind::Unchanged(lhs_nodes[0]))
);
// The inner items haven't had their change set yet.
assert_eq!(lhs_after_skip[0].change(), None);
assert_eq!(rhs_after_skip[0].change(), None);
}
#[test]
fn test_skip_unchanged() {
let arena = Arena::new();
let config = from_language(guess_language::Language::EmacsLisp);
let lhs_nodes = parse(&arena, "unchanged-before (more-unchanged (A))", &config);
let rhs_nodes = parse(&arena, "unchanged-before (more-unchanged (B))", &config);
init_all_info(&lhs_nodes, &rhs_nodes);
let (lhs_after_skip, rhs_after_skip) = skip_unchanged(&lhs_nodes, &rhs_nodes);
// The only possibly changed nodes are inside the lists.
assert_eq!(lhs_after_skip.len(), 1);
assert!(matches!(lhs_after_skip[0], Syntax::Atom { .. }));
assert_eq!(rhs_after_skip.len(), 1);
assert!(matches!(rhs_after_skip[0], Syntax::Atom { .. }));
// The inner items haven't had their change set yet.
assert_eq!(lhs_after_skip[0].change(), None);
assert_eq!(rhs_after_skip[0].change(), None);
}
}