From ea24dd87841913f6190e0e218e263f4ac00b0f2b Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Sun, 30 Jan 2022 14:45:38 -0800 Subject: [PATCH] Skip unchanged delim and find a fixpoint This lets us discard more nodes before diffing, improving performance. Fixes #84 --- CHANGELOG.md | 4 ++ src/main.rs | 4 +- src/unchanged.rs | 141 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 139 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc35636ef..080fb634f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ Fixed an issue with missing positions in OCaml attribute syntax. Fixed parsing issues in Common Lisp: character literals, `loop` macro usage with `maximizing`. +### Diffing + +Improved performance when diffing a single large expression. + ### Display Fixed display issues where lines were printed more than once. diff --git a/src/main.rs b/src/main.rs index 447c3d5f4..03161a56e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -51,7 +51,7 @@ use style::BackgroundColor; use summary::DiffResult; use syntax::init_next_prev; use typed_arena::Arena; -use unchanged::skip_unchanged_at_ends; +use unchanged::skip_unchanged; use walkdir::WalkDir; use crate::{ @@ -435,7 +435,7 @@ fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) -> init_all_info(&lhs, &rhs); - let (possibly_changed_lhs, possibly_changed_rhs) = skip_unchanged_at_ends(&lhs, &rhs); + let (possibly_changed_lhs, possibly_changed_rhs) = skip_unchanged(&lhs, &rhs); init_next_prev(&possibly_changed_lhs); init_next_prev(&possibly_changed_rhs); diff --git a/src/unchanged.rs b/src/unchanged.rs index 5cba539bb..50a17b8ea 100644 --- a/src/unchanged.rs +++ b/src/unchanged.rs @@ -1,6 +1,76 @@ use crate::syntax::{ChangeKind, Syntax}; -pub fn skip_unchanged_at_ends<'a>( +/// Discard nodes that are obviously unchanged, so we have a smaller +/// number of nodes to run the full diffing algorithm on. +pub fn skip_unchanged<'a>( + lhs_nodes: &[&'a Syntax<'a>], + rhs_nodes: &[&'a Syntax<'a>], +) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) { + let mut lhs_nodes = lhs_nodes.to_vec(); + let mut rhs_nodes = rhs_nodes.to_vec(); + + // Repeatedly skip outer/leading/trailing nodes until we can't any + // more (i.e. find a fixpoint). + let mut keep_trying = true; + while keep_trying { + keep_trying = false; + + let (lhs_after_skip, rhs_after_skip) = skip_unchanged_at_ends(&lhs_nodes, &rhs_nodes); + if lhs_after_skip != lhs_nodes { + keep_trying = true; + lhs_nodes = lhs_after_skip; + rhs_nodes = rhs_after_skip; + } + + let (lhs_after_skip, rhs_after_skip) = skip_unchanged_delimiters(&lhs_nodes, &rhs_nodes); + if lhs_after_skip != lhs_nodes { + keep_trying = true; + lhs_nodes = lhs_after_skip; + rhs_nodes = rhs_after_skip; + } + } + + (lhs_nodes, rhs_nodes) +} + +/// If we're comparing two lists that have the same delimiters, mark +/// the delimiters as unchanged and return the children. +fn skip_unchanged_delimiters<'a>( + lhs_nodes: &[&'a Syntax<'a>], + rhs_nodes: &[&'a Syntax<'a>], +) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) { + if let ( + [lhs_node @ Syntax::List { + open_content: lhs_open, + children: lhs_children, + close_content: lhs_close, + .. + }], + [rhs_node @ Syntax::List { + open_content: rhs_open, + children: rhs_children, + close_content: rhs_close, + .. + }], + ) = (lhs_nodes, rhs_nodes) + { + if lhs_open == rhs_open && lhs_close == rhs_close { + lhs_node.set_change(ChangeKind::Unchanged(rhs_node)); + rhs_node.set_change(ChangeKind::Unchanged(lhs_node)); + + return (lhs_children.to_vec(), rhs_children.to_vec()); + } + } + + (lhs_nodes.into(), rhs_nodes.into()) +} + +/// Skip syntax nodes at the beginning or end that are obviously +/// unchanged. +/// +/// Set the ChangeKind on the definitely changed nodes, and return the +/// nodes that may contain changes. +fn skip_unchanged_at_ends<'a>( lhs_nodes: &[&'a Syntax<'a>], rhs_nodes: &[&'a Syntax<'a>], ) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) { @@ -9,7 +79,10 @@ pub fn skip_unchanged_at_ends<'a>( while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.first(), rhs_nodes.first()) { if lhs_node.content_id() == rhs_node.content_id() { - skip_pair(lhs_node, rhs_node); + { + lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node)); + rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node)); + }; lhs_nodes = &lhs_nodes[1..]; rhs_nodes = &rhs_nodes[1..]; @@ -20,7 +93,8 @@ pub fn skip_unchanged_at_ends<'a>( while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.last(), rhs_nodes.last()) { if lhs_node.content_id() == rhs_node.content_id() { - skip_pair(lhs_node, rhs_node); + lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node)); + rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node)); lhs_nodes = &lhs_nodes[..lhs_nodes.len() - 1]; rhs_nodes = &rhs_nodes[..rhs_nodes.len() - 1]; @@ -32,11 +106,6 @@ pub fn skip_unchanged_at_ends<'a>( (Vec::from(lhs_nodes), Vec::from(rhs_nodes)) } -fn skip_pair<'a>(lhs_node: &'a Syntax<'a>, rhs_node: &'a Syntax<'a>) { - lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node)); - rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node)); -} - #[cfg(test)] mod tests { use super::*; @@ -94,4 +163,60 @@ mod tests { assert_eq!(lhs_after_skip.len(), 2); assert_eq!(rhs_after_skip.len(), 1); } + + #[test] + fn test_unchanged_outer_delimiters() { + let arena = Arena::new(); + let config = from_language(guess_language::Language::EmacsLisp); + + let lhs_nodes = parse(&arena, "(A)", &config); + let rhs_nodes = parse(&arena, "(B)", &config); + init_all_info(&lhs_nodes, &rhs_nodes); + + let (lhs_after_skip, rhs_after_skip) = skip_unchanged_delimiters(&lhs_nodes, &rhs_nodes); + + // The only possibly changed nodes are inside the lists. + assert_eq!(lhs_after_skip.len(), 1); + assert!(matches!(lhs_after_skip[0], Syntax::Atom { .. })); + + assert_eq!(rhs_after_skip.len(), 1); + assert!(matches!(rhs_after_skip[0], Syntax::Atom { .. })); + + // The outer list delimiters are unchanged. + assert_eq!( + lhs_nodes[0].change(), + Some(ChangeKind::Unchanged(rhs_nodes[0])) + ); + assert_eq!( + rhs_nodes[0].change(), + Some(ChangeKind::Unchanged(lhs_nodes[0])) + ); + + // The inner items haven't had their change set yet. + assert_eq!(lhs_after_skip[0].change(), None); + assert_eq!(rhs_after_skip[0].change(), None); + } + + #[test] + fn test_skip_unchanged() { + let arena = Arena::new(); + let config = from_language(guess_language::Language::EmacsLisp); + + let lhs_nodes = parse(&arena, "unchanged-before (more-unchanged (A))", &config); + let rhs_nodes = parse(&arena, "unchanged-before (more-unchanged (B))", &config); + init_all_info(&lhs_nodes, &rhs_nodes); + + let (lhs_after_skip, rhs_after_skip) = skip_unchanged(&lhs_nodes, &rhs_nodes); + + // The only possibly changed nodes are inside the lists. + assert_eq!(lhs_after_skip.len(), 1); + assert!(matches!(lhs_after_skip[0], Syntax::Atom { .. })); + + assert_eq!(rhs_after_skip.len(), 1); + assert!(matches!(rhs_after_skip[0], Syntax::Atom { .. })); + + // The inner items haven't had their change set yet. + assert_eq!(lhs_after_skip[0].change(), None); + assert_eq!(rhs_after_skip[0].change(), None); + } }