From 94e968b6600969195f11a9ee05ec7f2514de8dcf Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Sat, 22 Jan 2022 22:24:44 -0800 Subject: [PATCH] Ignore unchanged nodes at the start/end of the file This is the first part of #84. --- CHANGELOG.md | 4 ++ src/main.rs | 13 +++++-- src/syntax.rs | 4 +- src/unchanged.rs | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 5 deletions(-) create mode 100644 src/unchanged.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 3654c21e4..beecdddca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## 0.17 (unreleased) +### Diffing + +Improved performance when all file changes are close together. + ## 0.16 (released 22 January 2022) ### Parsing diff --git a/src/main.rs b/src/main.rs index efc2844ac..903764e0c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -24,6 +24,7 @@ mod style; mod summary; mod syntax; mod tree_sitter_parser; +mod unchanged; #[macro_use] extern crate log; @@ -48,6 +49,7 @@ use sliders::fix_all_sliders; use std::{env, path::Path}; use summary::DiffResult; use typed_arena::Arena; +use unchanged::skip_unchanged_at_ends; use walkdir::WalkDir; use crate::{ @@ -305,10 +307,15 @@ fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) -> let rhs = tsp::parse(&arena, &rhs_src, &ts_lang); init_info(&lhs, &rhs); - mark_syntax(lhs.get(0).copied(), rhs.get(0).copied()); - fix_all_sliders(&lhs); - fix_all_sliders(&rhs); + let (possibly_changed_lhs, possibly_changed_rhs) = skip_unchanged_at_ends(&lhs, &rhs); + mark_syntax( + possibly_changed_lhs.get(0).copied(), + possibly_changed_rhs.get(0).copied(), + ); + + fix_all_sliders(&possibly_changed_lhs); + fix_all_sliders(&possibly_changed_rhs); let lhs_positions = change_positions(&lhs_src, &rhs_src, &lhs); let rhs_positions = change_positions(&rhs_src, &lhs_src, &rhs); diff --git a/src/syntax.rs b/src/syntax.rs index 25ac757e8..b2b303681 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -44,10 +44,10 @@ 
impl<'a> fmt::Debug for ChangeKind<'a> { pub struct SyntaxInfo<'a> { /// The syntax node that occurs after this one, in a depth-first /// tree traversal. - next: Cell<Option<&'a Syntax<'a>>>, + pub next: Cell<Option<&'a Syntax<'a>>>, /// The syntax node that occurs before this one, in a depth-first /// tree traversal. - prev: Cell<Option<&'a Syntax<'a>>>, + pub prev: Cell<Option<&'a Syntax<'a>>>, /// The parent syntax node, if present. parent: Cell<Option<&'a Syntax<'a>>>, /// Does the previous syntax node occur on the same line as the diff --git a/src/unchanged.rs b/src/unchanged.rs new file mode 100644 index 000000000..6a88e6ff6 --- /dev/null +++ b/src/unchanged.rs @@ -0,0 +1,95 @@ +use crate::syntax::{ChangeKind, Syntax}; + +pub fn skip_unchanged_at_ends<'a>( + lhs_nodes: &[&'a Syntax<'a>], + rhs_nodes: &[&'a Syntax<'a>], +) -> (Vec<&'a Syntax<'a>>, Vec<&'a Syntax<'a>>) { + let mut lhs_nodes = lhs_nodes; + let mut rhs_nodes = rhs_nodes; + + while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.first(), rhs_nodes.first()) { + if lhs_node.content_id() == rhs_node.content_id() { + skip_pair(lhs_node, rhs_node); + + lhs_nodes = &lhs_nodes[1..]; + rhs_nodes = &rhs_nodes[1..]; + } else { + break; + } + } + + while let (Some(lhs_node), Some(rhs_node)) = (lhs_nodes.last(), rhs_nodes.last()) { + if lhs_node.content_id() == rhs_node.content_id() { + skip_pair(lhs_node, rhs_node); + + lhs_nodes = &lhs_nodes[..lhs_nodes.len() - 1]; + rhs_nodes = &rhs_nodes[..rhs_nodes.len() - 1]; + } else { + break; + } + } + + (Vec::from(lhs_nodes), Vec::from(rhs_nodes)) +} + +fn skip_pair<'a>(lhs_node: &'a Syntax<'a>, rhs_node: &'a Syntax<'a>) { + lhs_node.set_change_deep(ChangeKind::Unchanged(rhs_node)); + rhs_node.set_change_deep(ChangeKind::Unchanged(lhs_node)); + + detach(lhs_node); + detach(rhs_node); +} + +fn detach(node: &Syntax) { + if let Some(next_node) = node.next() { + node.info().next.set(None); + next_node.info().prev.set(None); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + guess_language, + syntax::init_info, + tree_sitter_parser::{from_language, parse}, + 
}; + use typed_arena::Arena; + + #[test] + fn test_unchanged_at_start() { + let arena = Arena::new(); + let config = from_language(guess_language::Language::EmacsLisp); + + let lhs_nodes = parse(&arena, "unchanged A B", &config); + let rhs_nodes = parse(&arena, "unchanged X", &config); + init_info(&lhs_nodes, &rhs_nodes); + + let (lhs_after_skip, rhs_after_skip) = skip_unchanged_at_ends(&lhs_nodes, &rhs_nodes); + + assert_eq!(lhs_nodes[0].change(), Some(ChangeKind::Unchanged(rhs_nodes[0]))); + assert_eq!(rhs_nodes[0].change(), Some(ChangeKind::Unchanged(lhs_nodes[0]))); + + assert_eq!(lhs_after_skip.len(), 2); + assert_eq!(rhs_after_skip.len(), 1); + } + + #[test] + fn test_unchanged_at_end() { + let arena = Arena::new(); + let config = from_language(guess_language::Language::EmacsLisp); + + let lhs_nodes = parse(&arena, "A B unchanged", &config); + let rhs_nodes = parse(&arena, "X unchanged", &config); + init_info(&lhs_nodes, &rhs_nodes); + + let (lhs_after_skip, rhs_after_skip) = skip_unchanged_at_ends(&lhs_nodes, &rhs_nodes); + + assert_eq!(lhs_nodes[2].change(), Some(ChangeKind::Unchanged(rhs_nodes[1]))); + assert_eq!(rhs_nodes[1].change(), Some(ChangeKind::Unchanged(lhs_nodes[2]))); + + assert_eq!(lhs_after_skip.len(), 2); + assert_eq!(rhs_after_skip.len(), 1); + } +}