diff --git a/src/diff/myers_diff.rs b/src/diff/lcs_diff.rs similarity index 95% rename from src/diff/myers_diff.rs rename to src/diff/lcs_diff.rs index 3ea18e9f4..855a39459 100644 --- a/src/diff/myers_diff.rs +++ b/src/diff/lcs_diff.rs @@ -1,4 +1,10 @@ -//! A fast diff for linear content, using Myer's diff algorithm. +//! A fast diff for linear content, particularly lines of text. +//! +//! This file uses the Wu algorithm, using the `wu-diff` crate. +//! +//! Difftastic has the files huge_cpp_1.cpp and huge_cpp_2.cpp in the +//! sample_files directory for a performance stress test. These files +//! are 22 MiB and 590,000 lines. use std::hash::Hash; @@ -11,8 +17,7 @@ pub(crate) enum DiffResult { Right(T), } -/// Compute a linear diff between `lhs` and `rhs`. This is the -/// traditional Myer's diff algorithm. +/// Compute a linear diff between `lhs` and `rhs`. pub(crate) fn slice<'a, T: PartialEq + Clone>( lhs: &'a [T], rhs: &'a [T], diff --git a/src/diff/mod.rs b/src/diff/mod.rs index 54ecfc5f5..c2458241a 100644 --- a/src/diff/mod.rs +++ b/src/diff/mod.rs @@ -1,7 +1,7 @@ pub(crate) mod changes; pub(crate) mod dijkstra; mod graph; -pub(crate) mod myers_diff; +pub(crate) mod lcs_diff; pub(crate) mod sliders; mod stack; pub(crate) mod unchanged; diff --git a/src/diff/unchanged.rs b/src/diff/unchanged.rs index 83ac240a5..359ba5778 100644 --- a/src/diff/unchanged.rs +++ b/src/diff/unchanged.rs @@ -4,7 +4,7 @@ use std::hash::Hash; use crate::diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap}; -use crate::diff::myers_diff; +use crate::diff::lcs_diff; use crate::hash::DftHashSet; use crate::parse::syntax::Syntax; @@ -255,9 +255,9 @@ fn split_unchanged_toplevel<'a>( let mut section_lhs_nodes = vec![]; let mut section_rhs_nodes = vec![]; - for diff_res in myers_diff::slice(&lhs_node_ids, &rhs_node_ids) { + for diff_res in lcs_diff::slice(&lhs_node_ids, &rhs_node_ids) { match diff_res { - myers_diff::DiffResult::Both(lhs, rhs) => { + lcs_diff::DiffResult::Both(lhs, rhs) => { let lhs_node = lhs.1; let rhs_node = rhs.1; @@ -293,10 +293,10 @@ fn split_unchanged_toplevel<'a>( res.push((ChangeState::UnchangedNode, vec![lhs_node], vec![rhs_node])); } } - myers_diff::DiffResult::Left(lhs) => { + lcs_diff::DiffResult::Left(lhs) => { section_lhs_nodes.push(lhs.1); } - myers_diff::DiffResult::Right(rhs) => { + lcs_diff::DiffResult::Right(rhs) => { section_rhs_nodes.push(rhs.1); } } diff --git a/src/line_parser.rs b/src/line_parser.rs index db03fda3d..ea6308334 100644 --- a/src/line_parser.rs +++ b/src/line_parser.rs @@ -6,7 +6,7 @@ use regex::Regex; use crate::words::split_words; use crate::{ - diff::myers_diff, + diff::lcs_diff, parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind}, }; @@ -81,15 +81,15 @@ fn changed_parts<'a>( let opposite_src_lines = split_lines_keep_newline(opposite_src); let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![]; - for diff_res in myers_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) { + for diff_res in lcs_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) { match diff_res { - myers_diff::DiffResult::Left(line) => { + lcs_diff::DiffResult::Left(line) => { res.push((TextChangeKind::Novel, vec![line], vec![])); } - myers_diff::DiffResult::Both(line, opposite_line) => { + lcs_diff::DiffResult::Both(line, opposite_line) => { res.push((TextChangeKind::Unchanged, vec![line], vec![opposite_line])); } - myers_diff::DiffResult::Right(opposite_line) => { + lcs_diff::DiffResult::Right(opposite_line) => { res.push((TextChangeKind::Novel, vec![], vec![opposite_line])); } } @@ -176,9 +176,9 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec continue; } - for diff_res in myers_diff::slice_unique_by_hash(&lhs_words, &rhs_words) { + for diff_res in lcs_diff::slice_unique_by_hash(&lhs_words, &rhs_words) { match diff_res { - myers_diff::DiffResult::Left(lhs_word) => { + lcs_diff::DiffResult::Left(lhs_word) => { let lhs_pos = lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len()); @@ -191,7 +191,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec lhs_offset += lhs_word.len(); } - myers_diff::DiffResult::Both(lhs_word, rhs_word) => { + lcs_diff::DiffResult::Both(lhs_word, rhs_word) => { if *lhs_word != "\n" { let lhs_pos = lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len()); @@ -211,7 +211,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec lhs_offset += lhs_word.len(); rhs_offset += rhs_word.len(); } - myers_diff::DiffResult::Right(rhs_word) => { + lcs_diff::DiffResult::Right(rhs_word) => { rhs_offset += rhs_word.len(); } } diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index 3cd0fea23..406b9e06f 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -14,7 +14,7 @@ use crate::words::split_words_and_numbers; use crate::{ diff::changes::ChangeKind, diff::changes::{ChangeKind::*, ChangeMap}, - diff::myers_diff, + diff::lcs_diff, hash::DftHashMap, lines::is_all_whitespace, }; @@ -708,7 +708,7 @@ fn split_atom_words( let content_parts = split_words_and_numbers(content); let other_parts = split_words_and_numbers(opposite_content); - let word_diffs = myers_diff::slice_by_hash(&content_parts, &other_parts); + let word_diffs = lcs_diff::slice_by_hash(&content_parts, &other_parts); if !has_common_words(&word_diffs) { return pos @@ -731,7 +731,7 @@ fn split_atom_words( let mut mps = vec![]; for diff_res in word_diffs { match diff_res { - myers_diff::DiffResult::Left(word) => { + lcs_diff::DiffResult::Left(word) => { // This word is novel to this side. if !is_all_whitespace(word) { mps.push(MatchedPos { @@ -748,7 +748,7 @@ fn split_atom_words( } offset += word.len(); } - myers_diff::DiffResult::Both(word, opposite_word) => { + lcs_diff::DiffResult::Both(word, opposite_word) => { // This word is present on both sides. // TODO: don't assume this atom is on a single line. let word_pos = @@ -771,7 +771,7 @@ fn split_atom_words( offset += word.len(); opposite_offset += opposite_word.len(); } - myers_diff::DiffResult::Right(opposite_word) => { + lcs_diff::DiffResult::Right(opposite_word) => { // Only exists on other side, nothing to do on this side. opposite_offset += opposite_word.len(); } @@ -783,13 +783,13 @@ fn split_atom_words( /// Are there sufficient common words that we should only highlight /// individual changed words? -fn has_common_words(word_diffs: &Vec>) -> bool { +fn has_common_words(word_diffs: &Vec>) -> bool { let mut novel_count = 0; let mut unchanged_count = 0; for word_diff in word_diffs { match word_diff { - myers_diff::DiffResult::Both(word, _) => { + lcs_diff::DiffResult::Both(word, _) => { if **word != " " { unchanged_count += 1; }