Rename myers_diff to lcs_diff, as it's not actually the Myers algorithm

2025-03-09 23:55:08 +07:00 · 2025-03-09 23:55:08 +07:00 · d8b715bd5b
parent ca9b7da43f
commit d8b715bd5b
5 changed files with 30 additions and 25 deletions
--- a/src/diff/myers_diff.rs
+++ b/src/diff/lcs_diff.rs
@@ -1,4 +1,10 @@
-//! A fast diff for linear content, using Myer's diff algorithm.
+//! A fast diff for linear content, particularly lines of text.
+//!
+//! This file uses the Wu algorithm, using the `wu-diff` crate.
+//!
+//! Difftastic has the files huge_cpp_1.cpp and huge_cpp_2.cpp in the
+//! sample_files directory for a performance stress test. These files
+//! are 22 MiB and 590,000 lines.

 use std::hash::Hash;

@@ -11,8 +17,7 @@ pub(crate) enum DiffResult<T> {
    Right(T),
 }

-/// Compute a linear diff between `lhs` and `rhs`. This is the
-/// traditional Myer's diff algorithm.
+/// Compute a linear diff between `lhs` and `rhs`.
 pub(crate) fn slice<'a, T: PartialEq + Clone>(
    lhs: &'a [T],
    rhs: &'a [T],
--- a/src/diff/mod.rs
+++ b/src/diff/mod.rs
@@ -1,7 +1,7 @@
 pub(crate) mod changes;
 pub(crate) mod dijkstra;
 mod graph;
-pub(crate) mod myers_diff;
+pub(crate) mod lcs_diff;
 pub(crate) mod sliders;
 mod stack;
 pub(crate) mod unchanged;
--- a/src/diff/unchanged.rs
+++ b/src/diff/unchanged.rs
@@ -4,7 +4,7 @@
 use std::hash::Hash;

 use crate::diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap};
-use crate::diff::myers_diff;
+use crate::diff::lcs_diff;
 use crate::hash::DftHashSet;
 use crate::parse::syntax::Syntax;

@@ -255,9 +255,9 @@ fn split_unchanged_toplevel<'a>(
    let mut section_lhs_nodes = vec![];
    let mut section_rhs_nodes = vec![];

-    for diff_res in myers_diff::slice(&lhs_node_ids, &rhs_node_ids) {
+    for diff_res in lcs_diff::slice(&lhs_node_ids, &rhs_node_ids) {
        match diff_res {
-            myers_diff::DiffResult::Both(lhs, rhs) => {
+            lcs_diff::DiffResult::Both(lhs, rhs) => {
                let lhs_node = lhs.1;
                let rhs_node = rhs.1;

@@ -293,10 +293,10 @@ fn split_unchanged_toplevel<'a>(
                    res.push((ChangeState::UnchangedNode, vec![lhs_node], vec![rhs_node]));
                }
            }
-            myers_diff::DiffResult::Left(lhs) => {
+            lcs_diff::DiffResult::Left(lhs) => {
                section_lhs_nodes.push(lhs.1);
            }
-            myers_diff::DiffResult::Right(rhs) => {
+            lcs_diff::DiffResult::Right(rhs) => {
                section_rhs_nodes.push(rhs.1);
            }
        }
--- a/src/line_parser.rs
+++ b/src/line_parser.rs
@@ -6,7 +6,7 @@ use regex::Regex;

 use crate::words::split_words;
 use crate::{
-    diff::myers_diff,
+    diff::lcs_diff,
    parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind},
 };

@@ -81,15 +81,15 @@ fn changed_parts<'a>(
    let opposite_src_lines = split_lines_keep_newline(opposite_src);

    let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![];
-    for diff_res in myers_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) {
+    for diff_res in lcs_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) {
        match diff_res {
-            myers_diff::DiffResult::Left(line) => {
+            lcs_diff::DiffResult::Left(line) => {
                res.push((TextChangeKind::Novel, vec![line], vec![]));
            }
-            myers_diff::DiffResult::Both(line, opposite_line) => {
+            lcs_diff::DiffResult::Both(line, opposite_line) => {
                res.push((TextChangeKind::Unchanged, vec![line], vec![opposite_line]));
            }
-            myers_diff::DiffResult::Right(opposite_line) => {
+            lcs_diff::DiffResult::Right(opposite_line) => {
                res.push((TextChangeKind::Novel, vec![], vec![opposite_line]));
            }
        }
@@ -176,9 +176,9 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
                    continue;
                }

-                for diff_res in myers_diff::slice_unique_by_hash(&lhs_words, &rhs_words) {
+                for diff_res in lcs_diff::slice_unique_by_hash(&lhs_words, &rhs_words) {
                    match diff_res {
-                        myers_diff::DiffResult::Left(lhs_word) => {
+                        lcs_diff::DiffResult::Left(lhs_word) => {
                            let lhs_pos =
                                lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len());

@@ -191,7 +191,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>

                            lhs_offset += lhs_word.len();
                        }
-                        myers_diff::DiffResult::Both(lhs_word, rhs_word) => {
+                        lcs_diff::DiffResult::Both(lhs_word, rhs_word) => {
                            if *lhs_word != "\n" {
                                let lhs_pos =
                                    lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len());
@@ -211,7 +211,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
                            lhs_offset += lhs_word.len();
                            rhs_offset += rhs_word.len();
                        }
-                        myers_diff::DiffResult::Right(rhs_word) => {
+                        lcs_diff::DiffResult::Right(rhs_word) => {
                            rhs_offset += rhs_word.len();
                        }
                    }
--- a/src/parse/syntax.rs
+++ b/src/parse/syntax.rs
@@ -14,7 +14,7 @@ use crate::words::split_words_and_numbers;
 use crate::{
    diff::changes::ChangeKind,
    diff::changes::{ChangeKind::*, ChangeMap},
-    diff::myers_diff,
+    diff::lcs_diff,
    hash::DftHashMap,
    lines::is_all_whitespace,
 };
@@ -708,7 +708,7 @@ fn split_atom_words(
    let content_parts = split_words_and_numbers(content);
    let other_parts = split_words_and_numbers(opposite_content);

-    let word_diffs = myers_diff::slice_by_hash(&content_parts, &other_parts);
+    let word_diffs = lcs_diff::slice_by_hash(&content_parts, &other_parts);

    if !has_common_words(&word_diffs) {
        return pos
@@ -731,7 +731,7 @@ fn split_atom_words(
    let mut mps = vec![];
    for diff_res in word_diffs {
        match diff_res {
-            myers_diff::DiffResult::Left(word) => {
+            lcs_diff::DiffResult::Left(word) => {
                // This word is novel to this side.
                if !is_all_whitespace(word) {
                    mps.push(MatchedPos {
@@ -748,7 +748,7 @@ fn split_atom_words(
                }
                offset += word.len();
            }
-            myers_diff::DiffResult::Both(word, opposite_word) => {
+            lcs_diff::DiffResult::Both(word, opposite_word) => {
                // This word is present on both sides.
                // TODO: don't assume this atom is on a single line.
                let word_pos =
@@ -771,7 +771,7 @@ fn split_atom_words(
                offset += word.len();
                opposite_offset += opposite_word.len();
            }
-            myers_diff::DiffResult::Right(opposite_word) => {
+            lcs_diff::DiffResult::Right(opposite_word) => {
                // Only exists on other side, nothing to do on this side.
                opposite_offset += opposite_word.len();
            }
@@ -783,13 +783,13 @@ fn split_atom_words(

 /// Are there sufficient common words that we should only highlight
 /// individual changed words?
-fn has_common_words(word_diffs: &Vec<myers_diff::DiffResult<&&str>>) -> bool {
+fn has_common_words(word_diffs: &Vec<lcs_diff::DiffResult<&&str>>) -> bool {
    let mut novel_count = 0;
    let mut unchanged_count = 0;

    for word_diff in word_diffs {
        match word_diff {
-            myers_diff::DiffResult::Both(word, _) => {
+            lcs_diff::DiffResult::Both(word, _) => {
                if **word != " " {
                    unchanged_count += 1;
                }