Rename myers_diff to lcs_diff, as it's not actually the Myers algorithm

fix_python_re_indent
Wilfred Hughes 2025-03-09 23:55:08 +07:00
parent ca9b7da43f
commit d8b715bd5b
5 changed files with 30 additions and 25 deletions

@ -1,4 +1,10 @@
//! A fast diff for linear content, using Myer's diff algorithm. //! A fast diff for linear content, particularly lines of text.
//!
//! This file uses the Wu algorithm, via the `wu-diff` crate.
//!
//! Difftastic has the files huge_cpp_1.cpp and huge_cpp_2.cpp in the
//! sample_files directory for a performance stress test. These files
//! are 22 MiB and 590,000 lines.
use std::hash::Hash; use std::hash::Hash;
@ -11,8 +17,7 @@ pub(crate) enum DiffResult<T> {
Right(T), Right(T),
} }
/// Compute a linear diff between `lhs` and `rhs`. This is the /// Compute a linear diff between `lhs` and `rhs`.
/// traditional Myer's diff algorithm.
pub(crate) fn slice<'a, T: PartialEq + Clone>( pub(crate) fn slice<'a, T: PartialEq + Clone>(
lhs: &'a [T], lhs: &'a [T],
rhs: &'a [T], rhs: &'a [T],

@ -1,7 +1,7 @@
pub(crate) mod changes; pub(crate) mod changes;
pub(crate) mod dijkstra; pub(crate) mod dijkstra;
mod graph; mod graph;
pub(crate) mod myers_diff; pub(crate) mod lcs_diff;
pub(crate) mod sliders; pub(crate) mod sliders;
mod stack; mod stack;
pub(crate) mod unchanged; pub(crate) mod unchanged;

@ -4,7 +4,7 @@
use std::hash::Hash; use std::hash::Hash;
use crate::diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap}; use crate::diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap};
use crate::diff::myers_diff; use crate::diff::lcs_diff;
use crate::hash::DftHashSet; use crate::hash::DftHashSet;
use crate::parse::syntax::Syntax; use crate::parse::syntax::Syntax;
@ -255,9 +255,9 @@ fn split_unchanged_toplevel<'a>(
let mut section_lhs_nodes = vec![]; let mut section_lhs_nodes = vec![];
let mut section_rhs_nodes = vec![]; let mut section_rhs_nodes = vec![];
for diff_res in myers_diff::slice(&lhs_node_ids, &rhs_node_ids) { for diff_res in lcs_diff::slice(&lhs_node_ids, &rhs_node_ids) {
match diff_res { match diff_res {
myers_diff::DiffResult::Both(lhs, rhs) => { lcs_diff::DiffResult::Both(lhs, rhs) => {
let lhs_node = lhs.1; let lhs_node = lhs.1;
let rhs_node = rhs.1; let rhs_node = rhs.1;
@ -293,10 +293,10 @@ fn split_unchanged_toplevel<'a>(
res.push((ChangeState::UnchangedNode, vec![lhs_node], vec![rhs_node])); res.push((ChangeState::UnchangedNode, vec![lhs_node], vec![rhs_node]));
} }
} }
myers_diff::DiffResult::Left(lhs) => { lcs_diff::DiffResult::Left(lhs) => {
section_lhs_nodes.push(lhs.1); section_lhs_nodes.push(lhs.1);
} }
myers_diff::DiffResult::Right(rhs) => { lcs_diff::DiffResult::Right(rhs) => {
section_rhs_nodes.push(rhs.1); section_rhs_nodes.push(rhs.1);
} }
} }

@ -6,7 +6,7 @@ use regex::Regex;
use crate::words::split_words; use crate::words::split_words;
use crate::{ use crate::{
diff::myers_diff, diff::lcs_diff,
parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind}, parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind},
}; };
@ -81,15 +81,15 @@ fn changed_parts<'a>(
let opposite_src_lines = split_lines_keep_newline(opposite_src); let opposite_src_lines = split_lines_keep_newline(opposite_src);
let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![]; let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![];
for diff_res in myers_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) { for diff_res in lcs_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) {
match diff_res { match diff_res {
myers_diff::DiffResult::Left(line) => { lcs_diff::DiffResult::Left(line) => {
res.push((TextChangeKind::Novel, vec![line], vec![])); res.push((TextChangeKind::Novel, vec![line], vec![]));
} }
myers_diff::DiffResult::Both(line, opposite_line) => { lcs_diff::DiffResult::Both(line, opposite_line) => {
res.push((TextChangeKind::Unchanged, vec![line], vec![opposite_line])); res.push((TextChangeKind::Unchanged, vec![line], vec![opposite_line]));
} }
myers_diff::DiffResult::Right(opposite_line) => { lcs_diff::DiffResult::Right(opposite_line) => {
res.push((TextChangeKind::Novel, vec![], vec![opposite_line])); res.push((TextChangeKind::Novel, vec![], vec![opposite_line]));
} }
} }
@ -176,9 +176,9 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
continue; continue;
} }
for diff_res in myers_diff::slice_unique_by_hash(&lhs_words, &rhs_words) { for diff_res in lcs_diff::slice_unique_by_hash(&lhs_words, &rhs_words) {
match diff_res { match diff_res {
myers_diff::DiffResult::Left(lhs_word) => { lcs_diff::DiffResult::Left(lhs_word) => {
let lhs_pos = let lhs_pos =
lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len()); lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len());
@ -191,7 +191,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
lhs_offset += lhs_word.len(); lhs_offset += lhs_word.len();
} }
myers_diff::DiffResult::Both(lhs_word, rhs_word) => { lcs_diff::DiffResult::Both(lhs_word, rhs_word) => {
if *lhs_word != "\n" { if *lhs_word != "\n" {
let lhs_pos = let lhs_pos =
lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len()); lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len());
@ -211,7 +211,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
lhs_offset += lhs_word.len(); lhs_offset += lhs_word.len();
rhs_offset += rhs_word.len(); rhs_offset += rhs_word.len();
} }
myers_diff::DiffResult::Right(rhs_word) => { lcs_diff::DiffResult::Right(rhs_word) => {
rhs_offset += rhs_word.len(); rhs_offset += rhs_word.len();
} }
} }

@ -14,7 +14,7 @@ use crate::words::split_words_and_numbers;
use crate::{ use crate::{
diff::changes::ChangeKind, diff::changes::ChangeKind,
diff::changes::{ChangeKind::*, ChangeMap}, diff::changes::{ChangeKind::*, ChangeMap},
diff::myers_diff, diff::lcs_diff,
hash::DftHashMap, hash::DftHashMap,
lines::is_all_whitespace, lines::is_all_whitespace,
}; };
@ -708,7 +708,7 @@ fn split_atom_words(
let content_parts = split_words_and_numbers(content); let content_parts = split_words_and_numbers(content);
let other_parts = split_words_and_numbers(opposite_content); let other_parts = split_words_and_numbers(opposite_content);
let word_diffs = myers_diff::slice_by_hash(&content_parts, &other_parts); let word_diffs = lcs_diff::slice_by_hash(&content_parts, &other_parts);
if !has_common_words(&word_diffs) { if !has_common_words(&word_diffs) {
return pos return pos
@ -731,7 +731,7 @@ fn split_atom_words(
let mut mps = vec![]; let mut mps = vec![];
for diff_res in word_diffs { for diff_res in word_diffs {
match diff_res { match diff_res {
myers_diff::DiffResult::Left(word) => { lcs_diff::DiffResult::Left(word) => {
// This word is novel to this side. // This word is novel to this side.
if !is_all_whitespace(word) { if !is_all_whitespace(word) {
mps.push(MatchedPos { mps.push(MatchedPos {
@ -748,7 +748,7 @@ fn split_atom_words(
} }
offset += word.len(); offset += word.len();
} }
myers_diff::DiffResult::Both(word, opposite_word) => { lcs_diff::DiffResult::Both(word, opposite_word) => {
// This word is present on both sides. // This word is present on both sides.
// TODO: don't assume this atom is on a single line. // TODO: don't assume this atom is on a single line.
let word_pos = let word_pos =
@ -771,7 +771,7 @@ fn split_atom_words(
offset += word.len(); offset += word.len();
opposite_offset += opposite_word.len(); opposite_offset += opposite_word.len();
} }
myers_diff::DiffResult::Right(opposite_word) => { lcs_diff::DiffResult::Right(opposite_word) => {
// Only exists on other side, nothing to do on this side. // Only exists on other side, nothing to do on this side.
opposite_offset += opposite_word.len(); opposite_offset += opposite_word.len();
} }
@ -783,13 +783,13 @@ fn split_atom_words(
/// Are there sufficient common words that we should only highlight /// Are there sufficient common words that we should only highlight
/// individual changed words? /// individual changed words?
fn has_common_words(word_diffs: &Vec<myers_diff::DiffResult<&&str>>) -> bool { fn has_common_words(word_diffs: &Vec<lcs_diff::DiffResult<&&str>>) -> bool {
let mut novel_count = 0; let mut novel_count = 0;
let mut unchanged_count = 0; let mut unchanged_count = 0;
for word_diff in word_diffs { for word_diff in word_diffs {
match word_diff { match word_diff {
myers_diff::DiffResult::Both(word, _) => { lcs_diff::DiffResult::Both(word, _) => {
if **word != " " { if **word != " " {
unchanged_count += 1; unchanged_count += 1;
} }