Rename myers_diff to lcs_diff, as it's not actually the Myers algorithm

fix_python_re_indent
Wilfred Hughes 2025-03-09 23:55:08 +07:00
parent ca9b7da43f
commit d8b715bd5b
5 changed files with 30 additions and 25 deletions

@ -1,4 +1,10 @@
//! A fast diff for linear content, using Myer's diff algorithm.
//! A fast diff for linear content, particularly lines of text.
//!
//! This file uses the Wu algorithm, via the `wu-diff` crate.
//!
//! Difftastic has the files huge_cpp_1.cpp and huge_cpp_2.cpp in the
//! sample_files directory for a performance stress test. These files
//! are 22 MiB and 590,000 lines.
use std::hash::Hash;
@ -11,8 +17,7 @@ pub(crate) enum DiffResult<T> {
Right(T),
}
/// Compute a linear diff between `lhs` and `rhs`. This is the
/// traditional Myer's diff algorithm.
/// Compute a linear diff between `lhs` and `rhs`.
pub(crate) fn slice<'a, T: PartialEq + Clone>(
lhs: &'a [T],
rhs: &'a [T],

@ -1,7 +1,7 @@
pub(crate) mod changes;
pub(crate) mod dijkstra;
mod graph;
pub(crate) mod myers_diff;
pub(crate) mod lcs_diff;
pub(crate) mod sliders;
mod stack;
pub(crate) mod unchanged;

@ -4,7 +4,7 @@
use std::hash::Hash;
use crate::diff::changes::{insert_deep_unchanged, ChangeKind, ChangeMap};
use crate::diff::myers_diff;
use crate::diff::lcs_diff;
use crate::hash::DftHashSet;
use crate::parse::syntax::Syntax;
@ -255,9 +255,9 @@ fn split_unchanged_toplevel<'a>(
let mut section_lhs_nodes = vec![];
let mut section_rhs_nodes = vec![];
for diff_res in myers_diff::slice(&lhs_node_ids, &rhs_node_ids) {
for diff_res in lcs_diff::slice(&lhs_node_ids, &rhs_node_ids) {
match diff_res {
myers_diff::DiffResult::Both(lhs, rhs) => {
lcs_diff::DiffResult::Both(lhs, rhs) => {
let lhs_node = lhs.1;
let rhs_node = rhs.1;
@ -293,10 +293,10 @@ fn split_unchanged_toplevel<'a>(
res.push((ChangeState::UnchangedNode, vec![lhs_node], vec![rhs_node]));
}
}
myers_diff::DiffResult::Left(lhs) => {
lcs_diff::DiffResult::Left(lhs) => {
section_lhs_nodes.push(lhs.1);
}
myers_diff::DiffResult::Right(rhs) => {
lcs_diff::DiffResult::Right(rhs) => {
section_rhs_nodes.push(rhs.1);
}
}

@ -6,7 +6,7 @@ use regex::Regex;
use crate::words::split_words;
use crate::{
diff::myers_diff,
diff::lcs_diff,
parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind},
};
@ -81,15 +81,15 @@ fn changed_parts<'a>(
let opposite_src_lines = split_lines_keep_newline(opposite_src);
let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![];
for diff_res in myers_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) {
for diff_res in lcs_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) {
match diff_res {
myers_diff::DiffResult::Left(line) => {
lcs_diff::DiffResult::Left(line) => {
res.push((TextChangeKind::Novel, vec![line], vec![]));
}
myers_diff::DiffResult::Both(line, opposite_line) => {
lcs_diff::DiffResult::Both(line, opposite_line) => {
res.push((TextChangeKind::Unchanged, vec![line], vec![opposite_line]));
}
myers_diff::DiffResult::Right(opposite_line) => {
lcs_diff::DiffResult::Right(opposite_line) => {
res.push((TextChangeKind::Novel, vec![], vec![opposite_line]));
}
}
@ -176,9 +176,9 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
continue;
}
for diff_res in myers_diff::slice_unique_by_hash(&lhs_words, &rhs_words) {
for diff_res in lcs_diff::slice_unique_by_hash(&lhs_words, &rhs_words) {
match diff_res {
myers_diff::DiffResult::Left(lhs_word) => {
lcs_diff::DiffResult::Left(lhs_word) => {
let lhs_pos =
lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len());
@ -191,7 +191,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
lhs_offset += lhs_word.len();
}
myers_diff::DiffResult::Both(lhs_word, rhs_word) => {
lcs_diff::DiffResult::Both(lhs_word, rhs_word) => {
if *lhs_word != "\n" {
let lhs_pos =
lhs_lp.from_region(lhs_offset, lhs_offset + lhs_word.len());
@ -211,7 +211,7 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
lhs_offset += lhs_word.len();
rhs_offset += rhs_word.len();
}
myers_diff::DiffResult::Right(rhs_word) => {
lcs_diff::DiffResult::Right(rhs_word) => {
rhs_offset += rhs_word.len();
}
}

@ -14,7 +14,7 @@ use crate::words::split_words_and_numbers;
use crate::{
diff::changes::ChangeKind,
diff::changes::{ChangeKind::*, ChangeMap},
diff::myers_diff,
diff::lcs_diff,
hash::DftHashMap,
lines::is_all_whitespace,
};
@ -708,7 +708,7 @@ fn split_atom_words(
let content_parts = split_words_and_numbers(content);
let other_parts = split_words_and_numbers(opposite_content);
let word_diffs = myers_diff::slice_by_hash(&content_parts, &other_parts);
let word_diffs = lcs_diff::slice_by_hash(&content_parts, &other_parts);
if !has_common_words(&word_diffs) {
return pos
@ -731,7 +731,7 @@ fn split_atom_words(
let mut mps = vec![];
for diff_res in word_diffs {
match diff_res {
myers_diff::DiffResult::Left(word) => {
lcs_diff::DiffResult::Left(word) => {
// This word is novel to this side.
if !is_all_whitespace(word) {
mps.push(MatchedPos {
@ -748,7 +748,7 @@ fn split_atom_words(
}
offset += word.len();
}
myers_diff::DiffResult::Both(word, opposite_word) => {
lcs_diff::DiffResult::Both(word, opposite_word) => {
// This word is present on both sides.
// TODO: don't assume this atom is on a single line.
let word_pos =
@ -771,7 +771,7 @@ fn split_atom_words(
offset += word.len();
opposite_offset += opposite_word.len();
}
myers_diff::DiffResult::Right(opposite_word) => {
lcs_diff::DiffResult::Right(opposite_word) => {
// Only exists on other side, nothing to do on this side.
opposite_offset += opposite_word.len();
}
@ -783,13 +783,13 @@ fn split_atom_words(
/// Are there sufficient common words that we should only highlight
/// individual changed words?
fn has_common_words(word_diffs: &Vec<myers_diff::DiffResult<&&str>>) -> bool {
fn has_common_words(word_diffs: &Vec<lcs_diff::DiffResult<&&str>>) -> bool {
let mut novel_count = 0;
let mut unchanged_count = 0;
for word_diff in word_diffs {
match word_diff {
myers_diff::DiffResult::Both(word, _) => {
lcs_diff::DiffResult::Both(word, _) => {
if **word != " " {
unchanged_count += 1;
}