Try imara-diff

imara_diff_library
Wilfred Hughes 2025-03-20 23:36:36 +07:00
parent 36037cf7e0
commit 5e59b67b78
4 changed files with 78 additions and 34 deletions

29
Cargo.lock generated

@ -249,9 +249,10 @@ dependencies = [
"crossterm",
"encoding_rs",
"glob",
"hashbrown",
"hashbrown 0.14.3",
"humansize",
"ignore",
"imara-diff",
"lazy_static",
"libc",
"libmimalloc-sys",
@ -383,6 +384,12 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "glob"
version = "0.3.1"
@ -412,6 +419,15 @@ dependencies = [
"allocator-api2",
]
[[package]]
name = "hashbrown"
version = "0.15.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3"
dependencies = [
"foldhash",
]
[[package]]
name = "heck"
version = "0.5.0"
@ -455,6 +471,15 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "imara-diff"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17d34b7d42178945f775e84bc4c36dde7c1c6cdfea656d3354d009056f2bb3d2"
dependencies = [
"hashbrown 0.15.3",
]
[[package]]
name = "indexmap"
version = "2.2.6"
@ -462,7 +487,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
dependencies = [
"equivalent",
"hashbrown",
"hashbrown 0.14.3",
]
[[package]]

@ -109,6 +109,8 @@ tree-sitter-typescript = "0.23.2"
tree-sitter-xml = "0.7.0"
tree-sitter-yaml = "0.7.0"
imara-diff = "0.1.8"
[dev-dependencies]
# assert_cmd 2.0.10 requires predicates 3.
# TODO: update.

@ -95,6 +95,54 @@ pub(crate) fn slice_by_hash<'a, T: Eq + Hash>(
.collect::<Vec<_>>()
}
use imara_diff::intern::InternedInput;
use imara_diff::sources::lines_with_terminator;
use imara_diff::{diff, Algorithm};
/// Diff `lhs` and `rhs` by comparing individual lines.
///
/// Both strings are split into lines that keep their terminator and are
/// compared with imara-diff's histogram algorithm. The returned vector
/// lists the lines in order: unchanged lines as `DiffResult::Both`,
/// lines only present in `lhs` as `DiffResult::Left`, and lines only
/// present in `rhs` as `DiffResult::Right`.
pub(crate) fn string_lines<'a>(lhs: &'a str, rhs: &'a str) -> Vec<DiffResult<&'a str>> {
    let input = InternedInput::new(lines_with_terminator(lhs), lines_with_terminator(rhs));

    let mut res: Vec<DiffResult<&'a str>> = vec![];
    // Index of the next `lhs` line that has not been emitted yet.
    let mut lhs_i: u32 = 0;

    let sink = |before: std::ops::Range<u32>, after: std::ops::Range<u32>| {
        // The sink is only called for changed regions, so every `lhs`
        // line between the previous region and this one is unchanged.
        // The range is exclusive of `before.start`, which is the first
        // changed line; stopping one earlier would drop the unchanged
        // line directly preceding the change.
        for i in lhs_i..before.start {
            let line = input.interner[input.before[i as usize]];
            res.push(DiffResult::Both(line, line));
        }

        res.extend(
            input.before[before.start as usize..before.end as usize]
                .iter()
                .map(|&token| DiffResult::Left(input.interner[token])),
        );
        res.extend(
            input.after[after.start as usize..after.end as usize]
                .iter()
                .map(|&token| DiffResult::Right(input.interner[token])),
        );

        lhs_i = before.end;
    };

    diff(Algorithm::Histogram, &input, sink);

    // Anything left after the final changed region is unchanged.
    for i in lhs_i as usize..input.before.len() {
        let line = input.interner[input.before[i]];
        res.push(DiffResult::Both(line, line));
    }

    res
}
/// Compute the linear diff between `lhs` and `rhs`. If there are
/// items that only occur on a single side, mark them as novel without
/// processing them with Myers' diff.

@ -1,8 +1,6 @@
//! A fallback "parser" for plain text.
use lazy_static::lazy_static;
use line_numbers::{LinePositions, SingleLineSpan};
use regex::Regex;
use crate::words::split_words;
use crate::{
@ -12,25 +10,6 @@ use crate::{
const MAX_WORDS_IN_LINE: usize = 1000;
/// Split `s` into lines, keeping the trailing `\n` on every line that
/// has one.
///
/// A final line without a newline is returned as-is, and an empty
/// string yields no lines at all.
fn split_lines_keep_newline(s: &str) -> Vec<&str> {
    // `split_inclusive` keeps the delimiter at the end of each piece and
    // does not produce a trailing empty piece, so no regex or manual
    // offset tracking is needed.
    s.split_inclusive('\n').collect()
}
#[derive(Debug)]
enum TextChangeKind {
Novel,
@ -77,11 +56,8 @@ fn changed_parts<'a>(
src: &'a str,
opposite_src: &'a str,
) -> Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> {
let src_lines = split_lines_keep_newline(src);
let opposite_src_lines = split_lines_keep_newline(opposite_src);
let mut res: Vec<(TextChangeKind, Vec<&'a str>, Vec<&'a str>)> = vec![];
for diff_res in lcs_diff::slice_unique_by_hash(&src_lines, &opposite_src_lines) {
for diff_res in lcs_diff::string_lines(src, opposite_src) {
match diff_res {
lcs_diff::DiffResult::Left(line) => {
res.push((TextChangeKind::Novel, vec![line], vec![]));
@ -258,13 +234,6 @@ mod tests {
use super::*;
#[test]
fn test_split_newlines() {
    // Interior lines keep their `\n`; the unterminated last line is
    // returned unchanged.
    let lines = split_lines_keep_newline("foo\nbar\nbaz");
    let expected = vec!["foo\n", "bar\n", "baz"];
    assert_eq!(lines, expected);
}
#[test]
fn test_positions_no_changes() {
let positions = change_positions("foo", "foo");