Improve perf when diffing identical contents

Should help with #98.
better_inline
Wilfred Hughes 2022-01-18 23:34:31 +07:00
parent 5a4b8e56ec
commit 02a25ff251
2 changed files with 35 additions and 5 deletions

@ -50,6 +50,9 @@ Text diffing now has a standalone implementation rather than reusing
structural diff logic. This is significantly faster and highlighted
better.
Improved performance when diffing two identical files. This is common
when diffing directories.
### Display
Improved highlighting heuristics for added/removed blank lines.

@ -248,10 +248,11 @@ fn main() {
/// Print a diff between two files. /// Print a diff between two files.
fn diff_file(display_path: &str, lhs_path: &Path, rhs_path: &Path) -> DiffResult { fn diff_file(display_path: &str, lhs_path: &Path, rhs_path: &Path) -> DiffResult {
let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path); let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path);
diff_file_content(display_path, &lhs_bytes, &rhs_bytes)
}
let lhs_binary = is_probably_binary(&lhs_bytes); fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) -> DiffResult {
let rhs_binary = is_probably_binary(&rhs_bytes); if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) {
if lhs_binary || rhs_binary {
return DiffResult { return DiffResult {
path: display_path.into(), path: display_path.into(),
language: None, language: None,
@ -264,10 +265,10 @@ fn diff_file(display_path: &str, lhs_path: &Path, rhs_path: &Path) -> DiffResult
} }
// TODO: don't replace tab characters inside string literals. // TODO: don't replace tab characters inside string literals.
let lhs_src = String::from_utf8_lossy(&lhs_bytes) let lhs_src = String::from_utf8_lossy(lhs_bytes)
.to_string() .to_string()
.replace("\t", " "); .replace("\t", " ");
let rhs_src = String::from_utf8_lossy(&rhs_bytes) let rhs_src = String::from_utf8_lossy(rhs_bytes)
.to_string() .to_string()
.replace("\t", " "); .replace("\t", " ");
@ -283,6 +284,18 @@ fn diff_file(display_path: &str, lhs_path: &Path, rhs_path: &Path) -> DiffResult
}; };
let ts_lang = guess(path, guess_src).map(tsp::from_language); let ts_lang = guess(path, guess_src).map(tsp::from_language);
if lhs_bytes == rhs_bytes {
return DiffResult {
path: display_path.into(),
language: ts_lang.map(|l| l.name.into()),
binary: true,
lhs_src: "".into(),
rhs_src: "".into(),
lhs_positions: vec![],
rhs_positions: vec![],
};
}
let (lang_name, lhs_positions, rhs_positions) = match ts_lang { let (lang_name, lhs_positions, rhs_positions) = match ts_lang {
Some(ts_lang) => { Some(ts_lang) => {
let arena = Arena::new(); let arena = Arena::new();
@ -394,3 +407,17 @@ fn print_diff_result(summary: &DiffResult) {
); );
} }
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Diffing a buffer against itself must report no positions on either side.
    ///
    /// The same slice is supplied as both LHS and RHS, so the call is expected
    /// to take the `lhs_bytes == rhs_bytes` early return in
    /// `diff_file_content`, which sets both position lists to `vec![]`
    /// (assuming the binary check does not fire for this tiny ASCII input).
    ///
    /// NOTE(review): that early return also sets `binary: true`, which this
    /// test does not check — confirm that value is intended for identical
    /// text input.
    #[test]
    fn test_diff_identical_content() {
        let content = "foo".as_bytes();
        let res = diff_file_content("foo.el", content, content);

        let lhs_positions = res.lhs_positions;
        let rhs_positions = res.rhs_positions;

        assert_eq!(lhs_positions, vec![]);
        assert_eq!(rhs_positions, vec![]);
    }
}