Set a length limit on lines when doing a word diff

See #653
pull/647/head^2
Wilfred Hughes 2024-02-29 00:54:55 +07:00
parent 7e8f928926
commit 53298e4240
5 changed files with 34 additions and 4 deletions

@ -6,6 +6,11 @@ Updated JavaScript, TypeScript and QML parsers.
Added support for Smali.
### Diffing
Fixed an issue with runaway memory usage on text files with very long
lines.
### Display
Fixed an issue where all files would show a permissions change when

@ -127,6 +127,9 @@ sample_files/julia_before.jl sample_files/julia_after.jl
sample_files/load_before.js sample_files/load_after.js
7ead0c677e1ccc5639ea2a8199a8175e -
sample_files/long_line_before.txt sample_files/long_line_after.txt
850e2efa67152b79d5b5099f202d61bb -
sample_files/lua_before.lua sample_files/lua_after.lua
9f5c85cd6806c724c84afa805da76bb7 -

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -10,6 +10,8 @@ use crate::{
parse::syntax::{AtomKind, MatchKind, MatchedPos, TokenKind},
};
/// Threshold on the number of words for which a word-level diff is
/// still attempted.
///
/// The count is taken after splitting a changed part into words. If
/// either the left-hand or the right-hand side has more words than
/// this, the word-level Myers diff is skipped for that part.
const MAX_WORDS_IN_LINE: usize = 1000;
fn split_lines_keep_newline(s: &str) -> Vec<&str> {
lazy_static! {
static ref NEWLINE_RE: Regex = Regex::new("\n").unwrap();
@ -142,10 +144,28 @@ pub(crate) fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos>
let lhs_part = lhs_lines.join("");
let rhs_part = rhs_lines.join("");
for diff_res in myers_diff::slice_unique_by_hash(
&split_words(&lhs_part),
&split_words(&rhs_part),
) {
let lhs_words = split_words(&lhs_part);
let rhs_words = split_words(&rhs_part);
// Myers Diff scales badly on large inputs, and
// word-level diffing is merely nice to have. If we
// have a very large number of words, don't diff
// individual words.
if lhs_words.len() > MAX_WORDS_IN_LINE || rhs_words.len() > MAX_WORDS_IN_LINE {
let lhs_pos = lhs_lp.from_region(lhs_offset, lhs_offset + lhs_part.len());
mps.push(MatchedPos {
kind: MatchKind::NovelWord {
highlight: TokenKind::Atom(AtomKind::Normal),
},
pos: lhs_pos[0],
});
lhs_offset += lhs_part.len();
rhs_offset += rhs_part.len();
continue;
}
for diff_res in myers_diff::slice_unique_by_hash(&lhs_words, &rhs_words) {
match diff_res {
myers_diff::DiffResult::Left(lhs_word) => {
let lhs_pos =