Require some common words to do subword highlighting

This is important when comparing short string literals. This change
has improved several cases in sample_files/ but I've added a new
example that made the previous unwanted behaviour much more obvious.
text_sliders
Wilfred Hughes 2023-07-10 09:01:45 +07:00
parent 4aca79f220
commit 5824322244
4 changed files with 3884 additions and 16 deletions

@ -77,7 +77,7 @@ sample_files/helpful_before.el sample_files/helpful_after.el
a0f2e0115ea94c46d3650ba89b486f09 -
sample_files/html_before.html sample_files/html_after.html
ed77c9d76eefdc82cf52e089d268ac6c -
0cd91f89716413757ee300e0a6f13453 -
sample_files/html_simple_before.html sample_files/html_simple_after.html
ce3bfa12bc21d0eb5528766e18387e86 -
@ -104,7 +104,7 @@ sample_files/javascript_simple_before.js sample_files/javascript_simple_after.js
3357d9d47a5e7efb3c7677745993ea2b -
sample_files/json_before.json sample_files/json_after.json
11bd95ff0aff18781d3421f702d62c17 -
bae479fb04e15baf9460c5274c77963b -
sample_files/jsx_before.jsx sample_files/jsx_after.jsx
5784f67cac95fcdb621751aa80a3402b -
@ -116,7 +116,7 @@ sample_files/load_before.js sample_files/load_after.js
5cb293020a07b0635b864850c07458b3 -
sample_files/lua_before.lua sample_files/lua_after.lua
c12d85c8ffa7ad6b6ca931cf52ac5f3e -
9886d61f459cdf566be9c42f7fa61a12 -
sample_files/makefile_before.mk sample_files/makefile_after.mk
82ed37f60448e7402c62d5319f30fd3c -
@ -199,6 +199,9 @@ sample_files/slow_before.rs sample_files/slow_after.rs
sample_files/small_before.js sample_files/small_after.js
b4300bfc0203acd8f2603b504b859dc8 -
sample_files/strings_before.el sample_files/strings_after.el
adc1c8734906b83deff25b1567e46b56 -
sample_files/swift_before.swift sample_files/swift_after.swift
4285db52158468d58d54115b6cb8f29b -

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -695,6 +695,11 @@ pub fn split_words_and_numbers(s: &str) -> Vec<&str> {
res
}
/// Given the text `content` from a comment or string, split it into
/// MatchedPos values for the novel and unchanged words.
///
/// If there is negligible text in common with `opposite_content`,
/// treat the whole `content` as a single novel region.
fn split_atom_words(
content: &str,
pos: SingleLineSpan,
@ -709,6 +714,17 @@ fn split_atom_words(
let content_parts = split_words_and_numbers(content);
let other_parts = split_words_and_numbers(opposite_content);
let word_diffs = myers_diff::slice_by_hash(&content_parts, &other_parts);
if !has_common_words(&word_diffs) {
return vec![MatchedPos {
kind: MatchKind::Novel {
highlight: TokenKind::Atom(kind),
},
pos,
}];
}
let content_newlines = NewlinePositions::from(content);
let opposite_content_newlines = NewlinePositions::from(opposite_content);
@ -716,7 +732,7 @@ fn split_atom_words(
let mut opposite_offset = 0;
let mut res = vec![];
for diff_res in myers_diff::slice_by_hash(&content_parts, &other_parts) {
for diff_res in word_diffs {
match diff_res {
myers_diff::DiffResult::Left(word) => {
// This word is novel to this side.
@ -765,6 +781,34 @@ fn split_atom_words(
res
}
/// Are there sufficient common words that we should only highlight
/// individual changed words?
///
/// `word_diffs` is the word-level diff of the two sides. Only
/// `DiffResult::Both` entries (words present on both sides) are
/// considered; every other entry is ignored.
///
/// Returns `true` as soon as either of these holds:
///
/// * a common word is longer than two bytes, or
/// * more than four short common words have been seen.
///
/// Returns `false` otherwise, including for an empty slice.
fn has_common_words(word_diffs: &[myers_diff::DiffResult<&&str>]) -> bool {
    // Number of short common words (two bytes or fewer) seen so far.
    let mut short_common_words = 0;

    for word_diff in word_diffs {
        if let myers_diff::DiffResult::Both(word, _) = word_diff {
            // If we have at least one long word (i.e. not just
            // punctuation), that's sufficient. Note that `len()`
            // measures bytes, not characters.
            if word.len() > 2 {
                return true;
            }

            // If we have lots of common short words, not just the
            // beginning/end comment delimiter, that qualifies too.
            short_common_words += 1;
            if short_common_words > 4 {
                return true;
            }
        }
    }

    false
}
impl MatchedPos {
fn new(
ck: ChangeKind,
@ -1135,15 +1179,15 @@ mod tests {
}
#[test]
fn test_split_comment_words_basic() {
let content = "abc";
fn test_split_atom_words() {
let content = "abc def";
let pos = SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 3,
end_col: 7,
};
let opposite_content = "def";
let opposite_content = "abc";
let opposite_pos = SingleLineSpan {
line: 0.into(),
start_col: 0,
@ -1159,16 +1203,38 @@ mod tests {
);
assert_eq!(
res,
vec![MatchedPos {
kind: MatchKind::NovelWord {
highlight: TokenKind::Atom(AtomKind::Comment),
vec![
MatchedPos {
kind: MatchKind::NovelLinePart {
highlight: TokenKind::Atom(AtomKind::Comment),
self_pos: SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 3
},
opposite_pos: vec![SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 3
}]
},
pos: SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 3
},
},
pos: SingleLineSpan {
line: 0.into(),
start_col: 0,
end_col: 3
MatchedPos {
kind: MatchKind::NovelWord {
highlight: TokenKind::Atom(AtomKind::Comment),
},
pos: SingleLineSpan {
line: 0.into(),
start_col: 4,
end_col: 7
},
},
},]
]
);
}