Don't consider newlines to be words in the line parser

This causes us to match up unrelated lines, and doesn't make sense for
a line parser.

Improves #90.
better_inline
Wilfred Hughes 2022-01-22 17:53:53 +07:00
parent e060ab31bf
commit de89caadb3
3 changed files with 45 additions and 22 deletions

@ -33,6 +33,9 @@ Improved diffing performance (both time and memory usage).
Sliders are now fixed up after diffing. This produces better looking
results in more cases, and makes the primary diffing faster.
Fixed some corner cases in the line parser where it would match up
isolated newline character as unchanged, leading to weird alignment.
## 0.15 (released 6 January 2022)
### Parsing

@ -663,7 +663,7 @@ pub fn aligned_lines_from_hunk(
max_lhs_src_line: LineNumber,
max_rhs_src_line: LineNumber,
) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
let hunk_lines: Vec<(Option<LineNumber>, Option<LineNumber>)> = hunk.lines.clone();
let hunk_lines: Vec<(Option<LineNumber>, Option<LineNumber>)> = dbg!(hunk.lines.clone());
// TODO: this largely duplicates add_context().
let before_context = calculate_before_context(&hunk_lines, opposite_to_lhs, opposite_to_rhs);

@ -130,31 +130,35 @@ pub fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> {
for diff_res in diff::slice(&split_words(&lhs_part), &split_words(&rhs_part)) {
match diff_res {
diff::Result::Left(lhs_word) => {
let lhs_pos =
lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
res.push(MatchedPos {
// TODO: rename this kind to reflect
// that it's used for both code
// comments and plain text.
kind: MatchKind::ChangedCommentPart {},
pos: lhs_pos[0],
});
if lhs_word != "\n" {
let lhs_pos =
lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
res.push(MatchedPos {
// TODO: rename this kind to reflect
// that it's used for both code
// comments and plain text.
kind: MatchKind::ChangedCommentPart {},
pos: lhs_pos[0],
});
}
lhs_offset += lhs_word.len();
}
diff::Result::Both(lhs_word, rhs_word) => {
let lhs_pos =
lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
let rhs_pos =
rhs_nlp.from_offsets(rhs_offset, rhs_offset + rhs_word.len());
res.push(MatchedPos {
kind: MatchKind::UnchangedCommentPart {
self_pos: lhs_pos[0],
opposite_pos: rhs_pos,
},
pos: lhs_pos[0],
});
if lhs_word != "\n" {
let lhs_pos =
lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
let rhs_pos =
rhs_nlp.from_offsets(rhs_offset, rhs_offset + rhs_word.len());
res.push(MatchedPos {
kind: MatchKind::UnchangedCommentPart {
self_pos: lhs_pos[0],
opposite_pos: rhs_pos,
},
pos: lhs_pos[0],
});
}
lhs_offset += lhs_word.len();
rhs_offset += rhs_word.len();
@ -191,6 +195,22 @@ mod tests {
assert!(!positions[0].kind.is_change());
}
#[test]
fn test_no_changes_trailing_newlines() {
let positions = change_positions("foo\n", "foo\n");
assert_eq!(positions.len(), 1);
assert!(!positions[0].kind.is_change());
}
#[test]
fn test_novel_lhs_trailing_newlines() {
let positions = change_positions("foo\n", "");
assert_eq!(positions.len(), 1);
assert!(positions[0].kind.is_change());
}
#[test]
fn test_positions_novel_lhs() {
let positions = change_positions("foo", "");