Don't consider newlines to be words in the line parser

This causes us to match up unrelated lines, and doesn't make sense for a line parser. Improves #90.
2022-01-22 17:53:53 +07:00 · 2022-01-22 17:53:53 +07:00 · de89caadb3
parent e060ab31bf
commit de89caadb3
3 changed files with 45 additions and 22 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -33,6 +33,9 @@ Improved diffing performance (both time and memory usage).
 Sliders are now fixed up after diffing. This produces better looking
 results in more cases, and makes the primary diffing faster.

+Fixed some corner cases in the line parser where it would match up
+isolated newline character as unchanged, leading to weird alignment.
+
 ## 0.15 (released 6 January 2022)

 ### Parsing
--- a/src/hunks.rs
+++ b/src/hunks.rs
@ -663,7 +663,7 @@ pub fn aligned_lines_from_hunk(
    max_lhs_src_line: LineNumber,
    max_rhs_src_line: LineNumber,
 ) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
-    let hunk_lines: Vec<(Option<LineNumber>, Option<LineNumber>)> = hunk.lines.clone();
+    let hunk_lines: Vec<(Option<LineNumber>, Option<LineNumber>)> = dbg!(hunk.lines.clone());

    // TODO: this largely duplicates add_context().
    let before_context = calculate_before_context(&hunk_lines, opposite_to_lhs, opposite_to_rhs);
--- a/src/line_parser.rs
+++ b/src/line_parser.rs
@ -130,31 +130,35 @@ pub fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> {
                for diff_res in diff::slice(&split_words(&lhs_part), &split_words(&rhs_part)) {
                    match diff_res {
                        diff::Result::Left(lhs_word) => {
-                            let lhs_pos =
-                                lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
-                            res.push(MatchedPos {
-                                // TODO: rename this kind to reflect
-                                // that it's used for both code
-                                // comments and plain text.
-                                kind: MatchKind::ChangedCommentPart {},
-                                pos: lhs_pos[0],
-                            });
+                            if lhs_word != "\n" {
+                                let lhs_pos =
+                                    lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
+                                res.push(MatchedPos {
+                                    // TODO: rename this kind to reflect
+                                    // that it's used for both code
+                                    // comments and plain text.
+                                    kind: MatchKind::ChangedCommentPart {},
+                                    pos: lhs_pos[0],
+                                });
+                            }

                            lhs_offset += lhs_word.len();
                        }
                        diff::Result::Both(lhs_word, rhs_word) => {
-                            let lhs_pos =
-                                lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
-                            let rhs_pos =
-                                rhs_nlp.from_offsets(rhs_offset, rhs_offset + rhs_word.len());
-
-                            res.push(MatchedPos {
-                                kind: MatchKind::UnchangedCommentPart {
-                                    self_pos: lhs_pos[0],
-                                    opposite_pos: rhs_pos,
-                                },
-                                pos: lhs_pos[0],
-                            });
+                            if lhs_word != "\n" {
+                                let lhs_pos =
+                                    lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
+                                let rhs_pos =
+                                    rhs_nlp.from_offsets(rhs_offset, rhs_offset + rhs_word.len());
+
+                                res.push(MatchedPos {
+                                    kind: MatchKind::UnchangedCommentPart {
+                                        self_pos: lhs_pos[0],
+                                        opposite_pos: rhs_pos,
+                                    },
+                                    pos: lhs_pos[0],
+                                });
+                            }

                            lhs_offset += lhs_word.len();
                            rhs_offset += rhs_word.len();
@ -191,6 +195,22 @@ mod tests {
        assert!(!positions[0].kind.is_change());
    }

+    #[test]
+    fn test_no_changes_trailing_newlines() {
+        let positions = change_positions("foo\n", "foo\n");
+
+        assert_eq!(positions.len(), 1);
+        assert!(!positions[0].kind.is_change());
+    }
+
+    #[test]
+    fn test_novel_lhs_trailing_newlines() {
+        let positions = change_positions("foo\n", "");
+
+        assert_eq!(positions.len(), 1);
+        assert!(positions[0].kind.is_change());
+    }
+
    #[test]
    fn test_positions_novel_lhs() {
        let positions = change_positions("foo", "");