Don't clean trailing newline before diffing

Difftastic should take the user's input as-is, or it risks performing an incorrect diff in both textual and syntactic diffing. Fixes #499
2023-03-30 08:46:11 +07:00 · 2023-03-30 08:46:11 +07:00 · 8b842387a1
parent 713220613e
commit 8b842387a1
5 changed files with 26 additions and 33 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -20,6 +20,10 @@ Difftastic now prefers treating files as 'mostly UTF-8' or binary rather than
 UTF-16. Many files can be decoded as UTF-16 without decoding errors
 but produce nonsense results, so this heuristic seems to work better.

+Fixed an issue where difftastic would discard the last newline in a
+file before diffing. This was most noticeable when doing textual diffs
+and the last line had changed.
+
 ### Display

 Difftastic no longer uses purple to highlight regions that are missing
--- a/sample_files/compare.expected
+++ b/sample_files/compare.expected
@ -5,7 +5,7 @@ sample_files/ada_before.adb sample_files/ada_after.adb
 a3b2b5033fde04bf9d2d71cb56d9eade  -

 sample_files/added_line_before.txt sample_files/added_line_after.txt
-91af28058b1b7da58f7509d22ed9cc58  -
+2deb040c52fb94459ca0540bcc983000  -

 sample_files/b2_math_before.h sample_files/b2_math_after.h
 ccbd784fa7df5d317fce588ac477956a  -
@ -119,7 +119,7 @@ sample_files/makefile_before.mk sample_files/makefile_after.mk
 82ed37f60448e7402c62d5319f30fd3c  -

 sample_files/metadata_before.clj sample_files/metadata_after.clj
-e8e76c97731accc5572708664bb61910  -
+2d6b3a5245d01fb1cb15dcf6117bbeb5  -

 sample_files/modules_before.ml sample_files/modules_after.ml
 cf88821ead0d432d4841f476c2f26fd2  -
--- a/src/lines.rs
+++ b/src/lines.rs
@ -183,21 +183,10 @@ impl<S: AsRef<str>> MaxLine for S {
    }
 }

-/// Split `s` on \n or \r\n. Always returns a non-empty vec. Each line
-/// in the vec does not include the trailing newline.
-///
-/// This differs from `str::lines`, which considers `""` to be zero
-/// lines and `"foo\n"` to be one line.
+/// Split `s` on \n or \r\n. Each line in the vec does not include the
+/// trailing newline.
 pub fn split_on_newlines(s: &str) -> Vec<&str> {
-    s.split('\n')
-        .map(|l| {
-            if let Some(l) = l.strip_suffix('\r') {
-                l
-            } else {
-                l
-            }
-        })
-        .collect()
+    s.lines().collect()
 }

 pub fn is_all_whitespace(s: &str) -> bool {
--- a/src/main.rs
+++ b/src/main.rs
@ -332,7 +332,7 @@ fn diff_file_content(
    diff_options: &DiffOptions,
    language_override: Option<parse::guess_language::Language>,
 ) -> DiffResult {
-    let (mut lhs_src, mut rhs_src) = match (guess_content(lhs_bytes), guess_content(rhs_bytes)) {
+    let (lhs_src, rhs_src) = match (guess_content(lhs_bytes), guess_content(rhs_bytes)) {
        (ProbableFileKind::Binary, _) | (_, ProbableFileKind::Binary) => {
            return DiffResult {
                lhs_display_path: lhs_display_path.into(),
@ -350,16 +350,6 @@ fn diff_file_content(
        (ProbableFileKind::Text(lhs_src), ProbableFileKind::Text(rhs_src)) => (lhs_src, rhs_src),
    };

-    // Ignore the trailing newline, if present.
-    // TODO: highlight if this has changes (#144).
-    // TODO: factor out a string cleaning function.
-    if lhs_src.ends_with('\n') {
-        lhs_src.pop();
-    }
-    if rhs_src.ends_with('\n') {
-        rhs_src.pop();
-    }
-
    let (guess_src, guess_path) = match rhs_path {
        FileArgument::NamedPath(_) => (&rhs_src, Path::new(&rhs_display_path)),
        FileArgument::Stdin => (&rhs_src, Path::new(&lhs_display_path)),
--- a/src/parse/syntax.rs
+++ b/src/parse/syntax.rs
@ -805,10 +805,11 @@ impl MatchedPos {
                let mut res = vec![];
                for (i, line_pos) in pos.iter().enumerate() {
                    // Don't create a MatchedPos for empty positions
-                    // at the start. We will want empty positions on
-                    // multiline atoms elsewhere, as a multline string
-                    // literal may include empty lines.
-                    if i == 0 && line_pos.start_col == line_pos.end_col {
+                    // at the start or end. We still want empty
+                    // positions in the middle of multiline atoms, as
+                    // a multiline string literal may include empty
+                    // lines.
+                    if (i == 0 || i == pos.len() - 1) && line_pos.start_col == line_pos.end_col {
                        continue;
                    }

@ -831,14 +832,23 @@ impl MatchedPos {
                let kind = MatchKind::Novel { highlight };
                // Create a MatchedPos for every line that `pos` covers.
                let mut res = vec![];
-                for line_pos in pos {
-                    // Don't create a MatchedPos for empty positions. This
+                for (i, line_pos) in pos.iter().enumerate() {
+                    // Don't create a MatchedPos for entirly empty positions. This
                    // occurs when we have lists with empty open/close
                    // delimiter positions, such as the top-level list of syntax items.
                    if pos.len() == 1 && line_pos.start_col == line_pos.end_col {
                        continue;
                    }

+                    // Don't create a MatchedPos for empty positions
+                    // at the start or end. We still want empty
+                    // positions in the middle of multiline atoms, as
+                    // a multiline string literal may include empty
+                    // lines.
+                    if (i == 0 || i == pos.len() - 1) && line_pos.start_col == line_pos.end_col {
+                        continue;
+                    }
+
                    res.push(Self {
                        kind: kind.clone(),
                        pos: *line_pos,