From 5a51c02157c7fd4981e294e75d044f227ce2a939 Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Fri, 26 Jul 2024 08:48:50 -0700 Subject: [PATCH] Fix crash when the last hunk includes the trailing newline at EOF Input cleaning should happen before we diff the content, not during display. Previously display would crash due to line numbers referencing the line that had been stripped. Fixes #713 Fixes #739 Fixes #742 --- CHANGELOG.md | 5 ++++ sample_files/cli_tests/changes_at_end_1.txt | 21 +++++++++++++++ sample_files/cli_tests/changes_at_end_2.txt | 30 +++++++++++++++++++++ src/display/side_by_side.rs | 16 ++--------- src/main.rs | 12 +++++++++ tests/cli.rs | 10 +++++++ 6 files changed, 80 insertions(+), 14 deletions(-) create mode 100644 sample_files/cli_tests/changes_at_end_1.txt create mode 100644 sample_files/cli_tests/changes_at_end_2.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 121fe6817..6919386ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## 0.60 (unreleased) +### Diffing + +Fixed a crash (introduced in 0.59) when the final changed hunk +included the last line of the file. + ### Display Fixed an issue where files with no common content would show duplicate diff --git a/sample_files/cli_tests/changes_at_end_1.txt b/sample_files/cli_tests/changes_at_end_1.txt new file mode 100644 index 000000000..f3be81d66 --- /dev/null +++ b/sample_files/cli_tests/changes_at_end_1.txt @@ -0,0 +1,21 @@ + TOKEN_PATH@129..133 "/d6/" + TOKEN_WHITESPACE@133..135 "\u{c}\u{c}" + NODE_IDENTIFIER@135..137 + TOKEN_IDENTIFIER@135..137 "o2" + NODE_PATH@137..140 + TOKEN_PATH@137..140 "/Aa" + TOKEN_WHITESPACE@140..142 "\u{c}\u{c}" + TOKEN_DOLLAR@142..143 "$" + NODE_INFIX_OPERATION@143..155 + NODE_IDENTIFIER@143..144 + TOKEN_IDENTIFIER@143..144 "d" + TOKEN_PLUS@144..145 "+" + NODE_APPLICATION@145..155 + NODE_APPLICATION@145..152 + NODE_PATH@145..148 + TOKEN_PATH@145..148 "/6/" + TOKEN_WHITESPACE@148..150 "\u{c}\u{c}" + NODE_IDENTIFIER@150..152 + TOKEN_IDENTIFIER@150..152 "o2" + NODE_PATH@152..155 + TOKEN_PATH@152..155 "/Aa" diff --git a/sample_files/cli_tests/changes_at_end_2.txt b/sample_files/cli_tests/changes_at_end_2.txt new file mode 100644 index 000000000..4435d711f --- /dev/null +++ b/sample_files/cli_tests/changes_at_end_2.txt @@ -0,0 +1,30 @@ + TOKEN_INTEGER@125..127 "77" + TOKEN_DOLLAR@127..128 "$" + NODE_INFIX_OPERATION@128..155 + NODE_APPLICATION@128..140 + NODE_APPLICATION@128..137 + NODE_APPLICATION@128..133 + NODE_NUMBER@128..129 + TOKEN_INTEGER@128..129 "7" + NODE_PATH@129..133 + TOKEN_PATH@129..133 "/d6/" + TOKEN_WHITESPACE@133..135 "\u{c}\u{c}" + NODE_IDENTIFIER@135..137 + TOKEN_IDENTIFIER@135..137 "o2" + NODE_PATH@137..140 + TOKEN_PATH@137..140 "/Aa" + TOKEN_WHITESPACE@140..142 "\u{c}\u{c}" + TOKEN_DOLLAR@142..143 "$" + NODE_INFIX_OPERATION@143..155 + NODE_IDENTIFIER@143..144 + TOKEN_IDENTIFIER@143..144 "d" + TOKEN_PLUS@144..145 "+" + NODE_APPLICATION@145..155 + NODE_APPLICATION@145..152 + NODE_PATH@145..148 + TOKEN_PATH@145..148 "/6/" + TOKEN_WHITESPACE@148..150 "\u{c}\u{c}" + NODE_IDENTIFIER@150..152 + TOKEN_IDENTIFIER@150..152 "o2" + NODE_PATH@152..155 + TOKEN_PATH@152..155 "/Aa" diff --git a/src/display/side_by_side.rs b/src/display/side_by_side.rs index afec92d46..055fa655f 100644 --- a/src/display/side_by_side.rs +++ b/src/display/side_by_side.rs @@ -407,20 +407,8 @@ pub(crate) fn print( let mut prev_lhs_line_num = None; let mut prev_rhs_line_num = None; - let mut lhs_lines = split_on_newlines(lhs_src).collect::>(); - let mut rhs_lines = split_on_newlines(rhs_src).collect::>(); - - // If "foo" is one line, is "foo\n" two lines? Generally we want - // to care about newlines when deciding whether content differs. - // - // Ending a file with a trailing newline is extremely common - // though. If both files have a trailing newline, consider "foo\n" - // to be "foo" so we don't end up displaying a blank line on both - // sides. - if lhs_lines.last() == Some(&"") && rhs_lines.last() == Some(&"") { - lhs_lines.pop(); - rhs_lines.pop(); - } + let lhs_lines = split_on_newlines(lhs_src).collect::>(); + let rhs_lines = split_on_newlines(rhs_src).collect::>(); let matched_lines = all_matched_lines_filled(lhs_mps, rhs_mps, &lhs_lines, &rhs_lines); let mut matched_lines_to_print = &matched_lines[..]; diff --git a/src/main.rs b/src/main.rs index 086d5ed14..772f3175b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -373,6 +373,18 @@ fn diff_file( rhs_src.retain(|c| c != '\r'); } + // If "foo" is one line, is "foo\n" two lines? Generally we want + // to care about newlines when deciding whether content differs. + // + // Ending a file with a trailing newline is extremely common + // though. If both files have a trailing newline, consider "foo\n" + // to be "foo" so we don't end up displaying a blank line on both + // sides. + if lhs_src.ends_with('\n') && rhs_src.ends_with('\n') { + lhs_src.pop(); + rhs_src.pop(); + } + let mut extra_info = renamed; if let (Some(lhs_perms), Some(rhs_perms)) = (lhs_permissions, rhs_permissions) { if lhs_perms != rhs_perms { diff --git a/tests/cli.rs b/tests/cli.rs index fa0ea1df9..2d2a10b87 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -116,6 +116,16 @@ fn check_only_text_file() { cmd.assert().stdout(predicate_fn); } +#[test] +fn text_changes_at_end_doesnt_crash() { + let mut cmd = get_base_command(); + + cmd.arg("sample_files/cli_tests/changes_at_end_1.txt") + .arg("sample_files/cli_tests/changes_at_end_2.txt"); + + cmd.assert().success(); +} + #[test] fn makefile_text_as_atom() { let mut cmd = get_base_command();