Don't clean trailing newline before diffing

Difftastic should take the user's input as-is, or it risks performing
an incorrect diff in both textual and syntactic diffing.

Fixes #499
pull/502/head
Wilfred Hughes 2023-03-30 08:46:11 +07:00
parent 713220613e
commit 8b842387a1
5 changed files with 26 additions and 33 deletions

@ -20,6 +20,10 @@ Difftastic now prefers treating files as 'mostly UTF-8' or binary rather than
UTF-16. Many files can be decoded as UTF-16 without decoding errors UTF-16. Many files can be decoded as UTF-16 without decoding errors
but produce nonsense results, so this heuristic seems to work better. but produce nonsense results, so this heuristic seems to work better.
Fixed an issue where difftastic would discard the last newline in a
file before diffing. This was most noticeable when doing textual diffs
and the last line had changed.
### Display ### Display
Difftastic no longer uses purple to highlight regions that are missing Difftastic no longer uses purple to highlight regions that are missing

@ -5,7 +5,7 @@ sample_files/ada_before.adb sample_files/ada_after.adb
a3b2b5033fde04bf9d2d71cb56d9eade - a3b2b5033fde04bf9d2d71cb56d9eade -
sample_files/added_line_before.txt sample_files/added_line_after.txt sample_files/added_line_before.txt sample_files/added_line_after.txt
91af28058b1b7da58f7509d22ed9cc58 - 2deb040c52fb94459ca0540bcc983000 -
sample_files/b2_math_before.h sample_files/b2_math_after.h sample_files/b2_math_before.h sample_files/b2_math_after.h
ccbd784fa7df5d317fce588ac477956a - ccbd784fa7df5d317fce588ac477956a -
@ -119,7 +119,7 @@ sample_files/makefile_before.mk sample_files/makefile_after.mk
82ed37f60448e7402c62d5319f30fd3c - 82ed37f60448e7402c62d5319f30fd3c -
sample_files/metadata_before.clj sample_files/metadata_after.clj sample_files/metadata_before.clj sample_files/metadata_after.clj
e8e76c97731accc5572708664bb61910 - 2d6b3a5245d01fb1cb15dcf6117bbeb5 -
sample_files/modules_before.ml sample_files/modules_after.ml sample_files/modules_before.ml sample_files/modules_after.ml
cf88821ead0d432d4841f476c2f26fd2 - cf88821ead0d432d4841f476c2f26fd2 -

@ -183,21 +183,10 @@ impl<S: AsRef<str>> MaxLine for S {
} }
} }
/// Split `s` on \n or \r\n. Always returns a non-empty vec. Each line /// Split `s` on \n or \r\n. Each line in the vec does not include the
/// in the vec does not include the trailing newline. /// trailing newline.
///
/// This differs from `str::lines`, which considers `""` to be zero
/// lines and `"foo\n"` to be one line.
pub fn split_on_newlines(s: &str) -> Vec<&str> { pub fn split_on_newlines(s: &str) -> Vec<&str> {
s.split('\n') s.lines().collect()
.map(|l| {
if let Some(l) = l.strip_suffix('\r') {
l
} else {
l
}
})
.collect()
} }
pub fn is_all_whitespace(s: &str) -> bool { pub fn is_all_whitespace(s: &str) -> bool {

@ -332,7 +332,7 @@ fn diff_file_content(
diff_options: &DiffOptions, diff_options: &DiffOptions,
language_override: Option<parse::guess_language::Language>, language_override: Option<parse::guess_language::Language>,
) -> DiffResult { ) -> DiffResult {
let (mut lhs_src, mut rhs_src) = match (guess_content(lhs_bytes), guess_content(rhs_bytes)) { let (lhs_src, rhs_src) = match (guess_content(lhs_bytes), guess_content(rhs_bytes)) {
(ProbableFileKind::Binary, _) | (_, ProbableFileKind::Binary) => { (ProbableFileKind::Binary, _) | (_, ProbableFileKind::Binary) => {
return DiffResult { return DiffResult {
lhs_display_path: lhs_display_path.into(), lhs_display_path: lhs_display_path.into(),
@ -350,16 +350,6 @@ fn diff_file_content(
(ProbableFileKind::Text(lhs_src), ProbableFileKind::Text(rhs_src)) => (lhs_src, rhs_src), (ProbableFileKind::Text(lhs_src), ProbableFileKind::Text(rhs_src)) => (lhs_src, rhs_src),
}; };
// Ignore the trailing newline, if present.
// TODO: highlight if this has changes (#144).
// TODO: factor out a string cleaning function.
if lhs_src.ends_with('\n') {
lhs_src.pop();
}
if rhs_src.ends_with('\n') {
rhs_src.pop();
}
let (guess_src, guess_path) = match rhs_path { let (guess_src, guess_path) = match rhs_path {
FileArgument::NamedPath(_) => (&rhs_src, Path::new(&rhs_display_path)), FileArgument::NamedPath(_) => (&rhs_src, Path::new(&rhs_display_path)),
FileArgument::Stdin => (&rhs_src, Path::new(&lhs_display_path)), FileArgument::Stdin => (&rhs_src, Path::new(&lhs_display_path)),

@ -805,10 +805,11 @@ impl MatchedPos {
let mut res = vec![]; let mut res = vec![];
for (i, line_pos) in pos.iter().enumerate() { for (i, line_pos) in pos.iter().enumerate() {
// Don't create a MatchedPos for empty positions // Don't create a MatchedPos for empty positions
// at the start. We will want empty positions on // at the start or end. We still want empty
// multiline atoms elsewhere, as a multline string // positions in the middle of multiline atoms, as
// literal may include empty lines. // a multiline string literal may include empty
if i == 0 && line_pos.start_col == line_pos.end_col { // lines.
if (i == 0 || i == pos.len() - 1) && line_pos.start_col == line_pos.end_col {
continue; continue;
} }
@ -831,14 +832,23 @@ impl MatchedPos {
let kind = MatchKind::Novel { highlight }; let kind = MatchKind::Novel { highlight };
// Create a MatchedPos for every line that `pos` covers. // Create a MatchedPos for every line that `pos` covers.
let mut res = vec![]; let mut res = vec![];
for line_pos in pos { for (i, line_pos) in pos.iter().enumerate() {
// Don't create a MatchedPos for empty positions. This // Don't create a MatchedPos for entirly empty positions. This
// occurs when we have lists with empty open/close // occurs when we have lists with empty open/close
// delimiter positions, such as the top-level list of syntax items. // delimiter positions, such as the top-level list of syntax items.
if pos.len() == 1 && line_pos.start_col == line_pos.end_col { if pos.len() == 1 && line_pos.start_col == line_pos.end_col {
continue; continue;
} }
// Don't create a MatchedPos for empty positions
// at the start or end. We still want empty
// positions in the middle of multiline atoms, as
// a multiline string literal may include empty
// lines.
if (i == 0 || i == pos.len() - 1) && line_pos.start_col == line_pos.end_col {
continue;
}
res.push(Self { res.push(Self {
kind: kind.clone(), kind: kind.clone(),
pos: *line_pos, pos: *line_pos,