Don't clean trailing newline before diffing

Difftastic should take the user's input as-is, or it risks performing
an incorrect diff in both textual and syntactic diffing.

Fixes #499
pull/502/head
Wilfred Hughes 2023-03-30 08:46:11 +07:00
parent 713220613e
commit 8b842387a1
5 changed files with 26 additions and 33 deletions

@ -20,6 +20,10 @@ Difftastic now prefers treating files as 'mostly UTF-8' or binary rather than
UTF-16. Many files can be decoded as UTF-16 without decoding errors
but produce nonsense results, so this heuristic seems to work better.
Fixed an issue where difftastic would discard the last newline in a
file before diffing. This was most noticeable when doing textual diffs
and the last line had changed.
### Display
Difftastic no longer uses purple to highlight regions that are missing

@ -5,7 +5,7 @@ sample_files/ada_before.adb sample_files/ada_after.adb
a3b2b5033fde04bf9d2d71cb56d9eade -
sample_files/added_line_before.txt sample_files/added_line_after.txt
91af28058b1b7da58f7509d22ed9cc58 -
2deb040c52fb94459ca0540bcc983000 -
sample_files/b2_math_before.h sample_files/b2_math_after.h
ccbd784fa7df5d317fce588ac477956a -
@ -119,7 +119,7 @@ sample_files/makefile_before.mk sample_files/makefile_after.mk
82ed37f60448e7402c62d5319f30fd3c -
sample_files/metadata_before.clj sample_files/metadata_after.clj
e8e76c97731accc5572708664bb61910 -
2d6b3a5245d01fb1cb15dcf6117bbeb5 -
sample_files/modules_before.ml sample_files/modules_after.ml
cf88821ead0d432d4841f476c2f26fd2 -

@ -183,21 +183,10 @@ impl<S: AsRef<str>> MaxLine for S {
}
}
/// Split `s` on \n or \r\n. Always returns a non-empty vec. Each line
/// in the vec does not include the trailing newline.
///
/// This differs from `str::lines`, which considers `""` to be zero
/// lines and `"foo\n"` to be one line.
/// Split `s` on \n or \r\n. Each line in the vec does not include the
/// trailing newline.
pub fn split_on_newlines(s: &str) -> Vec<&str> {
s.split('\n')
.map(|l| {
if let Some(l) = l.strip_suffix('\r') {
l
} else {
l
}
})
.collect()
s.lines().collect()
}
pub fn is_all_whitespace(s: &str) -> bool {

@ -332,7 +332,7 @@ fn diff_file_content(
diff_options: &DiffOptions,
language_override: Option<parse::guess_language::Language>,
) -> DiffResult {
let (mut lhs_src, mut rhs_src) = match (guess_content(lhs_bytes), guess_content(rhs_bytes)) {
let (lhs_src, rhs_src) = match (guess_content(lhs_bytes), guess_content(rhs_bytes)) {
(ProbableFileKind::Binary, _) | (_, ProbableFileKind::Binary) => {
return DiffResult {
lhs_display_path: lhs_display_path.into(),
@ -350,16 +350,6 @@ fn diff_file_content(
(ProbableFileKind::Text(lhs_src), ProbableFileKind::Text(rhs_src)) => (lhs_src, rhs_src),
};
// Ignore the trailing newline, if present.
// TODO: highlight if this has changes (#144).
// TODO: factor out a string cleaning function.
if lhs_src.ends_with('\n') {
lhs_src.pop();
}
if rhs_src.ends_with('\n') {
rhs_src.pop();
}
let (guess_src, guess_path) = match rhs_path {
FileArgument::NamedPath(_) => (&rhs_src, Path::new(&rhs_display_path)),
FileArgument::Stdin => (&rhs_src, Path::new(&lhs_display_path)),

@ -805,10 +805,11 @@ impl MatchedPos {
let mut res = vec![];
for (i, line_pos) in pos.iter().enumerate() {
// Don't create a MatchedPos for empty positions
// at the start. We will want empty positions on
// multiline atoms elsewhere, as a multline string
// literal may include empty lines.
if i == 0 && line_pos.start_col == line_pos.end_col {
// at the start or end. We still want empty
// positions in the middle of multiline atoms, as
// a multiline string literal may include empty
// lines.
if (i == 0 || i == pos.len() - 1) && line_pos.start_col == line_pos.end_col {
continue;
}
@ -831,14 +832,23 @@ impl MatchedPos {
let kind = MatchKind::Novel { highlight };
// Create a MatchedPos for every line that `pos` covers.
let mut res = vec![];
for line_pos in pos {
// Don't create a MatchedPos for empty positions. This
for (i, line_pos) in pos.iter().enumerate() {
// Don't create a MatchedPos for entirly empty positions. This
// occurs when we have lists with empty open/close
// delimiter positions, such as the top-level list of syntax items.
if pos.len() == 1 && line_pos.start_col == line_pos.end_col {
continue;
}
// Don't create a MatchedPos for empty positions
// at the start or end. We still want empty
// positions in the middle of multiline atoms, as
// a multiline string literal may include empty
// lines.
if (i == 0 || i == pos.len() - 1) && line_pos.start_col == line_pos.end_col {
continue;
}
res.push(Self {
kind: kind.clone(),
pos: *line_pos,