Add the ability to parse conflict markers and diff the two files

syntax_id
Wilfred Hughes 2023-08-12 00:01:48 +07:00
parent f06e95ca02
commit e0a1405453
6 changed files with 287 additions and 1 deletions

@ -1,5 +1,14 @@
## 0.50 (unreleased) ## 0.50 (unreleased)
### Conflicts
Difftastic now supports parsing files with conflict markers, enabling
you to diff the two conflicting file states.
```
$ difft file_with_conflicts.js
```
### Parsing ### Parsing
Updated Elixir, Erlang, Go, Kotlin and Racket parsers. Updated Elixir, Erlang, Go, Kotlin and Racket parsers.

@ -28,6 +28,19 @@ You can read a file from stdin by specifying `-` as the file path.
$ cat sample_files/before.js | difft - sample_files/after.js $ cat sample_files/before.js | difft - sample_files/after.js
``` ```
### Files With Conflicts
*(Added in version 0.50.)*
If you have a file with `<<<<<<<` conflict markers, you can pass it as
a single argument to difftastic. Difftastic will construct the two
file states and diff those.
```
$ difft sample_files/conflicts.el
```
## Language Detection ## Language Detection
Difftastic guesses the language used based on the file extension, file Difftastic guesses the language used based on the file extension, file

@ -0,0 +1,137 @@
//! Apply conflict markers to obtain the original file contents.
//!
//! See <https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-mergeconflictStyle>
use ConflictState::*;

/// Which part of a conflict hunk the parser is currently inside.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConflictState {
    /// Outside any conflict hunk: lines belong to both sides.
    NoConflict,
    /// After `<<<<<<<`: lines belong to the left-hand side only.
    Left,
    /// After `|||||||`: lines belong to the common ancestor and are
    /// discarded.
    Base,
    /// After `=======`: lines belong to the right-hand side only.
    Right,
}

pub const START_LHS_MARKER: &str = "<<<<<<<";
const START_BASE_MARKER: &str = "|||||||";
const START_RHS_MARKER: &str = "=======";
const END_RHS_MARKER: &str = ">>>>>>>";

/// The two file states reconstructed from a file with conflict
/// markers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConflictFiles {
    /// The longest label seen after a `<<<<<<<` marker, or empty if
    /// no marker had a label.
    pub lhs_name: String,
    /// The file content with every conflict resolved to its left side.
    pub lhs_content: String,
    /// The longest label seen after a `>>>>>>>` marker, or empty if
    /// no marker had a label.
    pub rhs_name: String,
    /// The file content with every conflict resolved to its right side.
    pub rhs_content: String,
    /// The number of `<<<<<<<` markers encountered.
    pub num_conflicts: usize,
}

/// Convert a string with conflict markers into the two conflicting
/// file contents.
///
/// Lines outside conflicts are copied to both sides. Lines in the
/// base section of a diff3-style conflict are dropped.
///
/// # Errors
///
/// Returns a message if the input ends while still inside a conflict,
/// naming the (1-based) line of the last `<<<<<<<` marker seen.
pub fn apply_conflict_markers(s: &str) -> Result<ConflictFiles, String> {
    let mut lhs_name = String::new();
    let mut rhs_name = String::new();

    let mut lhs_content = String::with_capacity(s.len());
    let mut rhs_content = String::with_capacity(s.len());
    let mut num_conflicts = 0;

    let mut state = NoConflict;
    let mut conflict_start_line = None;

    for (i, line) in s.split_inclusive('\n').enumerate() {
        if let Some(hunk_lhs_name) = line.strip_prefix(START_LHS_MARKER) {
            state = Left;
            num_conflicts += 1;
            conflict_start_line = Some(i);

            // Different hunks may carry different labels; keep the
            // longest one as the most descriptive.
            let hunk_lhs_name = hunk_lhs_name.trim();
            if hunk_lhs_name.len() > lhs_name.len() {
                hunk_lhs_name.clone_into(&mut lhs_name);
            }

            continue;
        }

        // The remaining markers only have meaning inside a conflict.
        // Outside one, a line such as `=======` is ordinary content
        // (e.g. a Markdown or reStructuredText heading underline) and
        // must be preserved rather than swallowed.
        if !matches!(state, NoConflict) {
            if line.starts_with(START_BASE_MARKER) {
                state = Base;
                continue;
            }
            if line.starts_with(START_RHS_MARKER) {
                state = Right;
                continue;
            }
            if let Some(hunk_rhs_name) = line.strip_prefix(END_RHS_MARKER) {
                state = NoConflict;

                let hunk_rhs_name = hunk_rhs_name.trim();
                if hunk_rhs_name.len() > rhs_name.len() {
                    hunk_rhs_name.clone_into(&mut rhs_name);
                }

                continue;
            }
        }

        match state {
            NoConflict => {
                lhs_content.push_str(line);
                rhs_content.push_str(line);
            }
            Left => lhs_content.push_str(line),
            Right => rhs_content.push_str(line),
            Base => {}
        }
    }

    if matches!(state, NoConflict) {
        Ok(ConflictFiles {
            lhs_name,
            lhs_content,
            rhs_name,
            rhs_content,
            num_conflicts,
        })
    } else {
        let message = match conflict_start_line {
            // `enumerate` counts from zero, but users (and editors)
            // count lines from one.
            Some(line_i) => format!(
                "Could not parse conflict markers, line {} has no matching {}.",
                line_i + 1,
                END_RHS_MARKER
            ),
            None => "Could not parse conflict markers.".to_owned(),
        };
        Err(message)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A diff3-style conflict: the base section must be discarded.
    #[test]
    fn test_with_base() {
        // Deliberately avoid a multiline string literal to avoid
        // confusing text editors when we open this file.
        let s = "before\n<<<<<<< Temporary merge branch 1\nnew in left\n||||||| merged common ancestors\noriginal\n=======\nnew in right\n>>>>>>> Temporary merge branch 2\nafter";
        let conflict_files = apply_conflict_markers(s).unwrap();

        assert_eq!(conflict_files.lhs_content, "before\nnew in left\nafter");
        assert_eq!(conflict_files.rhs_content, "before\nnew in right\nafter");

        assert_eq!(conflict_files.lhs_name, "Temporary merge branch 1");
        assert_eq!(conflict_files.rhs_name, "Temporary merge branch 2");

        assert_eq!(conflict_files.num_conflicts, 1);
    }

    /// A plain two-way conflict with no base section.
    #[test]
    fn test_without_base() {
        // Deliberately avoid a multiline string literal to avoid
        // confusing text editors when we open this file.
        let s = "before\n<<<<<<< Temporary merge branch 1\nnew in left\n=======\nnew in right\n>>>>>>> Temporary merge branch 2\nafter";
        let conflict_files = apply_conflict_markers(s).unwrap();

        assert_eq!(conflict_files.lhs_content, "before\nnew in left\nafter");
        assert_eq!(conflict_files.rhs_content, "before\nnew in right\nafter");

        assert_eq!(conflict_files.lhs_name, "Temporary merge branch 1");
        assert_eq!(conflict_files.rhs_name, "Temporary merge branch 2");

        assert_eq!(conflict_files.num_conflicts, 1);
    }

    /// Input without any markers is passed through unchanged to both
    /// sides and reports zero conflicts.
    #[test]
    fn test_no_conflicts() {
        let s = "before\nafter\n";
        let conflict_files = apply_conflict_markers(s).unwrap();

        assert_eq!(conflict_files.lhs_content, s);
        assert_eq!(conflict_files.rhs_content, s);

        assert_eq!(conflict_files.lhs_name, "");
        assert_eq!(conflict_files.rhs_name, "");

        assert_eq!(conflict_files.num_conflicts, 0);
    }

    /// A conflict that is never closed is a parse error.
    #[test]
    fn test_unterminated_conflict() {
        let s = "before\n<<<<<<< Temporary merge branch 1\nnew in left\n";
        assert!(apply_conflict_markers(s).is_err());
    }
}

@ -13,6 +13,16 @@ use walkdir::WalkDir;
use crate::exit_codes::EXIT_BAD_ARGUMENTS; use crate::exit_codes::EXIT_BAD_ARGUMENTS;
use crate::options::FileArgument; use crate::options::FileArgument;
/// Read the bytes of a single file argument.
///
/// If reading fails, report the error with `eprint_read_error` and
/// terminate the process with `EXIT_BAD_ARGUMENTS`.
pub fn read_file_or_die(path: &FileArgument) -> Vec<u8> {
    read_file_arg(path).unwrap_or_else(|read_err| {
        eprint_read_error(path, &read_err);
        std::process::exit(EXIT_BAD_ARGUMENTS)
    })
}
pub fn read_files_or_die( pub fn read_files_or_die(
lhs_path: &FileArgument, lhs_path: &FileArgument,
rhs_path: &FileArgument, rhs_path: &FileArgument,

@ -23,6 +23,7 @@
// correct. // correct.
#![allow(clippy::mutable_key_type)] #![allow(clippy::mutable_key_type)]
mod conflicts;
mod constants; mod constants;
mod diff; mod diff;
mod display; mod display;
@ -39,16 +40,20 @@ mod summary;
#[macro_use] #[macro_use]
extern crate log; extern crate log;
use crate::conflicts::START_LHS_MARKER;
use crate::diff::{dijkstra, unchanged}; use crate::diff::{dijkstra, unchanged};
use crate::display::hunks::{matched_pos_to_hunks, merge_adjacent}; use crate::display::hunks::{matched_pos_to_hunks, merge_adjacent};
use crate::exit_codes::EXIT_BAD_ARGUMENTS;
use crate::parse::guess_language::language_globs; use crate::parse::guess_language::language_globs;
use crate::parse::syntax; use crate::parse::syntax;
use conflicts::apply_conflict_markers;
use diff::changes::ChangeMap; use diff::changes::ChangeMap;
use diff::dijkstra::ExceededGraphLimit; use diff::dijkstra::ExceededGraphLimit;
use display::context::opposite_positions; use display::context::opposite_positions;
use exit_codes::{EXIT_FOUND_CHANGES, EXIT_SUCCESS}; use exit_codes::{EXIT_FOUND_CHANGES, EXIT_SUCCESS};
use files::{ use files::{
guess_content, read_files_or_die, read_or_die, relative_paths_in_either, ProbableFileKind, guess_content, read_file_or_die, read_files_or_die, read_or_die, relative_paths_in_either,
ProbableFileKind,
}; };
use log::info; use log::info;
use mimalloc::MiMalloc; use mimalloc::MiMalloc;
@ -174,6 +179,31 @@ fn main() {
println!(); println!();
} }
} }
Mode::DiffFromConflicts {
display_path,
path,
diff_options,
display_options,
set_exit_code,
language_overrides,
} => {
let diff_result = diff_conflicts_file(
&display_path,
&path,
&display_options,
&diff_options,
&language_overrides,
);
print_diff_result(&display_options, &diff_result);
let exit_code = if set_exit_code && diff_result.has_reportable_change() {
EXIT_FOUND_CHANGES
} else {
EXIT_SUCCESS
};
std::process::exit(exit_code);
}
Mode::Diff { Mode::Diff {
diff_options, diff_options,
display_options, display_options,
@ -307,6 +337,56 @@ fn diff_file(
) )
} }
/// Diff the two sides of a single file that contains conflict markers.
///
/// Reads `path`, reconstructs the left and right file states with
/// `apply_conflict_markers`, and diffs them. The same `path` is passed
/// as both the left and right file argument to `diff_file_content`.
///
/// Exits the process with `EXIT_BAD_ARGUMENTS` if the file is binary
/// or its conflict markers cannot be parsed. A file with no conflict
/// markers only produces a warning on stderr and is still diffed.
fn diff_conflicts_file(
    display_path: &str,
    path: &FileArgument,
    display_options: &DisplayOptions,
    diff_options: &DiffOptions,
    overrides: &[(glob::Pattern, LanguageOverride)],
) -> DiffResult {
    let raw_bytes = read_file_or_die(path);
    let text = match guess_content(&raw_bytes) {
        ProbableFileKind::Binary => {
            eprintln!("error: Expected a text file with conflict markers, got a binary file.");
            std::process::exit(EXIT_BAD_ARGUMENTS);
        }
        ProbableFileKind::Text(text) => text,
    };

    let sides = apply_conflict_markers(&text).unwrap_or_else(|msg| {
        eprintln!("error: {}", msg);
        std::process::exit(EXIT_BAD_ARGUMENTS)
    });

    // The user most likely meant to pass two files, so point that out
    // rather than silently showing an empty diff.
    let found_markers = sides.num_conflicts != 0;
    if !found_markers {
        eprintln!(
            "warning: Expected a file with conflict markers {}, but none were found.",
            START_LHS_MARKER,
        );
        eprintln!("Difftastic parses conflict markers from a single file argument. Did you forget a second file argument?");
    }

    let comparison_note = format!(
        "Comparing '{}' with '{}'",
        sides.lhs_name, sides.rhs_name
    );

    diff_file_content(
        display_path,
        Some(comparison_note),
        path,
        path,
        &sides.lhs_content,
        &sides.rhs_content,
        display_options,
        diff_options,
        overrides,
    )
}
fn check_only_text( fn check_only_text(
file_format: &FileFormat, file_format: &FileFormat,
display_path: &str, display_path: &str,

@ -95,6 +95,10 @@ fn app() -> clap::Command<'static> {
"$ ", "$ ",
env!("CARGO_BIN_NAME"), env!("CARGO_BIN_NAME"),
" old/ new/\n\n", " old/ new/\n\n",
"If you have a file with conflict markers, you can pass it as a single argument. Difftastic will diff the two conflicting file states.\n\n",
"$ ",
env!("CARGO_BIN_NAME"),
" file_with_conflicts.js\n\n",
"Difftastic can also be invoked with 7 arguments in the format that GIT_EXTERNAL_DIFF expects.\n\n", "Difftastic can also be invoked with 7 arguments in the format that GIT_EXTERNAL_DIFF expects.\n\n",
"See the full manual at: https://difftastic.wilfred.me.uk/") "See the full manual at: https://difftastic.wilfred.me.uk/")
) )
@ -357,6 +361,15 @@ pub enum Mode {
/// If this file has been renamed, the name it had previously. /// If this file has been renamed, the name it had previously.
old_path: Option<String>, old_path: Option<String>,
}, },
DiffFromConflicts {
diff_options: DiffOptions,
display_options: DisplayOptions,
set_exit_code: bool,
language_overrides: Vec<(glob::Pattern, LanguageOverride)>,
path: FileArgument,
/// The path that we show to the user.
display_path: String,
},
ListLanguages { ListLanguages {
use_color: bool, use_color: bool,
language_overrides: Vec<(glob::Pattern, LanguageOverride)>, language_overrides: Vec<(glob::Pattern, LanguageOverride)>,
@ -613,6 +626,30 @@ pub fn parse_args() -> Mode {
true, true,
) )
} }
[path] => {
let display_options = DisplayOptions {
background_color,
use_color,
print_unchanged,
tab_width,
display_mode,
display_width,
num_context_lines,
syntax_highlight,
in_vcs: true,
};
let display_path = path.to_string_lossy().to_string();
let path = FileArgument::from_path_argument(path);
return Mode::DiffFromConflicts {
display_path,
path,
diff_options,
display_options,
set_exit_code,
language_overrides,
};
}
_ => { _ => {
if !args.is_empty() { if !args.is_empty() {
eprintln!( eprintln!(