Set a maximum size on syntactic diffing, and use line diffing otherwise

Fixes #82
html_output
Wilfred Hughes 2022-02-06 16:38:55 +07:00
parent da05d6e69d
commit e2832dabb3
4 changed files with 116 additions and 17 deletions

@ -24,6 +24,10 @@ constants.
If given binary files, difftastic will now report if the file contents If given binary files, difftastic will now report if the file contents
are identical. are identical.
Difftastic will now use a text diff for large files, rather than
trying to use more memory than is available. This threshold is
configurable with `--node-limit` and `DFT_NODE_LIMIT`.
### Command Line Interface ### Command Line Interface
Difftastic will now error if either argument does not exist, unless Difftastic will now error if either argument does not exist, unless

21
Cargo.lock generated

@ -82,6 +82,26 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "const_format"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22bc6cd49b0ec407b680c3e380182b6ac63b73991cb7602de350352fc309b614"
dependencies = [
"const_format_proc_macros",
]
[[package]]
name = "const_format_proc_macros"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef196d5d972878a48da7decb7686eded338b4858fbabeed513d63a7c98b2b82d"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.1" version = "0.5.1"
@ -150,6 +170,7 @@ dependencies = [
"cc", "cc",
"clap", "clap",
"colored", "colored",
"const_format",
"diff", "diff",
"itertools", "itertools",
"lazy_static", "lazy_static",

@ -39,6 +39,7 @@ mimalloc = { version = "0.1.26", default-features = false }
radix-heap = "0.4.1" radix-heap = "0.4.1"
walkdir = "2.3.2" walkdir = "2.3.2"
term_size = "0.3.2" term_size = "0.3.2"
const_format = "0.2.22"
[dev-dependencies] [dev-dependencies]
pretty_assertions = "1.0.0" pretty_assertions = "1.0.0"

@ -30,6 +30,7 @@ mod unchanged;
extern crate log; extern crate log;
use crate::hunks::{matched_pos_to_hunks, merge_adjacent}; use crate::hunks::{matched_pos_to_hunks, merge_adjacent};
use const_format::formatcp;
use context::opposite_positions; use context::opposite_positions;
use files::read_files_or_die; use files::read_files_or_die;
use guess_language::guess; use guess_language::guess;
@ -64,6 +65,8 @@ use crate::{
extern crate pretty_env_logger; extern crate pretty_env_logger;
const DEFAULT_NODE_LIMIT: u32 = 50_000;
/// Choose the display width: try to autodetect, or fall back to a /// Choose the display width: try to autodetect, or fall back to a
/// sensible default. /// sensible default.
fn detect_display_width() -> usize { fn detect_display_width() -> usize {
@ -93,6 +96,7 @@ enum ColorOutput {
enum Mode { enum Mode {
Diff { Diff {
node_limit: u32,
print_unchanged: bool, print_unchanged: bool,
missing_as_empty: bool, missing_as_empty: bool,
background_color: BackgroundColor, background_color: BackgroundColor,
@ -174,6 +178,15 @@ fn app() -> clap::App<'static> {
Arg::new("missing-as-empty").long("missing-as-empty") Arg::new("missing-as-empty").long("missing-as-empty")
.help("Treat paths that don't exist as equivalent to an empty file.") .help("Treat paths that don't exist as equivalent to an empty file.")
) )
.arg(
Arg::new("node-limit").long("node-limit")
.takes_value(true)
.value_name("LIMIT")
.help(concat!("Use a text diff if the number of syntax nodes exceeds this number. Overrides $DFT_NODE_LIMIT if present."))
.default_value(formatcp!("{}", DEFAULT_NODE_LIMIT))
.validator(|s| s.parse::<u32>())
.required(false),
)
.arg( .arg(
Arg::new("paths") Arg::new("paths")
.value_name("PATHS") .value_name("PATHS")
@ -277,10 +290,25 @@ fn parse_args() -> Mode {
} }
}; };
let node_limit: u32 = if matches.occurrences_of("node-limit") == 0 {
if let Ok(env_width) = env::var("DFT_NODE_LIMIT") {
env_width.parse::<u32>().ok().unwrap_or(DEFAULT_NODE_LIMIT)
} else {
DEFAULT_NODE_LIMIT
}
} else {
matches
.value_of("node-limit")
.expect("Always present as we've given clap a default")
.parse::<u32>()
.expect("Value already validated by clap")
};
let print_unchanged = !matches.is_present("skip-unchanged"); let print_unchanged = !matches.is_present("skip-unchanged");
let missing_as_empty = matches.is_present("missing-as-empty"); let missing_as_empty = matches.is_present("missing-as-empty");
Mode::Diff { Mode::Diff {
node_limit,
print_unchanged, print_unchanged,
missing_as_empty, missing_as_empty,
background_color, background_color,
@ -345,6 +373,7 @@ fn main() {
} }
} }
Mode::Diff { Mode::Diff {
node_limit,
print_unchanged, print_unchanged,
missing_as_empty, missing_as_empty,
background_color, background_color,
@ -361,7 +390,9 @@ fn main() {
let rhs_path = Path::new(&rhs_path); let rhs_path = Path::new(&rhs_path);
if lhs_path.is_dir() && rhs_path.is_dir() { if lhs_path.is_dir() && rhs_path.is_dir() {
for diff_result in diff_directories(lhs_path, rhs_path, missing_as_empty) { for diff_result in
diff_directories(lhs_path, rhs_path, missing_as_empty, node_limit)
{
print_diff_result( print_diff_result(
display_width, display_width,
background_color, background_color,
@ -370,7 +401,13 @@ fn main() {
); );
} }
} else { } else {
let diff_result = diff_file(&display_path, lhs_path, rhs_path, missing_as_empty); let diff_result = diff_file(
&display_path,
lhs_path,
rhs_path,
missing_as_empty,
node_limit,
);
print_diff_result( print_diff_result(
display_width, display_width,
background_color, background_color,
@ -388,12 +425,18 @@ fn diff_file(
lhs_path: &Path, lhs_path: &Path,
rhs_path: &Path, rhs_path: &Path,
missing_as_empty: bool, missing_as_empty: bool,
node_limit: u32,
) -> DiffResult { ) -> DiffResult {
let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty); let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty);
diff_file_content(display_path, &lhs_bytes, &rhs_bytes) diff_file_content(display_path, &lhs_bytes, &rhs_bytes, node_limit)
} }
fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) -> DiffResult { fn diff_file_content(
display_path: &str,
lhs_bytes: &[u8],
rhs_bytes: &[u8],
node_limit: u32,
) -> DiffResult {
if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) { if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) {
return DiffResult { return DiffResult {
path: display_path.into(), path: display_path.into(),
@ -448,20 +491,29 @@ fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) ->
init_all_info(&lhs, &rhs); init_all_info(&lhs, &rhs);
let (possibly_changed_lhs, possibly_changed_rhs) = skip_unchanged(&lhs, &rhs); let (possibly_changed_lhs, possibly_changed_rhs) = skip_unchanged(&lhs, &rhs);
init_next_prev(&possibly_changed_lhs);
init_next_prev(&possibly_changed_rhs);
mark_syntax( let possibly_changed_num =
possibly_changed_lhs.get(0).copied(), num_nodes(&possibly_changed_lhs) + num_nodes(&possibly_changed_rhs);
possibly_changed_rhs.get(0).copied(), if possibly_changed_num > node_limit {
); let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
(Some("hit limit".into()), lhs_positions, rhs_positions)
} else {
init_next_prev(&possibly_changed_lhs);
init_next_prev(&possibly_changed_rhs);
mark_syntax(
possibly_changed_lhs.get(0).copied(),
possibly_changed_rhs.get(0).copied(),
);
fix_all_sliders(&possibly_changed_lhs); fix_all_sliders(&possibly_changed_lhs);
fix_all_sliders(&possibly_changed_rhs); fix_all_sliders(&possibly_changed_rhs);
let lhs_positions = syntax::change_positions(&lhs); let lhs_positions = syntax::change_positions(&lhs);
let rhs_positions = syntax::change_positions(&rhs); let rhs_positions = syntax::change_positions(&rhs);
(Some(ts_lang.name.into()), lhs_positions, rhs_positions) (Some(ts_lang.name.into()), lhs_positions, rhs_positions)
}
} }
None => { None => {
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src); let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
@ -485,7 +537,12 @@ fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) ->
/// ///
/// When more than one file is modified, the hg extdiff extension passes directory /// When more than one file is modified, the hg extdiff extension passes directory
/// paths with the all the modified files. /// paths with the all the modified files.
fn diff_directories(lhs_dir: &Path, rhs_dir: &Path, missing_as_empty: bool) -> Vec<DiffResult> { fn diff_directories(
lhs_dir: &Path,
rhs_dir: &Path,
missing_as_empty: bool,
node_limit: u32,
) -> Vec<DiffResult> {
let mut res = vec![]; let mut res = vec![];
for entry in WalkDir::new(lhs_dir).into_iter().filter_map(Result::ok) { for entry in WalkDir::new(lhs_dir).into_iter().filter_map(Result::ok) {
let lhs_path = entry.path(); let lhs_path = entry.path();
@ -503,6 +560,7 @@ fn diff_directories(lhs_dir: &Path, rhs_dir: &Path, missing_as_empty: bool) -> V
lhs_path, lhs_path,
&rhs_path, &rhs_path,
missing_as_empty, missing_as_empty,
node_limit,
)); ));
} }
res res
@ -600,6 +658,21 @@ fn print_diff_result(
} }
} }
/// What is the total number of nodes in `roots`?
fn num_nodes(roots: &[&syntax::Syntax]) -> u32 {
roots
.iter()
.map(|n| {
1 + match n {
syntax::Syntax::List {
num_descendants, ..
} => *num_descendants,
syntax::Syntax::Atom { .. } => 0,
}
})
.sum()
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -607,7 +680,7 @@ mod tests {
#[test] #[test]
fn test_diff_identical_content() { fn test_diff_identical_content() {
let s = "foo"; let s = "foo";
let res = diff_file_content("foo.el", s.as_bytes(), s.as_bytes()); let res = diff_file_content("foo.el", s.as_bytes(), s.as_bytes(), DEFAULT_NODE_LIMIT);
assert_eq!(res.lhs_positions, vec![]); assert_eq!(res.lhs_positions, vec![]);
assert_eq!(res.rhs_positions, vec![]); assert_eq!(res.rhs_positions, vec![]);