Set a maximum size on syntactic diffing, and use line diffing otherwise

Fixes #82
html_output
Wilfred Hughes 2022-02-06 16:38:55 +07:00
parent da05d6e69d
commit e2832dabb3
4 changed files with 116 additions and 17 deletions

@ -24,6 +24,10 @@ constants.
If given binary files, difftastic will now report if the file contents
are identical.
Difftastic will now use a text diff for large files, rather than
trying to use more memory than is available. This threshold is
configurable with `--node-limit` and `DFT_NODE_LIMIT`.
### Command Line Interface
Difftastic will now error if either argument does not exist, unless

21
Cargo.lock generated

@ -82,6 +82,26 @@ dependencies = [
"winapi",
]
[[package]]
name = "const_format"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22bc6cd49b0ec407b680c3e380182b6ac63b73991cb7602de350352fc309b614"
dependencies = [
"const_format_proc_macros",
]
[[package]]
name = "const_format_proc_macros"
version = "0.2.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef196d5d972878a48da7decb7686eded338b4858fbabeed513d63a7c98b2b82d"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
@ -150,6 +170,7 @@ dependencies = [
"cc",
"clap",
"colored",
"const_format",
"diff",
"itertools",
"lazy_static",

@ -39,6 +39,7 @@ mimalloc = { version = "0.1.26", default-features = false }
radix-heap = "0.4.1"
walkdir = "2.3.2"
term_size = "0.3.2"
const_format = "0.2.22"
[dev-dependencies]
pretty_assertions = "1.0.0"

@ -30,6 +30,7 @@ mod unchanged;
extern crate log;
use crate::hunks::{matched_pos_to_hunks, merge_adjacent};
use const_format::formatcp;
use context::opposite_positions;
use files::read_files_or_die;
use guess_language::guess;
@ -64,6 +65,8 @@ use crate::{
extern crate pretty_env_logger;
const DEFAULT_NODE_LIMIT: u32 = 50_000;
/// Choose the display width: try to autodetect, or fall back to a
/// sensible default.
fn detect_display_width() -> usize {
@ -93,6 +96,7 @@ enum ColorOutput {
enum Mode {
Diff {
node_limit: u32,
print_unchanged: bool,
missing_as_empty: bool,
background_color: BackgroundColor,
@ -174,6 +178,15 @@ fn app() -> clap::App<'static> {
Arg::new("missing-as-empty").long("missing-as-empty")
.help("Treat paths that don't exist as equivalent to an empty file.")
)
.arg(
Arg::new("node-limit").long("node-limit")
.takes_value(true)
.value_name("LIMIT")
.help(concat!("Use a text diff if the number of syntax nodes exceeds this number. Overrides $DFT_NODE_LIMIT if present."))
.default_value(formatcp!("{}", DEFAULT_NODE_LIMIT))
.validator(|s| s.parse::<u32>())
.required(false),
)
.arg(
Arg::new("paths")
.value_name("PATHS")
@ -277,10 +290,25 @@ fn parse_args() -> Mode {
}
};
let node_limit: u32 = if matches.occurrences_of("node-limit") == 0 {
if let Ok(env_width) = env::var("DFT_NODE_LIMIT") {
env_width.parse::<u32>().ok().unwrap_or(DEFAULT_NODE_LIMIT)
} else {
DEFAULT_NODE_LIMIT
}
} else {
matches
.value_of("node-limit")
.expect("Always present as we've given clap a default")
.parse::<u32>()
.expect("Value already validated by clap")
};
let print_unchanged = !matches.is_present("skip-unchanged");
let missing_as_empty = matches.is_present("missing-as-empty");
Mode::Diff {
node_limit,
print_unchanged,
missing_as_empty,
background_color,
@ -345,6 +373,7 @@ fn main() {
}
}
Mode::Diff {
node_limit,
print_unchanged,
missing_as_empty,
background_color,
@ -361,7 +390,9 @@ fn main() {
let rhs_path = Path::new(&rhs_path);
if lhs_path.is_dir() && rhs_path.is_dir() {
for diff_result in diff_directories(lhs_path, rhs_path, missing_as_empty) {
for diff_result in
diff_directories(lhs_path, rhs_path, missing_as_empty, node_limit)
{
print_diff_result(
display_width,
background_color,
@ -370,7 +401,13 @@ fn main() {
);
}
} else {
let diff_result = diff_file(&display_path, lhs_path, rhs_path, missing_as_empty);
let diff_result = diff_file(
&display_path,
lhs_path,
rhs_path,
missing_as_empty,
node_limit,
);
print_diff_result(
display_width,
background_color,
@ -388,12 +425,18 @@ fn diff_file(
lhs_path: &Path,
rhs_path: &Path,
missing_as_empty: bool,
node_limit: u32,
) -> DiffResult {
let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty);
diff_file_content(display_path, &lhs_bytes, &rhs_bytes)
diff_file_content(display_path, &lhs_bytes, &rhs_bytes, node_limit)
}
fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) -> DiffResult {
fn diff_file_content(
display_path: &str,
lhs_bytes: &[u8],
rhs_bytes: &[u8],
node_limit: u32,
) -> DiffResult {
if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) {
return DiffResult {
path: display_path.into(),
@ -448,6 +491,14 @@ fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) ->
init_all_info(&lhs, &rhs);
let (possibly_changed_lhs, possibly_changed_rhs) = skip_unchanged(&lhs, &rhs);
let possibly_changed_num =
num_nodes(&possibly_changed_lhs) + num_nodes(&possibly_changed_rhs);
if possibly_changed_num > node_limit {
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
(Some("hit limit".into()), lhs_positions, rhs_positions)
} else {
init_next_prev(&possibly_changed_lhs);
init_next_prev(&possibly_changed_rhs);
@ -463,6 +514,7 @@ fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) ->
let rhs_positions = syntax::change_positions(&rhs);
(Some(ts_lang.name.into()), lhs_positions, rhs_positions)
}
}
None => {
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
@ -485,7 +537,12 @@ fn diff_file_content(display_path: &str, lhs_bytes: &[u8], rhs_bytes: &[u8]) ->
///
/// When more than one file is modified, the hg extdiff extension passes directory
/// paths with the all the modified files.
fn diff_directories(lhs_dir: &Path, rhs_dir: &Path, missing_as_empty: bool) -> Vec<DiffResult> {
fn diff_directories(
lhs_dir: &Path,
rhs_dir: &Path,
missing_as_empty: bool,
node_limit: u32,
) -> Vec<DiffResult> {
let mut res = vec![];
for entry in WalkDir::new(lhs_dir).into_iter().filter_map(Result::ok) {
let lhs_path = entry.path();
@ -503,6 +560,7 @@ fn diff_directories(lhs_dir: &Path, rhs_dir: &Path, missing_as_empty: bool) -> V
lhs_path,
&rhs_path,
missing_as_empty,
node_limit,
));
}
res
@ -600,6 +658,21 @@ fn print_diff_result(
}
}
/// What is the total number of nodes in `roots`?
fn num_nodes(roots: &[&syntax::Syntax]) -> u32 {
roots
.iter()
.map(|n| {
1 + match n {
syntax::Syntax::List {
num_descendants, ..
} => *num_descendants,
syntax::Syntax::Atom { .. } => 0,
}
})
.sum()
}
#[cfg(test)]
mod tests {
use super::*;
@ -607,7 +680,7 @@ mod tests {
#[test]
fn test_diff_identical_content() {
let s = "foo";
let res = diff_file_content("foo.el", s.as_bytes(), s.as_bytes());
let res = diff_file_content("foo.el", s.as_bytes(), s.as_bytes(), DEFAULT_NODE_LIMIT);
assert_eq!(res.lhs_positions, vec![]);
assert_eq!(res.rhs_positions, vec![]);