Don't crash on binary files

pull/25/head
Wilfred Hughes 2021-07-25 12:50:42 +07:00
parent c8481775e2
commit e36a2e8c96
4 changed files with 39 additions and 13 deletions

@ -4,6 +4,9 @@
Fixed handling of `@`, `<` and `>` in elisp.
Fixed crash on binary files. Difftastic now simply shows "binary" for
files that don't look like text.
### Diffing
Fixed an issue where comment replacements were not detected.

@ -41,7 +41,7 @@ terminal. It will try to align unchanged nodes (see screenshot above).
## Known Problems
Crashes. The code is underdocumented, undertested, and
unfinished. Binary files are known to make difftastic panic.
unfinished.
Performance. Difftastic scales relatively poorly on files with a large
number of changes, and can use a lot of memory. This might be solved

@ -23,6 +23,20 @@ fn term_width() -> Option<usize> {
term_size::dimensions().map(|(w, _)| w)
}
/// Do these bytes look like a binary format that we can't do anything
/// useful with?
///
/// Returns `true` when more than 20 of the first 1,000 characters,
/// after lossy UTF-8 decoding, are U+FFFD replacement characters.
/// Empty input is treated as text.
fn is_probably_binary(bytes: &[u8]) -> bool {
    // Only this many decoded characters are inspected.
    const MAX_CHARS: usize = 1000;
    // Replacement characters tolerated before the input is called binary.
    const MAX_REPLACED: usize = 20;
    // Every decoded character (including a replacement character)
    // consumes between one and four bytes, so this prefix always covers
    // the first MAX_CHARS characters. A sequence cut in half by the
    // prefix boundary can only be character number MAX_CHARS + 1 or
    // later, so it never influences the count below.
    const MAX_BYTES: usize = 4 * MAX_CHARS;

    // Decode just the prefix: this avoids converting (and copying) a
    // potentially huge file only to look at its beginning.
    let prefix = &bytes[..bytes.len().min(MAX_BYTES)];
    let num_replaced = String::from_utf8_lossy(prefix)
        .chars()
        .take(MAX_CHARS)
        .filter(|c| *c == std::char::REPLACEMENT_CHARACTER)
        .count();

    num_replaced > MAX_REPLACED
}
const VERSION: &str = env!("CARGO_PKG_VERSION");
fn main() {
@ -57,16 +71,31 @@ fn main() {
_ => panic!("Expected 2 arguments or 7 arguments"),
};
let lhs_src = read_or_die(&lhs_path);
let rhs_src = read_or_die(&rhs_path);
let syntax_toml = ConfigDir::read_default_toml();
let lang = match Path::new(&display_path).extension() {
Some(extension) => find_lang(syntax_toml, &OsStr::to_string_lossy(extension)),
None => None,
};
let lhs_bytes = read_or_die(&lhs_path);
let rhs_bytes = read_or_die(&rhs_path);
let lhs_binary = is_probably_binary(&lhs_bytes);
let rhs_binary = is_probably_binary(&rhs_bytes);
let lang_name = match &lang {
_ if lhs_binary || rhs_binary => "binary".to_string(),
Some(lang) => lang.name.clone(),
None => "plain text".to_string(),
};
println!("{}", style::header(&display_path, &lang_name));
if lhs_binary || rhs_binary {
return;
}
let lhs_src = String::from_utf8_lossy(&lhs_bytes).to_string();
let rhs_src = String::from_utf8_lossy(&rhs_bytes).to_string();
let terminal_width = match matches.value_of("COLUMNS") {
Some(width) => width.parse::<usize>().unwrap(),
None => term_width().unwrap_or(80),
@ -91,12 +120,6 @@ fn main() {
let lhs_matched_lines = matching_lines(&lhs);
let lang_name = match &lang {
Some(lang) => lang.name.clone(),
None => "plain text".to_string(),
};
println!("{}", style::header(&display_path, &lang_name));
let mut groups = visible_groups(&lhs_positions, &rhs_positions);
if groups.is_empty() {
if lang.is_some() {

@ -7,9 +7,9 @@ use std::fs;
use toml::Value;
use typed_arena::Arena;
pub fn read_or_die(path: &str) -> String {
pub fn read_or_die(path: &str) -> Vec<u8> {
match fs::read(path) {
Ok(src) => String::from_utf8_lossy(&src).to_string(),
Ok(src) => src,
Err(e) => {
match e.kind() {
std::io::ErrorKind::NotFound => {