From e472acb8050e84c671470421f4fd5c74a9d3b2bd Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Fri, 1 Apr 2022 23:09:17 -0700 Subject: [PATCH] Add a --language option to override language detection Fixes #199 --- CHANGELOG.md | 4 ++++ src/guess_language.rs | 2 +- src/main.rs | 48 +++++++++++++++++++++++++++++++++++-------- src/options.rs | 30 ++++++++++++++++++++++++++- 4 files changed, 73 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e265128ca..bd75ae873 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ Added the `--display` option to switch between `side-by-side`, `side-by-side-show-both`, and `inline` display modes. This replaces the `INLINE` and `DFT_SHOW_BOTH` environment variables. +Added the `--language` option to enable overriding language +detection. When specified, language detection is disabled, and the +input file is assumed to have the extension specified. + ## 0.25 (released 31st March 2022) ### Display diff --git a/src/guess_language.rs b/src/guess_language.rs index 0ed13918b..61b31c377 100644 --- a/src/guess_language.rs +++ b/src/guess_language.rs @@ -172,7 +172,7 @@ fn from_name(path: &Path) -> Option { } } -fn from_extension(extension: &OsStr) -> Option { +pub fn from_extension(extension: &OsStr) -> Option { match extension.to_string_lossy().borrow() { "sh" | "bash" | "bats" | "cgi" | "command" | "env" | "fcgi" | "ksh" | "sh.in" | "tmux" | "tool" | "zsh" => Some(Bash), diff --git a/src/main.rs b/src/main.rs index b736e5355..417292715 100644 --- a/src/main.rs +++ b/src/main.rs @@ -88,11 +88,16 @@ fn main() { reset_sigpipe(); match options::parse_args() { - Mode::DumpTreeSitter { path } => { + Mode::DumpTreeSitter { + path, + language_override, + } => { let path = Path::new(&path); let bytes = read_or_die(path); let src = String::from_utf8_lossy(&bytes).to_string(); - match guess(path, &src) { + + let language = language_override.or_else(|| guess(path, &src)); + match language { Some(lang) => { let ts_lang = tsp::from_language(lang); let tree = tsp::parse_to_tree(&src, &ts_lang); @@ -103,12 +108,16 @@ fn main() { } } } - Mode::DumpSyntax { path } => { + Mode::DumpSyntax { + path, + language_override, + } => { let path = Path::new(&path); let bytes = read_or_die(path); let src = String::from_utf8_lossy(&bytes).to_string(); - match guess(path, &src) { + let language = language_override.or_else(|| guess(path, &src)); + match language { Some(lang) => { let ts_lang = tsp::from_language(lang); let arena = Arena::new(); @@ -131,6 +140,7 @@ fn main() { color_output, display_width, display_path, + language_override, lhs_path, rhs_path, .. @@ -152,9 +162,14 @@ fn main() { } if lhs_path.is_dir() && rhs_path.is_dir() { - for diff_result in - diff_directories(lhs_path, rhs_path, missing_as_empty, node_limit, byte_limit) - { + for diff_result in diff_directories( + lhs_path, + rhs_path, + missing_as_empty, + node_limit, + byte_limit, + language_override, + ) { print_diff_result( display_width, use_color, @@ -172,6 +187,7 @@ fn main() { missing_as_empty, node_limit, byte_limit, + language_override, ); print_diff_result( display_width, @@ -194,9 +210,17 @@ fn diff_file( missing_as_empty: bool, node_limit: u32, byte_limit: usize, + language_override: Option, ) -> DiffResult { let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty); - diff_file_content(display_path, &lhs_bytes, &rhs_bytes, node_limit, byte_limit) + diff_file_content( + display_path, + &lhs_bytes, + &rhs_bytes, + node_limit, + byte_limit, + language_override, + ) } fn diff_file_content( @@ -205,6 +229,7 @@ fn diff_file_content( rhs_bytes: &[u8], node_limit: u32, byte_limit: usize, + language_override: Option, ) -> DiffResult { if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) { return DiffResult { @@ -246,7 +271,9 @@ fn diff_file_content( } else { &rhs_src }; - let ts_lang = guess(path, guess_src).map(tsp::from_language); + let ts_lang = language_override + .or_else(|| guess(path, guess_src)) + .map(tsp::from_language); if lhs_bytes == rhs_bytes { // If the two files are completely identical, return early @@ -346,6 +373,7 @@ fn diff_directories<'a>( missing_as_empty: bool, node_limit: u32, byte_limit: usize, + language_override: Option, ) -> impl Iterator + 'a { WalkDir::new(lhs_dir) .into_iter() @@ -365,6 +393,7 @@ fn diff_directories<'a>( missing_as_empty, node_limit, byte_limit, + language_override, ) }) } @@ -503,6 +532,7 @@ mod tests { s.as_bytes(), DEFAULT_NODE_LIMIT, DEFAULT_BYTE_LIMIT, + None, ); assert_eq!(res.lhs_positions, vec![]); diff --git a/src/options.rs b/src/options.rs index a0763cc4d..a7bbccbf4 100644 --- a/src/options.rs +++ b/src/options.rs @@ -4,7 +4,7 @@ use atty::Stream; use clap::{crate_authors, crate_description, crate_version, App, AppSettings, Arg}; use const_format::formatcp; -use crate::style::BackgroundColor; +use crate::{guess_language, style::BackgroundColor}; pub const DEFAULT_NODE_LIMIT: u32 = 30_000; pub const DEFAULT_BYTE_LIMIT: usize = 1_000_000; @@ -89,6 +89,13 @@ fn app() -> clap::App<'static> { Arg::new("missing-as-empty").long("missing-as-empty") .help("Treat paths that don't exist as equivalent to an empty file.") ) + .arg( + Arg::new("language").long("language") + .value_name("EXT") + .allow_invalid_utf8(true) + .help("Override language detection. Inputs are assumed to have this file extension. When diffing directories, applies to all files.") + // TODO: support DFT_LANGUAGE for consistency + ) .arg( Arg::new("node-limit").long("node-limit") .takes_value(true) @@ -135,14 +142,17 @@ pub enum Mode { color_output: ColorOutput, display_width: usize, display_path: String, + language_override: Option, lhs_path: String, rhs_path: String, }, DumpTreeSitter { path: String, + language_override: Option, }, DumpSyntax { path: String, + language_override: Option, }, } @@ -150,15 +160,32 @@ pub enum Mode { pub fn parse_args() -> Mode { let matches = app().get_matches(); + let language_override = match matches.value_of_os("language") { + Some(lang_str) => { + if let Some(lang) = guess_language::from_extension(lang_str) { + Some(lang) + } else { + eprintln!( + "No language is associated with extension: {}", + lang_str.to_string_lossy() + ); + None + } + } + None => None, + }; + if let Some(path) = matches.value_of("dump-syntax") { return Mode::DumpSyntax { path: path.to_string(), + language_override, }; } if let Some(path) = matches.value_of("dump-ts") { return Mode::DumpTreeSitter { path: path.to_string(), + language_override, }; } @@ -309,6 +336,7 @@ pub fn parse_args() -> Mode { color_output, display_width, display_path, + language_override, lhs_path, rhs_path, }