Add a --language option to override language detection

Fixes #199
pull/230/head
Wilfred Hughes 2022-04-01 23:09:17 +07:00
parent e3d91d6dcc
commit e472acb805
4 changed files with 73 additions and 11 deletions

@ -6,6 +6,10 @@ Added the `--display` option to switch between `side-by-side`,
`side-by-side-show-both`, and `inline` display modes. This replaces `side-by-side-show-both`, and `inline` display modes. This replaces
the `INLINE` and `DFT_SHOW_BOTH` environment variables. the `INLINE` and `DFT_SHOW_BOTH` environment variables.
Added the `--language` option to override language detection. When
given a file extension that maps to a supported language, detection is
skipped and every input file is treated as having that extension.
## 0.25 (released 31st March 2022) ## 0.25 (released 31st March 2022)
### Display ### Display

@ -172,7 +172,7 @@ fn from_name(path: &Path) -> Option<Language> {
} }
} }
fn from_extension(extension: &OsStr) -> Option<Language> { pub fn from_extension(extension: &OsStr) -> Option<Language> {
match extension.to_string_lossy().borrow() { match extension.to_string_lossy().borrow() {
"sh" | "bash" | "bats" | "cgi" | "command" | "env" | "fcgi" | "ksh" | "sh.in" | "tmux" "sh" | "bash" | "bats" | "cgi" | "command" | "env" | "fcgi" | "ksh" | "sh.in" | "tmux"
| "tool" | "zsh" => Some(Bash), | "tool" | "zsh" => Some(Bash),

@ -88,11 +88,16 @@ fn main() {
reset_sigpipe(); reset_sigpipe();
match options::parse_args() { match options::parse_args() {
Mode::DumpTreeSitter { path } => { Mode::DumpTreeSitter {
path,
language_override,
} => {
let path = Path::new(&path); let path = Path::new(&path);
let bytes = read_or_die(path); let bytes = read_or_die(path);
let src = String::from_utf8_lossy(&bytes).to_string(); let src = String::from_utf8_lossy(&bytes).to_string();
match guess(path, &src) {
let language = language_override.or_else(|| guess(path, &src));
match language {
Some(lang) => { Some(lang) => {
let ts_lang = tsp::from_language(lang); let ts_lang = tsp::from_language(lang);
let tree = tsp::parse_to_tree(&src, &ts_lang); let tree = tsp::parse_to_tree(&src, &ts_lang);
@ -103,12 +108,16 @@ fn main() {
} }
} }
} }
Mode::DumpSyntax { path } => { Mode::DumpSyntax {
path,
language_override,
} => {
let path = Path::new(&path); let path = Path::new(&path);
let bytes = read_or_die(path); let bytes = read_or_die(path);
let src = String::from_utf8_lossy(&bytes).to_string(); let src = String::from_utf8_lossy(&bytes).to_string();
match guess(path, &src) { let language = language_override.or_else(|| guess(path, &src));
match language {
Some(lang) => { Some(lang) => {
let ts_lang = tsp::from_language(lang); let ts_lang = tsp::from_language(lang);
let arena = Arena::new(); let arena = Arena::new();
@ -131,6 +140,7 @@ fn main() {
color_output, color_output,
display_width, display_width,
display_path, display_path,
language_override,
lhs_path, lhs_path,
rhs_path, rhs_path,
.. ..
@ -152,9 +162,14 @@ fn main() {
} }
if lhs_path.is_dir() && rhs_path.is_dir() { if lhs_path.is_dir() && rhs_path.is_dir() {
for diff_result in for diff_result in diff_directories(
diff_directories(lhs_path, rhs_path, missing_as_empty, node_limit, byte_limit) lhs_path,
{ rhs_path,
missing_as_empty,
node_limit,
byte_limit,
language_override,
) {
print_diff_result( print_diff_result(
display_width, display_width,
use_color, use_color,
@ -172,6 +187,7 @@ fn main() {
missing_as_empty, missing_as_empty,
node_limit, node_limit,
byte_limit, byte_limit,
language_override,
); );
print_diff_result( print_diff_result(
display_width, display_width,
@ -194,9 +210,17 @@ fn diff_file(
missing_as_empty: bool, missing_as_empty: bool,
node_limit: u32, node_limit: u32,
byte_limit: usize, byte_limit: usize,
language_override: Option<guess_language::Language>,
) -> DiffResult { ) -> DiffResult {
let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty); let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty);
diff_file_content(display_path, &lhs_bytes, &rhs_bytes, node_limit, byte_limit) diff_file_content(
display_path,
&lhs_bytes,
&rhs_bytes,
node_limit,
byte_limit,
language_override,
)
} }
fn diff_file_content( fn diff_file_content(
@ -205,6 +229,7 @@ fn diff_file_content(
rhs_bytes: &[u8], rhs_bytes: &[u8],
node_limit: u32, node_limit: u32,
byte_limit: usize, byte_limit: usize,
language_override: Option<guess_language::Language>,
) -> DiffResult { ) -> DiffResult {
if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) { if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) {
return DiffResult { return DiffResult {
@ -246,7 +271,9 @@ fn diff_file_content(
} else { } else {
&rhs_src &rhs_src
}; };
let ts_lang = guess(path, guess_src).map(tsp::from_language); let ts_lang = language_override
.or_else(|| guess(path, guess_src))
.map(tsp::from_language);
if lhs_bytes == rhs_bytes { if lhs_bytes == rhs_bytes {
// If the two files are completely identical, return early // If the two files are completely identical, return early
@ -346,6 +373,7 @@ fn diff_directories<'a>(
missing_as_empty: bool, missing_as_empty: bool,
node_limit: u32, node_limit: u32,
byte_limit: usize, byte_limit: usize,
language_override: Option<guess_language::Language>,
) -> impl Iterator<Item = DiffResult> + 'a { ) -> impl Iterator<Item = DiffResult> + 'a {
WalkDir::new(lhs_dir) WalkDir::new(lhs_dir)
.into_iter() .into_iter()
@ -365,6 +393,7 @@ fn diff_directories<'a>(
missing_as_empty, missing_as_empty,
node_limit, node_limit,
byte_limit, byte_limit,
language_override,
) )
}) })
} }
@ -503,6 +532,7 @@ mod tests {
s.as_bytes(), s.as_bytes(),
DEFAULT_NODE_LIMIT, DEFAULT_NODE_LIMIT,
DEFAULT_BYTE_LIMIT, DEFAULT_BYTE_LIMIT,
None,
); );
assert_eq!(res.lhs_positions, vec![]); assert_eq!(res.lhs_positions, vec![]);

@ -4,7 +4,7 @@ use atty::Stream;
use clap::{crate_authors, crate_description, crate_version, App, AppSettings, Arg}; use clap::{crate_authors, crate_description, crate_version, App, AppSettings, Arg};
use const_format::formatcp; use const_format::formatcp;
use crate::style::BackgroundColor; use crate::{guess_language, style::BackgroundColor};
pub const DEFAULT_NODE_LIMIT: u32 = 30_000; pub const DEFAULT_NODE_LIMIT: u32 = 30_000;
pub const DEFAULT_BYTE_LIMIT: usize = 1_000_000; pub const DEFAULT_BYTE_LIMIT: usize = 1_000_000;
@ -89,6 +89,13 @@ fn app() -> clap::App<'static> {
Arg::new("missing-as-empty").long("missing-as-empty") Arg::new("missing-as-empty").long("missing-as-empty")
.help("Treat paths that don't exist as equivalent to an empty file.") .help("Treat paths that don't exist as equivalent to an empty file.")
) )
.arg(
Arg::new("language").long("language")
.value_name("EXT")
.allow_invalid_utf8(true)
.help("Override language detection. Inputs are assumed to have this file extension. When diffing directories, applies to all files.")
// TODO: support DFT_LANGUAGE for consistency
)
.arg( .arg(
Arg::new("node-limit").long("node-limit") Arg::new("node-limit").long("node-limit")
.takes_value(true) .takes_value(true)
@ -135,14 +142,17 @@ pub enum Mode {
color_output: ColorOutput, color_output: ColorOutput,
display_width: usize, display_width: usize,
display_path: String, display_path: String,
language_override: Option<guess_language::Language>,
lhs_path: String, lhs_path: String,
rhs_path: String, rhs_path: String,
}, },
DumpTreeSitter { DumpTreeSitter {
path: String, path: String,
language_override: Option<guess_language::Language>,
}, },
DumpSyntax { DumpSyntax {
path: String, path: String,
language_override: Option<guess_language::Language>,
}, },
} }
@ -150,15 +160,32 @@ pub enum Mode {
pub fn parse_args() -> Mode { pub fn parse_args() -> Mode {
let matches = app().get_matches(); let matches = app().get_matches();
let language_override = match matches.value_of_os("language") {
Some(lang_str) => {
if let Some(lang) = guess_language::from_extension(lang_str) {
Some(lang)
} else {
eprintln!(
"No language is associated with extension: {}",
lang_str.to_string_lossy()
);
None
}
}
None => None,
};
if let Some(path) = matches.value_of("dump-syntax") { if let Some(path) = matches.value_of("dump-syntax") {
return Mode::DumpSyntax { return Mode::DumpSyntax {
path: path.to_string(), path: path.to_string(),
language_override,
}; };
} }
if let Some(path) = matches.value_of("dump-ts") { if let Some(path) = matches.value_of("dump-ts") {
return Mode::DumpTreeSitter { return Mode::DumpTreeSitter {
path: path.to_string(), path: path.to_string(),
language_override,
}; };
} }
@ -309,6 +336,7 @@ pub fn parse_args() -> Mode {
color_output, color_output,
display_width, display_width,
display_path, display_path,
language_override,
lhs_path, lhs_path,
rhs_path, rhs_path,
} }