Add a --language option to override language detection

Fixes #199
pull/230/head
Wilfred Hughes 2022-04-01 23:09:17 +07:00
parent e3d91d6dcc
commit e472acb805
4 changed files with 73 additions and 11 deletions

@ -6,6 +6,10 @@ Added the `--display` option to switch between `side-by-side`,
`side-by-side-show-both`, and `inline` display modes. This replaces
the `INLINE` and `DFT_SHOW_BOTH` environment variables.
Added the `--language` option to override language detection. When
specified with a recognised file extension, language detection is
skipped and every input file is treated as having that extension.
## 0.25 (released 31st March 2022)
### Display

@ -172,7 +172,7 @@ fn from_name(path: &Path) -> Option<Language> {
}
}
fn from_extension(extension: &OsStr) -> Option<Language> {
pub fn from_extension(extension: &OsStr) -> Option<Language> {
match extension.to_string_lossy().borrow() {
"sh" | "bash" | "bats" | "cgi" | "command" | "env" | "fcgi" | "ksh" | "sh.in" | "tmux"
| "tool" | "zsh" => Some(Bash),

@ -88,11 +88,16 @@ fn main() {
reset_sigpipe();
match options::parse_args() {
Mode::DumpTreeSitter { path } => {
Mode::DumpTreeSitter {
path,
language_override,
} => {
let path = Path::new(&path);
let bytes = read_or_die(path);
let src = String::from_utf8_lossy(&bytes).to_string();
match guess(path, &src) {
let language = language_override.or_else(|| guess(path, &src));
match language {
Some(lang) => {
let ts_lang = tsp::from_language(lang);
let tree = tsp::parse_to_tree(&src, &ts_lang);
@ -103,12 +108,16 @@ fn main() {
}
}
}
Mode::DumpSyntax { path } => {
Mode::DumpSyntax {
path,
language_override,
} => {
let path = Path::new(&path);
let bytes = read_or_die(path);
let src = String::from_utf8_lossy(&bytes).to_string();
match guess(path, &src) {
let language = language_override.or_else(|| guess(path, &src));
match language {
Some(lang) => {
let ts_lang = tsp::from_language(lang);
let arena = Arena::new();
@ -131,6 +140,7 @@ fn main() {
color_output,
display_width,
display_path,
language_override,
lhs_path,
rhs_path,
..
@ -152,9 +162,14 @@ fn main() {
}
if lhs_path.is_dir() && rhs_path.is_dir() {
for diff_result in
diff_directories(lhs_path, rhs_path, missing_as_empty, node_limit, byte_limit)
{
for diff_result in diff_directories(
lhs_path,
rhs_path,
missing_as_empty,
node_limit,
byte_limit,
language_override,
) {
print_diff_result(
display_width,
use_color,
@ -172,6 +187,7 @@ fn main() {
missing_as_empty,
node_limit,
byte_limit,
language_override,
);
print_diff_result(
display_width,
@ -194,9 +210,17 @@ fn diff_file(
missing_as_empty: bool,
node_limit: u32,
byte_limit: usize,
language_override: Option<guess_language::Language>,
) -> DiffResult {
let (lhs_bytes, rhs_bytes) = read_files_or_die(lhs_path, rhs_path, missing_as_empty);
diff_file_content(display_path, &lhs_bytes, &rhs_bytes, node_limit, byte_limit)
diff_file_content(
display_path,
&lhs_bytes,
&rhs_bytes,
node_limit,
byte_limit,
language_override,
)
}
fn diff_file_content(
@ -205,6 +229,7 @@ fn diff_file_content(
rhs_bytes: &[u8],
node_limit: u32,
byte_limit: usize,
language_override: Option<guess_language::Language>,
) -> DiffResult {
if is_probably_binary(lhs_bytes) || is_probably_binary(rhs_bytes) {
return DiffResult {
@ -246,7 +271,9 @@ fn diff_file_content(
} else {
&rhs_src
};
let ts_lang = guess(path, guess_src).map(tsp::from_language);
let ts_lang = language_override
.or_else(|| guess(path, guess_src))
.map(tsp::from_language);
if lhs_bytes == rhs_bytes {
// If the two files are completely identical, return early
@ -346,6 +373,7 @@ fn diff_directories<'a>(
missing_as_empty: bool,
node_limit: u32,
byte_limit: usize,
language_override: Option<guess_language::Language>,
) -> impl Iterator<Item = DiffResult> + 'a {
WalkDir::new(lhs_dir)
.into_iter()
@ -365,6 +393,7 @@ fn diff_directories<'a>(
missing_as_empty,
node_limit,
byte_limit,
language_override,
)
})
}
@ -503,6 +532,7 @@ mod tests {
s.as_bytes(),
DEFAULT_NODE_LIMIT,
DEFAULT_BYTE_LIMIT,
None,
);
assert_eq!(res.lhs_positions, vec![]);

@ -4,7 +4,7 @@ use atty::Stream;
use clap::{crate_authors, crate_description, crate_version, App, AppSettings, Arg};
use const_format::formatcp;
use crate::style::BackgroundColor;
use crate::{guess_language, style::BackgroundColor};
pub const DEFAULT_NODE_LIMIT: u32 = 30_000;
pub const DEFAULT_BYTE_LIMIT: usize = 1_000_000;
@ -89,6 +89,13 @@ fn app() -> clap::App<'static> {
Arg::new("missing-as-empty").long("missing-as-empty")
.help("Treat paths that don't exist as equivalent to an empty file.")
)
.arg(
Arg::new("language").long("language")
.value_name("EXT")
.allow_invalid_utf8(true)
.help("Override language detection. Inputs are assumed to have this file extension. When diffing directories, applies to all files.")
// TODO: support DFT_LANGUAGE for consistency
)
.arg(
Arg::new("node-limit").long("node-limit")
.takes_value(true)
@ -135,14 +142,17 @@ pub enum Mode {
color_output: ColorOutput,
display_width: usize,
display_path: String,
language_override: Option<guess_language::Language>,
lhs_path: String,
rhs_path: String,
},
DumpTreeSitter {
path: String,
language_override: Option<guess_language::Language>,
},
DumpSyntax {
path: String,
language_override: Option<guess_language::Language>,
},
}
@ -150,15 +160,32 @@ pub enum Mode {
pub fn parse_args() -> Mode {
let matches = app().get_matches();
// Resolve the optional `--language` override. The argument value is
// treated as a file extension and looked up with
// `guess_language::from_extension`. An unrecognised extension is
// reported on stderr and yields `None`, so no override is applied.
let language_override = matches.value_of_os("language").and_then(|lang_str| {
    let lang = guess_language::from_extension(lang_str);
    if lang.is_none() {
        eprintln!(
            "No language is associated with extension: {}",
            lang_str.to_string_lossy()
        );
    }
    lang
});
if let Some(path) = matches.value_of("dump-syntax") {
return Mode::DumpSyntax {
path: path.to_string(),
language_override,
};
}
if let Some(path) = matches.value_of("dump-ts") {
return Mode::DumpTreeSitter {
path: path.to_string(),
language_override,
};
}
@ -309,6 +336,7 @@ pub fn parse_args() -> Mode {
color_output,
display_width,
display_path,
language_override,
lhs_path,
rhs_path,
}