Add JSON cli flag

pull/557/head
Alex Krantz 2023-04-25 08:54:20 +07:00 committed by Wilfred Hughes
parent 11f457b5f9
commit 11a96e5aec
7 changed files with 433 additions and 29 deletions

@ -1,5 +1,12 @@
## 0.51 (unreleased)
### Display
Added a JSON display option (`--display json`). This is currently
unstable, and requires you to set the environment variable
`DFT_UNSTABLE=yes`. The JSON structure may change in future.
Please give it a try and give feedback on GitHub issues.
## 0.50 (released 15th August 2023)
### Conflicts

45
Cargo.lock generated

@ -259,6 +259,8 @@ dependencies = [
"rayon",
"regex",
"rustc-hash",
"serde",
"serde_json",
"strsim",
"strum",
"tree-sitter",
@ -400,6 +402,12 @@ dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -731,6 +739,12 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
[[package]]
name = "ryu"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]]
name = "same-file"
version = "1.0.6"
@ -746,6 +760,37 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.176"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76dc28c9523c5d70816e393136b86d48909cfb27cecaa902d338c19ed47164dc"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.176"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4e7b8c5dc823e3b90651ff1d3808419cd14e5ad76de04feaf37da114e7a306f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.27",
]
[[package]]
name = "serde_json"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "signal-hook"
version = "0.3.14"

@ -64,6 +64,8 @@ strum = { version = "0.25", features = ["derive"] }
# hashbrown 0.13 requires rust 1.61
hashbrown = "0.12.3"
humansize = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
[dev-dependencies]
# assert_cmd 2.0.6 requires rust 1.60

@ -0,0 +1,313 @@
use crate::{
display::{
context::{all_matched_lines_filled, opposite_positions},
hunks::{matched_lines_indexes_for_hunk, matched_pos_to_hunks, merge_adjacent},
side_by_side::lines_with_novel,
},
lines::{LineNumber, MaxLine},
parse::syntax::{self, MatchedPos},
summary::{DiffResult, FileContent, FileFormat},
};
use serde::{ser::SerializeStruct, Serialize, Serializer};
use std::collections::HashMap;
/// Overall outcome reported for one file in the JSON output.
///
/// Variant names are serialized in lowercase (`"unchanged"`, `"changed"`, ...).
#[derive(Debug, Serialize)]
#[serde(rename_all = "lowercase")]
enum Status {
/// Text files that produced no hunks, or binary files without byte changes.
Unchanged,
/// Text files with at least one hunk, binary files with byte changes, or a
/// text/binary mismatch between the two sides.
Changed,
/// The left-hand source text is empty.
Created,
/// The right-hand source text is empty.
Deleted,
}
/// JSON representation of the diff result for a single file.
///
/// Serialization is hand-written (see the `Serialize` impl) so that `chunks`
/// can be left out when it is empty.
#[derive(Debug)]
struct File<'f> {
/// File format; emitted as the `language` field using its `Display` output.
language: &'f FileFormat,
/// Path emitted as the `path` field.
path: &'f str,
/// One inner vector of changed lines per hunk; empty for status-only results.
chunks: Vec<Vec<Line<'f>>>,
/// Overall status of the file.
status: Status,
}
impl<'f> File<'f> {
    /// Creates a `Changed` file that carries the given chunks of changed lines.
    fn with_sections(
        language: &'f FileFormat,
        path: &'f str,
        chunks: Vec<Vec<Line<'f>>>,
    ) -> File<'f> {
        let status = Status::Changed;
        Self {
            language,
            path,
            chunks,
            status,
        }
    }

    /// Creates a file that only reports `status` and has no chunks.
    fn with_status(language: &'f FileFormat, path: &'f str, status: Status) -> File<'f> {
        let chunks = Vec::new();
        Self {
            language,
            path,
            chunks,
            status,
        }
    }
}
impl<'f> From<&'f DiffResult> for File<'f> {
    /// Builds the JSON view of a single diff result.
    ///
    /// * Text on both sides: the hunks are computed and each hunk becomes one
    ///   chunk holding the aligned lines that contain novel content. If there
    ///   are no hunks the file is `Unchanged`; if the left (right) source is
    ///   empty the file is `Created` (`Deleted`). Those three cases carry no
    ///   chunks.
    /// * Binary on both sides: `Changed` or `Unchanged` depending on
    ///   `has_byte_changes`.
    /// * Binary on exactly one side: always `Changed`.
    ///
    /// Lines inside a chunk are emitted in alignment order, so the output is
    /// deterministic across runs.
    fn from(summary: &'f DiffResult) -> Self {
        match (&summary.lhs_src, &summary.rhs_src) {
            (FileContent::Text(lhs_src), FileContent::Text(rhs_src)) => {
                // TODO: share this hunk computation with main::print_diff_result,
                // which performs the same steps.
                let opposite_to_lhs = opposite_positions(&summary.lhs_positions);
                let opposite_to_rhs = opposite_positions(&summary.rhs_positions);

                let hunks = matched_pos_to_hunks(&summary.lhs_positions, &summary.rhs_positions);
                let hunks = merge_adjacent(
                    &hunks,
                    &opposite_to_lhs,
                    &opposite_to_rhs,
                    lhs_src.max_line(),
                    rhs_src.max_line(),
                    0,
                );

                if hunks.is_empty() {
                    return File::with_status(
                        &summary.file_format,
                        &summary.display_path,
                        Status::Unchanged,
                    );
                }

                if lhs_src.is_empty() {
                    return File::with_status(
                        &summary.file_format,
                        &summary.display_path,
                        Status::Created,
                    );
                }
                if rhs_src.is_empty() {
                    return File::with_status(
                        &summary.file_format,
                        &summary.display_path,
                        Status::Deleted,
                    );
                }

                let lhs_lines = lhs_src.split('\n').collect::<Vec<&str>>();
                let rhs_lines = rhs_src.split('\n').collect::<Vec<&str>>();

                let (lhs_lines_with_novel, rhs_lines_with_novel) =
                    lines_with_novel(&summary.lhs_positions, &summary.rhs_positions);

                let matched_lines = all_matched_lines_filled(
                    &summary.lhs_positions,
                    &summary.rhs_positions,
                    &lhs_lines,
                    &rhs_lines,
                );
                let mut matched_lines = &matched_lines[..];

                let mut chunks = Vec::with_capacity(hunks.len());
                for hunk in &hunks {
                    // `lines` keeps the rows in the order they are first seen;
                    // `line_indexes` maps a (lhs, rhs) pair to its slot so a
                    // repeated pair reuses the same row. Collecting from a
                    // HashMap directly would emit rows in arbitrary order.
                    let mut lines = Vec::with_capacity(hunk.lines.len());
                    let mut line_indexes = HashMap::with_capacity(hunk.lines.len());

                    let (start_i, end_i) = matched_lines_indexes_for_hunk(matched_lines, hunk, 0);
                    let aligned_lines = &matched_lines[start_i..end_i];
                    // Later hunks only need to search from this hunk onwards.
                    matched_lines = &matched_lines[start_i..];

                    for (lhs_line_num, rhs_line_num) in aligned_lines {
                        // A missing line number never counts as novel. (Treating
                        // it as line 0 would wrongly keep the row whenever
                        // line 0 of that side happens to be novel.)
                        let lhs_is_novel =
                            lhs_line_num.map_or(false, |l| lhs_lines_with_novel.contains(&l));
                        let rhs_is_novel =
                            rhs_line_num.map_or(false, |l| rhs_lines_with_novel.contains(&l));
                        if !lhs_is_novel && !rhs_is_novel {
                            continue;
                        }

                        let key = (lhs_line_num.map(|l| l.0), rhs_line_num.map(|l| l.0));
                        let index = *line_indexes.entry(key).or_insert_with(|| {
                            lines.push(Line::new(key.0, key.1));
                            lines.len() - 1
                        });
                        let line = &mut lines[index];

                        // `Line::new` creates a side exactly when its line
                        // number is present, so both options match together.
                        if let (Some(line_num), Some(side)) = (lhs_line_num, line.lhs.as_mut()) {
                            add_changes_to_side(
                                side,
                                *line_num,
                                &lhs_lines,
                                &summary.lhs_positions,
                            );
                        }
                        if let (Some(line_num), Some(side)) = (rhs_line_num, line.rhs.as_mut()) {
                            add_changes_to_side(
                                side,
                                *line_num,
                                &rhs_lines,
                                &summary.rhs_positions,
                            );
                        }
                    }

                    chunks.push(lines);
                }

                File::with_sections(&summary.file_format, &summary.display_path, chunks)
            }
            (FileContent::Binary, FileContent::Binary) => {
                let status = if summary.has_byte_changes {
                    Status::Changed
                } else {
                    Status::Unchanged
                };
                File::with_status(&FileFormat::Binary, &summary.display_path, status)
            }
            (_, FileContent::Binary) | (FileContent::Binary, _) => {
                File::with_status(&FileFormat::Binary, &summary.display_path, Status::Changed)
            }
        }
    }
}
impl<'f> Serialize for File<'f> {
    /// Writes the file as a struct named `File`.
    ///
    /// `chunks` is written first and only when it is non-empty, followed by
    /// `language` (the `Display` form of the file format), `path` and `status`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Hand-rolled equivalent of #[serde(skip_serializing_if = "Vec::is_empty")]:
        // the declared field count has to match what is actually written.
        let has_chunks = !self.chunks.is_empty();
        let field_count = if has_chunks { 4 } else { 3 };

        let mut state = serializer.serialize_struct("File", field_count)?;
        if has_chunks {
            state.serialize_field("chunks", &self.chunks)?;
        }

        let language = self.language.to_string();
        state.serialize_field("language", &language)?;
        state.serialize_field("path", &self.path)?;
        state.serialize_field("status", &self.status)?;
        state.end()
    }
}
/// One aligned row of a chunk, pairing a left-hand line with a right-hand line.
///
/// A side that is `None` is left out of the serialized output.
#[derive(Debug, Serialize)]
struct Line<'l> {
    /// Left-hand line, if the row has one.
    #[serde(skip_serializing_if = "Option::is_none")]
    lhs: Option<Side<'l>>,
    /// Right-hand line, if the row has one.
    #[serde(skip_serializing_if = "Option::is_none")]
    rhs: Option<Side<'l>>,
}

impl<'l> Line<'l> {
    /// Creates a row with an empty `Side` for every line number that is present.
    fn new(lhs_number: Option<u32>, rhs_number: Option<u32>) -> Line<'l> {
        let lhs = lhs_number.map(Side::new);
        let rhs = rhs_number.map(Side::new);
        Self { lhs, rhs }
    }
}
/// The changes found on one line of one side of the diff.
#[derive(Debug, Serialize)]
struct Side<'s> {
    /// Line number as stored in the wrapped `LineNumber` value.
    line_number: u32,
    /// Novel spans on this line, in the order they were added.
    changes: Vec<Change<'s>>,
}

impl<'s> Side<'s> {
    /// Creates a side for `line_number` with no changes recorded yet.
    fn new(line_number: u32) -> Side<'s> {
        let changes = Vec::new();
        Self {
            line_number,
            changes,
        }
    }
}
/// A single novel span on one source line.
#[derive(Debug, Serialize)]
struct Change<'c> {
/// Start column of the span, copied from the match position's `start_col`.
start: u32,
/// End column of the span, copied from the match position's `end_col`;
/// used as the exclusive upper bound when slicing `content`.
end: u32,
/// The slice of the source line between `start` and `end`.
content: &'c str,
/// Syntax highlighting category of the span.
highlight: Highlight,
}
/// Syntax highlighting category attached to a `Change`.
///
/// Flattens `syntax::TokenKind` and its nested `syntax::AtomKind` into one
/// enum (see `Highlight::from_match`). Variant names are serialized in
/// snake_case, e.g. `tree_sitter_error`.
#[derive(Debug, Serialize)]
#[serde(rename_all = "snake_case")]
// TODO: use syntax::TokenKind and syntax::AtomKind instead of this merged enum,
// blocked by https://github.com/serde-rs/serde/issues/1402
enum Highlight {
/// Corresponds to `TokenKind::Delimiter`.
Delimiter,
/// Corresponds to `AtomKind::Normal`.
Normal,
/// Corresponds to `AtomKind::String`.
String,
/// Corresponds to `AtomKind::Type`.
Type,
/// Corresponds to `AtomKind::Comment`.
Comment,
/// Corresponds to `AtomKind::Keyword`.
Keyword,
/// Corresponds to `AtomKind::TreeSitterError`.
TreeSitterError,
}
impl Highlight {
    /// Maps the token kind carried by any `MatchKind` variant to the flattened
    /// `Highlight` used in the JSON output.
    fn from_match(kind: &syntax::MatchKind) -> Self {
        use syntax::{AtomKind, MatchKind, TokenKind};

        // Every variant carries a `highlight` field; which variant it is does
        // not affect the result.
        let token_kind = match kind {
            MatchKind::Ignored { highlight, .. }
            | MatchKind::UnchangedToken { highlight, .. }
            | MatchKind::Novel { highlight, .. }
            | MatchKind::NovelWord { highlight, .. }
            | MatchKind::NovelLinePart { highlight, .. } => highlight,
        };

        match token_kind {
            TokenKind::Delimiter => Self::Delimiter,
            TokenKind::Atom(AtomKind::String) => Self::String,
            TokenKind::Atom(AtomKind::Keyword) => Self::Keyword,
            TokenKind::Atom(AtomKind::Comment) => Self::Comment,
            TokenKind::Atom(AtomKind::Type) => Self::Type,
            TokenKind::Atom(AtomKind::Normal) => Self::Normal,
            TokenKind::Atom(AtomKind::TreeSitterError) => Self::TreeSitterError,
        }
    }
}
/// Prints all diff results as a single JSON array on one line of stdout,
/// with one element per result.
///
/// # Panics
///
/// Panics if serialization fails.
pub fn print_directory(diffs: Vec<DiffResult>) {
    let files: Vec<File> = diffs.iter().map(File::from).collect();
    let json = serde_json::to_string(&files).expect("failed to serialize files");
    println!("{}", json);
}
/// Prints one diff result as a single JSON object on one line of stdout.
///
/// # Panics
///
/// Panics if serialization fails.
pub fn print(diff: &DiffResult) {
    let json = serde_json::to_string(&File::from(diff)).expect("failed to serialize file");
    println!("{}", json);
}
/// Appends one `Change` to `side` for every novel match on `line_num`.
///
/// `src_lines` holds the source split into lines and is indexed by
/// `line_num`; each change borrows its text from that line.
///
/// # Panics
///
/// Panics if `line_num` is not a valid index into `src_lines`, or if a match's
/// column range is not a valid slice range for that line.
fn add_changes_to_side<'s>(
    side: &mut Side<'s>,
    line_num: LineNumber,
    src_lines: &[&'s str],
    all_matches: &[MatchedPos],
) {
    let line_text = src_lines[line_num.0 as usize];

    let novel_changes = matches_for_line(all_matches, line_num)
        .into_iter()
        .map(|matched| {
            let (start, end) = (matched.pos.start_col, matched.pos.end_col);
            Change {
                start,
                end,
                content: &line_text[(start as usize)..(end as usize)],
                highlight: Highlight::from_match(&matched.kind),
            }
        });
    side.changes.extend(novel_changes);
}
/// Returns the matches located on `line_num` whose kind is novel, keeping
/// their order in `matches`.
fn matches_for_line(matches: &[MatchedPos], line_num: LineNumber) -> Vec<&MatchedPos> {
    let mut on_line = Vec::new();
    for candidate in matches {
        if candidate.pos.line == line_num && candidate.kind.is_novel() {
            on_line.push(candidate);
        }
    }
    on_line
}

@ -1,5 +1,6 @@
pub mod context;
pub mod hunks;
pub mod inline;
pub mod json;
pub mod side_by_side;
pub mod style;

@ -232,38 +232,55 @@ fn main() {
options::FileArgument::NamedPath(lhs_path),
options::FileArgument::NamedPath(rhs_path),
) if lhs_path.is_dir() && rhs_path.is_dir() => {
// We want to diff files in the directory in
// parallel, but print the results serially (to
// prevent display interleaving).
// https://github.com/rayon-rs/rayon/issues/210#issuecomment-551319338
let (send, recv) = std::sync::mpsc::sync_channel(1);
let encountered_changes = encountered_changes.clone();
let print_options = display_options.clone();
let printing_thread = std::thread::spawn(move || {
for diff_result in recv.into_iter() {
print_diff_result(&print_options, &diff_result);
if diff_result.has_reportable_change() {
encountered_changes.store(true, Ordering::Relaxed);
}
}
});
diff_directories(
let diff_iter = diff_directories(
lhs_path,
rhs_path,
&display_options,
&diff_options,
&language_overrides,
)
.try_for_each_with(send, |s, diff_result| s.send(diff_result))
.expect("Receiver should be connected");
);
if matches!(display_options.display_mode, DisplayMode::Json) {
let results = diff_iter
.map(|diff_result| {
if diff_result.has_reportable_change() {
encountered_changes.store(true, Ordering::Relaxed);
}
diff_result
})
.collect();
display::json::print_directory(results);
} else {
// We want to diff files in the directory in
// parallel, but print the results serially (to
// prevent display interleaving).
// https://github.com/rayon-rs/rayon/issues/210#issuecomment-551319338
let (send, recv) = std::sync::mpsc::sync_channel(1);
printing_thread
.join()
.expect("Printing thread should not panic");
let encountered_changes = encountered_changes.clone();
let print_options = display_options.clone();
let printing_thread = std::thread::spawn(move || {
for diff_result in recv.into_iter() {
print_diff_result(&print_options, &diff_result);
if diff_result.has_reportable_change() {
encountered_changes.store(true, Ordering::Relaxed);
}
}
});
diff_iter
.try_for_each_with(send, |s, diff_result| s.send(diff_result))
.expect("Receiver should be connected");
printing_thread
.join()
.expect("Printing thread should not panic");
}
}
_ => {
let diff_result = diff_file(
@ -276,11 +293,18 @@ fn main() {
false,
&language_overrides,
);
print_diff_result(&display_options, &diff_result);
if diff_result.has_reportable_change() {
encountered_changes.store(true, Ordering::Relaxed);
}
match display_options.display_mode {
DisplayMode::Inline
| DisplayMode::SideBySide
| DisplayMode::SideBySideShowBoth => {
print_diff_result(&display_options, &diff_result);
}
DisplayMode::Json => display::json::print(&diff_result),
}
}
}
@ -775,6 +799,7 @@ fn print_diff_result(display_options: &DisplayOptions, summary: &DiffResult) {
&summary.rhs_positions,
);
}
DisplayMode::Json => unreachable!(),
}
}
(FileContent::Binary, FileContent::Binary) => {

@ -154,7 +154,7 @@ fn app() -> clap::Command<'static> {
)
.arg(
Arg::new("display").long("display")
.possible_values(["side-by-side", "side-by-side-show-both", "inline"])
.possible_values(["side-by-side", "side-by-side-show-both", "inline", "json"])
.default_value("side-by-side")
.value_name("MODE")
.env("DFT_DISPLAY")
@ -164,7 +164,9 @@ side-by-side: Display the before file and the after file in two separate columns
side-by-side-show-both: The same as side-by-side, but always uses two columns.
inline: A single column display, closer to traditional diff display.")
inline: A single column display, closer to traditional diff display.
json: Output the results as a machine-readable JSON array with an element per file.")
)
.arg(
Arg::new("color").long("color")
@ -285,6 +287,7 @@ pub enum DisplayMode {
Inline,
SideBySide,
SideBySideShowBoth,
Json,
}
#[derive(Eq, PartialEq, Debug)]
@ -528,6 +531,14 @@ pub fn parse_args() -> Mode {
"side-by-side" => DisplayMode::SideBySide,
"side-by-side-show-both" => DisplayMode::SideBySideShowBoth,
"inline" => DisplayMode::Inline,
"json" => {
if env::var(format!("DFT_UNSTABLE")).is_err() {
eprintln!("JSON output is an unstable feature and its format may change in future. To enable JSON output, set the environment variable DFT_UNSTABLE=yes.");
std::process::exit(EXIT_BAD_ARGUMENTS);
}
DisplayMode::Json
}
_ => {
unreachable!("clap has already validated display")
}