Fix nested sliders in C-like languages

Improves #165
unified_patch
Wilfred Hughes 2022-04-04 23:13:28 +07:00
parent 1f2d8bd7ab
commit 1c416733ef
6 changed files with 153 additions and 8 deletions

@ -23,6 +23,11 @@ Added the `--language` option to enable overriding language
detection. When specified, language detection is disabled, and the
input file is assumed to have the extension specified.
### Diffing
Improved diff results for nested sequences `foo(bar(baz()))` in C-like
languages.
## 0.25 (released 31st March 2022)
### Display

@ -85,6 +85,9 @@ sample_files/multiline_string_before.ml sample_files/multiline_string_after.ml
sample_files/nest_before.rs sample_files/nest_after.rs
a5f34e778d04fc5732c17162e0b16f2d -
sample_files/nested_slider_before.rs sample_files/nested_slider_after.rs
3a901b805dd8b541c43edb96c7e4e148 -
sample_files/nesting_before.el sample_files/nesting_after.el
16639761819b53b9216a9031ae94455c -

@ -0,0 +1,11 @@
fn split_string_by_codepoint_brace() {
if true {
if pad_last {
x;
}
}
}
fn split_string_by_codepoint_paren() {
foo(bar(x))
}

@ -0,0 +1,9 @@
fn split_string_by_codepoint_brace() {
if true {
x;
}
}
fn split_string_by_codepoint_paren() {
foo(x)
}

@ -272,16 +272,15 @@ fn diff_file_content(
} else {
&rhs_src
};
let ts_lang = language_override
.or_else(|| guess(path, guess_src))
.map(tsp::from_language);
let language = language_override.or_else(|| guess(path, guess_src));
let lang_config = language.map(tsp::from_language);
if lhs_bytes == rhs_bytes {
// If the two files are completely identical, return early
// rather than doing any more work.
return DiffResult {
path: display_path.into(),
language: ts_lang.map(|l| l.name.into()),
language: lang_config.map(|l| l.name.into()),
lhs_src: FileContent::Text("".into()),
rhs_src: FileContent::Text("".into()),
lhs_positions: vec![],
@ -289,7 +288,7 @@ fn diff_file_content(
};
}
let (lang_name, lhs_positions, rhs_positions) = match ts_lang {
let (lang_name, lhs_positions, rhs_positions) = match lang_config {
_ if lhs_bytes.len() > byte_limit || rhs_bytes.len() > byte_limit => {
let lhs_positions = line_parser::change_positions(&lhs_src, &rhs_src);
let rhs_positions = line_parser::change_positions(&rhs_src, &lhs_src);
@ -336,8 +335,9 @@ fn diff_file_content(
rhs_section_nodes.get(0).copied(),
);
fix_all_sliders(&lhs_section_nodes);
fix_all_sliders(&rhs_section_nodes);
let language = language.unwrap();
fix_all_sliders(language, &lhs_section_nodes);
fix_all_sliders(language, &rhs_section_nodes);
}
let lhs_positions = syntax::change_positions(&lhs);

@ -30,15 +30,40 @@
//! (B in this example).
use crate::{
guess_language,
positions::SingleLineSpan,
syntax::{ChangeKind::*, Syntax},
};
use Syntax::*;
pub fn fix_all_sliders<'a>(nodes: &[&'a Syntax<'a>]) {
pub fn fix_all_sliders<'a>(language: guess_language::Language, nodes: &[&'a Syntax<'a>]) {
// TODO: fix sliders that require more than two steps.
fix_all_sliders_one_step(nodes);
fix_all_sliders_one_step(nodes);
if !prefer_outer_delimiter(language) {
fix_all_nested_sliders(nodes);
}
}
/// Should nester slider correction prefer the inner or outer
/// delimiter?
fn prefer_outer_delimiter(language: guess_language::Language) -> bool {
use guess_language::Language::*;
match language {
// For Lisp family languages, we get the best result with the
// outer delimiter.
EmacsLisp | Clojure | CommonLisp | JanetSimple => true,
// JSON is like Lisp: the outer delimiter in an array object
// is the most relevant.
Json => true,
// For everything else, prefer the inner delimiter. These
// languages have syntax like `foo(bar)` or `foo[bar]` where
// the inner delimiter is more relevant.
Bash | C | CPlusPlus | CSharp | Css | Dart | Elixir | Elm | Gleam | Go | Haskell | Java
| JavaScript | Jsx | Lua | Nix | OCaml | OCamlInterface | Php | Python | Ruby | Rust
| Scala | Tsx | TypeScript | Yaml | Zig => false,
}
}
fn fix_all_sliders_one_step<'a>(nodes: &[&'a Syntax<'a>]) {
@ -50,6 +75,98 @@ fn fix_all_sliders_one_step<'a>(nodes: &[&'a Syntax<'a>]) {
fix_sliders(nodes);
}
/// Correct sliders in middle insertions.
///
/// Consider the code:
///
/// ```text
/// // Before
/// old1(old2);
/// // After
/// old1(new1(old2));
/// ```
///
/// Tree diffing has two possible solution here. Either we've added
/// `new1(...)` or we've added `(new1...)`. Both are valid.
///
/// For C-like languages, the first case matches human intuition much
/// better. Fix the slider to make the inner delimiter novel.
fn fix_all_nested_sliders<'a>(nodes: &[&'a Syntax<'a>]) {
for node in nodes {
fix_nested_slider(node);
}
}
fn fix_nested_slider<'a>(node: &'a Syntax<'a>) {
if let List { children, .. } = node {
match node
.change()
.expect("Changes should be set before slider correction")
{
Unchanged(_) => {
for child in children {
fix_nested_slider(child);
}
}
ReplacedComment(_, _) => {}
Novel => {
let mut found_unchanged = vec![];
for child in children {
unchanged_descendants(child, &mut found_unchanged);
}
if let [List { .. }] = found_unchanged[..] {
push_unchanged_to_ancestor(node, found_unchanged[0]);
}
}
}
}
}
fn unchanged_descendants<'a>(node: &'a Syntax<'a>, found: &mut Vec<&'a Syntax<'a>>) {
if found.len() > 1 {
return;
}
match node.change().unwrap() {
Unchanged(_) => {
found.push(node);
}
Novel | ReplacedComment(_, _) => {
if let List { children, .. } = node {
for child in children {
unchanged_descendants(child, found);
}
}
}
}
}
fn push_unchanged_to_ancestor<'a>(root: &'a Syntax<'a>, inner: &'a Syntax<'a>) {
let inner_change = inner.change().expect("Node changes should be set");
let delimiters_match = match (root, inner) {
(
List {
open_content: root_open,
close_content: root_close,
..
},
List {
open_content: inner_open,
close_content: inner_close,
..
},
) => root_open == inner_open && root_close == inner_close,
_ => false,
};
if delimiters_match {
root.set_change(inner_change);
inner.set_change(Novel);
}
}
fn fix_sliders<'a>(nodes: &[&'a Syntax<'a>]) {
for (region_start, region_end) in novel_regions_after_unchanged(nodes) {
slide_to_prev_node(nodes, region_start, region_end);