From ffd49d523a3ac2d3cfdec7dc463f8b20eb524c3a Mon Sep 17 00:00:00 2001 From: Zhenge Chen Date: Thu, 6 Jul 2023 08:43:57 -0700 Subject: [PATCH] Detect replaced strings If a string is replaced with another, apply subword highlighting similar to how we handle replaced comments. Co-authored-by: Wilfred Hughes --- sample_files/compare.expected | 28 +++++++++++++------------- src/diff/changes.rs | 1 + src/diff/graph.rs | 37 +++++++++++++++++++++++++--------- src/diff/sliders.rs | 10 ++++----- src/parse/syntax.rs | 38 +++++++++++++++++++++++++++-------- 5 files changed, 77 insertions(+), 37 deletions(-) diff --git a/sample_files/compare.expected b/sample_files/compare.expected index e08e87056..262b11f00 100644 --- a/sample_files/compare.expected +++ b/sample_files/compare.expected @@ -2,7 +2,7 @@ sample_files/Session_before.kt sample_files/Session_after.kt 9def134da4bbc423a1fde93001618776 - sample_files/ada_before.adb sample_files/ada_after.adb -a3b2b5033fde04bf9d2d71cb56d9eade - +dc605dc77a26c333671cb29a50f61c46 - sample_files/added_line_before.txt sample_files/added_line_after.txt 2deb040c52fb94459ca0540bcc983000 - @@ -74,10 +74,10 @@ sample_files/helpful-unit-test-before.el sample_files/helpful-unit-test-after.el ce09e8127c21b8c186cd8a2143035b28 - sample_files/helpful_before.el sample_files/helpful_after.el -65ad6cb40353750f05d183f9276930b6 - +a0f2e0115ea94c46d3650ba89b486f09 - sample_files/html_before.html sample_files/html_after.html -a84fc473494c982bb1c0fb8ad9f1e84f - +3d10040707b655920988fae2e610c4be - sample_files/html_simple_before.html sample_files/html_simple_after.html ce3bfa12bc21d0eb5528766e18387e86 - @@ -98,13 +98,13 @@ sample_files/java_before.java sample_files/java_after.java 96fabae990d201558531d24b777ca8f5 - sample_files/javascript_before.js sample_files/javascript_after.js -51ffab103bd475c322b7aa42f35c094e - +d738cb26e3f5086c9aa36df22c6d9a74 - sample_files/javascript_simple_before.js sample_files/javascript_simple_after.js 3357d9d47a5e7efb3c7677745993ea2b - sample_files/json_before.json sample_files/json_after.json -bae479fb04e15baf9460c5274c77963b - +11bd95ff0aff18781d3421f702d62c17 - sample_files/jsx_before.jsx sample_files/jsx_after.jsx 5784f67cac95fcdb621751aa80a3402b - @@ -116,7 +116,7 @@ sample_files/load_before.js sample_files/load_after.js 5cb293020a07b0635b864850c07458b3 - sample_files/lua_before.lua sample_files/lua_after.lua -9886d61f459cdf566be9c42f7fa61a12 - +c12d85c8ffa7ad6b6ca931cf52ac5f3e - sample_files/makefile_before.mk sample_files/makefile_after.mk 82ed37f60448e7402c62d5319f30fd3c - @@ -128,10 +128,10 @@ sample_files/modules_before.ml sample_files/modules_after.ml cf88821ead0d432d4841f476c2f26fd2 - sample_files/multibyte_before.py sample_files/multibyte_after.py -4f06087b10fec86e4cf323aa228afff5 - +2b597ab6bdbfdc65abd2aac047de0f76 - sample_files/multiline_string_before.ml sample_files/multiline_string_after.ml -ba135b1451962f563ce8c2f449a904bf - +2f163f0cefda59af2127992da932fbed - sample_files/nest_before.rs sample_files/nest_after.rs 59f4e15e83e05c0fa36d03f4b2bb4cf4 - @@ -146,7 +146,7 @@ sample_files/newick_before.nwk sample_files/newick_after.nwk d17c12222f804e4973c166d751a9ae06 - sample_files/nix_before.nix sample_files/nix_after.nix -09a56752c1eb7f3f5c10d631a01973fc - +2ed4635736d268a580701ebdbf8101db - sample_files/ocaml_before.ml sample_files/ocaml_after.ml 53146610a48e80bf52e845110307c83d - @@ -158,7 +158,7 @@ sample_files/pascal_before.pascal sample_files/pascal_after.pascal dfea5599b7f5e180d0fafab326f612cc - sample_files/perl_before.pl sample_files/perl_after.pl -a4ce0c52073b162c3935b75b4070889d - +09034cdf9cc4853ba7527de6d633e9be - sample_files/prefer_outer_before.el sample_files/prefer_outer_after.el de31a80dc8a06987aeff4aaa04ce3b87 - @@ -173,7 +173,7 @@ sample_files/r_before.R sample_files/r_after.R 7a9bc4e3ba87b6f2139a6cdadcc7ee5f - sample_files/racket_before.rkt sample_files/racket_after.rkt -37b30401886359735b9bc43518361945 - +969d25be4b7870d519dff2445d9c9a28 - sample_files/ruby_before.rb sample_files/ruby_after.rb db81701f87486b18f99d326d028d9929 - @@ -206,7 +206,7 @@ sample_files/syntax_error_before.js sample_files/syntax_error_after.js 0e71145527541a4a76b8140f0659223c - sample_files/tab_before.c sample_files/tab_after.c -b652d15f3a05b82a7d871cfeca2f453f - +e3a8f186b8ae28330c4a6b8ae79e74a3 - sample_files/tailwind_before.css sample_files/tailwind_after.css cee5ee7415b1bd50bdc2dacd11e7303a - @@ -218,7 +218,7 @@ sample_files/todomvc_before.gleam sample_files/todomvc_after.gleam b142169ae6ac08ef64d0cf67a2e66f5b - sample_files/toml_before.toml sample_files/toml_after.toml -08dad07d7c85b807094b5b4cf065cda9 - +9ce1757892a0d13451ebc9ef2ce7f6be - sample_files/typescript_before.ts sample_files/typescript_after.ts 06648ebbe63a69e29de54b541fa2b3b8 - @@ -227,7 +227,7 @@ sample_files/typing_before.ml sample_files/typing_after.ml 544c31c1d6651437ba9ed3a3a3524d76 - sample_files/utf16_before.py sample_files/utf16_after.py -3d4e36306f4bae1fa1a3d800de413726 - +b248963f77b09b5d5db8b036beeaee23 - sample_files/whitespace_before.tsx sample_files/whitespace_after.tsx 49ed560e481c23633a00cce3674d0985 - diff --git a/src/diff/changes.rs b/src/diff/changes.rs index 54df33514..af80df5ab 100644 --- a/src/diff/changes.rs +++ b/src/diff/changes.rs @@ -8,6 +8,7 @@ use crate::parse::syntax::{Syntax, SyntaxId}; pub enum ChangeKind<'a> { Unchanged(&'a Syntax<'a>), ReplacedComment(&'a Syntax<'a>, &'a Syntax<'a>), + ReplacedString(&'a Syntax<'a>, &'a Syntax<'a>), Novel, } diff --git a/src/diff/graph.rs b/src/diff/graph.rs index d031e666a..87e06c644 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -284,6 +284,9 @@ pub enum Edge { ReplacedComment { levenshtein_pct: u8, }, + ReplacedString { + levenshtein_pct: u8, + }, NovelAtomLHS {}, NovelAtomRHS {}, // TODO: An EnterNovelDelimiterBoth edge might help performance @@ -333,7 +336,9 @@ impl Edge { // novel. However, since ReplacedComment is an alternative // to NovelAtomLHS and NovelAtomRHS, we need to be // slightly less than 2 * 300. - ReplacedComment { levenshtein_pct } => 500 + u32::from(100 - levenshtein_pct), + ReplacedComment { levenshtein_pct } | ReplacedString { levenshtein_pct } => { + 500 + u32::from(100 - levenshtein_pct) + } } } } @@ -574,21 +579,26 @@ pub fn set_neighbours<'s, 'b>( if let ( Syntax::Atom { content: lhs_content, - kind: AtomKind::Comment, + kind: lhs_kind @ AtomKind::Comment | lhs_kind @ AtomKind::String, .. }, Syntax::Atom { content: rhs_content, - kind: AtomKind::Comment, + kind: rhs_kind @ AtomKind::Comment | rhs_kind @ AtomKind::String, .. }, ) = (lhs_syntax, rhs_syntax) { - // Both sides are comments and their content is reasonably - // similar. - if lhs_content != rhs_content { + // Both sides are comments/both sides are strings and + // their content is reasonably similar. + if lhs_kind == rhs_kind && lhs_content != rhs_content { let levenshtein_pct = (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8; + let edge = if lhs_kind == &AtomKind::Comment { + ReplacedComment { levenshtein_pct } + } else { + ReplacedString { levenshtein_pct } + }; let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) = pop_all_parents( @@ -599,7 +609,7 @@ pub fn set_neighbours<'s, 'b>( &v.parents, ); res.push(( - ReplacedComment { levenshtein_pct }, + edge, allocate_if_new( Vertex { neighbours: RefCell::new(None), @@ -776,13 +786,20 @@ pub fn populate_change_map<'s, 'b>( change_map.insert(lhs, ChangeKind::Unchanged(rhs)); change_map.insert(rhs, ChangeKind::Unchanged(lhs)); } - ReplacedComment { levenshtein_pct } => { + ReplacedComment { levenshtein_pct } | ReplacedString { levenshtein_pct } => { let lhs = v.lhs_syntax.unwrap(); let rhs = v.rhs_syntax.unwrap(); + let change_kind = |first, second| { + if let ReplacedComment { .. } = e { + ChangeKind::ReplacedComment(first, second) + } else { + ChangeKind::ReplacedString(first, second) + } + }; if *levenshtein_pct > 40 { - change_map.insert(lhs, ChangeKind::ReplacedComment(lhs, rhs)); - change_map.insert(rhs, ChangeKind::ReplacedComment(rhs, lhs)); + change_map.insert(lhs, change_kind(lhs, rhs)); + change_map.insert(rhs, change_kind(rhs, lhs)); } else { change_map.insert(lhs, ChangeKind::Novel); change_map.insert(rhs, ChangeKind::Novel); diff --git a/src/diff/sliders.rs b/src/diff/sliders.rs index 7c7b99a10..4d9c0e153 100644 --- a/src/diff/sliders.rs +++ b/src/diff/sliders.rs @@ -134,7 +134,7 @@ fn fix_nested_slider_prefer_outer<'a>(node: &'a Syntax<'a>, change_map: &mut Cha } } } - ReplacedComment(_, _) => {} + ReplacedComment(_, _) | ReplacedString(_, _) => {} Novel => {} } @@ -154,7 +154,7 @@ fn fix_nested_slider_prefer_inner<'a>(node: &'a Syntax<'a>, change_map: &mut Cha .expect("Changes should be set before slider correction") { Unchanged(_) => {} - ReplacedComment(_, _) => {} + ReplacedComment(_, _) | ReplacedString(_, _) => {} Novel => { let mut found_unchanged = vec![]; unchanged_descendants(children, &mut found_unchanged, change_map); @@ -188,7 +188,7 @@ fn unchanged_descendants<'a>( Unchanged(_) => { found.push(node); } - Novel | ReplacedComment(_, _) => { + Novel | ReplacedComment(_, _) | ReplacedString(_, _) => { if let List { children, .. } = node { unchanged_descendants(children, found, change_map); } @@ -347,7 +347,7 @@ fn novel_regions_after_unchanged<'a>( region = Some(r); } } - ReplacedComment(_, _) => { + ReplacedComment(_, _) | ReplacedString(_, _) => { // Could have just finished a novel region. if let Some(region) = region { regions.push(region); @@ -395,7 +395,7 @@ fn novel_regions_before_unchanged<'a>( r.push(i); region = Some(r); } - ReplacedComment(_, _) => { + ReplacedComment(_, _) | ReplacedString(_, _) => { region = None; } } diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index b80f70c0e..da5927e1b 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -21,9 +21,16 @@ impl<'a> fmt::Debug for ChangeKind<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let desc = match self { Unchanged(node) => format!("Unchanged(ID: {})", node.id()), - ReplacedComment(lhs_node, rhs_node) => { + ReplacedComment(lhs_node, rhs_node) | ReplacedString(lhs_node, rhs_node) => { + let change_kind = if let ReplacedComment(_, _) = self { + "ReplacedComment" + } else { + "ReplacedString" + }; + format!( - "ReplacedComment(lhs ID: {}, rhs ID: {})", + "{}(lhs ID: {}, rhs ID: {})", + change_kind, lhs_node.id(), rhs_node.id() ) @@ -687,12 +694,15 @@ pub fn split_words_and_numbers(s: &str) -> Vec<&str> { res } -fn split_comment_words( +fn split_atom_words( content: &str, pos: SingleLineSpan, opposite_content: &str, opposite_pos: SingleLineSpan, + kind: AtomKind, ) -> Vec { + debug_assert!(kind == AtomKind::Comment || kind == AtomKind::String); + // TODO: merge adjacent single-line comments unless there are // blank lines between them. let content_parts = split_words_and_numbers(content); @@ -712,7 +722,7 @@ fn split_comment_words( if !is_all_whitespace(word) { res.push(MatchedPos { kind: MatchKind::NovelWord { - highlight: TokenKind::Atom(AtomKind::Comment), + highlight: TokenKind::Atom(kind), }, pos: content_newlines.from_offsets_relative_to( pos, @@ -735,7 +745,7 @@ fn split_comment_words( res.push(MatchedPos { kind: MatchKind::NovelLinePart { - highlight: TokenKind::Atom(AtomKind::Comment), + highlight: TokenKind::Atom(kind), self_pos: word_pos, opposite_pos: opposite_word_pos, }, @@ -762,7 +772,7 @@ impl MatchedPos { is_close: bool, ) -> Vec { match ck { - ReplacedComment(this, opposite) => { + ReplacedComment(this, opposite) | ReplacedString(this, opposite) => { let this_content = match this { List { .. } => unreachable!(), Atom { content, .. } => content, @@ -773,13 +783,19 @@ impl MatchedPos { content, position, .. } => (content, position), }; + let kind = if let ReplacedComment(_, _) = ck { + AtomKind::Comment + } else { + AtomKind::String + }; - split_comment_words( + split_atom_words( this_content, // TODO: handle the whole pos here. pos[0], opposite_content, opposite_pos[0], + kind, ) } Unchanged(opposite) => { @@ -1133,7 +1149,13 @@ mod tests { end_col: 3, }; - let res = split_comment_words(content, pos, opposite_content, opposite_pos); + let res = split_atom_words( + content, + pos, + opposite_content, + opposite_pos, + AtomKind::Comment, + ); assert_eq!( res, vec![MatchedPos {