From 6fb800606ea83ebe2c53eca5562876acc3a83a73 Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Mon, 24 Jan 2022 21:13:45 -0800 Subject: [PATCH] Get all matched lines and then slice This is much simpler conceptually and less prone to bugs. Fixes #111 --- CHANGELOG.md | 2 + src/context.rs | 158 +++++++++++++++++++++- src/hunks.rs | 310 ++++++-------------------------------------- src/side_by_side.rs | 19 +-- 4 files changed, 208 insertions(+), 281 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90ff78ace..e4ceb0921 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### Display +Fixed display issues where lines were printed more than once. + Subword changes in comments are now shown in bold, to make them more visible. ## 0.17 (released 25 January 2022) diff --git a/src/context.rs b/src/context.rs index 97c7be56b..f297e7629 100644 --- a/src/context.rs +++ b/src/context.rs @@ -3,6 +3,7 @@ use std::collections::{HashMap, HashSet}; use crate::{ + hunks::{compact_gaps, ensure_contiguous}, lines::LineNumber, syntax::{zip_repeat_shorter, MatchKind, MatchedPos}, }; @@ -12,7 +13,111 @@ use crate::{ /// /// We may show fewer lines if the modified lines are at the beginning /// or end of the file. -const MAX_PADDING: usize = 3; +pub const MAX_PADDING: usize = 3; + +pub fn all_matched_lines_filled( + lhs_mps: &[MatchedPos], + rhs_mps: &[MatchedPos], +) -> Vec<(Option, Option)> { + let matched_lines = all_matched_lines(lhs_mps, rhs_mps); + + compact_gaps(ensure_contiguous(&matched_lines)) +} + +fn all_matched_lines( + lhs_mps: &[MatchedPos], + rhs_mps: &[MatchedPos], +) -> Vec<(Option, Option)> { + let lhs_matched_lines = matched_lines(lhs_mps); + let rhs_novel_lines = novel_lines(rhs_mps); + merge_in_opposite_lines(&lhs_matched_lines, &rhs_novel_lines) +} + +fn novel_lines(mps: &[MatchedPos]) -> Vec { + let mut lines = HashSet::new(); + for mp in mps { + match mp.kind { + MatchKind::Novel { .. } | MatchKind::ChangedCommentPart {} => { + lines.insert(mp.pos.line); + } + MatchKind::Unchanged { .. } | MatchKind::UnchangedCommentPart { .. } => {} + } + } + + let mut res: Vec = lines.into_iter().collect(); + res.sort_unstable(); + res +} + +fn matched_lines(mps: &[MatchedPos]) -> Vec<(Option, Option)> { + let mut highest_line = None; + let mut highest_opposite_line = None; + + let mut res: Vec<(Option, Option)> = vec![]; + for mp in mps { + let opposite_line = match &mp.kind { + MatchKind::Unchanged { opposite_pos, .. } + | MatchKind::UnchangedCommentPart { opposite_pos, .. } => { + if let Some(highest_opposite_side) = highest_opposite_line { + opposite_pos + .iter() + .map(|p| p.line) + .find(|l| *l > highest_opposite_side) + } else { + opposite_pos.first().map(|p| p.line) + } + } + MatchKind::Novel { .. } | MatchKind::ChangedCommentPart {} => None, + }; + + let should_insert = match highest_line { + Some(highest_this_side) => mp.pos.line > highest_this_side, + None => true, + }; + + if should_insert { + res.push((Some(mp.pos.line), opposite_line)); + + highest_line = Some(mp.pos.line); + if opposite_line.is_some() { + highest_opposite_line = opposite_line; + } + } + } + + res +} + +fn merge_in_opposite_lines( + matched_lines: &[(Option, Option)], + novel_opposite_lines: &[LineNumber], +) -> Vec<(Option, Option)> { + let mut res: Vec<(Option, Option)> = vec![]; + + let mut i = 0; + for (line, opposite_line) in matched_lines { + if let Some(opposite_line) = opposite_line { + while let Some(novel_opposite_line) = novel_opposite_lines.get(i) { + if novel_opposite_line < opposite_line { + res.push((None, Some(*novel_opposite_line))); + i += 1; + } else if novel_opposite_line == opposite_line { + i += 1; + } else { + break; + } + } + } + res.push((*line, *opposite_line)); + } + + while let Some(novel_opposite_line) = novel_opposite_lines.get(i) { + res.push((None, Some(*novel_opposite_line))); + i += 1; + } + + res +} // TODO: use FxHashMap here. pub fn opposite_positions(mps: &[MatchedPos]) -> HashMap> { @@ -296,6 +401,8 @@ pub fn add_context( mod tests { use std::iter::FromIterator; + use crate::{positions::SingleLineSpan, syntax::TokenKind}; + use super::*; use pretty_assertions::assert_eq; @@ -312,4 +419,53 @@ mod tests { let res = calculate_before_context(&lines, &opposite_to_lhs, &opposite_to_rhs); assert_eq!(res, vec![(Some(0.into()), Some(0.into()))]); } + + #[test] + fn test_matched_lines() { + let matched_pos = SingleLineSpan { + line: 1.into(), + start_col: 2, + end_col: 3, + }; + let mps = [ + MatchedPos { + kind: MatchKind::Novel { + highlight: TokenKind::Delimiter, + }, + pos: SingleLineSpan { + line: 0.into(), + start_col: 1, + end_col: 2, + }, + }, + MatchedPos { + kind: MatchKind::Unchanged { + highlight: TokenKind::Delimiter, + self_pos: vec![matched_pos], + opposite_pos: vec![matched_pos], + }, + pos: matched_pos, + }, + ]; + + assert_eq!( + matched_lines(&mps), + vec![(Some(0.into()), None), (Some(1.into()), Some(1.into()))] + ); + } + + #[test] + fn test_merge_in_opposite_lines() { + let matched_lines = [(Some(1.into()), Some(1.into()))]; + let novel_lines = [0.into(), 3.into()]; + + assert_eq!( + merge_in_opposite_lines(&matched_lines, &novel_lines), + vec![ + (None, Some(0.into())), + (Some(1.into()), Some(1.into())), + (None, Some(3.into())) + ] + ); + } } diff --git a/src/hunks.rs b/src/hunks.rs index 10644b855..1f681749b 100644 --- a/src/hunks.rs +++ b/src/hunks.rs @@ -5,16 +5,10 @@ /// If we exceed this, the lines are stored in separate hunks. const MAX_DISTANCE: usize = 4; -use std::{ - collections::{HashMap, HashSet}, - iter::FromIterator, -}; +use std::collections::{HashMap, HashSet}; use crate::{ - context::{ - add_context, calculate_after_context, calculate_before_context, flip_tuple, flip_tuples, - opposite_positions, - }, + context::{add_context, opposite_positions, MAX_PADDING}, lines::LineNumber, syntax::{zip_pad_shorter, MatchedPos}, }; @@ -491,7 +485,7 @@ pub fn matched_pos_to_hunks(lhs_mps: &[MatchedPos], rhs_mps: &[MatchedPos]) -> V /// 3 14 /// 4 -- (preserve outer gaps) /// ``` -fn ensure_contiguous( +pub fn ensure_contiguous( lines: &[(Option, Option)], ) -> Vec<(Option, Option)> { let mut res: Vec<(Option, Option)> = vec![]; @@ -536,7 +530,7 @@ fn ensure_contiguous( /// 12 21 /// /// The returned vec will contain no (None, None) pairs. -fn compact_gaps( +pub fn compact_gaps( items: Vec<(Option, Option)>, ) -> Vec<(Option, Option)> { let mut res: Vec<(Option, Option)> = vec![]; @@ -590,145 +584,60 @@ fn compact_gaps( res } -/// Return the smallest item in `item_set` that is greater than `value`. -/// -/// If `value` is None, just return the smallest value. -fn first_greater(item_set: &HashSet, value: Option) -> Option { - let mut items = Vec::from_iter(item_set.iter().copied()); - items.sort_unstable(); - - match value { - Some(value) => { - for item in items { - if item > value { - return Some(item); - } - } - - None - } - None => items.first().copied(), - } -} - -/// Starting from `lhs_start`, fill in all the lines with known -/// opposites up to `end`. The resulting vec will include both start -/// and end, but the intervening values are all Some. -fn fill_matched_lines( - lhs_start: LineNumber, - max_lhs_src_line: LineNumber, - end: (Option, Option), - opposite_to_lhs: &HashMap>, - opposite_to_rhs: &HashMap>, +pub fn matched_lines_for_hunk( + matched_lines: &[(Option, Option)], + hunk: &Hunk, ) -> Vec<(Option, Option)> { - let (lhs_end, rhs_end) = end; - - let mut lhs_num = lhs_start; - let mut rhs_num: Option = None; - - let mut res: Vec<(Option, Option)> = vec![]; - if !opposite_to_lhs.contains_key(&lhs_start) { - res.push((Some(lhs_start), None)); - } - - loop { - if lhs_num > max_lhs_src_line { - break; - } - - if let Some(lhs_end) = lhs_end { - if lhs_num >= lhs_end { + // TODO: Use binary search instead. + let (hunk_lhs_first, hunk_rhs_first) = hunk.lines.first().expect("Hunks are non-empty"); + let (hunk_lhs_last, hunk_rhs_last) = hunk.lines.last().expect("Hunks are non-empty"); + + let mut start_i = None; + for (i, (lhs_matched_line, rhs_matched_line)) in matched_lines.iter().enumerate() { + if let (Some(lhs_matched_line), Some(hunk_lhs_first)) = (lhs_matched_line, hunk_lhs_first) { + if lhs_matched_line == hunk_lhs_first { + start_i = Some(i); break; } } - - if let Some(rhs_line_set) = opposite_to_lhs.get(&lhs_num) { - rhs_num = first_greater(rhs_line_set, rhs_num); - - if let Some(rhs_num) = rhs_num { - if let Some(rhs_end) = rhs_end { - if rhs_num >= rhs_end { - break; - } - } - - res.push((Some(lhs_num), Some(rhs_num))); + if let (Some(rhs_matched_line), Some(hunk_rhs_first)) = (rhs_matched_line, hunk_rhs_first) { + if rhs_matched_line == hunk_rhs_first { + start_i = Some(i); + break; } } - - lhs_num = (lhs_num.0 + 1).into(); } - match end { - (Some(_), Some(_)) => { - res.push(end); - } - (Some(lhs_end), None) => { - if lhs_end != lhs_start && !opposite_to_lhs.contains_key(&lhs_end) { - res.push(end); + let mut end_i = None; + for (i, (lhs_matched_line, rhs_matched_line)) in matched_lines.iter().enumerate() { + if let (Some(lhs_matched_line), Some(hunk_lhs_last)) = (lhs_matched_line, hunk_lhs_last) { + if lhs_matched_line == hunk_lhs_last { + end_i = Some(i + 1); + break; } } - (None, Some(rhs_end)) => { - if !opposite_to_rhs.contains_key(&rhs_end) { - res.push(end); + if let (Some(rhs_matched_line), Some(hunk_rhs_last)) = (rhs_matched_line, hunk_rhs_last) { + if rhs_matched_line == hunk_rhs_last { + end_i = Some(i + 1); + break; } } - _ => {} } - res -} - -pub fn aligned_lines_from_hunk( - hunk: &Hunk, - opposite_to_lhs: &HashMap>, - opposite_to_rhs: &HashMap>, - max_lhs_src_line: LineNumber, - max_rhs_src_line: LineNumber, -) -> Vec<(Option, Option)> { - let hunk_lines: Vec<(Option, Option)> = hunk.lines.clone(); - - // TODO: this largely duplicates add_context(). - let before_context = calculate_before_context(&hunk_lines, opposite_to_lhs, opposite_to_rhs); + let mut start_i = start_i.expect("Hunk lines should be present in matched lines"); + let mut end_i = end_i.expect("Hunk lines should be present in matched lines"); + if start_i >= MAX_PADDING { + start_i -= MAX_PADDING; + } else { + start_i = 0; + } + if end_i + MAX_PADDING < matched_lines.len() { + end_i += MAX_PADDING + } else { + end_i = matched_lines.len(); + } - let mut res: Vec<(Option, Option)> = vec![]; - res.extend(&before_context); - - let first_novel = hunk_lines[0]; - let hunk_end = *hunk_lines.last().expect("Hunk lines should be non-empty"); - - let aligned_between = match first_novel { - (Some(lhs_start), _) => { - // TODO: align based on blank lines too. - fill_matched_lines( - lhs_start, - max_lhs_src_line, - hunk_end, - opposite_to_lhs, - opposite_to_rhs, - ) - } - (_, Some(rhs_start)) => flip_tuples(&fill_matched_lines( - rhs_start, - max_rhs_src_line, - flip_tuple(hunk_end), - opposite_to_rhs, - opposite_to_lhs, - )), - (None, None) => unreachable!(), - }; - res.extend(aligned_between); - - let after_context = calculate_after_context( - &res, - opposite_to_lhs, - opposite_to_rhs, - max_lhs_src_line, - max_rhs_src_line, - ); - res.extend(after_context); - - compact_gaps(ensure_contiguous(&res)) + matched_lines[start_i..end_i].iter().copied().collect() } #[cfg(test)] @@ -779,139 +688,6 @@ mod tests { assert_eq!(res, vec![(Side::LHS, novel_mp)]); } - /// Simulate a simple diff: - /// - /// ```text - /// // Old - /// A - /// B - /// - /// // New - /// A - /// x - /// B - /// ``` - #[test] - fn test_aligned_lines_from_hunk() { - let hunk = Hunk { - lines: vec![(None, Some(1.into()))], - }; - - let mut opposite_to_lhs = HashMap::new(); - opposite_to_lhs.insert(0.into(), HashSet::from_iter([0.into()])); - opposite_to_lhs.insert(1.into(), HashSet::from_iter([2.into()])); - - let mut opposite_to_rhs = HashMap::new(); - opposite_to_rhs.insert(0.into(), HashSet::from_iter([0.into()])); - opposite_to_rhs.insert(2.into(), HashSet::from_iter([1.into()])); - - let res = aligned_lines_from_hunk( - &hunk, - &opposite_to_lhs, - &opposite_to_rhs, - 1.into(), - 2.into(), - ); - assert_eq!( - res, - vec![ - (Some(0.into()), Some(0.into())), - (None, Some(1.into())), - (Some(1.into()), Some(2.into())) - ] - ); - } - - /// Simulate a simple diff: - /// - /// ```text - /// // Old - /// A - /// B - /// - /// // New - /// A - /// x - /// B - /// y - /// ``` - #[test] - fn test_aligned_lines_trailing_novel() { - let hunk = Hunk { - lines: vec![(None, Some(1.into())), (None, Some(3.into()))], - }; - - let mut opposite_to_lhs = HashMap::new(); - opposite_to_lhs.insert(0.into(), HashSet::from_iter([0.into()])); - opposite_to_lhs.insert(1.into(), HashSet::from_iter([2.into()])); - - let mut opposite_to_rhs = HashMap::new(); - opposite_to_rhs.insert(0.into(), HashSet::from_iter([0.into()])); - opposite_to_rhs.insert(2.into(), HashSet::from_iter([1.into()])); - - let res = aligned_lines_from_hunk( - &hunk, - &opposite_to_lhs, - &opposite_to_rhs, - 1.into(), - 3.into(), - ); - assert_eq!( - res, - vec![ - (Some(0.into()), Some(0.into())), - (None, Some(1.into())), - (Some(1.into()), Some(2.into())), - (None, Some(3.into())), - ] - ); - } - - /// Regression test for a case where inserting a single line on - /// the RHS caused us to repeat lines. - #[test] - fn test_aligned_lines_hunk_one_line_regression() { - let hunk = Hunk { - lines: vec![(None, Some(3.into()))], - }; - - let mut opposite_to_lhs = HashMap::new(); - opposite_to_lhs.insert(0.into(), HashSet::from_iter([1.into()])); - opposite_to_lhs.insert(1.into(), HashSet::from_iter([2.into()])); - // No match for line 3 on RHS. - opposite_to_lhs.insert(2.into(), HashSet::from_iter([4.into()])); - - let mut opposite_to_rhs = HashMap::new(); - opposite_to_rhs.insert(1.into(), HashSet::from_iter([0.into()])); - opposite_to_lhs.insert(2.into(), HashSet::from_iter([1.into()])); - // No match for line 2 on RHS. - opposite_to_rhs.insert(4.into(), HashSet::from_iter([2.into()])); - - let res = aligned_lines_from_hunk( - &hunk, - &opposite_to_lhs, - &opposite_to_rhs, - 2.into(), - 4.into(), - ); - assert_eq!( - res, - vec![ - (None, Some(0.into())), - (Some(0.into()), Some(1.into())), - (None, Some(2.into())), - // Choosing to align RHS 3 despite it not being - // present in opposite_to_rhs is an arbitrary choice - // due to compact_gaps(). - // - // The only important thing to check here is just that - // nodes are monotonically increasing. - (Some(1.into()), Some(3.into())), - (Some(2.into()), Some(4.into())), - ] - ); - } - #[test] fn test_matched_pos_to_hunks() { let matched_pos = SingleLineSpan { diff --git a/src/side_by_side.rs b/src/side_by_side.rs index 39e3380ba..5ffaf3656 100644 --- a/src/side_by_side.rs +++ b/src/side_by_side.rs @@ -8,9 +8,9 @@ use std::{ }; use crate::{ - context::opposite_positions, - hunks::{aligned_lines_from_hunk, Hunk}, - lines::{codepoint_len, format_line_num, LineNumber, MaxLine}, + context::all_matched_lines_filled, + hunks::{matched_lines_for_hunk, Hunk}, + lines::{codepoint_len, format_line_num, LineNumber}, positions::SingleLineSpan, style::{self, apply_colors, color_positions, split_and_apply, Style}, syntax::{zip_pad_shorter, MatchedPos}, @@ -246,9 +246,6 @@ pub fn display_hunks( lhs_mps: &[MatchedPos], rhs_mps: &[MatchedPos], ) -> String { - let opposite_to_lhs = opposite_positions(lhs_mps); - let opposite_to_rhs = opposite_positions(rhs_mps); - let lhs_colored_src = apply_colors(lhs_src, true, lhs_mps); let rhs_colored_src = apply_colors(rhs_src, false, rhs_mps); @@ -276,18 +273,14 @@ pub fn display_hunks( let mut prev_lhs_line_num = None; let mut prev_rhs_line_num = None; + let matched_lines = all_matched_lines_filled(lhs_mps, rhs_mps); + let mut out_lines: Vec = vec![]; for (i, hunk) in hunks.iter().enumerate() { out_lines.push(style::header(display_path, i + 1, hunks.len(), lang_name)); - let aligned_lines = aligned_lines_from_hunk( - hunk, - &opposite_to_lhs, - &opposite_to_rhs, - lhs_src.max_line(), - rhs_src.max_line(), - ); + let aligned_lines = matched_lines_for_hunk(&matched_lines, hunk); let no_lhs_changes = hunk.lines.iter().all(|(l, _)| l.is_none()); let no_rhs_changes = hunk.lines.iter().all(|(_, r)| r.is_none()); let same_lines = aligned_lines.iter().all(|(l, r)| l == r);