#![allow(clippy::mutable_key_type)] // Hash for Node doesn't use mutable fields. #![allow(dead_code)] use diff::{slice, Result::*}; use itertools::EitherOrBoth; use itertools::Itertools; use std::cell::Cell; use std::cmp::Ordering; use std::collections::HashMap; use std::fmt; use std::hash::{Hash, Hasher}; use typed_arena::Arena; use crate::lines::NewlinePositions; use crate::positions::SingleLineSpan; use ChangeKind::*; use Node::*; #[derive(PartialEq, Eq, Clone, Copy)] pub enum ChangeKind<'a> { Unchanged(&'a Node<'a>), Moved, Novel, } /// A Debug implementation that ignores the corresponding node /// mentioned for Unchanged. Otherwise we will infinitely loop on /// unchanged nodes, which both point to the other. impl<'a> fmt::Debug for ChangeKind<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let desc = match self { Unchanged(_) => "Unchanged", Moved => "Moved", Novel => "Novel", }; f.write_str(desc) } } #[derive(PartialEq, Eq, Copy, Clone, Debug, PartialOrd, Ord)] pub enum AtomKind { String, Comment, Other, } pub enum Node<'a> { List { next: Cell>>, change: Cell>>, open_position: Vec, open_delimiter: String, children: Vec<&'a Node<'a>>, close_position: Vec, close_delimiter: String, num_descendants: usize, }, Atom { next: Cell>>, change: Cell>>, position: Vec, content: String, kind: AtomKind, }, } impl<'a> fmt::Debug for Node<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { List { open_delimiter, children, close_delimiter, change, next, .. } => { let mut ds = f.debug_struct("List"); ds.field("open_delimiter", &open_delimiter) .field("children", &children) .field("close_delimiter", &close_delimiter) .field("change", &change.get()); let next_s = match next.get() { Some(List { .. }) => "Some(List)", Some(Atom { .. }) => "Some(Atom)", None => "None", }; ds.field("next", &next_s); ds.finish() } Atom { content, change, next, .. } => { let mut ds = f.debug_struct("Atom"); ds.field("content", &content).field("change", &change.get()); let next_s = match next.get() { Some(List { .. }) => "Some(List)", Some(Atom { .. }) => "Some(Atom)", None => "None", }; ds.field("next", &next_s); ds.finish() } } } } impl<'a> Node<'a> { #[allow(clippy::clippy::mut_from_ref)] // Clippy doesn't understand arenas. pub fn new_list( arena: &'a Arena>, open_delimiter: &str, open_position: Vec, children: Vec<&'a Node<'a>>, close_delimiter: &str, close_position: Vec, ) -> &'a mut Node<'a> { let mut num_descendants = 0; for child in &children { num_descendants += match child { List { num_descendants, .. } => *num_descendants + 1, Atom { .. } => 1, }; } arena.alloc(List { next: Cell::new(None), change: Cell::new(None), open_position, open_delimiter: open_delimiter.into(), close_delimiter: close_delimiter.into(), close_position, children, num_descendants, }) } #[allow(clippy::clippy::mut_from_ref)] // Clippy doesn't understand arenas. pub fn new_atom( arena: &'a Arena>, position: Vec, content: &str, kind: AtomKind, ) -> &'a mut Node<'a> { arena.alloc(Atom { next: Cell::new(None), position, content: content.into(), change: Cell::new(None), kind, }) } pub fn get_next(&self) -> Option<&'a Node<'a>> { match self { List { next, .. } => next.get(), Atom { next, .. } => next.get(), } } fn get_change(&self) -> Option> { match self { List { change, .. } => change.get(), Atom { change, .. } => change.get(), } } pub fn set_change(&self, ck: ChangeKind<'a>) { match self { List { change, .. } => { change.set(Some(ck)); } Atom { change, .. } => { change.set(Some(ck)); } } } pub fn set_change_deep(&self, ck: ChangeKind<'a>) { self.set_change(ck); if let List { children, .. } = self { // For unchanged lists, match up children with the // unchanged children on the other side. if let Unchanged(List { children: other_children, .. }) = ck { for (child, other_child) in children.iter().zip(other_children) { child.set_change_deep(Unchanged(other_child)); } } else { for child in children { child.set_change_deep(ck); } }; } } } pub fn set_next<'a>(node: &'a Node<'a>) { set_next_(node, None); } fn set_next_<'a>(node: &'a Node<'a>, new_next: Option<&'a Node<'a>>) { match node { List { next, children, .. } => { next.set(new_next); for (i, child) in children.iter().enumerate() { let child_next = match children.get(i + 1) { Some(child_next) => Some(*child_next), None => new_next, }; set_next_(child, child_next); } } Atom { next, .. } => { next.set(new_next); } } } impl<'a> PartialEq for Node<'a> { fn eq(&self, other: &Self) -> bool { match (&self, other) { ( Atom { content: lhs_content, kind: lhs_kind, .. }, Atom { content: rhs_content, kind: rhs_kind, .. }, ) => lhs_content == rhs_content && lhs_kind == rhs_kind, ( List { open_delimiter: lhs_open_delimiter, close_delimiter: lhs_close_delimiter, children: lhs_children, .. }, List { open_delimiter: rhs_open_delimiter, close_delimiter: rhs_close_delimiter, children: rhs_children, .. }, ) => { lhs_open_delimiter == rhs_open_delimiter && lhs_close_delimiter == rhs_close_delimiter && lhs_children == rhs_children } _ => false, } } } impl<'a> Eq for Node<'a> {} impl<'a> Hash for Node<'a> { fn hash(&self, state: &mut H) { match self { List { open_delimiter, close_delimiter, children, .. } => { open_delimiter.hash(state); close_delimiter.hash(state); for child in children { child.hash(state); } } Atom { content, .. } => { content.hash(state); } } } } /// Compare two nodes, treating nodes with more children as /// greater. If the number of nodes match, consider nodes with an /// earlier position to be greater. fn cmp_nodes(lhs: &&Node, rhs: &&Node) -> Ordering { match lhs { List { open_position: lhs_open_position, num_descendants: lhs_num_descendants, .. } => match rhs { List { open_position: rhs_open_position, num_descendants: rhs_num_descendants, .. } => match lhs_num_descendants.cmp(rhs_num_descendants) { Ordering::Equal => lhs_open_position.cmp(rhs_open_position), o => o, }, Atom { .. } => Ordering::Greater, }, Atom { position: lhs_position, .. } => match rhs { List { .. } => Ordering::Less, Atom { position: rhs_position, .. } => lhs_position.cmp(rhs_position), }, } } // Sort a vec of nodes by size, largest first. fn sort_by_size(nodes: &mut Vec<&Node>) { nodes.sort_unstable_by(cmp_nodes); nodes.reverse(); } #[derive(PartialEq, Eq, Debug)] pub enum MatchKind { Unchanged, Moved, Novel, } impl MatchKind { fn from_change(ck: ChangeKind) -> Self { match ck { Unchanged(_) => MatchKind::Unchanged, Moved => MatchKind::Moved, Novel => MatchKind::Novel, } } } #[derive(Debug)] pub struct MatchedPos { pub kind: MatchKind, pub pos: Vec, pub prev_opposite_pos: Vec, } /// Walk `nodes` and return a vec of all the changed positions. pub fn change_positions<'a>(src: &str, opposite_src: &str, nodes: &[&Node<'a>]) -> Vec { let nl_pos = NewlinePositions::from(src); let opposite_nl_pos = NewlinePositions::from(opposite_src); let mut positions = Vec::new(); let mut prev_unchanged = vec![SingleLineSpan { line: 0.into(), start_col: 0, end_col: 0, }]; change_positions_( &nl_pos, &opposite_nl_pos, nodes, &mut prev_unchanged, &mut positions, ); positions } fn change_positions_<'a>( nl_pos: &NewlinePositions, opposite_nl_pos: &NewlinePositions, nodes: &[&Node<'a>], prev_opposite_pos: &mut Vec, positions: &mut Vec, ) { for node in nodes { match node { List { change, open_position, children, close_position, .. } => { let change = change.get().expect(&format!( "Should have changes set in all nodes: {:#?}", node )); if let Unchanged(opposite_node) = change { match opposite_node { List { open_position: opposite_open_pos, .. } => { *prev_opposite_pos = opposite_open_pos.clone(); } Atom { .. } => unreachable!(), } } positions.push(MatchedPos { kind: MatchKind::from_change(change), pos: open_position.clone(), prev_opposite_pos: prev_opposite_pos.clone(), }); change_positions_( nl_pos, opposite_nl_pos, children, prev_opposite_pos, positions, ); if let Unchanged(opposite_node) = change { match opposite_node { List { close_position: opposite_close_pos, .. } => { *prev_opposite_pos = opposite_close_pos.clone(); } Atom { .. } => unreachable!(), } } positions.push(MatchedPos { kind: MatchKind::from_change(change), pos: close_position.clone(), prev_opposite_pos: prev_opposite_pos.clone(), }); } Atom { change, position, .. } => { let change = change.get().expect(&format!( "Should have changes set in all nodes: {:#?}", node )); if let Unchanged(opposite_node) = change { match opposite_node { List { .. } => { dbg!(node, opposite_node); unreachable!() } Atom { position: opposite_position, .. } => { *prev_opposite_pos = opposite_position.clone(); } } } positions.push(MatchedPos { kind: MatchKind::from_change(change), pos: position.clone(), prev_opposite_pos: prev_opposite_pos.clone(), }); } } } } /// Calculate a structural diff between `lhs` and `rhs`, and write the /// change state to the `.change` field on the nodes. pub fn mark_nodes<'a>(lhs: &[&'a Node<'a>], rhs: &[&'a Node<'a>]) { let mut env = Env::new(lhs, rhs); mark_unchanged_or_novel(lhs, rhs, &mut env); mark_moves(env); } /// Handles nodes that exist on both sides, but in different /// positions. These may be moves (when both sides have the same /// number of a node) or additional nodes. /// /// Try to find a minimal set of moves by considering the largest /// subtrees first. fn mark_moves(mut env: Env) { sort_by_size(&mut env.lhs_unmatched); for lhs_node in env.lhs_unmatched { // Partial overlaps? if lhs_node.get_change().is_none() { if env.rhs_counts.try_decrement(lhs_node) { lhs_node.set_change_deep(Moved) } else { lhs_node.set_change_deep(Novel) } } } sort_by_size(&mut env.rhs_unmatched); for rhs_node in env.rhs_unmatched { if rhs_node.get_change().is_none() { if env.lhs_counts.try_decrement(rhs_node) { rhs_node.set_change_deep(Moved) } else { rhs_node.set_change_deep(Novel) } } } } struct NodeCounts<'a> { counts: HashMap<&'a Node<'a>, i64>, } impl<'a> NodeCounts<'a> { fn new(nodes: &[&'a Node<'a>]) -> Self { let mut res = Self { counts: HashMap::new(), }; for node in nodes { res._insert_node(node); } res } fn _insert_node(&mut self, node: &'a Node<'a>) { let entry = self.counts.entry(node).or_insert(0); *entry += 1; match node { List { children, .. } => { for child in children { self._insert_node(child); } } Atom { .. } => {} } } fn possible_move(&self, node: &'a Node<'a>) -> bool { *self.counts.get(node).unwrap_or(&0) > 0 } /// Decrement the count of `node` from `counts`, along with all its children. fn decrement(&mut self, node: &'a Node<'a>) { let count = if let Some(count) = self.counts.get(node) { *count } else { panic!("Called decrement on a node that isn't in counts") }; assert!(count > 0); self.counts.insert(node, count - 1); match node { List { children, .. } => { for child in children { self.decrement(child); } } Atom { .. } => {} } } fn try_decrement(&mut self, node: &'a Node<'a>) -> bool { let node_count = *self.counts.get(node).unwrap_or(&0); if node_count > 0 { self.counts.insert(node, node_count - 1); match node { List { children, .. } => { for child in children { self.try_decrement(child); } } Atom { .. } => {} } true } else { false } } } struct Env<'a> { lhs_counts: NodeCounts<'a>, rhs_counts: NodeCounts<'a>, lhs_unmatched: Vec<&'a Node<'a>>, rhs_unmatched: Vec<&'a Node<'a>>, } impl<'a> Env<'a> { fn new(lhs: &[&'a Node<'a>], rhs: &[&'a Node<'a>]) -> Self { Env { lhs_counts: NodeCounts::new(lhs), rhs_counts: NodeCounts::new(rhs), lhs_unmatched: Vec::new(), rhs_unmatched: Vec::new(), } } } /// Mark nodes that are unambiguously unchanged (they have a /// corresponding node on the other side) or unambiguously novel /// (exactly zero occurrences on the other side). fn mark_unchanged_or_novel<'a>(lhs: &[&'a Node<'a>], rhs: &[&'a Node<'a>], env: &mut Env<'a>) { // Run a longest-common-subsequence diff algorithm on the nodes at // this level, and mark as many things as unchanged as we can. for res in slice(lhs, rhs) { if let Both(lhs_node, rhs_node) = res { // todo: fix sliders by preferring atoms that come before // a list. lhs_node.set_change_deep(Unchanged(rhs_node)); rhs_node.set_change_deep(Unchanged(lhs_node)); env.lhs_counts.decrement(lhs_node); env.rhs_counts.decrement(rhs_node); } } let lhs_unprocessed = lhs.iter().filter(|node| node.get_change().is_none()); let rhs_unprocessed = rhs.iter().filter(|node| node.get_change().is_none()); // For the remaining nodes, process children. for res in lhs_unprocessed.zip_longest(rhs_unprocessed) { let (lhs_node, rhs_node) = match res { EitherOrBoth::Both(lhs_node, rhs_node) => (Some(*lhs_node), Some(*rhs_node)), EitherOrBoth::Left(lhs_node) => (Some(*lhs_node), None), EitherOrBoth::Right(rhs_node) => (None, Some(*rhs_node)), }; mark_novel(lhs_node, rhs_node, env); } } fn mark_novel<'a>(lhs: Option<&'a Node<'a>>, rhs: Option<&'a Node<'a>>, env: &mut Env<'a>) { match (lhs, rhs) { (Some(lhs_node), Some(rhs_node)) => { match ( env.rhs_counts.possible_move(lhs_node), env.lhs_counts.possible_move(rhs_node), ) { (true, true) => { env.lhs_unmatched.push(lhs_node); env.rhs_unmatched.push(rhs_node); return; } (true, false) => { env.lhs_unmatched.push(lhs_node); mark_novel(None, rhs, env); return; } (false, true) => { env.rhs_unmatched.push(rhs_node); mark_novel(lhs, None, env); return; } (false, false) => {} } // Neither is present on the opposite side. Atoms are // novel, but check lists for moved subtrees. match (lhs_node, rhs_node) { ( List { open_delimiter: lhs_start_content, close_delimiter: lhs_end_content, children: lhs_children, .. }, List { open_delimiter: rhs_start_content, close_delimiter: rhs_end_content, children: rhs_children, .. }, ) => { // Both sides are lists, so check the // delimiters for the list node themselves, then // recurse. if lhs_start_content == rhs_start_content && lhs_end_content == rhs_end_content { // We didn't see either the LHS or RHS // node on the other side, but they have // the same delimiters, so only the // children are different. lhs_node.set_change(Unchanged(rhs_node)); rhs_node.set_change(Unchanged(lhs_node)); } else { // Children are different and the wrapping // has changed (e.g. from {} to []). lhs_node.set_change(Novel); rhs_node.set_change(Novel); } mark_unchanged_or_novel(&lhs_children[..], &rhs_children[..], env); } ( List { children: lhs_children, .. }, Atom { .. }, ) => { // TODO: this produces poor results when RHS is // not a descedant of the LHS children. We should // step over RHS in that case. RHS is never a // descendant, or it would be a potential move. lhs_node.set_change(Novel); mark_unchanged_or_novel( &lhs_children[..], std::slice::from_ref(&rhs_node), env, ); } ( Atom { .. }, List { children: rhs_children, .. }, ) => { rhs_node.set_change(Novel); mark_unchanged_or_novel( std::slice::from_ref(&lhs_node), &rhs_children[..], env, ); } (Atom { .. }, Atom { .. }) => { lhs_node.set_change(Novel); rhs_node.set_change(Novel); } } } (Some(lhs_node), None) => { if env.rhs_counts.possible_move(lhs_node) { env.lhs_unmatched.push(lhs_node); } else { lhs_node.set_change(Novel); if let List { children, .. } = lhs_node { mark_unchanged_or_novel(&children[..], &[], env); } } } (None, Some(rhs_node)) => { if env.lhs_counts.possible_move(rhs_node) { env.rhs_unmatched.push(rhs_node); } else { rhs_node.set_change(Novel); if let List { children, .. } = rhs_node { mark_unchanged_or_novel(&[], &children[..], env); } } } (None, None) => {} } } #[cfg(test)] mod tests { use super::*; use pretty_assertions::assert_eq; use AtomKind::Other; /// Ensure that we assign prev_opposite_pos even if the change is on the first node. #[test] fn test_prev_opposite_pos_first_node() { let nodes = &[&Atom { next: Cell::new(None), change: Cell::new(Some(Novel)), position: vec![SingleLineSpan { line: 0.into(), start_col: 2, end_col: 3, }], content: "foo".into(), kind: Other, }]; let positions = change_positions("irrelevant", "also irrelevant", nodes); assert_eq!( positions[0].prev_opposite_pos, vec![SingleLineSpan { line: 0.into(), start_col: 0, end_col: 0 }] ); } #[test] fn test_atom_equality_ignores_change_and_pos() { assert_eq!( Atom { next: Cell::new(None), change: Cell::new(Some(Novel)), position: vec![SingleLineSpan { line: 1.into(), start_col: 2, end_col: 3 }], content: "foo".into(), kind: Other, }, Atom { next: Cell::new(None), change: Cell::new(None), position: vec![SingleLineSpan { line: 10.into(), start_col: 20, end_col: 30 }], content: "foo".into(), kind: Other, } ); } }