//! Syntax tree definitions with change metadata. #![allow(clippy::mutable_key_type)] // Hash for Syntax doesn't use mutable fields. use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32}; use line_numbers::LinePositions; use line_numbers::SingleLineSpan; use typed_arena::Arena; use self::Syntax::*; use crate::lines::split_on_newlines; use crate::words::split_words_and_numbers; use crate::{ diff::changes::ChangeKind, diff::changes::{ChangeKind::*, ChangeMap}, diff::lcs_diff, hash::DftHashMap, lines::is_all_whitespace, }; /// A Debug implementation that does not recurse into the /// corresponding node mentioned for Unchanged. Otherwise we will /// infinitely loop on unchanged nodes, which both point to the other. impl fmt::Debug for ChangeKind<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let desc = match self { Unchanged(node) => format!("Unchanged(ID: {})", node.id()), ReplacedComment(lhs_node, rhs_node) | ReplacedString(lhs_node, rhs_node) => { let change_kind = if let ReplacedComment(_, _) = self { "ReplacedComment" } else { "ReplacedString" }; format!( "{}(lhs ID: {}, rhs ID: {})", change_kind, lhs_node.id(), rhs_node.id() ) } Novel => "Novel".to_owned(), }; f.write_str(&desc) } } pub(crate) type SyntaxId = NonZeroU32; /// Fields that are common to both `Syntax::List` and `Syntax::Atom`. pub(crate) struct SyntaxInfo<'a> { /// The previous node with the same parent as this one. previous_sibling: Cell>>, /// The next node with the same parent as this one. next_sibling: Cell>>, /// The syntax node that occurs before this one, in a depth-first /// tree traversal. prev: Cell>>, /// The parent syntax node, if present. parent: Cell>>, /// The number of nodes that are ancestors of this one. num_ancestors: Cell, pub(crate) num_after: Cell, /// A number that uniquely identifies this syntax node. unique_id: Cell, /// A number that uniquely identifies the content of this syntax /// node. This may be the same as nodes on the other side of the /// diff, or nodes at different positions. /// /// Values are sequential, not hashes. Collisions never occur. content_id: Cell, /// Is this the only node with this content? Ignores nodes on the /// other side. content_is_unique: Cell, } impl<'a> SyntaxInfo<'a> { pub(crate) fn new() -> Self { Self { previous_sibling: Cell::new(None), next_sibling: Cell::new(None), prev: Cell::new(None), parent: Cell::new(None), num_ancestors: Cell::new(0), num_after: Cell::new(0), unique_id: Cell::new(NonZeroU32::new(u32::MAX).unwrap()), content_id: Cell::new(0), content_is_unique: Cell::new(false), } } } impl Default for SyntaxInfo<'_> { fn default() -> Self { Self::new() } } pub(crate) enum Syntax<'a> { List { info: SyntaxInfo<'a>, open_position: Vec, open_content: String, children: Vec<&'a Syntax<'a>>, close_position: Vec, close_content: String, num_descendants: u32, }, Atom { info: SyntaxInfo<'a>, position: Vec, content: String, kind: AtomKind, }, } fn dbg_pos(pos: &[SingleLineSpan]) -> String { match pos { [] => "-".into(), [pos] => format!("{}:{}-{}", pos.line.0, pos.start_col, pos.end_col), [start, .., end] => format!( "{}:{}-{}:{}", start.line.0, start.start_col, end.line.0, end.end_col ), } } impl<'a> fmt::Debug for Syntax<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { List { open_content, open_position, children, close_content, close_position, info, .. } => { let mut ds = f.debug_struct(&format!( "List id:{} content_id:{}", self.id(), self.content_id() )); ds.field("open_content", &open_content) .field("open_position", &dbg_pos(open_position)) .field("children", &children) .field("close_content", &close_content) .field("close_position", &dbg_pos(close_position)); if env::var("DFT_VERBOSE").is_ok() { let next_sibling_s = match info.next_sibling.get() { Some(List { .. }) => "Some(List)", Some(Atom { .. }) => "Some(Atom)", None => "None", }; ds.field("next_sibling", &next_sibling_s); } ds.finish() } Atom { content, position, info, kind: highlight, .. } => { let mut ds = f.debug_struct(&format!( "Atom id:{} content_id:{}", self.id(), self.content_id() )); ds.field("content", &content); ds.field("position", &dbg_pos(position)); if env::var("DFT_VERBOSE").is_ok() { ds.field("highlight", highlight); let next_sibling_s = match info.next_sibling.get() { Some(List { .. }) => "Some(List)", Some(Atom { .. }) => "Some(Atom)", None => "None", }; ds.field("next_sibling", &next_sibling_s); } ds.finish() } } } } impl<'a> Syntax<'a> { pub(crate) fn new_list( arena: &'a Arena>, open_content: &str, open_position: Vec, children: Vec<&'a Syntax<'a>>, close_content: &str, close_position: Vec, ) -> &'a Syntax<'a> { // Skip empty atoms: they aren't displayed, so there's no // point making our syntax tree bigger. These occur when we're // parsing incomplete or malformed programs. let children = children .into_iter() .filter(|n| match n { List { .. } => true, Atom { content, .. } => !content.is_empty(), }) .collect::>(); // Don't bother creating a list if we have no open/close and // there's only one child. This occurs in small files with // thorough tree-sitter parsers: you get parse trees like: // // (compilation-unit (top-level-def (function ...))) // // This is a small performance win as it makes the difftastic // syntax tree smaller. It also really helps when looking at // debug output for small inputs. if children.len() == 1 && open_content.is_empty() && close_content.is_empty() { return children[0]; } let mut num_descendants = 0; for child in &children { num_descendants += match child { List { num_descendants, .. } => *num_descendants + 1, Atom { .. } => 1, }; } arena.alloc(List { info: SyntaxInfo::default(), open_position, open_content: open_content.into(), close_content: close_content.into(), close_position, children, num_descendants, }) } pub(crate) fn new_atom( arena: &'a Arena>, mut position: Vec, mut content: String, kind: AtomKind, ) -> &'a Syntax<'a> { // If a parser hasn't cleaned up \r on CRLF files with // comments, discard it. if content.ends_with('\r') { content.pop(); } // If a parser adds a trailing newline to the atom, discard // it. It produces worse diffs: we'd rather align on real // content, and complicates handling of trailing newlines at // the end of the file. if content.ends_with('\n') { position.pop(); content.pop(); } arena.alloc(Atom { info: SyntaxInfo::default(), position, content, kind, }) } pub(crate) fn info(&self) -> &SyntaxInfo<'a> { match self { List { info, .. } | Atom { info, .. } => info, } } pub(crate) fn parent(&self) -> Option<&'a Syntax<'a>> { self.info().parent.get() } pub(crate) fn next_sibling(&self) -> Option<&'a Syntax<'a>> { self.info().next_sibling.get() } /// A unique ID of this syntax node. Every node is guaranteed to /// have a different value. pub(crate) fn id(&self) -> SyntaxId { self.info().unique_id.get() } /// A content ID of this syntax node. Two nodes have the same /// content ID if they have the same content, regardless of /// position. pub(crate) fn content_id(&self) -> u32 { self.info().content_id.get() } pub(crate) fn content_is_unique(&self) -> bool { self.info().content_is_unique.get() } pub(crate) fn num_ancestors(&self) -> u32 { self.info().num_ancestors.get() } pub(crate) fn dbg_content(&self) -> String { match self { List { open_content, open_position, close_content, .. } => { let line = open_position .first() .map(|p| p.line.display()) .unwrap_or_else(|| "?".to_owned()); format!("line:{} {} ... {}", line, open_content, close_content) } Atom { content, position, .. } => { let line = position .first() .map_or_else(|| "?".to_owned(), |p| p.line.display()); format!("line:{} {}", line, content) } } } } pub(crate) fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec { fn walk_comment_positions(node: &Syntax<'_>, positions: &mut Vec) { match node { List { children, .. } => { for child in children { walk_comment_positions(child, positions); } } Atom { position, kind, .. } => { if matches!(kind, AtomKind::Comment) { positions.extend(position); } } } } let mut positions = vec![]; for node in nodes { walk_comment_positions(node, &mut positions); } positions } /// Initialise all the fields in `SyntaxInfo`. pub(crate) fn init_all_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) { init_info(lhs_roots, rhs_roots); init_next_prev(lhs_roots); init_next_prev(rhs_roots); } pub(crate) fn print_as_dot<'a>(roots: &[&'a Syntax<'a>]) { println!("digraph {{"); print_as_dot_(roots); println!("}}"); } fn print_as_dot_<'a>(nodes: &[&'a Syntax<'a>]) { for node in nodes { let label = match node { List { open_content, close_content, .. } => { if open_content != "" { format!("[label=\"{open_content}{close_content}\"]") } else { "[style=dotted]".to_owned() } } Atom { content, .. } => { let content = content.replace('\"', "\\\""); format!("[label=\"{content}\"]") } }; println!(" id{} {};", node.id().get(), label); if let List { children, .. } = node { for child in children { println!(" id{} -> id{};", node.id().get(), child.id().get()); } print_as_dot_(children); } } } fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) { let mut id = NonZeroU32::new(1).unwrap(); init_info_on_side(lhs_roots, &mut id); init_info_on_side(rhs_roots, &mut id); let mut existing = DftHashMap::default(); set_content_id(lhs_roots, &mut existing); set_content_id(rhs_roots, &mut existing); set_content_is_unique(lhs_roots); set_content_is_unique(rhs_roots); } type ContentKey = (Option, Option, Vec, bool, bool); fn set_content_id(nodes: &[&Syntax], existing: &mut DftHashMap) { for node in nodes { let key: ContentKey = match node { List { open_content, close_content, children, .. } => { // Recurse first, so children all have their content_id set. set_content_id(children, existing); let children_content_ids: Vec<_> = children.iter().map(|c| c.info().content_id.get()).collect(); ( Some(open_content.clone()), Some(close_content.clone()), children_content_ids, true, true, ) } Atom { content, kind: highlight, .. } => { let is_comment = *highlight == AtomKind::Comment; let clean_content = if is_comment && split_on_newlines(content).count() > 1 { split_on_newlines(content) .map(|l| l.trim_start()) .collect::>() .join("\n") } else { content.clone() }; (Some(clean_content), None, vec![], false, is_comment) } }; // Ensure the ID is always greater than zero, so we can // distinguish an uninitialised SyntaxInfo value. let next_id = existing.len() as u32 + 1; let content_id = existing.entry(key).or_insert(next_id); node.info().content_id.set(*content_id); } } fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) { for (i, node) in nodes.iter().enumerate() { let num_after = parent_num_after + nodes.len() - 1 - i; node.info().num_after.set(num_after); if let List { children, .. } = node { set_num_after(children, num_after); } } } pub(crate) fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) { set_prev_sibling(roots); set_next_sibling(roots); set_prev(roots, None); } /// Set all the `SyntaxInfo` values for all the `roots` on a single /// side (LHS or RHS). fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) { set_parent(roots, None); set_num_ancestors(roots, 0); set_num_after(roots, 0); set_unique_id(roots, next_id); } fn set_unique_id(nodes: &[&Syntax], next_id: &mut SyntaxId) { for node in nodes { node.info().unique_id.set(*next_id); *next_id = NonZeroU32::new(u32::from(*next_id) + 1) .expect("Should not have more than u32::MAX nodes"); if let List { children, .. } = node { set_unique_id(children, next_id); } } } /// Assumes that `set_content_id` has already run. fn find_nodes_with_unique_content(nodes: &[&Syntax], counts: &mut DftHashMap) { for node in nodes { *counts.entry(node.content_id()).or_insert(0) += 1; if let List { children, .. } = node { find_nodes_with_unique_content(children, counts); } } } fn set_content_is_unique_from_counts(nodes: &[&Syntax], counts: &DftHashMap) { for node in nodes { let count = counts .get(&node.content_id()) .expect("Count should be present"); node.info().content_is_unique.set(*count == 1); if let List { children, .. } = node { set_content_is_unique_from_counts(children, counts); } } } fn set_content_is_unique(nodes: &[&Syntax]) { let mut counts = DftHashMap::default(); find_nodes_with_unique_content(nodes, &mut counts); set_content_is_unique_from_counts(nodes, &counts); } fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) { let mut prev = None; for node in nodes { node.info().previous_sibling.set(prev); prev = Some(node); if let List { children, .. } = node { set_prev_sibling(children); } } } fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) { for (i, node) in nodes.iter().enumerate() { let sibling = nodes.get(i + 1).copied(); node.info().next_sibling.set(sibling); if let List { children, .. } = node { set_next_sibling(children); } } } /// For every syntax node in the tree, mark the previous node /// according to a preorder traversal. fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { for (i, node) in nodes.iter().enumerate() { let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) }; node.info().prev.set(node_prev); if let List { children, .. } = node { set_prev(children, Some(node)); } } } fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { for node in nodes { node.info().parent.set(parent); if let List { children, .. } = node { set_parent(children, Some(node)); } } } fn set_num_ancestors(nodes: &[&Syntax], num_ancestors: u32) { for node in nodes { node.info().num_ancestors.set(num_ancestors); if let List { children, .. } = node { set_num_ancestors(children, num_ancestors + 1); } } } impl PartialEq for Syntax<'_> { fn eq(&self, other: &Self) -> bool { debug_assert!(self.content_id() > 0); debug_assert!(other.content_id() > 0); self.content_id() == other.content_id() } } impl<'a> Eq for Syntax<'a> {} /// Different types of strings. We want to diff these the same way, /// but highlight them differently. #[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] pub(crate) enum StringKind { /// A string literal, such as `"foo"`. StringLiteral, /// Plain text, such as the content of `

foo

`. Text, } #[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] pub(crate) enum AtomKind { /// The kind of this atom when we don't know anything else about /// it. This is typically a variable, e.g. `foo`, or a literal /// `123`. Note that string literals have a separate kind. Normal, // TODO: We should either have a AtomWithWords(HighlightKind) or a // separate String, Text and Comment kind. String(StringKind), Type, Comment, Keyword, TreeSitterError, } /// Unlike atoms, tokens can be delimiters like `{`. #[derive(PartialEq, Eq, Debug, Clone, Copy)] pub(crate) enum TokenKind { Delimiter, Atom(AtomKind), } /// A matched token (an atom, a delimiter, or a comment word). #[derive(PartialEq, Eq, Debug, Clone)] pub(crate) enum MatchKind { UnchangedToken { highlight: TokenKind, self_pos: Vec, opposite_pos: Vec, }, /// A novel token in an AST diff. Novel { highlight: TokenKind }, /// When we have a novel item, we often want to highlight novel /// words more prominently. UnchangedPartOfNovelItem represents /// the parts that don't get this special highlighting. /// /// For example, line-based diffs we want to highlight `a` and `b` /// differently to `foo` here. /// /// foo a /// foo b /// /// Whereas for syntactic diffs, we want to do the same thing for /// strings and comments. /// /// "foo a" /// "foo b" /// /// The whole string is a distinct value, but the `a` and `b` are /// the most interesting parts. UnchangedPartOfNovelItem { highlight: TokenKind, self_pos: SingleLineSpan, opposite_pos: Vec, }, /// The novel part of the novel item. For line-based diffs, this /// is the words that are unique to this line. /// /// See the discussion in `UnchangedPartOfNovelItem`. NovelWord { highlight: TokenKind }, /// A syntactic token that was ignored by the AST diff (e.g. when /// ignoring comments for diffing). Ignored { highlight: TokenKind }, } impl MatchKind { pub(crate) fn is_novel(&self) -> bool { matches!( self, MatchKind::Novel { .. } | MatchKind::NovelWord { .. } | MatchKind::UnchangedPartOfNovelItem { .. } ) } } #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct MatchedPos { pub(crate) kind: MatchKind, pub(crate) pos: SingleLineSpan, } /// Given the text `content` from a comment or string, split it into /// `MatchedPos` values for the novel and unchanged words. /// /// If there is negligible text in common with `opposite_content`, /// treat the whole `content` as a single novel region. fn split_atom_words( content: &str, pos: &[SingleLineSpan], opposite_content: &str, opposite_pos: &[SingleLineSpan], kind: AtomKind, ) -> Vec { debug_assert!(kind == AtomKind::Comment || matches!(kind, AtomKind::String(_))); // TODO: merge adjacent single-line comments unless there are // blank lines between them. let content_parts = split_words_and_numbers(content); let other_parts = split_words_and_numbers(opposite_content); let word_diffs = lcs_diff::slice_by_hash(&content_parts, &other_parts); if !has_common_words(&word_diffs) { return pos .iter() .map(|line| MatchedPos { kind: MatchKind::Novel { highlight: TokenKind::Atom(kind), }, pos: *line, }) .collect(); } let content_newlines = LinePositions::from(content); let opposite_content_newlines = LinePositions::from(opposite_content); let mut offset = 0; let mut opposite_offset = 0; let mut mps = vec![]; for diff_res in word_diffs { match diff_res { lcs_diff::DiffResult::Left(word) => { // This word is novel to this side. if !is_all_whitespace(word) { mps.push(MatchedPos { kind: MatchKind::NovelWord { highlight: TokenKind::Atom(kind), }, pos: content_newlines.from_region_relative_to( // TODO: don't assume a single line atom. pos[0], offset, offset + word.len(), )[0], }); } offset += word.len(); } lcs_diff::DiffResult::Both(word, opposite_word) => { // This word is present on both sides. // TODO: don't assume this atom is on a single line. let word_pos = content_newlines.from_region_relative_to(pos[0], offset, offset + word.len()) [0]; let opposite_word_pos = opposite_content_newlines.from_region_relative_to( opposite_pos[0], opposite_offset, opposite_offset + opposite_word.len(), ); mps.push(MatchedPos { kind: MatchKind::UnchangedPartOfNovelItem { highlight: TokenKind::Atom(kind), self_pos: word_pos, opposite_pos: opposite_word_pos, }, pos: word_pos, }); offset += word.len(); opposite_offset += opposite_word.len(); } lcs_diff::DiffResult::Right(opposite_word) => { // Only exists on other side, nothing to do on this side. opposite_offset += opposite_word.len(); } } } mps } /// Are there sufficient common words that we should only highlight /// individual changed words? fn has_common_words(word_diffs: &Vec>) -> bool { let mut novel_count = 0; let mut unchanged_count = 0; for word_diff in word_diffs { match word_diff { lcs_diff::DiffResult::Both(word, _) => { if **word != " " { unchanged_count += 1; } } _ => { novel_count += 1; } } } // We want more than two unchanged words, because the text content // includes the comment or string delimiters. // // A sufficiently similar set of words is when more than 50% of // the words are common between the two sides. We multiply by two // because non-matching words gives us two novel words, whereas // matched words only gives us one unchanged word. unchanged_count > 2 && unchanged_count * 2 >= novel_count } /// Skip line spans at the beginning or end that have zero width. fn filter_empty_ends(line_spans: &[SingleLineSpan]) -> Vec { let mut spans: Vec = vec![]; for (i, span) in line_spans.iter().enumerate() { if (i == 0 || i == line_spans.len() - 1) && span.start_col == span.end_col { continue; } spans.push(*span); } spans } impl MatchedPos { fn new( ck: ChangeKind, highlight: TokenKind, pos: &[SingleLineSpan], is_close_delim: bool, ) -> Vec { // Don't create a MatchedPos for empty positions at the start // or end. We still want empty positions in the middle of // multiline atoms, as a multiline string literal may include // empty lines. let pos = filter_empty_ends(pos); match ck { ReplacedComment(this, opposite) | ReplacedString(this, opposite) => { let this_content = match this { List { .. } => unreachable!(), Atom { content, .. } => content, }; let (opposite_content, opposite_pos) = match opposite { List { .. } => unreachable!(), Atom { content, position, .. } => (content, position), }; let kind = if let ReplacedString(this, _) = ck { match this { Atom { kind: AtomKind::String(StringKind::Text), .. } => AtomKind::String(StringKind::Text), _ => AtomKind::String(StringKind::StringLiteral), } } else { AtomKind::Comment }; split_atom_words(this_content, &pos, opposite_content, opposite_pos, kind) } Unchanged(opposite) => { let opposite_pos = match opposite { List { open_position, close_position, .. } => { if is_close_delim { close_position.clone() } else { open_position.clone() } } Atom { position, .. } => position.clone(), }; let opposite_pos_len = opposite_pos.len(); let kind = MatchKind::UnchangedToken { highlight, self_pos: pos.to_vec(), opposite_pos, }; // Create a MatchedPos for every line that `pos` covers. let mut mps = vec![]; for line_pos in &pos { mps.push(Self { kind: kind.clone(), pos: *line_pos, }); // Ensure we have the same number of unchanged // MatchedPos on the LHS and RHS. This allows us // to consider unchanged MatchedPos values // pairwise. if mps.len() == opposite_pos_len { break; } } mps } Novel => { let kind = MatchKind::Novel { highlight }; // Create a MatchedPos for every line that `pos` covers. let mut mps = vec![]; for line_pos in &pos { // Don't create a MatchedPos for entirely empty positions. This // occurs when we have lists with empty open/close // delimiter positions, such as the top-level list of syntax items. if pos.len() == 1 && line_pos.start_col == line_pos.end_col { continue; } mps.push(Self { kind: kind.clone(), pos: *line_pos, }); } mps } } } } /// Walk `nodes` and return a vec of all the changed positions. pub(crate) fn change_positions<'a>( nodes: &[&'a Syntax<'a>], change_map: &ChangeMap<'a>, ) -> Vec { let mut positions = Vec::new(); let mut seen_unchanged = false; change_positions_(nodes, change_map, &mut positions, &mut seen_unchanged); // If there are no unchanged items, insert a dummy item at the // beginning of both files with a width of zero. This gives // display something to use when aligning. if !seen_unchanged { let lhs_pos = SingleLineSpan { line: 0.into(), start_col: 0, end_col: 0, }; let rhs_pos = SingleLineSpan { line: 0.into(), start_col: 0, end_col: 0, }; positions.insert( 0, MatchedPos { kind: MatchKind::UnchangedToken { highlight: TokenKind::Atom(AtomKind::Normal), self_pos: vec![lhs_pos], opposite_pos: vec![rhs_pos], }, pos: lhs_pos, }, ); } positions } fn change_positions_<'a>( nodes: &[&'a Syntax<'a>], change_map: &ChangeMap<'a>, positions: &mut Vec, seen_unchanged: &mut bool, ) { for node in nodes { let change = change_map .get(node) .unwrap_or_else(|| panic!("Should have changes set in all nodes: {:#?}", node)); if matches!(change, ChangeKind::Unchanged(_)) { *seen_unchanged = true; } match node { List { open_position, children, close_position, .. } => { positions.extend(MatchedPos::new( change, TokenKind::Delimiter, open_position, false, )); change_positions_(children, change_map, positions, seen_unchanged); positions.extend(MatchedPos::new( change, TokenKind::Delimiter, close_position, true, )); } Atom { position, kind, .. } => { positions.extend(MatchedPos::new( change, TokenKind::Atom(*kind), position, false, )); } } } } pub(crate) fn zip_pad_shorter( lhs: &[Tx], rhs: &[Ty], ) -> Vec<(Option, Option)> { let mut res = vec![]; let mut lhs_iter = lhs.iter(); let mut rhs_iter = rhs.iter(); loop { match (lhs_iter.next(), rhs_iter.next()) { (None, None) => break, (x, y) => res.push((x.cloned(), y.cloned())), } } res } /// Zip `lhs` with `rhs`, but repeat the last item from the shorter /// slice. pub(crate) fn zip_repeat_shorter(lhs: &[Tx], rhs: &[Ty]) -> Vec<(Tx, Ty)> { let lhs_last: Tx = match lhs.last() { Some(last) => last.clone(), None => return vec![], }; let rhs_last: Ty = match rhs.last() { Some(last) => last.clone(), None => return vec![], }; let mut res = vec![]; let mut lhs_iter = lhs.iter(); let mut rhs_iter = rhs.iter(); loop { match (lhs_iter.next(), rhs_iter.next()) { (None, None) => break, (x, y) => res.push(( x.cloned().unwrap_or_else(|| lhs_last.clone()), y.cloned().unwrap_or_else(|| rhs_last.clone()), )), } } res } #[cfg(test)] mod tests { use pretty_assertions::assert_eq; use super::*; /// Consider comment atoms as distinct to other atoms even if the /// content matches otherwise. #[test] fn test_comment_and_atom_differ() { let pos = vec![SingleLineSpan { line: 0.into(), start_col: 2, end_col: 3, }]; let arena = Arena::new(); let comment = Syntax::new_atom(&arena, pos.clone(), "foo".to_owned(), AtomKind::Comment); let atom = Syntax::new_atom(&arena, pos, "foo".to_owned(), AtomKind::Normal); init_all_info(&[comment], &[atom]); assert_ne!(comment, atom); } #[test] fn test_new_atom_truncates_carriage_return() { let arena = Arena::new(); let position = vec![]; let content = "foo\r"; let atom = Syntax::new_atom(&arena, position, content.to_owned(), AtomKind::Comment); match atom { List { .. } => unreachable!(), Atom { content, .. } => { assert_eq!(content, "foo"); } } } #[test] fn test_new_atom_truncates_trailing_newline() { let arena = Arena::new(); let position = vec![ SingleLineSpan { line: 0.into(), start_col: 0, end_col: 8, }, SingleLineSpan { line: 1.into(), start_col: 0, end_col: 1, }, ]; let content = ";; hello\n"; let atom = Syntax::new_atom(&arena, position, content.to_owned(), AtomKind::Comment); match atom { List { .. } => unreachable!(), Atom { position, content, .. } => { assert_eq!(content, ";; hello"); assert_eq!( *position, vec![SingleLineSpan { line: 0.into(), start_col: 0, end_col: 8, }] ); } } } /// Ignore the syntax highlighting kind when comparing /// atoms. Sometimes changing delimiter wrapping can change /// whether a parser thinks that a node is e.g. a type. #[test] fn test_atom_equality_ignores_highlighting() { let pos = vec![SingleLineSpan { line: 0.into(), start_col: 2, end_col: 3, }]; let arena = Arena::new(); let type_atom = Syntax::new_atom(&arena, pos.clone(), "foo".to_owned(), AtomKind::Type); let atom = Syntax::new_atom(&arena, pos, "foo".to_owned(), AtomKind::Normal); init_all_info(&[type_atom], &[atom]); assert_eq!(type_atom, atom); } #[test] fn test_flatten_trivial_list() { let pos = vec![SingleLineSpan { line: 0.into(), start_col: 2, end_col: 3, }]; let arena = Arena::new(); let atom = Syntax::new_atom(&arena, pos, "foo".to_owned(), AtomKind::Normal); let trivial_list = Syntax::new_list(&arena, "", vec![], vec![atom], "", vec![]); assert!(matches!(trivial_list, Atom { .. })); } #[test] fn test_ignore_empty_atoms() { let pos = vec![SingleLineSpan { line: 0.into(), start_col: 2, end_col: 2, }]; let arena = Arena::new(); let atom = Syntax::new_atom(&arena, pos, "".to_owned(), AtomKind::Normal); let trivial_list = Syntax::new_list(&arena, "(", vec![], vec![atom], ")", vec![]); match trivial_list { List { children, .. } => { assert_eq!(children.len(), 0); } Atom { .. } => unreachable!(), } } #[test] fn test_multiline_comment_ignores_leading_whitespace() { let pos = vec![SingleLineSpan { line: 0.into(), start_col: 2, end_col: 3, }]; let arena = Arena::new(); let x = Syntax::new_atom( &arena, pos.clone(), "foo\nbar".to_owned(), AtomKind::Comment, ); let y = Syntax::new_atom(&arena, pos, "foo\n bar".to_owned(), AtomKind::Comment); init_all_info(&[x], &[y]); assert_eq!(x, y); } #[test] fn test_split_atom_words() { let content = "abc def ghi novel"; let pos = vec![SingleLineSpan { line: 0.into(), start_col: 0, end_col: 17, }]; let opposite_content = "abc def ghi"; let opposite_pos = vec![SingleLineSpan { line: 0.into(), start_col: 0, end_col: 11, }]; let res = split_atom_words( content, &pos, opposite_content, &opposite_pos, AtomKind::Comment, ); assert_eq!( res, vec![ MatchedPos { kind: MatchKind::UnchangedPartOfNovelItem { highlight: TokenKind::Atom(AtomKind::Comment), self_pos: SingleLineSpan { line: 0.into(), start_col: 0, end_col: 3 }, opposite_pos: vec![SingleLineSpan { line: 0.into(), start_col: 0, end_col: 3 }] }, pos: SingleLineSpan { line: 0.into(), start_col: 0, end_col: 3 } }, MatchedPos { kind: MatchKind::UnchangedPartOfNovelItem { highlight: TokenKind::Atom(AtomKind::Comment), self_pos: SingleLineSpan { line: 0.into(), start_col: 3, end_col: 4 }, opposite_pos: vec![SingleLineSpan { line: 0.into(), start_col: 3, end_col: 4 }] }, pos: SingleLineSpan { line: 0.into(), start_col: 3, end_col: 4 } }, MatchedPos { kind: MatchKind::UnchangedPartOfNovelItem { highlight: TokenKind::Atom(AtomKind::Comment), self_pos: SingleLineSpan { line: 0.into(), start_col: 4, end_col: 7 }, opposite_pos: vec![SingleLineSpan { line: 0.into(), start_col: 4, end_col: 7 }] }, pos: SingleLineSpan { line: 0.into(), start_col: 4, end_col: 7 } }, MatchedPos { kind: MatchKind::UnchangedPartOfNovelItem { highlight: TokenKind::Atom(AtomKind::Comment), self_pos: SingleLineSpan { line: 0.into(), start_col: 7, end_col: 8 }, opposite_pos: vec![SingleLineSpan { line: 0.into(), start_col: 7, end_col: 8 }] }, pos: SingleLineSpan { line: 0.into(), start_col: 7, end_col: 8 } }, MatchedPos { kind: MatchKind::UnchangedPartOfNovelItem { highlight: TokenKind::Atom(AtomKind::Comment), self_pos: SingleLineSpan { line: 0.into(), start_col: 8, end_col: 11 }, opposite_pos: vec![SingleLineSpan { line: 0.into(), start_col: 8, end_col: 11 }] }, pos: SingleLineSpan { line: 0.into(), start_col: 8, end_col: 11 } }, MatchedPos { kind: MatchKind::NovelWord { highlight: TokenKind::Atom(AtomKind::Comment) }, pos: SingleLineSpan { line: 0.into(), start_col: 12, end_col: 17 } } ], ); } }