difftastic/src/parse/syntax.rs

1369 lines
43 KiB
Rust

//! Syntax tree definitions with change metadata.
#![allow(clippy::mutable_key_type)] // Hash for Syntax doesn't use mutable fields.
use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32};
use line_numbers::LinePositions;
use line_numbers::SingleLineSpan;
use typed_arena::Arena;
use self::Syntax::*;
use crate::lines::split_on_newlines;
use crate::words::split_words_and_numbers;
use crate::{
diff::changes::ChangeKind,
diff::changes::{ChangeKind::*, ChangeMap},
diff::lcs_diff,
hash::DftHashMap,
lines::is_all_whitespace,
};
/// Debug output that prints only the IDs of the nodes a `ChangeKind`
/// refers to, rather than the nodes themselves. Matched nodes on the
/// two sides refer to each other, so recursing into them would never
/// terminate.
impl fmt::Debug for ChangeKind<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The two "replaced" variants share a format; everything else
        // is written out immediately.
        let (label, lhs_node, rhs_node) = match self {
            Unchanged(node) => return write!(f, "Unchanged(ID: {})", node.id()),
            Novel => return f.write_str("Novel"),
            ReplacedComment(lhs_node, rhs_node) => ("ReplacedComment", lhs_node, rhs_node),
            ReplacedString(lhs_node, rhs_node) => ("ReplacedString", lhs_node, rhs_node),
        };
        write!(
            f,
            "{}(lhs ID: {}, rhs ID: {})",
            label,
            lhs_node.id(),
            rhs_node.id()
        )
    }
}
/// The type used for the unique ID of a syntax node. It is a
/// `NonZeroU32`, so zero is never a valid ID.
pub(crate) type SyntaxId = NonZeroU32;
/// Fields that are common to both `Syntax::List` and `Syntax::Atom`.
///
/// Every field is wrapped in a `Cell`, so values can be filled in
/// through a shared reference after the node has been created.
pub(crate) struct SyntaxInfo<'a> {
/// The previous node with the same parent as this one.
previous_sibling: Cell<Option<&'a Syntax<'a>>>,
/// The next node with the same parent as this one.
next_sibling: Cell<Option<&'a Syntax<'a>>>,
/// The node visited immediately before this one: the previous
/// sibling when there is one, otherwise the parent.
prev: Cell<Option<&'a Syntax<'a>>>,
/// The parent syntax node, if present.
parent: Cell<Option<&'a Syntax<'a>>>,
/// The number of nodes that are ancestors of this one.
num_ancestors: Cell<u32>,
/// The number of later siblings of this node, plus the number of
/// later siblings of each of its ancestors.
pub(crate) num_after: Cell<usize>,
/// A number that uniquely identifies this syntax node. Holds
/// `u32::MAX` until a real ID has been assigned.
unique_id: Cell<SyntaxId>,
/// A number that uniquely identifies the content of this syntax
/// node. This may be the same as nodes on the other side of the
/// diff, or nodes at different positions.
///
/// Values are sequential, not hashes. Collisions never occur.
/// Zero means the value has not been initialised yet.
content_id: Cell<u32>,
/// Is this the only node with this content? Ignores nodes on the
/// other side.
content_is_unique: Cell<bool>,
}
impl<'a> SyntaxInfo<'a> {
    /// Create a `SyntaxInfo` with every field in its uninitialised
    /// state: no links to other nodes, zero counts, and a content ID
    /// of zero.
    pub(crate) fn new() -> Self {
        // `SyntaxId` cannot be zero, so `u32::MAX` is stored until a
        // real ID is assigned.
        let placeholder_id = NonZeroU32::new(u32::MAX).unwrap();

        Self {
            parent: Cell::new(None),
            previous_sibling: Cell::new(None),
            next_sibling: Cell::new(None),
            prev: Cell::new(None),
            num_ancestors: Cell::new(0),
            num_after: Cell::new(0),
            unique_id: Cell::new(placeholder_id),
            content_id: Cell::new(0),
            content_is_unique: Cell::new(false),
        }
    }
}
impl Default for SyntaxInfo<'_> {
fn default() -> Self {
Self::new()
}
}
/// A node in the syntax tree: either a `List` of child nodes with
/// optional delimiters, or a leaf `Atom`.
pub(crate) enum Syntax<'a> {
/// A sequence of child nodes, wrapped in an opening and a closing
/// delimiter. Either delimiter may be the empty string.
List {
/// Metadata shared with `Atom`: links to related nodes and IDs.
info: SyntaxInfo<'a>,
/// Where the opening delimiter occurs.
open_position: Vec<SingleLineSpan>,
/// The text of the opening delimiter.
open_content: String,
/// The nodes directly inside this list, in order.
children: Vec<&'a Syntax<'a>>,
/// Where the closing delimiter occurs.
close_position: Vec<SingleLineSpan>,
/// The text of the closing delimiter.
close_content: String,
/// The total number of nodes below this list: its children,
/// their children, and so on.
num_descendants: u32,
},
/// A leaf node holding a piece of text.
Atom {
/// Metadata shared with `List`: links to related nodes and IDs.
info: SyntaxInfo<'a>,
/// Where this atom occurs.
position: Vec<SingleLineSpan>,
/// The text of this atom.
content: String,
/// What sort of atom this is, e.g. a comment or a string.
kind: AtomKind,
},
}
/// Render `pos` compactly for debug output.
///
/// An empty slice is shown as `-`, a single span as
/// `line:start-end`, and several spans as the start of the first span
/// followed by the end of the last one.
fn dbg_pos(pos: &[SingleLineSpan]) -> String {
    let (first, last) = match (pos.first(), pos.last()) {
        (Some(first), Some(last)) => (first, last),
        _ => return "-".into(),
    };

    if pos.len() == 1 {
        format!("{}:{}-{}", first.line.0, first.start_col, first.end_col)
    } else {
        format!(
            "{}:{}-{}:{}",
            first.line.0, first.start_col, last.line.0, last.end_col
        )
    }
}
impl<'a> fmt::Debug for Syntax<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
List {
open_content,
open_position,
children,
close_content,
close_position,
info,
..
} => {
let mut ds = f.debug_struct(&format!(
"List id:{} content_id:{}",
self.id(),
self.content_id()
));
ds.field("open_content", &open_content)
.field("open_position", &dbg_pos(open_position))
.field("children", &children)
.field("close_content", &close_content)
.field("close_position", &dbg_pos(close_position));
if env::var("DFT_VERBOSE").is_ok() {
let next_sibling_s = match info.next_sibling.get() {
Some(List { .. }) => "Some(List)",
Some(Atom { .. }) => "Some(Atom)",
None => "None",
};
ds.field("next_sibling", &next_sibling_s);
}
ds.finish()
}
Atom {
content,
position,
info,
kind: highlight,
..
} => {
let mut ds = f.debug_struct(&format!(
"Atom id:{} content_id:{}",
self.id(),
self.content_id()
));
ds.field("content", &content);
ds.field("position", &dbg_pos(position));
if env::var("DFT_VERBOSE").is_ok() {
ds.field("highlight", highlight);
let next_sibling_s = match info.next_sibling.get() {
Some(List { .. }) => "Some(List)",
Some(Atom { .. }) => "Some(Atom)",
None => "None",
};
ds.field("next_sibling", &next_sibling_s);
}
ds.finish()
}
}
}
}
impl<'a> Syntax<'a> {
    /// Allocate a list node in `arena` and return a reference to it.
    ///
    /// Empty atoms are dropped from `children`. If both delimiters
    /// are empty and exactly one child remains, that child is
    /// returned directly and no list is allocated.
    pub(crate) fn new_list(
        arena: &'a Arena<Syntax<'a>>,
        open_content: &str,
        open_position: Vec<SingleLineSpan>,
        children: Vec<&'a Syntax<'a>>,
        close_content: &str,
        close_position: Vec<SingleLineSpan>,
    ) -> &'a Syntax<'a> {
        // Empty atoms are never displayed, so keeping them would only
        // make the tree bigger. They show up when parsing incomplete
        // or malformed programs.
        let mut children = children;
        children.retain(|child| match child {
            List { .. } => true,
            Atom { content, .. } => !content.is_empty(),
        });

        // A list without delimiters that wraps a single child adds
        // nothing. Thorough tree-sitter parsers produce these for
        // small files, e.g.
        //
        // (compilation-unit (top-level-def (function ...)))
        //
        // Returning the child keeps the tree smaller, which is a
        // small performance win and makes debug output for small
        // inputs much easier to read.
        let has_delimiters = !(open_content.is_empty() && close_content.is_empty());
        if !has_delimiters && children.len() == 1 {
            return children[0];
        }

        // Each child counts as one node, plus whatever is below it.
        let num_descendants = children
            .iter()
            .map(|child| match child {
                List {
                    num_descendants, ..
                } => *num_descendants + 1,
                Atom { .. } => 1,
            })
            .sum();

        arena.alloc(List {
            info: SyntaxInfo::default(),
            open_position,
            open_content: open_content.into(),
            children,
            close_position,
            close_content: close_content.into(),
            num_descendants,
        })
    }

    /// Allocate an atom node in `arena` and return a reference to it.
    ///
    /// A single trailing `\r` is removed from `content`. After that,
    /// a single trailing `\n` is removed together with the last entry
    /// of `position`.
    pub(crate) fn new_atom(
        arena: &'a Arena<Syntax<'a>>,
        mut position: Vec<SingleLineSpan>,
        mut content: String,
        kind: AtomKind,
    ) -> &'a Syntax<'a> {
        // Some parsers leave the \r in comments of CRLF files.
        let has_trailing_cr = content.ends_with('\r');
        if has_trailing_cr {
            content.pop();
        }

        // A trailing newline inside the atom gives worse diffs: we
        // would rather align on real content, and it complicates the
        // handling of trailing newlines at the end of the file.
        let has_trailing_newline = content.ends_with('\n');
        if has_trailing_newline {
            position.pop();
            content.pop();
        }

        arena.alloc(Atom {
            info: SyntaxInfo::default(),
            position,
            content,
            kind,
        })
    }

    /// The metadata shared by both kinds of node.
    pub(crate) fn info(&self) -> &SyntaxInfo<'a> {
        let (List { info, .. } | Atom { info, .. }) = self;
        info
    }

    /// The node that contains this one, if any.
    pub(crate) fn parent(&self) -> Option<&'a Syntax<'a>> {
        let info = self.info();
        info.parent.get()
    }

    /// The node that follows this one under the same parent, if any.
    pub(crate) fn next_sibling(&self) -> Option<&'a Syntax<'a>> {
        let info = self.info();
        info.next_sibling.get()
    }

    /// The unique ID of this node. No two nodes share an ID once IDs
    /// have been assigned.
    pub(crate) fn id(&self) -> SyntaxId {
        let info = self.info();
        info.unique_id.get()
    }

    /// The content ID of this node. Nodes with the same content have
    /// the same content ID, wherever they occur.
    pub(crate) fn content_id(&self) -> u32 {
        let info = self.info();
        info.content_id.get()
    }

    /// Whether no other node on the same side has the same content.
    pub(crate) fn content_is_unique(&self) -> bool {
        let info = self.info();
        info.content_is_unique.get()
    }

    /// How many ancestors this node has.
    pub(crate) fn num_ancestors(&self) -> u32 {
        let info = self.info();
        info.num_ancestors.get()
    }

    /// A one-line description of this node for debugging: the line of
    /// its first position (or `?` when it has no position) followed
    /// by its delimiters or its content.
    pub(crate) fn dbg_content(&self) -> String {
        match self {
            List {
                open_content,
                open_position,
                close_content,
                ..
            } => {
                let line = match open_position.first() {
                    Some(span) => span.line.display(),
                    None => "?".to_owned(),
                };
                format!("line:{} {} ... {}", line, open_content, close_content)
            }
            Atom {
                content, position, ..
            } => {
                let line = match position.first() {
                    Some(span) => span.line.display(),
                    None => "?".to_owned(),
                };
                format!("line:{} {}", line, content)
            }
        }
    }
}
/// Collect the positions of every comment atom in `nodes`, including
/// those nested inside lists, in the order the comments occur in the
/// tree.
pub(crate) fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> {
    let mut positions = vec![];

    // Explicit stack of nodes still to visit. Nodes are pushed in
    // reverse so that they are popped in their original order.
    let mut pending: Vec<&'a Syntax<'a>> = nodes.iter().rev().copied().collect();

    while let Some(node) = pending.pop() {
        match node {
            List { children, .. } => pending.extend(children.iter().rev().copied()),
            Atom {
                position,
                kind: AtomKind::Comment,
                ..
            } => positions.extend(position.iter().copied()),
            Atom { .. } => {}
        }
    }

    positions
}
/// Fill in every `SyntaxInfo` field for the nodes on both sides: IDs
/// and counts first, then the sibling and previous-node links.
pub(crate) fn init_all_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
    init_info(lhs_roots, rhs_roots);

    for roots in [lhs_roots, rhs_roots] {
        init_next_prev(roots);
    }
}
/// Print the syntax trees in `roots` to stdout as a single `digraph`
/// in DOT format, with one graph node per syntax node and an edge
/// from each list to each of its children.
pub(crate) fn print_as_dot<'a>(roots: &[&'a Syntax<'a>]) {
println!("digraph {{");
print_as_dot_(roots);
println!("}}");
}
/// Print the DOT statements for `nodes` and everything below them.
///
/// Each node is printed as `idN` where N is its unique ID. Atoms are
/// labelled with their content, lists with their opening and closing
/// delimiters. A list whose opening delimiter is empty gets a dotted
/// outline instead of a label. After each list, an edge to every
/// child is printed, followed by the children themselves.
fn print_as_dot_<'a>(nodes: &[&'a Syntax<'a>]) {
    // Labels are written as double-quoted DOT strings, so any double
    // quote inside the text has to be escaped.
    fn escape_quotes(text: &str) -> String {
        text.replace('\"', "\\\"")
    }

    for node in nodes {
        let label = match node {
            List {
                open_content,
                close_content,
                ..
            } => {
                if open_content.is_empty() {
                    "[style=dotted]".to_owned()
                } else {
                    // Delimiters may contain quotes too, so they get
                    // the same escaping as atom content.
                    let delimiters = escape_quotes(&format!("{open_content}{close_content}"));
                    format!("[label=\"{delimiters}\"]")
                }
            }
            Atom { content, .. } => {
                let content = escape_quotes(content);
                format!("[label=\"{content}\"]")
            }
        };

        println!("  id{} {};", node.id().get(), label);

        if let List { children, .. } = node {
            for child in children {
                println!("  id{} -> id{};", node.id().get(), child.id().get());
            }
            print_as_dot_(children);
        }
    }
}
/// Set the parent, counts, unique IDs, content IDs and content
/// uniqueness for the nodes on both sides.
///
/// Unique IDs start at one and keep counting across both sides, so
/// they never repeat. Content IDs share one table, so equal content
/// gets the same ID on either side. Uniqueness of content is worked
/// out for each side separately.
fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
    let sides = [lhs_roots, rhs_roots];

    let mut next_id = NonZeroU32::new(1).unwrap();
    for roots in sides {
        init_info_on_side(roots, &mut next_id);
    }

    let mut seen_content = DftHashMap::default();
    for roots in sides {
        set_content_id(roots, &mut seen_content);
    }

    for roots in sides {
        set_content_is_unique(roots);
    }
}
/// The key used to decide whether two nodes have the same content.
///
/// The fields are, in order:
///
/// 1. The opening delimiter of a list, or the (cleaned up) content of
///    an atom.
/// 2. The closing delimiter of a list. `None` for atoms.
/// 3. The content IDs of the children of a list. Empty for atoms.
/// 4. Whether the node is a list.
/// 5. For atoms, whether the atom is a comment. Always `true` for
///    lists.
type ContentKey = (Option<String>, Option<String>, Vec<u32>, bool, bool);
/// Assign a content ID to every node in `nodes` and below.
///
/// `existing` maps each content key seen so far to its ID. A node
/// whose key is already present reuses that ID; otherwise it gets the
/// next unused one.
fn set_content_id(nodes: &[&Syntax], existing: &mut DftHashMap<ContentKey, u32>) {
    for node in nodes {
        let key: ContentKey = match node {
            List {
                open_content,
                close_content,
                children,
                ..
            } => {
                // The key of a list is built from the content IDs of
                // its children, so they must be assigned first.
                set_content_id(children, existing);

                let child_ids: Vec<u32> = children.iter().map(|child| child.content_id()).collect();

                (
                    Some(open_content.clone()),
                    Some(close_content.clone()),
                    child_ids,
                    true,
                    true,
                )
            }
            Atom {
                content,
                kind: highlight,
                ..
            } => {
                let is_comment = matches!(highlight, AtomKind::Comment);
                let is_multiline = split_on_newlines(content).count() > 1;

                // Leading whitespace on the lines of a multiline
                // comment is not part of its identity.
                let clean_content = if is_comment && is_multiline {
                    let trimmed_lines = split_on_newlines(content)
                        .map(|line| line.trim_start())
                        .collect::<Vec<_>>();
                    trimmed_lines.join("\n")
                } else {
                    content.clone()
                };

                (Some(clean_content), None, vec![], false, is_comment)
            }
        };

        // IDs start at one, so zero always means "not initialised
        // yet" in a SyntaxInfo.
        let next_id = existing.len() as u32 + 1;
        let content_id = *existing.entry(key).or_insert(next_id);
        node.info().content_id.set(content_id);
    }
}
fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) {
for (i, node) in nodes.iter().enumerate() {
let num_after = parent_num_after + nodes.len() - 1 - i;
node.info().num_after.set(num_after);
if let List { children, .. } = node {
set_num_after(children, num_after);
}
}
}
/// Set the sibling links and the previous-node link on every node in
/// `roots` and below. The three passes write to different fields, so
/// they do not depend on each other.
pub(crate) fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) {
    set_next_sibling(roots);
    set_prev_sibling(roots);
    set_prev(roots, None);
}
/// Set the parent, ancestor count, `num_after` and unique ID of every
/// node reachable from `roots`, which are all the roots of a single
/// side (LHS or RHS).
///
/// `next_id` is the first unused unique ID. It is advanced past every
/// ID handed out, so the same counter can be passed on to the other
/// side.
fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) {
    // Each pass writes a different field, so the order is irrelevant.
    set_unique_id(roots, next_id);
    set_parent(roots, None);
    set_num_ancestors(roots, 0);
    set_num_after(roots, 0);
}
fn set_unique_id(nodes: &[&Syntax], next_id: &mut SyntaxId) {
for node in nodes {
node.info().unique_id.set(*next_id);
*next_id = NonZeroU32::new(u32::from(*next_id) + 1)
.expect("Should not have more than u32::MAX nodes");
if let List { children, .. } = node {
set_unique_id(children, next_id);
}
}
}
/// Assumes that `set_content_id` has already run.
fn find_nodes_with_unique_content(nodes: &[&Syntax], counts: &mut DftHashMap<u32, usize>) {
for node in nodes {
*counts.entry(node.content_id()).or_insert(0) += 1;
if let List { children, .. } = node {
find_nodes_with_unique_content(children, counts);
}
}
}
fn set_content_is_unique_from_counts(nodes: &[&Syntax], counts: &DftHashMap<u32, usize>) {
for node in nodes {
let count = counts
.get(&node.content_id())
.expect("Count should be present");
node.info().content_is_unique.set(*count == 1);
if let List { children, .. } = node {
set_content_is_unique_from_counts(children, counts);
}
}
}
fn set_content_is_unique(nodes: &[&Syntax]) {
let mut counts = DftHashMap::default();
find_nodes_with_unique_content(nodes, &mut counts);
set_content_is_unique_from_counts(nodes, &counts);
}
fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
let mut prev = None;
for node in nodes {
node.info().previous_sibling.set(prev);
prev = Some(node);
if let List { children, .. } = node {
set_prev_sibling(children);
}
}
}
fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
for (i, node) in nodes.iter().enumerate() {
let sibling = nodes.get(i + 1).copied();
node.info().next_sibling.set(sibling);
if let List { children, .. } = node {
set_next_sibling(children);
}
}
}
/// Set `prev` on every node in `nodes` and below.
///
/// The first node in `nodes` gets `parent` as its `prev`. Every
/// other node gets the sibling just before it.
fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
    let mut before = parent;

    for &node in nodes {
        node.info().prev.set(before);

        if let List { children, .. } = node {
            set_prev(children, Some(node));
        }

        before = Some(node);
    }
}
fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
for node in nodes {
node.info().parent.set(parent);
if let List { children, .. } = node {
set_parent(children, Some(node));
}
}
}
fn set_num_ancestors(nodes: &[&Syntax], num_ancestors: u32) {
for node in nodes {
node.info().num_ancestors.set(num_ancestors);
if let List { children, .. } = node {
set_num_ancestors(children, num_ancestors + 1);
}
}
}
/// Two syntax nodes are equal when they have the same content ID.
/// Positions and unique IDs are not compared.
impl PartialEq for Syntax<'_> {
fn eq(&self, other: &Self) -> bool {
// A content ID of zero means the node has not been initialised, so
// comparing it would be meaningless. Only checked in debug builds.
debug_assert!(self.content_id() > 0);
debug_assert!(other.content_id() > 0);
self.content_id() == other.content_id()
}
}
impl<'a> Eq for Syntax<'a> {}
/// Different types of strings. We want to diff these the same way,
/// but highlight them differently.
#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
pub(crate) enum StringKind {
/// A string literal, such as `"foo"`.
StringLiteral,
/// Plain text, such as the content of `<p>foo</p>`.
Text,
}
/// What sort of thing an atom is. Apart from comments, the kind of an
/// atom does not affect whether two atoms are considered equal.
#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
pub(crate) enum AtomKind {
/// The kind of this atom when we don't know anything else about
/// it. This is typically a variable, e.g. `foo`, or a literal
/// `123`. Note that string literals have a separate kind.
Normal,
// TODO: We should either have a AtomWithWords(HighlightKind) or a
// separate String, Text and Comment kind.
/// A string literal or a piece of plain text; see `StringKind`.
String(StringKind),
/// Presumably the name of a type -- TODO confirm against the parser.
Type,
/// A comment. A comment atom is never equal to a non-comment atom,
/// even when the text is the same.
Comment,
/// Presumably a language keyword -- TODO confirm against the parser.
Keyword,
/// Presumably text that tree-sitter could not parse -- TODO confirm
/// against the parser.
TreeSitterError,
}
/// What sort of token a position belongs to. Unlike atoms, tokens
/// can be delimiters like `{`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum TokenKind {
/// The opening or closing delimiter of a list.
Delimiter,
/// An atom of the given kind.
Atom(AtomKind),
}
/// A matched token (an atom, a delimiter, or a comment word).
#[derive(PartialEq, Eq, Debug, Clone)]
pub(crate) enum MatchKind {
/// A token that is the same on both sides.
UnchangedToken {
/// How this token should be highlighted.
highlight: TokenKind,
/// All the positions of this token on this side.
self_pos: Vec<SingleLineSpan>,
/// All the positions of the matching token on the other side.
opposite_pos: Vec<SingleLineSpan>,
},
/// A novel token in an AST diff.
Novel { highlight: TokenKind },
/// When we have a novel item, we often want to highlight novel
/// words more prominently. UnchangedPartOfNovelItem represents
/// the parts that don't get this special highlighting.
///
/// For example, in line-based diffs we want to highlight `a` and
/// `b` differently to `foo` here.
///
/// foo a
/// foo b
///
/// Whereas for syntactic diffs, we want to do the same thing for
/// strings and comments.
///
/// "foo a"
/// "foo b"
///
/// The whole string is a distinct value, but the `a` and `b` are
/// the most interesting parts.
UnchangedPartOfNovelItem {
/// How this word should be highlighted.
highlight: TokenKind,
/// The position of this word on this side.
self_pos: SingleLineSpan,
/// The position(s) of the same word on the other side.
opposite_pos: Vec<SingleLineSpan>,
},
/// The novel part of the novel item. For line-based diffs, this
/// is the words that are unique to this line.
///
/// See the discussion in `UnchangedPartOfNovelItem`.
NovelWord { highlight: TokenKind },
/// A syntactic token that was ignored by the AST diff (e.g. when
/// ignoring comments for diffing).
Ignored { highlight: TokenKind },
}
impl MatchKind {
    /// Does this match belong to a novel item? That covers whole
    /// novel tokens, novel words, and also the unchanged words inside
    /// a novel item.
    pub(crate) fn is_novel(&self) -> bool {
        match self {
            MatchKind::Novel { .. }
            | MatchKind::NovelWord { .. }
            | MatchKind::UnchangedPartOfNovelItem { .. } => true,
            MatchKind::UnchangedToken { .. } | MatchKind::Ignored { .. } => false,
        }
    }
}
/// A position on a single line, together with how the token at that
/// position was matched against the other side.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct MatchedPos {
/// How the token at `pos` was matched.
pub(crate) kind: MatchKind,
/// The position on this side.
pub(crate) pos: SingleLineSpan,
}
/// Given the text `content` from a comment or string, split it into
/// `MatchedPos` values for the novel and unchanged words.
///
/// If there is negligible text in common with `opposite_content`,
/// treat the whole `content` as a single novel region.
///
/// `pos` and `opposite_pos` are the positions of the atom on this
/// side and on the other side. `kind` is the atom kind used for
/// highlighting, and must be a comment or string kind.
///
/// Only positions for this side are returned. Words that are novel
/// on this side but consist entirely of whitespace produce nothing.
///
/// NOTE(review): when words are in common, `pos[0]`, `opposite_pos[0]`
/// and the first element of each `from_region_relative_to` result are
/// indexed without a check, so an empty slice there would panic --
/// confirm that callers never pass an atom without positions.
fn split_atom_words(
content: &str,
pos: &[SingleLineSpan],
opposite_content: &str,
opposite_pos: &[SingleLineSpan],
kind: AtomKind,
) -> Vec<MatchedPos> {
debug_assert!(kind == AtomKind::Comment || matches!(kind, AtomKind::String(_)));
// TODO: merge adjacent single-line comments unless there are
// blank lines between them.
let content_parts = split_words_and_numbers(content);
let other_parts = split_words_and_numbers(opposite_content);
let word_diffs = lcs_diff::slice_by_hash(&content_parts, &other_parts);
// Too little in common: mark every line of this atom as novel.
if !has_common_words(&word_diffs) {
return pos
.iter()
.map(|line| MatchedPos {
kind: MatchKind::Novel {
highlight: TokenKind::Atom(kind),
},
pos: *line,
})
.collect();
}
let content_newlines = LinePositions::from(content);
let opposite_content_newlines = LinePositions::from(opposite_content);
// How far into `content` and `opposite_content` the words handled
// so far reach. Every word advances the offset of the side(s) it
// occurs on by its length.
let mut offset = 0;
let mut opposite_offset = 0;
let mut mps = vec![];
for diff_res in word_diffs {
match diff_res {
lcs_diff::DiffResult::Left(word) => {
// This word is novel to this side.
if !is_all_whitespace(word) {
mps.push(MatchedPos {
kind: MatchKind::NovelWord {
highlight: TokenKind::Atom(kind),
},
pos: content_newlines.from_region_relative_to(
// TODO: don't assume a single line atom.
pos[0],
offset,
offset + word.len(),
)[0],
});
}
// Whitespace still takes up room, so advance past it too.
offset += word.len();
}
lcs_diff::DiffResult::Both(word, opposite_word) => {
// This word is present on both sides.
// TODO: don't assume this atom is on a single line.
let word_pos =
content_newlines.from_region_relative_to(pos[0], offset, offset + word.len())
[0];
let opposite_word_pos = opposite_content_newlines.from_region_relative_to(
opposite_pos[0],
opposite_offset,
opposite_offset + opposite_word.len(),
);
mps.push(MatchedPos {
kind: MatchKind::UnchangedPartOfNovelItem {
highlight: TokenKind::Atom(kind),
self_pos: word_pos,
opposite_pos: opposite_word_pos,
},
pos: word_pos,
});
offset += word.len();
opposite_offset += opposite_word.len();
}
lcs_diff::DiffResult::Right(opposite_word) => {
// Only exists on other side, nothing to do on this side.
opposite_offset += opposite_word.len();
}
}
}
mps
}
/// Are there sufficient common words that we should only highlight
/// individual changed words?
///
/// `word_diffs` is the word-level diff of the two texts. A word that
/// occurs on both sides counts as unchanged, unless it is a single
/// space, in which case it is not counted at all. A word that occurs
/// on one side only counts as novel.
///
/// Returns true when there are more than two unchanged words and at
/// least half as many unchanged words as novel ones.
fn has_common_words(word_diffs: &[lcs_diff::DiffResult<&&str>]) -> bool {
    let mut novel_count = 0;
    let mut unchanged_count = 0;

    for word_diff in word_diffs {
        match word_diff {
            lcs_diff::DiffResult::Both(word, _) => {
                if **word != " " {
                    unchanged_count += 1;
                }
            }
            lcs_diff::DiffResult::Left(_) | lcs_diff::DiffResult::Right(_) => {
                novel_count += 1;
            }
        }
    }

    // We want more than two unchanged words, because the text content
    // includes the comment or string delimiters.
    //
    // A sufficiently similar set of words is when more than 50% of
    // the words are common between the two sides. We multiply by two
    // because non-matching words gives us two novel words, whereas
    // matched words only gives us one unchanged word.
    unchanged_count > 2 && unchanged_count * 2 >= novel_count
}
/// Copy `line_spans`, leaving out the first and the last span when
/// they have zero width. Zero-width spans in the middle are kept.
fn filter_empty_ends(line_spans: &[SingleLineSpan]) -> Vec<SingleLineSpan> {
    // For an empty slice the iterator below yields nothing, so the
    // value chosen here does not matter in that case.
    let final_idx = line_spans.len().saturating_sub(1);

    line_spans
        .iter()
        .enumerate()
        .filter(|(idx, span)| {
            let is_end = *idx == 0 || *idx == final_idx;
            let is_empty = span.start_col == span.end_col;
            !(is_end && is_empty)
        })
        .map(|(_, span)| *span)
        .collect()
}
impl MatchedPos {
/// Build the `MatchedPos` values for one token.
///
/// `ck` is how the node that owns the token changed, `highlight` is
/// the kind of the token, and `pos` holds its positions.
/// `is_close_delim` says whether the token is the closing delimiter
/// of a list; it is only consulted for unchanged lists, to pick the
/// corresponding delimiter on the other side.
///
/// - A replaced comment or string is split into words, see
///   `split_atom_words`.
/// - An unchanged token produces one value per position, stopping
///   once there are as many values as positions on the other side.
/// - A novel token produces one value per position.
fn new(
ck: ChangeKind,
highlight: TokenKind,
pos: &[SingleLineSpan],
is_close_delim: bool,
) -> Vec<Self> {
// Don't create a MatchedPos for empty positions at the start
// or end. We still want empty positions in the middle of
// multiline atoms, as a multiline string literal may include
// empty lines.
let pos = filter_empty_ends(pos);
match ck {
ReplacedComment(this, opposite) | ReplacedString(this, opposite) => {
// Only atoms are ever marked as replaced, so lists cannot
// occur here.
let this_content = match this {
List { .. } => unreachable!(),
Atom { content, .. } => content,
};
let (opposite_content, opposite_pos) = match opposite {
List { .. } => unreachable!(),
Atom {
content, position, ..
} => (content, position),
};
// Replaced strings keep their plain-text kind if they have
// one, and are treated as string literals otherwise.
let kind = if let ReplacedString(this, _) = ck {
match this {
Atom {
kind: AtomKind::String(StringKind::Text),
..
} => AtomKind::String(StringKind::Text),
_ => AtomKind::String(StringKind::StringLiteral),
}
} else {
AtomKind::Comment
};
split_atom_words(this_content, &pos, opposite_content, opposite_pos, kind)
}
Unchanged(opposite) => {
// Find the positions of the same token on the other side.
let opposite_pos = match opposite {
List {
open_position,
close_position,
..
} => {
if is_close_delim {
close_position.clone()
} else {
open_position.clone()
}
}
Atom { position, .. } => position.clone(),
};
let opposite_pos_len = opposite_pos.len();
let kind = MatchKind::UnchangedToken {
highlight,
self_pos: pos.to_vec(),
opposite_pos,
};
// Create a MatchedPos for every line that `pos` covers.
let mut mps = vec![];
for line_pos in &pos {
mps.push(Self {
kind: kind.clone(),
pos: *line_pos,
});
// Ensure we have the same number of unchanged
// MatchedPos on the LHS and RHS. This allows us
// to consider unchanged MatchedPos values
// pairwise.
if mps.len() == opposite_pos_len {
break;
}
}
mps
}
Novel => {
let kind = MatchKind::Novel { highlight };
// Create a MatchedPos for every line that `pos` covers.
let mut mps = vec![];
for line_pos in &pos {
// Don't create a MatchedPos for entirely empty positions. This
// occurs when we have lists with empty open/close
// delimiter positions, such as the top-level list of syntax items.
if pos.len() == 1 && line_pos.start_col == line_pos.end_col {
continue;
}
mps.push(Self {
kind: kind.clone(),
pos: *line_pos,
});
}
mps
}
}
}
}
/// Walk `nodes` and return the `MatchedPos` of every token in them,
/// according to the changes recorded in `change_map`.
///
/// If no node at all is unchanged, a zero-width unchanged token at
/// the very start of both files is put at the front of the result.
pub(crate) fn change_positions<'a>(
    nodes: &[&'a Syntax<'a>],
    change_map: &ChangeMap<'a>,
) -> Vec<MatchedPos> {
    let mut seen_unchanged = false;
    let mut positions = Vec::new();
    change_positions_(nodes, change_map, &mut positions, &mut seen_unchanged);

    if seen_unchanged {
        return positions;
    }

    // Nothing is unchanged, so display would have nothing to use when
    // aligning the two sides. Give it a dummy item of width zero at
    // the beginning of both files.
    let file_start = SingleLineSpan {
        line: 0.into(),
        start_col: 0,
        end_col: 0,
    };
    let dummy = MatchedPos {
        kind: MatchKind::UnchangedToken {
            highlight: TokenKind::Atom(AtomKind::Normal),
            self_pos: vec![file_start],
            opposite_pos: vec![file_start],
        },
        pos: file_start,
    };
    positions.insert(0, dummy);

    positions
}
/// Append the `MatchedPos` values for `nodes` and everything below
/// them to `positions`, in source order: for a list, its opening
/// delimiter, then its children, then its closing delimiter.
///
/// `seen_unchanged` is set to true as soon as any node turns out to
/// be unchanged. It is never reset to false.
///
/// # Panics
///
/// Panics if `change_map` has no entry for a node.
fn change_positions_<'a>(
    nodes: &[&'a Syntax<'a>],
    change_map: &ChangeMap<'a>,
    positions: &mut Vec<MatchedPos>,
    seen_unchanged: &mut bool,
) {
    for node in nodes {
        let change = match change_map.get(node) {
            Some(change) => change,
            None => panic!("Should have changes set in all nodes: {:#?}", node),
        };

        if let ChangeKind::Unchanged(_) = change {
            *seen_unchanged = true;
        }

        match node {
            Atom { position, kind, .. } => {
                let token = TokenKind::Atom(*kind);
                positions.extend(MatchedPos::new(change, token, position, false));
            }
            List {
                open_position,
                children,
                close_position,
                ..
            } => {
                let opening = MatchedPos::new(change, TokenKind::Delimiter, open_position, false);
                positions.extend(opening);

                change_positions_(children, change_map, positions, seen_unchanged);

                let closing = MatchedPos::new(change, TokenKind::Delimiter, close_position, true);
                positions.extend(closing);
            }
        }
    }
}
/// Zip `lhs` with `rhs`, padding the shorter slice with `None` so
/// that the result is as long as the longer slice.
pub(crate) fn zip_pad_shorter<Tx: Clone, Ty: Clone>(
    lhs: &[Tx],
    rhs: &[Ty],
) -> Vec<(Option<Tx>, Option<Ty>)> {
    let longest = lhs.len().max(rhs.len());

    (0..longest)
        .map(|idx| (lhs.get(idx).cloned(), rhs.get(idx).cloned()))
        .collect()
}
/// Zip `lhs` with `rhs`, repeating the last item of the shorter
/// slice so that the result is as long as the longer slice.
///
/// Returns an empty vec if either slice is empty, as there is then
/// nothing to repeat.
pub(crate) fn zip_repeat_shorter<Tx: Clone, Ty: Clone>(lhs: &[Tx], rhs: &[Ty]) -> Vec<(Tx, Ty)> {
    let (lhs_fill, rhs_fill) = match (lhs.last(), rhs.last()) {
        (Some(lhs_last), Some(rhs_last)) => (lhs_last, rhs_last),
        _ => return vec![],
    };

    let longest = lhs.len().max(rhs.len());

    (0..longest)
        .map(|idx| {
            let left = lhs.get(idx).unwrap_or(lhs_fill);
            let right = rhs.get(idx).unwrap_or(rhs_fill);
            (left.clone(), right.clone())
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Shorthand for a `SingleLineSpan`: `span!(line, start_col, end_col)`.
    macro_rules! span {
        ($line:expr, $start:expr, $end:expr) => {
            SingleLineSpan {
                line: $line.into(),
                start_col: $start,
                end_col: $end,
            }
        };
    }

    /// The `MatchedPos` expected for a comment word that occurs at
    /// the same position `pos` on both sides.
    fn unchanged_comment_word(pos: SingleLineSpan) -> MatchedPos {
        MatchedPos {
            kind: MatchKind::UnchangedPartOfNovelItem {
                highlight: TokenKind::Atom(AtomKind::Comment),
                self_pos: pos,
                opposite_pos: vec![pos],
            },
            pos,
        }
    }

    /// Consider comment atoms as distinct to other atoms even if the
    /// content matches otherwise.
    #[test]
    fn test_comment_and_atom_differ() {
        let arena = Arena::new();
        let pos = vec![span!(0, 2, 3)];

        let comment = Syntax::new_atom(&arena, pos.clone(), "foo".to_owned(), AtomKind::Comment);
        let atom = Syntax::new_atom(&arena, pos, "foo".to_owned(), AtomKind::Normal);
        init_all_info(&[comment], &[atom]);

        assert_ne!(comment, atom);
    }

    #[test]
    fn test_new_atom_truncates_carriage_return() {
        let arena = Arena::new();

        let atom = Syntax::new_atom(&arena, vec![], "foo\r".to_owned(), AtomKind::Comment);

        if let Atom { content, .. } = atom {
            assert_eq!(content, "foo");
        } else {
            unreachable!();
        }
    }

    #[test]
    fn test_new_atom_truncates_trailing_newline() {
        let arena = Arena::new();
        let position = vec![span!(0, 0, 8), span!(1, 0, 1)];

        let atom = Syntax::new_atom(
            &arena,
            position,
            ";; hello\n".to_owned(),
            AtomKind::Comment,
        );

        if let Atom {
            position, content, ..
        } = atom
        {
            assert_eq!(content, ";; hello");
            assert_eq!(*position, vec![span!(0, 0, 8)]);
        } else {
            unreachable!();
        }
    }

    /// Ignore the syntax highlighting kind when comparing
    /// atoms. Sometimes changing delimiter wrapping can change
    /// whether a parser thinks that a node is e.g. a type.
    #[test]
    fn test_atom_equality_ignores_highlighting() {
        let arena = Arena::new();
        let pos = vec![span!(0, 2, 3)];

        let type_atom = Syntax::new_atom(&arena, pos.clone(), "foo".to_owned(), AtomKind::Type);
        let atom = Syntax::new_atom(&arena, pos, "foo".to_owned(), AtomKind::Normal);
        init_all_info(&[type_atom], &[atom]);

        assert_eq!(type_atom, atom);
    }

    #[test]
    fn test_flatten_trivial_list() {
        let arena = Arena::new();
        let pos = vec![span!(0, 2, 3)];

        let atom = Syntax::new_atom(&arena, pos, "foo".to_owned(), AtomKind::Normal);
        let trivial_list = Syntax::new_list(&arena, "", vec![], vec![atom], "", vec![]);

        assert!(matches!(trivial_list, Atom { .. }));
    }

    #[test]
    fn test_ignore_empty_atoms() {
        let arena = Arena::new();
        let pos = vec![span!(0, 2, 2)];

        let atom = Syntax::new_atom(&arena, pos, "".to_owned(), AtomKind::Normal);
        let trivial_list = Syntax::new_list(&arena, "(", vec![], vec![atom], ")", vec![]);

        if let List { children, .. } = trivial_list {
            assert_eq!(children.len(), 0);
        } else {
            unreachable!();
        }
    }

    #[test]
    fn test_multiline_comment_ignores_leading_whitespace() {
        let arena = Arena::new();
        let pos = vec![span!(0, 2, 3)];

        let x = Syntax::new_atom(
            &arena,
            pos.clone(),
            "foo\nbar".to_owned(),
            AtomKind::Comment,
        );
        let y = Syntax::new_atom(&arena, pos, "foo\n bar".to_owned(), AtomKind::Comment);
        init_all_info(&[x], &[y]);

        assert_eq!(x, y);
    }

    #[test]
    fn test_split_atom_words() {
        let content = "abc def ghi novel";
        let pos = vec![span!(0, 0, 17)];

        let opposite_content = "abc def ghi";
        let opposite_pos = vec![span!(0, 0, 11)];

        let res = split_atom_words(
            content,
            &pos,
            opposite_content,
            &opposite_pos,
            AtomKind::Comment,
        );

        // "abc", " ", "def", " " and "ghi" are common to both sides.
        // The space before "novel" is novel whitespace, which gets no
        // MatchedPos, so only "novel" itself follows.
        let expected = vec![
            unchanged_comment_word(span!(0, 0, 3)),
            unchanged_comment_word(span!(0, 3, 4)),
            unchanged_comment_word(span!(0, 4, 7)),
            unchanged_comment_word(span!(0, 7, 8)),
            unchanged_comment_word(span!(0, 8, 11)),
            MatchedPos {
                kind: MatchKind::NovelWord {
                    highlight: TokenKind::Atom(AtomKind::Comment),
                },
                pos: span!(0, 12, 17),
            },
        ];

        assert_eq!(res, expected);
    }
}