mirror of https://github.com/Wilfred/difftastic/
1153 lines
35 KiB
Rust
1153 lines
35 KiB
Rust
//! Syntax tree definitions with change metadata.
|
|
|
|
#![allow(clippy::mutable_key_type)] // Hash for Syntax doesn't use mutable fields.
|
|
|
|
use itertools::{EitherOrBoth, Itertools};
|
|
use lazy_static::lazy_static;
|
|
use regex::Regex;
|
|
use std::cell::Cell;
|
|
use std::cmp::{max, min};
|
|
use std::collections::hash_map::DefaultHasher;
|
|
use std::collections::HashMap;
|
|
use std::fmt;
|
|
use std::hash::{Hash, Hasher};
|
|
use typed_arena::Arena;
|
|
|
|
use crate::lines::{LineGroup, LineNumber, NewlinePositions};
|
|
use crate::positions::SingleLineSpan;
|
|
use ChangeKind::*;
|
|
use Syntax::*;
|
|
|
|
#[derive(PartialEq, Eq, Clone, Copy)]
|
|
pub enum ChangeKind<'a> {
|
|
Unchanged(&'a Syntax<'a>),
|
|
ReplacedComment(&'a Syntax<'a>, &'a Syntax<'a>),
|
|
Novel,
|
|
}
|
|
|
|
/// A Debug implementation that ignores the corresponding node
|
|
/// mentioned for Unchanged. Otherwise we will infinitely loop on
|
|
/// unchanged nodes, which both point to the other.
|
|
impl<'a> fmt::Debug for ChangeKind<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
let desc = match self {
|
|
Unchanged(_) => "Unchanged",
|
|
ReplacedComment(_, _) => "ReplacedComment",
|
|
Novel => "Novel",
|
|
};
|
|
f.write_str(desc)
|
|
}
|
|
}
|
|
|
|
/// Fields that are common to both `Syntax::List` and `Syntax::Atom`.
|
|
pub struct SyntaxInfo<'a> {
|
|
// TODO: Make these fields private.
|
|
pub pos_content_hash: u64,
|
|
pub next: Cell<Option<&'a Syntax<'a>>>,
|
|
pub prev: Cell<Option<&'a Syntax<'a>>>,
|
|
pub change: Cell<Option<ChangeKind<'a>>>,
|
|
pub num_ancestors: Cell<u64>,
|
|
pub unique_id: Cell<u64>,
|
|
}
|
|
|
|
impl<'a> SyntaxInfo<'a> {
|
|
pub fn new(pos_content_hash: u64) -> Self {
|
|
Self {
|
|
pos_content_hash,
|
|
next: Cell::new(None),
|
|
prev: Cell::new(None),
|
|
change: Cell::new(None),
|
|
num_ancestors: Cell::new(0),
|
|
unique_id: Cell::new(0),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub enum Syntax<'a> {
|
|
List {
|
|
info: SyntaxInfo<'a>,
|
|
open_position: Vec<SingleLineSpan>,
|
|
open_content: String,
|
|
children: Vec<&'a Syntax<'a>>,
|
|
close_position: Vec<SingleLineSpan>,
|
|
close_content: String,
|
|
num_descendants: u64,
|
|
},
|
|
Atom {
|
|
info: SyntaxInfo<'a>,
|
|
position: Vec<SingleLineSpan>,
|
|
content: String,
|
|
is_comment: bool,
|
|
},
|
|
}
|
|
|
|
fn dbg_pos(pos: &[SingleLineSpan]) -> String {
|
|
match pos {
|
|
[] => "-".into(),
|
|
[pos] => format!("{}:{}-{}", pos.line.0, pos.start_col, pos.end_col),
|
|
[start, .., end] => format!(
|
|
"{}:{}-{}:{}",
|
|
start.line.0, start.start_col, end.line.0, end.end_col
|
|
),
|
|
}
|
|
}
|
|
|
|
impl<'a> fmt::Debug for Syntax<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
List {
|
|
open_content,
|
|
open_position,
|
|
children,
|
|
close_content,
|
|
close_position,
|
|
info,
|
|
..
|
|
} => {
|
|
let mut ds = f.debug_struct(&format!("List id:{}", self.id()));
|
|
|
|
ds.field("open_content", &open_content)
|
|
.field("open_position", &dbg_pos(open_position))
|
|
.field("children", &children)
|
|
.field("close_content", &close_content)
|
|
.field("close_position", &dbg_pos(close_position))
|
|
.field("change", &info.change.get());
|
|
|
|
let next_s = match info.next.get() {
|
|
Some(List { .. }) => "Some(List)",
|
|
Some(Atom { .. }) => "Some(Atom)",
|
|
None => "None",
|
|
};
|
|
ds.field("next", &next_s);
|
|
|
|
ds.finish()
|
|
}
|
|
Atom {
|
|
content,
|
|
position,
|
|
info,
|
|
..
|
|
} => {
|
|
let mut ds = f.debug_struct(&format!("Atom id:{}", self.id()));
|
|
ds.field("content", &content)
|
|
.field("change", &info.change.get());
|
|
ds.field("position", &dbg_pos(position));
|
|
|
|
let next_s = match info.next.get() {
|
|
Some(List { .. }) => "Some(List)",
|
|
Some(Atom { .. }) => "Some(Atom)",
|
|
None => "None",
|
|
};
|
|
ds.field("next", &next_s);
|
|
|
|
ds.finish()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn trim_left(max_trim: usize, content: &str, pos: SingleLineSpan) -> (String, SingleLineSpan) {
|
|
let chars: Vec<_> = content.chars().collect();
|
|
|
|
match chars.iter().position(|c| *c != ' ' && *c != '\t') {
|
|
Some(first_non_whitespace) => {
|
|
let skip_num = max(max_trim, first_non_whitespace);
|
|
|
|
let mut new_pos = pos;
|
|
new_pos.start_col += skip_num;
|
|
(chars.iter().skip(skip_num).collect(), new_pos)
|
|
}
|
|
None => (content.to_string(), pos),
|
|
}
|
|
}
|
|
|
|
impl<'a> Syntax<'a> {
|
|
#[allow(clippy::mut_from_ref)] // Clippy doesn't understand arenas.
|
|
pub fn new_list(
|
|
arena: &'a Arena<Syntax<'a>>,
|
|
open_content: &str,
|
|
open_position: Vec<SingleLineSpan>,
|
|
children: Vec<&'a Syntax<'a>>,
|
|
close_content: &str,
|
|
close_position: Vec<SingleLineSpan>,
|
|
) -> &'a mut Syntax<'a> {
|
|
let mut num_descendants = 0;
|
|
for child in &children {
|
|
num_descendants += match child {
|
|
List {
|
|
num_descendants, ..
|
|
} => *num_descendants + 1,
|
|
Atom { .. } => 1,
|
|
};
|
|
}
|
|
|
|
let mut hasher = DefaultHasher::new();
|
|
|
|
open_position.hash(&mut hasher);
|
|
open_content.hash(&mut hasher);
|
|
close_content.hash(&mut hasher);
|
|
close_position.hash(&mut hasher);
|
|
for child in &children {
|
|
child.hash(&mut hasher);
|
|
}
|
|
|
|
arena.alloc(List {
|
|
info: SyntaxInfo::new(hasher.finish()),
|
|
open_position,
|
|
open_content: open_content.into(),
|
|
close_content: close_content.into(),
|
|
close_position,
|
|
children,
|
|
num_descendants,
|
|
})
|
|
}
|
|
|
|
#[allow(clippy::mut_from_ref)] // Clippy doesn't understand arenas.
|
|
pub fn new_atom(
|
|
arena: &'a Arena<Syntax<'a>>,
|
|
position: Vec<SingleLineSpan>,
|
|
content: &str,
|
|
) -> &'a mut Syntax<'a> {
|
|
Self::new_atom_(arena, position, content, false)
|
|
}
|
|
|
|
#[allow(clippy::mut_from_ref)] // Clippy doesn't understand arenas.
|
|
pub fn new_comment(
|
|
arena: &'a Arena<Syntax<'a>>,
|
|
position: Vec<SingleLineSpan>,
|
|
content: &str,
|
|
) -> &'a mut Syntax<'a> {
|
|
// Ignore leading whitespace in multiline comments, so changes
|
|
// in comment indentation are ignored.
|
|
let first_line_indent = match position.first() {
|
|
Some(line_pos) => line_pos.start_col,
|
|
None => 0,
|
|
};
|
|
|
|
let mut new_lines: Vec<String> = vec![];
|
|
let mut new_position = vec![];
|
|
for (i, (line, span)) in content.lines().zip(position).enumerate() {
|
|
if i == 0 {
|
|
new_lines.push(line.to_string());
|
|
new_position.push(span);
|
|
} else {
|
|
let (new_line, new_span) = trim_left(first_line_indent, line, span);
|
|
new_lines.push(new_line);
|
|
new_position.push(new_span);
|
|
}
|
|
}
|
|
|
|
Self::new_atom_(arena, new_position, &new_lines.join("\n"), true)
|
|
}
|
|
|
|
#[allow(clippy::mut_from_ref)] // Clippy doesn't understand arenas.
|
|
fn new_atom_(
|
|
arena: &'a Arena<Syntax<'a>>,
|
|
position: Vec<SingleLineSpan>,
|
|
content: &str,
|
|
is_comment: bool,
|
|
) -> &'a mut Syntax<'a> {
|
|
let mut hasher = DefaultHasher::new();
|
|
|
|
position.hash(&mut hasher);
|
|
content.hash(&mut hasher);
|
|
|
|
arena.alloc(Atom {
|
|
info: SyntaxInfo::new(hasher.finish()),
|
|
position,
|
|
content: content.into(),
|
|
is_comment,
|
|
})
|
|
}
|
|
|
|
pub fn info(&self) -> &SyntaxInfo<'a> {
|
|
match self {
|
|
List { info, .. } => info,
|
|
Atom { info, .. } => info,
|
|
}
|
|
}
|
|
|
|
pub fn next(&self) -> Option<&'a Syntax<'a>> {
|
|
self.info().next.get()
|
|
}
|
|
|
|
pub fn prev_is_contiguous(&self) -> bool {
|
|
if let Some(prev) = self.info().prev.get() {
|
|
match prev {
|
|
List {
|
|
open_position,
|
|
close_position,
|
|
..
|
|
} => {
|
|
let prev_is_parent = prev.num_ancestors() < self.num_ancestors();
|
|
if prev_is_parent {
|
|
open_position.last().map(|p| p.line) == self.first_line()
|
|
} else {
|
|
// predecessor node at the same level.
|
|
close_position.last().map(|p| p.line) == self.first_line()
|
|
}
|
|
}
|
|
Atom { .. } => prev.last_line() == self.first_line(),
|
|
}
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
pub fn id(&self) -> u64 {
|
|
self.info().unique_id.get()
|
|
}
|
|
|
|
pub fn num_ancestors(&self) -> u64 {
|
|
self.info().num_ancestors.get()
|
|
}
|
|
|
|
pub fn first_line(&self) -> Option<LineNumber> {
|
|
let position = match self {
|
|
List { open_position, .. } => open_position,
|
|
Atom { position, .. } => position,
|
|
};
|
|
position.first().map(|lp| lp.line)
|
|
}
|
|
|
|
pub fn last_line(&self) -> Option<LineNumber> {
|
|
let position = match self {
|
|
List { close_position, .. } => close_position,
|
|
Atom { position, .. } => position,
|
|
};
|
|
position.last().map(|lp| lp.line)
|
|
}
|
|
|
|
pub fn set_change(&self, ck: ChangeKind<'a>) {
|
|
self.info().change.set(Some(ck));
|
|
}
|
|
|
|
pub fn set_change_deep(&self, ck: ChangeKind<'a>) {
|
|
self.set_change(ck);
|
|
|
|
if let List { children, .. } = self {
|
|
// For unchanged lists, match up children with the
|
|
// unchanged children on the other side.
|
|
if let Unchanged(List {
|
|
children: other_children,
|
|
..
|
|
}) = ck
|
|
{
|
|
for (child, other_child) in children.iter().zip(other_children) {
|
|
child.set_change_deep(Unchanged(other_child));
|
|
}
|
|
} else {
|
|
for child in children {
|
|
child.set_change_deep(ck);
|
|
}
|
|
};
|
|
}
|
|
}
|
|
|
|
pub fn equal_content(&self, other: &Self) -> bool {
|
|
match (&self, other) {
|
|
(
|
|
Atom {
|
|
content: lhs_content,
|
|
is_comment: lhs_is_comment,
|
|
..
|
|
},
|
|
Atom {
|
|
content: rhs_content,
|
|
is_comment: rhs_is_comment,
|
|
..
|
|
},
|
|
) => lhs_content == rhs_content && lhs_is_comment == rhs_is_comment,
|
|
(
|
|
List {
|
|
open_content: lhs_open_content,
|
|
close_content: lhs_close_content,
|
|
children: lhs_children,
|
|
..
|
|
},
|
|
List {
|
|
open_content: rhs_open_content,
|
|
close_content: rhs_close_content,
|
|
children: rhs_children,
|
|
..
|
|
},
|
|
) => {
|
|
if lhs_open_content != rhs_open_content || lhs_close_content != rhs_close_content {
|
|
return false;
|
|
}
|
|
if lhs_children.len() != rhs_children.len() {
|
|
return false;
|
|
}
|
|
for (lhs_child, rhs_child) in lhs_children.iter().zip(rhs_children.iter()) {
|
|
if !lhs_child.equal_content(rhs_child) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
true
|
|
}
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
/// Does this `Node` have the same position in all its subnodes?
|
|
///
|
|
/// Nodes with different numbers of children return false
|
|
/// regardless of top-level positions.
|
|
fn equal_pos(&self, other: &Self) -> bool {
|
|
match (&self, other) {
|
|
(
|
|
Atom {
|
|
position: lhs_position,
|
|
..
|
|
},
|
|
Atom {
|
|
position: rhs_position,
|
|
..
|
|
},
|
|
) => lhs_position == rhs_position,
|
|
(
|
|
List {
|
|
open_position: lhs_open_position,
|
|
close_position: lhs_close_position,
|
|
children: lhs_children,
|
|
..
|
|
},
|
|
List {
|
|
open_position: rhs_open_position,
|
|
close_position: rhs_close_position,
|
|
children: rhs_children,
|
|
..
|
|
},
|
|
) => {
|
|
if lhs_open_position != rhs_open_position
|
|
|| lhs_close_position != rhs_close_position
|
|
{
|
|
return false;
|
|
}
|
|
if lhs_children.len() != rhs_children.len() {
|
|
return false;
|
|
}
|
|
|
|
for (lhs_child, rhs_child) in lhs_children.iter().zip(rhs_children.iter()) {
|
|
if !lhs_child.equal_pos(rhs_child) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
true
|
|
}
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Fill in the derived metadata (ids, next/prev links and ancestor
/// counts) for every node reachable from `roots`.
pub fn init_info<'a>(roots: &[&'a Syntax<'a>]) {
    // Top-level nodes have no parent: ids start from zero, there is
    // no enclosing neighbour to link to, and the depth is zero.
    let _ = set_unique_id(roots, 0);
    set_next(roots, None);
    set_prev(roots, None);
    set_num_ancestors(roots, 0);
}
|
|
|
|
fn set_unique_id<'a>(nodes: &[&'a Syntax<'a>], prev_id: u64) -> u64 {
|
|
let mut id = prev_id + 1;
|
|
for node in nodes {
|
|
node.info().unique_id.set(id);
|
|
if let List { children, .. } = node {
|
|
id = set_unique_id(children, id);
|
|
}
|
|
id += 1;
|
|
}
|
|
id
|
|
}
|
|
|
|
/// For every syntax node in the tree, mark the next node according to
|
|
/// a preorder traversal.
|
|
fn set_next<'a>(nodes: &[&'a Syntax<'a>], parent_next: Option<&'a Syntax<'a>>) {
|
|
for (i, node) in nodes.iter().enumerate() {
|
|
let node_next = match nodes.get(i + 1) {
|
|
Some(node_next) => Some(*node_next),
|
|
None => parent_next,
|
|
};
|
|
|
|
node.info().next.set(node_next);
|
|
if let List { children, .. } = node {
|
|
set_next(children, node_next);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// For every syntax node in the tree, mark the previous node
|
|
/// according to a preorder traversal.
|
|
fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
|
|
for (i, node) in nodes.iter().enumerate() {
|
|
let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) };
|
|
|
|
node.info().prev.set(node_prev);
|
|
if let List { children, .. } = node {
|
|
set_prev(children, Some(node));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn set_num_ancestors<'a>(nodes: &[&Syntax<'a>], num_ancestors: u64) {
|
|
for node in nodes {
|
|
node.info().num_ancestors.set(num_ancestors);
|
|
|
|
if let List { children, .. } = node {
|
|
set_num_ancestors(children, num_ancestors + 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> PartialEq for Syntax<'a> {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.equal_pos(other) && self.equal_content(other)
|
|
}
|
|
}
|
|
impl<'a> Eq for Syntax<'a> {}
|
|
|
|
impl<'a> Hash for Syntax<'a> {
|
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
|
self.info().pos_content_hash.hash(state);
|
|
}
|
|
}
|
|
|
|
#[derive(PartialEq, Eq, Debug, Clone)]
|
|
pub enum MatchKind {
|
|
Unchanged { opposite_pos: Vec<SingleLineSpan> },
|
|
Novel,
|
|
UnchangedCommentPart { opposite_pos: Vec<SingleLineSpan> },
|
|
ChangedCommentPart,
|
|
}
|
|
|
|
impl MatchKind {
|
|
pub fn is_unchanged(&self) -> bool {
|
|
match self {
|
|
MatchKind::Unchanged { .. } => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct MatchedPos {
|
|
pub kind: MatchKind,
|
|
pub pos: Vec<SingleLineSpan>,
|
|
// TODO: this is confusing: the previous syntax node with a match
|
|
// may be on the current line or a previous one.
|
|
pub prev_opposite_pos: Vec<SingleLineSpan>,
|
|
}
|
|
|
|
fn split_comment_words(
|
|
content: &str,
|
|
pos: &[SingleLineSpan],
|
|
opposite_content: &str,
|
|
opposite_pos: &[SingleLineSpan],
|
|
prev_opposite_pos: &[SingleLineSpan],
|
|
) -> Vec<MatchedPos> {
|
|
// TODO: also split on whitespace, so "// (foo)" splits before "(".
|
|
|
|
// TODO: merge adjacent single-line comments unless there are
|
|
// blank lines between them.
|
|
lazy_static! {
|
|
static ref WORD_BOUNDARY_RE: Regex = Regex::new(r"\b").unwrap();
|
|
}
|
|
let content_parts: Vec<_> = WORD_BOUNDARY_RE.split(content).collect();
|
|
let other_parts: Vec<_> = WORD_BOUNDARY_RE.split(opposite_content).collect();
|
|
|
|
let content_newlines = NewlinePositions::from(content);
|
|
let opposite_content_newlines = NewlinePositions::from(opposite_content);
|
|
|
|
let mut offset = 0;
|
|
let mut opposite_offset = 0;
|
|
|
|
let mut res = vec![];
|
|
for diff_res in diff::slice(&content_parts, &other_parts) {
|
|
match diff_res {
|
|
diff::Result::Left(word) => {
|
|
// This word is novel to this side.
|
|
res.push(MatchedPos {
|
|
kind: MatchKind::ChangedCommentPart,
|
|
pos: content_newlines.from_offsets_relative_to(
|
|
pos[0],
|
|
offset,
|
|
offset + word.len(),
|
|
),
|
|
prev_opposite_pos: prev_opposite_pos.to_vec(),
|
|
});
|
|
offset += word.len();
|
|
}
|
|
diff::Result::Both(word, opposite_word) => {
|
|
// This word is present on both sides.
|
|
let word_pos =
|
|
content_newlines.from_offsets_relative_to(pos[0], offset, offset + word.len());
|
|
let opposite_word_pos = opposite_content_newlines.from_offsets_relative_to(
|
|
opposite_pos[0],
|
|
opposite_offset,
|
|
opposite_offset + opposite_word.len(),
|
|
);
|
|
|
|
res.push(MatchedPos {
|
|
kind: MatchKind::UnchangedCommentPart {
|
|
opposite_pos: opposite_word_pos,
|
|
},
|
|
pos: word_pos,
|
|
prev_opposite_pos: prev_opposite_pos.to_vec(),
|
|
});
|
|
offset += word.len();
|
|
opposite_offset += opposite_word.len();
|
|
}
|
|
diff::Result::Right(opposite_word) => {
|
|
// Only exists on other side, nothing to do on this side.
|
|
opposite_offset += opposite_word.len();
|
|
}
|
|
}
|
|
}
|
|
|
|
res
|
|
}
|
|
|
|
impl MatchedPos {
|
|
fn new(
|
|
ck: ChangeKind,
|
|
pos: Vec<SingleLineSpan>,
|
|
prev_opposite_pos: Vec<SingleLineSpan>,
|
|
) -> Vec<Self> {
|
|
let kind = match ck {
|
|
ReplacedComment(this, opposite) => {
|
|
let this_content = match this {
|
|
List { .. } => unreachable!(),
|
|
Atom { content, .. } => content,
|
|
};
|
|
let (opposite_content, opposite_pos) = match opposite {
|
|
List { .. } => unreachable!(),
|
|
Atom {
|
|
content, position, ..
|
|
} => (content, position),
|
|
};
|
|
|
|
return split_comment_words(
|
|
this_content,
|
|
&pos,
|
|
opposite_content,
|
|
opposite_pos,
|
|
&prev_opposite_pos,
|
|
);
|
|
}
|
|
Unchanged(opposite) => {
|
|
// TODO: is close_position the best position for
|
|
// unchanged lists?
|
|
let opposite_pos = match opposite {
|
|
List { close_position, .. } => close_position.clone(),
|
|
Atom { position, .. } => position.clone(),
|
|
};
|
|
|
|
MatchKind::Unchanged { opposite_pos }
|
|
}
|
|
Novel => MatchKind::Novel,
|
|
};
|
|
|
|
vec![Self {
|
|
kind,
|
|
pos,
|
|
prev_opposite_pos,
|
|
}]
|
|
}
|
|
}
|
|
|
|
/// Walk `nodes` and return a vec of all the changed positions.
|
|
pub fn change_positions<'a>(
|
|
src: &str,
|
|
opposite_src: &str,
|
|
nodes: &[&Syntax<'a>],
|
|
) -> Vec<MatchedPos> {
|
|
let nl_pos = NewlinePositions::from(src);
|
|
let opposite_nl_pos = NewlinePositions::from(opposite_src);
|
|
|
|
let mut positions = Vec::new();
|
|
let mut prev_unchanged = vec![SingleLineSpan {
|
|
line: 0.into(),
|
|
start_col: 0,
|
|
end_col: 0,
|
|
}];
|
|
change_positions_(
|
|
&nl_pos,
|
|
&opposite_nl_pos,
|
|
nodes,
|
|
&mut prev_unchanged,
|
|
&mut positions,
|
|
);
|
|
positions
|
|
}
|
|
|
|
fn change_positions_<'a>(
|
|
nl_pos: &NewlinePositions,
|
|
opposite_nl_pos: &NewlinePositions,
|
|
nodes: &[&Syntax<'a>],
|
|
prev_opposite_pos: &mut Vec<SingleLineSpan>,
|
|
positions: &mut Vec<MatchedPos>,
|
|
) {
|
|
for node in nodes {
|
|
match node {
|
|
List {
|
|
info,
|
|
open_position,
|
|
children,
|
|
close_position,
|
|
..
|
|
} => {
|
|
let change = info
|
|
.change
|
|
.get()
|
|
.unwrap_or_else(|| panic!("Should have changes set in all nodes: {:#?}", node));
|
|
|
|
if let Unchanged(opposite_node) = change {
|
|
match opposite_node {
|
|
List {
|
|
open_position: opposite_open_pos,
|
|
..
|
|
} => {
|
|
*prev_opposite_pos = opposite_open_pos.clone();
|
|
}
|
|
Atom { .. } => unreachable!(),
|
|
}
|
|
}
|
|
|
|
positions.extend(MatchedPos::new(
|
|
change,
|
|
open_position.clone(),
|
|
prev_opposite_pos.clone(),
|
|
));
|
|
|
|
change_positions_(
|
|
nl_pos,
|
|
opposite_nl_pos,
|
|
children,
|
|
prev_opposite_pos,
|
|
positions,
|
|
);
|
|
|
|
if let Unchanged(opposite_node) = change {
|
|
match opposite_node {
|
|
List {
|
|
close_position: opposite_close_pos,
|
|
..
|
|
} => {
|
|
*prev_opposite_pos = opposite_close_pos.clone();
|
|
}
|
|
Atom { .. } => unreachable!(),
|
|
}
|
|
}
|
|
positions.extend(MatchedPos::new(
|
|
change,
|
|
close_position.clone(),
|
|
prev_opposite_pos.clone(),
|
|
));
|
|
}
|
|
Atom { info, position, .. } => {
|
|
let change = info
|
|
.change
|
|
.get()
|
|
.unwrap_or_else(|| panic!("Should have changes set in all nodes: {:#?}", node));
|
|
if let Unchanged(opposite_node) = change {
|
|
match opposite_node {
|
|
List { .. } => {
|
|
dbg!(node, opposite_node);
|
|
unreachable!();
|
|
}
|
|
Atom {
|
|
position: opposite_position,
|
|
..
|
|
} => {
|
|
*prev_opposite_pos = opposite_position.clone();
|
|
}
|
|
}
|
|
}
|
|
positions.extend(MatchedPos::new(
|
|
change,
|
|
position.clone(),
|
|
prev_opposite_pos.clone(),
|
|
));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Pair up the items of `lhs` and `rhs` by index.
///
/// The result is as long as the longer slice; once the shorter
/// slice runs out, its side of the pair is `None`.
fn zip_pad_shorter<Tx: Copy, Ty: Copy>(lhs: &[Tx], rhs: &[Ty]) -> Vec<(Option<Tx>, Option<Ty>)> {
    let padded_len = max(lhs.len(), rhs.len());

    (0..padded_len)
        .map(|i| (lhs.get(i).copied(), rhs.get(i).copied()))
        .collect()
}
|
|
|
|
/// Given two slices of line positions, return a list of line number
|
|
/// pairs. If the slices have different lengths, reuse the last item
|
|
/// from the shorter slice.
|
|
fn zip_lines(lhs: &[SingleLineSpan], rhs: &[SingleLineSpan]) -> Vec<(LineNumber, LineNumber)> {
|
|
let lhs_lines: Vec<_> = lhs.iter().map(|slp| slp.line).collect();
|
|
let rhs_lines: Vec<_> = rhs.iter().map(|slp| slp.line).collect();
|
|
|
|
let lhs_last = match lhs_lines.last() {
|
|
Some(last) => *last,
|
|
None => {
|
|
return vec![];
|
|
}
|
|
};
|
|
let rhs_last = match rhs_lines.last() {
|
|
Some(last) => *last,
|
|
None => {
|
|
return vec![];
|
|
}
|
|
};
|
|
|
|
lhs_lines
|
|
.into_iter()
|
|
.zip_longest(rhs_lines.into_iter())
|
|
.map(|l| match l {
|
|
EitherOrBoth::Both(lhs_line, rhs_line) => (lhs_line, rhs_line),
|
|
EitherOrBoth::Left(lhs_line) => (lhs_line, rhs_last),
|
|
EitherOrBoth::Right(rhs_line) => (lhs_last, rhs_line),
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
pub fn aligned_lines(
|
|
group: &LineGroup,
|
|
lhs_line_matches: &HashMap<LineNumber, LineNumber>,
|
|
) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
|
|
let lhs_lines = group.lhs_lines();
|
|
let rhs_lines = group.rhs_lines();
|
|
|
|
// When adding padding to a LineGroup where each side has a
|
|
// different number of lines, we can end up with extra padding on
|
|
// the side with fewer lines.
|
|
//
|
|
// TODO: fix padding to be smarter.
|
|
aligned_lines_(&lhs_lines, &rhs_lines, lhs_line_matches)
|
|
}
|
|
|
|
/// Given two slices of contiguous line numbers, return pairs of
|
|
/// matched lines.
|
|
///
|
|
/// A LHS line is matched with a RHS line if it's present in
|
|
/// `lhs_line_matches` and hasn't already been matched.
|
|
///
|
|
/// If a line has no match on the other side, the pair will contain
|
|
/// None on the other side.
|
|
fn aligned_lines_(
|
|
lhs_lines: &[LineNumber],
|
|
rhs_lines: &[LineNumber],
|
|
lhs_line_matches: &HashMap<LineNumber, LineNumber>,
|
|
) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
|
|
let mut rhs_highest_matched = rhs_lines.first().map_or(0, |l| l.0 as isize) - 1;
|
|
|
|
// For every LHS line, if there is a RHS line that is included in
|
|
// `rhs_lines` and hasn't yet been paired up, add it to
|
|
// matched_lines.
|
|
//
|
|
// TODO: prefer the line with the most similarity, not just the
|
|
// first line. See spurious `let` alignment in 9c71298f8294ce8f,
|
|
// LHS line 96 in lines.rs.
|
|
let mut matched_lines = vec![];
|
|
for lhs_line in lhs_lines {
|
|
if let Some(rhs_line) = lhs_line_matches.get(lhs_line) {
|
|
if rhs_line.0 as isize > rhs_highest_matched {
|
|
matched_lines.push((lhs_line, rhs_line));
|
|
rhs_highest_matched = rhs_line.0 as isize;
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut res = vec![];
|
|
|
|
let mut lhs_i = 0;
|
|
let mut rhs_i = 0;
|
|
|
|
// Build a vec of matched line tuples. For lines without matches
|
|
// (novel lines, empty lines), just match lines up pairwise. Pad
|
|
// gaps if one side has more lines.
|
|
for (lhs_matched_line, rhs_matched_line) in matched_lines {
|
|
let mut lhs_prev_lines = vec![];
|
|
while lhs_i < lhs_lines.len() && lhs_lines[lhs_i] < *lhs_matched_line {
|
|
lhs_prev_lines.push(lhs_lines[lhs_i]);
|
|
lhs_i += 1;
|
|
}
|
|
let mut rhs_prev_lines = vec![];
|
|
while rhs_i < rhs_lines.len() && rhs_lines[rhs_i] < *rhs_matched_line {
|
|
rhs_prev_lines.push(rhs_lines[rhs_i]);
|
|
rhs_i += 1;
|
|
}
|
|
|
|
res.extend(zip_pad_shorter(&lhs_prev_lines, &rhs_prev_lines));
|
|
|
|
res.push((Some(*lhs_matched_line), Some(*rhs_matched_line)));
|
|
lhs_i += 1;
|
|
rhs_i += 1;
|
|
}
|
|
|
|
// Handle unmatched lines after the last match.
|
|
res.extend(zip_pad_shorter(
|
|
&lhs_lines[min(lhs_i, lhs_lines.len())..],
|
|
&rhs_lines[min(rhs_i, rhs_lines.len())..],
|
|
));
|
|
|
|
res
|
|
}
|
|
|
|
pub fn matching_lines<'a>(nodes: &[&Syntax<'a>]) -> HashMap<LineNumber, LineNumber> {
|
|
let mut res = HashMap::new();
|
|
for node in nodes {
|
|
matching_lines_(node, &mut res);
|
|
}
|
|
res
|
|
}
|
|
|
|
fn matching_lines_<'a>(node: &Syntax<'a>, matches: &mut HashMap<LineNumber, LineNumber>) {
|
|
match node {
|
|
List {
|
|
info,
|
|
open_position,
|
|
children,
|
|
close_position,
|
|
..
|
|
} => {
|
|
if let Some(Unchanged(List {
|
|
open_position: other_open,
|
|
close_position: other_close,
|
|
..
|
|
})) = info.change.get()
|
|
{
|
|
for (line, other_line) in zip_lines(open_position, other_open) {
|
|
matches.entry(line).or_insert(other_line);
|
|
}
|
|
|
|
for (line, other_line) in zip_lines(close_position, other_close) {
|
|
matches.entry(line).or_insert(other_line);
|
|
}
|
|
}
|
|
|
|
for child in children {
|
|
matching_lines_(child, matches);
|
|
}
|
|
}
|
|
Atom { info, position, .. } => {
|
|
if let Some(Unchanged(Atom {
|
|
position: other_pos,
|
|
..
|
|
})) = info.change.get()
|
|
{
|
|
for (line, other_line) in zip_lines(position, other_pos) {
|
|
matches.entry(line).or_insert(other_line);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A single match in the middle: the unmatched lines around it
    /// are padded with `None`.
    #[test]
    fn test_aligned_middle() {
        let lhs_lines: Vec<LineNumber> = vec![1.into(), 2.into()];
        let rhs_lines: Vec<LineNumber> = vec![12.into(), 13.into()];

        let mut line_matches: HashMap<LineNumber, LineNumber> = HashMap::new();
        line_matches.insert(2.into(), 12.into());

        assert_eq!(
            aligned_lines_(&lhs_lines, &rhs_lines, &line_matches),
            vec![
                (Some(1.into()), None),
                (Some(2.into()), Some(12.into())),
                (None, Some(13.into()))
            ]
        );
    }

    /// Every LHS line has a match within `rhs_lines`.
    #[test]
    fn test_aligned_all() {
        let lhs_lines: Vec<LineNumber> = vec![1.into(), 2.into()];
        let rhs_lines: Vec<LineNumber> = vec![11.into(), 12.into()];

        let mut line_matches: HashMap<LineNumber, LineNumber> = HashMap::new();
        // Both matches must point into `rhs_lines`; an out-of-range
        // target is covered by `test_aligned_out_of_range`.
        line_matches.insert(1.into(), 11.into());
        line_matches.insert(2.into(), 12.into());

        assert_eq!(
            aligned_lines_(&lhs_lines, &rhs_lines, &line_matches),
            vec![
                (Some(1.into()), Some(11.into())),
                (Some(2.into()), Some(12.into())),
            ]
        );
    }

    /// Without any matches, lines are simply paired up in order.
    #[test]
    fn test_aligned_none() {
        let lhs_lines: Vec<LineNumber> = vec![1.into()];
        let rhs_lines: Vec<LineNumber> = vec![11.into()];

        let line_matches: HashMap<LineNumber, LineNumber> = HashMap::new();

        assert_eq!(
            aligned_lines_(&lhs_lines, &rhs_lines, &line_matches),
            vec![(Some(1.into()), Some(11.into()))]
        );
    }

    /// Two LHS lines matching the same RHS line: only the first one
    /// gets it.
    #[test]
    fn test_aligned_line_overlap() {
        let lhs_lines: Vec<LineNumber> = vec![1.into(), 2.into()];
        let rhs_lines: Vec<LineNumber> = vec![11.into()];

        let mut line_matches: HashMap<LineNumber, LineNumber> = HashMap::new();
        line_matches.insert(1.into(), 11.into());
        line_matches.insert(2.into(), 11.into());

        assert_eq!(
            aligned_lines_(&lhs_lines, &rhs_lines, &line_matches),
            vec![(Some(1.into()), Some(11.into())), (Some(2.into()), None)]
        );
    }

    /// Matches that would cross each other: the later one is dropped.
    #[test]
    fn test_aligned_out_of_order() {
        let lhs_lines: Vec<LineNumber> = vec![1.into(), 2.into()];
        let rhs_lines: Vec<LineNumber> = vec![11.into(), 12.into()];

        let mut line_matches: HashMap<LineNumber, LineNumber> = HashMap::new();
        line_matches.insert(2.into(), 11.into());
        line_matches.insert(1.into(), 12.into());

        assert_eq!(
            aligned_lines_(&lhs_lines, &rhs_lines, &line_matches),
            vec![
                (None, Some(11.into())),
                (Some(1.into()), Some(12.into())),
                (Some(2.into()), None)
            ]
        );
    }

    /// A match that points before `rhs_lines` is ignored.
    #[test]
    fn test_aligned_out_of_range() {
        let lhs_lines: Vec<LineNumber> = vec![1.into(), 2.into()];
        let rhs_lines: Vec<LineNumber> = vec![11.into(), 12.into()];

        let mut line_matches: HashMap<LineNumber, LineNumber> = HashMap::new();
        line_matches.insert(1.into(), 10.into());

        assert_eq!(
            aligned_lines_(&lhs_lines, &rhs_lines, &line_matches),
            vec![
                (Some(1.into()), Some(11.into())),
                (Some(2.into()), Some(12.into())),
            ]
        );
    }

    /// Line 0 on both sides can still be matched.
    #[test]
    fn test_aligned_first_line() {
        let lhs_lines: Vec<LineNumber> = vec![0.into()];
        let rhs_lines: Vec<LineNumber> = vec![0.into()];

        let mut line_matches: HashMap<LineNumber, LineNumber> = HashMap::new();
        line_matches.insert(0.into(), 0.into());

        assert_eq!(
            aligned_lines_(&lhs_lines, &rhs_lines, &line_matches),
            vec![(Some(0.into()), Some(0.into()))]
        );
    }

    /// Ensure that we assign prev_opposite_pos even if the change is on the first node.
    #[test]
    fn test_prev_opposite_pos_first_node() {
        let arena = Arena::new();

        let atom = Syntax::new_atom(
            &arena,
            vec![SingleLineSpan {
                line: 0.into(),
                start_col: 2,
                end_col: 3,
            }],
            "foo",
        );
        atom.set_change(ChangeKind::Novel);
        let nodes: Vec<&Syntax> = vec![atom];

        let positions = change_positions("irrelevant", "also irrelevant", &nodes);
        assert_eq!(
            positions[0].prev_opposite_pos,
            vec![SingleLineSpan {
                line: 0.into(),
                start_col: 0,
                end_col: 0
            }]
        );
    }

    /// A comment and a plain atom with identical text and position
    /// are still different nodes.
    #[test]
    fn test_comment_and_atom_differ() {
        let pos = vec![SingleLineSpan {
            line: 0.into(),
            start_col: 2,
            end_col: 3,
        }];

        let arena = Arena::new();

        let comment = Syntax::new_comment(&arena, pos.clone(), "foo");
        let atom = Syntax::new_atom(&arena, pos, "foo");

        assert_ne!(comment, atom);
    }

    /// The recorded change is not part of a node's identity.
    #[test]
    fn test_atom_equality_ignores_change() {
        assert_eq!(
            Atom {
                info: SyntaxInfo {
                    change: Cell::new(Some(Novel)),
                    ..SyntaxInfo::new(1)
                },

                position: vec![SingleLineSpan {
                    line: 1.into(),
                    start_col: 2,
                    end_col: 3
                }],
                content: "foo".into(),
                is_comment: false,
            },
            Atom {
                info: SyntaxInfo {
                    change: Cell::new(None),
                    ..SyntaxInfo::new(1)
                },
                position: vec![SingleLineSpan {
                    line: 1.into(),
                    start_col: 2,
                    end_col: 3
                }],
                content: "foo".into(),
                is_comment: false,
            }
        );
    }
}
|