id_arena_syntax
Wilfred Hughes 2023-08-18 09:11:42 +07:00
parent b6d8ecbd4f
commit 4fa5505431
6 changed files with 101 additions and 53 deletions

7
Cargo.lock generated

@ -245,6 +245,7 @@ dependencies = [
"glob",
"hashbrown 0.12.3",
"humansize",
"id-arena",
"itertools",
"lazy_static",
"libc",
@ -383,6 +384,12 @@ dependencies = [
"quick-error",
]
[[package]]
name = "id-arena"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
[[package]]
name = "indexmap"
version = "1.7.0"

@ -66,6 +66,7 @@ hashbrown = "0.12.3"
humansize = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
id-arena = "2.2.1"
[dev-dependencies]
# assert_cmd 2.0.6 requires rust 1.60

@ -2,14 +2,14 @@
use crate::{
hash::DftHashMap,
parse::syntax::{Syntax, SyntaxId},
parse::syntax::{Syntax, SyntaxArena, SyntaxArenaId, SyntaxId},
};
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum ChangeKind<'a> {
Unchanged(&'a Syntax<'a>),
ReplacedComment(&'a Syntax<'a>, &'a Syntax<'a>),
ReplacedString(&'a Syntax<'a>, &'a Syntax<'a>),
Unchanged(SyntaxArenaId<'a>),
ReplacedComment(SyntaxArenaId<'a>, SyntaxArenaId<'a>),
ReplacedString(SyntaxArenaId<'a>, SyntaxArenaId<'a>),
Novel,
}
@ -19,21 +19,36 @@ pub struct ChangeMap<'a> {
}
impl<'a> ChangeMap<'a> {
pub fn insert(&mut self, node: &'a Syntax<'a>, ck: ChangeKind<'a>) {
pub fn insert(
&mut self,
node_arena: &SyntaxArena<'a>,
node_id: SyntaxArenaId<'a>,
ck: ChangeKind<'a>,
) {
let node = &node_arena[node_id];
self.changes.insert(node.id(), ck);
}
pub fn get(&self, node: &Syntax<'a>) -> Option<ChangeKind<'a>> {
pub fn get(
&self,
node_arena: &SyntaxArena<'a>,
node_id: SyntaxArenaId<'a>,
) -> Option<ChangeKind<'a>> {
let node = &node_arena[node_id];
self.changes.get(&node.id()).copied()
}
}
pub fn insert_deep_unchanged<'a>(
node: &'a Syntax<'a>,
opposite_node: &'a Syntax<'a>,
node_arena: &SyntaxArena<'a>,
node_id: SyntaxArenaId<'a>,
opposite_node_id: SyntaxArenaId<'a>,
change_map: &mut ChangeMap<'a>,
) {
change_map.insert(node, ChangeKind::Unchanged(opposite_node));
let node = &node_arena[node_id];
let opposite_node = &node_arena[opposite_node_id];
change_map.insert(node_arena, node_id, ChangeKind::Unchanged(opposite_node_id));
match (node, opposite_node) {
(
@ -47,7 +62,7 @@ pub fn insert_deep_unchanged<'a>(
},
) => {
for (child, opposite_child) in node_children.iter().zip(opposite_children) {
insert_deep_unchanged(child, opposite_child, change_map);
insert_deep_unchanged(node_arena, *child, *opposite_child, change_map);
}
}
(Syntax::Atom { .. }, Syntax::Atom { .. }) => {}
@ -55,12 +70,17 @@ pub fn insert_deep_unchanged<'a>(
}
}
pub fn insert_deep_novel<'a>(node: &'a Syntax<'a>, change_map: &mut ChangeMap<'a>) {
change_map.insert(node, ChangeKind::Novel);
pub fn insert_deep_novel<'a>(
node_arena: &SyntaxArena<'a>,
node_id: SyntaxArenaId<'a>,
change_map: &mut ChangeMap<'a>,
) {
let node = &node_arena[node_id];
change_map.insert(node_arena, node_id, ChangeKind::Novel);
if let Syntax::List { children, .. } = node {
for child in children.iter() {
insert_deep_novel(child, change_map);
insert_deep_novel(node_arena, *child, change_map);
}
}
}

@ -7,7 +7,7 @@ use crate::{
diff::changes::ChangeMap,
diff::graph::{populate_change_map, set_neighbours, Edge, Vertex},
hash::DftHashMap,
parse::syntax::Syntax,
parse::syntax::{Syntax, SyntaxArena},
};
use bumpalo::Bump;
use itertools::Itertools;
@ -155,7 +155,7 @@ fn edge_between<'s, 'b>(before: &Vertex<'s, 'b>, after: &Vertex<'s, 'b>) -> Edge
}
/// What is the total number of AST nodes?
fn node_count(root: Option<&Syntax>) -> u32 {
fn node_count<'a>(node_arena: &SyntaxArena<'a>, root: Option<&Syntax>) -> u32 {
let mut node = root;
let mut count = 0;
while let Some(current_node) = node {
@ -167,38 +167,39 @@ fn node_count(root: Option<&Syntax>) -> u32 {
};
count += current_count;
node = current_node.next_sibling();
node = current_node.next_sibling().map(|id| &node_arena[id]);
}
count
}
/// How many top-level AST nodes do we have?
fn tree_count(root: Option<&Syntax>) -> u32 {
fn tree_count<'a>(node_arena: &SyntaxArena<'a>, root: Option<&Syntax>) -> u32 {
let mut node = root;
let mut count = 0;
while let Some(current_node) = node {
count += 1;
node = current_node.next_sibling();
node = current_node.next_sibling().map(|id| &node_arena[id]);
}
count
}
pub fn mark_syntax<'a>(
node_arena: &SyntaxArena<'a>,
lhs_syntax: Option<&'a Syntax<'a>>,
rhs_syntax: Option<&'a Syntax<'a>>,
change_map: &mut ChangeMap<'a>,
graph_limit: usize,
) -> Result<(), ExceededGraphLimit> {
let lhs_node_count = node_count(lhs_syntax) as usize;
let rhs_node_count = node_count(rhs_syntax) as usize;
let lhs_node_count = node_count(node_arena, lhs_syntax) as usize;
let rhs_node_count = node_count(node_arena, rhs_syntax) as usize;
info!(
"LHS nodes: {} ({} toplevel), RHS nodes: {} ({} toplevel)",
lhs_node_count,
tree_count(lhs_syntax),
tree_count(node_arena, lhs_syntax),
rhs_node_count,
tree_count(rhs_syntax),
tree_count(node_arena, rhs_syntax),
);
// When there are a large number of changes, we end up building a

@ -16,7 +16,7 @@ use crate::{
stack::Stack,
},
hash::DftHashMap,
parse::syntax::{AtomKind, Syntax, SyntaxId},
parse::syntax::{AtomKind, Syntax, SyntaxArena, SyntaxArenaId, SyntaxId},
};
use Edge::*;
@ -408,18 +408,22 @@ fn looks_like_punctuation(node: &Syntax) -> bool {
/// Pop as many parents of `lhs_node` and `rhs_node` as
/// possible. Return the new syntax nodes and parents.
fn pop_all_parents<'s>(
lhs_node: Option<&'s Syntax<'s>>,
rhs_node: Option<&'s Syntax<'s>>,
node_arena: &SyntaxArena<'s>,
lhs_node_id: Option<SyntaxArenaId<'s>>,
rhs_node_id: Option<SyntaxArenaId<'s>>,
lhs_parent_id: Option<SyntaxId>,
rhs_parent_id: Option<SyntaxId>,
parents: &Stack<EnteredDelimiter<'s>>,
) -> (
Option<&'s Syntax<'s>>,
Option<&'s Syntax<'s>>,
Option<SyntaxArenaId<'s>>,
Option<SyntaxArenaId<'s>>,
Option<SyntaxId>,
Option<SyntaxId>,
Stack<EnteredDelimiter<'s>>,
) {
let lhs_node = lhs_node_id.map(|id| &node_arena[id]);
let rhs_node = rhs_node_id.map(|id| &node_arena[id]);
let mut lhs_node = lhs_node;
let mut rhs_node = rhs_node;
let mut lhs_parent_id = lhs_parent_id;
@ -432,8 +436,8 @@ fn pop_all_parents<'s>(
// Move to next after LHS parent.
// Continue from sibling of parent.
lhs_node = lhs_parent.next_sibling();
lhs_parent_id = lhs_parent.parent().map(Syntax::id);
lhs_node = lhs_parent.next_sibling().map(|id| &node_arena[id]);
lhs_parent_id = lhs_parent.parent().map(|id| node_arena[id].id());
parents = parents_next;
continue;
}
@ -444,8 +448,8 @@ fn pop_all_parents<'s>(
// Move to next after RHS parent.
// Continue from sibling of parent.
rhs_node = rhs_parent.next_sibling();
rhs_parent_id = rhs_parent.parent().map(Syntax::id);
rhs_node = rhs_parent.next_sibling().map(|id| &node_arena[id]);
rhs_parent_id = rhs_parent.parent().map(|id| node_arena[id].id());
parents = parents_next;
continue;
}
@ -457,10 +461,12 @@ fn pop_all_parents<'s>(
// Continue from sibling of parent.
if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&parents) {
lhs_node = lhs_parent.next_sibling();
rhs_node = rhs_parent.next_sibling();
lhs_parent_id = lhs_parent.parent().map(Syntax::id);
rhs_parent_id = rhs_parent.parent().map(Syntax::id);
lhs_node = lhs_parent.next_sibling().map(|id| &node_arena[id]);
lhs_parent_id = lhs_parent.parent().map(|id| node_arena[id].id());
rhs_node = rhs_parent.next_sibling().map(|id| &node_arena[id]);
rhs_parent_id = rhs_parent.parent().map(|id| node_arena[id].id());
parents = parents_next;
continue;
}
@ -469,12 +475,19 @@ fn pop_all_parents<'s>(
break;
}
(lhs_node, rhs_node, lhs_parent_id, rhs_parent_id, parents)
(
lhs_node_id,
rhs_node_id,
lhs_parent_id,
rhs_parent_id,
parents,
)
}
/// Compute the neighbours of `v` if we haven't previously done so,
/// and write them to the .neighbours cell inside `v`.
pub fn set_neighbours<'s, 'b>(
node_arena: &SyntaxArena<'s>,
v: &Vertex<'s, 'b>,
alloc: &'b Bump,
seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>,
@ -496,6 +509,7 @@ pub fn set_neighbours<'s, 'b>(
// Both nodes are equal, the happy case.
let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) = pop_all_parents(
node_arena,
lhs_syntax.next_sibling(),
rhs_syntax.next_sibling(),
v.lhs_parent_id,

@ -4,6 +4,8 @@
use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32};
use typed_arena::Arena;
use id_arena::Arena as IdArena;
use id_arena::Id;
use crate::{
diff::changes::ChangeKind,
@ -42,6 +44,9 @@ impl<'a> fmt::Debug for ChangeKind<'a> {
}
}
pub type SyntaxArena<'a> = IdArena<Syntax<'a>>;
pub type SyntaxArenaId<'a> = Id<Syntax<'a>>;
pub type SyntaxId = NonZeroU32;
/// Fields that are common to both `Syntax::List` and `Syntax::Atom`.
@ -98,7 +103,7 @@ pub enum Syntax<'a> {
info: SyntaxInfo<'a>,
open_position: Vec<SingleLineSpan>,
open_content: String,
children: Vec<&'a Syntax<'a>>,
children: Vec<SyntaxArenaId<'a>>,
close_position: Vec<SingleLineSpan>,
close_content: String,
num_descendants: u32,
@ -193,10 +198,10 @@ impl<'a> Syntax<'a> {
arena: &'a Arena<Syntax<'a>>,
open_content: &str,
open_position: Vec<SingleLineSpan>,
children: Vec<&'a Syntax<'a>>,
children: Vec<SyntaxArenaId<'a>>,
close_content: &str,
close_position: Vec<SingleLineSpan>,
) -> &'a Syntax<'a> {
) -> SyntaxArenaId<'a> {
// Skip empty atoms: they aren't displayed, so there's no
// point making our syntax tree bigger. These occur when we're
// parsing incomplete or malformed programs.
@ -247,7 +252,7 @@ impl<'a> Syntax<'a> {
mut position: Vec<SingleLineSpan>,
mut content: &str,
kind: AtomKind,
) -> &'a Syntax<'a> {
) -> SyntaxArenaId<'a> {
// If a parser hasn't cleaned up \r on CRLF files with
// comments, discard it.
if content.ends_with('\r') {
@ -273,11 +278,11 @@ impl<'a> Syntax<'a> {
}
}
pub fn parent(&self) -> Option<&'a Syntax<'a>> {
pub fn parent(&self) -> Option<SyntaxArenaId<'a>> {
self.info().parent.get()
}
pub fn next_sibling(&self) -> Option<&'a Syntax<'a>> {
pub fn next_sibling(&self) -> Option<SyntaxArenaId<'a>> {
self.info().next_sibling.get()
}
@ -330,7 +335,7 @@ impl<'a> Syntax<'a> {
}
}
pub fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> {
pub fn comment_positions<'a>(nodes: &[SyntaxArenaId<'a>]) -> Vec<SingleLineSpan> {
fn walk_comment_positions(node: &Syntax<'_>, positions: &mut Vec<SingleLineSpan>) {
match node {
List { children, .. } => {
@ -355,13 +360,13 @@ pub fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> {
}
/// Initialise all the fields in `SyntaxInfo`.
pub fn init_all_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
pub fn init_all_info<'a>(lhs_roots: &[SyntaxArenaId<'a>], rhs_roots: &[SyntaxArenaId<'a>]) {
init_info(lhs_roots, rhs_roots);
init_next_prev(lhs_roots);
init_next_prev(rhs_roots);
}
fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
fn init_info<'a>(lhs_roots: &[SyntaxArenaId<'a>], rhs_roots: &[SyntaxArenaId<'a>]) {
let mut id = NonZeroU32::new(1).unwrap();
init_info_on_side(lhs_roots, &mut id);
init_info_on_side(rhs_roots, &mut id);
@ -437,7 +442,7 @@ fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) {
}
}
}
pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) {
pub fn init_next_prev<'a>(roots: &[SyntaxArenaId<'a>]) {
set_prev_sibling(roots);
set_next_sibling(roots);
set_prev(roots, None);
@ -445,7 +450,7 @@ pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) {
/// Set all the `SyntaxInfo` values for all the `roots` on a single
/// side (LHS or RHS).
fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) {
fn init_info_on_side<'a>(roots: &[SyntaxArenaId<'a>], next_id: &mut SyntaxId) {
set_parent(roots, None);
set_num_ancestors(roots, 0);
set_num_after(roots, 0);
@ -492,7 +497,7 @@ fn set_content_is_unique(nodes: &[&Syntax]) {
set_content_is_unique_from_counts(nodes, &counts);
}
fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
fn set_prev_sibling<'a>(nodes: &[SyntaxArenaId<'a>]) {
let mut prev = None;
for node in nodes {
@ -505,7 +510,7 @@ fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
}
}
fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
fn set_next_sibling<'a>(nodes: &[SyntaxArenaId<'a>]) {
for (i, node) in nodes.iter().enumerate() {
let sibling = nodes.get(i + 1).copied();
node.info().next_sibling.set(sibling);
@ -518,7 +523,7 @@ fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
/// For every syntax node in the tree, mark the previous node
/// according to a preorder traversal.
fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
fn set_prev<'a>(nodes: &[SyntaxArenaId<'a>], parent: Option<SyntaxArenaId<'a>>) {
for (i, node) in nodes.iter().enumerate() {
let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) };
@ -529,7 +534,7 @@ fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
}
}
fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
fn set_parent<'a>(nodes: &[SyntaxArenaId<'a>], parent: Option<SyntaxArenaId<'a>>) {
for node in nodes {
node.info().parent.set(parent);
if let List { children, .. } = node {
@ -928,7 +933,7 @@ impl MatchedPos {
/// Walk `nodes` and return a vec of all the changed positions.
pub fn change_positions<'a>(
nodes: &[&'a Syntax<'a>],
nodes: &[SyntaxArenaId<'a>],
change_map: &ChangeMap<'a>,
) -> Vec<MatchedPos> {
let mut positions = Vec::new();
@ -937,7 +942,7 @@ pub fn change_positions<'a>(
}
fn change_positions_<'a>(
nodes: &[&'a Syntax<'a>],
nodes: &[SyntaxArenaId<'a>],
change_map: &ChangeMap<'a>,
positions: &mut Vec<MatchedPos>,
) {