id_arena_syntax
Wilfred Hughes 2023-08-18 09:11:42 +07:00
parent b6d8ecbd4f
commit 4fa5505431
6 changed files with 101 additions and 53 deletions

7
Cargo.lock generated

@ -245,6 +245,7 @@ dependencies = [
"glob", "glob",
"hashbrown 0.12.3", "hashbrown 0.12.3",
"humansize", "humansize",
"id-arena",
"itertools", "itertools",
"lazy_static", "lazy_static",
"libc", "libc",
@ -383,6 +384,12 @@ dependencies = [
"quick-error", "quick-error",
] ]
[[package]]
name = "id-arena"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "1.7.0" version = "1.7.0"

@ -66,6 +66,7 @@ hashbrown = "0.12.3"
humansize = "2.1.3" humansize = "2.1.3"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
id-arena = "2.2.1"
[dev-dependencies] [dev-dependencies]
# assert_cmd 2.0.6 requires rust 1.60 # assert_cmd 2.0.6 requires rust 1.60

@ -2,14 +2,14 @@
use crate::{ use crate::{
hash::DftHashMap, hash::DftHashMap,
parse::syntax::{Syntax, SyntaxId}, parse::syntax::{Syntax, SyntaxArena, SyntaxArenaId, SyntaxId},
}; };
#[derive(PartialEq, Eq, Clone, Copy)] #[derive(PartialEq, Eq, Clone, Copy)]
pub enum ChangeKind<'a> { pub enum ChangeKind<'a> {
Unchanged(&'a Syntax<'a>), Unchanged(SyntaxArenaId<'a>),
ReplacedComment(&'a Syntax<'a>, &'a Syntax<'a>), ReplacedComment(SyntaxArenaId<'a>, SyntaxArenaId<'a>),
ReplacedString(&'a Syntax<'a>, &'a Syntax<'a>), ReplacedString(SyntaxArenaId<'a>, SyntaxArenaId<'a>),
Novel, Novel,
} }
@ -19,21 +19,36 @@ pub struct ChangeMap<'a> {
} }
impl<'a> ChangeMap<'a> { impl<'a> ChangeMap<'a> {
pub fn insert(&mut self, node: &'a Syntax<'a>, ck: ChangeKind<'a>) { pub fn insert(
&mut self,
node_arena: &SyntaxArena<'a>,
node_id: SyntaxArenaId<'a>,
ck: ChangeKind<'a>,
) {
let node = &node_arena[node_id];
self.changes.insert(node.id(), ck); self.changes.insert(node.id(), ck);
} }
pub fn get(&self, node: &Syntax<'a>) -> Option<ChangeKind<'a>> { pub fn get(
&self,
node_arena: &SyntaxArena<'a>,
node_id: SyntaxArenaId<'a>,
) -> Option<ChangeKind<'a>> {
let node = &node_arena[node_id];
self.changes.get(&node.id()).copied() self.changes.get(&node.id()).copied()
} }
} }
pub fn insert_deep_unchanged<'a>( pub fn insert_deep_unchanged<'a>(
node: &'a Syntax<'a>, node_arena: &SyntaxArena<'a>,
opposite_node: &'a Syntax<'a>, node_id: SyntaxArenaId<'a>,
opposite_node_id: SyntaxArenaId<'a>,
change_map: &mut ChangeMap<'a>, change_map: &mut ChangeMap<'a>,
) { ) {
change_map.insert(node, ChangeKind::Unchanged(opposite_node)); let node = &node_arena[node_id];
let opposite_node = &node_arena[opposite_node_id];
change_map.insert(node_arena, node_id, ChangeKind::Unchanged(opposite_node_id));
match (node, opposite_node) { match (node, opposite_node) {
( (
@ -47,7 +62,7 @@ pub fn insert_deep_unchanged<'a>(
}, },
) => { ) => {
for (child, opposite_child) in node_children.iter().zip(opposite_children) { for (child, opposite_child) in node_children.iter().zip(opposite_children) {
insert_deep_unchanged(child, opposite_child, change_map); insert_deep_unchanged(node_arena, *child, *opposite_child, change_map);
} }
} }
(Syntax::Atom { .. }, Syntax::Atom { .. }) => {} (Syntax::Atom { .. }, Syntax::Atom { .. }) => {}
@ -55,12 +70,17 @@ pub fn insert_deep_unchanged<'a>(
} }
} }
pub fn insert_deep_novel<'a>(node: &'a Syntax<'a>, change_map: &mut ChangeMap<'a>) { pub fn insert_deep_novel<'a>(
change_map.insert(node, ChangeKind::Novel); node_arena: &SyntaxArena<'a>,
node_id: SyntaxArenaId<'a>,
change_map: &mut ChangeMap<'a>,
) {
let node = &node_arena[node_id];
change_map.insert(node_arena, node_id, ChangeKind::Novel);
if let Syntax::List { children, .. } = node { if let Syntax::List { children, .. } = node {
for child in children.iter() { for child in children.iter() {
insert_deep_novel(child, change_map); insert_deep_novel(node_arena, *child, change_map);
} }
} }
} }

@ -7,7 +7,7 @@ use crate::{
diff::changes::ChangeMap, diff::changes::ChangeMap,
diff::graph::{populate_change_map, set_neighbours, Edge, Vertex}, diff::graph::{populate_change_map, set_neighbours, Edge, Vertex},
hash::DftHashMap, hash::DftHashMap,
parse::syntax::Syntax, parse::syntax::{Syntax, SyntaxArena},
}; };
use bumpalo::Bump; use bumpalo::Bump;
use itertools::Itertools; use itertools::Itertools;
@ -155,7 +155,7 @@ fn edge_between<'s, 'b>(before: &Vertex<'s, 'b>, after: &Vertex<'s, 'b>) -> Edge
} }
/// What is the total number of AST nodes? /// What is the total number of AST nodes?
fn node_count(root: Option<&Syntax>) -> u32 { fn node_count<'a>(node_arena: &SyntaxArena<'a>, root: Option<&Syntax>) -> u32 {
let mut node = root; let mut node = root;
let mut count = 0; let mut count = 0;
while let Some(current_node) = node { while let Some(current_node) = node {
@ -167,38 +167,39 @@ fn node_count(root: Option<&Syntax>) -> u32 {
}; };
count += current_count; count += current_count;
node = current_node.next_sibling(); node = current_node.next_sibling().map(|id| &node_arena[id]);
} }
count count
} }
/// How many top-level AST nodes do we have? /// How many top-level AST nodes do we have?
fn tree_count(root: Option<&Syntax>) -> u32 { fn tree_count<'a>(node_arena: &SyntaxArena<'a>, root: Option<&Syntax>) -> u32 {
let mut node = root; let mut node = root;
let mut count = 0; let mut count = 0;
while let Some(current_node) = node { while let Some(current_node) = node {
count += 1; count += 1;
node = current_node.next_sibling(); node = current_node.next_sibling().map(|id| &node_arena[id]);
} }
count count
} }
pub fn mark_syntax<'a>( pub fn mark_syntax<'a>(
node_arena: &SyntaxArena<'a>,
lhs_syntax: Option<&'a Syntax<'a>>, lhs_syntax: Option<&'a Syntax<'a>>,
rhs_syntax: Option<&'a Syntax<'a>>, rhs_syntax: Option<&'a Syntax<'a>>,
change_map: &mut ChangeMap<'a>, change_map: &mut ChangeMap<'a>,
graph_limit: usize, graph_limit: usize,
) -> Result<(), ExceededGraphLimit> { ) -> Result<(), ExceededGraphLimit> {
let lhs_node_count = node_count(lhs_syntax) as usize; let lhs_node_count = node_count(node_arena, lhs_syntax) as usize;
let rhs_node_count = node_count(rhs_syntax) as usize; let rhs_node_count = node_count(node_arena, rhs_syntax) as usize;
info!( info!(
"LHS nodes: {} ({} toplevel), RHS nodes: {} ({} toplevel)", "LHS nodes: {} ({} toplevel), RHS nodes: {} ({} toplevel)",
lhs_node_count, lhs_node_count,
tree_count(lhs_syntax), tree_count(node_arena, lhs_syntax),
rhs_node_count, rhs_node_count,
tree_count(rhs_syntax), tree_count(node_arena, rhs_syntax),
); );
// When there are a large number of changes, we end up building a // When there are a large number of changes, we end up building a

@ -16,7 +16,7 @@ use crate::{
stack::Stack, stack::Stack,
}, },
hash::DftHashMap, hash::DftHashMap,
parse::syntax::{AtomKind, Syntax, SyntaxId}, parse::syntax::{AtomKind, Syntax, SyntaxArena, SyntaxArenaId, SyntaxId},
}; };
use Edge::*; use Edge::*;
@ -408,18 +408,22 @@ fn looks_like_punctuation(node: &Syntax) -> bool {
/// Pop as many parents of `lhs_node` and `rhs_node` as /// Pop as many parents of `lhs_node` and `rhs_node` as
/// possible. Return the new syntax nodes and parents. /// possible. Return the new syntax nodes and parents.
fn pop_all_parents<'s>( fn pop_all_parents<'s>(
lhs_node: Option<&'s Syntax<'s>>, node_arena: &SyntaxArena<'s>,
rhs_node: Option<&'s Syntax<'s>>, lhs_node_id: Option<SyntaxArenaId<'s>>,
rhs_node_id: Option<SyntaxArenaId<'s>>,
lhs_parent_id: Option<SyntaxId>, lhs_parent_id: Option<SyntaxId>,
rhs_parent_id: Option<SyntaxId>, rhs_parent_id: Option<SyntaxId>,
parents: &Stack<EnteredDelimiter<'s>>, parents: &Stack<EnteredDelimiter<'s>>,
) -> ( ) -> (
Option<&'s Syntax<'s>>, Option<SyntaxArenaId<'s>>,
Option<&'s Syntax<'s>>, Option<SyntaxArenaId<'s>>,
Option<SyntaxId>, Option<SyntaxId>,
Option<SyntaxId>, Option<SyntaxId>,
Stack<EnteredDelimiter<'s>>, Stack<EnteredDelimiter<'s>>,
) { ) {
let lhs_node = lhs_node_id.map(|id| &node_arena[id]);
let rhs_node = rhs_node_id.map(|id| &node_arena[id]);
let mut lhs_node = lhs_node; let mut lhs_node = lhs_node;
let mut rhs_node = rhs_node; let mut rhs_node = rhs_node;
let mut lhs_parent_id = lhs_parent_id; let mut lhs_parent_id = lhs_parent_id;
@ -432,8 +436,8 @@ fn pop_all_parents<'s>(
// Move to next after LHS parent. // Move to next after LHS parent.
// Continue from sibling of parent. // Continue from sibling of parent.
lhs_node = lhs_parent.next_sibling(); lhs_node = lhs_parent.next_sibling().map(|id| &node_arena[id]);
lhs_parent_id = lhs_parent.parent().map(Syntax::id); lhs_parent_id = lhs_parent.parent().map(|id| node_arena[id].id());
parents = parents_next; parents = parents_next;
continue; continue;
} }
@ -444,8 +448,8 @@ fn pop_all_parents<'s>(
// Move to next after RHS parent. // Move to next after RHS parent.
// Continue from sibling of parent. // Continue from sibling of parent.
rhs_node = rhs_parent.next_sibling(); rhs_node = rhs_parent.next_sibling().map(|id| &node_arena[id]);
rhs_parent_id = rhs_parent.parent().map(Syntax::id); rhs_parent_id = rhs_parent.parent().map(|id| node_arena[id].id());
parents = parents_next; parents = parents_next;
continue; continue;
} }
@ -457,10 +461,12 @@ fn pop_all_parents<'s>(
// Continue from sibling of parent. // Continue from sibling of parent.
if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&parents) { if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&parents) {
lhs_node = lhs_parent.next_sibling(); lhs_node = lhs_parent.next_sibling().map(|id| &node_arena[id]);
rhs_node = rhs_parent.next_sibling(); lhs_parent_id = lhs_parent.parent().map(|id| node_arena[id].id());
lhs_parent_id = lhs_parent.parent().map(Syntax::id);
rhs_parent_id = rhs_parent.parent().map(Syntax::id); rhs_node = rhs_parent.next_sibling().map(|id| &node_arena[id]);
rhs_parent_id = rhs_parent.parent().map(|id| node_arena[id].id());
parents = parents_next; parents = parents_next;
continue; continue;
} }
@ -469,12 +475,19 @@ fn pop_all_parents<'s>(
break; break;
} }
(lhs_node, rhs_node, lhs_parent_id, rhs_parent_id, parents) (
lhs_node_id,
rhs_node_id,
lhs_parent_id,
rhs_parent_id,
parents,
)
} }
/// Compute the neighbours of `v` if we haven't previously done so, /// Compute the neighbours of `v` if we haven't previously done so,
/// and write them to the .neighbours cell inside `v`. /// and write them to the .neighbours cell inside `v`.
pub fn set_neighbours<'s, 'b>( pub fn set_neighbours<'s, 'b>(
node_arena: &SyntaxArena<'s>,
v: &Vertex<'s, 'b>, v: &Vertex<'s, 'b>,
alloc: &'b Bump, alloc: &'b Bump,
seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>, seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>,
@ -496,6 +509,7 @@ pub fn set_neighbours<'s, 'b>(
// Both nodes are equal, the happy case. // Both nodes are equal, the happy case.
let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) = pop_all_parents( let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) = pop_all_parents(
node_arena,
lhs_syntax.next_sibling(), lhs_syntax.next_sibling(),
rhs_syntax.next_sibling(), rhs_syntax.next_sibling(),
v.lhs_parent_id, v.lhs_parent_id,

@ -4,6 +4,8 @@
use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32}; use std::{cell::Cell, env, fmt, hash::Hash, num::NonZeroU32};
use typed_arena::Arena; use typed_arena::Arena;
use id_arena::Arena as IdArena;
use id_arena::Id;
use crate::{ use crate::{
diff::changes::ChangeKind, diff::changes::ChangeKind,
@ -42,6 +44,9 @@ impl<'a> fmt::Debug for ChangeKind<'a> {
} }
} }
pub type SyntaxArena<'a> = IdArena<Syntax<'a>>;
pub type SyntaxArenaId<'a> = Id<Syntax<'a>>;
pub type SyntaxId = NonZeroU32; pub type SyntaxId = NonZeroU32;
/// Fields that are common to both `Syntax::List` and `Syntax::Atom`. /// Fields that are common to both `Syntax::List` and `Syntax::Atom`.
@ -98,7 +103,7 @@ pub enum Syntax<'a> {
info: SyntaxInfo<'a>, info: SyntaxInfo<'a>,
open_position: Vec<SingleLineSpan>, open_position: Vec<SingleLineSpan>,
open_content: String, open_content: String,
children: Vec<&'a Syntax<'a>>, children: Vec<SyntaxArenaId<'a>>,
close_position: Vec<SingleLineSpan>, close_position: Vec<SingleLineSpan>,
close_content: String, close_content: String,
num_descendants: u32, num_descendants: u32,
@ -193,10 +198,10 @@ impl<'a> Syntax<'a> {
arena: &'a Arena<Syntax<'a>>, arena: &'a Arena<Syntax<'a>>,
open_content: &str, open_content: &str,
open_position: Vec<SingleLineSpan>, open_position: Vec<SingleLineSpan>,
children: Vec<&'a Syntax<'a>>, children: Vec<SyntaxArenaId<'a>>,
close_content: &str, close_content: &str,
close_position: Vec<SingleLineSpan>, close_position: Vec<SingleLineSpan>,
) -> &'a Syntax<'a> { ) -> SyntaxArenaId<'a> {
// Skip empty atoms: they aren't displayed, so there's no // Skip empty atoms: they aren't displayed, so there's no
// point making our syntax tree bigger. These occur when we're // point making our syntax tree bigger. These occur when we're
// parsing incomplete or malformed programs. // parsing incomplete or malformed programs.
@ -247,7 +252,7 @@ impl<'a> Syntax<'a> {
mut position: Vec<SingleLineSpan>, mut position: Vec<SingleLineSpan>,
mut content: &str, mut content: &str,
kind: AtomKind, kind: AtomKind,
) -> &'a Syntax<'a> { ) -> SyntaxArenaId<'a> {
// If a parser hasn't cleaned up \r on CRLF files with // If a parser hasn't cleaned up \r on CRLF files with
// comments, discard it. // comments, discard it.
if content.ends_with('\r') { if content.ends_with('\r') {
@ -273,11 +278,11 @@ impl<'a> Syntax<'a> {
} }
} }
pub fn parent(&self) -> Option<&'a Syntax<'a>> { pub fn parent(&self) -> Option<SyntaxArenaId<'a>> {
self.info().parent.get() self.info().parent.get()
} }
pub fn next_sibling(&self) -> Option<&'a Syntax<'a>> { pub fn next_sibling(&self) -> Option<SyntaxArenaId<'a>> {
self.info().next_sibling.get() self.info().next_sibling.get()
} }
@ -330,7 +335,7 @@ impl<'a> Syntax<'a> {
} }
} }
pub fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> { pub fn comment_positions<'a>(nodes: &[SyntaxArenaId<'a>]) -> Vec<SingleLineSpan> {
fn walk_comment_positions(node: &Syntax<'_>, positions: &mut Vec<SingleLineSpan>) { fn walk_comment_positions(node: &Syntax<'_>, positions: &mut Vec<SingleLineSpan>) {
match node { match node {
List { children, .. } => { List { children, .. } => {
@ -355,13 +360,13 @@ pub fn comment_positions<'a>(nodes: &[&'a Syntax<'a>]) -> Vec<SingleLineSpan> {
} }
/// Initialise all the fields in `SyntaxInfo`. /// Initialise all the fields in `SyntaxInfo`.
pub fn init_all_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) { pub fn init_all_info<'a>(lhs_roots: &[SyntaxArenaId<'a>], rhs_roots: &[SyntaxArenaId<'a>]) {
init_info(lhs_roots, rhs_roots); init_info(lhs_roots, rhs_roots);
init_next_prev(lhs_roots); init_next_prev(lhs_roots);
init_next_prev(rhs_roots); init_next_prev(rhs_roots);
} }
fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) { fn init_info<'a>(lhs_roots: &[SyntaxArenaId<'a>], rhs_roots: &[SyntaxArenaId<'a>]) {
let mut id = NonZeroU32::new(1).unwrap(); let mut id = NonZeroU32::new(1).unwrap();
init_info_on_side(lhs_roots, &mut id); init_info_on_side(lhs_roots, &mut id);
init_info_on_side(rhs_roots, &mut id); init_info_on_side(rhs_roots, &mut id);
@ -437,7 +442,7 @@ fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) {
} }
} }
} }
pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) { pub fn init_next_prev<'a>(roots: &[SyntaxArenaId<'a>]) {
set_prev_sibling(roots); set_prev_sibling(roots);
set_next_sibling(roots); set_next_sibling(roots);
set_prev(roots, None); set_prev(roots, None);
@ -445,7 +450,7 @@ pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) {
/// Set all the `SyntaxInfo` values for all the `roots` on a single /// Set all the `SyntaxInfo` values for all the `roots` on a single
/// side (LHS or RHS). /// side (LHS or RHS).
fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) { fn init_info_on_side<'a>(roots: &[SyntaxArenaId<'a>], next_id: &mut SyntaxId) {
set_parent(roots, None); set_parent(roots, None);
set_num_ancestors(roots, 0); set_num_ancestors(roots, 0);
set_num_after(roots, 0); set_num_after(roots, 0);
@ -492,7 +497,7 @@ fn set_content_is_unique(nodes: &[&Syntax]) {
set_content_is_unique_from_counts(nodes, &counts); set_content_is_unique_from_counts(nodes, &counts);
} }
fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) { fn set_prev_sibling<'a>(nodes: &[SyntaxArenaId<'a>]) {
let mut prev = None; let mut prev = None;
for node in nodes { for node in nodes {
@ -505,7 +510,7 @@ fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
} }
} }
fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) { fn set_next_sibling<'a>(nodes: &[SyntaxArenaId<'a>]) {
for (i, node) in nodes.iter().enumerate() { for (i, node) in nodes.iter().enumerate() {
let sibling = nodes.get(i + 1).copied(); let sibling = nodes.get(i + 1).copied();
node.info().next_sibling.set(sibling); node.info().next_sibling.set(sibling);
@ -518,7 +523,7 @@ fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
/// For every syntax node in the tree, mark the previous node /// For every syntax node in the tree, mark the previous node
/// according to a preorder traversal. /// according to a preorder traversal.
fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { fn set_prev<'a>(nodes: &[SyntaxArenaId<'a>], parent: Option<SyntaxArenaId<'a>>) {
for (i, node) in nodes.iter().enumerate() { for (i, node) in nodes.iter().enumerate() {
let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) }; let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) };
@ -529,7 +534,7 @@ fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
} }
} }
fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) { fn set_parent<'a>(nodes: &[SyntaxArenaId<'a>], parent: Option<SyntaxArenaId<'a>>) {
for node in nodes { for node in nodes {
node.info().parent.set(parent); node.info().parent.set(parent);
if let List { children, .. } = node { if let List { children, .. } = node {
@ -928,7 +933,7 @@ impl MatchedPos {
/// Walk `nodes` and return a vec of all the changed positions. /// Walk `nodes` and return a vec of all the changed positions.
pub fn change_positions<'a>( pub fn change_positions<'a>(
nodes: &[&'a Syntax<'a>], nodes: &[SyntaxArenaId<'a>],
change_map: &ChangeMap<'a>, change_map: &ChangeMap<'a>,
) -> Vec<MatchedPos> { ) -> Vec<MatchedPos> {
let mut positions = Vec::new(); let mut positions = Vec::new();
@ -937,7 +942,7 @@ pub fn change_positions<'a>(
} }
fn change_positions_<'a>( fn change_positions_<'a>(
nodes: &[&'a Syntax<'a>], nodes: &[SyntaxArenaId<'a>],
change_map: &ChangeMap<'a>, change_map: &ChangeMap<'a>,
positions: &mut Vec<MatchedPos>, positions: &mut Vec<MatchedPos>,
) { ) {