Use id-arena for Vertex

id_arena_for_vertex
Wilfred Hughes 2023-08-18 00:54:07 +07:00
parent b6d8ecbd4f
commit affce75a37
4 changed files with 126 additions and 97 deletions

7
Cargo.lock generated

@ -245,6 +245,7 @@ dependencies = [
"glob",
"hashbrown 0.12.3",
"humansize",
"id-arena",
"itertools",
"lazy_static",
"libc",
@ -383,6 +384,12 @@ dependencies = [
"quick-error",
]
[[package]]
name = "id-arena"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
[[package]]
name = "indexmap"
version = "1.7.0"

@ -66,6 +66,7 @@ hashbrown = "0.12.3"
humansize = "2.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
id-arena = "2.2.1"
[dev-dependencies]
# assert_cmd 2.0.6 requires rust 1.60

@ -5,11 +5,11 @@ use std::{cmp::Reverse, env};
use crate::{
diff::changes::ChangeMap,
diff::graph::{populate_change_map, set_neighbours, Edge, Vertex},
diff::graph::{populate_change_map, set_neighbours, Edge, Vertex, VertexId},
hash::DftHashMap,
parse::syntax::Syntax,
};
use bumpalo::Bump;
use id_arena::Arena;
use itertools::Itertools;
use radix_heap::RadixHeapMap;
@ -17,32 +17,36 @@ use radix_heap::RadixHeapMap;
pub struct ExceededGraphLimit {}
/// Return the shortest route from `start` to the end vertex.
fn shortest_vertex_path<'s, 'b>(
start: &'b Vertex<'s, 'b>,
vertex_arena: &'b Bump,
fn shortest_vertex_path<'s>(
start: VertexId<'s>,
vertex_arena: &mut Arena<Vertex<'s>>,
size_hint: usize,
graph_limit: usize,
) -> Result<Vec<&'b Vertex<'s, 'b>>, ExceededGraphLimit> {
) -> Result<Vec<VertexId<'s>>, ExceededGraphLimit> {
// We want to visit nodes with the shortest distance first, but
// RadixHeapMap is a max-heap. Ensure nodes are wrapped with
// Reverse to flip comparisons.
let mut heap: RadixHeapMap<Reverse<_>, &'b Vertex<'s, 'b>> = RadixHeapMap::new();
let mut heap: RadixHeapMap<Reverse<_>, VertexId<'s>> = RadixHeapMap::new();
heap.push(Reverse(0), start);
let mut seen = DftHashMap::default();
seen.reserve(size_hint);
let end: &'b Vertex<'s, 'b> = loop {
let end: VertexId<'s> = loop {
match heap.pop() {
Some((Reverse(distance), current)) => {
if current.is_end() {
break current;
Some((Reverse(distance), current_id)) => {
{
if (vertex_arena[current_id]).is_end() {
break current_id;
}
}
set_neighbours(current, vertex_arena, &mut seen);
set_neighbours(current_id, vertex_arena, &mut seen);
let current = &vertex_arena[current_id];
for neighbour in current.neighbours.borrow().as_ref().unwrap() {
let (edge, next) = neighbour;
let (edge, next_id) = neighbour;
let next = &vertex_arena[*next_id];
let distance_to_next = distance + edge.cost();
let found_shorter_route = match next.predecessor.get() {
@ -51,15 +55,16 @@ fn shortest_vertex_path<'s, 'b>(
};
if found_shorter_route {
next.predecessor.replace(Some((distance_to_next, current)));
heap.push(Reverse(distance_to_next), next);
next.predecessor
.replace(Some((distance_to_next, current_id)));
heap.push(Reverse(distance_to_next), *next_id);
}
}
if seen.len() > graph_limit {
info!(
"Reached graph limit, arena consumed {} bytes",
vertex_arena.allocated_bytes(),
"Reached graph limit, arena allocated {} items",
vertex_arena.len(),
);
return Err(ExceededGraphLimit {});
}
@ -69,38 +74,39 @@ fn shortest_vertex_path<'s, 'b>(
};
info!(
"Saw {} vertices (a Vertex is {} bytes), arena consumed {} bytes, with {} vertices left on heap.",
"Saw {} vertices (a Vertex is {} bytes), arena allocated {} items, with {} vertices left on heap.",
seen.len(),
std::mem::size_of::<Vertex>(),
vertex_arena.allocated_bytes(),
vertex_arena.len(),
heap.len(),
);
let mut current = Some((0, end));
let mut vertex_route: Vec<&'b Vertex<'s, 'b>> = vec![];
while let Some((_, node)) = current {
vertex_route.push(node);
current = node.predecessor.get();
let mut vertex_route: Vec<VertexId<'s>> = vec![];
while let Some((_, node_id)) = current {
vertex_route.push(node_id);
current = vertex_arena[node_id].predecessor.get();
}
vertex_route.reverse();
Ok(vertex_route)
}
fn shortest_path_with_edges<'s, 'b>(
route: &[&'b Vertex<'s, 'b>],
) -> Vec<(Edge, &'b Vertex<'s, 'b>)> {
fn shortest_path_with_edges<'s>(
route: &[VertexId<'s>],
vertex_arena: &Arena<Vertex<'s>>,
) -> Vec<(Edge, VertexId<'s>)> {
let mut prev = route.first().expect("Expected non-empty route");
let mut cost = 0;
let mut res = vec![];
for vertex in route.iter().skip(1) {
let edge = edge_between(prev, vertex);
for vertex_id in route.iter().skip(1) {
let edge = edge_between(*prev, *vertex_id, vertex_arena);
res.push((edge, *prev));
cost += edge.cost();
prev = vertex;
prev = vertex_id;
}
debug!("Found a path of {} with cost {}.", route.len(), cost);
@ -111,27 +117,34 @@ fn shortest_path_with_edges<'s, 'b>(
///
/// The vec returned does not return the very last vertex. This is
/// necessary because a route of N vertices only has N-1 edges.
fn shortest_path<'s, 'b>(
start: Vertex<'s, 'b>,
vertex_arena: &'b Bump,
fn shortest_path<'s>(
start: Vertex<'s>,
vertex_arena: &mut Arena<Vertex<'s>>,
size_hint: usize,
graph_limit: usize,
) -> Result<Vec<(Edge, &'b Vertex<'s, 'b>)>, ExceededGraphLimit> {
let start: &'b Vertex<'s, 'b> = vertex_arena.alloc(start);
) -> Result<Vec<(Edge, VertexId<'s>)>, ExceededGraphLimit> {
let start: VertexId<'s> = vertex_arena.alloc(start);
let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?;
Ok(shortest_path_with_edges(&vertex_path))
Ok(shortest_path_with_edges(&vertex_path, vertex_arena))
}
fn edge_between<'s, 'b>(before: &Vertex<'s, 'b>, after: &Vertex<'s, 'b>) -> Edge {
assert_ne!(before, after);
fn edge_between<'s>(
before_id: VertexId<'s>,
after_id: VertexId<'s>,
vertex_arena: &Arena<Vertex<'s>>,
) -> Edge {
assert_ne!(before_id, after_id);
let before = &vertex_arena[before_id];
let after = &vertex_arena[after_id];
let mut shortest_edge: Option<Edge> = None;
if let Some(neighbours) = &*before.neighbours.borrow() {
for neighbour in neighbours {
let (edge, next) = *neighbour;
let (edge, next_id) = *neighbour;
// If there are multiple edges that can take us to `next`,
// prefer the shortest.
if *next == *after {
if next_id == after_id {
let is_shorter = match shortest_edge {
Some(prev_edge) => edge.cost() < prev_edge.cost(),
None => true,
@ -213,9 +226,9 @@ pub fn mark_syntax<'a>(
let size_hint = std::cmp::min(lhs_node_count * rhs_node_count, graph_limit);
let start = Vertex::new(lhs_syntax, rhs_syntax);
let vertex_arena = Bump::new();
let mut vertex_arena = Arena::new();
let route = shortest_path(start, &vertex_arena, size_hint, graph_limit)?;
let route = shortest_path(start, &mut vertex_arena, size_hint, graph_limit)?;
let print_length = if env::var("DFT_VERBOSE").is_ok() {
50
@ -227,7 +240,8 @@ pub fn mark_syntax<'a>(
print_length,
route
.iter()
.map(|(edge, v)| {
.map(|(edge, v_id)| {
let v = &vertex_arena[*v_id];
format!(
"{:20} {:20} --- {:3} {:?}",
v.lhs_syntax
@ -242,7 +256,7 @@ pub fn mark_syntax<'a>(
.collect_vec()
);
populate_change_map(&route, change_map);
populate_change_map(&route, &vertex_arena, change_map);
Ok(())
}
@ -258,7 +272,7 @@ mod tests {
};
use itertools::Itertools;
use typed_arena::Arena;
use typed_arena::Arena as TypedArena;
fn pos_helper(line: u32) -> Vec<SingleLineSpan> {
vec![SingleLineSpan {
@ -270,7 +284,7 @@ mod tests {
#[test]
fn identical_atoms() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(0), "foo", AtomKind::Normal);
// Same content as LHS.
@ -278,8 +292,8 @@ mod tests {
init_all_info(&[lhs], &[rhs]);
let start = Vertex::new(Some(lhs), Some(rhs));
let vertex_arena = Bump::new();
let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let mut vertex_arena = Arena::<Vertex>::new();
let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@ -293,7 +307,7 @@ mod tests {
#[test]
fn extra_atom_lhs() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = vec![Syntax::new_list(
&arena,
@ -320,8 +334,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
let vertex_arena = Bump::new();
let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let mut vertex_arena = Arena::new();
let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@ -337,7 +351,7 @@ mod tests {
#[test]
fn repeated_atoms() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = vec![Syntax::new_list(
&arena,
@ -362,8 +376,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
let vertex_arena = Bump::new();
let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let mut vertex_arena = Arena::new();
let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@ -380,7 +394,7 @@ mod tests {
#[test]
fn atom_after_empty_list() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = vec![Syntax::new_list(
&arena,
@ -408,8 +422,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
let vertex_arena = Bump::new();
let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let mut vertex_arena = Arena::new();
let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@ -431,7 +445,7 @@ mod tests {
#[test]
fn replace_similar_comment() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = vec![Syntax::new_atom(
&arena,
@ -449,8 +463,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
let vertex_arena = Bump::new();
let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let mut vertex_arena = Arena::new();
let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@ -463,7 +477,7 @@ mod tests {
#[test]
fn replace_very_different_comment() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = vec![Syntax::new_atom(
&arena,
@ -481,8 +495,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
let vertex_arena = Bump::new();
let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let mut vertex_arena = Arena::new();
let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@ -495,7 +509,7 @@ mod tests {
#[test]
fn replace_comment_prefer_most_similar() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = vec![
Syntax::new_atom(
@ -521,8 +535,8 @@ mod tests {
init_all_info(&lhs, &rhs);
let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied());
let vertex_arena = Bump::new();
let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let mut vertex_arena = Arena::new();
let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap();
let actions = route.iter().map(|(action, _)| *action).collect_vec();
assert_eq!(
@ -538,7 +552,7 @@ mod tests {
#[test]
fn mark_syntax_equal_atoms() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
let rhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
init_all_info(&[lhs], &[rhs]);
@ -552,7 +566,7 @@ mod tests {
#[test]
fn mark_syntax_different_atoms() {
let arena = Arena::new();
let arena = TypedArena::new();
let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal);
let rhs = Syntax::new_atom(&arena, pos_helper(1), "bar", AtomKind::Normal);
init_all_info(&[lhs], &[rhs]);

@ -1,7 +1,7 @@
//! A graph representation for computing tree diffs.
use bumpalo::Bump;
use hashbrown::hash_map::RawEntryMut;
use id_arena::{Arena, Id};
use std::{
cell::{Cell, RefCell},
cmp::min,
@ -49,9 +49,9 @@ use Edge::*;
/// ^ ^
/// ```
#[derive(Debug, Clone)]
pub struct Vertex<'s, 'b> {
pub neighbours: RefCell<Option<Vec<(Edge, &'b Vertex<'s, 'b>)>>>,
pub predecessor: Cell<Option<(u32, &'b Vertex<'s, 'b>)>>,
pub struct Vertex<'s> {
pub neighbours: RefCell<Option<Vec<(Edge, VertexId<'s>)>>>,
pub predecessor: Cell<Option<(u32, VertexId<'s>)>>,
// TODO: experiment with storing SyntaxId only, and have a HashMap
// from SyntaxId to &Syntax.
pub lhs_syntax: Option<&'s Syntax<'s>>,
@ -61,7 +61,9 @@ pub struct Vertex<'s, 'b> {
rhs_parent_id: Option<SyntaxId>,
}
impl<'s, 'b> PartialEq for Vertex<'s, 'b> {
pub type VertexId<'s> = Id<Vertex<'s>>;
impl<'s> PartialEq for Vertex<'s> {
fn eq(&self, other: &Self) -> bool {
// Strictly speaking, we should compare the whole
// EnteredDelimiter stack, not just the immediate
@ -99,9 +101,9 @@ impl<'s, 'b> PartialEq for Vertex<'s, 'b> {
b0 && b1 && b2
}
}
impl<'s, 'b> Eq for Vertex<'s, 'b> {}
impl<'s> Eq for Vertex<'s> {}
impl<'s, 'b> Hash for Vertex<'s, 'b> {
impl<'s> Hash for Vertex<'s> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.lhs_syntax.map(|node| node.id()).hash(state);
self.rhs_syntax.map(|node| node.id()).hash(state);
@ -247,7 +249,7 @@ fn push_rhs_delimiter<'s>(
}
}
impl<'s, 'b> Vertex<'s, 'b> {
impl<'s> Vertex<'s> {
pub fn is_end(&self) -> bool {
self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty()
}
@ -344,11 +346,11 @@ impl Edge {
}
}
fn allocate_if_new<'s, 'b>(
v: Vertex<'s, 'b>,
alloc: &'b Bump,
seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>,
) -> &'b Vertex<'s, 'b> {
fn allocate_if_new<'a, 's>(
v: Vertex<'s>,
alloc: &'a mut Arena<Vertex<'s>>,
seen: &mut DftHashMap<Vertex<'s>, Vec<VertexId<'s>>>,
) -> VertexId<'s> {
// We use the entry API so that we only need to do a single lookup
// for access and insert.
match seen.raw_entry_mut().from_key(&v) {
@ -359,15 +361,16 @@ fn allocate_if_new<'s, 'b>(
// nestings for each syntax node pair.
if let Some(allocated) = existing.last() {
if existing.len() >= 2 {
return allocated;
return *allocated;
}
}
// If we have seen exactly this graph node before, even
// considering parenthesis matching, return it.
for existing_node in existing.iter() {
for existing_node_id in existing.iter() {
let existing_node = &alloc[*existing_node_id];
if existing_node.parents == v.parents {
return existing_node;
return *existing_node_id;
}
}
@ -378,18 +381,18 @@ fn allocate_if_new<'s, 'b>(
allocated
}
RawEntryMut::Vacant(vacant) => {
let allocated = alloc.alloc(v);
let allocated_id = alloc.alloc(v.clone());
// We know that this vec will never have more than 2
// nodes, and this code is very hot, so reserve.
//
// We still use a vec to enable experiments with the value
// of how many possible parenthesis nestings to explore.
let mut existing: Vec<&'b Vertex<'s, 'b>> = Vec::with_capacity(2);
existing.push(allocated);
let mut existing: Vec<VertexId<'s>> = Vec::with_capacity(2);
existing.push(allocated_id);
vacant.insert(allocated, existing);
allocated
vacant.insert(v, existing);
allocated_id
}
}
}
@ -474,20 +477,22 @@ fn pop_all_parents<'s>(
/// Compute the neighbours of `v` if we haven't previously done so,
/// and write them to the .neighbours cell inside `v`.
pub fn set_neighbours<'s, 'b>(
v: &Vertex<'s, 'b>,
alloc: &'b Bump,
seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>,
pub fn set_neighbours<'a, 's>(
v_id: VertexId<'s>,
alloc: &'a mut Arena<Vertex<'s>>,
seen: &mut DftHashMap<Vertex<'s>, Vec<VertexId<'s>>>,
) {
if v.neighbours.borrow().is_some() {
if alloc[v_id].neighbours.borrow().is_some() {
return;
}
// There are only seven pushes in this function, so that's sufficient.
let mut res: Vec<(Edge, &Vertex)> = Vec::with_capacity(7);
let mut res: Vec<(Edge, VertexId<'s>)> = Vec::with_capacity(7);
let v = &alloc[v_id];
if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) {
if lhs_syntax == rhs_syntax {
let v = &alloc[v_id];
let depth_difference = (lhs_syntax.num_ancestors() as i32
- rhs_syntax.num_ancestors() as i32)
.unsigned_abs();
@ -769,11 +774,13 @@ pub fn set_neighbours<'s, 'b>(
v.neighbours.replace(Some(res));
}
pub fn populate_change_map<'s, 'b>(
route: &[(Edge, &'b Vertex<'s, 'b>)],
pub fn populate_change_map<'s>(
route: &[(Edge, VertexId<'s>)],
vertex_arena: &Arena<Vertex<'s>>,
change_map: &mut ChangeMap<'s>,
) {
for (e, v) in route {
for (e, v_id) in route {
let v = &vertex_arena[*v_id];
match e {
UnchangedNode { .. } => {
// No change on this node or its children.