Skip to content

Commit 3adad53

Browse files
authored
feat: implement arena-based memory allocation (#24)
1 parent e208c9e commit 3adad53

9 files changed

Lines changed: 1519 additions & 1282 deletions

File tree

src/arena.rs

Lines changed: 316 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,70 @@
1-
use crate::{Attrs, Expr, ExprKey, ExprPtr, ExprRef, Value};
2-
use rustc_hash::FxBuildHasher;
1+
use crate::typing::{ArgsRef, Record, Type, TypeRef};
2+
use crate::{Attrs, ExprKey, ExprPtr, ExprRef, Field, RecRef, StrRef, Value, VecRef};
3+
use rustc_hash::{FxBuildHasher, FxHashMap};
34
use serde::Serialize;
5+
use std::collections::hash_map::Entry;
46
use std::hash::BuildHasher;
57

6-
#[derive(Debug, Serialize)]
7-
struct Slot {
8-
attrs: Attrs,
9-
value: Value,
10-
}
11-
12-
/// An arena-based allocator for EventQL expressions.
8+
/// An arena-based allocator for interning strings.
139
///
14-
/// The `ExprArena` provides a memory-efficient way to store and manage AST nodes
15-
/// by using a flat vector and returning lightweight [`ExprRef`] handles.
10+
/// Deduplicates strings by hash and returns lightweight [`StrRef`] handles for O(1) lookups.
1611
#[derive(Default, Serialize)]
17-
pub struct ExprArena {
12+
pub struct StringArena {
1813
#[serde(skip_serializing)]
1914
hasher: FxBuildHasher,
20-
slots: Vec<Slot>,
15+
16+
cache: FxHashMap<u64, StrRef>,
17+
slots: Vec<String>,
18+
}
19+
20+
impl StringArena {
21+
/// Interns a string and returns its [`StrRef`]. Returns the existing reference if already interned.
22+
pub fn alloc(&mut self, value: &str) -> StrRef {
23+
match self.cache.entry(self.hasher.hash_one(value)) {
24+
Entry::Occupied(entry) => *entry.get(),
25+
Entry::Vacant(entry) => {
26+
let key = StrRef(self.slots.len());
27+
entry.insert(key);
28+
self.slots.push(value.to_owned());
29+
30+
key
31+
}
32+
}
33+
}
34+
35+
/// Retrieves the string associated with the given [`StrRef`].
36+
pub fn get(&self, key: StrRef) -> &str {
37+
&self.slots[key.0]
38+
}
39+
40+
/// Compares two interned strings for case-insensitive ASCII equality.
41+
pub fn eq_ignore_ascii_case(&self, ka: StrRef, kb: StrRef) -> bool {
42+
self.get(ka).eq_ignore_ascii_case(self.get(kb))
43+
}
2144
}
2245

23-
/// A view into a single node within an [`ExprArena`].
46+
/// An expression node stored in the [`ExprArena`].
2447
///
25-
/// This struct provides access to the attributes and value of a node
26-
/// without transferring ownership. It's typically obtained by calling [`ExprArena::get`].
27-
#[derive(Debug, Copy, Clone)]
28-
pub struct Node<'a> {
29-
/// Metadata about this expression (e.g., source position)
48+
/// Combines the expression's metadata ([`Attrs`]) with its actual content ([`Value`]).
49+
#[derive(Debug, Clone, Copy, Serialize)]
50+
pub struct Expr {
51+
/// Metadata including source position.
3052
pub attrs: Attrs,
31-
/// The actual kind and value of the expression
32-
pub value: &'a Value,
33-
/// The stable reference to this node in the arena
34-
pub node_ref: ExprRef,
53+
/// The kind and content of this expression.
54+
pub value: Value,
3555
}
3656

37-
impl<'a> Node<'a> {
38-
pub fn as_expr(&self) -> Expr {
39-
Expr {
40-
attrs: self.attrs,
41-
node_ref: self.node_ref,
42-
}
43-
}
57+
/// An arena-based allocator for EventQL expressions.
58+
///
59+
/// The `ExprArena` provides a memory-efficient way to store and manage AST nodes
60+
/// by using a flat vector and returning lightweight [`ExprRef`] handles.
61+
#[derive(Default, Serialize)]
62+
pub struct ExprArena {
63+
#[serde(skip_serializing)]
64+
hasher: FxBuildHasher,
65+
exprs: Vec<Expr>,
66+
vecs: Vec<Vec<ExprRef>>,
67+
recs: Vec<Vec<Field>>,
4468
}
4569

4670
impl ExprArena {
@@ -50,10 +74,10 @@ impl ExprArena {
5074
/// to create a stable [`ExprKey`], and stores it in the arena. It returns
5175
/// an [`ExprRef`] which can be used to retrieve the expression later.
5276
pub fn alloc(&mut self, attrs: Attrs, value: Value) -> ExprRef {
53-
let key = ExprKey(self.hasher.hash_one(&value));
77+
let key = ExprKey(self.hasher.hash_one(value));
5478

55-
let ptr = ExprPtr(self.slots.len());
56-
self.slots.push(Slot { attrs, value });
79+
let ptr = ExprPtr(self.exprs.len());
80+
self.exprs.push(Expr { attrs, value });
5781

5882
ExprRef { key, ptr }
5983
}
@@ -64,12 +88,265 @@ impl ExprArena {
6488
///
6589
/// Panics if the [`ExprRef`] contains an invalid pointer that is out of bounds
6690
/// of the arena's internal storage.
67-
pub fn get(&self, node_ref: ExprRef) -> Node<'_> {
68-
let slot = &self.slots[node_ref.ptr.0];
69-
Node {
70-
attrs: slot.attrs,
71-
value: &slot.value,
72-
node_ref,
91+
pub fn get(&self, node_ref: ExprRef) -> Expr {
92+
self.exprs[node_ref.ptr.0]
93+
}
94+
95+
/// Allocates a vector of expression references and returns a [`VecRef`] handle.
96+
pub fn alloc_vec(&mut self, values: Vec<ExprRef>) -> VecRef {
97+
let key = VecRef(self.vecs.len());
98+
self.vecs.push(values);
99+
100+
key
101+
}
102+
103+
/// Allocates a vector of record fields and returns a [`RecRef`] handle.
104+
pub fn alloc_rec(&mut self, values: Vec<Field>) -> RecRef {
105+
let key = RecRef(self.recs.len());
106+
self.recs.push(values);
107+
108+
key
109+
}
110+
111+
/// Returns the slice of expression references for the given [`VecRef`].
112+
pub fn vec(&self, ptr: VecRef) -> &[ExprRef] {
113+
&self.vecs[ptr.0]
114+
}
115+
116+
/// Returns the expression reference at index `idx` within the given [`VecRef`].
117+
pub fn vec_get(&self, ptr: VecRef, idx: usize) -> ExprRef {
118+
self.vecs[ptr.0][idx]
119+
}
120+
121+
/// Returns an iterator over valid indices for the given [`VecRef`].
122+
pub fn vec_idxes(&self, ptr: VecRef) -> impl Iterator<Item = usize> + use<> {
123+
0..self.vec(ptr).len()
124+
}
125+
126+
/// Returns the vector of fields for the given [`RecRef`].
127+
pub fn rec(&self, ptr: RecRef) -> &Vec<Field> {
128+
&self.recs[ptr.0]
129+
}
130+
131+
/// Returns the field at index `idx` within the given [`RecRef`].
132+
pub fn rec_get(&self, ptr: RecRef, idx: usize) -> Field {
133+
self.recs[ptr.0][idx]
134+
}
135+
136+
/// Returns an iterator over valid indices for the given [`RecRef`].
137+
pub fn rec_idxes(&self, ptr: RecRef) -> impl Iterator<Item = usize> + use<> {
138+
0..self.rec(ptr).len()
139+
}
140+
}
141+
142+
/// An arena-based allocator for type information.
143+
///
144+
/// Stores and deduplicates types, record definitions, and function argument lists.
145+
/// Supports freezing to mark a baseline and freeing types allocated after the baseline.
146+
#[derive(Default, Serialize)]
147+
pub struct TypeArena {
148+
#[serde(skip_serializing)]
149+
args_hasher: FxBuildHasher,
150+
151+
type_offset: usize,
152+
rec_offset: usize,
153+
154+
dedup_types: FxHashMap<Type, TypeRef>,
155+
dedup_args: FxHashMap<u64, ArgsRef>,
156+
types: Vec<Type>,
157+
pub(crate) records: Vec<FxHashMap<StrRef, Type>>,
158+
pub(crate) args: Vec<Vec<Type>>,
159+
}
160+
161+
impl TypeArena {
162+
/// Marks the current allocation state as the baseline.
163+
///
164+
/// Subsequent calls to [`free_space`](TypeArena::free_space) will deallocate
165+
/// only types and records allocated after this point.
166+
pub fn freeze(&mut self) {
167+
self.rec_offset = self.records.len();
168+
self.type_offset = self.types.len();
169+
}
170+
171+
/// Frees types and records allocated after the last [`freeze`](TypeArena::freeze) call.
172+
pub fn free_space(&mut self) {
173+
for tpe in self.types.drain(self.type_offset..) {
174+
self.dedup_types.remove(&tpe);
175+
}
176+
177+
for _ in self.records.drain(self.rec_offset..) {}
178+
}
179+
180+
/// Registers a type and returns a deduplicated [`TypeRef`]. Returns the existing reference if already registered.
181+
pub fn register_type(&mut self, tpe: Type) -> TypeRef {
182+
match self.dedup_types.entry(tpe) {
183+
Entry::Occupied(entry) => *entry.get(),
184+
Entry::Vacant(entry) => {
185+
let key = TypeRef(self.types.len());
186+
self.types.push(tpe);
187+
entry.insert(key);
188+
189+
key
190+
}
73191
}
74192
}
193+
194+
/// Allocates a fresh copy of a type. For records, this clones the record definition.
195+
pub fn alloc_type(&mut self, tpe: Type) -> Type {
196+
if let Type::Record(rec) = tpe {
197+
let key = Record(self.records.len());
198+
// TODO: technically, a deep-clone is needed here, where properties that point to
199+
// records should also be allocated as well.
200+
self.records.push(self.records[rec.0].clone());
201+
202+
return Type::Record(key);
203+
}
204+
205+
tpe
206+
}
207+
208+
/// Creates an array type containing elements of the given type.
209+
pub fn alloc_array_of(&mut self, tpe: Type) -> Type {
210+
Type::Array(self.register_type(tpe))
211+
}
212+
213+
/// Allocates a new record type from a map of field names to types.
214+
pub fn alloc_record(&mut self, record: FxHashMap<StrRef, Type>) -> Record {
215+
let key = Record(self.records.len());
216+
self.records.push(record);
217+
key
218+
}
219+
220+
/// Allocates a deduplicated list of function argument types and returns an [`ArgsRef`].
221+
pub fn alloc_args(&mut self, args: &[Type]) -> ArgsRef {
222+
let hash = self.args_hasher.hash_one(args);
223+
224+
match self.dedup_args.entry(hash) {
225+
Entry::Occupied(entry) => *entry.get(),
226+
Entry::Vacant(entry) => {
227+
let key = ArgsRef(self.args.len());
228+
entry.insert(key);
229+
self.args.push(args.to_vec());
230+
231+
key
232+
}
233+
}
234+
}
235+
236+
/// Retrieves the type for the given [`TypeRef`].
237+
pub fn get_type(&self, key: TypeRef) -> Type {
238+
self.types[key.0]
239+
}
240+
241+
/// Returns the field map for the given record.
242+
pub fn get_record(&self, key: Record) -> &FxHashMap<StrRef, Type> {
243+
&self.records[key.0]
244+
}
245+
246+
/// Returns the argument type slice for the given [`ArgsRef`].
247+
pub fn get_args(&self, key: ArgsRef) -> &[Type] {
248+
self.args[key.0].as_slice()
249+
}
250+
251+
/// Returns a mutable reference to the argument type slice for the given [`ArgsRef`].
252+
pub fn get_args_mut(&mut self, key: ArgsRef) -> &mut [Type] {
253+
self.args[key.0].as_mut_slice()
254+
}
255+
256+
/// Returns an iterator over valid indices for the given [`ArgsRef`].
257+
pub fn args_idxes(&self, key: ArgsRef) -> impl Iterator<Item = usize> + use<> {
258+
0..self.get_args(key).len()
259+
}
260+
261+
/// Returns the argument type at index `idx` for the given [`ArgsRef`].
262+
pub fn args_get(&self, key: ArgsRef, idx: usize) -> Type {
263+
self.get_args(key)[idx]
264+
}
265+
266+
/// Returns the type of a field in the given record, or `None` if the field doesn't exist.
267+
pub fn record_get(&self, record: Record, field: StrRef) -> Option<Type> {
268+
self.records[record.0].get(&field).copied()
269+
}
270+
271+
/// Iterates over all (field name, type) pairs in the given record.
272+
pub fn record_iter(&self, record: Record) -> impl Iterator<Item = (StrRef, Type)> {
273+
self.records[record.0].iter().map(|(k, v)| (*k, *v))
274+
}
275+
276+
/// Iterates over all field names in the given record.
277+
pub fn record_keys(&self, record: Record) -> impl Iterator<Item = StrRef> {
278+
self.records[record.0].keys().copied()
279+
}
280+
281+
/// Checks whether two records have the exact same set of field names.
282+
pub fn records_have_same_keys(&self, rec_a: Record, rec_b: Record) -> bool {
283+
let rec_a = self.get_record(rec_a);
284+
let rec_b = self.get_record(rec_b);
285+
286+
if rec_a.is_empty() && rec_b.is_empty() {
287+
return true;
288+
}
289+
290+
if rec_a.len() != rec_b.len() {
291+
return false;
292+
}
293+
294+
for bk in rec_b.keys() {
295+
if !rec_a.contains_key(bk) {
296+
return false;
297+
}
298+
}
299+
300+
true
301+
}
302+
303+
/// Creates an empty record type.
304+
pub fn instantiate_record(&mut self) -> Record {
305+
self.alloc_record(FxHashMap::default())
306+
}
307+
308+
/// Returns `true` if the given field exists in the record.
309+
pub fn record_field_exists(&self, record: Record, field: StrRef) -> bool {
310+
self.records[record.0].contains_key(&field)
311+
}
312+
313+
/// Returns the hash map entry for a field in the given record, for in-place manipulation.
314+
pub fn record_entry(&mut self, record: Record, key: StrRef) -> Entry<'_, StrRef, Type> {
315+
self.records[record.0].entry(key)
316+
}
317+
318+
/// Sets the type of a field in the given record, inserting or updating as needed.
319+
pub fn record_set(&mut self, record: Record, field: StrRef, value: Type) {
320+
self.records[record.0].insert(field, value);
321+
}
322+
323+
/// Returns the number of fields in the given record.
324+
pub fn record_len(&self, record: Record) -> usize {
325+
self.records[record.0].len()
326+
}
327+
328+
/// Returns `true` if the given record has no fields.
329+
pub fn record_is_empty(&self, record: Record) -> bool {
330+
self.records[record.0].is_empty()
331+
}
332+
}
333+
334+
/// Top-level arena that holds all memory pools for expressions, strings, and types.
335+
#[derive(Default, Serialize)]
336+
pub struct Arena {
337+
pub(crate) exprs: ExprArena,
338+
pub(crate) strings: StringArena,
339+
pub(crate) types: TypeArena,
340+
}
341+
342+
impl Arena {
343+
/// Freezes the type arena to mark the current state as baseline.
344+
pub fn freeze(&mut self) {
345+
self.types.freeze();
346+
}
347+
348+
/// Frees types allocated after the last freeze, reclaiming memory for reuse.
349+
pub fn free_space(&mut self) {
350+
self.types.free_space();
351+
}
75352
}

0 commit comments

Comments
 (0)