Skip to content

Commit 4d0774b

Browse files
committed
contract: add SoaEnvelope LE contract; audit Phase 7 follow-up
The SoA envelope must know the little-endian contract, not just its columns.

ndarray::simd::MultiLaneColumn already provides the per-column LE contract (standalone, usable by any pure-SIMD consumer). What was missing: an envelope-level contract describing how columns assemble into one row-strided packet with a cycle stamp.

New zero-dep module lance_graph_contract::soa_envelope:
- ColumnKind / ColumnDescriptor (LE element width, offset, elems/row)
- SoaEnvelope trait: columns(), row_stride(), n_rows(), cycle(), LAYOUT_VERSION, as_le_bytes(), zero-copy row_le()/column_le(), verify_layout() gate (stride/overlap/packet-size/version skew)
- 7 unit tests, all passing

Deliberately NOT pulling ndarray into the contract: it would force the heavy HPC build onto crewai-rust/n8n-rs (zero-dep consumers) and force pure-SIMD ndarray consumers to pull a graph contract crate. Two-level split keeps both crates clean.

Iron rule: ndarray owns the column contract, lance-graph owns the envelope contract, neither restates the other, lance-graph binds them.

Also flags CLAUDE.md ndarray-hpc fallback wording for demotion (no shipped consumer runs without ndarray; the fallback is CI-only).
1 parent 00d5bde commit 4d0774b

3 files changed

Lines changed: 409 additions & 0 deletions

File tree

crates/lance-graph-contract/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ pub mod scheduler;
9393
pub mod sensorium;
9494
pub mod sigma_propagation;
9595
pub mod sla;
96+
pub mod soa_envelope;
9697
pub mod soa_view;
9798
pub mod splat;
9899
pub mod tax;
Lines changed: 355 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,355 @@
1+
//! SoA envelope little-endian contract.
2+
//!
3+
//! # Why this module exists
4+
//!
5+
//! Column-level LE knowledge is not enough. ndarray's `MultiLaneColumn`
6+
//! (the column carrier) already decodes its own bytes little-endian, and
7+
//! `CausalEdge64` / `EpisodicEdges64` each know their own `to_le_bytes` /
8+
//! `from_le_bytes`. But the **SoA envelope as a whole** — the thing a Lance
9+
//! version snapshots, the thing `simd_soa` sweeps, the thing a future reader
10+
//! decodes — has no contract describing how those columns *assemble* into one
11+
//! row-strided packet. The parts know the LE contract; the envelope did not.
12+
//!
13+
//! [`SoaEnvelope`] is that missing contract. It makes one SoA snapshot a
14+
//! **self-describing little-endian packet per cycle**: a stable column
15+
//! ordering, a fixed row byte stride, a `cycle` version stamp, and a
16+
//! [`ENVELOPE_LAYOUT_VERSION`]. With it, a Lance version IS a coherent LE
17+
//! packet at cycle N — not a loose collection of independently-correct
18+
//! columns.
19+
//!
20+
//! # Layering (read before adding an ndarray dependency here)
21+
//!
22+
//! This module is **zero-dep, byte-geometry only**. It describes *where*
23+
//! columns sit in a row packet and *what* LE element each holds — as data
24+
//! ([`ColumnDescriptor`]), never as ndarray generic bounds. That keeps
25+
//! `lance-graph-contract` featherweight for its non-HPC consumers
26+
//! (`crewai-rust`, `n8n-rs`), and it keeps ndarray usable standalone by any
27+
//! pure-SIMD consumer.
28+
//!
29+
//! The split is deliberate and complementary, not duplicated:
30+
//!
31+
//! | Level | Home | Answers |
32+
//! |-------|------|---------|
33+
//! | Column LE contract | `ndarray::simd::MultiLaneColumn` | "how do I sweep one typed column" |
34+
//! | Envelope LE contract | this module | "where do columns sit in the row packet, what cycle is this" |
35+
//! | Composition | `lance-graph` (always has both deps) | carve envelope columns → wrap each in `MultiLaneColumn` |
36+
//!
37+
//! ndarray never learns the envelope exists; this crate never learns ndarray
38+
//! exists; `lance-graph` binds them.
39+
40+
/// Version tag for the envelope's byte geometry.
///
/// Increment it whenever the interpretation of [`ColumnDescriptor`] offsets
/// or strides changes. Readers MUST reject any packet stamped with a version
/// they do not understand (per `I-LEGACY-API-FEATURE-GATED`: a layout reclaim
/// is always paired with a version gate on the serialization path).
pub const ENVELOPE_LAYOUT_VERSION: u8 = 1;
47+
48+
/// Element type stored little-endian in a single column.
///
/// This carries width information only: no distance semantics and no domain
/// meaning (cf. ndarray's no-umbrella rule). Turning bytes into values
/// (`from_le_bytes`) is left to the consumer's `MultiLaneColumn` lane
/// iterator.
///
/// The explicit `u8` discriminants are the stable numeric identity of each
/// kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum ColumnKind {
    /// Unsigned 8-bit integer.
    U8 = 0,
    /// Signed 8-bit integer.
    I8 = 1,
    /// Unsigned 16-bit integer.
    U16 = 2,
    /// Signed 16-bit integer.
    I16 = 3,
    /// Unsigned 32-bit integer.
    U32 = 4,
    /// 32-bit float.
    F32 = 5,
    /// Unsigned 64-bit integer.
    U64 = 6,
    /// 64-bit float.
    F64 = 7,
}

impl ColumnKind {
    /// Size in bytes of one element of this kind.
    ///
    /// Each arm defers to the size of the matching primitive, so the table
    /// cannot drift from the type it names.
    pub const fn elem_bytes(self) -> usize {
        match self {
            Self::U8 => core::mem::size_of::<u8>(),
            Self::I8 => core::mem::size_of::<i8>(),
            Self::U16 => core::mem::size_of::<u16>(),
            Self::I16 => core::mem::size_of::<i16>(),
            Self::U32 => core::mem::size_of::<u32>(),
            Self::F32 => core::mem::size_of::<f32>(),
            Self::U64 => core::mem::size_of::<u64>(),
            Self::F64 => core::mem::size_of::<f64>(),
        }
    }
}
77+
78+
/// One column's placement within a single row packet.
79+
///
80+
/// `Copy` and `repr(C)` so a descriptor table is itself a stable LE artifact.
81+
/// `name_id` is a stable column ordinal (an enum discriminant on the consumer
82+
/// side), NOT a string — keeping this crate alloc-free and the descriptor
83+
/// `Copy`.
84+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
85+
#[repr(C)]
86+
pub struct ColumnDescriptor {
87+
/// Stable column identity (consumer-side enum ordinal).
88+
pub name_id: u16,
89+
/// LE element kind.
90+
pub kind: ColumnKind,
91+
/// Elements of `kind` per row for this column (e.g. content = 256 × u64,
92+
/// energy = 1 × f32).
93+
pub elems_per_row: u16,
94+
/// Byte offset of this column within one row packet.
95+
pub row_offset: u32,
96+
}
97+
98+
impl ColumnDescriptor {
99+
/// Bytes this column occupies in one row.
100+
pub const fn col_bytes_per_row(&self) -> usize {
101+
self.kind.elem_bytes() * self.elems_per_row as usize
102+
}
103+
104+
/// Byte range `[start, end)` of this column within a row packet.
105+
pub const fn row_byte_range(&self) -> (usize, usize) {
106+
let start = self.row_offset as usize;
107+
(start, start + self.col_bytes_per_row())
108+
}
109+
}
110+
111+
/// Reasons an envelope's byte geometry can be rejected.
///
/// Implements [`core::fmt::Display`] and [`core::error::Error`] so it can be
/// logged and propagated with `?` into boxed or wrapping error types. The
/// `core` paths are used so the impls do not depend on `std`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EnvelopeError {
    /// The stamped layout version is not the one this build understands.
    LayoutVersionMismatch { expected: u8, found: u8 },
    /// Sum of column byte-widths does not equal the declared row stride.
    StrideMismatch { declared: usize, summed: usize },
    /// Two columns occupy intersecting byte ranges within a row; also the
    /// designated variant for gap/ordering violations.
    ColumnOverlap { col_a: u16, col_b: u16 },
    /// `as_le_bytes().len()` is not `row_stride * n_rows`.
    PacketSizeMismatch { expected: usize, found: usize },
    /// A row index or a column byte range lies outside the packet.
    OutOfBounds,
}

impl core::fmt::Display for EnvelopeError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match *self {
            Self::LayoutVersionMismatch { expected, found } => write!(
                f,
                "envelope layout version mismatch: expected {}, found {}",
                expected, found
            ),
            Self::StrideMismatch { declared, summed } => write!(
                f,
                "row stride mismatch: declared {} bytes, columns sum to {} bytes",
                declared, summed
            ),
            Self::ColumnOverlap { col_a, col_b } => {
                write!(f, "columns {} and {} overlap within a row", col_a, col_b)
            }
            Self::PacketSizeMismatch { expected, found } => write!(
                f,
                "packet size mismatch: expected {} bytes, found {} bytes",
                expected, found
            ),
            Self::OutOfBounds => f.write_str("row or column range out of bounds"),
        }
    }
}

impl core::error::Error for EnvelopeError {}
125+
126+
/// A self-describing little-endian SoA packet for one cycle.
127+
///
128+
/// Implemented by the owner of the backing store (e.g. the mailbox SoA). The
129+
/// envelope is read-only here; mutation lives on the owner type, never on this
130+
/// view (mirrors `MailboxSoaView` vs `MailboxSoaOwner`).
131+
pub trait SoaEnvelope {
132+
/// Layout version this implementor's geometry conforms to.
133+
const LAYOUT_VERSION: u8 = ENVELOPE_LAYOUT_VERSION;
134+
135+
/// Stable, ordered column placement table. Ordering is part of the
136+
/// contract: a reader walks columns in this order.
137+
fn columns(&self) -> &[ColumnDescriptor];
138+
139+
/// Total bytes per row across all columns.
140+
fn row_stride(&self) -> usize;
141+
142+
/// Number of rows in this snapshot.
143+
fn n_rows(&self) -> usize;
144+
145+
/// The version stamp this snapshot carries (the cycle whose committed
146+
/// state these bytes are). This is what turns a Lance version into a
147+
/// coherent "packet at cycle N".
148+
fn cycle(&self) -> u32;
149+
150+
/// The whole packet as contiguous LE bytes, zero-copy. Length MUST be
151+
/// `row_stride() * n_rows()`.
152+
fn as_le_bytes(&self) -> &[u8];
153+
154+
/// Zero-copy LE view of one full row.
155+
fn row_le(&self, row: usize) -> Option<&[u8]> {
156+
let stride = self.row_stride();
157+
let start = row.checked_mul(stride)?;
158+
let end = start.checked_add(stride)?;
159+
self.as_le_bytes().get(start..end)
160+
}
161+
162+
/// Zero-copy LE view of one column within one row.
163+
fn column_le(&self, row: usize, col: &ColumnDescriptor) -> Option<&[u8]> {
164+
let r = self.row_le(row)?;
165+
let (start, end) = col.row_byte_range();
166+
r.get(start..end)
167+
}
168+
169+
/// Validate that the declared geometry is internally consistent and that
170+
/// the backing packet matches. Call this at the Lance read boundary — a
171+
/// v1 packet under a v2 reader (or a torn snapshot) is refused here rather
172+
/// than silently mis-decoded downstream.
173+
fn verify_layout(&self) -> Result<(), EnvelopeError> {
174+
// 1. Version gate.
175+
if Self::LAYOUT_VERSION != ENVELOPE_LAYOUT_VERSION {
176+
return Err(EnvelopeError::LayoutVersionMismatch {
177+
expected: ENVELOPE_LAYOUT_VERSION,
178+
found: Self::LAYOUT_VERSION,
179+
});
180+
}
181+
// 2. Columns are non-overlapping and their widths sum to the stride.
182+
let cols = self.columns();
183+
let mut summed = 0usize;
184+
for (i, a) in cols.iter().enumerate() {
185+
let (a_start, a_end) = a.row_byte_range();
186+
summed += a.col_bytes_per_row();
187+
for b in &cols[i + 1..] {
188+
let (b_start, b_end) = b.row_byte_range();
189+
let overlap = a_start < b_end && b_start < a_end;
190+
if overlap {
191+
return Err(EnvelopeError::ColumnOverlap {
192+
col_a: a.name_id,
193+
col_b: b.name_id,
194+
});
195+
}
196+
}
197+
}
198+
let stride = self.row_stride();
199+
if summed != stride {
200+
return Err(EnvelopeError::StrideMismatch {
201+
declared: stride,
202+
summed,
203+
});
204+
}
205+
// 3. Backing packet size matches stride × rows.
206+
let expected = stride.saturating_mul(self.n_rows());
207+
let found = self.as_le_bytes().len();
208+
if expected != found {
209+
return Err(EnvelopeError::PacketSizeMismatch { expected, found });
210+
}
211+
Ok(())
212+
}
213+
}
214+
215+
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes per row of the fixture built by `two_col_envelope`.
    const ROW_BYTES: usize = 12;

    /// Minimal heap-backed envelope whose geometry each test can corrupt.
    struct TestEnvelope {
        cols: Vec<ColumnDescriptor>,
        stride: usize,
        rows: usize,
        bytes: Vec<u8>,
        cycle: u32,
    }

    impl SoaEnvelope for TestEnvelope {
        fn columns(&self) -> &[ColumnDescriptor] {
            &self.cols
        }
        fn row_stride(&self) -> usize {
            self.stride
        }
        fn n_rows(&self) -> usize {
            self.rows
        }
        fn cycle(&self) -> u32 {
            self.cycle
        }
        fn as_le_bytes(&self) -> &[u8] {
            &self.bytes
        }
    }

    /// Wraps a valid envelope but claims a layout version one past the
    /// current one, to exercise the version gate.
    struct SkewedEnvelope(TestEnvelope);

    impl SoaEnvelope for SkewedEnvelope {
        const LAYOUT_VERSION: u8 = ENVELOPE_LAYOUT_VERSION.wrapping_add(1);

        fn columns(&self) -> &[ColumnDescriptor] {
            self.0.columns()
        }
        fn row_stride(&self) -> usize {
            self.0.row_stride()
        }
        fn n_rows(&self) -> usize {
            self.0.n_rows()
        }
        fn cycle(&self) -> u32 {
            self.0.cycle()
        }
        fn as_le_bytes(&self) -> &[u8] {
            self.0.as_le_bytes()
        }
    }

    /// Valid two-column fixture, zero-filled, stamped with cycle 7:
    ///
    /// * col 0: 1 × f32 (4 B) at offset 0
    /// * col 1: 1 × u64 (8 B) at offset 4
    fn two_col_envelope(rows: usize) -> TestEnvelope {
        let cols = vec![
            ColumnDescriptor {
                name_id: 0,
                kind: ColumnKind::F32,
                elems_per_row: 1,
                row_offset: 0,
            },
            ColumnDescriptor {
                name_id: 1,
                kind: ColumnKind::U64,
                elems_per_row: 1,
                row_offset: 4,
            },
        ];
        TestEnvelope {
            cols,
            stride: ROW_BYTES,
            rows,
            bytes: vec![0u8; ROW_BYTES * rows],
            cycle: 7,
        }
    }

    #[test]
    fn kind_widths() {
        let expected = [
            (ColumnKind::U8, 1),
            (ColumnKind::I8, 1),
            (ColumnKind::U16, 2),
            (ColumnKind::I16, 2),
            (ColumnKind::U32, 4),
            (ColumnKind::F32, 4),
            (ColumnKind::U64, 8),
            (ColumnKind::F64, 8),
        ];
        for (kind, width) in expected {
            assert_eq!(kind.elem_bytes(), width, "{kind:?}");
        }
    }

    #[test]
    fn descriptor_byte_range() {
        let d = ColumnDescriptor {
            name_id: 0,
            kind: ColumnKind::U64,
            elems_per_row: 256,
            row_offset: 16,
        };
        assert_eq!(d.col_bytes_per_row(), 256 * 8);
        assert_eq!(d.row_byte_range(), (16, 16 + 256 * 8));
    }

    #[test]
    fn valid_envelope_passes() {
        let env = two_col_envelope(4);
        assert_eq!(env.cycle(), 7);
        assert_eq!(env.verify_layout(), Ok(()));
    }

    #[test]
    fn empty_envelope_passes_and_has_no_rows() {
        let env = two_col_envelope(0);
        assert_eq!(env.verify_layout(), Ok(()));
        assert!(env.row_le(0).is_none());
    }

    #[test]
    fn version_skew_refused() {
        // The geometry itself is valid; only the claimed version differs.
        let env = SkewedEnvelope(two_col_envelope(4));
        assert_eq!(
            env.verify_layout(),
            Err(EnvelopeError::LayoutVersionMismatch {
                expected: ENVELOPE_LAYOUT_VERSION,
                found: ENVELOPE_LAYOUT_VERSION.wrapping_add(1),
            })
        );
    }

    #[test]
    fn stride_mismatch_caught() {
        let mut env = two_col_envelope(4);
        env.stride = 16; // columns sum to 12, not 16
        env.bytes = vec![0u8; 16 * 4];
        assert_eq!(
            env.verify_layout(),
            Err(EnvelopeError::StrideMismatch {
                declared: 16,
                summed: 12,
            })
        );
    }

    #[test]
    fn overlap_caught() {
        let mut env = two_col_envelope(1);
        env.cols[1].row_offset = 2; // u64 at 2 overlaps f32 at [0,4)
        env.stride = 10;
        env.bytes = vec![0u8; 10];
        assert!(matches!(
            env.verify_layout(),
            Err(EnvelopeError::ColumnOverlap { .. })
        ));
    }

    #[test]
    fn packet_size_mismatch_caught() {
        let mut env = two_col_envelope(4);
        env.bytes.truncate(ROW_BYTES * 3); // one row short
        assert_eq!(
            env.verify_layout(),
            Err(EnvelopeError::PacketSizeMismatch {
                expected: 48,
                found: 36,
            })
        );
    }

    #[test]
    fn row_and_column_views_are_zero_copy_slices() {
        let mut env = two_col_envelope(2);
        // Write row 1, col 1 (u64) = 0x0102030405060708 LE.
        let v: u64 = 0x0102_0304_0506_0708;
        let row1_col1_start = ROW_BYTES + 4;
        env.bytes[row1_col1_start..row1_col1_start + 8].copy_from_slice(&v.to_le_bytes());

        let row = env.row_le(1).unwrap();
        assert_eq!(row.len(), ROW_BYTES);

        let col = env.column_le(1, &env.cols[1]).unwrap();
        assert_eq!(col.len(), 8);
        assert_eq!(u64::from_le_bytes(col.try_into().unwrap()), v);

        // Out of bounds.
        assert!(env.row_le(2).is_none());
    }

    #[test]
    fn column_le_rejects_out_of_range_requests() {
        let env = two_col_envelope(2);

        // Row index past the last row.
        assert!(env.column_le(2, &env.cols[0]).is_none());

        // Descriptor whose byte range [8,16) runs past the 12-byte row.
        let past_row_end = ColumnDescriptor {
            name_id: 9,
            kind: ColumnKind::U64,
            elems_per_row: 1,
            row_offset: 8,
        };
        assert!(env.column_le(0, &past_row_end).is_none());
    }
}

0 commit comments

Comments
 (0)