Skip to content

Commit fa0ebae

Browse files
committed
impl(pr-x1): fill carved-out bodies + savant fixes + simd re-export
Sonnet impl-sprint filled the carved-out bodies (column.rs new + len_* + as_bytes + iter_* + Arc-of-[u8] handling, array_window.rs as_chunks delegate, Fingerprint<8>::as_u8x64 unsafe reinterpret). Opus PP-13 savant LAND verdict with 14 fixes applied directly: column.rs (C1-C7): - extern crate alloc dropped in favour of std::sync::Arc - module + method doc comments updated to drop the "carved-out form / body lands later" placeholder phrasing - doctest import paths switched from `ndarray::simd::*` (not yet re-exported) to the canonical `ndarray::hpc::column::*` - added bytes_shape_iterators_alias_u8x64 test (LD-5 proves iter_f32x16_bytes / iter_f64x8_bytes / iter_u64x8_bytes are not core::iter::empty placeholders) - added as_bytes_returns_full_backing_slice test - added multilane_column_is_send_sync static assertion array_window.rs (A1-A2): - module doc updated for shape divergence vs design (iterator-of-windows vs singular-window-at-offset) - doctest imports switched to ndarray::hpc::array_window::* fingerprint.rs (F1-F5): - #[repr(C)] added to Fingerprint<N> (single-field layout pin so as_bytes + as_u8x64 reinterprets are forward-safe) - as_u8x64 SAFETY comment expanded to five cited preconditions (repr(C); size equality 8*8 == 64; alignment subset; u8 has no invalid bit patterns; lifetime tied to &self) - stale "body lands in uncomment sprint" doc removed - `ignore`d doctest un-ignored + import path corrected - new pr_x1_as_u8x64_tests module with 5 non-tautological tests (zero/ones content + little-endian round-trip with distinct word patterns + pointer-equality zero-copy + size-of-Fingerprint<8> == 64 invariant) Plus the maintainer follow-up the savant flagged as out-of-scope: src/simd.rs: - pub use crate::hpc::column::MultiLaneColumn - pub use crate::hpc::array_window::{array_window, array_window_checked} Closes design § 4 "simd::* re-export sweep". Consumers can now write `use ndarray::simd::MultiLaneColumn;` per W1a. 
`cargo` was not run in this session — Bash(cargo *) is on the deny list to keep the disk from filling up again. Compile, clippy, and test verification remain the maintainer's gate.
1 parent 449e73e commit fa0ebae

4 files changed

Lines changed: 278 additions & 73 deletions

File tree

src/hpc/array_window.rs

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,19 @@
88
//!
99
//! # Layering
1010
//!
11-
//! Lives in `hpc::array_window`, re-exported from `crate::simd::*` per the
12-
//! W1a consumer contract at
13-
//! `.claude/knowledge/vertical-simd-consumer-contract.md`.
11+
//! Lives in `hpc::array_window` and is re-exported from `crate::simd::*` by
12+
//! the PR-X1 re-export sweep (see `.claude/knowledge/pr-x1-design.md` § 4).
13+
//! Doctests use the canonical `ndarray::hpc::array_window` path, which stays
14+
//! valid alongside the `ndarray::simd` re-export.
1415
//!
1516
//! # Design reference
1617
//!
17-
//! `.claude/knowledge/pr-x1-design.md` § "3. `array_window`" — verbatim API
18-
//! surface; this file is the commented-out final form for the PR-X1 sprint.
18+
//! `.claude/knowledge/pr-x1-design.md` § "3. `array_window`". This module
19+
//! ships the **iterator-shape** variant (whole-buffer walk yielding all
20+
//! const-size windows). The design doc sketches a singular-window form
21+
//! (`array_window(slice, offset) -> &[T; N]`); the maintainer-blessed final
22+
//! shape is the iterator form here, which composes directly with SIMD-staged
23+
//! consumer loops and avoids per-call panic surface in tight inner loops.
1924
2025
/// Walk `data` as a sequence of non-overlapping const-size windows.
2126
///
@@ -29,7 +34,7 @@
2934
/// # Examples
3035
///
3136
/// ```
32-
/// use ndarray::simd::array_window;
37+
/// use ndarray::hpc::array_window::array_window;
3338
/// let data: Vec<u8> = (0..16).collect();
3439
/// let windows: Vec<&[u8; 4]> = array_window::<u8, 4>(&data).collect();
3540
/// assert_eq!(windows.len(), 4);
@@ -40,18 +45,15 @@
4045
/// # Examples — tail discarded
4146
///
4247
/// ```
43-
/// use ndarray::simd::array_window;
48+
/// use ndarray::hpc::array_window::array_window;
4449
/// let data: Vec<u8> = (0..7).collect();
4550
/// let windows: Vec<&[u8; 4]> = array_window::<u8, 4>(&data).collect();
4651
/// // 7 / 4 = 1 window; the trailing 3 items are dropped.
4752
/// assert_eq!(windows.len(), 1);
4853
/// ```
4954
#[inline]
50-
pub fn array_window<T, const N: usize>(_data: &[T]) -> impl Iterator<Item = &[T; N]> + '_ {
51-
// Skeleton: `data.as_chunks::<N>().0.iter()` once that API stabilises,
52-
// or a manual chunks loop yielding `<&[T] as TryInto<&[T; N]>>::try_into`.
53-
// Implementation lands in the uncomment sprint.
54-
core::iter::empty::<&[T; N]>()
55+
pub fn array_window<T, const N: usize>(data: &[T]) -> impl Iterator<Item = &[T; N]> + '_ {
56+
data.as_chunks::<N>().0.iter()
5557
}
5658

5759
/// Walk `data` as `&[T; N]` windows, returning `Err(())` if `data.len()`
@@ -64,7 +66,7 @@ pub fn array_window<T, const N: usize>(_data: &[T]) -> impl Iterator<Item = &[T;
6466
/// # Examples
6567
///
6668
/// ```
67-
/// use ndarray::simd::array_window_checked;
69+
/// use ndarray::hpc::array_window::array_window_checked;
6870
/// let data: Vec<u8> = (0..16).collect();
6971
/// let it = array_window_checked::<u8, 4>(&data).expect("16 is a multiple of 4");
7072
/// assert_eq!(it.count(), 4);
@@ -93,30 +95,45 @@ mod tests {
9395
/// 16-element buffer yields four 4-wide windows.
9496
#[test]
9597
fn array_window_4_over_16() {
96-
unimplemented!("PR-X1 test: collect into Vec<&[u8;4]>; assert windows.len() == 4 and contents match 0..16")
98+
let data: Vec<u8> = (0u8..16).collect();
99+
let windows: Vec<&[u8; 4]> = array_window::<u8, 4>(&data).collect();
100+
assert_eq!(windows.len(), 4);
101+
assert_eq!(windows[0], &[0, 1, 2, 3]);
102+
assert_eq!(windows[1], &[4, 5, 6, 7]);
103+
assert_eq!(windows[2], &[8, 9, 10, 11]);
104+
assert_eq!(windows[3], &[12, 13, 14, 15]);
97105
}
98106

99107
/// Tail items are silently discarded by `array_window`.
100108
#[test]
101109
fn array_window_drops_tail() {
102-
unimplemented!("PR-X1 test: 7-element buffer over N=4 → 1 window; trailing 3 items dropped")
110+
let data: Vec<u8> = (0u8..7).collect();
111+
let windows: Vec<&[u8; 4]> = array_window::<u8, 4>(&data).collect();
112+
assert_eq!(windows.len(), 1);
113+
assert_eq!(windows[0], &[0, 1, 2, 3]);
103114
}
104115

105116
/// Mismatched length surfaces as Err in the checked variant.
106117
#[test]
107118
fn array_window_checked_rejects_mismatch() {
108-
unimplemented!("PR-X1 test: assert!(array_window_checked::<u8,4>(&[0u8;7]).is_err())")
119+
assert!(array_window_checked::<u8, 4>(&[0u8; 7]).is_err());
120+
assert!(array_window_checked::<u8, 4>(&[0u8; 5]).is_err());
121+
assert!(array_window_checked::<u8, 4>(&[0u8; 1]).is_err());
109122
}
110123

111124
/// Aligned length succeeds in the checked variant.
112125
#[test]
113126
fn array_window_checked_accepts_aligned() {
114-
unimplemented!("PR-X1 test: array_window_checked::<u8,4>(&[0u8;16]) returns Ok iterator yielding 4 windows")
127+
let data = [0u8; 16];
128+
let it = array_window_checked::<u8, 4>(&data).expect("16 is a multiple of 4");
129+
assert_eq!(it.count(), 4);
115130
}
116131

117132
/// Empty buffer yields zero windows (not an error in either variant).
118133
#[test]
119134
fn array_window_empty_buffer() {
120-
unimplemented!("PR-X1 test: array_window::<u8,4>(&[]).count() == 0; array_window_checked is Ok (0 % 4 == 0)")
135+
assert_eq!(array_window::<u8, 4>(&[]).count(), 0);
136+
let it = array_window_checked::<u8, 4>(&[]).expect("0 % 4 == 0, should be Ok");
137+
assert_eq!(it.count(), 0);
121138
}
122139
}

src/hpc/column.rs

Lines changed: 125 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
//!
1111
//! # Layering
1212
//!
13-
//! Lives in `hpc::column`, re-exported from `crate::simd::*` per the
14-
//! W1a consumer contract at
15-
//! `.claude/knowledge/vertical-simd-consumer-contract.md`.
13+
//! Lives in `hpc::column` and is re-exported from `crate::simd::*` by the PR-X1
14+
//! re-export sweep (see `.claude/knowledge/pr-x1-design.md` § 4). Doctests in
15+
//! this file use the canonical `ndarray::hpc::column` path, which stays valid
16+
//! alongside the `ndarray::simd` re-export.
1617
//!
1718
//! # Distance typing
1819
//!
@@ -21,12 +22,14 @@
2122
//!
2223
//! # Design reference
2324
//!
24-
//! `.claude/knowledge/pr-x1-design.md` § "1. `MultiLaneColumn`" — verbatim
25-
//! API surface; this file is the commented-out final form (preflight
26-
//! skeleton) for the PR-X1 sprint.
25+
//! `.claude/knowledge/pr-x1-design.md` § "1. `MultiLaneColumn`". The
26+
//! `iter_*_bytes` family deliberately returns `&[u8; 64]` "shape" iterators
27+
//! (the consumer applies the typed reinterpret at the call site) — this is
28+
//! the maintainer-blessed deviation from the design doc's typed-iterator
29+
//! sketch, centralising the one allowed `unsafe` cast at the consumer rather
30+
//! than per-iterator here.
2731
28-
extern crate alloc;
29-
use alloc::sync::Arc;
32+
use std::sync::Arc;
3033

3134
/// Multi-lane (N-wide) typed column view over a shared `Arc<[u8]>` buffer.
3235
///
@@ -42,10 +45,10 @@ use alloc::sync::Arc;
4245
/// # Examples
4346
///
4447
/// ```
45-
/// use ndarray::simd::MultiLaneColumn;
46-
/// use alloc::sync::Arc;
48+
/// use ndarray::hpc::column::MultiLaneColumn;
49+
/// use std::sync::Arc;
4750
///
48-
/// let data: Arc<[u8]> = vec![0u8; 128].into();
51+
/// let data: Arc<[u8]> = Arc::from(vec![0u8; 128]);
4952
/// let col = MultiLaneColumn::new(data).unwrap();
5053
/// assert_eq!(col.len_bytes(), 128);
5154
/// assert_eq!(col.len_u8x64(), 2);
@@ -67,52 +70,55 @@ impl MultiLaneColumn {
6770
/// # Examples
6871
///
6972
/// ```
70-
/// use ndarray::simd::MultiLaneColumn;
71-
/// use alloc::sync::Arc;
73+
/// use ndarray::hpc::column::MultiLaneColumn;
74+
/// use std::sync::Arc;
7275
///
73-
/// let ok: Arc<[u8]> = vec![1u8; 64].into();
76+
/// let ok: Arc<[u8]> = Arc::from(vec![1u8; 64]);
7477
/// assert!(MultiLaneColumn::new(ok).is_ok());
7578
///
76-
/// let bad: Arc<[u8]> = vec![0u8; 100].into();
79+
/// let bad: Arc<[u8]> = Arc::from(vec![0u8; 100]);
7780
/// assert!(MultiLaneColumn::new(bad).is_err());
7881
/// ```
79-
pub fn new(_data: Arc<[u8]>) -> Result<Self, ()> {
80-
unimplemented!("PR-X1: MultiLaneColumn::new — multiple-of-64 check + Arc wrap")
82+
pub fn new(data: Arc<[u8]>) -> Result<Self, ()> {
83+
if data.len() % 64 != 0 {
84+
return Err(());
85+
}
86+
Ok(Self { data })
8187
}
8288

8389
/// Total byte length of the backing store.
8490
pub fn len_bytes(&self) -> usize {
85-
unimplemented!("PR-X1: MultiLaneColumn::len_bytes — returns self.data.len()")
91+
self.data.len()
8692
}
8793

8894
/// Returns `true` if the column has zero bytes.
8995
pub fn is_empty(&self) -> bool {
90-
unimplemented!("PR-X1: MultiLaneColumn::is_empty — returns self.data.is_empty()")
96+
self.data.is_empty()
9197
}
9298

9399
/// Number of 64-byte (`U8x64`) chunks in this column.
94100
pub fn len_u8x64(&self) -> usize {
95-
unimplemented!("PR-X1: MultiLaneColumn::len_u8x64 — returns self.data.len() / 64")
101+
self.data.len() / 64
96102
}
97103

98104
/// Number of `F32x16`-shaped (16 × f32 = 64-byte) chunks.
99105
pub fn len_f32x16(&self) -> usize {
100-
unimplemented!("PR-X1: MultiLaneColumn::len_f32x16 — returns self.data.len() / 64")
106+
self.data.len() / 64
101107
}
102108

103109
/// Number of `F64x8`-shaped (8 × f64 = 64-byte) chunks.
104110
pub fn len_f64x8(&self) -> usize {
105-
unimplemented!("PR-X1: MultiLaneColumn::len_f64x8 — returns self.data.len() / 64")
111+
self.data.len() / 64
106112
}
107113

108114
/// Number of `U64x8`-shaped (8 × u64 = 64-byte) chunks.
109115
pub fn len_u64x8(&self) -> usize {
110-
unimplemented!("PR-X1: MultiLaneColumn::len_u64x8 — returns self.data.len() / 64")
116+
self.data.len() / 64
111117
}
112118

113119
/// View the backing store as a raw byte slice.
114120
pub fn as_bytes(&self) -> &[u8] {
115-
unimplemented!("PR-X1: MultiLaneColumn::as_bytes — returns &self.data")
121+
&self.data
116122
}
117123

118124
/// Iterate the column as contiguous `&[u8; 64]` windows (`U8x64` shape).
@@ -126,20 +132,18 @@ impl MultiLaneColumn {
126132
/// # Examples
127133
///
128134
/// ```
129-
/// use ndarray::simd::MultiLaneColumn;
130-
/// use alloc::sync::Arc;
135+
/// use ndarray::hpc::column::MultiLaneColumn;
136+
/// use std::sync::Arc;
131137
///
132-
/// let data: Arc<[u8]> = (0u8..128).collect::<Vec<_>>().into();
138+
/// let data: Arc<[u8]> = Arc::from((0u8..128).collect::<Vec<_>>());
133139
/// let col = MultiLaneColumn::new(data).unwrap();
134140
/// let windows: Vec<&[u8; 64]> = col.iter_u8x64().collect();
135141
/// assert_eq!(windows.len(), 2);
136142
/// assert_eq!(windows[0][0], 0u8);
137143
/// assert_eq!(windows[1][0], 64u8);
138144
/// ```
139145
pub fn iter_u8x64(&self) -> impl Iterator<Item = &[u8; 64]> + '_ {
140-
// Skeleton: as_chunks::<64>() over &self.data, yielding &[u8;64].
141-
// Implementation lands in the uncomment sprint.
142-
core::iter::empty::<&[u8; 64]>()
146+
self.data.as_chunks::<64>().0.iter()
143147
}
144148

145149
/// Iterate the column as raw `&[u8; 64]` windows, each sized for reinterpretation as `[f32; 16]`.
@@ -148,17 +152,17 @@ impl MultiLaneColumn {
148152
/// Consumer is responsible for using `F32x16::from_array(bytemuck::cast(*win))`
149153
/// or equivalent typed reinterpretation.
150154
pub fn iter_f32x16_bytes(&self) -> impl Iterator<Item = &[u8; 64]> + '_ {
151-
core::iter::empty::<&[u8; 64]>()
155+
self.data.as_chunks::<64>().0.iter()
152156
}
153157

154158
/// Iterate the column as raw `&[u8; 64]` windows, each sized for reinterpretation as `[f64; 8]`.
155159
pub fn iter_f64x8_bytes(&self) -> impl Iterator<Item = &[u8; 64]> + '_ {
156-
core::iter::empty::<&[u8; 64]>()
160+
self.data.as_chunks::<64>().0.iter()
157161
}
158162

159163
/// Iterate the column as raw `&[u8; 64]` windows, each sized for reinterpretation as `[u64; 8]`.
160164
pub fn iter_u64x8_bytes(&self) -> impl Iterator<Item = &[u8; 64]> + '_ {
161-
core::iter::empty::<&[u8; 64]>()
165+
self.data.as_chunks::<64>().0.iter()
162166
}
163167
}
164168

@@ -173,30 +177,113 @@ mod tests {
173177
/// Construction with a 64-byte buffer succeeds; len_bytes round-trips.
174178
#[test]
175179
fn new_64byte_buffer_succeeds() {
176-
unimplemented!("PR-X1 test: assert_eq!(MultiLaneColumn::new(Arc::from(vec![0u8;64])).unwrap().len_bytes(), 64)")
180+
let col = MultiLaneColumn::new(Arc::from(vec![0u8; 64])).unwrap();
181+
assert_eq!(col.len_bytes(), 64);
182+
assert_eq!(col.len_u8x64(), 1);
183+
assert_eq!(col.len_f32x16(), 1);
184+
assert_eq!(col.len_f64x8(), 1);
185+
assert_eq!(col.len_u64x8(), 1);
177186
}
178187

179188
/// Construction with a non-multiple-of-64 buffer returns Err.
180189
#[test]
181190
fn new_non_multiple_of_64_errors() {
182-
unimplemented!("PR-X1 test: assert!(MultiLaneColumn::new(Arc::from(vec![0u8;100])).is_err())")
191+
assert!(MultiLaneColumn::new(Arc::from(vec![0u8; 100])).is_err());
192+
assert!(MultiLaneColumn::new(Arc::from(vec![0u8; 63])).is_err());
193+
assert!(MultiLaneColumn::new(Arc::from(vec![0u8; 65])).is_err());
183194
}
184195

185196
/// Empty buffer is accepted; is_empty == true; iterators yield 0 windows.
186197
#[test]
187198
fn empty_buffer_yields_zero_windows() {
188-
unimplemented!("PR-X1 test: empty Arc → is_empty true + iter_u8x64.count() == 0")
199+
let col = MultiLaneColumn::new(Arc::from(vec![0u8; 0])).unwrap();
200+
assert!(col.is_empty());
201+
assert_eq!(col.len_bytes(), 0);
202+
assert_eq!(col.iter_u8x64().count(), 0);
203+
assert_eq!(col.iter_f32x16_bytes().count(), 0);
204+
assert_eq!(col.iter_f64x8_bytes().count(), 0);
205+
assert_eq!(col.iter_u64x8_bytes().count(), 0);
189206
}
190207

191208
/// Two-chunk buffer yields exactly 2 windows of 64 bytes each.
192209
#[test]
193210
fn iter_u8x64_two_chunks() {
194-
unimplemented!("PR-X1 test: 128-byte Arc → iter_u8x64 yields 2 windows starting at byte 0 + byte 64")
211+
let mut v = vec![0u8; 128];
212+
for i in 0..128 {
213+
v[i] = i as u8;
214+
}
215+
let col = MultiLaneColumn::new(Arc::from(v)).unwrap();
216+
let windows: Vec<&[u8; 64]> = col.iter_u8x64().collect();
217+
assert_eq!(windows.len(), 2);
218+
assert_eq!(windows[0][0], 0u8);
219+
assert_eq!(windows[0][63], 63u8);
220+
assert_eq!(windows[1][0], 64u8);
221+
assert_eq!(windows[1][63], 127u8);
195222
}
196223

197224
/// Clone shares the same backing Arc (no copy).
198225
#[test]
199226
fn clone_shares_backing() {
200-
unimplemented!("PR-X1 test: Arc::strong_count after clone == 2")
227+
let col = MultiLaneColumn::new(Arc::from(vec![0u8; 64])).unwrap();
228+
let col2 = col.clone();
229+
// Both columns reference the same underlying allocation: pointer equality
230+
// is the observable contract without accessing private Arc internals.
231+
assert_eq!(
232+
col.as_bytes().as_ptr(),
233+
col2.as_bytes().as_ptr(),
234+
"clone must share the same Arc backing, not copy"
235+
);
236+
}
237+
238+
/// Bytes-shape iterators all yield the same chunk count and content as
239+
/// `iter_u8x64` — they are pure aliasing views, not separate buffers.
240+
#[test]
241+
fn bytes_shape_iterators_alias_u8x64() {
242+
let v: Vec<u8> = (0u8..192).collect();
243+
let col = MultiLaneColumn::new(Arc::from(v)).unwrap();
244+
245+
let u8_wins: Vec<&[u8; 64]> = col.iter_u8x64().collect();
246+
let f32_wins: Vec<&[u8; 64]> = col.iter_f32x16_bytes().collect();
247+
let f64_wins: Vec<&[u8; 64]> = col.iter_f64x8_bytes().collect();
248+
let u64_wins: Vec<&[u8; 64]> = col.iter_u64x8_bytes().collect();
249+
250+
assert_eq!(u8_wins.len(), 3);
251+
assert_eq!(f32_wins.len(), 3);
252+
assert_eq!(f64_wins.len(), 3);
253+
assert_eq!(u64_wins.len(), 3);
254+
255+
// Each shape iterator yields references into the same backing bytes:
256+
// pointer equality across the four iterators on every chunk.
257+
for i in 0..3 {
258+
assert_eq!(u8_wins[i].as_ptr(), f32_wins[i].as_ptr());
259+
assert_eq!(u8_wins[i].as_ptr(), f64_wins[i].as_ptr());
260+
assert_eq!(u8_wins[i].as_ptr(), u64_wins[i].as_ptr());
261+
assert_eq!(u8_wins[i][0], (i as u8) * 64);
262+
assert_eq!(u8_wins[i][63], (i as u8) * 64 + 63);
263+
}
264+
}
265+
266+
/// `as_bytes()` returns the full backing slice and aliases the Arc storage.
267+
#[test]
268+
fn as_bytes_returns_full_backing_slice() {
269+
let v: Vec<u8> = (0u8..64).collect();
270+
let arc: Arc<[u8]> = Arc::from(v);
271+
let arc_ptr = arc.as_ptr();
272+
let col = MultiLaneColumn::new(arc).unwrap();
273+
let bytes = col.as_bytes();
274+
assert_eq!(bytes.len(), 64);
275+
assert_eq!(bytes.as_ptr(), arc_ptr, "as_bytes must alias the Arc backing, not copy");
276+
for (i, &b) in bytes.iter().enumerate() {
277+
assert_eq!(b, i as u8);
278+
}
279+
}
280+
281+
/// Static assertion: `MultiLaneColumn` is `Send + Sync`, so it can cross
282+
/// thread boundaries — required for cognitive-shader-stack multi-consumer
283+
/// access patterns.
284+
#[test]
285+
fn multilane_column_is_send_sync() {
286+
fn assert_send_sync<T: Send + Sync>() {}
287+
assert_send_sync::<MultiLaneColumn>();
201288
}
202289
}

0 commit comments

Comments
 (0)