Skip to content

Commit 2423298

Browse files
committed
fix(codec): address PR-195 follow-up review — bijective pack_leaf + BASIN_NONE distinctness
Two real bugs flagged by CodeRabbit's outside-diff review on b44fe59. P1 — `pack_leaf` was non-bijective via unwrap_or fallbacks (mode.rs:194): Hand-constructing a `LeafCu { mode: Merge, merge_dir: None, .. }` serialized as `LeafCu::merge(.., MergeDir::North)` — silently rewriting malformed input into a different valid leaf and hiding upstream invariant breaks. Now uses `?` to short-circuit with None when required fields are missing for the mode (Merge needs merge_dir; Delta needs delta; Escape needs escape_idx). The `LeafCu::merge/delta/escape` constructors still enforce the invariants — only struct-literal bypass callers can hit the rejection. + 3 regression tests: leaf_pack_rejects_malformed_merge_without_dir, leaf_pack_rejects_malformed_delta_without_value, leaf_pack_rejects_malformed_escape_without_idx + pack_leaf docstring documents the bijective contract. P1 — `BASIN_NONE` collided with `MAX_BASIN_IDX` at 4095 (mode.rs:69): Both equaled `(1 << 12) - 1`, so basin 4095 was ambiguous — a real basin and the "no basin" sentinel had the same encoded value. Now: MAX_BASIN_IDX = 4094 (highest real basin), BASIN_NONE = 4095 (sentinel one slot above). Introduced private BASIN_FIELD_MASK = 0x0FFF for header packing/unpacking — independent of MAX_BASIN_IDX so BASIN_NONE still round-trips through the 12-bit field as a sentinel marker. pack_header/unpack_header switched from `& MAX_BASIN_IDX` (would wrongly clear bit 0 with 0xFFE) to `& BASIN_FIELD_MASK`. + 2 regression tests: basin_none_distinct_from_max_basin_idx, header_round_trips_max_basin_idx_and_basin_none_distinctly + MAX_BASIN_IDX + BASIN_NONE doctests updated to assert the new relationship. Gates: cargo test --features codec --lib hpc::codec → 55 passed (+5) cargo test --features codec --doc hpc::codec → 15 passed cargo fmt --all -- --check → clean cargo clippy --features codec --lib -- -D warnings → clean
1 parent 01c77cc commit 2423298

1 file changed

Lines changed: 112 additions & 20 deletions

File tree

src/hpc/codec/mode.rs

Lines changed: 112 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -59,23 +59,41 @@ use super::ctu::{CellMode, LeafCu, MergeDir};
5959
// Header pack / unpack (16-bit)
6060
// ════════════════════════════════════════════════════════════════════
6161

62-
/// Maximum encodable `basin_idx`. Stored in the lower 12 bits of the
63-
/// header; values >= this constant overflow the header field.
62+
/// Maximum encodable real `basin_idx`. Equal to `(1 << 12) - 2 = 4094`
63+
/// so that the all-ones 12-bit pattern (`0xFFF = 4095`) is reserved as
64+
/// the [`BASIN_NONE`] sentinel — without that reservation, basin 4095
65+
/// would round-trip ambiguously with "no basin assigned".
66+
///
67+
/// The on-wire 12-bit field still holds any value `0..=0xFFF`; only the
68+
/// encoder's *valid-basin* range is restricted to `0..=MAX_BASIN_IDX`.
69+
/// [`BASIN_NONE`] is encodable in the header field too (when an encoder
70+
/// emits a "no basin" record), but it must never appear as a real basin
71+
/// codebook index.
6472
///
6573
/// ```
66-
/// use ndarray::hpc::codec::MAX_BASIN_IDX;
67-
/// assert_eq!(MAX_BASIN_IDX, (1 << 12) - 1);
74+
/// use ndarray::hpc::codec::{BASIN_NONE, MAX_BASIN_IDX};
75+
/// assert_eq!(MAX_BASIN_IDX, (1 << 12) - 2);
76+
/// assert_eq!(MAX_BASIN_IDX, 4094);
77+
/// assert!(MAX_BASIN_IDX < BASIN_NONE);
6878
/// ```
69-
pub const MAX_BASIN_IDX: u16 = (1 << 12) - 1; // 4095
79+
pub const MAX_BASIN_IDX: u16 = (1 << 12) - 2; // 4094
7080

7181
/// Tag inside the per-frame basin codebook for "no basin assigned"
72-
/// (encoder-side sentinel during mode decision).
82+
/// (encoder-side sentinel during mode decision). Equal to `0xFFF`
83+
/// (the all-ones 12-bit pattern) so it sits one slot above the highest
84+
/// real basin index ([`MAX_BASIN_IDX`]).
7385
///
7486
/// ```
7587
/// use ndarray::hpc::codec::{BASIN_NONE, MAX_BASIN_IDX};
76-
/// assert_eq!(BASIN_NONE, MAX_BASIN_IDX);
88+
/// assert_eq!(BASIN_NONE, 4095);
89+
/// assert_eq!(BASIN_NONE, MAX_BASIN_IDX + 1);
7790
/// ```
78-
pub const BASIN_NONE: u16 = MAX_BASIN_IDX;
91+
pub const BASIN_NONE: u16 = (1 << 12) - 1;
92+
93+
/// Private: 12-bit mask for the basin field of the packed header.
94+
/// Independent of [`MAX_BASIN_IDX`] so that [`BASIN_NONE`] (which sits
95+
/// in the 12-bit field but is not a real basin) still round-trips.
96+
const BASIN_FIELD_MASK: u16 = 0x0FFF;
7997

8098
/// Pack `(mode, basin_idx)` into a 16-bit header.
8199
///
@@ -91,7 +109,7 @@ pub const BASIN_NONE: u16 = MAX_BASIN_IDX;
91109
#[inline]
92110
pub fn pack_header(mode: CellMode, basin_idx: u16) -> u16 {
93111
let mode_bits = (mode as u16) & 0b11;
94-
let basin_bits = basin_idx & MAX_BASIN_IDX;
112+
let basin_bits = basin_idx & BASIN_FIELD_MASK;
95113
(mode_bits << 12) | basin_bits
96114
}
97115

@@ -108,7 +126,7 @@ pub fn pack_header(mode: CellMode, basin_idx: u16) -> u16 {
108126
#[inline]
109127
pub fn unpack_header(packed: u16) -> (CellMode, u16) {
110128
let mode_bits = ((packed >> 12) & 0b11) as u8;
111-
let basin_idx = packed & MAX_BASIN_IDX;
129+
let basin_idx = packed & BASIN_FIELD_MASK;
112130
let mode = match mode_bits {
113131
0b00 => CellMode::Skip,
114132
0b01 => CellMode::Merge,
@@ -165,9 +183,15 @@ pub fn unpack_merge_dir(byte: u8) -> MergeDir {
165183
/// worst case) — callers iterating CTUs typically pre-allocate
166184
/// `6 * cell_count` and trim afterwards.
167185
///
168-
/// Returns `None` if `out.len() < packed_byte_len(leaf.mode)` (insufficient
169-
/// capacity for the *mode's* width — Skip needs 2, Merge/Delta need 3,
170-
/// Escape needs 6).
186+
/// Returns `None` in two cases:
187+
/// - `out.len() < packed_byte_len(leaf.mode)` (insufficient capacity for
188+
/// the *mode's* width — Skip needs 2, Merge/Delta need 3, Escape needs 6).
189+
/// - `leaf` is structurally malformed for its mode: `Merge` without a
190+
/// `merge_dir`, `Delta` without a `delta`, or `Escape` without an
191+
/// `escape_idx`. The `LeafCu::merge` / `delta` / `escape` constructors
192+
/// enforce these invariants; only struct-literal callers bypassing the
193+
/// constructors can hit this case. Pack is therefore bijective on the
194+
/// well-formed `LeafCu` subset.
171195
///
172196
/// Format:
173197
/// - Bytes 0-1: header (`pack_header(mode, basin_idx)`, LE)
@@ -191,22 +215,24 @@ pub fn pack_leaf(leaf: &LeafCu, out: &mut [u8]) -> Option<usize> {
191215
}
192216
let header = pack_header(leaf.mode, leaf.basin_idx);
193217
out[..2].copy_from_slice(&header.to_le_bytes());
218+
// Per-mode tail. `?` rejects malformed `LeafCu`s (e.g. a hand-built
219+
// `LeafCu { mode: Merge, merge_dir: None, .. }`) with `None` rather
220+
// than silently rewriting them into a different valid leaf. The
221+
// `LeafCu::merge/delta/escape` constructors enforce the invariants;
222+
// only struct-literal callers bypassing those constructors hit
223+
// these short-circuits.
194224
let tail_len = match leaf.mode {
195225
CellMode::Skip => 0,
196226
CellMode::Merge => {
197-
// Caller guarantees `merge_dir.is_some()` for `Merge` mode
198-
// (LeafCu::merge constructor enforces this). Fall back to
199-
// North if the invariant is violated, to keep encoder
200-
// robustness — the decoder will still produce a valid leaf.
201-
out[2] = pack_merge_dir(leaf.merge_dir.unwrap_or(MergeDir::North));
227+
out[2] = pack_merge_dir(leaf.merge_dir?);
202228
1
203229
}
204230
CellMode::Delta => {
205-
out[2] = leaf.delta.unwrap_or(0);
231+
out[2] = leaf.delta?;
206232
1
207233
}
208234
CellMode::Escape => {
209-
let idx = leaf.escape_idx.unwrap_or(0);
235+
let idx = leaf.escape_idx?;
210236
out[2..6].copy_from_slice(&idx.to_le_bytes());
211237
4
212238
}
@@ -372,6 +398,72 @@ mod tests {
372398
assert!(pack_leaf(&leaf, &mut buf).is_none());
373399
}
374400

401+
#[test]
402+
fn leaf_pack_rejects_malformed_merge_without_dir() {
403+
// Bypass `LeafCu::merge` constructor and hand-build a leaf with
404+
// mode = Merge but merge_dir = None. The previous unwrap_or(North)
405+
// behavior would silently coerce this into a valid leaf — now we
406+
// reject with None instead.
407+
let malformed = LeafCu {
408+
mode: CellMode::Merge,
409+
basin_idx: 10,
410+
delta: None,
411+
merge_dir: None,
412+
escape_idx: None,
413+
};
414+
let mut buf = [0u8; 6];
415+
assert!(pack_leaf(&malformed, &mut buf).is_none());
416+
}
417+
418+
#[test]
419+
fn leaf_pack_rejects_malformed_delta_without_value() {
420+
let malformed = LeafCu {
421+
mode: CellMode::Delta,
422+
basin_idx: 10,
423+
delta: None,
424+
merge_dir: None,
425+
escape_idx: None,
426+
};
427+
let mut buf = [0u8; 6];
428+
assert!(pack_leaf(&malformed, &mut buf).is_none());
429+
}
430+
431+
#[test]
432+
fn leaf_pack_rejects_malformed_escape_without_idx() {
433+
let malformed = LeafCu {
434+
mode: CellMode::Escape,
435+
basin_idx: 10,
436+
delta: None,
437+
merge_dir: None,
438+
escape_idx: None,
439+
};
440+
let mut buf = [0u8; 6];
441+
assert!(pack_leaf(&malformed, &mut buf).is_none());
442+
}
443+
444+
#[test]
445+
fn basin_none_distinct_from_max_basin_idx() {
446+
// Regression for the BASIN_NONE/MAX_BASIN_IDX collision: the
447+
// sentinel must sit one slot above the highest real basin so
448+
// basin 4094 is unambiguously "a real basin" and 4095 is
449+
// unambiguously "no basin assigned".
450+
assert_eq!(MAX_BASIN_IDX, 4094);
451+
assert_eq!(BASIN_NONE, 4095);
452+
assert!(MAX_BASIN_IDX < BASIN_NONE);
453+
}
454+
455+
#[test]
456+
fn header_round_trips_max_basin_idx_and_basin_none_distinctly() {
457+
// Both values fit in the 12-bit field; the encoder treats them
458+
// as different. (Decoders that route on BASIN_NONE need to
459+
// compare against the sentinel explicitly.)
460+
let real = pack_header(CellMode::Skip, MAX_BASIN_IDX);
461+
let none = pack_header(CellMode::Skip, BASIN_NONE);
462+
assert_ne!(real, none);
463+
assert_eq!(unpack_header(real), (CellMode::Skip, MAX_BASIN_IDX));
464+
assert_eq!(unpack_header(none), (CellMode::Skip, BASIN_NONE));
465+
}
466+
375467
#[test]
376468
fn leaf_unpack_rejects_short_buffer() {
377469
// Header says Escape but only 2 bytes follow → not enough.

0 commit comments

Comments
 (0)