Skip to content

Commit 48d5602

Browse files
committed
fastlanes: polish signed Delta PR
Pre-merge polish across the three things a reviewer would notice: * DeltaArray docstring: add a signed `i32` example next to the unsigned one so users see signed support is first-class. Verified by doctest. * Conformance: extend `test_delta_consistency` and `test_delta_binary_numeric` with i32 / i64 / i8 cases (crossing zero, all-negative, single-negative). These run the array-trait conformance harness, so any operation that's silently broken for signed inputs surfaces here. * cast.rs: expand the comment justifying why signed sources fall back to decompress-and-re-encode (the wrapping-add invariant breaks under value-preserving widening; the same hazard applies to cross-signedness). * synthetic_workload_compression table: rename duplicate "ratio" columns to `FFoR x` / `+bcomp x` so the report is unambiguous. 256 -> 263 tests, all pass. Clippy clean. Fmt clean. Signed-off-by: Claude <noreply@anthropic.com>
1 parent 1a5c639 commit 48d5602

4 files changed

Lines changed: 71 additions & 17 deletions

File tree

encodings/fastlanes/src/delta/array/delta_compress.rs

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -235,24 +235,24 @@ mod tests {
235235

236236
println!();
237237
println!(
238-
"{:<36} {:>10} {:>14} {:>5} {:>5} {:>5} {:>10} {:>7} {:>10} {:>5} {:>10} {:>7}",
238+
"{:<36} {:>10} {:>14} {:>5} {:>5} {:>5} {:>10} {:>9} {:>10} {:>5} {:>10} {:>9}",
239239
"workload",
240240
"raw (B)",
241241
"Δ range",
242242
"Wnaive",
243243
"Wffor",
244244
"Wzig",
245245
"FFoR (B)",
246-
"ratio",
246+
"FFoR x",
247247
"bases (B)",
248248
"Wb",
249249
"+bcomp (B)",
250-
"ratio",
250+
"+bcomp x",
251251
);
252252
println!("{}", "-".repeat(140));
253253

254254
for (name, values) in workloads {
255-
let raw_bytes = values.len() * size_of::<i32>();
255+
let raw_bytes = size_of_val(values.as_slice());
256256
let array = PrimitiveArray::from_iter(values);
257257
let (bases, deltas) = delta_compress(&array, &mut ctx)?;
258258
let deltas_buf: &[i32] = deltas.as_slice();
@@ -264,18 +264,30 @@ mod tests {
264264
// Naive width = OR of raw u32 bit-patterns of every delta. Any negative delta
265265
// sets the high bits and forces W = 32.
266266
let or: u32 = deltas_buf.iter().fold(0u32, |a, &d| a | (d as u32));
267-
let naive_w = if or == 0 { 0 } else { 32 - or.leading_zeros() as usize };
267+
let naive_w = if or == 0 {
268+
0
269+
} else {
270+
32 - or.leading_zeros() as usize
271+
};
268272

269273
// FFoR width = ceil(log2(span)) where span = (max - min + 1).
270274
let span = (max_d as i64 - min_d as i64) as u64 + 1;
271-
let ffor_w = if span <= 1 { 0 } else { 64 - (span - 1).leading_zeros() as usize };
275+
let ffor_w = if span <= 1 {
276+
0
277+
} else {
278+
64 - (span - 1).leading_zeros() as usize
279+
};
272280

273281
// ZigZag width = 1 + ceil(log2(max(|min|, |max|))) for any nonzero delta.
274282
let zz_mag = (min_d.unsigned_abs()).max(max_d.unsigned_abs());
275-
let zz_w = if zz_mag == 0 { 0 } else { 1 + (32 - zz_mag.leading_zeros() as usize) };
283+
let zz_w = if zz_mag == 0 {
284+
0
285+
} else {
286+
1 + (32 - zz_mag.leading_zeros() as usize)
287+
};
276288

277289
// FFoR encoded byte size: bases (already unpacked) + ref + ceil(packed bits / 8).
278-
let bases_bytes = bases_buf.len() * size_of::<i32>();
290+
let bases_bytes = size_of_val(bases_buf);
279291
let ref_bytes = size_of::<i32>();
280292
let packed_bits = deltas_buf.len() * ffor_w;
281293
let ffor_packed_bytes = packed_bits.div_ceil(8);
@@ -291,32 +303,48 @@ mod tests {
291303
let min_b = *bases_buf.iter().min().unwrap();
292304
let max_b = *bases_buf.iter().max().unwrap();
293305
let bspan = (max_b as i64 - min_b as i64) as u64 + 1;
294-
let bases_w = if bspan <= 1 { 0 } else { 64 - (bspan - 1).leading_zeros() as usize };
306+
let bases_w = if bspan <= 1 {
307+
0
308+
} else {
309+
64 - (bspan - 1).leading_zeros() as usize
310+
};
295311
let bases_compressed = (bases_buf.len() * bases_w).div_ceil(8) + ref_bytes;
296312
let total_with_bcomp = bases_compressed + ref_bytes + ffor_packed_bytes;
297313
let ratio_with_bcomp = raw_bytes as f64 / total_with_bcomp as f64;
298314

299315
println!(
300-
"{name:<36} {raw_bytes:>10} {:>14} {naive_w:>5} {ffor_w:>5} {zz_w:>5} {ffor_total:>10} {ratio:>6.2}x {bases_bytes:>10} {bases_w:>5} {total_with_bcomp:>10} {ratio_with_bcomp:>6.2}x",
316+
"{name:<36} {raw_bytes:>10} {:>14} {naive_w:>5} {ffor_w:>5} {zz_w:>5} {ffor_total:>10} {ratio:>8.2}x {bases_bytes:>10} {bases_w:>5} {total_with_bcomp:>10} {ratio_with_bcomp:>8.2}x",
301317
format!("[{min_d}, {max_d}]"),
302318
);
303319

304320
// Sanity assertions. naive_w is 32 (or near it) for any delta sequence that
305321
// contains a negative value; FFoR/ZigZag width must be strictly smaller for these
306322
// workloads.
307-
assert!(ffor_w <= naive_w.max(1), "FFoR must never exceed naive for {name}");
323+
assert!(
324+
ffor_w <= naive_w.max(1),
325+
"FFoR must never exceed naive for {name}"
326+
);
308327
if min_d < 0 {
309-
assert_eq!(naive_w, 32, "any negative delta forces naive W to 32 for {name}");
328+
assert_eq!(
329+
naive_w, 32,
330+
"any negative delta forces naive W to 32 for {name}"
331+
);
310332
assert!(ffor_w < 32, "FFoR must compress below T for {name}");
311333
}
312334
// On the asymmetric workloads (offset, near-monotone) FFoR must beat ZigZag.
313335
if min_d > 0 || max_d < 0 {
314-
assert!(ffor_w < zz_w, "FFoR should beat ZigZag on asymmetric {name}");
336+
assert!(
337+
ffor_w < zz_w,
338+
"FFoR should beat ZigZag on asymmetric {name}"
339+
);
315340
}
316341
// Sorted inputs => the bases inherit smoothness => the bases bit-width should be
317342
// far smaller than `T` for sorted columns.
318343
if name.starts_with("monotone") || name.starts_with("offset") {
319-
assert!(bases_w < 16, "sorted bases should pack below 16 bits for {name}");
344+
assert!(
345+
bases_w < 16,
346+
"sorted bases should pack below 16 bits for {name}"
347+
);
320348
}
321349
}
322350

encodings/fastlanes/src/delta/array/mod.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,21 @@ pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["bases", "deltas"];
4343
/// let array = Delta::try_from_primitive_array(&primitive, &mut session.create_execution_ctx()).unwrap();
4444
/// ```
4545
///
46+
/// Signed inputs are also supported; deltas across negative values are encoded by
47+
/// `wrapping_sub` and recovered by `wrapping_add` at decompress time:
48+
///
49+
/// ```
50+
/// use vortex_array::arrays::PrimitiveArray;
51+
/// use vortex_array::VortexSessionExecute;
52+
/// use vortex_array::session::ArraySession;
53+
/// use vortex_session::VortexSession;
54+
/// use vortex_fastlanes::Delta;
55+
///
56+
/// let session = VortexSession::empty().with::<ArraySession>();
57+
/// let primitive = PrimitiveArray::from_iter([-3_i32, -2, -1, 0, 1, 2]);
58+
/// let array = Delta::try_from_primitive_array(&primitive, &mut session.create_execution_ctx()).unwrap();
59+
/// ```
60+
///
4661
/// # Details
4762
///
4863
/// To facilitate slicing, this array accepts an `offset` and `logical_len`. The offset must be

encodings/fastlanes/src/delta/compute/cast.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,13 @@ impl CastReduce for Delta {
2424
if target_ptype.is_signed_int() || source_ptype.bit_width() > target_ptype.bit_width() {
2525
return Ok(None);
2626
}
27-
// Signed deltas widened by per-element value-preserving cast (e.g. -1i8 -> 4294967295u32)
28-
// break the wrapping-add invariant: zero-extending the delta bytes would preserve it,
29-
// sign-extending the deltas does not. Fall back to full decompress + re-encode.
27+
// Signed sources need a different cast policy than the lossless widening cast
28+
// used here. The delta bytes are stored as the result of `wrapping_sub`, so e.g.
29+
// a delta of -1i8 has the bit pattern 0xFF. Widening *as a value* (the cast op's
30+
// semantics) sign-extends that to 0xFFFFFFFF, which means `wrapping_add(base, delta)`
31+
// at the wider type produces a different result than at the source type — round-trip
32+
// breaks. Cross-signedness widening has the same hazard for the same reason. Fall
33+
// back to decompress-and-re-encode for both cases.
3034
if source_ptype.is_signed_int() {
3135
return Ok(None);
3236
}

encodings/fastlanes/src/delta/vtable/operations.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,11 @@ mod tests {
248248
#[case::delta_large_u64((0u64..2048).collect())]
249249
// Single element
250250
#[case::delta_single(PrimitiveArray::new(buffer![42u32], Validity::NonNullable))]
251+
// Signed inputs (added with signed-delta support).
252+
#[case::delta_i32_crossing_zero((-100i32..100).collect())]
253+
#[case::delta_i64_negative((0i64..100).map(|i| -i * 10).collect())]
254+
#[case::delta_large_i32((-1024i32..1024).collect())]
255+
#[case::delta_single_negative(PrimitiveArray::new(buffer![-42i32], Validity::NonNullable))]
251256
fn test_delta_consistency(#[case] array: PrimitiveArray) {
252257
test_array_consistency(&da(&array).into_array());
253258
}
@@ -258,6 +263,8 @@ mod tests {
258263
#[case::delta_u32_basic(PrimitiveArray::new(buffer![1u32, 1, 1, 1, 1], Validity::NonNullable))]
259264
#[case::delta_u64_basic(PrimitiveArray::new(buffer![1u64, 1, 1, 1, 1], Validity::NonNullable))]
260265
#[case::delta_u32_large(PrimitiveArray::new(buffer![1u32; 100], Validity::NonNullable))]
266+
#[case::delta_i8_basic(PrimitiveArray::new(buffer![-1i8, -1, -1, -1, -1], Validity::NonNullable))]
267+
#[case::delta_i32_basic(PrimitiveArray::new(buffer![-1i32, -1, -1, -1, -1], Validity::NonNullable))]
261268
fn test_delta_binary_numeric(#[case] array: PrimitiveArray) {
262269
test_binary_numeric_array(da(&array).into_array());
263270
}

0 commit comments

Comments
 (0)