Skip to content

Commit 4457a4a

Browse files
authored
perf(alp): avoid per-combination allocations when searching exponents (#8565)
## Summary

`find_best_exponents` ran a full `encode` for every `(e, f)` candidate just to size the result, allocating the encoded, patch and chunk-offset buffers across the whole `e`/`f` search space. It now estimates the size in a single allocation-free pass via `estimate_encoded_size_for_exponents`, which reproduces the existing `estimate_encoded_size` exactly, so the chosen exponents (and therefore the compressed output) are bit-for-bit unchanged.

This is the throughput half of #919; the two-level stratified sampling / top-N reuse that the issue describes would change which exponents are picked and depends on per-column scheme state (#8434), so I left it out here.

## Benchmark

`encodings/alp/benches/alp_compress.rs`, compress medians (type, length, patch-fraction, valid-fraction):

| case | before | after | speedup |
| --- | --- | --- | --- |
| f64 (1000, 0.0, 1.0) | 39.1 us | 14.0 us | 2.8x |
| f64 (1000, 0.1, 0.25) | 45.0 us | 16.5 us | 2.7x |
| f32 (1000, 0.0, 1.0) | 14.6 us | 5.8 us | 2.5x |
| f64 (10000, 0.0, 1.0) | 53.1 us | 28.6 us | 1.9x |
| f64 (10000, 0.1, 0.25) | 73.7 us | 49.4 us | 1.5x |

## Testing

`cargo nextest run -p vortex-alp -p vortex-btrblocks` passes, including a new equivalence test (`estimate_for_exponents_matches_full_encode`) that asserts the allocation-free estimate equals a full `encode` + `estimate_encoded_size` for every candidate exponent across kept, patched and all-patched inputs. `fmt` + `clippy --all-targets --all-features` + `cargo doc` are clean.

---

I'm Korean, so sorry if any wording reads a little awkward.

---------

Signed-off-by: Han Damin <miniex@daminstudio.net>
1 parent 0726fb9 commit 4457a4a

1 file changed

Lines changed: 98 additions & 8 deletions

File tree

encodings/alp/src/alp/mod.rs

Lines changed: 98 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,49 @@ mod tests {
3030
use vortex_array::patches::PatchesMetadata;
3131
use vortex_array::test_harness::check_metadata;
3232

33+
use crate::alp::ALPFloat;
34+
use crate::alp::Exponents;
3335
use crate::alp::array::ALPMetadata;
3436

37+
// The allocation-free estimate must match a full encode + estimate for every candidate
38+
// exponent pair, so `find_best_exponents` picks the same exponents and compression is unchanged.
39+
fn check_estimate_matches<T: ALPFloat>(values: &[T]) {
40+
for e in 0..T::MAX_EXPONENT {
41+
for f in 0..e {
42+
let exp = Exponents { e, f };
43+
let lightweight = T::estimate_encoded_size_for_exponents(values, exp);
44+
let (_, encoded, _, patches, _) = T::encode(values, Some(exp));
45+
let full = T::estimate_encoded_size(&encoded, &patches);
46+
assert_eq!(
47+
lightweight,
48+
full,
49+
"mismatch at e={e}, f={f}, len={}",
50+
values.len()
51+
);
52+
}
53+
}
54+
}
55+
56+
#[test]
fn estimate_for_exponents_matches_full_encode() {
    // f64 mix: clean 2-decimal values (mostly kept), repeating decimals (many patches) and
    // large magnitudes, followed by separate constant and single-element inputs.
    let f64s: Vec<f64> = (0..200)
        .map(|i| i as f64 / 100.0)
        .chain((0..60).map(|i| i as f64 / 7.0))
        .chain([1e17, -1e17, 0.0, 123.0])
        .collect();
    check_estimate_matches(f64s.as_slice());

    let constant = [123.456_f64; 5];
    check_estimate_matches::<f64>(&constant);

    let single = [42.0_f64];
    check_estimate_matches::<f64>(&single);

    // Every value patches at every exponent -> exercises the all-patched branch.
    let thirds_f64 = [1.0_f64 / 3.0; 8];
    check_estimate_matches::<f64>(&thirds_f64);

    // Same shape of input for f32, with magnitudes scaled to the narrower type.
    let f32s: Vec<f32> = (0..200)
        .map(|i| i as f32 / 100.0)
        .chain((0..60).map(|i| i as f32 / 7.0))
        .chain([1e9, -1e9, 0.0, 123.0])
        .collect();
    check_estimate_matches(f32s.as_slice());

    let thirds_f32 = [1.0_f32 / 3.0; 8];
    check_estimate_matches::<f32>(&thirds_f32);
}
75+
3576
#[cfg_attr(miri, ignore)]
3677
#[test]
3778
fn test_alp_metadata() {
@@ -88,6 +129,13 @@ mod private {
88129
impl Sealed for f64 {}
89130
}
90131

132+
/// Folds `value` into a running `(min, max)` pair.
///
/// When `bounds` is still `None`, both ends are seeded with `value`; otherwise the lower end is
/// replaced by the smaller of itself and `value`, and the upper end by the larger.
fn update_bounds<I: Ord + Copy>(bounds: &mut Option<(I, I)>, value: I) {
    let widened = match *bounds {
        // First observation: the range collapses to a single point.
        None => (value, value),
        Some((lo, hi)) => (Ord::min(lo, value), Ord::max(hi, value)),
    };
    *bounds = Some(widened);
}
138+
91139
pub trait ALPFloat: private::Sealed + Float + Display + NativePType {
92140
type ALPInt: PrimInt + Display + ToPrimitive + Copy + NativePType + Into<PValue>;
93141

@@ -120,27 +168,69 @@ pub trait ALPFloat: private::Sealed + Float + Display + NativePType {
120168
.cloned()
121169
.collect_vec()
122170
});
171+
let sample = sample.as_deref().unwrap_or(values);
123172

124173
for e in (0..Self::MAX_EXPONENT).rev() {
125174
for f in 0..e {
126-
let (_, encoded, _, exc_patches, _) = Self::encode(
127-
sample.as_deref().unwrap_or(values),
128-
Some(Exponents { e, f }),
129-
);
130-
131-
let size = Self::estimate_encoded_size(&encoded, &exc_patches);
175+
let exp = Exponents { e, f };
176+
let size = Self::estimate_encoded_size_for_exponents(sample, exp);
132177
if size < best_nbytes {
133178
best_nbytes = size;
134-
best_exp = Exponents { e, f };
179+
best_exp = exp;
135180
} else if size == best_nbytes && e - f < best_exp.e - best_exp.f {
136-
best_exp = Exponents { e, f };
181+
best_exp = exp;
137182
}
138183
}
139184
}
140185

141186
best_exp
142187
}
143188

189+
/// Size estimate for `values` under `exponents` matching a full [`Self::encode`] plus
190+
/// [`Self::estimate_encoded_size`], but without the per-candidate allocations.
191+
fn estimate_encoded_size_for_exponents(values: &[Self], exponents: Exponents) -> usize {
192+
// `kept` is the (min, max) over values that round-trip exactly (kept inline by `encode`);
193+
// `all` is the (min, max) over every encoded value. `encode` fills patched slots in-range,
194+
// so its emitted range is `kept`, except with all values patched (no fill) where `all` wins.
195+
let mut kept: Option<(Self::ALPInt, Self::ALPInt)> = None;
196+
let mut all: Option<(Self::ALPInt, Self::ALPInt)> = None;
197+
let mut patch_count = 0usize;
198+
199+
for &value in values {
200+
let encoded = Self::encode_single_unchecked(value, exponents);
201+
update_bounds(&mut all, encoded);
202+
if Self::decode_single(encoded, exponents).is_eq(value) {
203+
update_bounds(&mut kept, encoded);
204+
} else {
205+
patch_count += 1;
206+
}
207+
}
208+
209+
let range = if patch_count == values.len() {
210+
all
211+
} else {
212+
kept
213+
};
214+
215+
let bits_per_encoded = range
216+
.and_then(|(min, max)| max.checked_sub(&min))
217+
.and_then(|range_size| range_size.to_u64())
218+
.and_then(|range_size| {
219+
range_size
220+
.checked_ilog2()
221+
.map(|bits| (bits + 1) as usize)
222+
.or(Some(0))
223+
})
224+
.unwrap_or(size_of::<Self::ALPInt>() * 8);
225+
226+
let encoded_bytes = (values.len() * bits_per_encoded).div_ceil(8);
227+
// each patch is a value + a position
228+
// in practice, patch positions are in [0, u16::MAX] because of how we chunk
229+
let patch_bytes = patch_count * (size_of::<Self>() + size_of::<u16>());
230+
231+
encoded_bytes + patch_bytes
232+
}
233+
144234
#[inline]
145235
fn estimate_encoded_size(encoded: &[Self::ALPInt], patches: &[Self]) -> usize {
146236
let bits_per_encoded = encoded

0 commit comments

Comments
 (0)