Skip to content

Commit ba827a4

Browse files
fix: fix typo in compressor scheme (#7241)
fix typo in compressor scheme Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent bd9f543 commit ba827a4

1 file changed

Lines changed: 45 additions & 1 deletion

File tree

vortex-compressor/src/scheme.rs

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ pub fn estimate_compression_ratio_with_sampling<S: Scheme + ?Sized>(
273273
sample(array, SAMPLE_SIZE, sample_count)
274274
};
275275

276-
let mut sample_data = ArrayAndStats::new(sample_array, ctx.stats_options());
276+
let mut sample_data = ArrayAndStats::new(sample_array, scheme.stats_options());
277277
let sample_ctx = ctx.as_sample();
278278

279279
let after = scheme
@@ -286,3 +286,47 @@ pub fn estimate_compression_ratio_with_sampling<S: Scheme + ?Sized>(
286286

287287
Ok(ratio)
288288
}
289+
290+
#[cfg(test)]
291+
mod tests {
292+
use vortex_array::IntoArray;
293+
use vortex_array::arrays::PrimitiveArray;
294+
use vortex_array::validity::Validity;
295+
use vortex_buffer::buffer;
296+
use vortex_error::VortexResult;
297+
298+
use super::estimate_compression_ratio_with_sampling;
299+
use crate::CascadingCompressor;
300+
use crate::builtins::FloatDictScheme;
301+
use crate::ctx::CompressorContext;
302+
303+
/// Regression test for <https://github.com/spiraldb/vortex/issues/7227>.
304+
///
305+
/// `estimate_compression_ratio_with_sampling` must use the *scheme's* stats options
306+
/// (which request distinct-value counting) rather than the context's stats options
307+
/// (which may not). With the old code this panicked inside `dictionary_encode` because
308+
/// distinct values were never computed for the sample.
309+
#[test]
310+
fn sampling_uses_scheme_stats_options() -> VortexResult<()> {
311+
// Low-cardinality float array so FloatDictScheme considers it compressible.
312+
let array = PrimitiveArray::new(
313+
buffer![1.0f32, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
314+
Validity::NonNullable,
315+
)
316+
.into_array();
317+
318+
let compressor = CascadingCompressor::new(vec![&FloatDictScheme]);
319+
320+
// A context with default stats_options (count_distinct_values = false) and
321+
// marked as a sample so the function skips the sampling step and compresses
322+
// the array directly.
323+
let ctx = CompressorContext::default().as_sample();
324+
325+
// Before the fix this panicked with:
326+
// "this must be present since `DictScheme` declared that we need distinct values"
327+
let ratio =
328+
estimate_compression_ratio_with_sampling(&FloatDictScheme, &compressor, &array, ctx)?;
329+
assert!(ratio.is_finite());
330+
Ok(())
331+
}
332+
}

0 commit comments

Comments
 (0)