@@ -55,6 +55,7 @@ use vortex_pco::Pco;
5555use vortex_runend:: RunEnd ;
5656use vortex_sequence:: Sequence ;
5757use vortex_sparse:: Sparse ;
58+ #[ cfg( feature = "unstable_encodings" ) ]
5859use vortex_tensor:: encodings:: turboquant:: TurboQuant ;
5960use vortex_utils:: aliases:: hash_map:: HashMap ;
6061use vortex_zigzag:: ZigZag ;
@@ -113,6 +114,7 @@ pub static ALLOWED_ENCODINGS: LazyLock<ArrayRegistry> = LazyLock::new(|| {
113114 session. register ( RunEnd ) ;
114115 session. register ( Sequence ) ;
115116 session. register ( Sparse ) ;
117+ #[ cfg( feature = "unstable_encodings" ) ]
116118 session. register ( TurboQuant ) ;
117119 session. register ( ZigZag ) ;
118120
@@ -135,7 +137,7 @@ pub struct WriteStrategyBuilder {
135137 field_writers : HashMap < FieldPath , Arc < dyn LayoutStrategy > > ,
136138 allow_encodings : Option < ArrayRegistry > ,
137139 flat_strategy : Option < Arc < dyn LayoutStrategy > > ,
138- builder : Option < BtrBlocksCompressorBuilder > ,
140+ builder : BtrBlocksCompressorBuilder ,
139141}
140142
141143impl Default for WriteStrategyBuilder {
@@ -148,7 +150,7 @@ impl Default for WriteStrategyBuilder {
148150 field_writers : HashMap :: new ( ) ,
149151 allow_encodings : Some ( ALLOWED_ENCODINGS . clone ( ) ) ,
150152 flat_strategy : None ,
151- builder : None ,
153+ builder : BtrBlocksCompressorBuilder :: default ( ) ,
152154 }
153155 }
154156}
@@ -203,8 +205,7 @@ impl WriteStrategyBuilder {
203205 /// GPU decompression. Without it, strings use interleaved Zstd compression.
204206 #[ cfg( feature = "zstd" ) ]
205207 pub fn with_cuda_compatible_encodings ( mut self ) -> Self {
206- let mut builder = self . builder . unwrap_or_default ( ) ;
207- builder = builder. exclude ( [
208+ self . builder = self . builder . exclude ( [
208209 integer:: SparseScheme . id ( ) ,
209210 integer:: RLE_INTEGER_SCHEME . id ( ) ,
210211 float:: RLE_FLOAT_SCHEME . id ( ) ,
@@ -215,14 +216,13 @@ impl WriteStrategyBuilder {
215216
216217 #[ cfg( feature = "unstable_encodings" ) ]
217218 {
218- builder = builder. include ( [ string:: ZstdBuffersScheme . id ( ) ] ) ;
219+ self . builder = self . builder . include ( [ string:: ZstdBuffersScheme . id ( ) ] ) ;
219220 }
220221 #[ cfg( not( feature = "unstable_encodings" ) ) ]
221222 {
222- builder = builder. include ( [ string:: ZstdScheme . id ( ) ] ) ;
223+ self . builder = self . builder . include ( [ string:: ZstdScheme . id ( ) ] ) ;
223224 }
224225
225- self . builder = Some ( builder) ;
226226 self
227227 }
228228
@@ -233,13 +233,11 @@ impl WriteStrategyBuilder {
233233 /// especially for floating-point heavy datasets.
234234 #[ cfg( feature = "zstd" ) ]
235235 pub fn with_compact_encodings ( mut self ) -> Self {
236- let mut builder = self . builder . unwrap_or_default ( ) ;
237- builder = builder. include ( [
236+ self . builder = self . builder . include ( [
238237 string:: ZstdScheme . id ( ) ,
239238 integer:: PcoScheme . id ( ) ,
240239 float:: PcoScheme . id ( ) ,
241240 ] ) ;
242- self . builder = Some ( builder) ;
243241 self
244242 }
245243
@@ -254,15 +252,17 @@ impl WriteStrategyBuilder {
254252 /// compressor is used with TurboQuant added.
255253 #[ cfg( feature = "unstable_encodings" ) ]
256254 pub fn with_vector_quantization ( mut self ) -> Self {
257- let mut builder = self . builder . unwrap_or_default ( ) ;
258- builder = builder. include ( [ turboquant:: scheme:: TURBOQUANT_SCHEME . id ( ) ] ) ;
259- self . builder = Some ( builder) ;
255+ use vortex_tensor:: encodings:: turboquant:: scheme:: TURBOQUANT_SCHEME ;
256+ self . builder = self . builder . with_scheme ( & TURBOQUANT_SCHEME ) ;
260257 self
261258 }
262259
263260 /// Builds the canonical [`LayoutStrategy`] implementation, with the configured overrides
264261 /// applied.
265262 pub fn build ( self ) -> Arc < dyn LayoutStrategy > {
263+ use vortex_btrblocks:: SchemeExt as _;
264+ use vortex_btrblocks:: schemes:: integer:: IntDictScheme ;
265+
266266 let flat: Arc < dyn LayoutStrategy > = if let Some ( flat) = self . flat_strategy {
267267 flat
268268 } else if let Some ( allow_encodings) = self . allow_encodings {
@@ -275,19 +275,28 @@ impl WriteStrategyBuilder {
275275 let chunked = ChunkedLayoutStrategy :: new ( flat. clone ( ) ) ;
276276 // 6. buffer chunks so they end up with closer segment ids physically
277277 let buffered = BufferedStrategy :: new ( chunked, 2 * ONE_MEG ) ; // 2MB
278- // 5. compress each chunk
279- if self . builder . is_some ( ) && self . compressor . is_some ( ) {
280- vortex_panic ! ( "Cannot configure both a custom compressor and custom builder schemes" ) ;
281- }
282278
283- let compressing = if let Some ( ref compressor) = self . compressor {
284- CompressingStrategy :: new_opaque ( buffered, compressor. clone ( ) )
285- } else if let Some ( ref builder) = self . builder {
286- CompressingStrategy :: new_opaque ( buffered, builder. build ( ) )
279+ // 5. compress each chunk
280+ // Build separate compressors for data (excludes IntDict to avoid recursive dict encoding)
281+ // and stats/dict values (includes IntDict).
282+ let ( data_compressor, stats_compressor) : (
283+ Arc < dyn CompressorPlugin > ,
284+ Arc < dyn CompressorPlugin > ,
285+ ) = if let Some ( compressor) = self . compressor {
286+ if self . builder != BtrBlocksCompressorBuilder :: default ( ) {
287+ vortex_panic ! (
288+ "Cannot configure both a custom compressor and custom builder schemes"
289+ ) ;
290+ }
291+ ( compressor. clone ( ) , compressor)
287292 } else {
288- CompressingStrategy :: new_btrblocks ( buffered, true )
293+ let stats = Arc :: new ( self . builder . clone ( ) . build ( ) ) ;
294+ let data = Arc :: new ( self . builder . exclude ( [ IntDictScheme . id ( ) ] ) . build ( ) ) ;
295+ ( data, stats)
289296 } ;
290297
298+ let compressing = CompressingStrategy :: new ( buffered, data_compressor) ;
299+
291300 // 4. prior to compression, coalesce up to a minimum size
292301 let coalescing = RepartitionStrategy :: new (
293302 compressing,
@@ -306,11 +315,7 @@ impl WriteStrategyBuilder {
306315 ) ;
307316
308317 // 2.1. | 3.1. compress stats tables and dict values.
309- let compress_then_flat = if let Some ( ref compressor) = compressor {
310- CompressingStrategy :: new_opaque ( flat, compressor. clone ( ) )
311- } else {
312- CompressingStrategy :: new_btrblocks ( flat, false )
313- } ;
318+ let compress_then_flat = CompressingStrategy :: new ( flat, stats_compressor) ;
314319
315320 // 3. apply dict encoding or fallback
316321 let dict = DictStrategy :: new (
0 commit comments