11mod dictionary_encoded;
22mod serialize;
33
4+ use std:: cell:: RefCell ;
45use std:: fmt:: { self , Debug } ;
56use std:: io:: Write ;
67use std:: ops:: { Range , RangeInclusive } ;
@@ -19,6 +20,11 @@ use crate::column_values::monotonic_mapping::StrictlyMonotonicMappingToInternal;
1920use crate :: column_values:: { ColumnValues , monotonic_map_column} ;
2021use crate :: { Cardinality , DocId , EmptyColumnValues , MonotonicallyMappableToU64 , RowId } ;
2122
23+ thread_local ! {
24+ static ROWS : RefCell <Vec <RowId >> = const { RefCell :: new( Vec :: new( ) ) } ;
25+ static DOCS : RefCell <Vec <DocId >> = const { RefCell :: new( Vec :: new( ) ) } ;
26+ }
27+
2228#[ derive( Clone ) ]
2329pub struct Column < T = u64 > {
2430 pub index : ColumnIndex ,
@@ -156,16 +162,15 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static + Default> Column<T> {
156162 output : & mut Vec < crate :: ComparableDoc < Option < T > , DocId > > ,
157163 value_range : ValueRange < T > ,
158164 ) {
159- // TODO: Move `COLLECT_BLOCK_BUFFER_LEN` to allow for use here, or use a different constant
160- // in this context.
161- const BLOCK_LEN : usize = 64 ; // Corresponds to COLLECT_BLOCK_BUFFER_LEN in tantivy's docset
162165 match ( & self . index , value_range) {
163166 ( ColumnIndex :: Empty { .. } , value_range) => {
164167 let nulls_match = match & value_range {
165168 ValueRange :: All => true ,
166169 ValueRange :: Inclusive ( _) => false ,
167170 ValueRange :: GreaterThan ( _, nulls_match) => * nulls_match,
171+ ValueRange :: GreaterThanOrEqual ( _, nulls_match) => * nulls_match,
168172 ValueRange :: LessThan ( _, nulls_match) => * nulls_match,
173+ ValueRange :: LessThanOrEqual ( _, nulls_match) => * nulls_match,
169174 } ;
170175 if nulls_match {
171176 for & doc in input_docs {
@@ -178,83 +183,77 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static + Default> Column<T> {
178183 }
179184 ( ColumnIndex :: Full , value_range) => {
180185 self . values
181- . get_vals_in_value_range ( input_docs, output, value_range) ;
186+ . get_vals_in_value_range ( input_docs, input_docs , output, value_range) ;
182187 }
183188 ( ColumnIndex :: Optional ( optional_index) , value_range) => {
184- let len = input_docs. len ( ) ;
185- // Ensure the input docids length does not exceed BLOCK_LEN for stack allocation
186- // safety. If it does, we might need to handle this with multiple
187- // chunks or fallback to heap. For now, an assert is used to confirm
188- // expected usage within batch processing limits.
189- assert ! (
190- len <= BLOCK_LEN ,
191- "Input docids length ({}) exceeds BLOCK_LEN ({})" ,
192- len,
193- BLOCK_LEN
194- ) ;
195-
196- let mut input_docs_buffer = [ 0u32 ; BLOCK_LEN ] ;
197- input_docs_buffer[ ..len] . copy_from_slice ( input_docs) ;
198-
199- let mut dense_row_ids_buffer = [ 0u32 ; BLOCK_LEN ] ;
200- let mut dense_values_buffer = [ T :: default ( ) ; BLOCK_LEN ] ;
201- let mut presence_mask: u64 = 0 ; // Bitmask to track which input_docs have a value
202- let mut num_present = 0 ;
203-
204- // Phase 1: Identify existing RowIds and build dense_row_ids_buffer
205- for ( i, & doc_id) in input_docs_buffer[ ..len] . iter ( ) . enumerate ( ) {
206- if let Some ( row_id) = optional_index. rank_if_exists ( doc_id) {
207- dense_row_ids_buffer[ num_present] = row_id;
208- presence_mask |= 1u64 << i; // Set bit for present docid
209- num_present += 1 ;
210- }
211- }
212-
213- // Phase 2: Batch fetch values for present docs
214- if num_present > 0 {
215- self . values . get_vals (
216- & dense_row_ids_buffer[ ..num_present] ,
217- & mut dense_values_buffer[ ..num_present] ,
218- ) ;
219- }
220-
221- // Determine if nulls match the value range
222189 let nulls_match = match & value_range {
223190 ValueRange :: All => true ,
224191 ValueRange :: Inclusive ( _) => false ,
225192 ValueRange :: GreaterThan ( _, nulls_match) => * nulls_match,
193+ ValueRange :: GreaterThanOrEqual ( _, nulls_match) => * nulls_match,
226194 ValueRange :: LessThan ( _, nulls_match) => * nulls_match,
195+ ValueRange :: LessThanOrEqual ( _, nulls_match) => * nulls_match,
227196 } ;
228197
229- // Phase 3: Filter and merge results, reconstructing docids and values
230- let mut dense_values_cursor = 0 ;
231- for i in 0 ..len {
232- let original_doc_id = input_docs_buffer[ i] ;
233- if ( presence_mask & ( 1u64 << i) ) != 0 {
234- // This doc_id was present in the optional index and has a value
235- let val = dense_values_buffer[ dense_values_cursor] ;
236- dense_values_cursor += 1 ;
237-
238- // Check if the value matches the value range
239- let value_matches = match & value_range {
240- ValueRange :: All => true ,
241- ValueRange :: Inclusive ( r) => r. contains ( & val) ,
242- ValueRange :: GreaterThan ( t, _) => val > * t,
243- ValueRange :: LessThan ( t, _) => val < * t,
244- } ;
198+ let fallback_needed = ROWS . with ( |rows_cell| {
199+ DOCS . with ( |docs_cell| {
200+ let mut rows = rows_cell. borrow_mut ( ) ;
201+ let mut docs = docs_cell. borrow_mut ( ) ;
202+ rows. clear ( ) ;
203+ docs. clear ( ) ;
204+
205+ let mut has_nulls = false ;
206+
207+ for & doc_id in input_docs {
208+ if let Some ( row_id) = optional_index. rank_if_exists ( doc_id) {
209+ rows. push ( row_id) ;
210+ docs. push ( doc_id) ;
211+ } else {
212+ has_nulls = true ;
213+ if nulls_match {
214+ break ;
215+ }
216+ }
217+ }
245218
246- if value_matches {
219+ if !has_nulls || !nulls_match {
220+ self . values . get_vals_in_value_range (
221+ & rows,
222+ & docs,
223+ output,
224+ value_range. clone ( ) ,
225+ ) ;
226+ return false ;
227+ }
228+ true
229+ } )
230+ } ) ;
231+
232+ if fallback_needed {
233+ for & doc_id in input_docs {
234+ if let Some ( row_id) = optional_index. rank_if_exists ( doc_id) {
235+ let val = self . values . get_val ( row_id) ;
236+ let value_matches = match & value_range {
237+ ValueRange :: All => true ,
238+ ValueRange :: Inclusive ( r) => r. contains ( & val) ,
239+ ValueRange :: GreaterThan ( t, _) => val > * t,
240+ ValueRange :: GreaterThanOrEqual ( t, _) => val >= * t,
241+ ValueRange :: LessThan ( t, _) => val < * t,
242+ ValueRange :: LessThanOrEqual ( t, _) => val <= * t,
243+ } ;
244+
245+ if value_matches {
246+ output. push ( crate :: ComparableDoc {
247+ doc : doc_id,
248+ sort_key : Some ( val) ,
249+ } ) ;
250+ }
251+ } else if nulls_match {
247252 output. push ( crate :: ComparableDoc {
248- doc : original_doc_id ,
249- sort_key : Some ( val ) ,
253+ doc : doc_id ,
254+ sort_key : None ,
250255 } ) ;
251256 }
252- } else if nulls_match {
253- // This doc_id was not present in the optional index (null) and nulls match
254- output. push ( crate :: ComparableDoc {
255- doc : original_doc_id,
256- sort_key : None ,
257- } ) ;
258257 }
259258 }
260259 }
@@ -263,7 +262,9 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static + Default> Column<T> {
263262 ValueRange :: All => true ,
264263 ValueRange :: Inclusive ( _) => false ,
265264 ValueRange :: GreaterThan ( _, nulls_match) => * nulls_match,
265+ ValueRange :: GreaterThanOrEqual ( _, nulls_match) => * nulls_match,
266266 ValueRange :: LessThan ( _, nulls_match) => * nulls_match,
267+ ValueRange :: LessThanOrEqual ( _, nulls_match) => * nulls_match,
267268 } ;
268269 for i in 0 ..input_docs. len ( ) {
269270 let docid = input_docs[ i] ;
@@ -275,7 +276,9 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static + Default> Column<T> {
275276 ValueRange :: All => true ,
276277 ValueRange :: Inclusive ( r) => r. contains ( & val) ,
277278 ValueRange :: GreaterThan ( t, _) => val > * t,
279+ ValueRange :: GreaterThanOrEqual ( t, _) => val >= * t,
278280 ValueRange :: LessThan ( t, _) => val < * t,
281+ ValueRange :: LessThanOrEqual ( t, _) => val <= * t,
279282 } ;
280283 if matches {
281284 output. push ( crate :: ComparableDoc {
@@ -310,9 +313,15 @@ pub enum ValueRange<T> {
310313 /// A range that matches all values greater than the threshold.
311314 /// The boolean flag indicates if null values should be included.
312315 GreaterThan ( T , bool ) ,
316+ /// A range that matches all values greater than or equal to the threshold.
317+ /// The boolean flag indicates if null values should be included.
318+ GreaterThanOrEqual ( T , bool ) ,
313319 /// A range that matches all values less than the threshold.
314320 /// The boolean flag indicates if null values should be included.
315321 LessThan ( T , bool ) ,
322+ /// A range that matches all values less than or equal to the threshold.
323+ /// The boolean flag indicates if null values should be included.
324+ LessThanOrEqual ( T , bool ) ,
316325}
317326
318327impl < T : PartialOrd > ValueRange < T > {
@@ -321,7 +330,9 @@ impl<T: PartialOrd> ValueRange<T> {
321330 ValueRange :: Inclusive ( range) => * range. start ( ) <= max && * range. end ( ) >= min,
322331 ValueRange :: All => true ,
323332 ValueRange :: GreaterThan ( val, _) => max > * val,
333+ ValueRange :: GreaterThanOrEqual ( val, _) => max >= * val,
324334 ValueRange :: LessThan ( val, _) => min < * val,
335+ ValueRange :: LessThanOrEqual ( val, _) => min <= * val,
325336 }
326337 }
327338}
0 commit comments