@@ -56,8 +56,8 @@ use lance_table::format::{Fragment, SelfDescribingFileReader};
5656use lance_table:: io:: manifest:: read_manifest_indexes;
5757use roaring:: RoaringBitmap ;
5858use scalar:: {
59- build_inverted_index, detect_scalar_index_type, index_matches_criteria, inverted_index_details ,
60- TrainingRequest ,
59+ build_inverted_index, detect_scalar_index_type, index_matches_criteria, infer_index_type ,
60+ inverted_index_details , TrainingRequest ,
6161} ;
6262use serde_json:: json;
6363use snafu:: location;
@@ -281,7 +281,7 @@ impl DatasetIndexExt for Dataset {
281281 }
282282
283283 let index_id = Uuid :: new_v4 ( ) ;
284- let index_details: prost_types :: Any = match ( index_type, params. index_name ( ) ) {
284+ let index_details = match ( index_type, params. index_name ( ) ) {
285285 (
286286 IndexType :: Bitmap
287287 | IndexType :: BTree
@@ -388,6 +388,7 @@ impl DatasetIndexExt for Dataset {
388388 dataset_version : self . manifest . version ,
389389 fragment_bitmap : Some ( self . get_fragments ( ) . iter ( ) . map ( |f| f. id ( ) as u32 ) . collect ( ) ) ,
390390 index_details : Some ( index_details) ,
391+ index_version : index_type. version ( ) ,
391392 } ;
392393 let transaction = Transaction :: new (
393394 self . manifest . version ,
@@ -456,10 +457,27 @@ impl DatasetIndexExt for Dataset {
456457 return Ok ( indices) ;
457458 }
458459
459- let loaded_indices: Arc < Vec < IndexMetadata > > =
460+ let loaded_indices: Vec < IndexMetadata > =
460461 read_manifest_indexes ( & self . object_store , & self . manifest_location , & self . manifest )
461462 . await ?
462- . into ( ) ;
463+ . into_iter ( )
464+ . filter ( |idx| {
465+ let max_valid_version = infer_index_type ( idx)
466+ . map ( |t| t. version ( ) )
467+ . unwrap_or_default ( ) ;
468+ let is_valid = idx. index_version <= max_valid_version;
469+ if !is_valid {
470+ log:: warn!(
471+ "Index {} has version {}, which is not supported (<={}), ignoring it" ,
472+ idx. name,
473+ idx. index_version,
474+ max_valid_version,
475+ ) ;
476+ }
477+ is_valid
478+ } )
479+ . collect ( ) ;
480+ let loaded_indices = Arc :: new ( loaded_indices) ;
463481
464482 self . session . index_cache . insert_metadata (
465483 self . base . as_ref ( ) ,
@@ -492,6 +510,7 @@ impl DatasetIndexExt for Dataset {
492510 dataset_version : self . manifest . version ,
493511 fragment_bitmap : Some ( self . get_fragments ( ) . iter ( ) . map ( |f| f. id ( ) as u32 ) . collect ( ) ) ,
494512 index_details : None ,
513+ index_version : 0 ,
495514 } ;
496515
497516 let transaction = Transaction :: new (
@@ -574,28 +593,25 @@ impl DatasetIndexExt for Dataset {
574593 let mut new_indices = vec ! [ ] ;
575594 let mut removed_indices = vec ! [ ] ;
576595 for deltas in name_to_indices. values ( ) {
577- let Some ( ( new_id, removed, mut new_frag_ids) ) =
578- merge_indices ( dataset. clone ( ) , deltas. as_slice ( ) , options) . await ?
596+ let Some ( res) = merge_indices ( dataset. clone ( ) , deltas. as_slice ( ) , options) . await ?
579597 else {
580598 continue ;
581599 } ;
582- for removed_idx in removed. iter ( ) {
583- new_frag_ids |= removed_idx. fragment_bitmap . as_ref ( ) . unwrap ( ) ;
584- }
585600
586601 let last_idx = deltas. last ( ) . expect ( "Delta indices should not be empty" ) ;
587602 let new_idx = IndexMetadata {
588- uuid : new_id ,
603+ uuid : res . new_uuid ,
589604 name : last_idx. name . clone ( ) , // Keep the same name
590605 fields : last_idx. fields . clone ( ) ,
591606 dataset_version : self . manifest . version ,
592- fragment_bitmap : Some ( new_frag_ids ) ,
607+ fragment_bitmap : Some ( res . new_fragment_bitmap ) ,
593608 index_details : last_idx. index_details . clone ( ) ,
609+ index_version : res. new_index_version ,
594610 } ;
595- removed_indices. extend ( removed . iter ( ) . map ( |& idx| idx. clone ( ) ) ) ;
596- if deltas. len ( ) > removed . len ( ) {
611+ removed_indices. extend ( res . removed_indices . iter ( ) . map ( |& idx| idx. clone ( ) ) ) ;
612+ if deltas. len ( ) > removed_indices . len ( ) {
597613 new_indices. extend (
598- deltas[ 0 ..( deltas. len ( ) - removed . len ( ) ) ]
614+ deltas[ 0 ..( deltas. len ( ) - res . removed_indices . len ( ) ) ]
599615 . iter ( )
600616 . map ( |& idx| idx. clone ( ) ) ,
601617 ) ;
@@ -1104,7 +1120,8 @@ impl DatasetIndexInternalExt for Dataset {
11041120 let is_vector_index = idx_schema
11051121 . fields
11061122 . iter ( )
1107- . any ( |f| matches ! ( f. data_type( ) , DataType :: FixedSizeList ( _, _) ) ) ;
1123+ . any ( |f| is_vector_field ( f. data_type ( ) ) ) ;
1124+
11081125 idx. fields . len ( ) == 1 && !is_vector_index
11091126 } ) {
11101127 let field = index. fields [ 0 ] ;
@@ -1208,6 +1225,17 @@ impl DatasetIndexInternalExt for Dataset {
12081225 }
12091226}
12101227
1228+ fn is_vector_field ( data_type : DataType ) -> bool {
1229+ match data_type {
1230+ DataType :: FixedSizeList ( _, _) => true ,
1231+ DataType :: List ( inner) => {
1232+ // If the inner type is a fixed size list, then it is a multivector field
1233+ matches ! ( inner. data_type( ) , DataType :: FixedSizeList ( _, _) )
1234+ }
1235+ _ => false ,
1236+ }
1237+ }
1238+
12111239#[ cfg( test) ]
12121240mod tests {
12131241 use crate :: dataset:: builder:: DatasetBuilder ;
0 commit comments