Skip to content

Commit a94d059

Browse files
authored
feat: add load_index_by_name to retrieve a unique index by name (#3931)
Closes #3926
1 parent a767eda commit a94d059

3 files changed

Lines changed: 81 additions & 52 deletions

File tree

rust/lance-index/src/traits.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ use std::sync::Arc;
55

66
use async_trait::async_trait;
77
use datafusion::execution::SendableRecordBatchStream;
8-
use lance_core::Result;
8+
use lance_core::{Error, Result};
9+
use snafu::location;
910

1011
use crate::{optimize::OptimizeOptions, scalar::ScalarIndexType, IndexParams, IndexType};
1112
use lance_table::format::Index;
@@ -136,6 +137,31 @@ pub trait DatasetIndexExt {
136137
})
137138
}
138139

140+
/// Loads the single index that has the given index name.
141+
/// This function only works when the name identifies exactly one index.
142+
/// If multiple indices share the same name, use [`Self::load_indices_by_name`] instead.
143+
///
144+
/// Returns
145+
/// -------
146+
/// - `Ok(Some(index))`: exactly one index has this name; a clone of it is returned.
147+
/// - `Ok(None)`: no index has this name.
148+
/// - `Err(e)`: `Error::Index` if multiple indices share this name; errors from `load_indices_by_name` are propagated.
149+
///
150+
async fn load_index_by_name(&self, name: &str) -> Result<Option<Index>> {
151+
let indices = self.load_indices_by_name(name).await?;
152+
if indices.is_empty() {
153+
Ok(None)
154+
} else if indices.len() == 1 {
155+
Ok(Some(indices[0].clone()))
156+
} else {
157+
Err(Error::Index {
158+
message: format!("Found multiple indices of the same name: {:?}, please use load_indices_by_name",
159+
indices.iter().map(|idx| &idx.name).collect::<Vec<_>>()),
160+
location: location!(),
161+
})
162+
}
163+
}
164+
139165
/// Loads a specific index with the given index name.
140166
async fn load_scalar_index<'a, 'b>(
141167
&'a self,

rust/lance/src/dataset/index/frag_reuse.rs

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,14 @@ mod tests {
193193
)
194194
.await
195195
.unwrap();
196-
let indices_after_compact = dataset.load_indices().await.unwrap();
197-
let frag_reuse_index_meta = indices_after_compact
198-
.iter()
199-
.find(|idx| idx.name == FRAG_REUSE_INDEX_NAME)
200-
.expect("Fragment reuse index must exist");
201-
let frag_reuse_details = load_frag_reuse_index_details(&dataset, frag_reuse_index_meta)
196+
let Some(frag_reuse_index_meta) = dataset
197+
.load_index_by_name(FRAG_REUSE_INDEX_NAME)
198+
.await
199+
.unwrap()
200+
else {
201+
panic!("Fragment reuse index must be available");
202+
};
203+
let frag_reuse_details = load_frag_reuse_index_details(&dataset, &frag_reuse_index_meta)
202204
.await
203205
.unwrap();
204206
assert_eq!(frag_reuse_details.versions.len(), 1);
@@ -220,12 +222,14 @@ mod tests {
220222

221223
// Cleanup frag reuse index and check there is no reuse version
222224
cleanup_frag_reuse_index(&mut dataset).await.unwrap();
223-
let indices_after_cleanup = dataset.load_indices().await.unwrap();
224-
let frag_reuse_index_meta = indices_after_cleanup
225-
.iter()
226-
.find(|idx| idx.name == FRAG_REUSE_INDEX_NAME)
227-
.expect("Fragment reuse index must exist");
228-
let frag_reuse_details = load_frag_reuse_index_details(&dataset, frag_reuse_index_meta)
225+
let Some(frag_reuse_index_meta) = dataset
226+
.load_index_by_name(FRAG_REUSE_INDEX_NAME)
227+
.await
228+
.unwrap()
229+
else {
230+
panic!("Fragment reuse index must be available");
231+
};
232+
let frag_reuse_details = load_frag_reuse_index_details(&dataset, &frag_reuse_index_meta)
229233
.await
230234
.unwrap();
231235
assert_eq!(frag_reuse_details.versions.len(), 0);

rust/lance/src/dataset/optimize.rs

Lines changed: 38 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1941,21 +1941,20 @@ mod tests {
19411941
assert!(first_metrics.fragments_removed > 0);
19421942
assert!(first_metrics.fragments_added > 0);
19431943

1944-
// Verify fragment reuse index was created
1945-
let indices_after_compact = dataset.load_indices().await.unwrap();
1946-
let frag_reuse_indices: Vec<_> = indices_after_compact
1947-
.iter()
1948-
.filter(|idx| idx.name == FRAG_REUSE_INDEX_NAME)
1949-
.collect();
1950-
assert_eq!(frag_reuse_indices.len(), 1);
1951-
19521944
// Load and verify the fragment reuse index content
1953-
let frag_reuse_index_meta = frag_reuse_indices[0];
1945+
let Some(frag_reuse_index_meta) = dataset
1946+
.load_index_by_name(FRAG_REUSE_INDEX_NAME)
1947+
.await
1948+
.unwrap()
1949+
else {
1950+
panic!("Fragment reuse index must be available");
1951+
};
1952+
19541953
assert_eq!(
19551954
frag_reuse_index_meta.fragment_bitmap.clone().unwrap(),
19561955
expected_all_new_frag_bitmap
19571956
);
1958-
let frag_reuse_details = load_frag_reuse_index_details(&dataset, frag_reuse_index_meta)
1957+
let frag_reuse_details = load_frag_reuse_index_details(&dataset, &frag_reuse_index_meta)
19591958
.await
19601959
.unwrap();
19611960
let frag_reuse_index =
@@ -1998,10 +1997,9 @@ mod tests {
19981997
assert_eq!(transposed_map, expected_all_row_id_map);
19991998

20001999
// Verify the scalar index UUID is unchanged (it should not be remapped yet)
2001-
let current_scalar_index = indices_after_compact
2002-
.iter()
2003-
.find(|idx| idx.name == "scalar")
2004-
.unwrap();
2000+
let Some(current_scalar_index) = dataset.load_index_by_name("scalar").await.unwrap() else {
2001+
panic!("scalar index must be available");
2002+
};
20052003
assert_eq!(current_scalar_index.uuid, original_scalar_uuid);
20062004
}
20072005

@@ -2046,18 +2044,18 @@ mod tests {
20462044
compact_read_versions.push(read_version);
20472045
}
20482046

2049-
let indices_after_compact = dataset.load_indices().await.unwrap();
2050-
let frag_reuse_indices: Vec<_> = indices_after_compact
2051-
.iter()
2052-
.filter(|idx| idx.name == FRAG_REUSE_INDEX_NAME)
2053-
.collect();
2054-
assert_eq!(frag_reuse_indices.len(), 1);
2055-
20562047
// Load and verify the fragment reuse index content
2057-
let frag_reuse_index_meta = frag_reuse_indices[0];
2058-
let frag_reuse_details = load_frag_reuse_index_details(&dataset, frag_reuse_index_meta)
2048+
let Some(frag_reuse_index_meta) = dataset
2049+
.load_index_by_name(FRAG_REUSE_INDEX_NAME)
20592050
.await
2060-
.unwrap();
2051+
.unwrap()
2052+
else {
2053+
panic!("Fragment reuse index must be available");
2054+
};
2055+
let frag_reuse_details =
2056+
load_frag_reuse_index_details(&dataset, &frag_reuse_index_meta)
2057+
.await
2058+
.unwrap();
20612059
let frag_reuse_index =
20622060
open_frag_reuse_index(frag_reuse_details.as_ref(), dataset.fragments().as_slice())
20632061
.await
@@ -2115,8 +2113,9 @@ mod tests {
21152113
};
21162114

21172115
// Remap without a frag reuse index should yield unsupported
2118-
let indices = dataset.load_indices().await.unwrap();
2119-
let scalar_index = indices.iter().find(|idx| idx.name == "scalar").unwrap();
2116+
let Some(scalar_index) = dataset.load_index_by_name("scalar").await.unwrap() else {
2117+
panic!("scalar index must be available");
2118+
};
21202119

21212120
let result = remapping::remap_column_index(&mut dataset, &["i"], index_name.clone()).await;
21222121
assert!(matches!(result, Err(Error::NotSupported { .. })));
@@ -2140,17 +2139,15 @@ mod tests {
21402139
.unwrap();
21412140
}
21422141

2143-
// Verify the fragment reuse index content
2144-
let indices_after_compact = dataset.load_indices().await.unwrap();
2145-
let frag_reuse_indices: Vec<_> = indices_after_compact
2146-
.iter()
2147-
.filter(|idx| idx.name == FRAG_REUSE_INDEX_NAME)
2148-
.collect();
2149-
assert_eq!(frag_reuse_indices.len(), 1);
2150-
21512142
// Load and verify the fragment reuse index content
2152-
let frag_reuse_index_meta = frag_reuse_indices[0];
2153-
let frag_reuse_details = load_frag_reuse_index_details(&dataset, frag_reuse_index_meta)
2143+
let Some(frag_reuse_index_meta) = dataset
2144+
.load_index_by_name(FRAG_REUSE_INDEX_NAME)
2145+
.await
2146+
.unwrap()
2147+
else {
2148+
panic!("Fragment reuse index must be available");
2149+
};
2150+
let frag_reuse_details = load_frag_reuse_index_details(&dataset, &frag_reuse_index_meta)
21542151
.await
21552152
.unwrap();
21562153
let frag_reuse_index =
@@ -2166,15 +2163,17 @@ mod tests {
21662163
.unwrap();
21672164

21682165
// Compare against original index
2169-
let indices = dataset.load_indices().await.unwrap();
2170-
let remapped_scalar_index = indices.iter().find(|idx| idx.name == "scalar").unwrap();
2166+
let Some(remapped_scalar_index) = dataset.load_index_by_name("scalar").await.unwrap()
2167+
else {
2168+
panic!("scalar index must be available");
2169+
};
21712170
assert_ne!(remapped_scalar_index.uuid, scalar_index.uuid);
21722171
let mut all_fragment_bitmap = RoaringBitmap::new();
21732172
dataset.fragments().iter().for_each(|f| {
21742173
all_fragment_bitmap.insert(f.id as u32);
21752174
});
21762175
assert_eq!(
2177-
remapped_scalar_index.fragment_bitmap.clone().unwrap(),
2176+
remapped_scalar_index.fragment_bitmap.unwrap(),
21782177
all_fragment_bitmap
21792178
);
21802179
}

0 commit comments

Comments
 (0)