Skip to content

Commit f0d40e8

Browse files
wkalt and claude authored
fix: resolve system index types in describe_indices (#6685)
describe_indices() returned "Unknown" for system indices (__lance_frag_reuse, __lance_mem_wal) because their proto details have no scalar plugin registered. list_indices() already special-cases these via infer_system_index_type(); apply the same check in IndexDescriptionImpl::try_new so the two methods agree.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ca806a9 commit f0d40e8

2 files changed

Lines changed: 45 additions & 3 deletions

File tree

python/python/tests/test_optimize.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import lance
1010
import numpy as np
1111
import pyarrow as pa
12+
import pytest
1213
from lance.lance import Compaction
1314
from lance.optimize import RewriteResult
1415
from lance.vector import vec_to_table
@@ -323,6 +324,41 @@ def test_defer_index_remap(tmp_path: Path):
323324
assert any(idx.name == "__lance_frag_reuse" for idx in indices)
324325

325326

327+
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
328+
def test_describe_indices_matches_list_indices_for_frag_reuse(tmp_path: Path):
329+
"""describe_indices() and list_indices() must agree on the index_type
330+
string for every index, including the __lance_frag_reuse system index
331+
that defer_index_remap produces.
332+
333+
list_indices() special-cases system indices via infer_system_index_type()
334+
in python/src/dataset.rs. Before this fix, describe_indices() in
335+
rust/lance/src/index.rs::IndexDescriptionImpl::try_new did not, so it
336+
fell through to a plugin lookup that has no entry for
337+
FragmentReuseIndexDetails and reported 'Unknown' instead.
338+
"""
339+
base_dir = tmp_path / "dataset"
340+
data = pa.table({"i": range(6_000), "val": range(6_000)})
341+
dataset = lance.write_dataset(data, base_dir, max_rows_per_file=1_000)
342+
dataset.create_scalar_index("i", "BTREE")
343+
dataset.delete("i < 500")
344+
dataset.optimize.compact_files(
345+
target_rows_per_fragment=2_000, defer_index_remap=True, num_threads=1
346+
)
347+
348+
dataset = lance.dataset(base_dir)
349+
described = {d.name: d.index_type for d in dataset.describe_indices()}
350+
listed = {idx["name"]: idx["type"] for idx in dataset.list_indices()}
351+
352+
assert "__lance_frag_reuse" in listed, (
353+
"test precondition: defer_index_remap should produce a frag-reuse index"
354+
)
355+
assert described == listed, (
356+
"describe_indices and list_indices disagree on index_type:\n"
357+
f" describe_indices: {described}\n"
358+
f" list_indices: {listed}"
359+
)
360+
361+
326362
def test_dataset_distributed_optimize(tmp_path: Path):
327363
base_dir = tmp_path / "dataset"
328364
data = pa.table({"a": range(800), "b": range(800)})

rust/lance/src/index.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ use lance_index::vector::sq::ScalarQuantizer;
4646
use lance_index::vector::v3::subindex::IvfSubIndex;
4747
use lance_index::{INDEX_FILE_NAME, Index, IndexType, PrewarmOptions, pb, vector::VectorIndex};
4848
use lance_index::{
49-
IndexCriteria, is_system_index,
49+
IndexCriteria, infer_system_index_type, is_system_index,
5050
metrics::{MetricsCollector, NoOpMetricsCollector},
5151
};
5252
use lance_io::scheduler::{ScanScheduler, SchedulerConfig};
@@ -651,8 +651,14 @@ impl IndexDescriptionImpl {
651651
let details = IndexDetails(index_details.clone());
652652
let mut rows_indexed = 0;
653653

654-
// Vector indices need to be opened to get the correct type
655-
let index_type = if details.is_vector() {
654+
// System indices (e.g. __lance_frag_reuse, __lance_mem_wal) are
655+
// identified by name and have no entry in the scalar plugin registry,
656+
// so resolve them up front. This mirrors `load_indices` in
657+
// python/src/dataset.rs, keeping the two listing methods in agreement.
658+
let index_type = if let Some(system_type) = infer_system_index_type(example_metadata) {
659+
system_type.to_string()
660+
} else if details.is_vector() {
661+
// Vector indices need to be opened to get the correct type
656662
let column = field_ids
657663
.first()
658664
.and_then(|id| dataset.schema().field_by_id(*id))

0 commit comments

Comments
 (0)