Skip to content

Commit cb601fd

Browse files
wjones127claude
andauthored
fix: reject Rewrite vs CreateIndex when FRI groups straddle bitmap (#6610)
## Summary `check_rewrite_txn` returned `Ok` for any `(None, Some(_))` — a Rewrite with a deferred-remap FRI committing against a concurrently committed CreateIndex — on the assumption that the FRI would correct index bitmaps post-commit. That is only safe when each rewrite group's old fragments are fully inside or fully outside the new index's bitmap. If a group straddles (some fragments indexed, some not), `load_indices` later fails with `split of indexed and non-indexed data` and queries against the index break. This PR rejects the straddling case as a retryable conflict so the rewrite is replanned against the post-CreateIndex view. Production trigger (with `defer_index_remap=true` now enabled): 1. Compaction plans `[frag1, frag2]` while both are unindexed. 2. `optimize_indices` commits a CreateIndex covering `frag1` only (ran on an older version that didn't see `frag2`). 3. Compaction's Rewrite commits against a read_version earlier than the CreateIndex; the conflict resolver incorrectly returns COMPATIBLE. 4. Queries fail: the FRI group `[frag1, frag2]` straddles the new index bitmap. Also fixes an `.unwrap()` in `remap_fragment_bitmap` so the underlying error propagates instead of panicking. ## Test plan - [x] New test `test_rewrite_fri_vs_create_index_conflict` in `rust/lance/src/dataset/optimize.rs` reproduces the production sequence and asserts `commit_compaction` returns `RetryableCommitConflict` (before the fix, commit succeeded and the query failed with "split of indexed and non-indexed data"). - [x] Existing `dataset::optimize::` and `io::commit::` tests pass. - [x] `cargo clippy -p lance --tests -- -D warnings` 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 11e0532 commit cb601fd

3 files changed

Lines changed: 135 additions & 6 deletions

File tree

rust/lance/src/dataset/optimize.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4564,4 +4564,104 @@ mod tests {
45644564
// validate() should have turned off materialize_deletions since threshold >= 1.0
45654565
assert!(!plan.options.materialize_deletions);
45664566
}
4567+
4568+
// check_rewrite_txn takes the (None, Some(_)) branch when a Rewrite with
4569+
// defer_index_remap=true is committed against a previously committed
4570+
// CreateIndex, declaring COMPATIBLE without verifying that the Rewrite's
4571+
// FRI groups don't straddle the CreateIndex's fragment bitmap. When a
4572+
// group mixes indexed and unindexed fragments, commit succeeds and later
4573+
// queries fail at load_indices with "split of indexed and non-indexed
4574+
// data".
4575+
#[tokio::test]
4576+
async fn test_rewrite_fri_vs_create_index_conflict() {
4577+
use crate::index::DatasetIndexExt;
4578+
use crate::index::vector::VectorIndexParams;
4579+
use futures::TryStreamExt;
4580+
use lance_datagen::{BatchCount, Dimension, RowCount, array, gen_batch};
4581+
use lance_index::IndexType;
4582+
use lance_linalg::distance::MetricType;
4583+
4584+
async fn append_fragment(uri: &str, rows: u64) -> Dataset {
4585+
let reader = gen_batch()
4586+
.col("vec", array::rand_vec::<Float32Type>(Dimension::from(16)))
4587+
.into_reader_rows(RowCount::from(rows), BatchCount::from(1));
4588+
let params = WriteParams {
4589+
max_rows_per_file: rows as usize,
4590+
mode: WriteMode::Append,
4591+
..Default::default()
4592+
};
4593+
Dataset::write(reader, uri, Some(params)).await.unwrap()
4594+
}
4595+
4596+
let tmpdir = TempStrDir::default();
4597+
let uri = format!("file://{}", tmpdir.as_str());
4598+
4599+
// frag0 (256 rows) with a base IVF index.
4600+
let reader = gen_batch()
4601+
.col("vec", array::rand_vec::<Float32Type>(Dimension::from(16)))
4602+
.into_reader_rows(RowCount::from(256), BatchCount::from(1));
4603+
let mut dataset = Dataset::write(
4604+
reader,
4605+
&uri,
4606+
Some(WriteParams {
4607+
max_rows_per_file: 256,
4608+
mode: WriteMode::Overwrite,
4609+
..Default::default()
4610+
}),
4611+
)
4612+
.await
4613+
.unwrap();
4614+
let index_params = VectorIndexParams::ivf_pq(2, 8, 2, MetricType::L2, 50);
4615+
dataset
4616+
.create_index(&["vec"], IndexType::Vector, None, &index_params, true)
4617+
.await
4618+
.unwrap();
4619+
4620+
// Append frag1 (unindexed), snapshot a stale handle pointing here,
4621+
// then append frag2 (also unindexed).
4622+
dataset = append_fragment(&uri, 64).await;
4623+
let mut stale = dataset.clone();
4624+
dataset = append_fragment(&uri, 64).await;
4625+
4626+
// Plan + execute compaction of frag1+frag2 with deferred remap.
4627+
let options = CompactionOptions {
4628+
defer_index_remap: true,
4629+
..Default::default()
4630+
};
4631+
let plan = plan_compaction(&dataset, &options).await.unwrap();
4632+
assert!(!plan.tasks.is_empty());
4633+
let snapshot = dataset.clone();
4634+
let completed: Vec<RewriteResult> = futures::stream::iter(plan.tasks.into_iter())
4635+
.map(|task| rewrite_files(Cow::Borrowed(&snapshot), task, &options))
4636+
.buffer_unordered(1)
4637+
.try_collect()
4638+
.await
4639+
.unwrap();
4640+
4641+
// optimize_indices on the stale handle indexes frag1 only (frag2
4642+
// didn't exist at that version), commits as CreateIndex. `dataset`
4643+
// stays at its pre-optimize version so the Rewrite commit has to
4644+
// conflict-check against this CreateIndex.
4645+
stale
4646+
.optimize_indices(&lance_index::optimize::OptimizeOptions::append())
4647+
.await
4648+
.unwrap();
4649+
4650+
// Commit the pre-executed Rewrite. The FRI group [frag1, frag2]
4651+
// straddles the new CreateIndex bitmap (frag1 indexed, frag2 not), so
4652+
// check_rewrite_txn must reject this as a retryable conflict rather
4653+
// than letting it commit into a broken state that fails queries.
4654+
let err = commit_compaction(
4655+
&mut dataset,
4656+
completed,
4657+
Arc::new(DatasetIndexRemapperOptions::default()),
4658+
&options,
4659+
)
4660+
.await
4661+
.expect_err("commit should fail with retryable conflict");
4662+
assert!(
4663+
matches!(err, Error::RetryableCommitConflict { .. }),
4664+
"unexpected error: {err}"
4665+
);
4666+
}
45674667
}

rust/lance/src/index.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -923,11 +923,11 @@ impl DatasetIndexExt for Dataset {
923923
})
924924
.await?;
925925
let mut indices = indices.as_ref().clone();
926-
indices.iter_mut().for_each(|idx| {
926+
for idx in indices.iter_mut() {
927927
if let Some(bitmap) = idx.fragment_bitmap.as_mut() {
928-
frag_reuse_index.remap_fragment_bitmap(bitmap).unwrap();
928+
frag_reuse_index.remap_fragment_bitmap(bitmap)?;
929929
}
930-
});
930+
}
931931
Ok(Arc::new(indices))
932932
} else {
933933
Ok(indices)

rust/lance/src/io/commit/conflict_resolver.rs

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -746,9 +746,38 @@ impl<'a> TransactionRebase<'a> {
746746
.push(committed_fri.clone());
747747
Ok(())
748748
}
749-
// If rewrite defers index remap,
750-
// then it does not conflict with index creation
751-
(None, Some(_)) => Ok(()),
749+
// If rewrite defers index remap, the FRI handles the
750+
// post-commit bitmap update — but only if each rewrite
751+
// group is fully inside or fully outside each new
752+
// index's fragment bitmap. A group that straddles
753+
// would produce a bitmap with a mix of indexed and
754+
// non-indexed fragments, which load_indices rejects.
755+
(None, Some(_)) => {
756+
for index in new_indices {
757+
let Some(frag_bitmap) = &index.fragment_bitmap else {
758+
return Err(self
759+
.retryable_conflict_err(other_transaction, other_version));
760+
};
761+
for group in groups {
762+
let mut indexed = 0usize;
763+
let mut unindexed = 0usize;
764+
for frag in &group.old_fragments {
765+
if frag_bitmap.contains(frag.id as u32) {
766+
indexed += 1;
767+
} else {
768+
unindexed += 1;
769+
}
770+
}
771+
if indexed > 0 && unindexed > 0 {
772+
return Err(self.retryable_conflict_err(
773+
other_transaction,
774+
other_version,
775+
));
776+
}
777+
}
778+
}
779+
Ok(())
780+
}
752781
// Rewrite with remapping and frag_reuse_index creation can commit without conflict
753782
(Some(_), None) => {
754783
// this should not happen today since we don't support committing

0 commit comments

Comments
 (0)