From cdc5b258a28d32cccc424c4a4ef049a9b2fbbe8d Mon Sep 17 00:00:00 2001 From: jeadie Date: Wed, 29 Apr 2026 12:29:28 +1000 Subject: [PATCH 1/5] fix(duckdb): exclude Spice-managed HNSW indexes from index drift check Indexes named `__spice_vss_*` are created externally by the Spice runtime after each full-refresh write completes. The datafusion-table-providers overwrite flow compares indexes on the previous internal table against the new one; these externally-managed indexes are not registered in the `TableDefinition` configuration, causing spurious "Indexes do not match" errors on every subsequent refresh. Filtering them out of the actual-indexes set before the comparison lets the drift check ignore them, consistent with how they are managed entirely outside the table provider. --- core/src/duckdb/creator.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/duckdb/creator.rs b/core/src/duckdb/creator.rs index e59bb9a7..ad09d451 100644 --- a/core/src/duckdb/creator.rs +++ b/core/src/duckdb/creator.rs @@ -639,6 +639,7 @@ impl TableManager { let actual_indexes_str_map = actual_indexes_str_map .iter() + .filter(|index| !index.starts_with("__spice_vss_")) .map(|index| index.replace(&other_table.table_name().to_string(), "")) .collect::>(); From b9da22115a6e7d7661518cbb2d147fe877164270 Mon Sep 17 00:00:00 2001 From: jeadie Date: Wed, 29 Apr 2026 13:15:44 +1000 Subject: [PATCH 2/5] fix(duckdb): add ignored_index_prefixes to TableDefinition for drift check Replace the hardcoded `__spice_vss_*` filter with a configurable `ignored_index_prefixes` field on `TableDefinition`. Callers register the prefixes of externally-managed indexes; `verify_indexes_match` then excludes those indexes from the drift comparison so they don't cause spurious refresh failures. 
--- core/src/duckdb/creator.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/core/src/duckdb/creator.rs b/core/src/duckdb/creator.rs index ad09d451..e25881a9 100644 --- a/core/src/duckdb/creator.rs +++ b/core/src/duckdb/creator.rs @@ -54,6 +54,10 @@ pub struct TableDefinition { schema: SchemaRef, constraints: Option, indexes: Vec<(ColumnReference, IndexType)>, + /// Index name prefixes that are managed externally to the write pipeline. + /// Indexes whose names start with any of these prefixes are excluded from + /// the drift-check comparison in [`TableManager::verify_indexes_match`]. + ignored_index_prefixes: Vec, } impl TableDefinition { @@ -64,6 +68,7 @@ impl TableDefinition { schema, constraints: None, indexes: Vec::new(), + ignored_index_prefixes: Vec::new(), } } @@ -79,6 +84,20 @@ impl TableDefinition { self } + /// Register index name prefixes that are managed outside the write pipeline + /// (e.g. by the application layer). Indexes matching these prefixes are + /// excluded from the index drift check so that externally-managed indexes do + /// not cause refresh failures. 
+ #[must_use] + pub fn with_ignored_index_prefixes(mut self, prefixes: Vec) -> Self { + self.ignored_index_prefixes = prefixes; + self + } + + pub fn ignored_index_prefixes(&self) -> &[String] { + &self.ignored_index_prefixes + } + #[must_use] pub fn with_name(self, name: RelationName) -> Self { Self { @@ -86,6 +105,7 @@ impl TableDefinition { schema: self.schema, constraints: self.constraints, indexes: self.indexes, + ignored_index_prefixes: self.ignored_index_prefixes, } } @@ -637,9 +657,10 @@ impl TableManager { .map(|index| index.replace(&self.table_name().to_string(), "")) .collect::>(); + let ignored_prefixes = self.table_definition.ignored_index_prefixes(); let actual_indexes_str_map = actual_indexes_str_map .iter() - .filter(|index| !index.starts_with("__spice_vss_")) + .filter(|index| !ignored_prefixes.iter().any(|prefix| index.starts_with(prefix.as_str()))) .map(|index| index.replace(&other_table.table_name().to_string(), "")) .collect::>(); From ba402549f1db7b8d815b7bd198f705323215dcf5 Mon Sep 17 00:00:00 2001 From: jeadie Date: Wed, 29 Apr 2026 13:18:06 +1000 Subject: [PATCH 3/5] fix(duckdb): make ignored_index_prefixes mutable after construction Use `Mutex<Vec<String>>` so callers can register externally-managed index prefixes after the TableDefinition is created (e.g. when the vector engine is configured in a separate registration step). --- core/src/duckdb/creator.rs | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/core/src/duckdb/creator.rs b/core/src/duckdb/creator.rs index e25881a9..cd314250 100644 --- a/core/src/duckdb/creator.rs +++ b/core/src/duckdb/creator.rs @@ -15,7 +15,7 @@ use itertools::Itertools; use snafu::prelude::*; use std::collections::HashSet; use std::fmt::Display; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use super::DuckDB; use crate::util::{ @@ -57,7 +57,8 @@ pub struct TableDefinition { /// Index name prefixes that are managed externally to the write pipeline. 
/// Indexes whose names start with any of these prefixes are excluded from /// the drift-check comparison in [`TableManager::verify_indexes_match`]. - ignored_index_prefixes: Vec, + /// Uses interior mutability so callers can register prefixes after construction. + ignored_index_prefixes: Mutex>, } impl TableDefinition { @@ -68,7 +69,7 @@ impl TableDefinition { schema, constraints: None, indexes: Vec::new(), - ignored_index_prefixes: Vec::new(), + ignored_index_prefixes: Mutex::new(Vec::new()), } } @@ -84,28 +85,27 @@ impl TableDefinition { self } - /// Register index name prefixes that are managed outside the write pipeline - /// (e.g. by the application layer). Indexes matching these prefixes are - /// excluded from the index drift check so that externally-managed indexes do - /// not cause refresh failures. - #[must_use] - pub fn with_ignored_index_prefixes(mut self, prefixes: Vec) -> Self { - self.ignored_index_prefixes = prefixes; - self - } - - pub fn ignored_index_prefixes(&self) -> &[String] { - &self.ignored_index_prefixes + /// Register an index name prefix whose indexes are managed outside the write pipeline. + /// Indexes whose names start with this prefix are excluded from the drift-check + /// comparison so that externally-managed indexes do not cause refresh failures. + /// + /// May be called after construction (e.g. after the vector engine is configured). 
+ pub fn add_ignored_index_prefix(&self, prefix: impl Into) { + self.ignored_index_prefixes + .lock() + .unwrap_or_else(|e| e.into_inner()) + .push(prefix.into()); } #[must_use] pub fn with_name(self, name: RelationName) -> Self { + let prefixes = self.ignored_index_prefixes.into_inner().unwrap_or_default(); Self { name, schema: self.schema, constraints: self.constraints, indexes: self.indexes, - ignored_index_prefixes: self.ignored_index_prefixes, + ignored_index_prefixes: Mutex::new(prefixes), } } @@ -657,7 +657,9 @@ impl TableManager { .map(|index| index.replace(&self.table_name().to_string(), "")) .collect::>(); - let ignored_prefixes = self.table_definition.ignored_index_prefixes(); + let ignored_prefixes = self.table_definition.ignored_index_prefixes + .lock() + .unwrap_or_else(|e| e.into_inner()); let actual_indexes_str_map = actual_indexes_str_map .iter() .filter(|index| !ignored_prefixes.iter().any(|prefix| index.starts_with(prefix.as_str()))) From 167a4810df6ea8a45346fd10105d89700edf1d7f Mon Sep 17 00:00:00 2001 From: jeadie Date: Wed, 29 Apr 2026 13:24:22 +1000 Subject: [PATCH 4/5] fix: manually impl Clone for TableDefinition to handle Mutex field --- core/src/duckdb/creator.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/core/src/duckdb/creator.rs b/core/src/duckdb/creator.rs index cd314250..6f7ace36 100644 --- a/core/src/duckdb/creator.rs +++ b/core/src/duckdb/creator.rs @@ -48,7 +48,7 @@ impl From for RelationName { /// A table definition, which includes the table name, schema, constraints, and indexes. /// This is used to store the definition of a table for a dataset, and can be re-used to create one or more tables (like internal data tables). 
-#[derive(Debug, Clone, PartialEq)] +#[derive(Debug)] pub struct TableDefinition { name: RelationName, schema: SchemaRef, @@ -116,6 +116,25 @@ impl TableDefinition { pub fn schema(&self) -> SchemaRef { Arc::clone(&self.schema) } +} + +impl Clone for TableDefinition { + fn clone(&self) -> Self { + let prefixes = self.ignored_index_prefixes + .lock() + .unwrap_or_else(|e| e.into_inner()) + .clone(); + Self { + name: self.name.clone(), + schema: Arc::clone(&self.schema), + constraints: self.constraints.clone(), + indexes: self.indexes.clone(), + ignored_index_prefixes: Mutex::new(prefixes), + } + } +} + +impl TableDefinition { pub fn indexes(&self) -> &[(ColumnReference, IndexType)] { &self.indexes From 662c9a5820c93ac6e60f6da43485ef907f900424 Mon Sep 17 00:00:00 2001 From: jeadie Date: Wed, 29 Apr 2026 14:49:28 +1000 Subject: [PATCH 5/5] fix: manually implement PartialEq for TableDefinition --- core/src/duckdb/creator.rs | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/core/src/duckdb/creator.rs b/core/src/duckdb/creator.rs index 6f7ace36..12b0a9a5 100644 --- a/core/src/duckdb/creator.rs +++ b/core/src/duckdb/creator.rs @@ -118,9 +118,19 @@ impl TableDefinition { } } +impl PartialEq for TableDefinition { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + && self.schema == other.schema + && self.constraints == other.constraints + && self.indexes == other.indexes + } +} + impl Clone for TableDefinition { fn clone(&self) -> Self { - let prefixes = self.ignored_index_prefixes + let prefixes = self + .ignored_index_prefixes .lock() .unwrap_or_else(|e| e.into_inner()) .clone(); @@ -135,7 +145,6 @@ impl Clone for TableDefinition { } impl TableDefinition { - pub fn indexes(&self) -> &[(ColumnReference, IndexType)] { &self.indexes } @@ -676,12 +685,18 @@ impl TableManager { .map(|index| index.replace(&self.table_name().to_string(), "")) .collect::>(); - let ignored_prefixes = 
self.table_definition.ignored_index_prefixes + let ignored_prefixes = self + .table_definition + .ignored_index_prefixes .lock() .unwrap_or_else(|e| e.into_inner()); let actual_indexes_str_map = actual_indexes_str_map .iter() - .filter(|index| !ignored_prefixes.iter().any(|prefix| index.starts_with(prefix.as_str()))) + .filter(|index| { + !ignored_prefixes + .iter() + .any(|prefix| index.starts_with(prefix.as_str())) + }) .map(|index| index.replace(&other_table.table_name().to_string(), "")) .collect::>();