Skip to content

Commit 6862b99

Browse files
committed
fix(sqlite-provider): auto-create index on key column if missing
On startup, ensure_key_index() checks whether the key column is the table's PRIMARY KEY or is already covered by a secondary index. If neither is the case, it creates a secondary index on the key column. This prevents accidental full table scans if a table is ever built or altered without a proper key index.
1 parent c754605 commit 6862b99

1 file changed

Lines changed: 86 additions & 0 deletions

File tree

src/sqlite_provider.rs

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,88 @@ fn open_conn(db_path: &str) -> DFResult<Connection> {
104104
Ok(conn)
105105
}
106106

107+
/// Ensure the key column has an index. If the table was created with
108+
/// `INTEGER PRIMARY KEY` the rowid alias already serves as the index and
109+
/// this is a no-op. For tables created without a PK (pre-fix builds) we
110+
/// create a secondary index so point lookups use the B-tree instead of a
111+
/// full table scan.
112+
fn ensure_key_index(conn: &Connection, table_name: &str, key_col: &str) -> DFResult<()> {
113+
// Check if the key column is the INTEGER PRIMARY KEY (rowid alias).
114+
// In that case SQLite already uses the rowid B-tree — no extra index needed.
115+
let is_pk: bool = conn
116+
.query_row(
117+
&format!(
118+
"SELECT pk FROM pragma_table_info({tn}) WHERE name = ?1",
119+
tn = quote_ident(table_name)
120+
),
121+
rusqlite::params![key_col],
122+
|row| row.get::<_, i64>(0),
123+
)
124+
.map(|pk| pk > 0)
125+
.unwrap_or(false);
126+
127+
if is_pk {
128+
return Ok(());
129+
}
130+
131+
// Check if any existing index covers the key column using pragmas
132+
// (avoids brittle SQL text matching against sqlite_master).
133+
let has_index: bool = {
134+
let mut found = false;
135+
let mut idx_stmt = conn
136+
.prepare(&format!(
137+
"SELECT name FROM pragma_index_list({tn})",
138+
tn = quote_ident(table_name)
139+
))
140+
.map_err(|e| DataFusionError::Execution(e.to_string()))?;
141+
let idx_names: Vec<String> = idx_stmt
142+
.query_map([], |row| row.get::<_, String>(0))
143+
.map_err(|e| DataFusionError::Execution(e.to_string()))?
144+
.filter_map(|r| r.ok())
145+
.collect();
146+
for idx_name in idx_names {
147+
let col_name: Option<String> = conn
148+
.query_row(
149+
&format!(
150+
"SELECT name FROM pragma_index_info({idx})",
151+
idx = quote_ident(&idx_name)
152+
),
153+
[],
154+
|row| row.get::<_, String>(0),
155+
)
156+
.ok();
157+
if col_name.as_deref() == Some(key_col) {
158+
found = true;
159+
break;
160+
}
161+
}
162+
found
163+
};
164+
165+
if has_index {
166+
return Ok(());
167+
}
168+
169+
tracing::warn!(
170+
"SQLite table '{}': key column '{}' has no index — creating one (one-time migration).",
171+
table_name,
172+
key_col,
173+
);
174+
conn.execute(
175+
&format!(
176+
"CREATE INDEX {idx} ON {tn}({col})",
177+
idx = quote_ident(&format!("idx_{table_name}_{key_col}")),
178+
tn = quote_ident(table_name),
179+
col = quote_ident(key_col),
180+
),
181+
[],
182+
)
183+
.map_err(|e| DataFusionError::Execution(format!("failed to create key index: {e}")))?;
184+
185+
tracing::info!("Created index on '{}'.'{}'", table_name, key_col,);
186+
Ok(())
187+
}
188+
107189
impl SqliteLookupProvider {
108190
/// Open the existing SQLite database at `db_path`, or build it from
109191
/// parquet files on first run. Opens a pool of `pool_size` read
@@ -152,6 +234,10 @@ impl SqliteLookupProvider {
152234
table_name,
153235
n
154236
);
237+
// Ensure the key column is indexed. Tables built before the
238+
// INTEGER PRIMARY KEY fix may lack any index on the key column,
239+
// turning every point lookup into a full table scan.
240+
ensure_key_index(&conn, table_name, &key_col)?;
155241
} else {
156242
tracing::info!(
157243
"First run: building SQLite table '{}' (one-time).",

0 commit comments

Comments
 (0)