Skip to content

Commit df24e73

Browse files
authored
fix: replace better-sqlite3 with bun:sqlite for schema cache and SQLite driver (#323)
* fix: replace `better-sqlite3` with `bun:sqlite` for schema cache and SQLite driver

  `better-sqlite3` is a native addon that doesn't work on Bun ("not yet supported"). This broke `schema_index`, `schema_search`, `schema_cache_status`, and the SQLite driver for all users on the released CLI binary.

  Switch to `bun:sqlite`, which is built into the Bun runtime — zero-install, no native compilation, same synchronous API. The storage layer (`db.ts`) already uses this pattern successfully.

  Changes:
  - `schema/cache.ts`: direct `bun:sqlite` import, sync `create()`/`createInMemory()`
  - `drivers/sqlite.ts`: `bun:sqlite` import, fix PRAGMA+LIMIT syntax error
  - Remove `better-sqlite3` from optional deps, peer deps, build externals, types
  - Update driver docs and E2E tests (SQLite tests no longer need skip guards)

  Closes #314

* test: add adversarial schema cache and SQLite driver tests

  34 new tests covering:
  - Upgrade path from `better-sqlite3` (legacy DB files opened by `bun:sqlite`)
  - Corrupted/truncated/zero-byte database files
  - SQL injection resistance via search queries (7 injection vectors + null bytes)
  - Unicode and special character identifiers in schema/table/column names
  - Large dataset stress (1000 tables x 10 columns)
  - Re-indexing data replacement and multi-warehouse isolation
  - Connector failure modes (listSchemas/listTables/describeTable errors)
  - Search edge cases (empty, stop words, long query, case insensitivity, FQN)
  - `listColumns` with limits and unknown warehouses
  - File-based cache persistence across close/reopen
  - Singleton lifecycle (getCache/resetCache)
  - PRAGMA LIMIT syntax fix verification

* fix: address code review findings — readonly bug, transaction wrapping, tests

  Fixes from 6-model consensus code review:
  1. **CRITICAL** — SQLite driver `readonly` + `create: true` bug: gate `create` and WAL pragma on `!readonly` so readonly connections don't silently open read-write or crash on PRAGMA WAL.
  2. **MAJOR** — Wrap `indexWarehouse` inserts in `db.transaction()` per-table to avoid per-statement disk fsyncs (~200x slowdown for large warehouses).
  3. **MAJOR** — Fix no-op parent directory test (was creating dir before testing). Add 3 readonly connection tests (read existing, reject writes, refuse create).
  4. **MINOR** — Extend `idx_columns_table` covering index to include `column_name` for `listColumns()` ORDER BY.
1 parent 7a16e73 commit df24e73

File tree

9 files changed

+806
-90
lines changed

9 files changed

+806
-90
lines changed

docs/docs/drivers.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Altimate Code connects to 10 databases natively via TypeScript drivers. No Pytho
1010
|----------|---------|-------------|------------|-------|
1111
| PostgreSQL | `pg` | Password, Connection String, SSL | ✅ Docker | Stable, fully parameterized queries |
1212
| DuckDB | `duckdb` | File/Memory (no auth) | ✅ In-memory | Default local database |
13-
| SQLite | `better-sqlite3` | File (no auth) | ✅ File-based | Sync API wrapped async |
13+
| SQLite | `bun:sqlite` (built-in) | File (no auth) | ✅ File-based | Zero-install, built into runtime |
1414
| MySQL | `mysql2` | Password | ✅ Docker | Parameterized introspection |
1515
| SQL Server | `mssql` | Password, Azure AD | ✅ Docker | Uses `tedious` TDS protocol |
1616
| Redshift | `pg` (wire-compat) | Password | ✅ Docker (PG wire) | Uses SVV system views |
@@ -26,7 +26,7 @@ Drivers are `optionalDependencies`, so install only what you need:
2626
```bash
2727
# Embedded databases (no external service needed)
2828
bun add duckdb
29-
bun add better-sqlite3
29+
# SQLite uses bun:sqlite (built-in, no install needed)
3030

3131
# Standard databases
3232
bun add pg # PostgreSQL + Redshift

package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@
7373
},
7474
"devDependencies": {
7575
"@tsconfig/bun": "catalog:",
76-
"@types/better-sqlite3": "7.6.13",
7776
"@types/pg": "8.18.0",
7877
"@typescript/native-preview": "catalog:",
7978
"husky": "9.1.7",

packages/drivers/package.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
"mysql2": "^3.0.0",
1717
"mssql": "^11.0.0",
1818
"oracledb": "^6.0.0",
19-
"duckdb": "^1.0.0",
20-
"better-sqlite3": "^11.0.0"
19+
"duckdb": "^1.0.0"
2120
}
2221
}

packages/drivers/src/sqlite.ts

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,45 @@
11
/**
2-
* SQLite driver using the `better-sqlite3` package.
2+
* SQLite driver using Bun's built-in `bun:sqlite`.
33
* Synchronous API wrapped in async interface.
44
*/
55

6+
import { Database } from "bun:sqlite"
67
import type { ConnectionConfig, Connector, ConnectorResult, SchemaColumn } from "./types"
78

89
export async function connect(config: ConnectionConfig): Promise<Connector> {
9-
let Database: any
10-
try {
11-
const mod = await import("better-sqlite3")
12-
Database = mod.default || mod
13-
} catch {
14-
throw new Error(
15-
"SQLite driver not installed. Run: npm install better-sqlite3",
16-
)
17-
}
18-
1910
const dbPath = (config.path as string) ?? ":memory:"
20-
let db: any
11+
let db: Database | null = null
2112

2213
return {
2314
async connect() {
15+
const isReadonly = config.readonly === true
2416
db = new Database(dbPath, {
25-
readonly: config.readonly === true,
17+
readonly: isReadonly,
18+
create: !isReadonly,
2619
})
27-
db.pragma("journal_mode = WAL")
20+
if (!isReadonly) {
21+
db.exec("PRAGMA journal_mode = WAL")
22+
}
2823
},
2924

3025
async execute(sql: string, limit?: number, _binds?: any[]): Promise<ConnectorResult> {
26+
if (!db) throw new Error("SQLite connection not open")
3127
const effectiveLimit = limit ?? 1000
3228

3329
// Determine if this is a SELECT-like statement
3430
const trimmed = sql.trim().toLowerCase()
31+
const isPragma = trimmed.startsWith("pragma")
3532
const isSelect =
3633
trimmed.startsWith("select") ||
37-
trimmed.startsWith("pragma") ||
34+
isPragma ||
3835
trimmed.startsWith("with") ||
3936
trimmed.startsWith("explain")
4037

38+
// PRAGMA statements don't support LIMIT clause
4139
let query = sql
4240
if (
4341
isSelect &&
42+
!isPragma &&
4443
effectiveLimit &&
4544
!/\bLIMIT\b/i.test(sql)
4645
) {
@@ -59,7 +58,7 @@ export async function connect(config: ConnectionConfig): Promise<Connector> {
5958
}
6059

6160
const stmt = db.prepare(query)
62-
const rows = stmt.all()
61+
const rows = stmt.all() as any[]
6362
const columns = rows.length > 0 ? Object.keys(rows[0]) : []
6463
const truncated = rows.length > effectiveLimit
6564
const limitedRows = truncated ? rows.slice(0, effectiveLimit) : rows
@@ -82,11 +81,12 @@ export async function connect(config: ConnectionConfig): Promise<Connector> {
8281
async listTables(
8382
_schema: string,
8483
): Promise<Array<{ name: string; type: string }>> {
84+
if (!db) throw new Error("SQLite connection not open")
8585
const rows = db
8686
.prepare(
8787
"SELECT name, type FROM sqlite_master WHERE type IN ('table','view') AND name NOT LIKE 'sqlite_%' ORDER BY name",
8888
)
89-
.all()
89+
.all() as any[]
9090
return rows.map((r: any) => ({
9191
name: r.name as string,
9292
type: r.type as string,
@@ -97,7 +97,8 @@ export async function connect(config: ConnectionConfig): Promise<Connector> {
9797
_schema: string,
9898
table: string,
9999
): Promise<SchemaColumn[]> {
100-
const rows = db.prepare('SELECT * FROM pragma_table_info(?) ORDER BY cid').all(table)
100+
if (!db) throw new Error("SQLite connection not open")
101+
const rows = db.prepare('SELECT * FROM pragma_table_info(?) ORDER BY cid').all(table) as any[]
101102
return rows.map((r: any) => ({
102103
name: r.name as string,
103104
data_type: r.type as string,

packages/opencode/script/build.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ for (const item of targets) {
242242
"@altimateai/altimate-core",
243243
// Database drivers — native addons, users install on demand per warehouse
244244
"pg", "snowflake-sdk", "@google-cloud/bigquery", "@databricks/sql",
245-
"mysql2", "mssql", "oracledb", "duckdb", "better-sqlite3",
245+
"mysql2", "mssql", "oracledb", "duckdb",
246246
// Optional infra packages — native addons or heavy optional deps
247247
"keytar", "ssh2", "dockerode",
248248
],

packages/opencode/script/publish.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ const driverPeerDependencies: Record<string, string> = {
2929
"mssql": ">=11",
3030
"oracledb": ">=6",
3131
"duckdb": ">=1",
32-
"better-sqlite3": ">=11",
3332
}
3433

3534
const driverPeerDependenciesMeta: Record<string, { optional: true }> = Object.fromEntries(

packages/opencode/src/altimate/native/schema/cache.ts

Lines changed: 35 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
/**
22
* Schema cache — indexes warehouse metadata into SQLite for fast search.
33
*
4-
* Uses better-sqlite3 (optional dependency, dynamically imported) to build
5-
* a local FTS-ready cache of warehouse schemas, tables, and columns.
4+
* Uses bun:sqlite (built into the Bun runtime) to build a local FTS-ready
5+
* cache of warehouse schemas, tables, and columns.
66
* Cache location: ~/.altimate-code/schema-cache.db
77
*/
88

9+
import { Database } from "bun:sqlite"
910
import * as path from "path"
1011
import * as os from "os"
1112
import * as fs from "fs"
@@ -65,7 +66,7 @@ CREATE INDEX IF NOT EXISTS idx_tables_search ON tables_cache(search_text);
6566
CREATE INDEX IF NOT EXISTS idx_columns_search ON columns_cache(search_text);
6667
CREATE INDEX IF NOT EXISTS idx_tables_warehouse ON tables_cache(warehouse);
6768
CREATE INDEX IF NOT EXISTS idx_columns_warehouse ON columns_cache(warehouse);
68-
CREATE INDEX IF NOT EXISTS idx_columns_table ON columns_cache(warehouse, schema_name, table_name);
69+
CREATE INDEX IF NOT EXISTS idx_columns_table ON columns_cache(warehouse, schema_name, table_name, column_name);
6970
`
7071

7172
// ---------------------------------------------------------------------------
@@ -115,45 +116,24 @@ function tokenizeQuery(query: string): string[] {
115116

116117
/** SQLite-backed schema metadata cache for fast warehouse search. */
117118
export class SchemaCache {
118-
private db: any // better-sqlite3 Database instance
119+
private db: Database
119120
private dbPath: string
120121

121-
private constructor(db: any, dbPath: string) {
122+
private constructor(db: Database, dbPath: string) {
122123
this.db = db
123124
this.dbPath = dbPath
124125
}
125126

126-
/**
127-
* Create a SchemaCache instance.
128-
* Uses dynamic import for better-sqlite3 (optional dependency).
129-
*/
130-
static async create(dbPath?: string): Promise<SchemaCache> {
127+
/** Create a SchemaCache instance backed by a file on disk. */
128+
static create(dbPath?: string): SchemaCache {
131129
const resolvedPath = dbPath || defaultCachePath()
132-
let Database: any
133-
try {
134-
const mod = await import("better-sqlite3")
135-
Database = mod.default || mod
136-
} catch {
137-
throw new Error(
138-
"better-sqlite3 not installed. Install with: npm install better-sqlite3",
139-
)
140-
}
141-
const db = new Database(resolvedPath)
130+
const db = new Database(resolvedPath, { create: true })
142131
db.exec(CREATE_TABLES_SQL)
143132
return new SchemaCache(db, resolvedPath)
144133
}
145134

146-
/**
147-
* Create a SchemaCache with an in-memory database (for testing).
148-
*/
149-
static async createInMemory(): Promise<SchemaCache> {
150-
let Database: any
151-
try {
152-
const mod = await import("better-sqlite3")
153-
Database = mod.default || mod
154-
} catch {
155-
throw new Error("better-sqlite3 not installed.")
156-
}
135+
/** Create a SchemaCache with an in-memory database (for testing). */
136+
static createInMemory(): SchemaCache {
157137
const db = new Database(":memory:")
158138
db.exec(CREATE_TABLES_SQL)
159139
return new SchemaCache(db, ":memory:")
@@ -197,6 +177,18 @@ export class SchemaCache {
197177
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
198178
)
199179

180+
// Batch inserts per-table inside a transaction to avoid per-statement disk fsyncs.
181+
// The async connector calls (listTables, describeTable) run outside the transaction;
182+
// only the synchronous SQLite inserts are wrapped.
183+
const insertTableBatch = this.db.transaction(
184+
(tableArgs: any[], columnArgsBatch: any[][]) => {
185+
insertTable.run(...tableArgs)
186+
for (const colArgs of columnArgsBatch) {
187+
insertColumn.run(...colArgs)
188+
}
189+
},
190+
)
191+
200192
for (const schemaName of schemas) {
201193
if (schemaName.toUpperCase() === "INFORMATION_SCHEMA") continue
202194
totalSchemas++
@@ -211,27 +203,32 @@ export class SchemaCache {
211203
for (const tableInfo of tables) {
212204
totalTables++
213205
const searchText = makeSearchText(databaseName, schemaName, tableInfo.name, tableInfo.type)
214-
insertTable.run(
215-
warehouseName, databaseName, schemaName, tableInfo.name, tableInfo.type, searchText,
216-
)
217206

218207
let columns: Array<{ name: string; data_type: string; nullable: boolean }> = []
219208
try {
220209
columns = await connector.describeTable(schemaName, tableInfo.name)
221210
} catch {
222-
continue
211+
// continue with empty columns
223212
}
224213

214+
// Build column insert args
215+
const columnArgsBatch: any[][] = []
225216
for (const col of columns) {
226217
totalColumns++
227218
const colSearch = makeSearchText(
228219
databaseName, schemaName, tableInfo.name, col.name, col.data_type,
229220
)
230-
insertColumn.run(
221+
columnArgsBatch.push([
231222
warehouseName, databaseName, schemaName, tableInfo.name,
232223
col.name, col.data_type, col.nullable ? 1 : 0, colSearch,
233-
)
224+
])
234225
}
226+
227+
// Insert table + all its columns in a single transaction
228+
insertTableBatch(
229+
[warehouseName, databaseName, schemaName, tableInfo.name, tableInfo.type, searchText],
230+
columnArgsBatch,
231+
)
235232
}
236233
}
237234

@@ -399,7 +396,7 @@ let _cache: SchemaCache | null = null
399396

400397
export async function getCache(): Promise<SchemaCache> {
401398
if (!_cache) {
402-
_cache = await SchemaCache.create()
399+
_cache = SchemaCache.create()
403400
}
404401
return _cache
405402
}

0 commit comments

Comments
 (0)