Skip to content

Commit 2cd836b

Browse files
committed
fix: backfill cli wrapper
1 parent 2a669d9 commit 2cd836b

1 file changed

Lines changed: 121 additions & 64 deletions

File tree

packages/cli/src/commands/encrypt/backfill.ts

Lines changed: 121 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@ import {
77
runBackfill,
88
upsertManifestColumn,
99
} from '@cipherstash/migrate'
10+
import {
11+
type ColumnSchema,
12+
castAsEnum,
13+
toEqlCastAs,
14+
} from '@cipherstash/stack/schema'
1015
import * as p from '@clack/prompts'
1116
import pg from 'pg'
1217
import { loadEncryptionContext, requireTable } from './context.js'
@@ -159,24 +164,25 @@ export async function backfillCommand(options: BackfillCommandOptions) {
159164
return
160165
}
161166
}
162-
// The EncryptedTable schema is keyed by the *encrypted* column name
163-
// (because the user's ORM schema declares the encrypted column, not
164-
// the plaintext one). Default accordingly — override with
165-
// --schema-column-key only if your schema uses a different object key.
166-
const schemaColumnKey = options.schemaColumnKey ?? encryptedColumn
167+
const columns = (tableSchema.build().columns ?? {}) as Record<
168+
string,
169+
ColumnSchema
170+
>
171+
const schemaColumnKey = resolveSchemaColumnKey({
172+
columns,
173+
tableName: options.table,
174+
plaintextColumn,
175+
encryptedColumn,
176+
override: options.schemaColumnKey,
177+
})
178+
const column = columns[schemaColumnKey]
167179

168180
const { transform: transformPlaintext, castAs: detectedCastAs } =
169-
buildPlaintextCoercer(tableSchema, schemaColumnKey)
170-
171-
// Record intent in `.cipherstash/migrations.json`. Backfill is the
172-
// first lifecycle command the user invokes for a column, so it's the
173-
// natural moment to commit the manifest entry. Idempotent — re-runs
174-
// (resume / --force) replace the same entry with the same content.
175-
// `targetPhase: 'cut-over'` is the typical end state; the user can
176-
// hand-edit the manifest to bump it to 'dropped' later, or `stash
177-
// encrypt drop` does that automatically when it runs.
181+
buildPlaintextCoercer(column?.cast_as)
182+
183+
// Idempotent: re-runs (resume / --force) replace the same entry.
178184
const manifestEntry = buildManifestEntry(
179-
tableSchema,
185+
column,
180186
schemaColumnKey,
181187
plaintextColumn,
182188
options.pkColumn,
@@ -252,15 +258,21 @@ export async function backfillCommand(options: BackfillCommandOptions) {
252258
`Backfill complete. ${result.rowsProcessed.toLocaleString()} rows encrypted.`,
253259
)
254260
} catch (error) {
255-
// Generic message only — `error.message` may include plaintext sample
256-
// values bubbled up from the encryption pipeline (e.g. the leak guard
257-
// in @cipherstash/migrate now emits type-only diagnostics, but
258-
// upstream libraries can still embed offending input in their
259-
// exception text). Preserve exit behaviour but stop the message path
260-
// from leaking sensitive data.
261-
p.log.error(
262-
`Backfill failed${error instanceof Error && /^[\w. -]+$/.test(error.name) ? ` (${error.name})` : ''}. Re-run with diagnostic logging if you need details.`,
263-
)
261+
if (error instanceof BackfillConfigError) {
262+
// Author-controlled diagnostic — safe to print verbatim and tells
263+
// the user exactly what to fix. Does not include any row data.
264+
p.log.error(error.message)
265+
} else {
266+
// Generic message only — `error.message` may include plaintext sample
267+
// values bubbled up from the encryption pipeline (e.g. the leak guard
268+
// in @cipherstash/migrate now emits type-only diagnostics, but
269+
// upstream libraries can still embed offending input in their
270+
// exception text). Preserve exit behaviour but stop the message path
271+
// from leaking sensitive data.
272+
p.log.error(
273+
`Backfill failed${error instanceof Error && /^[\w. -]+$/.test(error.name) ? ` (${error.name})` : ''}. Re-run with diagnostic logging if you need details.`,
274+
)
275+
}
264276
exitCode = 1
265277
} finally {
266278
process.off('SIGINT', onSignal)
@@ -271,6 +283,57 @@ export async function backfillCommand(options: BackfillCommandOptions) {
271283
if (exitCode) process.exit(exitCode)
272284
}
273285

286+
/**
 * Error type for configuration mistakes this command detects itself (for
 * example a `--schema-column-key` that the schema does not declare).
 *
 * The message text is written by this module, so it is safe to show in
 * full. Errors coming from the upstream encryption pipeline may embed
 * plaintext samples and are reduced to a generic message by the catch
 * block in {@link backfillCommand}.
 */
class BackfillConfigError extends Error {
  // Replaces the default `Error` name on every instance.
  override name = 'BackfillConfigError'

  /**
   * @param message - Diagnostic text that is safe to print verbatim.
   */
  constructor(message: string) {
    super(message)
  }
}
299+
300+
/**
301+
* Pick the schema-column key for this physical column. The drizzle
302+
* helper (`extractEncryptionSchema`) keys by the physical encrypted name;
303+
* handwritten `encryptedTable(...)` schemas key by whatever the author
304+
* wrote (typically plaintext). Honour `--schema-column-key`; otherwise
305+
* prefer encrypted, fall back to plaintext, and throw a
306+
* {@link BackfillConfigError} listing the schema's available keys when
307+
* neither candidate is present.
308+
*/
309+
function resolveSchemaColumnKey(opts: {
310+
columns: Record<string, ColumnSchema>
311+
tableName: string
312+
plaintextColumn: string
313+
encryptedColumn: string
314+
override: string | undefined
315+
}): string {
316+
const { columns, override, encryptedColumn, plaintextColumn, tableName } =
317+
opts
318+
const available = Object.keys(columns).join(', ') || '(none)'
319+
320+
if (override !== undefined) {
321+
if (!(override in columns)) {
322+
throw new BackfillConfigError(
323+
`--schema-column-key "${override}" is not declared in the encryption schema for table "${tableName}". Available keys: ${available}.`,
324+
)
325+
}
326+
return override
327+
}
328+
329+
if (encryptedColumn in columns) return encryptedColumn
330+
if (plaintextColumn in columns) return plaintextColumn
331+
332+
throw new BackfillConfigError(
333+
`Could not resolve a schema column key for ${tableName}.${plaintextColumn} (encrypted twin: ${encryptedColumn}). The encryption schema for this table declares: ${available}. Pass --schema-column-key <name> with one of those keys.`,
334+
)
335+
}
336+
274337
/**
275338
* Build a coercer that turns whatever the `pg` driver returns for a given
276339
* column into the JS shape `bulkEncryptModels` expects, based on the
@@ -286,18 +349,10 @@ export async function backfillCommand(options: BackfillCommandOptions) {
286349
*
287350
* Null / undefined are always passed through unchanged.
288351
*/
289-
function buildPlaintextCoercer(
290-
// biome-ignore lint/suspicious/noExplicitAny: EncryptedTableLike.build is generic
291-
tableSchema: { build(): { columns: Record<string, any> } },
292-
schemaColumnKey: string,
293-
): { transform: (value: unknown) => unknown; castAs: string | undefined } {
294-
let castAs: string | undefined
295-
try {
296-
castAs = tableSchema.build().columns?.[schemaColumnKey]?.cast_as
297-
} catch {
298-
castAs = undefined
299-
}
300-
352+
function buildPlaintextCoercer(castAs: string | undefined): {
353+
transform: (value: unknown) => unknown
354+
castAs: string | undefined
355+
} {
301356
const transform = (() => {
302357
switch (castAs) {
303358
case 'number':
@@ -439,40 +494,24 @@ async function ensureDualWritesDeployed(
439494
return true
440495
}
441496

442-
/**
443-
* Build the manifest entry for the column being backfilled by inspecting
444-
* the user's `EncryptedTable` schema. The manifest's `castAs` and
445-
* `indexes` fields mirror what EQL was configured with via `db push`, so
446-
* the source of truth is the same schema object the encryption client
447-
* consumes.
448-
*
449-
* `EncryptedTable.build()` shape (from `@cipherstash/stack/schema`):
450-
*
451-
* { columns: { [key]: { cast_as: 'string' | …, indexes: { ore?, unique?, match?, ste_vec? } } } }
452-
*
453-
* We pull the entry keyed by `schemaColumnKey` and surface the configured
454-
* index kinds as a flat array of `IndexKind` values that the manifest
455-
* schema accepts.
456-
*/
457497
function buildManifestEntry(
458-
// biome-ignore lint/suspicious/noExplicitAny: EncryptedTable.build() is generic
459-
tableSchema: { build(): { columns: Record<string, any> } },
498+
column: ColumnSchema | undefined,
460499
schemaColumnKey: string,
461500
plaintextColumn: string,
462501
pkColumn: string | undefined,
463502
): ManifestColumn {
464-
let castAs = 'text'
465-
// biome-ignore lint/suspicious/noExplicitAny: see buildPlaintextCoercer for the same shape
466-
let indexConfig: Record<string, any> = {}
467-
try {
468-
const built = tableSchema.build().columns?.[schemaColumnKey]
469-
castAs = built?.cast_as ?? 'text'
470-
indexConfig = built?.indexes ?? {}
471-
} catch {
472-
// Fall through with defaults — the manifest is informational, so a
473-
// partial entry is better than failing the whole backfill.
474-
}
503+
// SDK `cast_as` ('string', 'number', …) and EQL `castAs` ('text',
504+
// 'double', …) are different vocabularies; translate via the same
505+
// helper `stash db push` uses so the two stay aligned.
506+
const castAs: ManifestColumn['castAs'] =
507+
column?.cast_as !== undefined
508+
? translateCastAs(
509+
column.cast_as,
510+
`"${schemaColumnKey}" (plaintext: "${plaintextColumn}")`,
511+
)
512+
: 'text'
475513

514+
const indexConfig = column?.indexes ?? {}
476515
const indexes = (['unique', 'match', 'ore', 'ste_vec'] as const).filter(
477516
(kind) => indexConfig[kind] !== undefined,
478517
)
@@ -485,3 +524,21 @@ function buildManifestEntry(
485524
...(pkColumn ? { pkColumn } : {}),
486525
}
487526
}
527+
528+
// `castAsEnum` wraps its enum in a default; strip it so an unrecognised
// value is reported as a validation failure rather than quietly becoming
// `'text'`. Deriving from `castAsEnum` keeps the accepted list in step with
// the SDK as new types are added.
const sdkCastAsEnum = castAsEnum.removeDefault()

/**
 * Translate an SDK `cast_as` value into the manifest's `castAs` value.
 *
 * @param raw - Value read from the schema column's `cast_as`.
 * @param where - Description of the column, interpolated into the error text.
 * @returns The result of `toEqlCastAs` for the validated value.
 * @throws {BackfillConfigError} When `raw` fails `sdkCastAsEnum` validation.
 */
function translateCastAs(
  raw: unknown,
  where: string,
): ManifestColumn['castAs'] {
  const result = sdkCastAsEnum.safeParse(raw)
  if (result.success) return toEqlCastAs(result.data)

  const supported = sdkCastAsEnum.options.join(', ')
  throw new BackfillConfigError(
    `Encryption schema for column ${where} declares cast_as: ${JSON.stringify(raw)}, which is not one of the supported SDK data types (${supported}). Fix the .dataType(...) call in your encryption client.`,
  )
}

0 commit comments

Comments
 (0)