@@ -7,6 +7,11 @@ import {
77 runBackfill ,
88 upsertManifestColumn ,
99} from '@cipherstash/migrate'
10+ import {
11+ type ColumnSchema ,
12+ castAsEnum ,
13+ toEqlCastAs ,
14+ } from '@cipherstash/stack/schema'
1015import * as p from '@clack/prompts'
1116import pg from 'pg'
1217import { loadEncryptionContext , requireTable } from './context.js'
@@ -159,24 +164,25 @@ export async function backfillCommand(options: BackfillCommandOptions) {
159164 return
160165 }
161166 }
162- // The EncryptedTable schema is keyed by the *encrypted* column name
163- // (because the user's ORM schema declares the encrypted column, not
164- // the plaintext one). Default accordingly — override with
165- // --schema-column-key only if your schema uses a different object key.
166- const schemaColumnKey = options . schemaColumnKey ?? encryptedColumn
167+ const columns = ( tableSchema . build ( ) . columns ?? { } ) as Record <
168+ string ,
169+ ColumnSchema
170+ >
171+ const schemaColumnKey = resolveSchemaColumnKey ( {
172+ columns,
173+ tableName : options . table ,
174+ plaintextColumn,
175+ encryptedColumn,
176+ override : options . schemaColumnKey ,
177+ } )
178+ const column = columns [ schemaColumnKey ]
167179
168180 const { transform : transformPlaintext , castAs : detectedCastAs } =
169- buildPlaintextCoercer ( tableSchema , schemaColumnKey )
170-
171- // Record intent in `.cipherstash/migrations.json`. Backfill is the
172- // first lifecycle command the user invokes for a column, so it's the
173- // natural moment to commit the manifest entry. Idempotent — re-runs
174- // (resume / --force) replace the same entry with the same content.
175- // `targetPhase: 'cut-over'` is the typical end state; the user can
176- // hand-edit the manifest to bump it to 'dropped' later, or `stash
177- // encrypt drop` does that automatically when it runs.
181+ buildPlaintextCoercer ( column ?. cast_as )
182+
183+ // Idempotent: re-runs (resume / --force) replace the same entry.
178184 const manifestEntry = buildManifestEntry (
179- tableSchema ,
185+ column ,
180186 schemaColumnKey ,
181187 plaintextColumn ,
182188 options . pkColumn ,
@@ -252,15 +258,21 @@ export async function backfillCommand(options: BackfillCommandOptions) {
252258 `Backfill complete. ${ result . rowsProcessed . toLocaleString ( ) } rows encrypted.` ,
253259 )
254260 } catch ( error ) {
255- // Generic message only — `error.message` may include plaintext sample
256- // values bubbled up from the encryption pipeline (e.g. the leak guard
257- // in @cipherstash /migrate now emits type-only diagnostics, but
258- // upstream libraries can still embed offending input in their
259- // exception text). Preserve exit behaviour but stop the message path
260- // from leaking sensitive data.
261- p . log . error (
262- `Backfill failed${ error instanceof Error && / ^ [ \w . - ] + $ / . test ( error . name ) ? ` (${ error . name } )` : '' } . Re-run with diagnostic logging if you need details.` ,
263- )
261+ if ( error instanceof BackfillConfigError ) {
262+ // Author-controlled diagnostic — safe to print verbatim and tells
263+ // the user exactly what to fix. Does not include any row data.
264+ p . log . error ( error . message )
265+ } else {
266+ // Generic message only — `error.message` may include plaintext sample
267+ // values bubbled up from the encryption pipeline (e.g. the leak guard
268+ // in @cipherstash /migrate now emits type-only diagnostics, but
269+ // upstream libraries can still embed offending input in their
270+ // exception text). Preserve exit behaviour but stop the message path
271+ // from leaking sensitive data.
272+ p . log . error (
273+ `Backfill failed${ error instanceof Error && / ^ [ \w . - ] + $ / . test ( error . name ) ? ` (${ error . name } )` : '' } . Re-run with diagnostic logging if you need details.` ,
274+ )
275+ }
264276 exitCode = 1
265277 } finally {
266278 process . off ( 'SIGINT' , onSignal )
@@ -271,6 +283,57 @@ export async function backfillCommand(options: BackfillCommandOptions) {
271283 if ( exitCode ) process . exit ( exitCode )
272284}
273285
/**
 * Error type for configuration problems that this command detects itself,
 * for example a `--schema-column-key` value that the schema does not declare.
 *
 * The message text is authored in this module, which is why the catch block
 * in {@link backfillCommand} prints it in full. Any other error is reduced to
 * a generic message there, because upstream exception text may embed
 * plaintext sample values.
 */
class BackfillConfigError extends Error {
  // Field initialisers run immediately after `super(...)` returns, so every
  // instance carries this name as an own property.
  override name = 'BackfillConfigError'

  // Kept explicit so construction still requires exactly one string message.
  constructor(message: string) {
    super(message)
  }
}
299+
/**
 * Pick the key under which this physical column is declared in the
 * encryption schema.
 *
 * The drizzle helper (`extractEncryptionSchema`) keys by the physical
 * encrypted name; handwritten `encryptedTable(...)` schemas key by whatever
 * the author wrote (typically the plaintext name). Resolution order:
 *
 * 1. `override` (from `--schema-column-key`), if supplied — it must be
 *    declared in `columns`.
 * 2. `encryptedColumn`, if declared.
 * 3. `plaintextColumn`, if declared.
 *
 * Only keys declared directly on `columns` count. Names inherited from
 * `Object.prototype` (`constructor`, `toString`, `__proto__`, …) are never
 * treated as schema columns.
 *
 * @param opts.columns - Column map produced by the table schema's `build()`.
 * @param opts.tableName - Table name, used only in error messages.
 * @param opts.plaintextColumn - Physical name of the plaintext column.
 * @param opts.encryptedColumn - Physical name of the encrypted twin column.
 * @param opts.override - Explicit key from `--schema-column-key`, if any.
 * @returns The key to use when indexing `opts.columns`.
 * @throws {BackfillConfigError} When `override` is not declared, or when
 *   neither candidate is declared. The message lists the available keys.
 */
function resolveSchemaColumnKey(opts: {
  columns: Record<string, ColumnSchema>
  tableName: string
  plaintextColumn: string
  encryptedColumn: string
  override: string | undefined
}): string {
  const { columns, override, encryptedColumn, plaintextColumn, tableName } =
    opts

  // Own-property test on purpose: the `in` operator also walks the prototype
  // chain, so a key such as "constructor" would be accepted even though the
  // schema declares no such column.
  const isDeclared = (key: string): boolean =>
    Object.prototype.hasOwnProperty.call(columns, key)

  const available = Object.keys(columns).join(', ') || '(none)'

  if (override !== undefined) {
    if (!isDeclared(override)) {
      throw new BackfillConfigError(
        `--schema-column-key "${override}" is not declared in the encryption schema for table "${tableName}". Available keys: ${available}.`,
      )
    }
    return override
  }

  if (isDeclared(encryptedColumn)) return encryptedColumn
  if (isDeclared(plaintextColumn)) return plaintextColumn

  throw new BackfillConfigError(
    `Could not resolve a schema column key for ${tableName}.${plaintextColumn} (encrypted twin: ${encryptedColumn}). The encryption schema for this table declares: ${available}. Pass --schema-column-key <name> with one of those keys.`,
  )
}
336+
274337/**
275338 * Build a coercer that turns whatever the `pg` driver returns for a given
276339 * column into the JS shape `bulkEncryptModels` expects, based on the
@@ -286,18 +349,10 @@ export async function backfillCommand(options: BackfillCommandOptions) {
286349 *
287350 * Null / undefined are always passed through unchanged.
288351 */
289- function buildPlaintextCoercer (
290- // biome-ignore lint/suspicious/noExplicitAny: EncryptedTableLike.build is generic
291- tableSchema : { build ( ) : { columns : Record < string , any > } } ,
292- schemaColumnKey : string ,
293- ) : { transform : ( value : unknown ) => unknown ; castAs : string | undefined } {
294- let castAs : string | undefined
295- try {
296- castAs = tableSchema . build ( ) . columns ?. [ schemaColumnKey ] ?. cast_as
297- } catch {
298- castAs = undefined
299- }
300-
352+ function buildPlaintextCoercer ( castAs : string | undefined ) : {
353+ transform : ( value : unknown ) => unknown
354+ castAs : string | undefined
355+ } {
301356 const transform = ( ( ) => {
302357 switch ( castAs ) {
303358 case 'number' :
@@ -439,40 +494,24 @@ async function ensureDualWritesDeployed(
439494 return true
440495}
441496
442- /**
443- * Build the manifest entry for the column being backfilled by inspecting
444- * the user's `EncryptedTable` schema. The manifest's `castAs` and
445- * `indexes` fields mirror what EQL was configured with via `db push`, so
446- * the source of truth is the same schema object the encryption client
447- * consumes.
448- *
449- * `EncryptedTable.build()` shape (from `@cipherstash/stack/schema`):
450- *
451- * { columns: { [key]: { cast_as: 'string' | …, indexes: { ore?, unique?, match?, ste_vec? } } } }
452- *
453- * We pull the entry keyed by `schemaColumnKey` and surface the configured
454- * index kinds as a flat array of `IndexKind` values that the manifest
455- * schema accepts.
456- */
457497function buildManifestEntry (
458- // biome-ignore lint/suspicious/noExplicitAny: EncryptedTable.build() is generic
459- tableSchema : { build ( ) : { columns : Record < string , any > } } ,
498+ column : ColumnSchema | undefined ,
460499 schemaColumnKey : string ,
461500 plaintextColumn : string ,
462501 pkColumn : string | undefined ,
463502) : ManifestColumn {
464- let castAs = 'text'
465- // biome-ignore lint/suspicious/noExplicitAny: see buildPlaintextCoercer for the same shape
466- let indexConfig : Record < string , any > = { }
467- try {
468- const built = tableSchema . build ( ) . columns ?. [ schemaColumnKey ]
469- castAs = built ?. cast_as ?? 'text'
470- indexConfig = built ?. indexes ?? { }
471- } catch {
472- // Fall through with defaults — the manifest is informational, so a
473- // partial entry is better than failing the whole backfill.
474- }
503+ // SDK `cast_as` ('string', 'number', …) and EQL `castAs` ('text',
504+ // 'double', …) are different vocabularies; translate via the same
505+ // helper `stash db push` uses so the two stay aligned.
506+ const castAs : ManifestColumn [ 'castAs' ] =
507+ column ?. cast_as !== undefined
508+ ? translateCastAs (
509+ column . cast_as ,
510+ `"${ schemaColumnKey } " (plaintext: "${ plaintextColumn } ")` ,
511+ )
512+ : 'text'
475513
514+ const indexConfig = column ?. indexes ?? { }
476515 const indexes = ( [ 'unique' , 'match' , 'ore' , 'ste_vec' ] as const ) . filter (
477516 ( kind ) => indexConfig [ kind ] !== undefined ,
478517 )
@@ -485,3 +524,21 @@ function buildManifestEntry(
485524 ...( pkColumn ? { pkColumn } : { } ) ,
486525 }
487526}
527+
// `castAsEnum` without its wrapping default: an unrecognised value now fails
// validation rather than being silently coerced to the default. Deriving the
// validator from `castAsEnum` keeps the accepted list in step with the SDK.
const sdkCastAsEnum = castAsEnum.removeDefault()

/**
 * Validate an SDK `cast_as` value and convert it to the manifest's `castAs`
 * vocabulary via `toEqlCastAs`.
 *
 * @param raw - The `cast_as` value read from the column schema.
 * @param where - Human-readable description of the column, interpolated into
 *   the error message.
 * @returns The converted `castAs` value.
 * @throws {BackfillConfigError} When `raw` is not one of the values accepted
 *   by `sdkCastAsEnum`; the message lists the supported values.
 */
function translateCastAs(
  raw: unknown,
  where: string,
): ManifestColumn['castAs'] {
  const result = sdkCastAsEnum.safeParse(raw)
  if (result.success) {
    return toEqlCastAs(result.data)
  }

  const supported = sdkCastAsEnum.options.join(', ')
  throw new BackfillConfigError(
    `Encryption schema for column ${where} declares cast_as: ${JSON.stringify(raw)}, which is not one of the supported SDK data types (${supported}). Fix the .dataType(...) call in your encryption client.`,
  )
}
0 commit comments