@@ -251,62 +251,8 @@ function maybeRewriteSchemaMessage(schemaMessageBytes: Buffer): Buffer | null {
251251 return null ;
252252 }
253253
254- // Snapshot (name, nullable, isDuration, durationUnit, metadata, typeType)
255- // for every field, then rebuild the schema using the FlatBuffer builder.
256- type FieldSnapshot = {
257- name : string ;
258- nullable : boolean ;
259- isDuration : boolean ;
260- durationUnit ?: number ; // FbTimeUnit
261- /** Preserved metadata key→value pairs (we add ours on top for Duration). */
262- metadata : Array < [ string , string ] > ;
263- /** Original `type_type` union tag of the field (e.g. Duration = 18, Int = 2). */
264- typeType : number ;
265- /** Pre-decoded type sub-table bytes for non-Duration fields. */
266- // For M0 we only rewrite Duration; other fields we re-create with the
267- // same primitive type. To keep the rewriter narrow, we only support
268- // schemas where non-Duration fields use type sub-tables that can be
269- // round-tripped via Field.decode → re-encode through flatbuffers'
270- // SizedByteArray serialization. That's complex, so instead we use
271- // a different approach: copy the raw FlatBuffer field offset
272- // directly when no rewrite is needed (handled by the
273- // copy-field-by-reference path below).
274- } ;
275- // We can't simply "copy field by reference" across FlatBuffer
276- // builders, so we have to re-encode every field. For non-Duration
277- // fields, we re-encode using the apache-arrow `fb/*` accessors.
278- // That requires touching every existing supported type.
279- //
280- // To keep this rewriter narrow and DRY, we take a different
281- // approach: in-place patch. We do NOT rebuild the FlatBuffer.
282- // Instead, we mutate the field's `type_type` byte from Duration(18)
283- // to Int(2), and we point its `type` offset at a freshly-appended
284- // Int sub-table that we splice into the message bytes. Then we
285- // append a fresh `KeyValue` for `databricks.arrow.duration_unit`
286- // into the field's `custom_metadata` vector. This avoids re-encoding
287- // every other field.
288- //
289- // FlatBuffer in-place mutation is tricky because tables have vtables
290- // and offsets are 32-bit relative pointers. The fields we need to
291- // change are:
292- // 1. Field.type_type (1-byte enum at vtable slot for field #2):
293- // mutate the byte from 18 → 2. Same width, safe to overwrite.
294- // 2. Field.type (4-byte relative offset to the type sub-table):
295- // change the offset to point at our appended Int sub-table.
296- // Same width, safe to overwrite.
297- // 3. Field.custom_metadata (4-byte relative offset to vector):
298- // either rewrite the existing vector to add our entry, or
299- // append a new vector and update the offset.
300- //
301- // Because relative offsets are forward-only in FlatBuffers (offset is
302- // distance from the storage location to the target), and our
303- // appended sub-tables live AFTER the storage location, the math
304- // works out. We append to a growing byte buffer and patch the
305- // existing offset fields to point at the new tail.
306-
307- // Fall back to the full rebuild approach: in-place patching of
308- // arbitrary vtable layouts is fragile (vtables may share storage
309- // across fields), so re-encode the whole schema instead.
254+ // Re-encode the whole schema. This is more verbose than an in-place
255+ // FlatBuffer patch, but it avoids relying on vtable layout details.
310256 return rebuildSchemaWithDurationRewritten ( message , fbSchema ) ;
311257}
312258
0 commit comments