redpanda-data
diff --git a/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎internal/impl/iceberg/config.go‎
Lines changed: 11 additions & 6 deletions b/‎internal/impl/iceberg/config.go‎
Lines changed: 11 additions & 6 deletions
diff --git a/‎internal/impl/iceberg/output_iceberg.go‎
Lines changed: 11 additions & 0 deletions b/‎internal/impl/iceberg/output_iceberg.go‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎internal/impl/iceberg/router.go‎
Lines changed: 6 additions & 1 deletion b/‎internal/impl/iceberg/router.go‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎internal/impl/iceberg/shredder/shredder.go‎
Lines changed: 30 additions & 7 deletions b/‎internal/impl/iceberg/shredder/shredder.go‎
Lines changed: 30 additions & 7 deletions
diff --git a/‎internal/impl/iceberg/shredder/shredder_test.go‎
Lines changed: 15 additions & 15 deletions b/‎internal/impl/iceberg/shredder/shredder_test.go‎
Lines changed: 15 additions & 15 deletions
@@ -16,6 +16,10 @@ All notable changes to this project will be documented in this file.
   - **Pipelines whose own code reads the `schema_metadata` bytes via `meta()`** and pattern-matches the historical INT64 shape: schemas now contain `TIMESTAMP` / `DATE` / `TIME_OF_DAY` / `UUID` along with new `unit` and `adjust_to_utc` fields. Update the pattern.
   - **iceberg shredder** is now schema-aware for numeric inputs: a numeric millisecond value declared by the schema as `timestamp-millis` is correctly interpreted as milliseconds rather than as Unix seconds. This closes a previously-silent corruption case where an int64 millis input into a TIMESTAMPTZ column would land ~50,000 years in the future.
 
+### Added
+
+- iceberg: `schema_evolution.require_schema_metadata` (default `false`). When enabled, numeric values shredded into a `timestamp`, `timestamptz`, `date`, or `time` column that has no registered schema metadata are rejected with an error instead of falling back to a guessed unit (bare numeric timestamps are otherwise read as Unix seconds, and bare numeric times as already-microseconds). Requires `schema_evolution.schema_metadata` to be set; configuration parsing fails otherwise. Use this when you cannot guarantee that the upstream attaches schema metadata and prefer a hard error to silently corrupting dates by ~50,000 years. This has no effect on time-typed columns receiving native `time.Time` / `time.Duration` Go values, or on non-time columns.
+
 ### Changed
 
 - iceberg: `NewWriter` now takes a `*typeResolver` argument so the writer can use schema metadata to interpret numeric inputs into time-typed columns at shredding time. Internal API change only.
 
@@ -69,12 +69,13 @@ const (
 	ioFieldAzureAccessKey        = "storage_access_key"
 
 	// Schema evolution fields
-	ioFieldSchemaEvolution                     = "schema_evolution"
-	ioFieldSchemaEvolutionEnabled              = "enabled"
-	ioFieldSchemaEvolutionPartitionSpec        = "partition_spec"
-	ioFieldSchemaEvolutionTableLoc             = "table_location"
-	ioFieldSchemaEvolutionSchemaMetadata       = "schema_metadata"
-	ioFieldSchemaEvolutionNewColumnTypeMapping = "new_column_type_mapping"
+	ioFieldSchemaEvolution                      = "schema_evolution"
+	ioFieldSchemaEvolutionEnabled               = "enabled"
+	ioFieldSchemaEvolutionPartitionSpec         = "partition_spec"
+	ioFieldSchemaEvolutionTableLoc              = "table_location"
+	ioFieldSchemaEvolutionSchemaMetadata        = "schema_metadata"
+	ioFieldSchemaEvolutionNewColumnTypeMapping  = "new_column_type_mapping"
+	ioFieldSchemaEvolutionRequireSchemaMetadata = "require_schema_metadata"
 
 	// Commit fields
 	ioFieldCommit               = "commit"
@@ -332,6 +333,10 @@ array:list
 					Description("An optional Bloblang mapping to customize column types during schema evolution. This mapping is executed for each new column and can override the inferred or schema-metadata-derived type. The mapping receives an object with fields `name` (column name), `path` (dot-separated path), `value` (sample value), `inferred_type` (the type that would be used without this mapping), `message` (the full message body), `namespace`, and `table`. It must return a string with a valid Iceberg type name: `boolean`, `int`, `long`, `float`, `double`, `string`, `binary`, `date`, `time`, `timestamp`, `timestamptz`, `uuid`, `decimal(p,s)`, or `fixed[n]`.").
 					Optional().
 					Advanced(),
+				service.NewBoolField(ioFieldSchemaEvolutionRequireSchemaMetadata).
+					Description("When `true`, writing a numeric value into a `timestamp`, `timestamptz`, `date`, or `time` column without `schema_metadata` registered for that column is a hard error. The default `false` permits a fallback path that interprets bare numeric timestamps as Unix seconds and bare numeric times as already-microseconds — convenient, but silently wrong if upstream produced milliseconds. Enable this when you cannot guarantee the upstream attaches schema metadata and want to fail loudly rather than corrupt dates by ~50,000 years. No effect on time-typed columns receiving `time.Time`/`time.Duration` Go values, which carry their own unit unambiguously, and no effect on non-time columns. Requires `schema_metadata` to be set.").
+					Default(false).
+					Advanced(),
 			).Description("Schema evolution configuration.").
 				Optional().
 				Advanced(),
 
@@ -484,6 +484,17 @@ func parseSchemaEvolutionConfig(conf *service.ParsedConfig) (SchemaEvolutionConf
 		}
 	}
 
+	// Parse require_schema_metadata
+	if conf.Contains(ioFieldSchemaEvolution, ioFieldSchemaEvolutionRequireSchemaMetadata) {
+		cfg.RequireSchemaMetadata, err = conf.FieldBool(ioFieldSchemaEvolution, ioFieldSchemaEvolutionRequireSchemaMetadata)
+		if err != nil {
+			return cfg, err
+		}
+		if cfg.RequireSchemaMetadata && cfg.SchemaMetadata == "" {
+			return cfg, fmt.Errorf("%s.%s requires %s.%s to be set", ioFieldSchemaEvolution, ioFieldSchemaEvolutionRequireSchemaMetadata, ioFieldSchemaEvolution, ioFieldSchemaEvolutionSchemaMetadata)
+		}
+	}
+
 	return cfg, nil
 }
 
 
@@ -52,6 +52,11 @@ type SchemaEvolutionConfig struct {
 	// NewColumnTypeMapping is an optional Bloblang mapping that can override inferred
 	// or schema-metadata-derived column types during schema evolution.
 	NewColumnTypeMapping *bloblang.Executor
+	// RequireSchemaMetadata enables strict mode: when true, writing a numeric
+	// value into a time-typed column without registered schema metadata is a
+	// hard error rather than a silent fallback to bloblang's seconds-default.
+	// Config parsing rejects a true value unless SchemaMetadata is also set.
+	RequireSchemaMetadata bool
 }
 
 const maxSchemaEvolutionRetries = 10
@@ -666,7 +671,7 @@ func (r *Router) createWriter(ctx context.Context, key tableKey) (*writer, error
 	// Create writer with its own table reference and the committer.
 	// The resolver is passed so the writer can use schema metadata to
 	// interpret numeric inputs into time-typed columns at shredding time.
-	w := NewWriter(writerTbl, comm, r.caseSensitive, r.writerOpts, r.resolver, r.logger)
+	w := NewWriter(writerTbl, comm, r.caseSensitive, r.writerOpts, r.resolver, r.schemaEvoCfg.RequireSchemaMetadata, r.logger)
 	r.logger.Debugf("Created writer for table %s.%s", key.namespace, key.table)
 
 	return w, nil
 
@@ -87,6 +87,13 @@ type RecordShredder struct {
 	// typed columns instead of guessing. nil entries fall back to the
 	// pre-schema-metadata behavior — see [convertLeafValue].
 	fieldCommons map[int]*schema.Common
+	// strictTemporal causes [convertLeafValue] to refuse numeric inputs
+	// into time-typed columns when no schema metadata has been
+	// registered for that column. When false (the default), the value
+	// converter falls back to [bloblang.ValueAsTimestamp]'s seconds
+	// default — convenient but silently wrong if the upstream produced
+	// a different unit. When true, the writer fails the batch loudly.
+	strictTemporal bool
 }
 
 // NewRecordShredder creates a new shredder for the given schema.
@@ -100,6 +107,22 @@ func NewRecordShredder(schema *iceberg.Schema, caseSensitive bool) *RecordShredd
 	}
 }
 
+// SetStrictTemporalMode toggles whether numeric inputs into time-typed
+// columns require registered schema metadata. With strict mode on, a bare
+// int64 / float64 value reaching a TIMESTAMP / TIMESTAMPTZ / DATE / TIME
+// column with no [schema.Common] in the field map is rejected with a
+// per-field error rather than guessed-as-Unix-seconds.
+//
+// Strict mode has no effect on time.Time / time.Duration values, which
+// carry their own unit unambiguously, and no effect on non-time columns.
+//
+// Defaults to off (back-compat). Operators that cannot guarantee schema
+// metadata flows end-to-end can flip this on to fail loudly instead of
+// silently corrupting dates by ~50,000 years.
+func (rs *RecordShredder) SetStrictTemporalMode(on bool) {
+	rs.strictTemporal = on
+}
+
 // SetFieldSchemaMetadata supplies a field-ID → schema.Common map that the
 // leaf value converter consults when it sees a numeric input destined for a
 // time-typed Iceberg column (TIMESTAMP, TIMESTAMPTZ, TIMESTAMP_NS,
@@ -241,7 +264,7 @@ func (rs *RecordShredder) shredValue(
 
 	default:
 		// Leaf/primitive type.
-		pqVal, err := convertLeafValue(value, typ, rs.commonForField(fieldID))
+		pqVal, err := convertLeafValue(value, typ, rs.commonForField(fieldID), rs.strictTemporal)
 		if err != nil {
 			return err
 		}
@@ -355,7 +378,7 @@ func (rs *RecordShredder) shredMap(
 
 		// Shred the key. Map keys are always leaf primitives, never time
 		// types, so the schema-metadata lookup never fires for them.
-		keyVal, err := convertLeafValue(k, mapType.KeyType, nil)
+		keyVal, err := convertLeafValue(k, mapType.KeyType, nil, false)
 		if err != nil {
 			return fmt.Errorf("map key: %w", err)
 		}
@@ -430,7 +453,7 @@ func (rs *RecordShredder) shredNull(
 // time.Duration directly and treats bare numerics as already-in-the-target
 // unit (microseconds for time, seconds for timestamp via bloblang
 // ValueAsTimestamp's default — preserves the pre-PR behavior).
-func convertLeafValue(value any, typ iceberg.Type, common *schema.Common) (parquet.Value, error) {
+func convertLeafValue(value any, typ iceberg.Type, common *schema.Common, strictTemporal bool) (parquet.Value, error) {
 	if value == nil {
 		return parquet.NullValue(), nil
 	}
@@ -472,25 +495,25 @@ func convertLeafValue(value any, typ iceberg.Type, common *schema.Common) (parqu
 		return parquet.ByteArrayValue(v), err
 
 	case iceberg.DateType:
-		return convertDate(value)
+		return convertDate(value, common, strictTemporal)
 
 	case iceberg.TimeType:
 		// Iceberg TIME is microseconds since midnight. Accept time.Duration
 		// directly (the twmb/avro decode of time-millis/time-micros), and
 		// fall back to numeric input interpreted via the schema-declared
 		// unit when available.
-		return convertTime(value, common)
+		return convertTime(value, common, strictTemporal)
 
 	case iceberg.TimestampType, iceberg.TimestampTzType:
 		// Iceberg TIMESTAMP / TIMESTAMPTZ are microseconds since epoch.
 		// time.Time inputs are unambiguous and used directly. For numeric
 		// inputs, prefer the schema's declared unit so that millis stays
 		// millis (instead of being interpreted as seconds and landing in
 		// year 56755).
-		return convertTimestamp(value, common, false)
+		return convertTimestamp(value, common, false, strictTemporal)
 
 	case iceberg.TimestampNsType, iceberg.TimestampTzNsType:
-		return convertTimestamp(value, common, true)
+		return convertTimestamp(value, common, true, strictTemporal)
 
 	case iceberg.UUIDType:
 		switch v := value.(type) {
 
@@ -1055,7 +1055,7 @@ func TestConvertLeafValueDecimal(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result, err := convertLeafValue(tt.value, dt, nil)
+			result, err := convertLeafValue(tt.value, dt, nil, false)
 			if tt.wantErr {
 				require.Error(t, err)
 				return
@@ -1069,7 +1069,7 @@ func TestConvertLeafValueDecimal(t *testing.T) {
 func TestConvertLeafValueDecimalPrecision(t *testing.T) {
 	dt := iceberg.DecimalTypeOf(10, 2)
 
-	result, err := convertLeafValue(float64(123.45), dt, nil)
+	result, err := convertLeafValue(float64(123.45), dt, nil, false)
 	require.NoError(t, err)
 
 	b := result.ByteArray()
@@ -1085,7 +1085,7 @@ func TestConvertLeafValueDecimalPrecision(t *testing.T) {
 func TestConvertLeafValueDecimalNegative(t *testing.T) {
 	dt := iceberg.DecimalTypeOf(10, 2)
 
-	result, err := convertLeafValue(float64(-123.45), dt, nil)
+	result, err := convertLeafValue(float64(-123.45), dt, nil, false)
 	require.NoError(t, err)
 
 	b := result.ByteArray()
@@ -1131,7 +1131,7 @@ func TestConvertLeafValueDecimalExactValues(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result, err := convertLeafValue(tt.value, dt, nil)
+			result, err := convertLeafValue(tt.value, dt, nil, false)
 			require.NoError(t, err)
 			assert.Equal(t, tt.wantUnscaled, decodeUnscaled(t, result))
 		})
@@ -1143,57 +1143,57 @@ func TestConvertLeafValueDecimalOverflow(t *testing.T) {
 	dt := iceberg.DecimalTypeOf(5, 2)
 
 	// 999.99 should succeed
-	_, err := convertLeafValue(float64(999.99), dt, nil)
+	_, err := convertLeafValue(float64(999.99), dt, nil, false)
 	require.NoError(t, err)
 
 	// 1000.00 exceeds precision — unscaled 100000 >= 10^5
-	_, err = convertLeafValue(float64(1000.00), dt, nil)
+	_, err = convertLeafValue(float64(1000.00), dt, nil, false)
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "exceeds decimal(5, 2) precision")
 
 	// Large negative should also fail
-	_, err = convertLeafValue(float64(-1000.00), dt, nil)
+	_, err = convertLeafValue(float64(-1000.00), dt, nil, false)
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "exceeds decimal(5, 2) precision")
 }
 
 func TestConvertLeafValueDecimalStringError(t *testing.T) {
 	dt := iceberg.DecimalTypeOf(10, 2)
 
-	_, err := convertLeafValue("not_a_number", dt, nil)
+	_, err := convertLeafValue("not_a_number", dt, nil, false)
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "cannot parse")
 }
 
 func TestConvertLeafValueDecimalNaNInf(t *testing.T) {
 	dt := iceberg.DecimalTypeOf(10, 2)
 
-	_, err := convertLeafValue(math.NaN(), dt, nil)
+	_, err := convertLeafValue(math.NaN(), dt, nil, false)
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "cannot convert")
 
-	_, err = convertLeafValue(math.Inf(1), dt, nil)
+	_, err = convertLeafValue(math.Inf(1), dt, nil, false)
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "cannot convert")
 
-	_, err = convertLeafValue(math.Inf(-1), dt, nil)
+	_, err = convertLeafValue(math.Inf(-1), dt, nil, false)
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "cannot convert")
 
-	_, err = convertLeafValue(float32(math.NaN()), dt, nil)
+	_, err = convertLeafValue(float32(math.NaN()), dt, nil, false)
 	require.Error(t, err)
 
-	_, err = convertLeafValue(float32(math.Inf(1)), dt, nil)
+	_, err = convertLeafValue(float32(math.Inf(1)), dt, nil, false)
 	require.Error(t, err)
 }
 
 func TestConvertLeafValueUint64Overflow(t *testing.T) {
-	_, err := convertLeafValue(uint64(math.MaxInt64+1), iceberg.Int64Type{}, nil)
+	_, err := convertLeafValue(uint64(math.MaxInt64+1), iceberg.Int64Type{}, nil, false)
 	require.Error(t, err)
 	assert.Contains(t, err.Error(), "exceeds int64 range")
 
 	// Value within range should succeed
-	_, err = convertLeafValue(uint64(math.MaxInt64), iceberg.Int64Type{}, nil)
+	_, err = convertLeafValue(uint64(math.MaxInt64), iceberg.Int64Type{}, nil, false)
 	require.NoError(t, err)
 }
Original file line number	Diff line number	Diff line change
`@@ -484,6 +484,17 @@ func parseSchemaEvolutionConfig(conf *service.ParsedConfig) (SchemaEvolutionConf`
`484`	`484`	`}`
`485`	`485`	`}`
`486`	`486`
	`487`	`+ // Parse require_schema_metadata`
	`488`	`+ if conf.Contains(ioFieldSchemaEvolution, ioFieldSchemaEvolutionRequireSchemaMetadata) {`
	`489`	`+ cfg.RequireSchemaMetadata, err = conf.FieldBool(ioFieldSchemaEvolution, ioFieldSchemaEvolutionRequireSchemaMetadata)`
	`490`	`+ if err != nil {`
	`491`	`+ return cfg, err`
	`492`	`+ }`
	`493`	`+ if cfg.RequireSchemaMetadata && cfg.SchemaMetadata == "" {`
	`494`	`+ return cfg, fmt.Errorf("%s.%s requires %s.%s to be set", ioFieldSchemaEvolution, ioFieldSchemaEvolutionRequireSchemaMetadata, ioFieldSchemaEvolution, ioFieldSchemaEvolutionSchemaMetadata)`
	`495`	`+ }`
	`496`	`+ }`
	`497`	`+`
`487`	`498`	`return cfg, nil`
`488`	`499`	`}`
`489`	`500`