Skip to content

Commit b13320b

Browse files
twmb and claude authored
public/schema: add Date, TimeOfDay, UUID and timestamp/time-of-day params (#429)
Extend the neutral schema language so logical-type information from formats like Avro can round-trip without loss.

Adds three new CommonType variants (Date, TimeOfDay, UUID), a TimeUnit enum, and TimestampParams / TimeOfDayParams blocks under LogicalParams that carry a unit and an AdjustToUTC flag. EffectiveTimestamp() supplies the legacy default (millis, UTC) when a Timestamp-typed schema has no Logical set, preserving fingerprint stability for pre-existing schemas.

ToAny / ParseFromAny / Validate / fingerprint emit the new fields only when non-nil so unaffected schemas keep their existing serialised bytes and SchemaCache fingerprints. A new TestFingerprintLegacyStability locks down the canonical form for representative pre-parameterised schemas.

Backwards-compat invariants:
- Type == Timestamp with nil Logical: permitted, treated as legacy default.
- Type == TimeOfDay with nil Logical: rejected (no historical default exists).
- LogicalParams.Timestamp / TimeOfDay on the wrong top-level type: rejected.

Companion to redpanda-data/connect#4399, which uses these new types to preserve Avro logicalType annotations end-to-end through the schema-registry decode -> Iceberg path.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent d3ecb49 commit b13320b

4 files changed

Lines changed: 616 additions & 2 deletions

File tree

public/schema/common.go

Lines changed: 196 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,9 @@ const (
9090
Any CommonType = 14
9191
Decimal CommonType = 15
9292
BigDecimal CommonType = 16
93+
Date CommonType = 17
94+
TimeOfDay CommonType = 18
95+
UUID CommonType = 19
9396
)
9497

9598
// Decimal precision bounds. The upper bound matches the widest precision that
@@ -134,6 +137,12 @@ func (t CommonType) String() string {
134137
return "DECIMAL"
135138
case BigDecimal:
136139
return "BIG_DECIMAL"
140+
case Date:
141+
return "DATE"
142+
case TimeOfDay:
143+
return "TIME_OF_DAY"
144+
case UUID:
145+
return "UUID"
137146
default:
138147
return "UNKNOWN"
139148
}
@@ -173,6 +182,12 @@ func typeFromStr(v string) (CommonType, error) {
173182
return Decimal, nil
174183
case "BIG_DECIMAL":
175184
return BigDecimal, nil
185+
case "DATE":
186+
return Date, nil
187+
case "TIME_OF_DAY":
188+
return TimeOfDay, nil
189+
case "UUID":
190+
return UUID, nil
176191
default:
177192
return 0, fmt.Errorf("unrecognised type string: %v", v)
178193
}
@@ -199,7 +214,9 @@ type Common struct {
199214
// CommonType values may carry. Each parameterised type has its own field;
200215
// at most one is expected to be non-nil for any given Common schema.
201216
type LogicalParams struct {
202-
Decimal *DecimalParams
217+
Decimal *DecimalParams
218+
Timestamp *TimestampParams
219+
TimeOfDay *TimeOfDayParams
203220
}
204221

205222
// DecimalParams describes a fixed-precision decimal number.
@@ -214,6 +231,100 @@ type DecimalParams struct {
214231
Scale int32
215232
}
216233

234+
// TimestampParams describes the precision and timezone semantics of a
235+
// [Timestamp] schema. Unit selects the resolution at which the timestamp is
236+
// expressed; AdjustToUTC distinguishes a UTC instant (true) from a civil /
237+
// "local" datetime that carries no timezone offset (false).
238+
//
239+
// A nil [LogicalParams.Timestamp] on a [Timestamp]-typed schema is permitted
240+
// for backwards compatibility and is treated as {Unit: TimeUnitMillis,
241+
// AdjustToUTC: true}; see [Common.EffectiveTimestamp].
242+
type TimestampParams struct {
243+
// Unit is the resolution at which the timestamp is expressed.
Unit TimeUnit
244+
// AdjustToUTC is true for a UTC instant and false for a civil / "local"
// datetime that carries no timezone offset.
AdjustToUTC bool
245+
}
246+
247+
// TimeOfDayParams describes the precision and timezone semantics of a
248+
// [TimeOfDay] schema (a wall-clock time with no date component). Unit selects
249+
// the resolution; AdjustToUTC parallels the equivalent Parquet TIME flag and
250+
// is rare outside Parquet/Postgres timetz.
251+
//
252+
// Unlike [TimestampParams], a [TimeOfDay]-typed schema must have non-nil
253+
// [LogicalParams.TimeOfDay] — there is no historical default to fall back to.
254+
type TimeOfDayParams struct {
255+
// Unit is the resolution at which the time of day is expressed.
Unit TimeUnit
256+
// AdjustToUTC parallels the equivalent Parquet TIME flag.
AdjustToUTC bool
257+
}
258+
259+
// TimeUnit is the resolution in which a [Timestamp] or [TimeOfDay] value is
// expressed. The zero value is not a valid unit; callers must pick one of the
// named constants below.
type TimeUnit int

// Supported time units. The values are contiguous, which lets valid use a
// simple range check and lets timeUnitNames be indexed directly by unit.
const (
	TimeUnitSeconds TimeUnit = 1
	TimeUnitMillis  TimeUnit = 2
	TimeUnitMicros  TimeUnit = 3
	TimeUnitNanos   TimeUnit = 4
)

// timeUnitNames holds the serialised spelling of every named TimeUnit, indexed
// by the unit's numeric value. Index 0 stays empty because the zero TimeUnit
// is invalid.
var timeUnitNames = [...]string{
	TimeUnitSeconds: "SECONDS",
	TimeUnitMillis:  "MILLIS",
	TimeUnitMicros:  "MICROS",
	TimeUnitNanos:   "NANOS",
}

// String returns the upper-case name of the time unit as written by
// [Common.ToAny]. Any value that is not one of the named constants is rendered
// as "UNKNOWN".
func (u TimeUnit) String() string {
	if !u.valid() {
		return "UNKNOWN"
	}
	return timeUnitNames[u]
}

// timeUnitFromStr is the inverse of [TimeUnit.String] for the named constants.
// Matching is exact and case-sensitive; any other input (including "UNKNOWN"
// and the empty string) yields the zero TimeUnit and an error.
func timeUnitFromStr(v string) (TimeUnit, error) {
	// Start at the first named unit so the empty slot at index 0 can never
	// match an empty input string.
	for unit := TimeUnitSeconds; unit <= TimeUnitNanos; unit++ {
		if timeUnitNames[unit] == v {
			return unit, nil
		}
	}
	return 0, fmt.Errorf("unrecognised time unit string: %v", v)
}

// valid reports whether u is one of the named TimeUnit constants.
func (u TimeUnit) valid() bool {
	return u >= TimeUnitSeconds && u <= TimeUnitNanos
}
312+
313+
// EffectiveTimestamp returns the timestamp parameters for c, applying the
314+
// legacy default ({Unit: TimeUnitMillis, AdjustToUTC: true}) when c.Logical
315+
// is unset. It is only meaningful when c.Type == [Timestamp]; for other
316+
// types the returned value should be ignored.
317+
//
318+
// Format adapters that need to honour both pre-parameterised legacy schemas
319+
// and richer schemas produced by newer decoders should consult this rather
320+
// than peeking at c.Logical directly.
321+
func (c *Common) EffectiveTimestamp() TimestampParams {
322+
if c.Logical != nil && c.Logical.Timestamp != nil {
323+
return *c.Logical.Timestamp
324+
}
325+
return TimestampParams{Unit: TimeUnitMillis, AdjustToUTC: true}
326+
}
327+
217328
const (
218329
anyFieldType = "type"
219330
anyFieldName = "name"
@@ -222,6 +333,8 @@ const (
222333
anyFieldFingerprint = "fingerprint"
223334
anyFieldPrecision = "precision"
224335
anyFieldScale = "scale"
336+
anyFieldUnit = "unit"
337+
anyFieldAdjustToUTC = "adjust_to_utc"
225338
)
226339

227340
// ToAny serializes the common schema into a generic Go value, with structured
@@ -265,6 +378,19 @@ func (c *Common) ToAny() any {
265378
m[anyFieldScale] = int64(c.Logical.Decimal.Scale)
266379
}
267380

381+
// Timestamp parameters are only emitted when present, so legacy schemas
382+
// (Type == Timestamp with nil Logical) keep their pre-parameterised
383+
// fingerprint and ToAny output exactly.
384+
if c.Type == Timestamp && c.Logical != nil && c.Logical.Timestamp != nil {
385+
m[anyFieldUnit] = c.Logical.Timestamp.Unit.String()
386+
m[anyFieldAdjustToUTC] = c.Logical.Timestamp.AdjustToUTC
387+
}
388+
389+
if c.Type == TimeOfDay && c.Logical != nil && c.Logical.TimeOfDay != nil {
390+
m[anyFieldUnit] = c.Logical.TimeOfDay.Unit.String()
391+
m[anyFieldAdjustToUTC] = c.Logical.TimeOfDay.AdjustToUTC
392+
}
393+
268394
return m
269395
}
270396

@@ -361,6 +487,42 @@ func parseFromAnyNoValidate(v any) (Common, error) {
361487
return c, errors.New("type DECIMAL requires fields `precision` and `scale`")
362488
}
363489

490+
_, hasUnit := obj[anyFieldUnit]
491+
_, hasAdjust := obj[anyFieldAdjustToUTC]
492+
if hasUnit || hasAdjust {
493+
switch c.Type {
494+
case Timestamp, TimeOfDay:
495+
default:
496+
return c, fmt.Errorf("fields `unit` and `adjust_to_utc` are only valid for types TIMESTAMP or TIME_OF_DAY, got %v", c.Type)
497+
}
498+
if !hasUnit {
499+
return c, fmt.Errorf("type %v with `adjust_to_utc` requires field `unit`", c.Type)
500+
}
501+
if !hasAdjust {
502+
return c, fmt.Errorf("type %v with `unit` requires field `adjust_to_utc`", c.Type)
503+
}
504+
unitStr, ok := obj[anyFieldUnit].(string)
505+
if !ok {
506+
return c, fmt.Errorf("expected field `unit` of type string, got %T", obj[anyFieldUnit])
507+
}
508+
unit, err := timeUnitFromStr(unitStr)
509+
if err != nil {
510+
return c, err
511+
}
512+
adjustB, ok := obj[anyFieldAdjustToUTC].(bool)
513+
if !ok {
514+
return c, fmt.Errorf("expected field `adjust_to_utc` of type bool, got %T", obj[anyFieldAdjustToUTC])
515+
}
516+
switch c.Type {
517+
case Timestamp:
518+
c.Logical = &LogicalParams{Timestamp: &TimestampParams{Unit: unit, AdjustToUTC: adjustB}}
519+
case TimeOfDay:
520+
c.Logical = &LogicalParams{TimeOfDay: &TimeOfDayParams{Unit: unit, AdjustToUTC: adjustB}}
521+
}
522+
} else if c.Type == TimeOfDay {
523+
return c, errors.New("type TIME_OF_DAY requires fields `unit` and `adjust_to_utc`")
524+
}
525+
364526
return c, nil
365527
}
366528

@@ -439,6 +601,33 @@ func (c *Common) Validate() error {
439601
return fmt.Errorf("Logical.Decimal parameters are only valid for type DECIMAL, got %v", c.Type)
440602
}
441603

604+
// Timestamp parameters are optional: a nil Logical.Timestamp on a
605+
// Timestamp-typed schema is treated as the legacy default (millis, UTC),
606+
// see [Common.EffectiveTimestamp]. When provided, the unit must be one of
607+
// the named TimeUnit constants.
608+
if c.Type == Timestamp {
609+
if c.Logical != nil && c.Logical.Timestamp != nil {
610+
if !c.Logical.Timestamp.Unit.valid() {
611+
return fmt.Errorf("invalid timestamp unit %v", int(c.Logical.Timestamp.Unit))
612+
}
613+
}
614+
} else if c.Logical != nil && c.Logical.Timestamp != nil {
615+
return fmt.Errorf("Logical.Timestamp parameters are only valid for type TIMESTAMP, got %v", c.Type)
616+
}
617+
618+
// TimeOfDay parameters are required: there is no historical default to
619+
// fall back to, since the type itself is new.
620+
if c.Type == TimeOfDay {
621+
if c.Logical == nil || c.Logical.TimeOfDay == nil {
622+
return errors.New("type TIME_OF_DAY requires Logical.TimeOfDay parameters")
623+
}
624+
if !c.Logical.TimeOfDay.Unit.valid() {
625+
return fmt.Errorf("invalid time-of-day unit %v", int(c.Logical.TimeOfDay.Unit))
626+
}
627+
} else if c.Logical != nil && c.Logical.TimeOfDay != nil {
628+
return fmt.Errorf("Logical.TimeOfDay parameters are only valid for type TIME_OF_DAY, got %v", c.Type)
629+
}
630+
442631
if !c.isContainerType() && len(c.Children) > 0 {
443632
return fmt.Errorf("type %v is a leaf and must not have children", c.Type)
444633
}
@@ -497,6 +686,12 @@ func (c *Common) writeFingerprint(w io.Writer) {
497686
if c.Type == Decimal && c.Logical != nil && c.Logical.Decimal != nil {
498687
fmt.Fprintf(w, "D:%d:%d|", c.Logical.Decimal.Precision, c.Logical.Decimal.Scale)
499688
}
689+
if c.Type == Timestamp && c.Logical != nil && c.Logical.Timestamp != nil {
690+
fmt.Fprintf(w, "TS:%d:%t|", c.Logical.Timestamp.Unit, c.Logical.Timestamp.AdjustToUTC)
691+
}
692+
if c.Type == TimeOfDay && c.Logical != nil && c.Logical.TimeOfDay != nil {
693+
fmt.Fprintf(w, "TOD:%d:%t|", c.Logical.TimeOfDay.Unit, c.Logical.TimeOfDay.AdjustToUTC)
694+
}
500695

501696
// Write children count and recursively fingerprint each child
502697
fmt.Fprintf(w, "C:%d|", len(c.Children))

0 commit comments

Comments
 (0)