redpanda-data · Jeffail · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.
 ### Added
 
 - Schema: Added a `Decimal` common type carrying precision and scale via a new `LogicalParams` struct, enabling lossless conversion between Avro, Parquet, and database `NUMBER`/`NUMERIC` decimals. Includes `NewDecimal`, `FormatDecimal`/`ParseDecimal`, and `DecimalParams.Format`/`Parse`/`ValidateValue` helpers, plus a `Common.Validate` entry point. `ParseFromAny` and `InferFromAny` now accept `encoding/json.Number` values, so schemas pipelined through `json.Decoder.UseNumber()` round-trip without precision loss. (@Jeffail)
+- Schema: Added a `BigDecimal` common type for arbitrary-precision decimals, alongside `NewBigDecimal`, `FormatBigDecimal`, and `ParseBigDecimal` helpers. Use it for sources that lack column-level precision (Postgres `numeric` without `(p, s)`, Oracle `NUMBER` with no `DATA_PRECISION`, MongoDB `Decimal128`). `Common.Validate` enforces `BigDecimal` as a leaf type with no logical parameters. (@Jeffail)
 
 ## 4.70.0 - 2026-04-02
 

diff --git a/public/schema/bigdecimal.go b/public/schema/bigdecimal.go
@@ -0,0 +1,64 @@
+// Copyright 2026 Redpanda Data, Inc.
+
+package schema
+
+import (
+	"fmt"
+	"math/big"
+)
+
+// NewBigDecimal constructs a Common schema for a [BigDecimal] column —
+// an arbitrary-precision decimal with no schema-level precision or scale
+// commitment. Use this for sources where the column type does not carry
+// fixed precision and scale (e.g. unparameterised Postgres NUMERIC, Oracle
+// NUMBER without DATA_PRECISION, MongoDB Decimal128).
+func NewBigDecimal(name string, optional bool) Common {
+	return Common{
+		Name:     name,
+		Type:     BigDecimal,
+		Optional: optional,
+	}
+}
+
+// FormatBigDecimal renders the unscaled integer at the supplied scale as a
+// canonical [BigDecimal] string by delegating directly to [FormatDecimal],
+// so the output rules are identical: a leading minus for negatives only, no
+// leading plus, a lone "0" before the decimal point when the integer part is
+// zero, and a decimal point followed by exactly scale digits iff scale > 0.
+//
+// The [BigDecimal] schema fixes no scale, unlike [DecimalParams.Format];
+// pass the source value's natural scale, which must be non-negative.
+func FormatBigDecimal(unscaled *big.Int, scale int32) (string, error) {
+	out, err := FormatDecimal(unscaled, scale)
+	return out, err
+}
+
+// ParseBigDecimal interprets s as a decimal-shaped string and returns the
+// unscaled integer alongside the scale recovered from the number of
+// fractional digits in the input.
+//
+// The accepted form matches [ParseDecimal]: lenient on non-canonical-but-
+// unambiguous inputs (leading plus, leading zeros, missing integer part as
+// in ".5"), strict on ambiguous or malformed inputs (scientific notation,
+// multiple decimal points, whitespace, thousands separators, non-digit
+// characters). Canonical form is enforced when values are re-emitted via
+// [FormatBigDecimal]. Unlike [ParseDecimal], the scale is recovered from
+// the input rather than supplied by the caller.
+//
+// The parser does not bound the input length. The underlying big.Int parse
+// is super-linear, so callers exposing this directly to untrusted input
+// should impose their own length cap.
+func ParseBigDecimal(s string) (*big.Int, int32, error) {
+	sign, intPart, fracPart, err := parseCanonicalDecimal(s)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	raw := sign + intPart + fracPart
+	n, ok := new(big.Int).SetString(raw, 10)
+	if !ok {
+		return nil, 0, fmt.Errorf("failed to parse decimal value %q", s)
+	}
+
+	return n, int32(len(fracPart)), nil
+}
diff --git a/public/schema/bigdecimal_test.go b/public/schema/bigdecimal_test.go
@@ -0,0 +1,246 @@
+// Copyright 2026 Redpanda Data, Inc.
+
+package schema
+
+import (
+	"math/big"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestNewBigDecimal(t *testing.T) {
+	schema := NewBigDecimal("amount", true)
+	assert.Equal(t, "amount", schema.Name)
+	assert.Equal(t, BigDecimal, schema.Type)
+	assert.Nil(t, schema.Logical) // the constructor attaches no logical parameters
+	assert.True(t, schema.Optional)
+	assert.NoError(t, schema.Validate())
+}
+
+// TestBigDecimalToAnyOmitsParams checks ToAny emits no precision or scale keys.
+func TestBigDecimalToAnyOmitsParams(t *testing.T) {
+	schema := NewBigDecimal("x", false)
+	encoded, isMap := schema.ToAny().(map[string]any)
+	require.True(t, isMap)
+	assert.Equal(t, "BIG_DECIMAL", encoded[anyFieldType])
+	_, precisionSet := encoded[anyFieldPrecision]
+	assert.False(t, precisionSet)
+	_, scaleSet := encoded[anyFieldScale]
+	assert.False(t, scaleSet)
+}
+
+func TestBigDecimalRoundTrip(t *testing.T) {
+	src := NewBigDecimal("balance", true)
+	decoded, err := ParseFromAny(src.ToAny())
+	require.NoError(t, err)
+	assert.Nil(t, decoded.Logical) // no logical parameters may appear on decode
+	assert.Equal(t, src.Name, decoded.Name)
+	assert.Equal(t, src.Type, decoded.Type)
+	assert.Equal(t, src.Optional, decoded.Optional)
+	assert.Equal(t, src.fingerprint(), decoded.fingerprint())
+}
+
+// TestBigDecimalParseFromAnyRejectsParams checks precision/scale keys are refused.
+func TestBigDecimalParseFromAnyRejectsParams(t *testing.T) {
+	_, err := ParseFromAny(map[string]any{
+		anyFieldName:      "x",
+		anyFieldType:      "BIG_DECIMAL",
+		anyFieldScale:     int64(2),
+		anyFieldPrecision: int64(10),
+	})
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "only valid for type DECIMAL")
+}
+
+func TestBigDecimalValidateRejectsLogicalDecimal(t *testing.T) {
+	c := Common{
+		Type:    BigDecimal,
+		Name:    "x",
+		Logical: &LogicalParams{Decimal: &DecimalParams{Precision: 10, Scale: 2}},
+	}
+	err := c.Validate()
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "only valid for type DECIMAL")
+}
+
+func TestBigDecimalValidateRejectsChildren(t *testing.T) {
+	c := Common{
+		Type:     BigDecimal,
+		Name:     "x",
+		Children: []Common{{Type: String, Name: "weird"}},
+	}
+	err := c.Validate()
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "is a leaf and must not have children")
+}
+
+// TestFormatBigDecimal checks formatted output for a table of unscaled/scale pairs.
+func TestFormatBigDecimal(t *testing.T) {
+	cases := []struct {
+		label  string
+		digits string
+		scale  int32
+		expect string
+	}{
+		{"zero scale zero", "0", 0, "0"},
+		{"zero scale four", "0", 4, "0.0000"},
+		{"twelve thousand scale four", "12345", 4, "1.2345"},
+		{"negative one scale four", "-1", 4, "-0.0001"},
+		{"large scale", "1", 30, "0." + strings.Repeat("0", 29) + "1"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.label, func(t *testing.T) {
+			unscaled, parsed := new(big.Int).SetString(tc.digits, 10)
+			require.True(t, parsed)
+			formatted, err := FormatBigDecimal(unscaled, tc.scale)
+			require.NoError(t, err)
+			assert.Equal(t, tc.expect, formatted)
+		})
+	}
+}
+
+// TestFormatBigDecimalNoNegativeZero checks that zero and negated zero both
+// format to the same string with no leading minus.
+func TestFormatBigDecimalNoNegativeZero(t *testing.T) {
+	// big.Int cannot represent -0: negating zero still reports Sign() == 0,
+	// so the formatted output must carry no leading minus. Both
+	// constructions below are expected to yield the same canonical zero.
+	zeroPos := big.NewInt(0)
+	zeroNeg := new(big.Int).Neg(big.NewInt(0))
+
+	for _, zero := range []*big.Int{zeroPos, zeroNeg} {
+		got, err := FormatBigDecimal(zero, 4)
+		require.NoError(t, err)
+		assert.Equal(t, "0.0000", got)
+	}
+}
+
+// TestParseBigDecimal checks the unscaled value and scale recovered from valid inputs.
+func TestParseBigDecimal(t *testing.T) {
+	cases := []struct {
+		name         string
+		in           string
+		wantUnscaled string
+		wantScale    int32
+	}{
+		{"integer", "12345", "12345", 0},
+		{"negative integer", "-12345", "-12345", 0},
+		{"fractional", "1.5", "15", 1},
+		{"three fractional", "1.500", "1500", 3},
+		{"zero", "0", "0", 0},
+		{"zero with scale", "0.0000", "0", 4},
+		{"negative fractional", "-0.0001", "-1", 4},
+		{"trailing dot", "1.", "1", 0},
+		{"high scale", "0." + strings.Repeat("0", 29) + "1", "1", 30},
+		// Non-canonical yet unambiguous forms that the parser tolerates.
+		{"leading plus", "+1.5", "15", 1},
+		{"leading zero", "01.5", "15", 1},
+		{"leading zeros multiple", "-001", "-1", 0},
+		{"missing integer part", ".5", "5", 1},
+		{"missing integer part with sign", "-.5", "-5", 1},
+		{"plus and missing integer", "+.5", "5", 1},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			unscaled, gotScale, err := ParseBigDecimal(tc.in)
+			require.NoError(t, err)
+			assert.Equal(t, tc.wantUnscaled, unscaled.String())
+			assert.Equal(t, tc.wantScale, gotScale)
+		})
+	}
+}
+
+// TestParseBigDecimalErrors checks malformed inputs fail with the expected message.
+func TestParseBigDecimalErrors(t *testing.T) {
+	cases := []struct {
+		name      string
+		in        string
+		errSubstr string
+	}{
+		{"empty", "", "must not be empty"},
+		{"just minus", "-", "no digits"},
+		{"just plus", "+", "no digits"},
+		{"just dot", ".", "no digits"},
+		{"two dots", "1.2.3", "at most one decimal point"},
+		{"non-digit", "1.2a", "non-digit"},
+		{"scientific notation", "1e5", "non-digit"},
+		{"whitespace", " 1.5", "non-digit"},
+		{"thousands separator", "1,000", "non-digit"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			_, _, err := ParseBigDecimal(tc.in)
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tc.errSubstr)
+		})
+	}
+}
+
+// TestBigDecimalFormatParseRoundTrip parses each canonical string, formats the
+// recovered (unscaled, scale) pair, and expects the original string back.
+func TestBigDecimalFormatParseRoundTrip(t *testing.T) {
+	canonical := []string{
+		"0",
+		"0.0",
+		"0.0000",
+		"1",
+		"-1",
+		"1.5",
+		"-1.5",
+		"12345.6789",
+		"-12345.6789",
+		"0.000000000000000000000000000001",
+	}
+
+	for _, text := range canonical {
+		t.Run(text, func(t *testing.T) {
+			n, digits, err := ParseBigDecimal(text)
+			require.NoError(t, err)
+			formatted, err := FormatBigDecimal(n, digits)
+			require.NoError(t, err)
+			assert.Equal(t, text, formatted)
+		})
+	}
+}
+
+// TestParseBigDecimalNormalisesToCanonical follows Postel's principle (lenient
+// parse, strict emit): every non-canonical input the parser accepts must be
+// rendered in canonical form when passed back through FormatBigDecimal.
+func TestParseBigDecimalNormalisesToCanonical(t *testing.T) {
+	cases := []struct {
+		raw       string
+		canonical string
+	}{
+		{"+1.5", "1.5"},
+		{"01.5", "1.5"},
+		{"-001.5", "-1.5"},
+		{".5", "0.5"},
+		{"+.5", "0.5"},
+		{"-.5", "-0.5"},
+		{"+0.0001", "0.0001"},
+		{"01.500", "1.500"}, // trailing zeros preserved (scale 3)
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.raw, func(t *testing.T) {
+			unscaled, digits, err := ParseBigDecimal(tc.raw)
+			require.NoError(t, err)
+			formatted, err := FormatBigDecimal(unscaled, digits)
+			require.NoError(t, err)
+			assert.Equal(t, tc.canonical, formatted)
+		})
+	}
+}
+
+// TestBigDecimalFingerprintDistinctFromDecimal checks that a BigDecimal and a
+// same-named Decimal with logical parameters yield different fingerprints.
+func TestBigDecimalFingerprintDistinctFromDecimal(t *testing.T) {
+	arbitrary := NewBigDecimal("amount", false)
+	fixed := Common{Name: "amount", Type: Decimal}
+	fixed.Logical = &LogicalParams{Decimal: &DecimalParams{Precision: 10, Scale: 2}}
+
+	assert.NotEqual(t, arbitrary.fingerprint(), fixed.fingerprint())
+}