@@ -81,33 +81,64 @@ var ErrShaFallbackNeedsKeymap = errors.New("backup: filename uses SHA fallback;
8181// inputs.
8282//
8383// The encoding is deterministic and idempotent given the same input.
84+ //
85+ // Two short-circuits ensure the encoder never trips its own invariants:
86+ //
87+ // - If raw is so large that percent-encoding it would always overflow
88+ // maxSegmentBytes (3*len(raw) > maxSegmentBytes), we go straight to
89+ // shaFallback without allocating the full expansion. Without this an
90+ // adversarial caller could force a very large transient allocation
91+ // just to discard it.
92+ // - If the percent-encoded form happens to match the SHA-fallback shape
93+ // (32 hex chars followed by "__"), we promote it to a real
94+ // SHA-fallback so DecodeSegment's structural detection cannot
95+ // misclassify a legitimate key. Both isShaFallback and shaFallback
96+ // are true on the resulting output, so KEYMAP.jsonl carries the
97+ // original bytes for exact-byte recovery.
8498func EncodeSegment (raw []byte ) string {
99+ if len (raw )* percentEncodeMaxExpansion > maxSegmentBytes {
100+ return shaFallback (raw )
101+ }
85102 encoded := percentEncode (raw )
86- if len (encoded ) <= maxSegmentBytes {
87- return encoded
103+ if len (encoded ) > maxSegmentBytes || isShaFallback ( encoded ) {
104+ return shaFallback ( raw )
88105 }
89- return shaFallback ( raw )
106+ return encoded
90107}
91108
92109// EncodeBinarySegment encodes a DynamoDB B-attribute (binary) segment as
93110// "b64.<base64url-no-padding>" so that binary keys never collide with string
94111// keys whose hex-encoding happens to look like base64.
95112//
96- // b64-encoded segments take the SHA fallback if they exceed maxSegmentBytes
97- // after the base64 expansion (~4/3 of the raw length).
113+ // Short-circuits the SHA-fallback for inputs whose base64 expansion (~4/3 of
114+ // the raw length, plus the 4-byte "b64." prefix) would always overflow
115+ // maxSegmentBytes. As with EncodeSegment, this avoids an unnecessary large
116+ // allocation when the result would have been discarded anyway.
98117func EncodeBinarySegment (raw []byte ) string {
118+ if base64 .RawURLEncoding .EncodedLen (len (raw ))+ len (binaryPrefix ) > maxSegmentBytes {
119+ return shaFallback (raw )
120+ }
99121 enc := binaryPrefix + base64 .RawURLEncoding .EncodeToString (raw )
100- if len (enc ) <= maxSegmentBytes {
101- return enc
122+ if len (enc ) > maxSegmentBytes {
123+ return shaFallback ( raw )
102124 }
103- return shaFallback ( raw )
125+ return enc
104126}
105127
106128// DecodeSegment is the inverse of EncodeSegment for percent-encoded and
107129// binary-prefixed inputs. SHA-fallback inputs return ErrShaFallbackNeedsKeymap
108130// so the caller knows to consult KEYMAP.jsonl rather than treat the partial
109131// suffix as the original key.
132+ //
133+ // As a defensive measure DecodeSegment refuses inputs longer than
134+ // maxSegmentBytes. EncodeSegment never produces such inputs, so any caller
135+ // passing one is either reading a corrupted dump or has a bug; either way the
136+ // percentDecode allocation should not run.
110137func DecodeSegment (seg string ) ([]byte , error ) {
138+ if len (seg ) > maxSegmentBytes {
139+ return nil , errors .Wrapf (ErrInvalidEncodedSegment ,
140+ "segment length %d exceeds maximum %d" , len (seg ), maxSegmentBytes )
141+ }
111142 if isShaFallback (seg ) {
112143 return nil , errors .WithStack (ErrShaFallbackNeedsKeymap )
113144 }
@@ -121,6 +152,10 @@ func DecodeSegment(seg string) ([]byte, error) {
121152 return percentDecode (seg )
122153}
123154
// percentEncodeMaxExpansion is the worst-case ratio of encoded length to
// raw length for percentEncode, reached when every byte expands to "%HH".
// It is an upper bound only: len(raw)*percentEncodeMaxExpansion exceeding a
// limit does not by itself prove that the encoded form exceeds that limit.
// NOTE(review): inputs that need no escaping presumably encode at a lower
// ratio — confirm against percentEncode.
const percentEncodeMaxExpansion = 3
158+
124159// IsShaFallback reports whether seg uses the SHA-prefix-and-truncated-original
125160// form. Such segments cannot be reversed without KEYMAP.jsonl.
126161func IsShaFallback (seg string ) bool {
0 commit comments