@@ -80,32 +80,70 @@ var ErrShaFallbackNeedsKeymap = errors.New("backup: filename uses SHA fallback;
8080// filename component. It is the inverse of DecodeSegment for non-fallback
8181// inputs.
8282//
83- // The encoding is deterministic and idempotent given the same input.
83+ // The encoding is deterministic given the same input.
8484//
85- // Two short-circuits ensure the encoder never trips its own invariants:
85+ // Three short-circuits route a key to shaFallback: one size bound, and
86+ // two guards that ensure DecodeSegment cannot misclassify a legitimate key:
8687//
87- // - If raw is so large that percent-encoding it would always overflow
88- // maxSegmentBytes (3*len(raw) > maxSegmentBytes), we go straight to
89- // shaFallback without allocating the full expansion. Without this an
90- // adversarial caller could force a very large transient allocation
91- // just to discard it.
92- // - If the percent-encoded form happens to match the SHA-fallback shape
93- // (32 hex chars followed by "__"), we promote it to a real
88+ // - If `raw` is longer than maxSegmentBytes, even a fully-unreserved
89+ // encoding (1:1) cannot fit, so we go straight to shaFallback.
90+ // This also caps the percent-encode allocation at
91+ // ~maxSegmentBytes, preventing OOM on adversarial input.
92+ // - If the percent-encoded form happens to match the SHA-fallback
93+ // shape (32 hex chars followed by "__"), we promote it to a real
9494// SHA-fallback so DecodeSegment's structural detection cannot
95- // misclassify a legitimate key. Both isShaFallback and shaFallback
96- // are true on the resulting output, so KEYMAP.jsonl carries the
97- // original bytes for exact-byte recovery.
95+ // fabricate a wrong original.
96+ // - If the percent-encoded form starts with the binary "b64."
97+ // prefix, we promote to SHA-fallback for the same reason: a
98+ // plain string key like "b64.foo" would otherwise be decoded as
99+ // base64 and produce different bytes on round-trip.
100+ //
101+ // Both promoted-fallback paths leave the original in KEYMAP.jsonl
102+ // (a correctness dependency, per the package doc), so exact-byte
103+ // recovery is preserved.
98104func EncodeSegment (raw []byte ) string {
99- if len (raw )* percentEncodeMaxExpansion > maxSegmentBytes {
105+ if len (raw ) > maxSegmentBytes {
106+ // 1:1 lower bound on encoded length; cannot fit.
100107 return shaFallback (raw )
101108 }
102- encoded := percentEncode (raw )
103- if len (encoded ) > maxSegmentBytes || isShaFallback (encoded ) {
109+ encoded , ok := percentEncodeBounded (raw , maxSegmentBytes )
110+ if ! ok || isShaFallback (encoded ) || strings . HasPrefix (encoded , binaryPrefix ) {
104111 return shaFallback (raw )
105112 }
106113 return encoded
107114}
108115
116+ // percentEncodeBounded percent-encodes raw, bailing out as soon as the
117+ // in-progress output would exceed maxLen. Returns ("", false) on
118+ // overflow so the caller can take the SHA-fallback path without
119+ // having allocated the full 3*len(raw) buffer that the unbounded
120+ // variant would. Returns (encoded, true) on success.
121+ func percentEncodeBounded (raw []byte , maxLen int ) (string , bool ) {
122+ const escapeBytes = 3 // len("%HH") -- one escape's worst-case width
123+ cap := escapeBytes * len (raw )
124+ if cap > maxLen + escapeBytes {
125+ cap = maxLen + escapeBytes
126+ }
127+ var b strings.Builder
128+ b .Grow (cap )
129+ for _ , c := range raw {
130+ if isUnreserved (c ) {
131+ if b .Len ()+ 1 > maxLen {
132+ return "" , false
133+ }
134+ b .WriteByte (c )
135+ continue
136+ }
137+ if b .Len ()+ escapeBytes > maxLen {
138+ return "" , false
139+ }
140+ b .WriteByte ('%' )
141+ b .WriteByte (hexUpper (c >> 4 )) //nolint:mnd // 4 == nibble width
142+ b .WriteByte (hexUpper (c & 0x0F )) //nolint:mnd // 0x0F == low-nibble mask
143+ }
144+ return b .String (), true
145+ }
146+
109147// EncodeBinarySegment encodes a DynamoDB B-attribute (binary) segment as
110148// "b64.<base64url-no-padding>" so that binary keys never collide with string
111149// keys whose hex-encoding happens to look like base64.
@@ -152,10 +190,6 @@ func DecodeSegment(seg string) ([]byte, error) {
152190 return percentDecode (seg )
153191}
154192
155- // percentEncodeMaxExpansion is the worst-case ratio of encoded length to
156- // raw length for percentEncode (every byte expands to "%HH").
157- const percentEncodeMaxExpansion = 3
158-
159193// IsShaFallback reports whether seg uses the SHA-prefix-and-truncated-original
160194// form. Such segments cannot be reversed without KEYMAP.jsonl.
161195func IsShaFallback (seg string ) bool {
0 commit comments