Skip to content

Commit 47f16d3

Browse files
committed
fix(proxy): allocation-free encode fast paths + typed path/query selector
encodePhantomForPair: a pre-scan via phantomNeedsQueryEscape returns nil without any allocation when every byte in the phantom is already in the unreserved-for-query-component set. Skips the byte->string copy that url.QueryEscape would otherwise produce for the no-op case. encodePhantomLowerForPair: a pre-scan returns nil without allocating when no percent-escape in the encoded phantom contains an uppercase A-F hex digit, so the "nothing to lowercase" case (notably OAuth JWT phantoms with no upper-hex escapes) is allocation-free. swapPhantomBytes: the path-vs-query escape choice is now driven by an explicit pathContext bool parameter instead of comparing the human-readable location label. Callers in Request and the two test helpers pass pathContext directly, so the type system enforces the escape choice and a typo in the location string can no longer silently flip path encoding to query encoding (or vice versa).
1 parent b5d4a2e commit 47f16d3

3 files changed

Lines changed: 60 additions & 26 deletions

File tree

internal/proxy/addon.go

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -637,20 +637,21 @@ func (a *SluiceAddon) Request(f *mitmproxy.Flow) {
637637

638638
// Pass 2+3 on body.
639639
if len(f.Request.Body) > 0 {
640-
f.Request.Body = a.swapPhantomBytes(f.Request.Body, pairs, host, port, "body")
640+
f.Request.Body = a.swapPhantomBytes(f.Request.Body, pairs, host, port, "body", false)
641641
}
642642

643643
// Pass 2+3 on URL query.
644644
if rawQ := f.Request.URL.RawQuery; bytesContainsAnyPhantomPrefix([]byte(rawQ)) {
645645
f.Request.URL.RawQuery = string(
646-
a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query"),
646+
a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query", false),
647647
)
648648
}
649649

650-
// Pass 2+3 on URL path.
650+
// Pass 2+3 on URL path. pathContext=true selects path escaping so
651+
// secrets containing spaces get %20, not '+'.
651652
if rawP := f.Request.URL.Path; bytesContainsAnyPhantomPrefix([]byte(rawP)) {
652653
f.Request.URL.Path = string(
653-
a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path"),
654+
a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path", true),
654655
)
655656
f.Request.URL.RawPath = ""
656657
}
@@ -1265,14 +1266,17 @@ func bytesContainsAnyPhantomPrefix(data []byte) bool {
12651266
// every body, query, or header scan. The encoded secret is computed on
12661267
// demand once per swap call, only when the encoded phantom actually appears.
12671268
//
1268-
// location chooses between query escaping (body, URL query, header) and
1269-
// path escaping (URL path). The two differ in how spaces are encoded:
1270-
// QueryEscape uses '+', PathEscape uses '%20'. Using QueryEscape for a path
1271-
// substitution would turn a space in the secret into a literal '+' in the
1272-
// URL path, which is interpreted as a plus character by the server, not a
1273-
// space — corrupting the request.
1274-
func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string) []byte {
1275-
pathContext := location == "URL path"
1269+
// pathContext chooses between query escaping (false; body, URL query,
1270+
// header) and path escaping (true; URL path). The two differ in how
1271+
// spaces are encoded: QueryEscape uses '+', PathEscape uses '%20'. Using
1272+
// query escaping for a path substitution would turn a space in the
1273+
// secret into a literal '+' in the URL path, which the server reads as
1274+
// a plus character, not a space — corrupting the request. The boolean is
1275+
// passed in explicitly so the type system enforces the choice; callers
1276+
// cannot accidentally pick path escaping by typo-ing the location label.
1277+
// location is still passed for the audit log message but never drives
1278+
// behavior.
1279+
func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string, pathContext bool) []byte {
12761280
for _, p := range pairs {
12771281
if bytes.Contains(data, p.phantom) {
12781282
data = bytes.ReplaceAll(data, p.phantom, p.secret.Bytes())

internal/proxy/addon_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -854,7 +854,7 @@ func TestSwapPhantomBytes_PathUsesPathEscape(t *testing.T) {
854854
defer releasePhantomPairs(pairs)
855855

856856
in := []byte("/v1/SLUICE_PHANTOM%3Aapi_key/resource")
857-
out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL path")
857+
out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL path", true)
858858
got := string(out)
859859

860860
if !strings.Contains(got, "real%20secret") {
@@ -892,7 +892,7 @@ func TestSwapPhantomBytes_QueryUsesQueryEscape(t *testing.T) {
892892
defer releasePhantomPairs(pairs)
893893

894894
in := []byte("token=SLUICE_PHANTOM%3Aapi_key")
895-
out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL query")
895+
out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL query", false)
896896
got := string(out)
897897

898898
if !strings.Contains(got, "real+secret") {

internal/proxy/phantom_pairs.go

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,35 @@
11
package proxy
22

33
import (
4-
"bytes"
54
"log"
6-
"net/url"
75

86
"github.com/nemirovsky/sluice/internal/vault"
97
)
108

119
// encodePhantomForPair returns the URL query-escaped form of a phantom
1210
// token, or nil when QueryEscape would leave the bytes unchanged. The
1311
// "nil when unchanged" convention lets the hot-path swap skip a redundant
14-
// bytes.Contains scan for the literal form a second time.
12+
// bytes.Contains scan for the literal form a second time. A pre-scan
13+
// returns nil before any allocation when no byte in phantom would be
14+
// escaped — also avoids the byte->string copy that url.QueryEscape would
15+
// otherwise produce for the no-op case.
1516
func encodePhantomForPair(phantom []byte) []byte {
16-
encoded := []byte(url.QueryEscape(string(phantom)))
17-
if bytes.Equal(encoded, phantom) {
17+
if !phantomNeedsQueryEscape(phantom) {
1818
return nil
1919
}
20-
return encoded
20+
return queryEscapeBytes(phantom)
21+
}
22+
23+
// phantomNeedsQueryEscape reports whether any byte in phantom would be
24+
// rewritten by queryEscapeBytes. Returns true on a space or any byte
25+
// outside the unreserved-for-query-component set.
26+
func phantomNeedsQueryEscape(phantom []byte) bool {
27+
for _, c := range phantom {
28+
if c == ' ' || !shouldNotEscapeQueryComponent(c) {
29+
return true
30+
}
31+
}
32+
return false
2133
}
2234

2335
// encodePhantomLowerForPair returns the lowercase-hex variant of the
@@ -26,16 +38,33 @@ func encodePhantomForPair(phantom []byte) []byte {
2638
// hex digits A-F). RFC 3986 §2.1 makes percent-encoded hex case-
2739
// insensitive, so a phantom that arrives encoded as %3a must still match
2840
// the precomputed phantom whose canonical form is %3A.
41+
//
42+
// A pre-scan returns nil before any allocation when no percent-escape
43+
// sequence contains an uppercase A-F hex digit. The "no allocation when
44+
// nothing to lower" path matters for OAuth JWT phantoms and any phantom
45+
// whose only escape is %3A (the encoded colon) — once we've already
46+
// stored the uppercase variant elsewhere on the pair, there is nothing
47+
// new to lower for those.
2948
func encodePhantomLowerForPair(encoded []byte) []byte {
3049
if len(encoded) == 0 {
3150
return nil
3251
}
52+
hasUpperHex := false
53+
for i := 0; i < len(encoded); i++ {
54+
if encoded[i] != '%' || i+2 >= len(encoded) {
55+
continue
56+
}
57+
if isASCIIUpperHex(encoded[i+1]) || isASCIIUpperHex(encoded[i+2]) {
58+
hasUpperHex = true
59+
break
60+
}
61+
}
62+
if !hasUpperHex {
63+
return nil
64+
}
3365
lower := make([]byte, len(encoded))
3466
i := 0
3567
for i < len(encoded) {
36-
// Lowercase only the two hex digits after a %, leave everything
37-
// else untouched so the credential name (which can contain
38-
// upper-case letters by policy) isn't corrupted.
3968
if encoded[i] == '%' && i+2 < len(encoded) {
4069
lower[i] = '%'
4170
lower[i+1] = asciiLowerHex(encoded[i+1])
@@ -46,12 +75,13 @@ func encodePhantomLowerForPair(encoded []byte) []byte {
4675
lower[i] = encoded[i]
4776
i++
4877
}
49-
if bytes.Equal(lower, encoded) {
50-
return nil
51-
}
5278
return lower
5379
}
5480

81+
func isASCIIUpperHex(b byte) bool {
82+
return b >= 'A' && b <= 'F'
83+
}
84+
5585
func asciiLowerHex(b byte) byte {
5686
if b >= 'A' && b <= 'F' {
5787
return b + ('a' - 'A')

0 commit comments

Comments
 (0)