From fe9fdabac92b24fe9b8556a1e358bf94f142756f Mon Sep 17 00:00:00 2001 From: Nikita Nemirovsky Date: Mon, 11 May 2026 18:39:59 +0800 Subject: [PATCH 1/7] feat(proxy): scan url-encoded phantom tokens in form bodies The pass-2 scoped phantom replacement only matched the literal form SLUICE_PHANTOM:, so phantom tokens posted in application/x-www-form-urlencoded bodies (Anthropic and Google OAuth refresh, for example) passed through to the upstream as SLUICE_PHANTOM%3A. The provider returned invalid_grant and the agent dropped back to a fresh interactive OAuth on every refresh. The injection pass, the unbound-phantom strip, and the streaming reader now scan both forms and substitute the URL-encoded secret so the wire body stays valid form data. The holdback buffer in the streaming reader sizes against the longer encoded form so a phantom that straddles a chunk boundary cannot slip through. --- internal/proxy/addon.go | 77 ++++++++++++++++++++---- internal/proxy/addon_test.go | 101 ++++++++++++++++++++++++++++++++ internal/proxy/phantom.go | 25 ++++++-- internal/proxy/phantom_strip.go | 20 ++++++- 4 files changed, 204 insertions(+), 19 deletions(-) diff --git a/internal/proxy/addon.go b/internal/proxy/addon.go index 235b521..45c976a 100644 --- a/internal/proxy/addon.go +++ b/internal/proxy/addon.go @@ -7,6 +7,7 @@ import ( "log" "net" "net/http" + "net/url" "runtime/debug" "sort" "strconv" @@ -641,14 +642,14 @@ func (a *SluiceAddon) Request(f *mitmproxy.Flow) { } // Pass 2+3 on URL query. - if rawQ := f.Request.URL.RawQuery; bytes.Contains([]byte(rawQ), phantomPrefix) { + if rawQ := f.Request.URL.RawQuery; bytesContainsAnyPhantomPrefix([]byte(rawQ)) { f.Request.URL.RawQuery = string( a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query"), ) } // Pass 2+3 on URL path. - if rawP := f.Request.URL.Path; bytes.Contains([]byte(rawP), phantomPrefix) { + if rawP := f.Request.URL.Path; bytesContainsAnyPhantomPrefix([]byte(rawP)) { f.Request.URL.Path = string( a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path"), ) @@ -1211,34 +1212,64 @@ func releasePhantomPairs(pairs []phantomPair) { // hasPhantomPrefix checks whether the request body, headers, or URL // contain the phantom prefix bytes. func (a *SluiceAddon) hasPhantomPrefix(f *mitmproxy.Flow) bool { - if bytes.Contains(f.Request.Body, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(f.Request.Body) { return true } for _, vals := range f.Request.Header { for _, v := range vals { - if bytes.Contains([]byte(v), phantomPrefix) { + if bytesContainsAnyPhantomPrefix([]byte(v)) { return true } } } - if bytes.Contains([]byte(f.Request.URL.RawQuery), phantomPrefix) { + if bytesContainsAnyPhantomPrefix([]byte(f.Request.URL.RawQuery)) { return true } - if bytes.Contains([]byte(f.Request.URL.Path), phantomPrefix) { + if bytesContainsAnyPhantomPrefix([]byte(f.Request.URL.Path)) { return true } return false } +// bytesContainsAnyPhantomPrefix reports whether the data contains either the +// literal or URL-encoded phantom prefix. Form-urlencoded request bodies and +// URL query/path components percent-encode the colon in phantom tokens, so a +// scan that only checks the literal form would miss phantoms about to leak +// through OAuth refresh POSTs and other form-encoded paths. +func bytesContainsAnyPhantomPrefix(data []byte) bool { + return bytes.Contains(data, phantomPrefix) || bytes.Contains(data, urlEncodedPhantomPrefix) +} + // swapPhantomBytes performs Pass 2 (scoped replacement) and Pass 3 (strip // unbound) on a byte slice. +// +// Each pair is matched in both its literal form (`SLUICE_PHANTOM:`, +// the shape used in JSON bodies and raw header values) and its URL-encoded +// form (`SLUICE_PHANTOM%3A`, the shape used in +// application/x-www-form-urlencoded request bodies and URL components). The +// encoded path is what makes OAuth refresh round-trips work: refresh POSTs +// to providers like Anthropic and Google use form-urlencoded bodies, so the +// colon in the phantom token gets percent-encoded on the wire. Without the +// encoded scan the upstream receives `SLUICE_PHANTOM%3A...` literally, +// returns `invalid_grant`, and the agent falls back to a fresh interactive +// OAuth — every time tokens expire. +// +// The literal pass runs first so JSON request bodies (where the colon +// survives unencoded) take the cheap exact-substring path. The encoded +// pass then handles the form-urlencoded case. We url-encode the secret +// before substitution so the wire body remains well-formed +// application/x-www-form-urlencoded data. func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string) []byte { for _, p := range pairs { if bytes.Contains(data, p.phantom) { data = bytes.ReplaceAll(data, p.phantom, p.secret.Bytes()) } + encoded := []byte(url.QueryEscape(string(p.phantom))) + if !bytes.Equal(encoded, p.phantom) && bytes.Contains(data, encoded) { + data = bytes.ReplaceAll(data, encoded, []byte(url.QueryEscape(string(p.secret.Bytes())))) + } } - if bytes.Contains(data, phantomPrefix) { + if bytes.Contains(data, phantomPrefix) || bytes.Contains(data, urlEncodedPhantomPrefix) { data = stripUnboundPhantomsFromProvider(data, a.provider) log.Printf("[ADDON-INJECT] stripped unbound phantom token from %s for %s:%d", location, host, port) } @@ -1246,6 +1277,10 @@ func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host st } // swapPhantomHeaders performs Pass 2+3 on all request headers. +// +// Each pair is matched in both its literal and URL-encoded forms so phantom +// tokens carried in percent-encoded header values (custom cookie schemes, +// query-style header payloads) cannot bypass the swap. func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair, host string, port int) { for key, vals := range f.Request.Header { for i, v := range vals { @@ -1256,8 +1291,13 @@ func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair, vb = bytes.ReplaceAll(vb, p.phantom, p.secret.Bytes()) changed = true } + encoded := []byte(url.QueryEscape(string(p.phantom))) + if !bytes.Equal(encoded, p.phantom) && bytes.Contains(vb, encoded) { + vb = bytes.ReplaceAll(vb, encoded, []byte(url.QueryEscape(string(p.secret.Bytes())))) + changed = true + } } - if bytes.Contains(vb, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(vb) { vb = stripUnboundPhantomsFromProvider(vb, a.provider) changed = true log.Printf("[ADDON-INJECT] stripped unbound phantom token from header %q for %s:%d", key, host, port) @@ -1285,15 +1325,24 @@ type phantomSwapReader struct { // maxPhantomLen returns the length of the longest phantom token in the // pairs list. Used to determine how much data to hold back from the // output buffer to handle tokens that span read boundaries. +// +// The result accounts for both literal phantom tokens (SLUICE_PHANTOM:name) +// and their URL-encoded forms (SLUICE_PHANTOM%3Aname). The encoded form is +// strictly longer because the colon expands to %3A, so a holdback sized for +// the literal form alone would lose URL-encoded phantoms that straddle a +// read boundary. func maxPhantomLen(pairs []phantomPair) int { m := 0 for _, p := range pairs { if len(p.phantom) > m { m = len(p.phantom) } + if encLen := len(url.QueryEscape(string(p.phantom))); encLen > m { + m = encLen + } } // Also account for the generic phantom prefix pattern. - if pLen := len(phantomPrefix) + maxCredNameLen; pLen > m { + if pLen := len(urlEncodedPhantomPrefix) + maxCredNameLen; pLen > m { m = pLen } return m @@ -1340,14 +1389,18 @@ func (r *phantomSwapReader) Read(p []byte) (int, error) { toProcess := r.pending[:safe] r.pending = append([]byte(nil), r.pending[safe:]...) - // Pass 2: scoped replacement. + // Pass 2: scoped replacement, in both literal and URL-encoded forms. for _, pp := range r.pairs { if bytes.Contains(toProcess, pp.phantom) { toProcess = bytes.ReplaceAll(toProcess, pp.phantom, pp.secret.Bytes()) } + encoded := []byte(url.QueryEscape(string(pp.phantom))) + if !bytes.Equal(encoded, pp.phantom) && bytes.Contains(toProcess, encoded) { + toProcess = bytes.ReplaceAll(toProcess, encoded, []byte(url.QueryEscape(string(pp.secret.Bytes())))) + } } - // Pass 3: strip unbound. - if bytes.Contains(toProcess, phantomPrefix) { + // Pass 3: strip unbound, including URL-encoded phantoms. + if bytesContainsAnyPhantomPrefix(toProcess) { toProcess = stripUnboundPhantomsFromProvider(toProcess, r.provider) } diff --git a/internal/proxy/addon_test.go b/internal/proxy/addon_test.go index d7c7394..168b3d8 100644 --- a/internal/proxy/addon_test.go +++ b/internal/proxy/addon_test.go @@ -726,6 +726,107 @@ func TestRequest_StripUnboundPhantoms(t *testing.T) { } } +// TestRequest_PhantomSwapInFormUrlencodedBody covers the OAuth refresh path: +// the agent posts application/x-www-form-urlencoded data and the phantom's +// colon is percent-encoded on the wire (SLUICE_PHANTOM%3Aapi_key). The +// scanner must recognize that form and substitute the URL-encoded secret so +// the upstream still receives a well-formed form body. +func TestRequest_PhantomSwapInFormUrlencodedBody(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real value/with+special&chars"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "POST", "https://api.example.com/oauth/token") + f.Request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + f.Request.Body = []byte("grant_type=refresh_token&refresh_token=SLUICE_PHANTOM%3Aapi_key") + + addon.Requestheaders(f) + addon.Request(f) + + body := string(f.Request.Body) + wantValue := "real+value%2Fwith%2Bspecial%26chars" + if !strings.Contains(body, wantValue) { + t.Fatalf("body = %q, want url-encoded secret %q", body, wantValue) + } + if strings.Contains(body, "SLUICE_PHANTOM%3A") { + t.Fatalf("url-encoded phantom should have been replaced, got %q", body) + } + if strings.Contains(body, "SLUICE_PHANTOM:") { + t.Fatalf("literal phantom should not appear in body, got %q", body) + } +} + +// TestRequest_StripUnboundPhantoms_UrlEncoded asserts that an unbound +// phantom in url-encoded form does not pass through to the upstream. +func TestRequest_StripUnboundPhantoms_UrlEncoded(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{ + "api_key": "real-secret", + "other_key": "other-secret", + }, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "POST", "https://api.example.com/data") + f.Request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + f.Request.Body = []byte("key=SLUICE_PHANTOM%3Aapi_key&unbound=SLUICE_PHANTOM%3Aother_key") + + addon.Requestheaders(f) + addon.Request(f) + + body := string(f.Request.Body) + if !strings.Contains(body, "real-secret") { + t.Fatalf("expected bound phantom to be replaced, got %q", body) + } + if strings.Contains(body, "SLUICE_PHANTOM%3A") { + t.Fatalf("expected unbound url-encoded phantom to be stripped, got %q", body) + } + if strings.Contains(body, "other-secret") { + t.Fatalf("unbound phantom should not be replaced with real credential, got %q", body) + } +} + +// TestRequest_PhantomSwapInUrlEncodedQuery covers the URL query path: +// the phantom is percent-encoded in RawQuery and must be swapped with a +// percent-encoded real value so the resulting query string stays valid. +func TestRequest_PhantomSwapInUrlEncodedQuery(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real-secret"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "GET", "https://api.example.com/data?token=SLUICE_PHANTOM%3Aapi_key") + + addon.Requestheaders(f) + addon.Request(f) + + if got := f.Request.URL.RawQuery; !strings.Contains(got, "real-secret") { + t.Fatalf("RawQuery = %q, want to contain real-secret", got) + } + if got := f.Request.URL.RawQuery; strings.Contains(got, "SLUICE_PHANTOM%3A") { + t.Fatalf("RawQuery still contains url-encoded phantom: %q", got) + } +} + func TestRequest_NoBindingNoChange(t *testing.T) { // No bindings configured. Body without phantom tokens should pass // through unchanged. diff --git a/internal/proxy/phantom.go b/internal/proxy/phantom.go index eb12da4..6d14d3a 100644 --- a/internal/proxy/phantom.go +++ b/internal/proxy/phantom.go @@ -2,15 +2,30 @@ package proxy import "regexp" -// phantomPrefix is the byte prefix for all phantom tokens, used for quick -// detection before applying the more expensive regex strip. +// phantomPrefix is the byte prefix for all phantom tokens in their literal +// form, used for quick detection before applying the more expensive regex +// strip. Literal form appears in JSON bodies, raw header values, and +// anywhere the colon survives unencoded. var phantomPrefix = []byte("SLUICE_PHANTOM:") +// urlEncodedPhantomPrefix is the byte prefix for phantom tokens after URL +// percent-encoding (the colon becomes %3A). Appears in +// application/x-www-form-urlencoded request bodies (e.g. OAuth refresh +// POSTs) and in URL query strings. Without scanning for this form, a +// phantom embedded in form-urlencoded data would pass through unswapped +// and the upstream would receive the literal `SLUICE_PHANTOM%3A...` +// string. The two prefixes are kept side by side rather than computed at +// runtime so the byte scan stays a single allocation-free contains check. +var urlEncodedPhantomPrefix = []byte("SLUICE_PHANTOM%3A") + // phantomStripRe is a last-resort regex for stripping phantom tokens when -// provider.List() cannot enumerate all credential names. It matches word -// characters, dots, and hyphens. +// provider.List() cannot enumerate all credential names. It matches both +// literal (SLUICE_PHANTOM:...) and URL-encoded (SLUICE_PHANTOM%3A...) forms +// so unbound phantoms cannot leak via either encoding. The character class +// matches word characters, dots, and hyphens — the same set used by the +// credential-name and OAuth-suffix grammar. // The primary strip path uses exact matching via provider.List(). -var phantomStripRe = regexp.MustCompile(`SLUICE_PHANTOM:[\w.\-]+`) +var phantomStripRe = regexp.MustCompile(`SLUICE_PHANTOM(?::|%3[Aa])[\w.\-]+`) // PhantomToken returns the placeholder token for a credential name. // Agents use this token in requests. The MITM proxy replaces it with diff --git a/internal/proxy/phantom_strip.go b/internal/proxy/phantom_strip.go index 420796f..694494a 100644 --- a/internal/proxy/phantom_strip.go +++ b/internal/proxy/phantom_strip.go @@ -2,6 +2,7 @@ package proxy import ( "bytes" + "net/url" "sort" "github.com/nemirovsky/sluice/internal/vault" @@ -47,6 +48,19 @@ func stripUnboundPhantomsFromProvider(data []byte, provider vault.Provider) []by []byte(PhantomToken(name)), ) } + // Mirror every phantom with its URL-encoded variant so phantoms carried in + // form-urlencoded bodies or URL components are stripped as cleanly as the + // literal form. The encoded variant is only added when it differs from the + // literal form (i.e. when QueryEscape actually rewrote something) so we + // don't waste a duplicate ReplaceAll on the literal scan path. + encodedPhantoms := make([][]byte, 0, len(phantoms)) + for _, p := range phantoms { + encoded := []byte(url.QueryEscape(string(p))) + if !bytes.Equal(encoded, p) { + encodedPhantoms = append(encodedPhantoms, encoded) + } + } + phantoms = append(phantoms, encodedPhantoms...) // Sort by token length descending so longer phantom tokens are stripped // before shorter prefixes that could corrupt them via substring match. sort.Slice(phantoms, func(i, j int) bool { @@ -58,8 +72,10 @@ func stripUnboundPhantomsFromProvider(data []byte, provider vault.Provider) []by } } // Last-resort regex strip for phantom tokens from providers that - // don't support List() (e.g. env provider). - if bytes.Contains(data, phantomPrefix) { + // don't support List() (e.g. env provider). The regex handles both + // literal (SLUICE_PHANTOM:...) and URL-encoded (SLUICE_PHANTOM%3A...) + // forms so unbound phantoms in form-urlencoded bodies are caught too. + if bytes.Contains(data, phantomPrefix) || bytes.Contains(data, urlEncodedPhantomPrefix) { data = phantomStripRe.ReplaceAll(data, nil) } return data From e92001c9299b2020ce9f22043a6d26897a6488e8 Mon Sep 17 00:00:00 2001 From: Nikita Nemirovsky Date: Mon, 11 May 2026 18:58:27 +0800 Subject: [PATCH 2/7] fix(proxy): path-correct escaping + precomputed encoded phantoms Hot-path response to Copilot review on #40: 1. swapPhantomBytes now chooses between url.QueryEscape and url.PathEscape based on the location label. QueryEscape encodes a space as '+', which is correct for form bodies and URL query strings but corrupts URL path substitutions where a literal '+' is its own character. Paths get PathEscape; bodies, queries, and headers keep QueryEscape. 2. phantomPair gains an encodedPhantom byte slice, populated once at pair-construction time via encodePhantomForPair. The hot scans (swapPhantomBytes, swapPhantomHeaders, phantomSwapReader, maxPhantomLen) read the precomputed bytes instead of recomputing url.QueryEscape on every request, header, and stream chunk. The encoded secret is still computed on demand, but only when the encoded phantom is actually present in the data, so the no-match case is now allocation-free. 3. Added unit tests for the path/query escape divergence and the QueryEscape symmetry case so the location-specific behavior stays pinned. --- internal/proxy/addon.go | 70 ++++++++++++++++++------------ internal/proxy/addon_test.go | 76 +++++++++++++++++++++++++++++++++ internal/proxy/phantom_pairs.go | 26 +++++++++-- internal/proxy/quic.go | 6 ++- internal/proxy/ws.go | 16 +++++-- 5 files changed, 157 insertions(+), 37 deletions(-) diff --git a/internal/proxy/addon.go b/internal/proxy/addon.go index 45c976a..0d29f62 100644 --- a/internal/proxy/addon.go +++ b/internal/proxy/addon.go @@ -1188,9 +1188,11 @@ func (a *SluiceAddon) buildPhantomPairs(host string, port int, proto string) []p pairs = append(pairs, oauthPairs...) continue } + phantom := []byte(PhantomToken(name)) pairs = append(pairs, phantomPair{ - phantom: []byte(PhantomToken(name)), - secret: secret, + phantom: phantom, + encodedPhantom: encodePhantomForPair(phantom), + secret: secret, }) } @@ -1246,30 +1248,42 @@ func bytesContainsAnyPhantomPrefix(data []byte) bool { // Each pair is matched in both its literal form (`SLUICE_PHANTOM:`, // the shape used in JSON bodies and raw header values) and its URL-encoded // form (`SLUICE_PHANTOM%3A`, the shape used in -// application/x-www-form-urlencoded request bodies and URL components). The -// encoded path is what makes OAuth refresh round-trips work: refresh POSTs -// to providers like Anthropic and Google use form-urlencoded bodies, so the -// colon in the phantom token gets percent-encoded on the wire. Without the -// encoded scan the upstream receives `SLUICE_PHANTOM%3A...` literally, -// returns `invalid_grant`, and the agent falls back to a fresh interactive -// OAuth — every time tokens expire. +// application/x-www-form-urlencoded request bodies and URL query strings). +// The encoded path is what makes OAuth refresh round-trips work: refresh +// POSTs to providers like Anthropic and Google use form-urlencoded bodies, +// so the colon in the phantom token gets percent-encoded on the wire. +// Without the encoded scan the upstream receives `SLUICE_PHANTOM%3A...` +// literally, returns `invalid_grant`, and the agent falls back to a fresh +// interactive OAuth — every time tokens expire. // -// The literal pass runs first so JSON request bodies (where the colon -// survives unencoded) take the cheap exact-substring path. The encoded -// pass then handles the form-urlencoded case. We url-encode the secret -// before substitution so the wire body remains well-formed -// application/x-www-form-urlencoded data. +// The encoded phantom is precomputed once per pair (in encodePhantomForPair) +// and stored on phantomPair.encodedPhantom so we don't re-allocate it on +// every body, query, or header scan. The encoded secret is computed on +// demand once per swap call, only when the encoded phantom actually appears. +// +// location chooses between query escaping (body, URL query, header) and +// path escaping (URL path). The two differ in how spaces are encoded: +// QueryEscape uses '+', PathEscape uses '%20'. Using QueryEscape for a path +// substitution would turn a space in the secret into a literal '+' in the +// URL path, which is interpreted as a plus character by the server, not a +// space — corrupting the request. func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string) []byte { + pathContext := location == "URL path" for _, p := range pairs { if bytes.Contains(data, p.phantom) { data = bytes.ReplaceAll(data, p.phantom, p.secret.Bytes()) } - encoded := []byte(url.QueryEscape(string(p.phantom))) - if !bytes.Equal(encoded, p.phantom) && bytes.Contains(data, encoded) { - data = bytes.ReplaceAll(data, encoded, []byte(url.QueryEscape(string(p.secret.Bytes())))) + if len(p.encodedPhantom) > 0 && bytes.Contains(data, p.encodedPhantom) { + var encodedSecret string + if pathContext { + encodedSecret = url.PathEscape(string(p.secret.Bytes())) + } else { + encodedSecret = url.QueryEscape(string(p.secret.Bytes())) + } + data = bytes.ReplaceAll(data, p.encodedPhantom, []byte(encodedSecret)) } } - if bytes.Contains(data, phantomPrefix) || bytes.Contains(data, urlEncodedPhantomPrefix) { + if bytesContainsAnyPhantomPrefix(data) { data = stripUnboundPhantomsFromProvider(data, a.provider) log.Printf("[ADDON-INJECT] stripped unbound phantom token from %s for %s:%d", location, host, port) } @@ -1291,9 +1305,8 @@ func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair, vb = bytes.ReplaceAll(vb, p.phantom, p.secret.Bytes()) changed = true } - encoded := []byte(url.QueryEscape(string(p.phantom))) - if !bytes.Equal(encoded, p.phantom) && bytes.Contains(vb, encoded) { - vb = bytes.ReplaceAll(vb, encoded, []byte(url.QueryEscape(string(p.secret.Bytes())))) + if len(p.encodedPhantom) > 0 && bytes.Contains(vb, p.encodedPhantom) { + vb = bytes.ReplaceAll(vb, p.encodedPhantom, []byte(url.QueryEscape(string(p.secret.Bytes())))) changed = true } } @@ -1330,15 +1343,16 @@ type phantomSwapReader struct { // and their URL-encoded forms (SLUICE_PHANTOM%3Aname). The encoded form is // strictly longer because the colon expands to %3A, so a holdback sized for // the literal form alone would lose URL-encoded phantoms that straddle a -// read boundary. +// read boundary. Uses the precomputed encodedPhantom on each pair so no +// per-chunk allocation is required. func maxPhantomLen(pairs []phantomPair) int { m := 0 for _, p := range pairs { if len(p.phantom) > m { m = len(p.phantom) } - if encLen := len(url.QueryEscape(string(p.phantom))); encLen > m { - m = encLen + if len(p.encodedPhantom) > m { + m = len(p.encodedPhantom) } } // Also account for the generic phantom prefix pattern. @@ -1390,13 +1404,15 @@ func (r *phantomSwapReader) Read(p []byte) (int, error) { r.pending = append([]byte(nil), r.pending[safe:]...) // Pass 2: scoped replacement, in both literal and URL-encoded forms. + // The encoded phantom is precomputed once per pair so this hot path + // only allocates when an encoded phantom is actually present and we + // need the encoded form of the real secret. for _, pp := range r.pairs { if bytes.Contains(toProcess, pp.phantom) { toProcess = bytes.ReplaceAll(toProcess, pp.phantom, pp.secret.Bytes()) } - encoded := []byte(url.QueryEscape(string(pp.phantom))) - if !bytes.Equal(encoded, pp.phantom) && bytes.Contains(toProcess, encoded) { - toProcess = bytes.ReplaceAll(toProcess, encoded, []byte(url.QueryEscape(string(pp.secret.Bytes())))) + if len(pp.encodedPhantom) > 0 && bytes.Contains(toProcess, pp.encodedPhantom) { + toProcess = bytes.ReplaceAll(toProcess, pp.encodedPhantom, []byte(url.QueryEscape(string(pp.secret.Bytes())))) } } // Pass 3: strip unbound, including URL-encoded phantoms. diff --git a/internal/proxy/addon_test.go b/internal/proxy/addon_test.go index 168b3d8..a3060c8 100644 --- a/internal/proxy/addon_test.go +++ b/internal/proxy/addon_test.go @@ -827,6 +827,82 @@ func TestRequest_PhantomSwapInUrlEncodedQuery(t *testing.T) { } } +// TestSwapPhantomBytes_PathUsesPathEscape exercises the URL-path branch of +// the swap directly. A secret that contains a space must be encoded as +// %20 (PathEscape), not '+' (QueryEscape), so the path stays semantically +// correct. Net/url's parser normally decodes %3A in URL.Path before our +// scan ever runs, but the swap helper is shared with QUIC and streaming +// paths where the encoded phantom can reach it, and the encoding choice +// must still be path-correct. +func TestSwapPhantomBytes_PathUsesPathEscape(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real secret"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + + phantom := []byte(PhantomToken("api_key")) + pairs := []phantomPair{{ + phantom: phantom, + encodedPhantom: encodePhantomForPair(phantom), + secret: vault.NewSecureBytes("real secret"), + }} + defer releasePhantomPairs(pairs) + + in := []byte("/v1/SLUICE_PHANTOM%3Aapi_key/resource") + out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL path") + got := string(out) + + if !strings.Contains(got, "real%20secret") { + t.Fatalf("path swap = %q, want PathEscape (real%%20secret)", got) + } + if strings.Contains(got, "real+secret") { + t.Fatalf("path swap = %q, must not contain '+' for a space in a URL path", got) + } + if strings.Contains(got, "SLUICE_PHANTOM") { + t.Fatalf("path swap = %q, still contains phantom", got) + } +} + +// TestSwapPhantomBytes_QueryUsesQueryEscape covers the symmetric query case: +// the same swap helper must encode spaces as '+' for query strings and form +// bodies so the receiving application/x-www-form-urlencoded parser sees a +// space. +func TestSwapPhantomBytes_QueryUsesQueryEscape(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real secret"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + + phantom := []byte(PhantomToken("api_key")) + pairs := []phantomPair{{ + phantom: phantom, + encodedPhantom: encodePhantomForPair(phantom), + secret: vault.NewSecureBytes("real secret"), + }} + defer releasePhantomPairs(pairs) + + in := []byte("token=SLUICE_PHANTOM%3Aapi_key") + out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL query") + got := string(out) + + if !strings.Contains(got, "real+secret") { + t.Fatalf("query swap = %q, want QueryEscape ('+' for space)", got) + } + if strings.Contains(got, "SLUICE_PHANTOM") { + t.Fatalf("query swap = %q, still contains phantom", got) + } +} + func TestRequest_NoBindingNoChange(t *testing.T) { // No bindings configured. Body without phantom tokens should pass // through unchanged. diff --git a/internal/proxy/phantom_pairs.go b/internal/proxy/phantom_pairs.go index ca6596a..31274fe 100644 --- a/internal/proxy/phantom_pairs.go +++ b/internal/proxy/phantom_pairs.go @@ -1,11 +1,25 @@ package proxy import ( + "bytes" "log" + "net/url" "github.com/nemirovsky/sluice/internal/vault" ) +// encodePhantomForPair returns the URL query-escaped form of a phantom +// token, or nil when QueryEscape would leave the bytes unchanged. The +// "nil when unchanged" convention lets the hot-path swap skip a redundant +// bytes.Contains scan for the literal form a second time. +func encodePhantomForPair(phantom []byte) []byte { + encoded := []byte(url.QueryEscape(string(phantom))) + if bytes.Equal(encoded, phantom) { + return nil + } + return encoded +} + // maxProxyBody limits the request/response body size that MITM proxies // (HTTPS and QUIC) read for phantom token replacement. 16 MiB is // sufficient for typical API traffic while preventing memory exhaustion @@ -24,15 +38,19 @@ func buildOAuthPhantomPairs(name string, secret vault.SecureBytes, logPrefix str return nil, err } accessSecret := vault.NewSecureBytes(cred.AccessToken) + accessPhantom := []byte(oauthPhantomAccess(name, cred.AccessToken)) pairs := []phantomPair{{ - phantom: []byte(oauthPhantomAccess(name, cred.AccessToken)), - secret: accessSecret, + phantom: accessPhantom, + encodedPhantom: encodePhantomForPair(accessPhantom), + secret: accessSecret, }} if cred.RefreshToken != "" { refreshSecret := vault.NewSecureBytes(cred.RefreshToken) + refreshPhantom := []byte(oauthPhantomRefresh(name, cred.RefreshToken)) pairs = append(pairs, phantomPair{ - phantom: []byte(oauthPhantomRefresh(name, cred.RefreshToken)), - secret: refreshSecret, + phantom: refreshPhantom, + encodedPhantom: encodePhantomForPair(refreshPhantom), + secret: refreshSecret, }) } return pairs, nil diff --git a/internal/proxy/quic.go b/internal/proxy/quic.go index 4c0bc50..be5f6d8 100644 --- a/internal/proxy/quic.go +++ b/internal/proxy/quic.go @@ -572,9 +572,11 @@ func (q *QUICProxy) buildPhantomPairs(host string, port int) []phantomPair { pairs = append(pairs, oauthPairs...) continue } + phantom := []byte(PhantomToken(name)) pairs = append(pairs, phantomPair{ - phantom: []byte(PhantomToken(name)), - secret: secret, + phantom: phantom, + encodedPhantom: encodePhantomForPair(phantom), + secret: secret, }) } } diff --git a/internal/proxy/ws.go b/internal/proxy/ws.go index 5d146f0..f05e2c2 100644 --- a/internal/proxy/ws.go +++ b/internal/proxy/ws.go @@ -371,9 +371,15 @@ func (wp *WSProxy) UpdateRules(blockConfigs []WSBlockRuleConfig, redactConfigs [ } // phantomPair holds a phantom token and its corresponding real credential. +// encodedPhantom is the URL query-escaped form of phantom, precomputed once +// per pair so the hot-path swap does not re-allocate it on every request, +// stream chunk, or header. It is empty when QueryEscape would not change the +// bytes (which is the case for the generic shape today, but keeping the +// check is cheap and lets us guard against future grammar tweaks). type phantomPair struct { - phantom []byte - secret vault.SecureBytes + phantom []byte + encodedPhantom []byte + secret vault.SecureBytes } // Relay runs bidirectional WebSocket frame forwarding between agent and @@ -400,9 +406,11 @@ func (wp *WSProxy) Relay(agentConn, upstreamConn net.Conn, host string, port int pairs = append(pairs, oauthPairs...) continue } + phantom := []byte(PhantomToken(name)) pairs = append(pairs, phantomPair{ - phantom: []byte(PhantomToken(name)), - secret: secret, + phantom: phantom, + encodedPhantom: encodePhantomForPair(phantom), + secret: secret, }) } } From e724cfe7c9f67ec746bfb17542d8e94d18a866ad Mon Sep 17 00:00:00 2001 From: Nikita Nemirovsky Date: Mon, 11 May 2026 19:12:09 +0800 Subject: [PATCH 3/7] fix(proxy): match case-insensitive percent encoding (%3a) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 3986 §2.1 makes percent-encoded hex digits case-insensitive. Go's url.QueryEscape always emits uppercase, but a third-party client can emit lowercase, so a scan that only matched %3A would miss SLUICE_PHANTOM%3a... and let the phantom leak. Adds a urlEncodedPhantomPrefixLower constant and an encodedPhantomLower field on phantomPair, populated once at pair-construction time. The swapPhantomBytes, swapPhantomHeaders, streaming reader, prefix detector, and unbound-strip path all check both casings. The replacement secret is escaped once on first hit and reused across both casing branches so the cost stays linear in number-of-encoded-forms, not pairs. Adds tests for the lowercase form on both the bound-swap and unbound- strip paths. --- internal/proxy/addon.go | 90 +++++++++++++++++++++++++-------- internal/proxy/addon_test.go | 67 ++++++++++++++++++++++++ internal/proxy/phantom.go | 11 +++- internal/proxy/phantom_pairs.go | 55 +++++++++++++++++--- internal/proxy/phantom_strip.go | 30 +++++++---- internal/proxy/quic.go | 8 +-- internal/proxy/ws.go | 32 ++++++++---- 7 files changed, 240 insertions(+), 53 deletions(-) diff --git a/internal/proxy/addon.go b/internal/proxy/addon.go index 0d29f62..6f6ae0d 100644 --- a/internal/proxy/addon.go +++ b/internal/proxy/addon.go @@ -1189,10 +1189,12 @@ func (a *SluiceAddon) buildPhantomPairs(host string, port int, proto string) []p continue } phantom := []byte(PhantomToken(name)) + encoded := encodePhantomForPair(phantom) pairs = append(pairs, phantomPair{ - phantom: phantom, - encodedPhantom: encodePhantomForPair(phantom), - secret: secret, + phantom: phantom, + encodedPhantom: encoded, + encodedPhantomLower: encodePhantomLowerForPair(encoded), + secret: secret, }) } @@ -1233,13 +1235,16 @@ func (a *SluiceAddon) hasPhantomPrefix(f *mitmproxy.Flow) bool { return false } -// bytesContainsAnyPhantomPrefix reports whether the data contains either the -// literal or URL-encoded phantom prefix. Form-urlencoded request bodies and -// URL query/path components percent-encode the colon in phantom tokens, so a -// scan that only checks the literal form would miss phantoms about to leak -// through OAuth refresh POSTs and other form-encoded paths. +// bytesContainsAnyPhantomPrefix reports whether the data contains the +// literal phantom prefix or either case of the URL-encoded prefix (%3A or +// %3a). Form-urlencoded request bodies and URL query/path components +// percent-encode the colon in phantom tokens, and RFC 3986 §2.1 makes the +// hex digits case-insensitive, so a scan that only checks one case would +// miss phantoms emitted by clients that lowercase their percent escapes. func bytesContainsAnyPhantomPrefix(data []byte) bool { - return bytes.Contains(data, phantomPrefix) || bytes.Contains(data, urlEncodedPhantomPrefix) + return bytes.Contains(data, phantomPrefix) || + bytes.Contains(data, urlEncodedPhantomPrefix) || + bytes.Contains(data, urlEncodedPhantomPrefixLower) } // swapPhantomBytes performs Pass 2 (scoped replacement) and Pass 3 (strip @@ -1273,14 +1278,28 @@ func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host st if bytes.Contains(data, p.phantom) { data = bytes.ReplaceAll(data, p.phantom, p.secret.Bytes()) } - if len(p.encodedPhantom) > 0 && bytes.Contains(data, p.encodedPhantom) { - var encodedSecret string + // Encoded swap covers both uppercase (%3A, the canonical form Go + // emits) and lowercase (%3a, valid per RFC 3986 §2.1). The + // replacement secret is escaped once on first hit and reused so + // the cost stays linear in number-of-encoded-forms, not pairs. + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret != nil { + return + } if pathContext { - encodedSecret = url.PathEscape(string(p.secret.Bytes())) + encodedSecret = []byte(url.PathEscape(string(p.secret.Bytes()))) } else { - encodedSecret = url.QueryEscape(string(p.secret.Bytes())) + encodedSecret = []byte(url.QueryEscape(string(p.secret.Bytes()))) } - data = bytes.ReplaceAll(data, p.encodedPhantom, []byte(encodedSecret)) + } + if len(p.encodedPhantom) > 0 && bytes.Contains(data, p.encodedPhantom) { + ensureEncodedSecret() + data = bytes.ReplaceAll(data, p.encodedPhantom, encodedSecret) + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(data, p.encodedPhantomLower) { + ensureEncodedSecret() + data = bytes.ReplaceAll(data, p.encodedPhantomLower, encodedSecret) } } if bytesContainsAnyPhantomPrefix(data) { @@ -1305,8 +1324,20 @@ func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair, vb = bytes.ReplaceAll(vb, p.phantom, p.secret.Bytes()) changed = true } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = []byte(url.QueryEscape(string(p.secret.Bytes()))) + } + } if len(p.encodedPhantom) > 0 && bytes.Contains(vb, p.encodedPhantom) { - vb = bytes.ReplaceAll(vb, p.encodedPhantom, []byte(url.QueryEscape(string(p.secret.Bytes())))) + ensureEncodedSecret() + vb = bytes.ReplaceAll(vb, p.encodedPhantom, encodedSecret) + changed = true + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(vb, p.encodedPhantomLower) { + ensureEncodedSecret() + vb = bytes.ReplaceAll(vb, p.encodedPhantomLower, encodedSecret) changed = true } } @@ -1354,8 +1385,13 @@ func maxPhantomLen(pairs []phantomPair) int { if len(p.encodedPhantom) > m { m = len(p.encodedPhantom) } + if len(p.encodedPhantomLower) > m { + m = len(p.encodedPhantomLower) + } } - // Also account for the generic phantom prefix pattern. + // Also account for the generic phantom prefix pattern. Uppercase and + // lowercase encoded prefixes are the same length, so either works as + // the lower bound. if pLen := len(urlEncodedPhantomPrefix) + maxCredNameLen; pLen > m { m = pLen } @@ -1403,16 +1439,28 @@ func (r *phantomSwapReader) Read(p []byte) (int, error) { toProcess := r.pending[:safe] r.pending = append([]byte(nil), r.pending[safe:]...) - // Pass 2: scoped replacement, in both literal and URL-encoded forms. - // The encoded phantom is precomputed once per pair so this hot path - // only allocates when an encoded phantom is actually present and we - // need the encoded form of the real secret. + // Pass 2: scoped replacement, in both literal and URL-encoded forms + // (both case variants of %3A). The encoded phantom is precomputed + // once per pair so this hot path only allocates when an encoded + // phantom is actually present and we need the encoded form of the + // real secret. for _, pp := range r.pairs { if bytes.Contains(toProcess, pp.phantom) { toProcess = bytes.ReplaceAll(toProcess, pp.phantom, pp.secret.Bytes()) } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = []byte(url.QueryEscape(string(pp.secret.Bytes()))) + } + } if len(pp.encodedPhantom) > 0 && bytes.Contains(toProcess, pp.encodedPhantom) { - toProcess = bytes.ReplaceAll(toProcess, pp.encodedPhantom, []byte(url.QueryEscape(string(pp.secret.Bytes())))) + ensureEncodedSecret() + toProcess = bytes.ReplaceAll(toProcess, pp.encodedPhantom, encodedSecret) + } + if len(pp.encodedPhantomLower) > 0 && bytes.Contains(toProcess, pp.encodedPhantomLower) { + ensureEncodedSecret() + toProcess = bytes.ReplaceAll(toProcess, pp.encodedPhantomLower, encodedSecret) } } // Pass 3: strip unbound, including URL-encoded phantoms. diff --git a/internal/proxy/addon_test.go b/internal/proxy/addon_test.go index a3060c8..4f806f3 100644 --- a/internal/proxy/addon_test.go +++ b/internal/proxy/addon_test.go @@ -903,6 +903,73 @@ func TestSwapPhantomBytes_QueryUsesQueryEscape(t *testing.T) { } } +// TestRequest_PhantomSwapInFormUrlencodedBody_LowercaseHex covers the +// case-insensitivity of percent-encoded hex (RFC 3986 §2.1). A client may +// emit %3a instead of %3A. The scanner must still swap the phantom and +// the upstream must receive the real secret. +func TestRequest_PhantomSwapInFormUrlencodedBody_LowercaseHex(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "real-secret"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "POST", "https://api.example.com/oauth/token") + f.Request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + f.Request.Body = []byte("grant_type=refresh_token&refresh_token=SLUICE_PHANTOM%3aapi_key") + + addon.Requestheaders(f) + addon.Request(f) + + body := string(f.Request.Body) + if !strings.Contains(body, "real-secret") { + t.Fatalf("body = %q, want real-secret", body) + } + if strings.Contains(body, "SLUICE_PHANTOM%3a") || strings.Contains(body, "SLUICE_PHANTOM%3A") { + t.Fatalf("body = %q, must not contain any encoded phantom variant", body) + } +} + +// TestRequest_StripUnboundPhantoms_LowercaseHex asserts the unbound-strip +// path also catches the lowercase percent-encoded variant. +func TestRequest_StripUnboundPhantoms_LowercaseHex(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{ + "api_key": "real-secret", + "other_key": "other-secret", + }, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + f := newTestFlow(client, "POST", "https://api.example.com/data") + f.Request.Body = []byte("k=SLUICE_PHANTOM%3aapi_key&u=SLUICE_PHANTOM%3aother_key") + + addon.Requestheaders(f) + addon.Request(f) + + body := string(f.Request.Body) + if !strings.Contains(body, "real-secret") { + t.Fatalf("bound phantom not swapped, body = %q", body) + } + if strings.Contains(body, "SLUICE_PHANTOM") { + t.Fatalf("unbound lowercase phantom not stripped, body = %q", body) + } + if strings.Contains(body, "other-secret") { + t.Fatalf("unbound phantom must not be replaced with real credential, body = %q", body) + } +} + func TestRequest_NoBindingNoChange(t *testing.T) { // No bindings configured. Body without phantom tokens should pass // through unchanged. diff --git a/internal/proxy/phantom.go b/internal/proxy/phantom.go index 6d14d3a..d19f125 100644 --- a/internal/proxy/phantom.go +++ b/internal/proxy/phantom.go @@ -16,7 +16,16 @@ var phantomPrefix = []byte("SLUICE_PHANTOM:") // and the upstream would receive the literal `SLUICE_PHANTOM%3A...` // string. The two prefixes are kept side by side rather than computed at // runtime so the byte scan stays a single allocation-free contains check. -var urlEncodedPhantomPrefix = []byte("SLUICE_PHANTOM%3A") +// +// Percent-encoding hex digits are case-insensitive per RFC 3986 §2.1, so +// callers may emit either %3A or %3a. Go's url.QueryEscape always produces +// uppercase, but third-party clients can produce lowercase. The lowercase +// variant is stored alongside the uppercase one so the prefix scan catches +// both forms. +var ( + urlEncodedPhantomPrefix = []byte("SLUICE_PHANTOM%3A") + urlEncodedPhantomPrefixLower = []byte("SLUICE_PHANTOM%3a") +) // phantomStripRe is a last-resort regex for stripping phantom tokens when // provider.List() cannot enumerate all credential names. It matches both diff --git a/internal/proxy/phantom_pairs.go b/internal/proxy/phantom_pairs.go index 31274fe..272cf24 100644 --- a/internal/proxy/phantom_pairs.go +++ b/internal/proxy/phantom_pairs.go @@ -20,6 +20,45 @@ func encodePhantomForPair(phantom []byte) []byte { return encoded } +// encodePhantomLowerForPair returns the lowercase-hex variant of the +// uppercase-encoded phantom, or nil when the input is nil or the +// lowercased form is identical (which happens when the encoding has no +// hex digits A-F). RFC 3986 §2.1 makes percent-encoded hex case- +// insensitive, so a phantom that arrives encoded as %3a must still match +// the precomputed phantom whose canonical form is %3A. +func encodePhantomLowerForPair(encoded []byte) []byte { + if len(encoded) == 0 { + return nil + } + lower := make([]byte, len(encoded)) + i := 0 + for i < len(encoded) { + // Lowercase only the two hex digits after a %, leave everything + // else untouched so the credential name (which can contain + // upper-case letters by policy) isn't corrupted. + if encoded[i] == '%' && i+2 < len(encoded) { + lower[i] = '%' + lower[i+1] = asciiLowerHex(encoded[i+1]) + lower[i+2] = asciiLowerHex(encoded[i+2]) + i += 3 + continue + } + lower[i] = encoded[i] + i++ + } + if bytes.Equal(lower, encoded) { + return nil + } + return lower +} + +func asciiLowerHex(b byte) byte { + if b >= 'A' && b <= 'F' { + return b + ('a' - 'A') + } + return b +} + // maxProxyBody limits the request/response body size that MITM proxies // (HTTPS and QUIC) read for phantom token replacement. 16 MiB is // sufficient for typical API traffic while preventing memory exhaustion @@ -39,18 +78,22 @@ func buildOAuthPhantomPairs(name string, secret vault.SecureBytes, logPrefix str } accessSecret := vault.NewSecureBytes(cred.AccessToken) accessPhantom := []byte(oauthPhantomAccess(name, cred.AccessToken)) + accessEncoded := encodePhantomForPair(accessPhantom) pairs := []phantomPair{{ - phantom: accessPhantom, - encodedPhantom: encodePhantomForPair(accessPhantom), - secret: accessSecret, + phantom: accessPhantom, + encodedPhantom: accessEncoded, + encodedPhantomLower: encodePhantomLowerForPair(accessEncoded), + secret: accessSecret, }} if cred.RefreshToken != "" { refreshSecret := vault.NewSecureBytes(cred.RefreshToken) refreshPhantom := []byte(oauthPhantomRefresh(name, cred.RefreshToken)) + refreshEncoded := encodePhantomForPair(refreshPhantom) pairs = append(pairs, phantomPair{ - phantom: refreshPhantom, - encodedPhantom: encodePhantomForPair(refreshPhantom), - secret: refreshSecret, + phantom: refreshPhantom, + encodedPhantom: refreshEncoded, + encodedPhantomLower: encodePhantomLowerForPair(refreshEncoded), + secret: refreshSecret, }) } return pairs, nil diff --git a/internal/proxy/phantom_strip.go b/internal/proxy/phantom_strip.go index 694494a..a750ed3 100644 --- a/internal/proxy/phantom_strip.go +++ b/internal/proxy/phantom_strip.go @@ -48,16 +48,22 @@ func stripUnboundPhantomsFromProvider(data []byte, provider vault.Provider) []by []byte(PhantomToken(name)), ) } - // Mirror every phantom with its URL-encoded variant so phantoms carried in - // form-urlencoded bodies or URL components are stripped as cleanly as the - // literal form. The encoded variant is only added when it differs from the - // literal form (i.e. when QueryEscape actually rewrote something) so we - // don't waste a duplicate ReplaceAll on the literal scan path. - encodedPhantoms := make([][]byte, 0, len(phantoms)) + // Mirror every phantom with its URL-encoded variants so phantoms carried + // in form-urlencoded bodies or URL components are stripped as cleanly as + // the literal form. Two variants are added: the canonical uppercase form + // emitted by Go's url.QueryEscape, and a lowercase-hex form that other + // clients may produce (RFC 3986 §2.1 makes percent-encoded hex case- + // insensitive). Variants are only added when they differ from forms we + // already cover, so the literal scan path is not duplicated. + encodedPhantoms := make([][]byte, 0, len(phantoms)*2) for _, p := range phantoms { encoded := []byte(url.QueryEscape(string(p))) - if !bytes.Equal(encoded, p) { - encodedPhantoms = append(encodedPhantoms, encoded) + if bytes.Equal(encoded, p) { + continue + } + encodedPhantoms = append(encodedPhantoms, encoded) + if lower := encodePhantomLowerForPair(encoded); lower != nil { + encodedPhantoms = append(encodedPhantoms, lower) } } phantoms = append(phantoms, encodedPhantoms...) @@ -72,10 +78,12 @@ func stripUnboundPhantomsFromProvider(data []byte, provider vault.Provider) []by } } // Last-resort regex strip for phantom tokens from providers that - // don't support List() (e.g. env provider). The regex handles both - // literal (SLUICE_PHANTOM:...) and URL-encoded (SLUICE_PHANTOM%3A...) + // don't support List() (e.g. env provider). The regex handles literal + // (SLUICE_PHANTOM:...) and URL-encoded (SLUICE_PHANTOM%3A.../%3a...) // forms so unbound phantoms in form-urlencoded bodies are caught too. - if bytes.Contains(data, phantomPrefix) || bytes.Contains(data, urlEncodedPhantomPrefix) { + if bytes.Contains(data, phantomPrefix) || + bytes.Contains(data, urlEncodedPhantomPrefix) || + bytes.Contains(data, urlEncodedPhantomPrefixLower) { data = phantomStripRe.ReplaceAll(data, nil) } return data diff --git a/internal/proxy/quic.go b/internal/proxy/quic.go index be5f6d8..15b8ebd 100644 --- a/internal/proxy/quic.go +++ b/internal/proxy/quic.go @@ -573,10 +573,12 @@ func (q *QUICProxy) buildPhantomPairs(host string, port int) []phantomPair { continue } phantom := []byte(PhantomToken(name)) + encoded := encodePhantomForPair(phantom) pairs = append(pairs, phantomPair{ - phantom: phantom, - encodedPhantom: encodePhantomForPair(phantom), - secret: secret, + phantom: phantom, + encodedPhantom: encoded, + encodedPhantomLower: encodePhantomLowerForPair(encoded), + secret: secret, }) } } diff --git a/internal/proxy/ws.go b/internal/proxy/ws.go index f05e2c2..12fc9a5 100644 --- a/internal/proxy/ws.go +++ b/internal/proxy/ws.go @@ -371,15 +371,23 @@ func (wp *WSProxy) UpdateRules(blockConfigs []WSBlockRuleConfig, redactConfigs [ } // phantomPair holds a phantom token and its corresponding real credential. -// encodedPhantom is the URL query-escaped form of phantom, precomputed once -// per pair so the hot-path swap does not re-allocate it on every request, -// stream chunk, or header. It is empty when QueryEscape would not change the -// bytes (which is the case for the generic shape today, but keeping the -// check is cheap and lets us guard against future grammar tweaks). +// encodedPhantom is the URL query-escaped form of phantom (uppercase hex), +// and encodedPhantomLower is the same form with the percent-encoded colon +// in lowercase. Both are precomputed once per pair so the hot-path swap +// does not re-allocate on every request, stream chunk, or header. +// +// The lowercase variant exists because percent-encoded hex digits are +// case-insensitive per RFC 3986 §2.1: Go's url.QueryEscape always emits +// uppercase, but third-party clients can emit lowercase, and a phantom +// scan that only checked one casing would let `SLUICE_PHANTOM%3a...` +// through to the upstream. The fields are nil when no encoded form would +// differ from the literal phantom bytes, which keeps the no-encoded-form +// branch allocation-free. type phantomPair struct { - phantom []byte - encodedPhantom []byte - secret vault.SecureBytes + phantom []byte + encodedPhantom []byte + encodedPhantomLower []byte + secret vault.SecureBytes } // Relay runs bidirectional WebSocket frame forwarding between agent and @@ -407,10 +415,12 @@ func (wp *WSProxy) Relay(agentConn, upstreamConn net.Conn, host string, port int continue } phantom := []byte(PhantomToken(name)) + encoded := encodePhantomForPair(phantom) pairs = append(pairs, phantomPair{ - phantom: phantom, - encodedPhantom: encodePhantomForPair(phantom), - secret: secret, + phantom: phantom, + encodedPhantom: encoded, + encodedPhantomLower: encodePhantomLowerForPair(encoded), + secret: secret, }) } } From 2c24b8a212ed29416703ead401cb9415b8ec1c31 Mon Sep 17 00:00:00 2001 From: Nikita Nemirovsky Date: Mon, 11 May 2026 21:02:42 +0800 Subject: [PATCH 4/7] test(proxy): cover url-encoded phantom spanning a streaming read boundary Adds TestStreamRequestModifier_UrlEncodedPhantomSpanningReads, which places the url-encoded phantom in the middle of a 32 KiB body so it straddles a read chunk. Verifies the holdback buffer in phantomSwapReader is sized for the encoded form (two bytes longer than the literal) so an encoded phantom on a boundary cannot slip through unswapped. Pins the maxPhantomLen change from e92001c (encoded-aware sizing) against regression. --- internal/proxy/addon_test.go | 43 ++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/internal/proxy/addon_test.go b/internal/proxy/addon_test.go index 4f806f3..111d53b 100644 --- a/internal/proxy/addon_test.go +++ b/internal/proxy/addon_test.go @@ -1116,6 +1116,49 @@ func TestStreamRequestModifier_LargeBodySpanningReads(t *testing.T) { } } +// TestStreamRequestModifier_UrlEncodedPhantomSpanningReads verifies that +// the streaming reader's holdback buffer is sized for the URL-encoded form +// of a phantom (SLUICE_PHANTOM%3Aapi_key), not just the literal one. The +// encoded form is two bytes longer because the colon expands to %3A, and a +// holdback sized to the literal length alone would let an encoded phantom +// straddling a read boundary slip through unswapped. +func TestStreamRequestModifier_UrlEncodedPhantomSpanningReads(t *testing.T) { + addon := newTestAddonWithCreds( + t, + map[string]string{"api_key": "REPLACED"}, + []vault.Binding{{ + Destination: "api.example.com", + Ports: []int{443}, + Credential: "api_key", + }}, + ) + client := setupAddonConn(addon, "api.example.com:443") + + prefix := bytes.Repeat([]byte("A"), 16*1024) + phantom := []byte("SLUICE_PHANTOM%3Aapi_key") + suffix := bytes.Repeat([]byte("B"), 16*1024) + body := make([]byte, 0, len(prefix)+len(phantom)+len(suffix)) + body = append(body, prefix...) + body = append(body, phantom...) + body = append(body, suffix...) + + f := newTestFlow(client, "POST", "https://api.example.com/oauth/token") + f.Request.Header.Set("Content-Type", "application/x-www-form-urlencoded") + reader := addon.StreamRequestModifier(f, bytes.NewReader(body)) + + out, err := io.ReadAll(reader) + if err != nil { + t.Fatalf("ReadAll failed: %v", err) + } + + if bytes.Contains(out, []byte("SLUICE_PHANTOM%3A")) || bytes.Contains(out, []byte("SLUICE_PHANTOM%3a")) { + t.Fatalf("url-encoded phantom must not survive streaming swap, got: %q", out[16*1024-8:16*1024+50]) + } + if !bytes.Contains(out, []byte("REPLACED")) { + t.Fatalf("expected real credential in streamed output") + } +} + func TestStreamRequestModifier_StripUnbound(t *testing.T) { addon := newTestAddonWithCreds( t, From b5d4a2e66599626f1ab66e3c9776316c5b5206ce Mon Sep 17 00:00:00 2001 From: Nikita Nemirovsky Date: Mon, 11 May 2026 21:14:06 +0800 Subject: [PATCH 5/7] fix(proxy): url-encoded phantom swap for QUIC and WebSocket paths Plus byte-level escape helpers so the secret never crosses into an immutable Go string. QUIC and WebSocket each carry a per-pair phantom swap that previously only matched the literal SLUICE_PHANTOM: form. With the new precomputed encodedPhantom and encodedPhantomLower fields available on phantomPair, the QUIC replacePhantomInHeaders, replacePhantomInBody, and the WebSocket relayFrames text-frame swap now also match SLUICE_PHANTOM%3A and SLUICE_PHANTOM%3a, and trigger unbound-phantom stripping when any encoded prefix is present. Without this, an HTTP/3 OAuth refresh or a form-encoded WebSocket text payload would have leaked the encoded phantom unchanged. The new queryEscapeBytes and pathEscapeBytes helpers are byte-in, byte-out URL encoders that mirror net/url.{Query,Path}Escape's output rules. SwapPhantomBytes, swapPhantomHeaders, phantomSwapReader.Read, the QUIC swap, and the WebSocket swap all use them instead of `url.QueryEscape(string(secret.Bytes()))`, so the secret stays in byte slices that the caller can zero via SecureBytes.Release(). The unreserved character sets follow RFC 3986: query-component (alpha / digit / - _ . ~) plus space->+, and path-segment (the same set plus sub-delims $ & + , ; = : @). phantom_strip.go now reuses encodePhantomForPair and encodePhantomLowerForPair instead of duplicating the URL-encoding rule inline, and drops the net/url import. --- internal/proxy/addon.go | 9 ++-- internal/proxy/phantom_pairs.go | 84 +++++++++++++++++++++++++++++++++ internal/proxy/phantom_strip.go | 5 +- internal/proxy/quic.go | 40 ++++++++++++++-- internal/proxy/ws.go | 24 ++++++++-- 5 files changed, 148 insertions(+), 14 deletions(-) diff --git a/internal/proxy/addon.go b/internal/proxy/addon.go index 6f6ae0d..f4a1fa8 100644 --- a/internal/proxy/addon.go +++ b/internal/proxy/addon.go @@ -7,7 +7,6 @@ import ( "log" "net" "net/http" - "net/url" "runtime/debug" "sort" "strconv" @@ -1288,9 +1287,9 @@ func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host st return } if pathContext { - encodedSecret = []byte(url.PathEscape(string(p.secret.Bytes()))) + encodedSecret = pathEscapeBytes(p.secret.Bytes()) } else { - encodedSecret = []byte(url.QueryEscape(string(p.secret.Bytes()))) + encodedSecret = queryEscapeBytes(p.secret.Bytes()) } } if len(p.encodedPhantom) > 0 && bytes.Contains(data, p.encodedPhantom) { @@ -1327,7 +1326,7 @@ func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair, var encodedSecret []byte ensureEncodedSecret := func() { if encodedSecret == nil { - encodedSecret = []byte(url.QueryEscape(string(p.secret.Bytes()))) + encodedSecret = queryEscapeBytes(p.secret.Bytes()) } } if len(p.encodedPhantom) > 0 && bytes.Contains(vb, p.encodedPhantom) { @@ -1451,7 +1450,7 @@ func (r *phantomSwapReader) Read(p []byte) (int, error) { var encodedSecret []byte ensureEncodedSecret := func() { if encodedSecret == nil { - encodedSecret = []byte(url.QueryEscape(string(pp.secret.Bytes()))) + encodedSecret = queryEscapeBytes(pp.secret.Bytes()) } } if len(pp.encodedPhantom) > 0 && bytes.Contains(toProcess, pp.encodedPhantom) { diff --git a/internal/proxy/phantom_pairs.go b/internal/proxy/phantom_pairs.go index 272cf24..00f92d2 100644 --- a/internal/proxy/phantom_pairs.go +++ b/internal/proxy/phantom_pairs.go @@ -59,6 +59,90 @@ func asciiLowerHex(b byte) byte { return b } +// queryEscapeBytes is a byte-in, byte-out form-component URL encoder that +// mirrors net/url.QueryEscape's output rules without ever materializing +// the input as a Go string. url.QueryEscape's signature is +// `func(string) string`, which forces callers to wrap a credential's +// SecureBytes via `url.QueryEscape(string(secret.Bytes()))` and leaves +// two immutable string copies of the secret on the heap that +// SecureBytes.Release() cannot zero. Operating on []byte throughout keeps +// the secret only in slices the caller can clear. +// +// The unreserved character set follows RFC 3986 §2.3 plus Go's +// net/url-compatible additions: spaces become '+' (form encoding), and +// everything outside the unreserved set is percent-encoded with +// uppercase hex (the canonical form Go and most clients emit). +func queryEscapeBytes(src []byte) []byte { + dst := make([]byte, 0, len(src)) + for _, c := range src { + switch { + case c == ' ': + dst = append(dst, '+') + case shouldNotEscapeQueryComponent(c): + dst = append(dst, c) + default: + dst = append(dst, '%', hexUpper(c>>4), hexUpper(c&0x0F)) + } + } + return dst +} + +// pathEscapeBytes is the byte-level analogue of net/url.PathEscape. It +// preserves URL-path semantics: spaces become %20 (not '+'), and a +// slightly larger unreserved set is honored (sub-delims that are legal +// in path segments are emitted verbatim). +func pathEscapeBytes(src []byte) []byte { + dst := make([]byte, 0, len(src)) + for _, c := range src { + if shouldNotEscapePathSegment(c) { + dst = append(dst, c) + continue + } + dst = append(dst, '%', hexUpper(c>>4), hexUpper(c&0x0F)) + } + return dst +} + +// shouldNotEscapeQueryComponent reports whether a byte is safe to emit +// literally inside an application/x-www-form-urlencoded value. Matches +// the predicate net/url applies for encodeQueryComponent: ALPHA / DIGIT +// / '-' / '_' / '.' / '~'. +func shouldNotEscapeQueryComponent(c byte) bool { + switch { + case c >= 'A' && c <= 'Z', + c >= 'a' && c <= 'z', + c >= '0' && c <= '9': + return true + case c == '-' || c == '_' || c == '.' || c == '~': + return true + } + return false +} + +// shouldNotEscapePathSegment reports whether a byte is safe to emit +// literally inside a URL path segment. Matches the predicate net/url +// applies for encodePathSegment: unreserved + sub-delims minus the +// segment separators '/' and '?'. Specifically: ALPHA / DIGIT / +// '-' '_' '.' '~' '$' '&' '+' ',' ';' '=' ':' '@'. +func shouldNotEscapePathSegment(c byte) bool { + if shouldNotEscapeQueryComponent(c) { + return true + } + switch c { + case '$', '&', '+', ',', ';', '=', ':', '@': + return true + } + return false +} + +// hexUpper returns the uppercase hex digit for a nibble in 0..15. +func hexUpper(nibble byte) byte { + if nibble < 10 { + return '0' + nibble + } + return 'A' + (nibble - 10) +} + // maxProxyBody limits the request/response body size that MITM proxies // (HTTPS and QUIC) read for phantom token replacement. 16 MiB is // sufficient for typical API traffic while preventing memory exhaustion diff --git a/internal/proxy/phantom_strip.go b/internal/proxy/phantom_strip.go index a750ed3..ec48d68 100644 --- a/internal/proxy/phantom_strip.go +++ b/internal/proxy/phantom_strip.go @@ -2,7 +2,6 @@ package proxy import ( "bytes" - "net/url" "sort" "github.com/nemirovsky/sluice/internal/vault" @@ -57,8 +56,8 @@ func stripUnboundPhantomsFromProvider(data []byte, provider vault.Provider) []by // already cover, so the literal scan path is not duplicated. encodedPhantoms := make([][]byte, 0, len(phantoms)*2) for _, p := range phantoms { - encoded := []byte(url.QueryEscape(string(p))) - if bytes.Equal(encoded, p) { + encoded := encodePhantomForPair(p) + if encoded == nil { continue } encodedPhantoms = append(encodedPhantoms, encoded) diff --git a/internal/proxy/quic.go b/internal/proxy/quic.go index 15b8ebd..6aa472d 100644 --- a/internal/proxy/quic.go +++ b/internal/proxy/quic.go @@ -600,8 +600,24 @@ func (q *QUICProxy) replacePhantomInHeaders(r *http.Request, pairs []phantomPair vb = bytes.ReplaceAll(vb, p.phantom, p.secret.Bytes()) changed = true } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = queryEscapeBytes(p.secret.Bytes()) + } + } + if len(p.encodedPhantom) > 0 && bytes.Contains(vb, p.encodedPhantom) { + ensureEncodedSecret() + vb = bytes.ReplaceAll(vb, p.encodedPhantom, encodedSecret) + changed = true + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(vb, p.encodedPhantomLower) { + ensureEncodedSecret() + vb = bytes.ReplaceAll(vb, p.encodedPhantomLower, encodedSecret) + changed = true + } } - if bytes.Contains(vb, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(vb) { vb = q.stripUnboundPhantoms(vb) changed = true log.Printf("[QUIC-MITM] stripped unbound phantom from header %q for %s:%d", key, host, port) @@ -614,14 +630,32 @@ func (q *QUICProxy) replacePhantomInHeaders(r *http.Request, pairs []phantomPair } // replacePhantomInBody replaces phantom tokens in a request body with real -// credential values and strips any unbound phantom tokens. +// credential values and strips any unbound phantom tokens. Matches both +// the literal SLUICE_PHANTOM: form and the URL-encoded +// SLUICE_PHANTOM%3A form (uppercase and lowercase hex) so that +// HTTP/3 form-urlencoded OAuth refreshes route through the same swap as +// HTTP/1.x and HTTP/2. func (q *QUICProxy) replacePhantomInBody(body []byte, pairs []phantomPair, host string, port int) []byte { for _, p := range pairs { if bytes.Contains(body, p.phantom) { body = bytes.ReplaceAll(body, p.phantom, p.secret.Bytes()) } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = queryEscapeBytes(p.secret.Bytes()) + } + } + if len(p.encodedPhantom) > 0 && bytes.Contains(body, p.encodedPhantom) { + ensureEncodedSecret() + body = bytes.ReplaceAll(body, p.encodedPhantom, encodedSecret) + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(body, p.encodedPhantomLower) { + ensureEncodedSecret() + body = bytes.ReplaceAll(body, p.encodedPhantomLower, encodedSecret) + } } - if bytes.Contains(body, phantomPrefix) { + if bytesContainsAnyPhantomPrefix(body) { body = q.stripUnboundPhantoms(body) log.Printf("[QUIC-MITM] stripped unbound phantom from body for %s:%d", host, port) } diff --git a/internal/proxy/ws.go b/internal/proxy/ws.go index 12fc9a5..cfb10fe 100644 --- a/internal/proxy/ws.go +++ b/internal/proxy/ws.go @@ -514,15 +514,33 @@ func (wp *WSProxy) relayFrames(src io.Reader, dst io.Writer, pairs []phantomPair } } - // Replace bound phantom tokens with real credentials. + // Replace bound phantom tokens with real credentials in both + // literal and URL-encoded forms (covers WS text frames that + // carry application/x-www-form-urlencoded payloads or + // percent-escaped query-like content). for _, p := range pairs { if bytes.Contains(payload, p.phantom) { payload = bytes.ReplaceAll(payload, p.phantom, p.secret.Bytes()) } + var encodedSecret []byte + ensureEncodedSecret := func() { + if encodedSecret == nil { + encodedSecret = queryEscapeBytes(p.secret.Bytes()) + } + } + if len(p.encodedPhantom) > 0 && bytes.Contains(payload, p.encodedPhantom) { + ensureEncodedSecret() + payload = bytes.ReplaceAll(payload, p.encodedPhantom, encodedSecret) + } + if len(p.encodedPhantomLower) > 0 && bytes.Contains(payload, p.encodedPhantomLower) { + ensureEncodedSecret() + payload = bytes.ReplaceAll(payload, p.encodedPhantomLower, encodedSecret) + } } - // Strip any remaining unbound phantom tokens. - if bytes.Contains(payload, phantomPrefix) { + // Strip any remaining unbound phantom tokens (literal or + // URL-encoded, either case). + if bytesContainsAnyPhantomPrefix(payload) { payload = wp.stripUnboundPhantoms(payload) log.Printf("[WS] stripped unbound phantom token from text frame") } From 47f16d348c45ba46ac613b153733f11b63c27bbc Mon Sep 17 00:00:00 2001 From: Nikita Nemirovsky Date: Mon, 11 May 2026 21:25:13 +0800 Subject: [PATCH 6/7] fix(proxy): allocation-free encode fast paths + typed path/query selector encodePhantomForPair: a pre-scan via phantomNeedsQueryEscape returns nil without any allocation when every byte in the phantom is already in the unreserved-for-query-component set. Skips the byte->string copy that url.QueryEscape would otherwise produce for the no-op case. encodePhantomLowerForPair: a pre-scan returns nil without allocating when no percent-escape in the encoded phantom contains an uppercase A-F hex digit, so the "nothing to lowercase" case (notably OAuth JWT phantoms with no upper-hex escapes) is allocation-free. swapPhantomBytes: the path-vs-query escape choice is now driven by an explicit pathContext bool parameter instead of comparing the human-readable location label. Callers in Request and the two test helpers pass pathContext directly, so the type system enforces the escape choice and a typo in the location string can no longer silently flip path encoding to query encoding (or vice versa). --- internal/proxy/addon.go | 28 +++++++++-------- internal/proxy/addon_test.go | 4 +-- internal/proxy/phantom_pairs.go | 54 +++++++++++++++++++++++++-------- 3 files changed, 60 insertions(+), 26 deletions(-) diff --git a/internal/proxy/addon.go b/internal/proxy/addon.go index f4a1fa8..a9afefb 100644 --- a/internal/proxy/addon.go +++ b/internal/proxy/addon.go @@ -637,20 +637,21 @@ func (a *SluiceAddon) Request(f *mitmproxy.Flow) { // Pass 2+3 on body. if len(f.Request.Body) > 0 { - f.Request.Body = a.swapPhantomBytes(f.Request.Body, pairs, host, port, "body") + f.Request.Body = a.swapPhantomBytes(f.Request.Body, pairs, host, port, "body", false) } // Pass 2+3 on URL query. if rawQ := f.Request.URL.RawQuery; bytesContainsAnyPhantomPrefix([]byte(rawQ)) { f.Request.URL.RawQuery = string( - a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query"), + a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query", false), ) } - // Pass 2+3 on URL path. + // Pass 2+3 on URL path. pathContext=true selects path escaping so + // secrets containing spaces get %20, not '+'. if rawP := f.Request.URL.Path; bytesContainsAnyPhantomPrefix([]byte(rawP)) { f.Request.URL.Path = string( - a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path"), + a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path", true), ) f.Request.URL.RawPath = "" } @@ -1265,14 +1266,17 @@ func bytesContainsAnyPhantomPrefix(data []byte) bool { // every body, query, or header scan. The encoded secret is computed on // demand once per swap call, only when the encoded phantom actually appears. // -// location chooses between query escaping (body, URL query, header) and -// path escaping (URL path). The two differ in how spaces are encoded: -// QueryEscape uses '+', PathEscape uses '%20'. Using QueryEscape for a path -// substitution would turn a space in the secret into a literal '+' in the -// URL path, which is interpreted as a plus character by the server, not a -// space — corrupting the request. -func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string) []byte { - pathContext := location == "URL path" +// pathContext chooses between query escaping (false; body, URL query, +// header) and path escaping (true; URL path). The two differ in how +// spaces are encoded: QueryEscape uses '+', PathEscape uses '%20'. Using +// query escaping for a path substitution would turn a space in the +// secret into a literal '+' in the URL path, which the server reads as +// a plus character, not a space — corrupting the request. The boolean is +// passed in explicitly so the type system enforces the choice; callers +// cannot accidentally pick path escaping by typo-ing the location label. +// location is still passed for the audit log message but never drives +// behavior. +func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string, pathContext bool) []byte { for _, p := range pairs { if bytes.Contains(data, p.phantom) { data = bytes.ReplaceAll(data, p.phantom, p.secret.Bytes()) diff --git a/internal/proxy/addon_test.go b/internal/proxy/addon_test.go index 111d53b..5c672b7 100644 --- a/internal/proxy/addon_test.go +++ b/internal/proxy/addon_test.go @@ -854,7 +854,7 @@ func TestSwapPhantomBytes_PathUsesPathEscape(t *testing.T) { defer releasePhantomPairs(pairs) in := []byte("/v1/SLUICE_PHANTOM%3Aapi_key/resource") - out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL path") + out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL path", true) got := string(out) if !strings.Contains(got, "real%20secret") { @@ -892,7 +892,7 @@ func TestSwapPhantomBytes_QueryUsesQueryEscape(t *testing.T) { defer releasePhantomPairs(pairs) in := []byte("token=SLUICE_PHANTOM%3Aapi_key") - out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL query") + out := addon.swapPhantomBytes(in, pairs, "api.example.com", 443, "URL query", false) got := string(out) if !strings.Contains(got, "real+secret") { diff --git a/internal/proxy/phantom_pairs.go b/internal/proxy/phantom_pairs.go index 00f92d2..2df517c 100644 --- a/internal/proxy/phantom_pairs.go +++ b/internal/proxy/phantom_pairs.go @@ -1,9 +1,7 @@ package proxy import ( - "bytes" "log" - "net/url" "github.com/nemirovsky/sluice/internal/vault" ) @@ -11,13 +9,27 @@ import ( // encodePhantomForPair returns the URL query-escaped form of a phantom // token, or nil when QueryEscape would leave the bytes unchanged. The // "nil when unchanged" convention lets the hot-path swap skip a redundant -// bytes.Contains scan for the literal form a second time. +// bytes.Contains scan for the literal form a second time. A pre-scan +// returns nil before any allocation when no byte in phantom would be +// escaped — also avoids the byte->string copy that url.QueryEscape would +// otherwise produce for the no-op case. func encodePhantomForPair(phantom []byte) []byte { - encoded := []byte(url.QueryEscape(string(phantom))) - if bytes.Equal(encoded, phantom) { + if !phantomNeedsQueryEscape(phantom) { return nil } - return encoded + return queryEscapeBytes(phantom) +} + +// phantomNeedsQueryEscape reports whether any byte in phantom would be +// rewritten by queryEscapeBytes. Returns true on a space or any byte +// outside the unreserved-for-query-component set. +func phantomNeedsQueryEscape(phantom []byte) bool { + for _, c := range phantom { + if c == ' ' || !shouldNotEscapeQueryComponent(c) { + return true + } + } + return false } // encodePhantomLowerForPair returns the lowercase-hex variant of the @@ -26,16 +38,33 @@ func encodePhantomForPair(phantom []byte) []byte { // hex digits A-F). RFC 3986 §2.1 makes percent-encoded hex case- // insensitive, so a phantom that arrives encoded as %3a must still match // the precomputed phantom whose canonical form is %3A. +// +// A pre-scan returns nil before any allocation when no percent-escape +// sequence contains an uppercase A-F hex digit. The "no allocation when +// nothing to lower" path matters for OAuth JWT phantoms and any phantom +// whose only escape is %3A (the encoded colon) — once we've already +// stored the uppercase variant elsewhere on the pair, there is nothing +// new to lower for those. func encodePhantomLowerForPair(encoded []byte) []byte { if len(encoded) == 0 { return nil } + hasUpperHex := false + for i := 0; i < len(encoded); i++ { + if encoded[i] != '%' || i+2 >= len(encoded) { + continue + } + if isASCIIUpperHex(encoded[i+1]) || isASCIIUpperHex(encoded[i+2]) { + hasUpperHex = true + break + } + } + if !hasUpperHex { + return nil + } lower := make([]byte, len(encoded)) i := 0 for i < len(encoded) { - // Lowercase only the two hex digits after a %, leave everything - // else untouched so the credential name (which can contain - // upper-case letters by policy) isn't corrupted. if encoded[i] == '%' && i+2 < len(encoded) { lower[i] = '%' lower[i+1] = asciiLowerHex(encoded[i+1]) @@ -46,12 +75,13 @@ func encodePhantomLowerForPair(encoded []byte) []byte { lower[i] = encoded[i] i++ } - if bytes.Equal(lower, encoded) { - return nil - } return lower } +func isASCIIUpperHex(b byte) bool { + return b >= 'A' && b <= 'F' +} + func asciiLowerHex(b byte) byte { if b >= 'A' && b <= 'F' { return b + ('a' - 'A') From 8fa2ced1d10f5efcfe4f9678586150f27be80395 Mon Sep 17 00:00:00 2001 From: Nikita Nemirovsky Date: Mon, 11 May 2026 21:33:37 +0800 Subject: [PATCH 7/7] docs(proxy): clarify encodePhantomLowerForPair fast-path scope The previous comment implied the no-allocation fast path covered OAuth JWT phantoms and phantoms whose only escape is %3A. The %3A case still differs from %3a, so the fast path does NOT skip the allocation there. It only fires when every percent-escape uses 0-9 digits exclusively (e.g. %20%21%30). Aligned the comment with the actual behavior. --- internal/proxy/phantom_pairs.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/internal/proxy/phantom_pairs.go b/internal/proxy/phantom_pairs.go index 2df517c..3f27c37 100644 --- a/internal/proxy/phantom_pairs.go +++ b/internal/proxy/phantom_pairs.go @@ -40,11 +40,13 @@ func phantomNeedsQueryEscape(phantom []byte) bool { // the precomputed phantom whose canonical form is %3A. // // A pre-scan returns nil before any allocation when no percent-escape -// sequence contains an uppercase A-F hex digit. The "no allocation when -// nothing to lower" path matters for OAuth JWT phantoms and any phantom -// whose only escape is %3A (the encoded colon) — once we've already -// stored the uppercase variant elsewhere on the pair, there is nothing -// new to lower for those. +// sequence contains an uppercase A-F hex digit. This fast path only +// fires for phantoms whose escaped form happens to use 0-9 hex digits +// exclusively. A phantom containing %3A (the encoded colon, which every +// SLUICE_PHANTOM: phantom has after url-encoding) still differs +// between %3A and %3a, so the allocation still occurs in the common +// case — the fast path is for shapes like %20%21%30 where every escape +// is already lowercase-equivalent. func encodePhantomLowerForPair(encoded []byte) []byte { if len(encoded) == 0 { return nil