Skip to content

Commit 55092f4

Browse files
authored
feat(proxy): URL-encoded phantom scan + path-correct escaping + case-insensitive %3a (#40)
Per-pair precomputed encoded phantom variants, byte-level URL escape helpers, dual literal+encoded swap across HTTP, HTTP/2, HTTP/3 (QUIC), and WebSocket paths. Closes the OAuth refresh leak where SLUICE_PHANTOM%3A... in form-urlencoded bodies passed through to upstream.
1 parent 399807c commit 55092f4

7 files changed

Lines changed: 748 additions & 43 deletions

File tree

internal/proxy/addon.go

Lines changed: 140 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -637,20 +637,21 @@ func (a *SluiceAddon) Request(f *mitmproxy.Flow) {
637637

638638
// Pass 2+3 on body.
639639
if len(f.Request.Body) > 0 {
640-
f.Request.Body = a.swapPhantomBytes(f.Request.Body, pairs, host, port, "body")
640+
f.Request.Body = a.swapPhantomBytes(f.Request.Body, pairs, host, port, "body", false)
641641
}
642642

643643
// Pass 2+3 on URL query.
644-
if rawQ := f.Request.URL.RawQuery; bytes.Contains([]byte(rawQ), phantomPrefix) {
644+
if rawQ := f.Request.URL.RawQuery; bytesContainsAnyPhantomPrefix([]byte(rawQ)) {
645645
f.Request.URL.RawQuery = string(
646-
a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query"),
646+
a.swapPhantomBytes([]byte(rawQ), pairs, host, port, "URL query", false),
647647
)
648648
}
649649

650-
// Pass 2+3 on URL path.
651-
if rawP := f.Request.URL.Path; bytes.Contains([]byte(rawP), phantomPrefix) {
650+
// Pass 2+3 on URL path. pathContext=true selects path escaping so
651+
// secrets containing spaces get %20, not '+'.
652+
if rawP := f.Request.URL.Path; bytesContainsAnyPhantomPrefix([]byte(rawP)) {
652653
f.Request.URL.Path = string(
653-
a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path"),
654+
a.swapPhantomBytes([]byte(rawP), pairs, host, port, "URL path", true),
654655
)
655656
f.Request.URL.RawPath = ""
656657
}
@@ -1187,9 +1188,13 @@ func (a *SluiceAddon) buildPhantomPairs(host string, port int, proto string) []p
11871188
pairs = append(pairs, oauthPairs...)
11881189
continue
11891190
}
1191+
phantom := []byte(PhantomToken(name))
1192+
encoded := encodePhantomForPair(phantom)
11901193
pairs = append(pairs, phantomPair{
1191-
phantom: []byte(PhantomToken(name)),
1192-
secret: secret,
1194+
phantom: phantom,
1195+
encodedPhantom: encoded,
1196+
encodedPhantomLower: encodePhantomLowerForPair(encoded),
1197+
secret: secret,
11931198
})
11941199
}
11951200

@@ -1211,41 +1216,107 @@ func releasePhantomPairs(pairs []phantomPair) {
12111216
// hasPhantomPrefix checks whether the request body, headers, or URL
12121217
// contain the phantom prefix bytes.
12131218
func (a *SluiceAddon) hasPhantomPrefix(f *mitmproxy.Flow) bool {
1214-
if bytes.Contains(f.Request.Body, phantomPrefix) {
1219+
if bytesContainsAnyPhantomPrefix(f.Request.Body) {
12151220
return true
12161221
}
12171222
for _, vals := range f.Request.Header {
12181223
for _, v := range vals {
1219-
if bytes.Contains([]byte(v), phantomPrefix) {
1224+
if bytesContainsAnyPhantomPrefix([]byte(v)) {
12201225
return true
12211226
}
12221227
}
12231228
}
1224-
if bytes.Contains([]byte(f.Request.URL.RawQuery), phantomPrefix) {
1229+
if bytesContainsAnyPhantomPrefix([]byte(f.Request.URL.RawQuery)) {
12251230
return true
12261231
}
1227-
if bytes.Contains([]byte(f.Request.URL.Path), phantomPrefix) {
1232+
if bytesContainsAnyPhantomPrefix([]byte(f.Request.URL.Path)) {
12281233
return true
12291234
}
12301235
return false
12311236
}
12321237

1238+
// bytesContainsAnyPhantomPrefix reports whether the data contains the
1239+
// literal phantom prefix or either case of the URL-encoded prefix (%3A or
1240+
// %3a). Form-urlencoded request bodies and URL query/path components
1241+
// percent-encode the colon in phantom tokens, and RFC 3986 §2.1 makes the
1242+
// hex digits case-insensitive, so a scan that only checks one case would
1243+
// miss phantoms emitted by clients that lowercase their percent escapes.
1244+
func bytesContainsAnyPhantomPrefix(data []byte) bool {
1245+
return bytes.Contains(data, phantomPrefix) ||
1246+
bytes.Contains(data, urlEncodedPhantomPrefix) ||
1247+
bytes.Contains(data, urlEncodedPhantomPrefixLower)
1248+
}
1249+
12331250
// swapPhantomBytes performs Pass 2 (scoped replacement) and Pass 3 (strip
12341251
// unbound) on a byte slice.
1235-
func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string) []byte {
1252+
//
1253+
// Each pair is matched in both its literal form (`SLUICE_PHANTOM:<name>`,
1254+
// the shape used in JSON bodies and raw header values) and its URL-encoded
1255+
// form (`SLUICE_PHANTOM%3A<name>`, the shape used in
1256+
// application/x-www-form-urlencoded request bodies and URL query strings).
1257+
// The encoded path is what makes OAuth refresh round-trips work: refresh
1258+
// POSTs to providers like Anthropic and Google use form-urlencoded bodies,
1259+
// so the colon in the phantom token gets percent-encoded on the wire.
1260+
// Without the encoded scan the upstream receives `SLUICE_PHANTOM%3A...`
1261+
// literally, returns `invalid_grant`, and the agent falls back to a fresh
1262+
// interactive OAuth — every time tokens expire.
1263+
//
1264+
// The encoded phantom is precomputed once per pair (in encodePhantomForPair)
1265+
// and stored on phantomPair.encodedPhantom so we don't re-allocate it on
1266+
// every body, query, or header scan. The encoded secret is computed on
1267+
// demand, at most once per pair per call, only when an encoded phantom appears.
1268+
//
1269+
// pathContext chooses between query escaping (false; body, URL query,
1270+
// header) and path escaping (true; URL path). The two differ in how
1271+
// spaces are encoded: QueryEscape uses '+', PathEscape uses '%20'. Using
1272+
// query escaping for a path substitution would turn a space in the
1273+
// secret into a literal '+' in the URL path, which the server reads as
1274+
// a plus character, not a space — corrupting the request. The boolean is
1275+
// passed in explicitly so the type system enforces the choice; callers
1276+
// cannot accidentally pick path escaping by typo-ing the location label.
1277+
// location is still passed for the audit log message but never drives
1278+
// behavior.
1279+
func (a *SluiceAddon) swapPhantomBytes(data []byte, pairs []phantomPair, host string, port int, location string, pathContext bool) []byte {
12361280
for _, p := range pairs {
12371281
if bytes.Contains(data, p.phantom) {
12381282
data = bytes.ReplaceAll(data, p.phantom, p.secret.Bytes())
12391283
}
1284+
// Encoded swap covers both uppercase (%3A, the canonical form Go
1285+
// emits) and lowercase (%3a, valid per RFC 3986 §2.1). The
1286+
// replacement secret is escaped lazily on the first encoded hit for a
1287+
// pair and reused for the other case variant of that same pair.
1288+
var encodedSecret []byte
1289+
ensureEncodedSecret := func() {
1290+
if encodedSecret != nil {
1291+
return
1292+
}
1293+
if pathContext {
1294+
encodedSecret = pathEscapeBytes(p.secret.Bytes())
1295+
} else {
1296+
encodedSecret = queryEscapeBytes(p.secret.Bytes())
1297+
}
1298+
}
1299+
if len(p.encodedPhantom) > 0 && bytes.Contains(data, p.encodedPhantom) {
1300+
ensureEncodedSecret()
1301+
data = bytes.ReplaceAll(data, p.encodedPhantom, encodedSecret)
1302+
}
1303+
if len(p.encodedPhantomLower) > 0 && bytes.Contains(data, p.encodedPhantomLower) {
1304+
ensureEncodedSecret()
1305+
data = bytes.ReplaceAll(data, p.encodedPhantomLower, encodedSecret)
1306+
}
12401307
}
1241-
if bytes.Contains(data, phantomPrefix) {
1308+
if bytesContainsAnyPhantomPrefix(data) {
12421309
data = stripUnboundPhantomsFromProvider(data, a.provider)
12431310
log.Printf("[ADDON-INJECT] stripped unbound phantom token from %s for %s:%d", location, host, port)
12441311
}
12451312
return data
12461313
}
12471314

12481315
// swapPhantomHeaders performs Pass 2+3 on all request headers.
1316+
//
1317+
// Each pair is matched in both its literal and URL-encoded forms so phantom
1318+
// tokens carried in percent-encoded header values (custom cookie schemes,
1319+
// query-style header payloads) cannot bypass the swap.
12491320
func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair, host string, port int) {
12501321
for key, vals := range f.Request.Header {
12511322
for i, v := range vals {
@@ -1256,8 +1327,24 @@ func (a *SluiceAddon) swapPhantomHeaders(f *mitmproxy.Flow, pairs []phantomPair,
12561327
vb = bytes.ReplaceAll(vb, p.phantom, p.secret.Bytes())
12571328
changed = true
12581329
}
1330+
var encodedSecret []byte
1331+
ensureEncodedSecret := func() {
1332+
if encodedSecret == nil {
1333+
encodedSecret = queryEscapeBytes(p.secret.Bytes())
1334+
}
1335+
}
1336+
if len(p.encodedPhantom) > 0 && bytes.Contains(vb, p.encodedPhantom) {
1337+
ensureEncodedSecret()
1338+
vb = bytes.ReplaceAll(vb, p.encodedPhantom, encodedSecret)
1339+
changed = true
1340+
}
1341+
if len(p.encodedPhantomLower) > 0 && bytes.Contains(vb, p.encodedPhantomLower) {
1342+
ensureEncodedSecret()
1343+
vb = bytes.ReplaceAll(vb, p.encodedPhantomLower, encodedSecret)
1344+
changed = true
1345+
}
12591346
}
1260-
if bytes.Contains(vb, phantomPrefix) {
1347+
if bytesContainsAnyPhantomPrefix(vb) {
12611348
vb = stripUnboundPhantomsFromProvider(vb, a.provider)
12621349
changed = true
12631350
log.Printf("[ADDON-INJECT] stripped unbound phantom token from header %q for %s:%d", key, host, port)
@@ -1285,15 +1372,30 @@ type phantomSwapReader struct {
12851372
// maxPhantomLen returns the length of the longest phantom token in the
12861373
// pairs list. Used to determine how much data to hold back from the
12871374
// output buffer to handle tokens that span read boundaries.
1375+
//
1376+
// The result accounts for both literal phantom tokens (SLUICE_PHANTOM:name)
1377+
// and their URL-encoded forms (SLUICE_PHANTOM%3Aname). The encoded form is
1378+
// strictly longer because the colon expands to %3A, so a holdback sized for
1379+
// the literal form alone would lose URL-encoded phantoms that straddle a
1380+
// read boundary. Uses the precomputed encodedPhantom on each pair so no
1381+
// per-chunk allocation is required.
12881382
func maxPhantomLen(pairs []phantomPair) int {
12891383
m := 0
12901384
for _, p := range pairs {
12911385
if len(p.phantom) > m {
12921386
m = len(p.phantom)
12931387
}
1388+
if len(p.encodedPhantom) > m {
1389+
m = len(p.encodedPhantom)
1390+
}
1391+
if len(p.encodedPhantomLower) > m {
1392+
m = len(p.encodedPhantomLower)
1393+
}
12941394
}
1295-
// Also account for the generic phantom prefix pattern.
1296-
if pLen := len(phantomPrefix) + maxCredNameLen; pLen > m {
1395+
// Also account for the generic phantom prefix pattern. The URL-encoded
1396+
// prefix is used because it is the longest form; its uppercase and
1397+
// lowercase variants are the same length, so either works as the floor.
1398+
if pLen := len(urlEncodedPhantomPrefix) + maxCredNameLen; pLen > m {
12971399
m = pLen
12981400
}
12991401
return m
@@ -1340,14 +1442,32 @@ func (r *phantomSwapReader) Read(p []byte) (int, error) {
13401442
toProcess := r.pending[:safe]
13411443
r.pending = append([]byte(nil), r.pending[safe:]...)
13421444

1343-
// Pass 2: scoped replacement.
1445+
// Pass 2: scoped replacement, in both literal and URL-encoded forms
1446+
// (both case variants of %3A). The encoded phantom is precomputed
1447+
// once per pair so this hot path only allocates when an encoded
1448+
// phantom is actually present and we need the encoded form of the
1449+
// real secret.
13441450
for _, pp := range r.pairs {
13451451
if bytes.Contains(toProcess, pp.phantom) {
13461452
toProcess = bytes.ReplaceAll(toProcess, pp.phantom, pp.secret.Bytes())
13471453
}
1454+
var encodedSecret []byte
1455+
ensureEncodedSecret := func() {
1456+
if encodedSecret == nil {
1457+
encodedSecret = queryEscapeBytes(pp.secret.Bytes())
1458+
}
1459+
}
1460+
if len(pp.encodedPhantom) > 0 && bytes.Contains(toProcess, pp.encodedPhantom) {
1461+
ensureEncodedSecret()
1462+
toProcess = bytes.ReplaceAll(toProcess, pp.encodedPhantom, encodedSecret)
1463+
}
1464+
if len(pp.encodedPhantomLower) > 0 && bytes.Contains(toProcess, pp.encodedPhantomLower) {
1465+
ensureEncodedSecret()
1466+
toProcess = bytes.ReplaceAll(toProcess, pp.encodedPhantomLower, encodedSecret)
1467+
}
13481468
}
1349-
// Pass 3: strip unbound.
1350-
if bytes.Contains(toProcess, phantomPrefix) {
1469+
// Pass 3: strip unbound, including URL-encoded phantoms.
1470+
if bytesContainsAnyPhantomPrefix(toProcess) {
13511471
toProcess = stripUnboundPhantomsFromProvider(toProcess, r.provider)
13521472
}
13531473

0 commit comments

Comments
 (0)