Skip to content

Commit ad18290

Browse files
authored
feat(replay): single-use claim on consent + callback state (#22)
Closes the only documented security trade-off in specs.md (consent_token replay) and removes IdP-fan-out + audit noise on retried /callback. When a replay store is wired (REDIS_URL): - GET /authorize seals a fresh JTI on every consent render and a fresh SessionID on every silent-redirect session. Per-render JTI keeps back-button safe (a re-render gets a new claim slot) while still single-use within one render. - POST /consent ClaimOnce on JTI before either approve or deny — a captured consent_token cannot be replayed for either decision. - GET /callback ClaimOnce on session.SessionID before the upstream OIDC exchange — a replayed /callback URL never fans out to the IdP token endpoint and never produces audit-log noise. - Replay detected → 400 invalid_request with error_code consent_replay / callback_state_replay, increments mcp_auth_replay_detected_total{kind=consent|callback_state}. - Store unreachable → 503 with error_code replay_store_unavailable, increments mcp_auth_access_denied_total{reason="replay_store_unavailable"} (reuses the existing /token counter so one alert rule covers every claim site). - nil store → stateless fallback (configured opt-out, mirrors /token). - Empty JTI / SessionID on tokens sealed by older binaries falls through to stateless behavior so an in-flight rollout doesn't 503 active users for the duration of consentTTL / sessionTTL. - The /callback IdP-error redirect path deliberately does NOT claim — the redirect is idempotent (no IdP fan-out, no token issuance) and claiming would force re-auth on every legit transient-error retry. - All four replay_store_error log sites now carry an `op` field (claim_authz_code / claim_refresh_family / claim_consent / claim_callback_state) for site-level log correlation. specs.md and README updated. 10 new handler tests cover replay detection, deny-replayed, nil-store fallback, legacy empty JTI/SID, store-error fail-closed, and JTI/SID population by /authorize for both forks.
1 parent c432051 commit ad18290

10 files changed

Lines changed: 639 additions & 34 deletions

File tree

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,9 @@ rollout notes, and K8s deployment shape.
238238
expected interaction, not a policy rejection. Pair with
239239
`mcp_auth_tokens_issued_total{grant_type="authorization_code"}`
240240
to derive abandoned-after-approve as a funnel signal
241-
- `mcp_auth_replay_detected_total{kind}``code` or `refresh` replays
242-
caught by the Redis-backed store
241+
- `mcp_auth_replay_detected_total{kind}``code`, `refresh`,
242+
`consent`, or `callback_state` replays caught by the Redis-backed
243+
store
243244
- `mcp_auth_rate_limited_total{endpoint}` — httprate 429s by endpoint
244245
- `mcp_auth_clients_registered_total` — RFC 7591 registrations
245246
- `mcp_auth_groups_claim_shape_mismatch_total` — id_token `groups`

handlers/authorize.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/babs/mcp-auth-proxy/metrics"
1111
"github.com/babs/mcp-auth-proxy/token"
12+
"github.com/google/uuid"
1213
"go.uber.org/zap"
1314
"golang.org/x/oauth2"
1415
)
@@ -226,6 +227,10 @@ func Authorize(tm *token.Manager, logger *zap.Logger, baseURL string, oauth2Cfg
226227
// redirect) replays from POST /consent on approval.
227228
if authzCfg.RenderConsentPage {
228229
renderConsent(w, r, tm, logger, baseURL, authzCfg.ResourceName, sealedConsent{
230+
// Per-render JTI: a fresh id every GET /authorize so
231+
// back-button = re-consent (each render gets its own
232+
// single-use claim slot) rather than dead-state errors.
233+
JTI: uuid.New().String(),
229234
ClientID: client.ID,
230235
ClientName: client.ClientName,
231236
RedirectURI: redirectURI,
@@ -280,6 +285,7 @@ func Authorize(tm *token.Manager, logger *zap.Logger, baseURL string, oauth2Cfg
280285
PKCEVerifier: upstreamVerifier,
281286
SvrVerifier: svrVerifier, // empty unless H6 server-side PKCE kicked in
282287
SvrChallenge: svrChallenge, // mirrors sessionChallenge in that case
288+
SessionID: uuid.New().String(),
283289
Typ: token.PurposeSession,
284290
Audience: baseURL,
285291
Resource: authzCfg.CanonicalResource,

handlers/callback.go

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ package handlers
33
import (
44
"context"
55
"encoding/json"
6+
"errors"
67
"net/http"
78
"net/url"
89
"strings"
910
"time"
1011

1112
"github.com/babs/mcp-auth-proxy/metrics"
13+
"github.com/babs/mcp-auth-proxy/replay"
1214
"github.com/babs/mcp-auth-proxy/token"
1315
"github.com/coreos/go-oidc/v3/oidc"
1416
"github.com/google/uuid"
@@ -72,10 +74,18 @@ func sanitizeErrorDescription(s string) string {
7274
return string(b)
7375
}
7476

75-
// CallbackConfig holds the group filtering parameters for the callback handler.
77+
// CallbackConfig holds optional dependencies for the callback handler.
7678
type CallbackConfig struct {
7779
AllowedGroups []string // empty = allow all authenticated users
7880
GroupsClaim string // flat claim name in id_token (default "groups")
81+
// ReplayStore, when non-nil, enforces single-use semantics on the
82+
// sealed `state` parameter via its embedded SessionID: a captured
83+
// /callback URL cannot be replayed to fan out to the upstream IdP
84+
// (audit-noise + outbound-fan-out defense — the IdP authorization
85+
// code is itself single-use, so a successful replay would just
86+
// burn the IdP's `invalid_grant` quota anyway). nil = stateless
87+
// fallback (configured opt-out).
88+
ReplayStore replay.Store
7989
}
8090

8191
// Callback handles GET /callback (IdP redirect after user authentication).
@@ -118,6 +128,17 @@ func callbackHandler(tm *token.Manager, logger *zap.Logger, audience string, oau
118128
internalState := q.Get("state")
119129

120130
if idpError != "" {
131+
// Deliberately NO ClaimOnce on this branch even when the
132+
// session opens cleanly. The error redirect produces no
133+
// IdP fan-out (no token-endpoint call) and no token
134+
// issuance — just an idempotent 302 to the registered
135+
// redirect_uri carrying `error=…`. Claiming here would
136+
// burn the state on the first transient IdP error and
137+
// force the user through full re-auth on the next legit
138+
// retry. Trade-off accepted: a stolen state can replay
139+
// the same error redirect, but the attacker gains
140+
// nothing not already producible by sending the user a
141+
// crafted /authorize URL.
121142
safeError := normalizeIdPError(idpError)
122143
// `error_description` from a /callback hit is fully
123144
// attacker-controlled — anyone can craft a /callback URL
@@ -175,6 +196,33 @@ func callbackHandler(tm *token.Manager, logger *zap.Logger, audience string, oau
175196
return
176197
}
177198

199+
// Single-use claim on the sealed state's SessionID. Applies
200+
// BEFORE the upstream IdP exchange so a replayed /callback
201+
// URL never fans out to the IdP. Same nil-store /
202+
// ErrAlreadyClaimed / fail-closed-on-error policy as the
203+
// /token code claim. Empty SessionID is the in-flight-rollout
204+
// fallback (older binary sealed the session before this
205+
// field existed).
206+
if cbCfg.ReplayStore != nil && session.SessionID != "" {
207+
remaining := max(time.Until(session.ExpiresAt), time.Second)
208+
key := replay.NamespacedKey("callback_state", session.SessionID)
209+
if err := cbCfg.ReplayStore.ClaimOnce(r.Context(), key, remaining); err != nil {
210+
if errors.Is(err, replay.ErrAlreadyClaimed) {
211+
metrics.ReplayDetected.WithLabelValues("callback_state").Inc()
212+
logger.Warn("callback_state_replay",
213+
zap.String("session_id", session.SessionID),
214+
zap.String("client_id", session.ClientID),
215+
)
216+
writeOAuthError(w, http.StatusBadRequest, "invalid_request", "callback state already used", "callback_state_replay")
217+
return
218+
}
219+
logger.Error("replay_store_error", zap.String("op", "claim_callback_state"), zap.Error(err))
220+
metrics.AccessDenied.WithLabelValues("replay_store_unavailable").Inc()
221+
writeOAuthError(w, http.StatusServiceUnavailable, "server_error", "replay store unavailable", "replay_store_unavailable")
222+
return
223+
}
224+
}
225+
178226
// Explicit timeout for upstream OIDC token exchange
179227
exchangeCtx, cancel := context.WithTimeout(r.Context(), oidcExchangeTTL)
180228
defer cancel()

handlers/consent.go

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ import (
1010
"time"
1111

1212
"github.com/babs/mcp-auth-proxy/metrics"
13+
"github.com/babs/mcp-auth-proxy/replay"
1314
"github.com/babs/mcp-auth-proxy/token"
15+
"github.com/google/uuid"
1416
"go.uber.org/zap"
1517
"golang.org/x/oauth2"
1618
)
@@ -183,10 +185,14 @@ func redirectHost(redirectURI string) string {
183185
}
184186

185187
// ConsentConfig holds optional dependencies for the consent
186-
// approval handler. Mirrors the shape of CallbackConfig — no
187-
// runtime tunables today, kept as a struct so future fields don't
188-
// re-break the constructor signature.
189-
type ConsentConfig struct{}
188+
// approval handler. Mirrors the shape of CallbackConfig.
189+
type ConsentConfig struct {
190+
// ReplayStore, when non-nil, enforces single-use semantics on the
191+
// consent token's JTI: a captured consent_token can be POSTed at
192+
// most once. nil = stateless fallback (configured opt-out — the
193+
// token is still audience- and TTL-bound).
194+
ReplayStore replay.Store
195+
}
190196

191197
// Consent handles POST /consent (consent-page approval submit).
192198
//
@@ -206,17 +212,14 @@ type ConsentConfig struct{}
206212
// purpose-bound, 5-min TTL). A POST without a valid consent_token
207213
// is rejected.
208214
//
209-
// Replay note: the consent token is NOT integrated with the replay
210-
// store. A given token can be redeemed multiple times within its
211-
// 5-min window. Each Approve mints a fresh sealedSession (different
212-
// nonce / verifier), and the IdP login still has to complete; each
213-
// Deny just emits another error=access_denied to the registered
214-
// redirect_uri. Neither path produces an effect the original user
215-
// couldn't reproduce by clicking again. Wiring single-use through
216-
// the replay store would be belt-and-braces against an attacker
217-
// who exfiltrated the consent token (a stronger compromise than
218-
// this defense addresses) — accepted as a deliberate trade-off.
219-
func Consent(tm *token.Manager, logger *zap.Logger, baseURL string, oauth2Cfg *oauth2.Config, _ ConsentConfig) http.HandlerFunc {
215+
// Replay defense: when ConsentConfig.ReplayStore is wired, the
216+
// consent token's JTI is claimed single-use before either branch
217+
// runs. Each GET /authorize render mints a fresh JTI so the
218+
// back-button case still works (a re-render gets a new claim
219+
// slot); a stolen consent_token can be POSTed at most once. Empty
220+
// JTI (token sealed by an older binary still in flight during
221+
// rollout) falls through to the prior stateless behavior.
222+
func Consent(tm *token.Manager, logger *zap.Logger, baseURL string, oauth2Cfg *oauth2.Config, cfg ConsentConfig) http.HandlerFunc {
220223
return func(w http.ResponseWriter, r *http.Request) {
221224
r.Body = http.MaxBytesReader(w, r.Body, maxBodySize)
222225

@@ -278,6 +281,39 @@ func Consent(tm *token.Manager, logger *zap.Logger, baseURL string, oauth2Cfg *o
278281
return
279282
}
280283

284+
// Single-use claim on the consent JTI — applies BEFORE the
285+
// approve/deny branch so a captured token cannot be replayed
286+
// for either decision. Mirrors the /token authorization-code
287+
// claim policy: nil store = stateless fallback (configured
288+
// opt-out); ErrAlreadyClaimed = 400 + replay metric; other
289+
// backend errors = fail-closed 503 so we never proceed
290+
// against an uncertain replay-state result. Empty JTI is the
291+
// in-flight-rollout fallback (older binary sealed the token
292+
// before this field existed).
293+
if cfg.ReplayStore != nil && consent.JTI != "" {
294+
remaining := max(time.Until(consent.ExpiresAt), time.Second)
295+
key := replay.NamespacedKey("consent", consent.JTI)
296+
if err := cfg.ReplayStore.ClaimOnce(r.Context(), key, remaining); err != nil {
297+
if errors.Is(err, replay.ErrAlreadyClaimed) {
298+
metrics.ReplayDetected.WithLabelValues("consent").Inc()
299+
logger.Warn("consent_token_replay",
300+
zap.String("jti", consent.JTI),
301+
zap.String("client_id", consent.ClientID),
302+
)
303+
writeOAuthError(w, http.StatusBadRequest, "invalid_request", "consent token already used", "consent_replay")
304+
return
305+
}
306+
// Reuse the same access_denied{replay_store_unavailable}
307+
// counter as /token rather than a per-site counter — a
308+
// Redis outage hits every claim site at once and a single
309+
// alerting rule on this counter covers all of them.
310+
logger.Error("replay_store_error", zap.String("op", "claim_consent"), zap.Error(err))
311+
metrics.AccessDenied.WithLabelValues("replay_store_unavailable").Inc()
312+
writeOAuthError(w, http.StatusServiceUnavailable, "server_error", "replay store unavailable", "replay_store_unavailable")
313+
return
314+
}
315+
}
316+
281317
if action == "deny" {
282318
// Counted on a dedicated funnel counter rather than
283319
// AccessDenied: clicking Deny is a normal expected user
@@ -335,6 +371,7 @@ func Consent(tm *token.Manager, logger *zap.Logger, baseURL string, oauth2Cfg *o
335371
PKCEVerifier: upstreamVerifier,
336372
SvrVerifier: svrVerifier,
337373
SvrChallenge: svrChallenge,
374+
SessionID: uuid.New().String(),
338375
Typ: token.PurposeSession,
339376
Audience: baseURL,
340377
Resource: consent.Resource,

handlers/helpers.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,14 @@ type sealedSession struct {
6060
PKCEVerifier string `json:"pv"`
6161
SvrVerifier string `json:"sv,omitempty"`
6262
SvrChallenge string `json:"sch,omitempty"`
63-
Typ string `json:"typ"`
64-
Audience string `json:"aud"`
63+
// SessionID is a per-session unique id used as the replay-store
64+
// claim key at /callback so a captured `state` cannot be replayed
65+
// against the IdP token endpoint (audit + outbound-fan-out
66+
// defense). Empty on sessions sealed before this field existed —
67+
// /callback then falls through to the prior stateless behavior.
68+
SessionID string `json:"sid,omitempty"`
69+
Typ string `json:"typ"`
70+
Audience string `json:"aud"`
6571
// Resource is the canonical RFC 8707 resource indicator the
6672
// downstream tokens will be bound to. Captured at /authorize so
6773
// the binding is fixed BEFORE the upstream IdP round trip — a
@@ -125,6 +131,13 @@ type sealedCode struct {
125131
// usefulness). AAD purpose binding keeps it from being opened as
126132
// any other sealed type.
127133
type sealedConsent struct {
134+
// JTI is a per-render unique id used as the single-use claim key
135+
// at POST /consent. Each GET /authorize render mints a fresh JTI
136+
// (back-button-safe: a re-render gets a new claim slot, the prior
137+
// one is dead once redeemed). Empty on tokens sealed before this
138+
// field existed — /consent then falls through to the prior
139+
// stateless behavior for that token.
140+
JTI string `json:"jti,omitempty"`
128141
ClientID string `json:"cid"`
129142
ClientName string `json:"cn,omitempty"`
130143
RedirectURI string `json:"ru"`

0 commit comments

Comments
 (0)