Skip to content

Commit 897ad3f

Browse files
committed
security: strip untrusted identity headers when no auth backend configured (#1445)
When no external auth provider (ext_authz / Authorino) is configured, identity headers (x-authz-user-id, x-authz-user-groups) come directly from the client and can be spoofed. This allows any client to impersonate any user, bypassing role-based routing, per-user rate limits, and memory isolation.

Fix: strip identity headers from incoming requests when no auth backend with header-injection type is configured.

- Add HasExternalAuthProvider() to AuthzConfig — returns true when a header-injection provider is explicitly configured
- Strip identity headers in handleRequestHeaders when HasExternalAuthProvider() returns false
- Log a warning when headers are stripped to aid debugging

When an auth provider IS configured (e.g., Authorino with header-injection), identity headers are trusted since they were injected by the auth backend after validating the user's credentials.

Includes security e2e tests (e2e/testing/10-security-audit-test.py) with 12 tests covering auth header stripping, destination injection safety, DoS resilience, tool call safety, memory isolation, and replay exposure.

Fixes #1445

Signed-off-by: Yossi Ovadia <jabadia@gmail.com>
1 parent 4a68063 commit 897ad3f

6 files changed

Lines changed: 716 additions & 0 deletions

File tree

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
# Session Affinity Demo Config
2+
# Demonstrates the route-bouncing problem in multi-turn conversations.
3+
#
4+
# Setup:
5+
# - "expensive" model: Claude via claude-code-proxy on port 11480
6+
# - "cheap" model: qwen2.5:0.5b (0.5B params) on Ollama port 11434
7+
#
8+
# Routing logic:
9+
# - Complex queries (complexity signal "hard") → Claude (expensive)
10+
# - Simple queries (complexity signal "easy") → qwen2.5:0.5b (cheap)
11+
#
12+
# The demo scenario:
13+
# 1. User sends complex coding question → routes to Claude (expensive)
14+
# 2. Claude responds with great code and asks "want me to add tests?"
15+
# 3. User says "yes" → VSR sees only "yes" → classifies as easy
16+
# 4. Routes to qwen2.5:0.5b (cheap) → 0.5B model has NO IDEA what "yes" means
17+
18+
# Disable features we don't need for the demo (prompt_guard below is left enabled)
19+
semantic_cache:
20+
enabled: false
21+
22+
tools:
23+
enabled: false
24+
25+
prompt_guard:
26+
enabled: true
27+
model_id: "models/mom-jailbreak-classifier"
28+
threshold: 0.7
29+
use_cpu: true
30+
jailbreak_mapping_path: "models/mom-jailbreak-classifier/label_mapping.json"
31+
32+
# Jailbreak rules for signal-based detection
33+
jailbreak_rules:
34+
- name: "jailbreak_high"
35+
threshold: 0.7
36+
description: "High-confidence jailbreak detection"
37+
38+
hallucination_mitigation:
39+
enabled: false
40+
41+
# Domain classifier (required by VSR startup)
42+
classifier:
43+
category_model:
44+
model_id: "models/mom-domain-classifier"
45+
threshold: 0.6
46+
use_cpu: true
47+
category_mapping_path: "models/mom-domain-classifier/category_mapping.json"
48+
49+
# Embedding models for complexity signal
50+
embedding_models:
51+
qwen3_model_path: "models/mom-embedding-pro"
52+
use_cpu: true
53+
hnsw_config:
54+
model_type: "qwen3"
55+
preload_embeddings: true
56+
target_dimension: 1024
57+
enable_soft_matching: true
58+
min_score_threshold: 0.5
59+
60+
# Two backends
61+
vllm_endpoints:
62+
- name: "ollama"
63+
address: "127.0.0.1"
64+
port: 11434
65+
weight: 1
66+
- name: "claude-proxy"
67+
address: "127.0.0.1"
68+
port: 11480
69+
weight: 1
70+
71+
# Model-to-endpoint mapping
72+
model_config:
73+
"expensive-model":
74+
preferred_endpoints: ["claude-proxy"]
75+
"qwen2.5:0.5b":
76+
preferred_endpoints: ["ollama"]
77+
78+
# Complexity signal: distinguish hard vs easy prompts
79+
complexity_rules:
80+
- name: "prompt_complexity"
81+
threshold: 0.15
82+
description: "Classify prompt complexity for routing decisions"
83+
hard:
84+
candidates:
85+
- "Implement a concurrent lock-free data structure with memory ordering guarantees"
86+
- "Design a distributed consensus algorithm for fault-tolerant systems"
87+
- "Write a compiler optimization pass for loop vectorization"
88+
- "Build a real-time stream processing pipeline with exactly-once semantics"
89+
- "Implement a B+ tree with concurrent readers and writers"
90+
- "Design a garbage collector with generational collection and compaction"
91+
- "Write an optimized matrix multiplication kernel with cache tiling"
92+
- "Implement a raft consensus protocol with log compaction"
93+
- "Explain the mathematical proof of the P vs NP problem"
94+
- "Analyze the time complexity of this recursive algorithm with memoization"
95+
easy:
96+
candidates:
97+
- "yes"
98+
- "no"
99+
- "ok"
100+
- "sure"
101+
- "thanks"
102+
- "got it"
103+
- "sounds good"
104+
- "please do"
105+
- "go ahead"
106+
- "that works"
107+
- "hello"
108+
- "hi"
109+
- "what is a variable"
110+
- "how do I print hello world"
111+
- "what does this error mean"
112+
113+
# Categories (minimal, required by classifier)
114+
categories:
115+
- name: computer_science
116+
description: "Computer science and programming"
117+
mmlu_categories: ["computer_science"]
118+
- name: other
119+
description: "General topics"
120+
mmlu_categories: ["other"]
121+
122+
# Routing strategy
123+
strategy: "priority"
124+
125+
# Four decisions: jailbreak block, complex → expensive, simple → cheap, and a fallback
126+
decisions:
127+
- name: "jailbreak_block"
128+
description: "Block jailbreak attempts"
129+
priority: 999
130+
rules:
131+
operator: "AND"
132+
conditions:
133+
- type: "jailbreak"
134+
name: "jailbreak_high"
135+
modelRefs:
136+
- model: "qwen2.5:0.5b"
137+
use_reasoning: false
138+
plugins:
139+
- type: "fast_response"
140+
configuration:
141+
enabled: true
142+
message: "Request blocked: jailbreak attempt detected."
143+
status_code: 403
144+
145+
- name: "complex_query"
146+
description: "Complex queries that need a powerful model"
147+
priority: 200
148+
rules:
149+
operator: "AND"
150+
conditions:
151+
- type: "complexity"
152+
name: "prompt_complexity:hard"
153+
modelRefs:
154+
- model: "expensive-model"
155+
use_reasoning: false
156+
157+
- name: "simple_query"
158+
description: "Simple queries that a cheap model can handle"
159+
priority: 100
160+
rules:
161+
operator: "AND"
162+
conditions:
163+
- type: "complexity"
164+
name: "prompt_complexity:easy"
165+
modelRefs:
166+
- model: "qwen2.5:0.5b"
167+
use_reasoning: false
168+
169+
- name: "fallback"
170+
description: "Fallback for unmatched queries"
171+
priority: 1
172+
rules:
173+
operator: "AND"
174+
conditions:
175+
- type: "domain"
176+
name: "other"
177+
modelRefs:
178+
- model: "qwen2.5:0.5b"
179+
use_reasoning: false
180+
181+
# Default model when no decision matches
182+
default_model: "qwen2.5:0.5b"
183+
184+
# Observability
185+
observability:
186+
metrics:
187+
enabled: true
188+
tracing:
189+
enabled: false
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# Envoy config for session affinity demo
2+
# Stripped down: no ext_authz, just ext_proc + dynamic routing
3+
static_resources:
4+
listeners:
5+
- name: listener_0
6+
address:
7+
socket_address:
8+
address: 0.0.0.0
9+
port_value: 8801
10+
filter_chains:
11+
- filters:
12+
- name: envoy.filters.network.http_connection_manager
13+
typed_config:
14+
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
15+
stat_prefix: ingress_http
16+
access_log:
17+
- name: envoy.access_loggers.stdout
18+
typed_config:
19+
"@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
20+
log_format:
21+
json_format:
22+
time: "%START_TIME%"
23+
request_method: "%REQ(:METHOD)%"
24+
request_path: "%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%"
25+
response_code: "%RESPONSE_CODE%"
26+
upstream_host: "%UPSTREAM_HOST%"
27+
selected_model: "%REQ(X-SELECTED-MODEL)%"
28+
destination: "%REQ(X-VSR-DESTINATION-ENDPOINT)%"
29+
route_config:
30+
name: local_route
31+
virtual_hosts:
32+
- name: local_service
33+
domains: ["*"]
34+
routes:
35+
- match:
36+
prefix: "/"
37+
route:
38+
cluster: dynamic_backend
39+
timeout: 300s
40+
http_filters:
41+
- name: envoy.filters.http.ext_proc
42+
typed_config:
43+
"@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
44+
grpc_service:
45+
envoy_grpc:
46+
cluster_name: extproc_service
47+
allow_mode_override: true
48+
processing_mode:
49+
request_header_mode: "SEND"
50+
response_header_mode: "SEND"
51+
request_body_mode: "BUFFERED"
52+
response_body_mode: "BUFFERED"
53+
request_trailer_mode: "SKIP"
54+
response_trailer_mode: "SKIP"
55+
failure_mode_allow: true
56+
message_timeout: 300s
57+
- name: envoy.filters.http.router
58+
typed_config:
59+
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
60+
suppress_envoy_headers: true
61+
http2_protocol_options:
62+
max_concurrent_streams: 100
63+
stream_idle_timeout: "300s"
64+
request_timeout: "300s"
65+
common_http_protocol_options:
66+
idle_timeout: "300s"
67+
68+
clusters:
69+
- name: extproc_service
70+
connect_timeout: 300s
71+
type: STATIC
72+
lb_policy: ROUND_ROBIN
73+
typed_extension_protocol_options:
74+
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
75+
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
76+
explicit_http_config:
77+
http2_protocol_options:
78+
connection_keepalive:
79+
interval: 300s
80+
timeout: 300s
81+
load_assignment:
82+
cluster_name: extproc_service
83+
endpoints:
84+
- lb_endpoints:
85+
- endpoint:
86+
address:
87+
socket_address:
88+
address: 127.0.0.1
89+
port_value: 50051
90+
91+
# Dynamic backend using original destination (VSR sets x-vsr-destination-endpoint)
92+
- name: dynamic_backend
93+
connect_timeout: 300s
94+
type: ORIGINAL_DST
95+
lb_policy: CLUSTER_PROVIDED
96+
original_dst_lb_config:
97+
use_http_header: true
98+
http_header_name: "x-vsr-destination-endpoint"
99+
typed_extension_protocol_options:
100+
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
101+
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
102+
explicit_http_config:
103+
http_protocol_options: {}
104+
105+
admin:
106+
address:
107+
socket_address:
108+
address: "127.0.0.1"
109+
port_value: 19000

config/testing/router-runtime.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"phase": "ready",
3+
"ready": true,
4+
"message": "Router models are ready. Starting router services...",
5+
"updated_at": "2026-03-06T18:09:13Z"
6+
}

0 commit comments

Comments
 (0)