Skip to content

Commit 8726f29

Browse files
authored
Merge pull request #235 from MorpheusAIs/test
Test -> main
2 parents 87249ed + 83134ce commit 8726f29

5 files changed

Lines changed: 170 additions & 18 deletions

File tree

models/prod_model_price.json

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,78 @@
22
"default_input_price_per_million": "0.50",
33
"default_output_price_per_million": "2.00",
44
"models": {
5+
"glm-5.1": { "input": "1.50", "output": "5.00" },
6+
"glm-5.1:web": { "input": "1.50", "output": "5.00" },
7+
"GLM-5.1": { "input": "1.50", "output": "5.00" },
8+
"glm-5.1-non-thinking": { "input": "1.50", "output": "5.00" },
9+
"glm-5.1-non-thinking:web": { "input": "1.50", "output": "5.00" },
10+
511
"glm-5": { "input": "1.00", "output": "3.20" },
12+
"glm-5:web": { "input": "1.00", "output": "3.20" },
13+
"GLM-5": { "input": "1.00", "output": "3.20" },
614
"glm-4.7": { "input": "0.50", "output": "2.25" },
15+
"glm-4.7:web": { "input": "0.50", "output": "2.25" },
716
"glm-4.7-thinking": { "input": "0.45", "output": "2.00" },
17+
"glm-4.7-thinking:web": { "input": "0.45", "output": "2.00" },
818
"glm-4.7-flash": { "input": "0.10", "output": "0.50" },
19+
"glm-4.7-flash:web": { "input": "0.10", "output": "0.50" },
920

21+
"kimi-k2.6": { "input": "0.50", "output": "3.25" },
22+
"kimi-k2.6:web": { "input": "0.50", "output": "3.25" },
1023
"kimi-k2.5": { "input": "0.60", "output": "3.00" },
24+
"kimi-k2.5:web": { "input": "0.60", "output": "3.00" },
25+
"Kimi-K2.5": { "input": "0.60", "output": "3.00" },
1126
"kimi-k2-thinking": { "input": "0.60", "output": "3.00" },
1227

1328
"gemma-4-31b": { "input": "0.15", "output": "0.40" },
29+
"Gemma-4-31b": { "input": "0.15", "output": "0.40" },
30+
"Gemma-4-31b:web": { "input": "0.15", "output": "0.40" },
1431
"gemma-4-26b-a4b": { "input": "0.15", "output": "0.40" },
32+
"Gemma-4-26b-a4b": { "input": "0.15", "output": "0.40" },
33+
"Gemma-4-26b-a4b:web": { "input": "0.15", "output": "0.40" },
1534

1635
"qwen3-235b": { "input": "0.40", "output": "3.00" },
36+
"qwen3-235b:web": { "input": "0.40", "output": "3.00" },
1737
"qwen-3-235b": { "input": "0.40", "output": "3.00" },
1838
"qwen3-coder-480b-a35b-instruct": { "input": "0.70", "output": "2.80" },
39+
"qwen3-coder-480b-a35b-instruct:web": { "input": "0.70", "output": "2.80" },
1940
"qwen3-coder-480b-a35b": { "input": "0.70", "output": "2.80" },
2041
"qwen3-next-80b": { "input": "0.15", "output": "1.50" },
42+
"qwen3-next-80b:web": { "input": "0.15", "output": "1.50" },
2143
"qwen35-35b-a3b": { "input": "0.30", "output": "1.25" },
44+
"qwen35-35b-a3b:web": { "input": "0.30", "output": "1.25" },
2245
"qwen35-9b": { "input": "0.05", "output": "0.15" },
46+
"qwen35-9b:web": { "input": "0.05", "output": "0.15" },
2347

2448
"arcee-trinity-large-thinking": { "input": "0.30", "output": "1.00" },
49+
"Arcee-Trinity-Large-Thinking": { "input": "0.30", "output": "1.00" },
50+
"Arcee-Trinity-Large-Thinking:web": { "input": "0.30", "output": "1.00" },
2551

2652
"minimax-m2.5": { "input": "0.30", "output": "1.20" },
53+
"MiniMax-M2.5": { "input": "0.30", "output": "1.20" },
54+
"MiniMax-M2.5:web": { "input": "0.30", "output": "1.20" },
55+
"MiniMax-M2.7": { "input": "0.35", "output": "1.50" },
56+
"MiniMax-M2.7:web": { "input": "0.35", "output": "1.50" },
57+
58+
"deepseek-v4-pro": { "input": "1.60", "output": "3.50" },
59+
"deepseek-v4-pro:web": { "input": "1.60", "output": "3.50" },
60+
"deepseek-v4-flash": { "input": "0.15", "output": "0.30" },
61+
"deepseek-v4-flash:web": { "input": "0.15", "output": "0.30" },
2762

2863
"gpt-oss-120b": { "input": "0.07", "output": "0.28" },
64+
"gpt-oss-120b:web": { "input": "0.07", "output": "0.28" },
2965

3066
"hermes-3-llama-3.1-405b": { "input": "1.00", "output": "3.00" },
67+
"hermes-3-llama-3.1-405b:web": { "input": "1.00", "output": "3.00" },
3168
"llama-3.3-70b": { "input": "0.70", "output": "2.50" },
69+
"llama-3.3-70b:web": { "input": "0.70", "output": "2.50" },
3270
"llama-3-3-70b": { "input": "0.70", "output": "2.50" },
3371
"llama-3.2-3b": { "input": "0.10", "output": "0.50" },
72+
"llama-3.2-3b:web": { "input": "0.10", "output": "0.50" },
3473
"llama-3-2-3b": { "input": "0.10", "output": "0.50" },
3574

3675
"mistral-31-24b": { "input": "0.50", "output": "2.00" },
76+
"mistral-31-24b:web": { "input": "0.50", "output": "2.00" },
3777
"mistral-small-24b": { "input": "0.50", "output": "2.00" },
3878

3979
"venice-uncensored": { "input": "0.20", "output": "0.90" },

models/prod_rate_limit.json

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@
3838
"qwen35-9b",
3939
"qwen35-9b:web",
4040
"venice-uncensored",
41-
"venice-uncensored:web"
41+
"venice-uncensored:web",
42+
"deepseek-v4-flash",
43+
"deepseek-v4-flash:web"
4244
],
4345
"priority": 50,
4446
"description": "Medium models with moderate limits"
@@ -50,6 +52,12 @@
5052
"models": [
5153
"glm-5",
5254
"glm-5:web",
55+
"GLM-5",
56+
"glm-5.1",
57+
"glm-5.1:web",
58+
"GLM-5.1",
59+
"glm-5.1-non-thinking",
60+
"glm-5.1-non-thinking:web",
5361
"glm-4.7",
5462
"glm-4.7:web",
5563
"glm-4.7-thinking",
@@ -58,20 +66,35 @@
5866
"glm-4.7-flash:web",
5967
"kimi-k2.5",
6068
"kimi-k2.5:web",
69+
"Kimi-K2.5",
70+
"kimi-k2.6",
71+
"kimi-k2.6:web",
6172
"kimi-k2-thinking",
6273
"kimi-k2-thinking:web",
6374
"gemma-4-31b",
6475
"gemma-4-31b:web",
76+
"Gemma-4-31b",
77+
"Gemma-4-31b:web",
6578
"gemma-4-26b-a4b",
6679
"gemma-4-26b-a4b:web",
80+
"Gemma-4-26b-a4b",
81+
"Gemma-4-26b-a4b:web",
6782
"arcee-trinity-large-thinking",
6883
"arcee-trinity-large-thinking:web",
84+
"Arcee-Trinity-Large-Thinking",
85+
"Arcee-Trinity-Large-Thinking:web",
6986
"qwen3-235b",
7087
"qwen3-235b:web",
7188
"qwen3-coder-480b-a35b-instruct",
7289
"qwen3-coder-480b-a35b-instruct:web",
7390
"minimax-m2.5",
7491
"minimax-m2.5:web",
92+
"MiniMax-M2.5",
93+
"MiniMax-M2.5:web",
94+
"MiniMax-M2.7",
95+
"MiniMax-M2.7:web",
96+
"deepseek-v4-pro",
97+
"deepseek-v4-pro:web",
7598
"gpt-oss-120b",
7699
"gpt-oss-120b:web",
77100
"hermes-3-llama-3.1-405b",

src/api/v1/billing/index.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from ....db.database import get_db_session
1212
from ....db.models import User, LedgerEntryType
13-
from ....dependencies import get_current_user, get_api_key_user
13+
from ....dependencies import get_current_user, get_user_jwt_or_api_key
1414
from ....services.billing_service import billing_service
1515
from ....crud import credits as credits_crud
1616
from ....schemas.billing import (
@@ -40,10 +40,12 @@
4040
async def get_balance(
4141
request: Request,
4242
db: AsyncSession = Depends(get_db_session),
43-
current_user: User = Depends(get_current_user),
43+
current_user: User = Depends(get_user_jwt_or_api_key),
4444
):
4545
"""
4646
Get current credit balance for the authenticated user.
47+
48+
Authenticate with either a Cognito JWT or an ``sk-…`` API key.
4749
4850
Returns:
4951
- paid: Paid bucket balance (posted, holds, available)
@@ -132,11 +134,13 @@ async def list_transactions(
132134
from_date: Optional[datetime] = Query(default=None, alias="from"),
133135
to_date: Optional[datetime] = Query(default=None, alias="to"),
134136
db: AsyncSession = Depends(get_db_session),
135-
current_user: User = Depends(get_current_user),
137+
current_user: User = Depends(get_user_jwt_or_api_key),
136138
):
137139
"""
138140
Get paginated list of credit transactions (ledger entries).
139-
141+
142+
Authenticate with either a Cognito JWT or an ``sk-…`` API key.
143+
140144
Parameters:
141145
- limit: Maximum number of items to return (1-∞)
142146
- offset: Number of items to skip
@@ -226,11 +230,13 @@ async def get_monthly_spending(
226230
year: int = Query(default=None, description="Year for spending data (defaults to current year)"),
227231
mode: SpendingModeEnum = Query(default=SpendingModeEnum.gross),
228232
db: AsyncSession = Depends(get_db_session),
229-
current_user: User = Depends(get_current_user),
233+
current_user: User = Depends(get_user_jwt_or_api_key),
230234
):
231235
"""
232236
Get monthly spending metrics for a year.
233-
237+
238+
Authenticate with either a Cognito JWT or an ``sk-…`` API key.
239+
234240
Parameters:
235241
- year: Year to get spending for (defaults to current year)
236242
- mode:
@@ -300,11 +306,13 @@ async def list_usage(
300306
to_date: Optional[datetime] = Query(default=None, alias="to"),
301307
model: Optional[str] = Query(default=None),
302308
db: AsyncSession = Depends(get_db_session),
303-
current_user: User = Depends(get_current_user),
309+
current_user: User = Depends(get_user_jwt_or_api_key),
304310
):
305311
"""
306312
Get paginated list of usage entries (posted usage charges only).
307-
313+
314+
Authenticate with either a Cognito JWT or an ``sk-…`` API key.
315+
308316
Parameters:
309317
- limit: Maximum number of items to return (1-∞)
310318
- offset: Number of items to skip
@@ -374,11 +382,13 @@ async def list_usage_for_month(
374382
limit: int = Query(default=50, ge=1),
375383
offset: int = Query(default=0, ge=0),
376384
db: AsyncSession = Depends(get_db_session),
377-
current_user: User = Depends(get_current_user),
385+
current_user: User = Depends(get_user_jwt_or_api_key),
378386
):
379387
"""
380388
Get paginated list of usage entries for a specific month.
381-
389+
390+
Authenticate with either a Cognito JWT or an ``sk-…`` API key.
391+
382392
Parameters:
383393
- year: Year
384394
- month: Month (1-12)

src/dependencies.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,11 +553,62 @@ async def get_current_api_key(
553553
return auth.api_key
554554

555555

556+
# ---------------------------------------------------------------------------
557+
# Union auth: accept either Cognito JWT or sk-… API key
558+
# ---------------------------------------------------------------------------
559+
560+
async def get_user_jwt_or_api_key(
    db: AsyncSession = Depends(get_db_session),
    token: Optional[HTTPAuthorizationCredentials] = Depends(oauth2_scheme_optional),
    api_key_str: Optional[str] = Security(api_key_header),
) -> User:
    """
    Resolve the requesting :class:`User` from a Cognito JWT or an ``sk-`` API key.

    The credential is taken from the bearer-parsed ``token`` when one is
    present, otherwise from the raw ``api_key_str`` header value (with a
    leading ``"Bearer "`` removed). The credential prefix alone picks the
    authentication path:

    * starts with ``sk-``: handled as an API key via :func:`get_api_key_auth`
    * anything else: handled as a JWT via :func:`get_current_user`

    Args:
        db: Database session, used for the local-testing user and forwarded
            to :func:`get_current_user`.
        token: Bearer credentials parsed by ``oauth2_scheme_optional``, or
            ``None`` when that scheme produced nothing.
        api_key_str: Raw header value supplied by ``api_key_header``, or
            ``None`` when absent.

    Returns:
        The authenticated user.

    Raises:
        HTTPException: 401 ``"Not authenticated"`` when no credential was
            supplied at all. The delegated dependencies may raise their own
            errors.
    """
    # Imported at call time, exactly where the original placed it.
    from src.core.local_testing import is_local_testing_mode, get_or_create_test_user

    # Local testing short-circuits every credential check.
    if is_local_testing_mode():
        return await get_or_create_test_user(db)

    # Prefer the bearer-parsed value; otherwise fall back to the raw header so
    # clients that omit the scheme prefix are still recognised.
    if token:
        credential = token.credentials
    else:
        credential = api_key_str or ""

    bearer_prefix = "Bearer "
    if credential.startswith(bearer_prefix):
        credential = credential[len(bearer_prefix):]

    if not credential:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Not authenticated",
            headers={"WWW-Authenticate": "Bearer"},
        )

    if not credential.startswith("sk-"):
        # NOTE(review): `token` is forwarded unchanged, so it is None when the
        # credential came from the raw-header fallback -- confirm that
        # get_current_user answers with a 401 rather than failing on None.
        return await get_current_user(db=db, token=token)

    # NOTE(review): get_api_key_auth is invoked directly, outside FastAPI's
    # dependency injection -- confirm it needs no injected parameters beyond
    # api_key_str.
    api_key_auth = await get_api_key_auth(api_key_str=credential)
    return api_key_auth.user
604+
605+
556606
# ---------------------------------------------------------------------------
557607
# Type aliases for commonly used dependency chains
558608
# ---------------------------------------------------------------------------
559609
CurrentUser = Annotated[User, Depends(get_current_user)]
560610
APIKeyUser = Annotated[User, Depends(get_api_key_user)]
561611
CurrentAPIKey = Annotated[APIKey, Depends(get_current_api_key)]
562612
APIKeyAuthentication = Annotated[APIKeyAuth, Depends(get_api_key_auth)]
613+
JwtOrApiKeyUser = Annotated[User, Depends(get_user_jwt_or_api_key)]
563614
DBSession = Annotated[AsyncSession, Depends(get_db_session)]

src/services/proxy_router_service.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,20 @@ def get_http_status_code(self) -> int:
103103
return error_type_mapping.get(self.error_type, 500)
104104

105105

106+
# Error substrings from the proxy router that indicate permanent failures
# where retrying the same request will never succeed.
# Entries must be lowercase: they are matched against the lowercased body.
NON_RETRIABLE_ERROR_PATTERNS = [
    "llm tee verification failed",
    "p-node tee attestation failed",
]


def _is_non_retriable_error(response_text: str) -> bool:
    """Return True if the response body contains a known non-retriable error.

    The check is a case-insensitive substring search against
    ``NON_RETRIABLE_ERROR_PATTERNS``.

    Args:
        response_text: Body text of the failed response. An empty or missing
            (``None``) body is treated as containing no pattern.

    Returns:
        True when any known pattern occurs in the body, otherwise False.
    """
    # Guard against an absent body so classification never raises while an
    # upstream error is already being handled.
    if not response_text:
        return False

    text_lower = response_text.lower()
    return any(pattern in text_lower for pattern in NON_RETRIABLE_ERROR_PATTERNS)
118+
119+
106120

107121

108122
async def _execute_request(
@@ -214,21 +228,35 @@ async def _execute_request(
214228
error = response.text,
215229
event_type="proxy_http_error")
216230

231+
error_type = "http_error"
232+
if status_code >= 500:
233+
error_type = "server_error"
234+
elif status_code >= 400:
235+
error_type = "client_error"
236+
237+
non_retriable = _is_non_retriable_error(response.text)
238+
if non_retriable:
239+
req_logger.error(
240+
"Non-retriable error detected, skipping remaining retries",
241+
status_code=status_code,
242+
url=e.response.url,
243+
method=method,
244+
error=response.text,
245+
event_type="proxy_non_retriable_error")
246+
raise ProxyRouterServiceError(
247+
sanitize_error_message(f"HTTP {status_code}: {response.text}"),
248+
status_code=status_code,
249+
error_type=error_type
250+
)
251+
217252
if attempt == max_retries - 1:
218-
# If this was the last attempt, raise with status code info
219253
req_logger.error("Proxy router request failed after all retries",
220254
max_retries=max_retries,
221255
url=e.response.url,
222256
method=method,
223257
error=response.text,
224258
status_code=status_code,
225259
event_type="proxy_request_failed")
226-
error_type = "http_error"
227-
if status_code >= 500:
228-
error_type = "server_error"
229-
elif status_code >= 400:
230-
error_type = "client_error"
231-
232260
raise ProxyRouterServiceError(
233261
sanitize_error_message(f"HTTP {status_code}: {response.text}"),
234262
status_code=status_code,

0 commit comments

Comments
 (0)