-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi_diagnostics.py
More file actions
181 lines (150 loc) · 5.72 KB
/
Copy pathapi_diagnostics.py
File metadata and controls
181 lines (150 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
from __future__ import annotations
import argparse
import json
import os
import time
from pathlib import Path
from typing import Any
import httpx
try:
    from dotenv import load_dotenv
except ModuleNotFoundError:  # pragma: no cover - optional dependency fallback

    def load_dotenv(*_ignored_args: Any, **_ignored_kwargs: Any) -> bool:
        """Stand-in used when the optional ``dotenv`` package is not installed.

        Accepts and ignores any arguments and always returns ``False``.
        """
        return False


# Directory containing this file; the ``.env`` file is looked up here.
ROOT_DIR = Path(__file__).resolve().parent
def _load_env_file(path: Path) -> None:
if not path.exists():
return
try:
for raw_line in path.read_text().splitlines():
line = raw_line.strip()
if not line or line.startswith("#"):
continue
if line.startswith("export "):
line = line[len("export ") :].strip()
if "=" not in line:
continue
key, value = line.split("=", 1)
key = key.strip()
value = value.strip()
if not key:
continue
if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
value = value[1:-1]
if key not in os.environ:
os.environ[key] = value
except OSError:
return
# Populate the environment from the .env file next to this module: first via
# load_dotenv (a no-op when the fallback stub is in use), then via the local
# parser, which only fills in variables that are still unset.
load_dotenv(ROOT_DIR / ".env")
_load_env_file(ROOT_DIR / ".env")

# Defaults for the CLI flags; each can be overridden through the environment.
DEFAULT_API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
def _is_gemini_endpoint(api_base_url: str) -> bool:
return "generativelanguage.googleapis.com" in api_base_url.lower()
def _is_hf_router_endpoint(api_base_url: str) -> bool:
lowered = api_base_url.lower()
return "router.huggingface.co" in lowered or "api-inference.huggingface.co" in lowered
def resolve_api_key(api_base_url: str, explicit: str | None = None) -> tuple[str | None, str]:
    """Pick the API key to use for ``api_base_url`` and report where it came from.

    A truthy ``explicit`` key always wins. Otherwise the environment variables
    ``GEMINI_API_KEY``, ``HF_TOKEN`` and ``OPENAI_API_KEY`` are tried in an
    order that depends on the endpoint, and the first non-empty one is used.

    Args:
        api_base_url: Base URL of the API being probed.
        explicit: Key supplied directly by the caller, if any.

    Returns:
        ``(key, source)`` where ``source`` is ``"explicit"``, the name of the
        environment variable that supplied the key, or ``"none"`` (with
        ``key`` set to ``None``) when nothing was found.
    """
    if explicit:
        return explicit, "explicit"

    # The provider-specific variable goes first; the others act as fallbacks.
    if _is_gemini_endpoint(api_base_url):
        preference = ("GEMINI_API_KEY", "OPENAI_API_KEY", "HF_TOKEN")
    elif _is_hf_router_endpoint(api_base_url):
        preference = ("HF_TOKEN", "OPENAI_API_KEY", "GEMINI_API_KEY")
    else:
        preference = ("OPENAI_API_KEY", "HF_TOKEN", "GEMINI_API_KEY")

    for env_name in preference:
        candidate = os.getenv(env_name)
        if candidate:
            return candidate, env_name
    return None, "none"
def mask_token(token: str | None) -> str:
    """Return a display-safe rendering of ``token``.

    ``None`` or an empty string yields ``"missing"``. Tokens of up to eight
    characters are replaced entirely by asterisks of the same length. Longer
    tokens keep their first and last four characters around ``"..."``.
    """
    length = len(token) if token else 0
    if length == 0:
        return "missing"
    if length > 8:
        return token[:4] + "..." + token[-4:]
    return "*" * length
def extract_debug_headers(headers: httpx.Headers) -> dict[str, str]:
    """Return the response headers that help diagnose throttling and tracing.

    A header is kept when its lower-cased name contains ``"ratelimit"`` or is
    one of ``retry-after``, ``x-request-id`` or ``request-id``.

    Args:
        headers: Header mapping providing ``keys()`` and ``get()``.

    Returns:
        The selected headers, ordered case-insensitively by name. Values come
        from ``headers.get(name, "")``.
    """
    exact_names = {"retry-after", "x-request-id", "request-id"}
    matched = [
        name
        for name in headers.keys()
        if "ratelimit" in name.lower() or name.lower() in exact_names
    ]
    ordered = sorted(set(matched), key=str.lower)
    return {name: headers.get(name, "") for name in ordered}
def request_with_metrics(
    client: httpx.Client,
    method: str,
    path: str,
    *,
    payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Send one request through ``client`` and summarize the outcome.

    Args:
        client: HTTP client used to issue the request.
        method: HTTP method, e.g. ``"GET"`` or ``"POST"``.
        path: Request path passed to ``client.request``.
        payload: Optional JSON body; forwarded as ``json=payload``.

    Returns:
        A dict with ``path``, ``status_code``, ``latency_ms`` (wall-clock time
        of the request in milliseconds, rounded to two decimals),
        ``rate_headers`` (see ``extract_debug_headers``) and ``body_preview``
        (the first 240 characters of the response text).

    Exceptions raised by ``client.request`` are not caught here.
    """
    started = time.perf_counter()
    response = client.request(method, path, json=payload)
    finished = time.perf_counter()

    latency_ms = round((finished - started) * 1000.0, 2)
    metrics: dict[str, Any] = {"path": path}
    metrics["status_code"] = response.status_code
    metrics["latency_ms"] = latency_ms
    metrics["rate_headers"] = extract_debug_headers(response.headers)
    metrics["body_preview"] = response.text[:240]
    return metrics
def main() -> int:
    """Run the diagnostics CLI and print a JSON summary to stdout.

    Parses the command-line flags, resolves an API key, then issues
    ``GET /models`` and, unless ``--skip-chat`` is given, a minimal
    ``POST /chat/completions``. Each probe is recorded in the summary's
    ``checks`` list, either as request metrics or as ``{"path", "error"}``
    when the request raised.

    Returns:
        ``1`` when no API key could be resolved, otherwise ``0``.
    """
    cli = argparse.ArgumentParser(description="Probe OpenAI-compatible API status, latency, and rate-limit headers.")
    cli.add_argument("--api-base-url", default=DEFAULT_API_BASE_URL)
    cli.add_argument("--model", default=DEFAULT_MODEL_NAME)
    cli.add_argument("--api-key", default=None)
    cli.add_argument("--skip-chat", action="store_true")
    options = cli.parse_args()

    token, token_source = resolve_api_key(options.api_base_url, explicit=options.api_key)

    checks: list[dict[str, Any]] = []
    report: dict[str, Any] = {
        "api_base_url": options.api_base_url,
        "model": options.model,
        "api_key_source": token_source,
        "api_key_masked": mask_token(token),
        "checks": checks,
    }

    def emit_report() -> None:
        print(json.dumps(report, indent=2, sort_keys=True))

    if not token:
        report["error"] = "No API key found. Set GEMINI_API_KEY, HF_TOKEN, OPENAI_API_KEY, or --api-key."
        emit_report()
        return 1

    def run_probe(
        http: httpx.Client,
        method: str,
        path: str,
        payload: dict[str, Any] | None = None,
    ) -> None:
        # A failing probe is recorded in the report instead of aborting the run.
        try:
            checks.append(request_with_metrics(http, method, path, payload=payload))
        except Exception as exc:
            checks.append({"path": path, "error": str(exc)})

    auth_headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    base_url = options.api_base_url.rstrip("/")
    with httpx.Client(base_url=base_url, headers=auth_headers, timeout=45.0) as client:
        run_probe(client, "GET", "/models")

        if not options.skip_chat:
            chat_request = {
                "model": options.model,
                "temperature": 0,
                "max_tokens": 5,
                "messages": [{"role": "user", "content": "Respond with exactly: ok"}],
            }
            run_probe(client, "POST", "/chat/completions", chat_request)

    emit_report()
    return 0
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())