Skip to content

Commit 0263299

Browse files
Fix errors
1 parent b2d82ae commit 0263299

2 files changed

Lines changed: 171 additions & 4 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
curl.txt
22
state/last_id.txt
3+
logs/
34

45
# Byte-compiled / optimized / DLL files
56
__pycache__/

src/xclient.py

Lines changed: 170 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import datetime as dt
33
import json
44
import logging
5+
import os
56
import re
67
from collections.abc import AsyncIterator
78
from pathlib import Path
@@ -68,6 +69,70 @@ def unescape_entities(text: str) -> str:
6869

6970

7071
_RE_TRAILING_TCO = re.compile(r"(https?://t\.co/\S+)$")
72+
_SENSITIVE_KEYS = {
73+
"authorization",
74+
"cookie",
75+
"x-csrf-token",
76+
"x-guest-token",
77+
"x-twitter-auth-type",
78+
"ct0",
79+
"auth_token",
80+
}
81+
82+
83+
def _mask_secret(value: str, keep: int = 4) -> str:
84+
if not value:
85+
return ""
86+
if len(value) <= keep * 2:
87+
return "*" * len(value)
88+
return f"{value[:keep]}...{value[-keep:]}"
89+
90+
91+
def _redact_mapping(data: dict[str, Any] | None) -> dict[str, Any]:
    """Return a copy of *data* with credential-like values masked.

    Keys are converted to ``str`` in the result. A value is masked when its
    lower-cased key is listed in ``_SENSITIVE_KEYS`` or contains the
    substring ``"token"`` or ``"auth"``; every other value is passed through
    unchanged. ``None`` or an empty mapping yields an empty dict.
    """

    def _looks_sensitive(lowered: str) -> bool:
        return lowered in _SENSITIVE_KEYS or "token" in lowered or "auth" in lowered

    return {
        str(name): _mask_secret(str(item)) if _looks_sensitive(str(name).lower()) else item
        for name, item in (data or {}).items()
    }
102+
103+
104+
def _preview_json(value: Any, limit: int = 1000) -> str:
105+
try:
106+
txt = json.dumps(value, ensure_ascii=False)
107+
except Exception:
108+
txt = str(value)
109+
return txt[:limit]
110+
111+
112+
def _parse_cookie_string(cookie_blob: str) -> dict[str, str]:
113+
out: dict[str, str] = {}
114+
for part in cookie_blob.split(";"):
115+
token = part.strip()
116+
if not token or "=" not in token:
117+
continue
118+
k, v = token.split("=", 1)
119+
out[k.strip()] = v.strip().strip('"')
120+
return out
121+
122+
123+
def _extract_cookies_from_curl(raw_curl: str) -> dict[str, str]:
    """Collect cookies passed via ``-b`` / ``--cookie`` in a cURL command.

    Backslash line continuations are folded into spaces first. Only quoted
    arguments (single or double quotes) are recognised. Matches are merged
    pattern by pattern in the order listed below, so a cookie name found
    by a later pattern overrides an earlier one.
    """
    # Fold "\<newline>" continuations so each flag and its value share a line.
    single_line = re.sub(r"\\\s*\n", " ", raw_curl)
    cookie_flag_patterns = (
        r"(?:^|\s)-b\s+'([^']+)'",
        r'(?:^|\s)-b\s+"([^"]+)"',
        r"(?:^|\s)--cookie\s+'([^']+)'",
        r'(?:^|\s)--cookie\s+"([^"]+)"',
    )
    quoted_values = (
        hit.group(1)
        for pattern in cookie_flag_patterns
        for hit in re.finditer(pattern, single_line)
    )
    jar: dict[str, str] = {}
    for blob in quoted_values:
        jar.update(_parse_cookie_string(blob))
    return jar
71136

72137

73138
def strip_trailing_tco(text: str) -> str:
@@ -93,12 +158,18 @@ def __init__(
93158
curl_path: str = "curl.txt",
94159
timeout_s: float = 30.0,
95160
persist_last_id_path: str | None = None,
161+
debug_http: bool | None = None,
96162
) -> None:
97163
self.curl_path = Path(curl_path)
98164
self.timeout_s = timeout_s
99165
self._session: aiohttp.ClientSession | None = None
100166
self._req: dict[str, Any] = {}
101167
self._last_tweet_id: int = 0
168+
self.debug_http = (
169+
(os.getenv("XCLIENT_DEBUG_HTTP", "").lower() in {"1", "true", "yes"})
170+
if debug_http is None
171+
else debug_http
172+
)
102173
self.persist_last_id_path = (
103174
Path(persist_last_id_path) if persist_last_id_path else None
104175
)
@@ -114,17 +185,95 @@ def _load_curl(self) -> None:
114185
ctx = uncurl.parse_context(
115186
"".join(line.strip() for line in raw.splitlines())
116187
)
188+
parsed_cookies = dict(ctx.cookies) if ctx.cookies else {}
189+
if not parsed_cookies:
190+
parsed_cookies = _extract_cookies_from_curl(raw)
117191
self._req = {
118192
"url": ctx.url,
119193
"headers": dict(ctx.headers) if ctx.headers else {},
120-
"cookies": dict(ctx.cookies) if ctx.cookies else {},
194+
"cookies": parsed_cookies,
121195
"json": json.loads(ctx.data) if ctx.data else None,
122196
"method": ctx.method.upper(),
123197
}
198+
self._log_request_health_hint()
199+
if self.debug_http:
200+
logger.warning(
201+
"Loaded cURL: method=%s url=%s headers=%s cookies=%s has_json=%s",
202+
self._req["method"],
203+
self._req["url"],
204+
sorted(_redact_mapping(self._req["headers"]).keys()),
205+
sorted(_redact_mapping(self._req["cookies"]).keys()),
206+
self._req["json"] is not None,
207+
)
124208
except Exception as e:
125209
logger.critical("Error reading %s: %s", self.curl_path, e)
126210
self._req = {}
127211

212+
def _log_request_health_hint(self) -> None:
    """Log one warning naming the credentials absent from the loaded request.

    Checks, case-insensitively, for the ``authorization`` and
    ``x-csrf-token`` headers and the ``auth_token`` and ``ct0`` cookies in
    ``self._req``. Nothing is logged when all four are present.
    """
    header_names = {str(name).lower() for name in self._req.get("headers", {})}
    cookie_names = {str(name).lower() for name in self._req.get("cookies", {})}

    # (required name, where to look, message if it is absent)
    expectations = (
        ("authorization", header_names, "missing authorization header"),
        ("x-csrf-token", header_names, "missing x-csrf-token header"),
        ("auth_token", cookie_names, "missing auth_token cookie"),
        ("ct0", cookie_names, "missing ct0 cookie"),
    )
    problems = [
        message for required, present, message in expectations if required not in present
    ]

    if problems:
        logger.warning("Request auth hints: %s", "; ".join(problems))
228+
229+
def _log_request_debug(self, *, url: str, method: str, json_payload: Any) -> None:
230+
if not self.debug_http:
231+
return
232+
logger.warning(
233+
"HTTP request %s %s\nheaders=%s\ncookies=%s\njson=%s",
234+
method,
235+
url,
236+
_redact_mapping(self._req.get("headers")),
237+
_redact_mapping(self._req.get("cookies")),
238+
_preview_json(json_payload),
239+
)
240+
241+
def _save_http_error_snapshot(
    self,
    *,
    status: int,
    url: str,
    body: str,
    response_headers: dict[str, str],
    request_payload: Any,
) -> Path | None:
    """Write a JSON snapshot of a failed HTTP exchange under ``logs/``.

    This is a best-effort diagnostic: any failure to create the directory,
    serialise the payload, or write the file is logged as a warning and
    reported by returning ``None`` rather than raising.

    Args:
        status: HTTP status code of the response.
        url: URL that was requested.
        body: Response body text.
        response_headers: Response headers; ``Set-Cookie`` values are
            masked before being written.
        request_payload: JSON payload that was sent with the request.

    Returns:
        Path of the written snapshot, or ``None`` if it could not be saved.
    """
    # Take the time once so the file name and the embedded timestamp agree.
    now = dt.datetime.now()
    stamp = now.strftime("%Y-%m-%d_%H-%M-%S")
    out_path = Path("logs") / f"http_error_{status}_{stamp}.json"

    # Set-Cookie can carry fresh session credentials; mask it just like the
    # request-side secrets. Other response headers stay readable because
    # they are what makes the snapshot useful for diagnosis.
    safe_response_headers = {
        name: _mask_secret(str(value)) if str(name).lower() == "set-cookie" else value
        for name, value in (response_headers or {}).items()
    }

    payload = {
        "timestamp": now.isoformat(),
        "status": status,
        "url": url,
        "request": {
            "method": self._req.get("method", "GET"),
            "headers": _redact_mapping(self._req.get("headers")),
            "cookies": _redact_mapping(self._req.get("cookies")),
            "json": request_payload,
        },
        "response": {
            "headers": safe_response_headers,
            "body": body,
        },
    }
    try:
        # Directory creation is inside the try so that a read-only or
        # otherwise unusable working directory cannot turn an HTTP error
        # report into an unhandled exception.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(
            json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8"
        )
    except Exception as e:
        logger.warning("Could not save HTTP error snapshot: %s", e)
        return None
    return out_path
276+
128277
def _load_last_id(self) -> None:
129278
"""Load last tweet id from disk (if configured)."""
130279
if not self.persist_last_id_path:
@@ -194,14 +343,30 @@ async def fetch_raw(self, *, text: bool = False) -> dict | str:
194343
await self._ensure_session()
195344
assert self._session is not None
196345
url = self._req["url"]
346+
method = self._req.get("method", "GET")
197347
json_payload = self._req.get("json")
348+
self._log_request_health_hint()
349+
self._log_request_debug(url=url, method=method, json_payload=json_payload)
198350

199351
try:
200-
async with self._session.get(url, json=json_payload) as resp:
352+
async with self._session.request(method, url, json=json_payload) as resp:
201353
if resp.status >= 400:
202354
body = await resp.text()
355+
resp_headers = {k: v for k, v in resp.headers.items()}
356+
snapshot = self._save_http_error_snapshot(
357+
status=resp.status,
358+
url=url,
359+
body=body,
360+
response_headers=resp_headers,
361+
request_payload=json_payload,
362+
)
203363
logger.error(
204-
"HTTP %s for %s\nResponse: %s", resp.status, url, body[:2000]
364+
"HTTP %s for %s\nResponse headers: %s\nResponse: %s\nSnapshot: %s",
365+
resp.status,
366+
url,
367+
resp_headers,
368+
body[:2000],
369+
snapshot,
205370
)
206371
return "" if text else {}
207372

@@ -519,4 +684,5 @@ async def _example_stream():
519684

520685

521686
# if __name__ == "__main__":
522-
# asyncio.run(_example_stream())
687+
# asyncio.run(_example_stream())
688+
# asyncio.run(_example_once())

0 commit comments

Comments
 (0)