-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprobe_nfo_api.py
More file actions
349 lines (302 loc) · 13.5 KB
/
probe_nfo_api.py
File metadata and controls
349 lines (302 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
#!/usr/bin/env python3
"""
probe_nfo_api.py — probe getnfo API support across all indexers.
Uses per-indexer Newznab search (/{id}/api?t=search) since Prowlarr's
REST /api/v1/search endpoint doesn't return results on this setup.
Strategy:
Round 1 — search title[0] on every non-skipped indexer, test getnfo.
Round N — for indexers still INCONCLUSIVE (ERROR-300), try next title.
Early exit — once a verdict is definitive (WORKS, UNSUPPORTED, CONNECT-ERROR or NO-KEY), stop probing that indexer.
Cost: up to N_titles × N_indexers search + getnfo pairs, early-exit cuts it.
Usage:
docker cp /home/<user>/DK/probe_nfo_api.py dksubs-proxy:/probe_nfo_api.py
docker exec -it dksubs-proxy python3 /probe_nfo_api.py [--skip ID,...] [titles...]
"""
import os, sys, re, json, time, xml.etree.ElementTree as ET
from urllib.request import urlopen, Request
from urllib.error import HTTPError
from urllib.parse import urlparse, parse_qs, quote_plus
# Base URL of the Prowlarr instance, overridable through the PROWLARR_URL
# environment variable. Any trailing "/" is stripped so that request paths
# can be appended with a single "/".
PROWLARR_URL = os.environ.get("PROWLARR_URL", "http://Prowlarr:9696").rstrip("/")
# Prowlarr API key, read from PROWLARR_API_KEY (empty string when unset).
PROWLARR_KEY = os.environ.get("PROWLARR_API_KEY", "")
# Timeout, in seconds, handed to urlopen() for every HTTP request.
TIMEOUT = 15
# Value sent in the User-Agent header of every request.
UA = "DKSubs-Proxy/5.3"
# Search queries used when no titles are given on the command line.
DEFAULT_TITLES = [
"Breaking Bad S01E01",
"The Wire S01E01",
"Game of Thrones S01E01",
"Sopranos S01E01",
"Better Call Saul S01E01",
"Succession S01E01",
"The Boys S01E01",
"Peaky Blinders S01E01",
"Chernobyl S01E01",
"True Detective S01E01",
]
# Verdict keys where further probing is pointless: once an indexer has one of
# these, main() no longer tries additional titles for it.
DEFINITIVE = {"WORKS", "UNSUPPORTED", "CONNECT-ERROR", "NO-KEY"}
def load_indexer_env():
    """Collect per-indexer credentials from the environment.

    Scans ``os.environ`` for variables named ``INDEXER_<id>_APIKEY`` and
    ``INDEXER_<id>_BASEURL``.

    Returns:
        A dict mapping the integer indexer id to a dict that holds whichever
        of the keys ``"APIKEY"`` and ``"BASEURL"`` were present.
    """
    pattern = re.compile(r'^INDEXER_(\d+)_(APIKEY|BASEURL)$')
    cfg = {}
    for name, value in os.environ.items():
        m = pattern.match(name)
        if not m:
            continue
        idx, field = int(m.group(1)), m.group(2)
        # Only the base URL gets its trailing "/" trimmed (so "/api" can be
        # appended cleanly). An API key is an opaque token and must be kept
        # verbatim, even if it happens to end in "/".
        if field == "BASEURL":
            value = value.rstrip("/")
        cfg.setdefault(idx, {})[field] = value
    return cfg
def http_get(url, headers=None):
    """Perform a GET request and never raise for request-level failures.

    Args:
        url: Absolute URL to fetch.
        headers: Optional dict of extra request headers; merged over the
            default ``User-Agent`` header.

    Returns:
        ``(status_code, body_text)`` when the server answered (including HTTP
        error statuses), or ``(None, error_message)`` when no HTTP response
        was obtained (malformed URL, connection failure, timeout, ...).
        The body is decoded as UTF-8 with undecodable bytes replaced.
    """
    merged = {"User-Agent": UA}
    if headers:
        merged.update(headers)
    try:
        # Request() itself raises ValueError for a malformed / scheme-less
        # URL, so it has to live inside the try block as well.
        req = Request(url, headers=merged)
        with urlopen(req, timeout=TIMEOUT) as resp:
            return resp.getcode(), resp.read().decode("utf-8", errors="replace")
    except HTTPError as e:
        # The server answered with an error status; the body is best-effort.
        try:
            body = e.read().decode("utf-8", errors="replace")
        except Exception:
            body = ""
        return e.code, body
    except Exception as e:
        return None, str(e)
def classify_nfo(body: str):
    """Classify the body of a getnfo / info-URL response.

    Args:
        body: Decoded response text (may be empty or None).

    Returns:
        ``(verdict_key, detail)`` where ``verdict_key`` is one of ``"WORKS"``,
        ``"UNSUPPORTED"``, ``"NO-NFO"`` or ``"RSS-FEED"`` and ``detail`` is a
        short human-readable explanation.
    """
    if not body or not body.strip():
        return "NO-NFO", "empty response"
    t = body.lstrip()
    if t.startswith("<rss") or "<channel>" in t[:300]:
        return "RSS-FEED", f"RSS feed returned instead of NFO ({len(body)} bytes)"
    head = t[:500]
    if "<error " in head:
        code_match = re.search(r'code="(\d+)"', head)
        code = code_match.group(1) if code_match else "?"
        if code == "202":
            return "UNSUPPORTED", "error 202 — endpoint not supported"
        if code == "300":
            return "NO-NFO", "error 300 — no NFO for this release"
        desc_match = re.search(r'description="([^"]+)"', head)
        detail = desc_match.group(1)[:70] if desc_match else ""
        return "NO-NFO", f"error {code}: {detail}"
    # Case-insensitive: error pages written as <HTML>/<!doctype html> must not
    # slip through to the length check below and be reported as an NFO.
    lowered = t[:200].lower()
    if lowered.startswith("<!doctype") or "<html" in lowered:
        return "NO-NFO", "HTML response"
    if len(body) > 150:
        # Quick sanity: real NFOs contain at least some alphanumeric prose,
        # so a long body made of undecodable/binary residue is rejected.
        min_readable = 10
        readable = sum(1 for ch in body if ch.isalnum())
        if readable >= min_readable:
            return "WORKS", f"NFO text {len(body)} bytes"
        return "NO-NFO", f"{len(body)}-byte response without readable text"
    return "NO-NFO", f"short response {len(body)}b: {body[:60]!r}"
def extract_nzb_id(guid: str) -> str:
    """Derive the release ID from a feed item's guid.

    Lookup order: the ``id`` query parameter, then the ``guid`` query
    parameter, then the last non-empty path segment.

    Args:
        guid: Text of the item's ``<guid>`` element (a URL or a bare ID).

    Returns:
        The extracted ID, or ``""`` when nothing usable is found or the guid
        cannot be parsed as a URL.
    """
    # Element text can carry surrounding whitespace/newlines from the feed's
    # formatting; that must not end up inside the ID.
    guid = (guid or "").strip()
    if not guid:
        return ""
    try:
        parsed = urlparse(guid)
    except ValueError:
        # e.g. an unbalanced "[" in the host part; treat as "no ID".
        return ""
    params = parse_qs(parsed.query)
    for key in ("id", "guid"):
        if key in params:
            return params[key][0]
    segments = [seg for seg in parsed.path.split("/") if seg]
    return segments[-1] if segments else ""
def newznab_search(indexer_id: int, query: str) -> list[dict]:
    """
    Run a Newznab ``t=search`` against one indexer through Prowlarr's
    per-indexer proxy endpoint.

    Returns one dict per feed item that has both a title and a guid:
    {title, guid, nzb_id, info_url}. Any non-200 answer, empty body or
    unparsable XML yields an empty list.
    """
    endpoint = f"{PROWLARR_URL}/{indexer_id}/api"
    status, payload = http_get(
        f"{endpoint}?t=search&q={quote_plus(query)}&apikey={PROWLARR_KEY}")
    if not (status == 200 and payload.strip()):
        return []
    try:
        feed = ET.fromstring(payload)
    except ET.ParseError:
        return []

    attr_ns = {"newznab": "http://www.newznab.com/DTD/2010/feeds/attributes/"}
    hits = []
    for entry in feed.iter("item"):
        title_node, guid_node = entry.find("title"), entry.find("guid")
        if title_node is None or guid_node is None:
            continue
        guid_text = guid_node.text or ""
        # "info" attr = direct NFO URL (e.g. NZBFinder provides this);
        # when several are present the last one is kept.
        info_values = [
            node.get("value", "")
            for node in entry.findall("newznab:attr", attr_ns)
            if node.get("name") == "info"
        ]
        hits.append({
            "title": title_node.text or "",
            "guid": guid_text,
            "nzb_id": extract_nzb_id(guid_text),
            "info_url": info_values[-1] if info_values else "",
        })
    return hits
def probe_getnfo(prowlarr_url: str, prowlarr_key: str,
                 indexer_id: int, nzb_id: str,
                 direct_base: str = "", direct_key: str = ""):
    """
    Probe ``t=getnfo`` for one release, first through Prowlarr, then directly.

    Stage 1: Prowlarr-proxied getnfo.
      error 202 means Prowlarr can't proxy this function (NOT that the
      indexer lacks getnfo) — fall through to Stage 2 in that case.
      error 300 means Prowlarr proxied it but no NFO for this release.
      WORKS means the proxied response is real NFO text.
    Stage 2: Direct indexer API call with the real indexer credentials
      (only when both ``direct_base`` and ``direct_key`` are given).
      error 202 here means the indexer itself doesn't support getnfo.

    Args:
        prowlarr_url: Prowlarr base URL without trailing slash.
        prowlarr_key: Prowlarr API key.
        indexer_id: Prowlarr indexer id used in the proxy path.
        nzb_id: Release ID to request the NFO for.
        direct_base: Indexer base URL for Stage 2 (optional).
        direct_key: Indexer API key for Stage 2 (optional).

    Returns:
        ``(verdict_key, detail)``; the verdict is whatever classify_nfo()
        yields, or "CONNECT-ERROR" / "NO-NFO" for transport-level outcomes.
    """
    # Escape the ID for use as a query value ("&", "#", "=", spaces would
    # otherwise corrupt the request). "%" is left alone so an ID that is
    # already percent-encoded is not double-encoded.
    id_q = quote_plus(nzb_id, safe="%")

    # Stage 1 — via Prowlarr proxy
    url1 = f"{prowlarr_url}/{indexer_id}/api?t=getnfo&apikey={prowlarr_key}&id={id_q}"
    code1, body1 = http_get(url1)
    stage1_verdict, stage1_detail = None, ""
    if code1 is not None:
        stage1_verdict, stage1_detail = classify_nfo(body1)
        if stage1_verdict == "WORKS":
            return "WORKS", f"{stage1_detail} [via Prowlarr proxy]"
        # Any other verdict (202, 300, HTML, ...) falls through to Stage 2.

    # Stage 2 — direct indexer API (tests whether the indexer itself supports getnfo)
    if direct_base and direct_key:
        url2 = f"{direct_base}/api?t=getnfo&apikey={direct_key}&id={id_q}&raw=1"
        code2, body2 = http_get(url2)
        if code2 is None:
            return "CONNECT-ERROR", f"{body2[:80]} (direct)"
        if code2 == 403:
            return "NO-NFO", "HTTP 403 (direct) — blocked (wrong key/IP/UA?)"
        if code2 == 404:
            return "NO-NFO", "HTTP 404 (direct) — endpoint missing"
        if code2 != 200:
            return "NO-NFO", f"HTTP {code2} (direct)"
        v2, d2 = classify_nfo(body2)
        return v2, f"{d2} [direct]"

    # No direct creds — Stage 1 result is all we have.
    if stage1_verdict == "UNSUPPORTED":
        # A proxy-side 202 is not authoritative (see docstring), so it must
        # not be reported as the definitive UNSUPPORTED verdict.
        return "NO-NFO", f"{stage1_detail} [via Prowlarr proxy, no direct creds — not authoritative]"
    if stage1_verdict is not None:
        return stage1_verdict, f"{stage1_detail} [via Prowlarr proxy, no direct creds]"
    # Only reachable when Stage 1 got no HTTP response at all; body1 then
    # holds the error text from http_get().
    return "NO-NFO", f"no NFO found (Stage 1 connection failed: {body1[:80]})"
def probe_info_url(info_url: str):
    """Fetch a release's ``info`` URL and classify what comes back.

    Args:
        info_url: URL taken from the feed item's ``info`` attribute.

    Returns:
        ``(None, None)`` when ``info_url`` is empty; otherwise
        ``(verdict_key, detail)`` — "CONNECT-ERROR" when no response was
        obtained, "NO-NFO" for a non-200 status or an unusable URL, else the
        result of classify_nfo() on the body.
    """
    if not info_url:
        return None, None
    # The URL comes from the indexer's feed, so validate it before fetching:
    # a relative or malformed value would raise inside the HTTP layer, and
    # non-HTTP schemes (file:, ftp:, ...) must never be opened.
    target = info_url.strip()
    try:
        scheme = urlparse(target).scheme
    except ValueError:
        scheme = ""
    if scheme not in ("http", "https"):
        return "NO-NFO", f"unusable info URL: {info_url[:60]!r}"
    code, body = http_get(target)
    if code is None:
        return "CONNECT-ERROR", body[:80]
    if code != 200:
        return "NO-NFO", f"HTTP {code}"
    return classify_nfo(body)
def prowlarr_indexer_names() -> dict:
    """Fetch the indexer list from Prowlarr's ``/api/v1/indexer`` endpoint.

    Returns:
        A dict mapping each indexer's ``id`` to its ``name``, or ``{}`` when
        the request does not return HTTP 200 or the payload is not a JSON
        list of objects carrying both fields.
    """
    code, body = http_get(f"{PROWLARR_URL}/api/v1/indexer",
                          {"X-Api-Key": PROWLARR_KEY})
    if code != 200:
        return {}
    try:
        return {ix["id"]: ix["name"] for ix in json.loads(body)}
    except (ValueError, KeyError, TypeError):
        # ValueError: invalid JSON; KeyError/TypeError: unexpected shape.
        # Deliberately not a bare except, so Ctrl-C / SystemExit still work.
        return {}
def main():
    """Probe every non-skipped indexer for getnfo support and print a report.

    Command line: ``[--skip ID,...] [titles...]``. Titles default to
    DEFAULT_TITLES. For each title, every indexer that has no definitive
    verdict yet is searched and the first hit is used to test getnfo (and,
    as a fallback, the hit's info URL). A summary grouped by verdict and a
    recommendation for the NFO_INDEXERS setting are printed at the end.

    Exits with status 1 when PROWLARR_API_KEY is unset or the indexer list
    cannot be fetched, and with status 2 on a malformed ``--skip`` option.
    """
    args = sys.argv[1:]
    skip_ids: set[int] = set()
    titles = []
    i = 0
    while i < len(args):
        if args[i] == "--skip":
            if i + 1 >= len(args):
                # Without this, a trailing "--skip" would be searched as a title.
                print("[!] --skip requires a comma-separated list of indexer IDs.")
                sys.exit(2)
            try:
                # Union so that repeated --skip options accumulate.
                skip_ids |= {int(x) for x in args[i + 1].split(",") if x.strip()}
            except ValueError:
                print(f"[!] Invalid --skip value {args[i + 1]!r}: expected comma-separated integer IDs.")
                sys.exit(2)
            i += 2
        else:
            titles.append(args[i])
            i += 1
    if not titles:
        titles = DEFAULT_TITLES
    skip_label = f" (skipping IDs: {sorted(skip_ids)})" if skip_ids else ""
    print(f"\nDKSubs NFO API probe — {len(titles)} title(s), early-exit on definitive verdict{skip_label}")
    print(f"Prowlarr: {PROWLARR_URL} (Newznab per-indexer search)")
    print("=" * 72)
    if not PROWLARR_KEY:
        print("[!] PROWLARR_API_KEY not set. Source .env first.")
        sys.exit(1)
    ix_env = load_indexer_env()
    ix_names = prowlarr_indexer_names()
    if not ix_names:
        print("[!] Could not fetch indexer list from Prowlarr.")
        sys.exit(1)
    # All indexer IDs minus skipped
    all_ids = sorted(ix_id for ix_id in ix_names if ix_id not in skip_ids)
    print(f"Indexers to probe: {[f'{i}={ix_names[i]}' for i in all_ids]}\n")
    # verdicts[ix_id] = (verdict_key, detail, title, via_info_url)
    verdicts: dict[int, tuple] = {}
    total_searches = 0
    total_getnfo = 0
    for title_idx, title in enumerate(titles):
        undecided = [ix for ix in all_ids if verdicts.get(ix, (None,))[0] not in DEFINITIVE]
        if not undecided:
            print(f"\n All indexers decided after {title_idx} title(s) — stopping early.")
            break
        print(f"[{title_idx+1}/{len(titles)}] Title: {title!r} ({len(undecided)} indexer(s) still undecided)")
        for ix_id in undecided:
            name = ix_names.get(ix_id, f"#{ix_id}")
            cfg = ix_env.get(ix_id, {})
            base_url = cfg.get("BASEURL", "")
            api_key = cfg.get("APIKEY", "")
            if not base_url or not api_key:
                verdicts[ix_id] = ("NO-KEY", "no BASEURL/APIKEY in .env", title, False)
                print(f" {name:<20} → NO-KEY")
                continue
            # Search this indexer
            print(f" {name:<20} searching... ", end="", flush=True)
            results = newznab_search(ix_id, title)
            total_searches += 1
            if not results:
                print("0 results (skipping this title)")
                continue
            r0 = results[0]
            nzb_id = r0["nzb_id"]
            info_url = r0["info_url"]
            print(f"{len(results)} results, nzb_id={nzb_id!r} → ", end="", flush=True)
            if not nzb_id:
                print(f"could not extract NZB ID from guid: {r0['guid'][:50]}")
                continue
            # Try getnfo — Stage 1 via Prowlarr proxy, Stage 2 direct fallback
            verdict, detail = probe_getnfo(
                PROWLARR_URL, PROWLARR_KEY, ix_id, nzb_id,
                direct_base=base_url, direct_key=api_key)
            total_getnfo += 1
            time.sleep(0.4)  # brief pause between indexer requests
            # If getnfo gave nothing but we have an info_url, try that too
            if verdict == "NO-NFO" and info_url:
                v2, d2 = probe_info_url(info_url)
                total_getnfo += 1
                if v2 == "WORKS":
                    verdict, detail = "WORKS", f"{d2} [via info_url attr]"
            verdicts[ix_id] = (verdict, detail, title, bool(info_url and verdict == "WORKS" and "info_url" in detail))
            symbol = "✓" if verdict == "WORKS" else ("✗" if verdict == "UNSUPPORTED" else "?")
            print(f"[{symbol}] {verdict} {detail}")
        print()

    # ── Final summary ──────────────────────────────────────────────────────────
    print("=" * 72)
    print(f"SUMMARY ({total_searches} searches, {total_getnfo} getnfo/info calls)\n")

    def with_verdict(key):
        # (id, name, verdict tuple) for every indexer whose verdict key is `key`.
        return [(ix, ix_names.get(ix, f"#{ix}"), v) for ix, v in verdicts.items() if v[0] == key]

    works = with_verdict("WORKS")
    unsupported = with_verdict("UNSUPPORTED")
    # CONNECT-ERROR is definitive (probing stops), so it needs its own section;
    # otherwise such indexers would not show up anywhere in the summary.
    connect_error = with_verdict("CONNECT-ERROR")
    no_key = with_verdict("NO-KEY")
    inconclusive = [(ix, ix_names.get(ix, f"#{ix}"), v) for ix, v in verdicts.items()
                    if v[0] not in DEFINITIVE and v[0] is not None]
    never_tested = [(ix, ix_names.get(ix, f"#{ix}")) for ix in all_ids if ix not in verdicts]
    if works:
        print(" ✓ WORKING getnfo:")
        for ix, name, v in sorted(works):
            print(f" [{ix:>2}] {name:<22} {v[1]} (via {v[2]!r})")
    if unsupported:
        print("\n ✗ UNSUPPORTED (error 202 or endpoint missing):")
        for ix, name, v in sorted(unsupported):
            print(f" [{ix:>2}] {name:<22} {v[1]}")
    if connect_error:
        print("\n ! CONNECT-ERROR (direct indexer API unreachable):")
        for ix, name, v in sorted(connect_error):
            print(f" [{ix:>2}] {name:<22} {v[1]}")
    if inconclusive:
        print("\n ? INCONCLUSIVE (has endpoint, no NFOs found across all tested titles):")
        for ix, name, v in sorted(inconclusive):
            print(f" [{ix:>2}] {name:<22} last result: {v[0]} — {v[1]}")
    if never_tested:
        print("\n — NEVER GOT RESULTS (zero search hits across all titles):")
        for ix, name in sorted(never_tested):
            print(f" [{ix:>2}] {name}")
    if no_key:
        print("\n ! NO-KEY (missing from .env):")
        for ix, name, v in sorted(no_key):
            print(f" [{ix:>2}] {name}")

    # Actionable recommendation
    current_nfo = set(x.strip() for x in os.environ.get("NFO_INDEXERS", "").split(",") if x.strip())
    new_ids = [str(ix) for ix, *_ in works if str(ix) not in current_nfo]
    print()
    if new_ids:
        print(f" ACTION: add to NFO_INDEXERS in .env: {','.join(new_ids)}")
        print(" then: docker restart dksubs-proxy")
    elif works:
        print(" No changes needed — all working indexers already in NFO_INDEXERS.")
    else:
        # Nothing worked, so claiming "all working indexers already listed"
        # would be misleading.
        print(" No working getnfo indexers found — nothing to add to NFO_INDEXERS.")
    print()


if __name__ == "__main__":
    main()