Skip to content

Commit 5aa2529

Browse files
committed
feat: retry LLM classification failures instead of silently dropping them
- classify_paper returns llm_failed=True when all retries are exhausted
- classify_papers returns a (relevant, failed_ids) tuple
- Failed papers are NOT added to processed_ids, so they get retried
- State tracks retry_counts per arxiv_id and gives up after 3 failures
- run_daily fetches retry papers at the start of each run
- 2605.18747 (Code as Agent Harness) was already recovered manually
1 parent 744c768 commit 5aa2529

3 files changed

Lines changed: 83 additions & 15 deletions

File tree

automation/classifier/llm.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -155,12 +155,13 @@ def classify_paper(
155155

156156
logger.error("All %d attempts failed for paper: %s", retries, paper.get("arxiv_id"))
157157
return {
158-
"relevant": False,
159-
"reason": "LLM classification failed after retries",
160-
"category": None,
161-
"tags": [],
162-
"summary": "",
163-
"venue_hint": "",
158+
"relevant": False,
159+
"reason": "LLM classification failed after retries",
160+
"llm_failed": True, # distinguishes LLM error from "genuinely not relevant"
161+
"category": None,
162+
"tags": [],
163+
"summary": "",
164+
"venue_hint": "",
164165
}
165166

166167

@@ -171,21 +172,33 @@ def classify_papers(
171172
model: str | None = None,
172173
temperature: float = 0.1,
173174
learned_rules: str = "",
174-
) -> list[dict[str, Any]]:
175+
) -> tuple[list[dict[str, Any]], list[str]]:
175176
"""
176177
Classify a list of papers. Mutates each paper in place, adding:
177178
- paper["category"] : str | None
178179
- paper["tags"] : list[str]
179180
- paper["summary"] : str
180181
- paper["relevant"] : bool
181-
Returns only the relevant papers.
182+
183+
Returns (relevant_papers, failed_arxiv_ids):
184+
- relevant_papers: papers the LLM confirmed are relevant
185+
- failed_arxiv_ids: papers where LLM call failed (should be retried later)
182186
"""
183187
relevant: list[dict[str, Any]] = []
188+
failed_ids: list[str] = []
184189

185190
for i, paper in enumerate(papers, 1):
186191
logger.info("[%d/%d] Classifying: %s", i, len(papers), paper["title"][:80])
187192
result = classify_paper(paper, categories, tags, model=model, temperature=temperature, learned_rules=learned_rules)
188193

194+
if result.get("llm_failed"):
195+
aid = paper.get("arxiv_id", "")
196+
if aid:
197+
failed_ids.append(aid)
198+
logger.warning(" → LLM FAILED, will retry next run: %s", paper["title"][:60])
199+
time.sleep(0.5)
200+
continue
201+
189202
paper["relevant"] = result.get("relevant", False)
190203
paper["category"] = result.get("category")
191204
paper["tags"] = result.get("tags", [])
@@ -205,5 +218,6 @@ def classify_papers(
205218
# Small delay to avoid hammering the proxy
206219
time.sleep(0.5)
207220

208-
logger.info("Classified %d papers → %d relevant", len(papers), len(relevant))
209-
return relevant
221+
logger.info("Classified %d papers → %d relevant, %d failed",
222+
len(papers), len(relevant), len(failed_ids))
223+
return relevant, failed_ids

automation/main.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,19 @@ def run_daily() -> None:
141141
deduped.append(p)
142142
new_papers = deduped
143143

144+
# ── 3b. Add papers pending retry (LLM failed last time) ─────────────────
145+
retry_ids = state_mgr.get_retry_ids(state)
146+
if retry_ids:
147+
logger.info("=== Retrying %d previously failed classification(s) ===", len(retry_ids))
148+
already = {p["arxiv_id"] for p in new_papers}
149+
from automation.crawler.arxiv import fetch_single_paper
150+
for aid in retry_ids:
151+
if aid not in already:
152+
p = fetch_single_paper(aid)
153+
if p:
154+
new_papers.append(p)
155+
already.add(aid)
156+
144157
logger.info("After dedup: %d new papers to process", len(new_papers))
145158

146159
if not new_papers:
@@ -156,18 +169,24 @@ def run_daily() -> None:
156169
# ── 5. Classify ─────────────────────────────────────────────────────────
157170
logger.info("=== Step 4: Classifying with LLM ===")
158171
learned_rules = state_mgr.maybe_refresh_learned_rules(state)
159-
relevant = classify_papers(
172+
relevant, failed_ids = classify_papers(
160173
new_papers,
161174
categories=cfg["categories"],
162175
tags=cfg["tags"],
163176
temperature=cfg.get("llm", {}).get("temperature", 0.1),
164177
learned_rules=learned_rules,
165178
)
166-
logger.info("Relevant papers: %d / %d", len(relevant), len(new_papers))
179+
logger.info("Relevant papers: %d / %d (failed: %d)", len(relevant), len(new_papers), len(failed_ids))
167180

168-
# Mark as processed only after classification completes successfully.
169-
# This way a mid-run crash won't permanently lose unclassified papers.
170-
state_mgr.mark_processed(state, [p["arxiv_id"] for p in new_papers])
181+
# Mark successfully classified papers as processed (failed ones stay out)
182+
succeeded_ids = [p["arxiv_id"] for p in new_papers if p.get("arxiv_id") not in failed_ids]
183+
state_mgr.mark_processed(state, succeeded_ids)
184+
185+
# Track failures — give up after MAX_RETRIES, mark as processed to stop retrying
186+
if failed_ids:
187+
_, give_up_ids = state_mgr.add_failed_classifications(state, failed_ids)
188+
if give_up_ids:
189+
state_mgr.mark_processed(state, give_up_ids)
171190
state_mgr.save(state)
172191

173192
if not relevant:

automation/state_manager.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
"reject_feedback": [], # [{arxiv_id, title, reason}, ...] — curator reject reasons
4242
"learned_rules": "", # LLM-synthesised rules injected into classifier prompt
4343
"rules_last_updated": "", # ISO date when learned_rules was last generated
44+
"retry_counts": {}, # {arxiv_id: int} — LLM failure retry count
4445
}
4546

4647

@@ -91,6 +92,40 @@ def update_pending_issues(state: dict[str, Any], updated: list[dict[str, Any]])
9192
state["pending_issues"] = updated
9293

9394

95+
# Failure-count threshold: once an arXiv ID has this many recorded LLM
# classification failures it is given up on and no longer offered for retry.
_MAX_RETRIES = 3
96+
97+
98+
def add_failed_classifications(state: dict[str, Any], arxiv_ids: list[str]) -> tuple[list[str], list[str]]:
    """
    Record LLM classification failures and bump the retry count of each ID.

    Each distinct ID is counted at most once per call, so a duplicated entry
    in ``arxiv_ids`` cannot burn several retries in a single run or end up in
    both returned lists. Empty IDs are ignored.

    Args:
        state: Mutable state dict. ``state["retry_counts"]`` is created when
            missing and updated in place.
        arxiv_ids: IDs whose classification failed during this run.

    Returns (retry_later, give_up):
    - retry_later: IDs that haven't hit the retry limit yet (keep out of processed_ids)
    - give_up: IDs that have failed too many times (mark as processed to stop retrying)
    """
    counts: dict[str, int] = state.setdefault("retry_counts", {})
    retry_later: list[str] = []
    give_up: list[str] = []

    # dict.fromkeys de-duplicates while preserving first-seen order.
    for aid in dict.fromkeys(arxiv_ids):
        if not aid:
            # An empty ID cannot be fetched again later, so tracking it is pointless.
            continue

        counts[aid] = counts.get(aid, 0) + 1
        if counts[aid] >= _MAX_RETRIES:
            logger.warning("Giving up on %s after %d failed attempts", aid, counts[aid])
            give_up.append(aid)
        else:
            logger.info("Will retry %s (attempt %d/%d)", aid, counts[aid], _MAX_RETRIES)
            retry_later.append(aid)

    return retry_later, give_up
119+
120+
121+
def get_retry_ids(state: dict[str, Any]) -> list[str]:
    """List the IDs whose classification failed earlier and should be attempted again.

    An ID qualifies when its recorded failure count is still below the retry
    limit and it appears in neither ``processed_ids`` nor ``rejected_ids``.
    """
    finished = set(state.get("processed_ids", []))
    finished.update(state.get("rejected_ids", []))

    pending: list[str] = []
    for arxiv_id, failures in state.get("retry_counts", {}).items():
        # Skip anything that exhausted its retries or has already been handled.
        if failures >= _MAX_RETRIES or arxiv_id in finished:
            continue
        pending.append(arxiv_id)
    return pending
127+
128+
94129
def add_reject_feedback(state: dict[str, Any], items: list[dict[str, Any]]) -> None:
95130
"""Append curator reject reasons for classifier learning."""
96131
state.setdefault("reject_feedback", []).extend(items)

0 commit comments

Comments
 (0)