Skip to content

Commit 7c555f6

Browse files
realmarcin and claude committed
Fix broken literature_enhanced imports in two writer scripts
scripts/add_evidence_source.py and scripts/intelligent_snippet_fixer.py both import EnhancedLiteratureFetcher from communitymech.literature_enhanced — a module that was never committed to git (only a stale .pyc was shadowing the missing source locally). Both scripts have raised ModuleNotFoundError on import for as long as anyone has tried to run them, which was surfaced as a pre-existing-state heads-up by the recent writer-conversion PR #87.

Swap to LiteratureFetcher from communitymech.literature, which exposes the same fetch_pubmed_abstract + fetch_paper surface plus a richer DOI fallback chain (CrossRef → PubMed via DOI lookup → PMC full-text → OpenAlex → Semantic Scholar → Europe PMC → publisher meta-tag scrape) that subsumes what fetch_abstract_for_doi did.

API differences:

- fetch_paper returns (abstract, pdf_url) not a dict; tuple-unpack at call sites.
- LiteratureFetcher.fetch_paper has no download_pdf kwarg (the older version's flag was a no-op in the LiteratureFetcher pipeline; the pdf URL is just returned alongside the abstract).
- Title field is unavailable separately. In add_evidence_source.py's guess_evidence_source classifier the title was filter(None, …)-merged with snippet and abstract anyway; losing it degrades classification marginally (PubMed abstracts include the title in the abstract text, so PMID references are unaffected). If richer DOI classification is needed later, LiteratureFetcher.fetch_doi_metadata() returns CrossRef metadata with a title field.

After-state: both scripts now import and run their initialization paths cleanly. pytest tests/ still passes (136 passed, 9 skipped).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a49f889 commit 7c555f6

2 files changed

Lines changed: 35 additions & 22 deletions

File tree

scripts/add_evidence_source.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
2828

29-
from communitymech.literature_enhanced import EnhancedLiteratureFetcher
29+
from communitymech.literature import LiteratureFetcher
3030

3131
from communitymech.curate.curation_event import record_curation_event
3232
from communitymech.validation.write_validated import (
@@ -39,10 +39,13 @@ class EvidenceSourceAdder:
3939
"""Add evidence_source to evidence items"""
4040

4141
def __init__(self):
42-
self.fetcher = EnhancedLiteratureFetcher(
43-
cache_dir=".literature_cache",
44-
use_fallback_pdf=False
45-
)
42+
# Previously imported a sibling EnhancedLiteratureFetcher class that
43+
# was never committed to the repo; the LiteratureFetcher in
44+
# communitymech.literature exposes the same fetch_pubmed_abstract +
45+
# fetch_paper surface (plus a richer DOI fallback chain through
46+
# CrossRef / PMC / OpenAlex / Semantic Scholar / Europe PMC) which
47+
# is what these scripts actually need.
48+
self.fetcher = LiteratureFetcher(cache_dir=".literature_cache")
4649
self.stats = {
4750
'total_evidence': 0,
4851
'already_has_source': 0,
@@ -148,12 +151,14 @@ def process_yaml(
148151

149152
# Try to fetch abstract for better classification
150153
abstract = None
151-
title = None
154+
title = None # LiteratureFetcher.fetch_paper returns
155+
# (abstract, pdf_url); the title is embedded
156+
# in PubMed abstracts and can be pulled from
157+
# CrossRef metadata via fetch_doi_metadata()
158+
# if richer classification is needed later.
152159
try:
153-
paper = self.fetcher.fetch_paper(reference, download_pdf=False)
154-
abstract = paper.get('abstract')
155-
title = paper.get('title')
156-
except:
160+
abstract, _ = self.fetcher.fetch_paper(reference)
161+
except Exception:
157162
pass
158163

159164
# Guess evidence source
@@ -221,12 +226,14 @@ def process_yaml(
221226
reference = ev.get('reference', '')
222227

223228
abstract = None
224-
title = None
229+
title = None # LiteratureFetcher.fetch_paper returns
230+
# (abstract, pdf_url); the title is embedded
231+
# in PubMed abstracts and can be pulled from
232+
# CrossRef metadata via fetch_doi_metadata()
233+
# if richer classification is needed later.
225234
try:
226-
paper = self.fetcher.fetch_paper(reference, download_pdf=False)
227-
abstract = paper.get('abstract')
228-
title = paper.get('title')
229-
except:
235+
abstract, _ = self.fetcher.fetch_paper(reference)
236+
except Exception:
230237
pass
231238

232239
guessed_source = self.guess_evidence_source(

scripts/intelligent_snippet_fixer.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
2626

2727
from communitymech.curate.curation_event import record_curation_event
28-
from communitymech.literature_enhanced import EnhancedLiteratureFetcher
28+
from communitymech.literature import LiteratureFetcher
2929
from communitymech.validation.write_validated import (
3030
ValidationFailedError,
3131
write_validated_community,
@@ -59,7 +59,12 @@ class IntelligentSnippetFixer:
5959
"""Intelligent snippet fixer with context-aware abstract analysis."""
6060

6161
def __init__(self, verbose: bool = False):
62-
self.fetcher = EnhancedLiteratureFetcher()
62+
# Previously imported a sibling EnhancedLiteratureFetcher class
63+
# that was never committed; LiteratureFetcher exposes the same
64+
# fetch_pubmed_abstract + fetch_paper surface plus a richer DOI
65+
# fallback chain (CrossRef / PMC / OpenAlex / Semantic Scholar /
66+
# Europe PMC) which subsumes what fetch_abstract_for_doi did.
67+
self.fetcher = LiteratureFetcher()
6368
self.verbose = verbose
6469

6570
def extract_relevant_sentences(
@@ -210,12 +215,13 @@ def suggest_snippets_for_evidence(
210215
if reference.upper().startswith("PMID:"):
211216
pmid = reference.replace("PMID:", "").replace("pmid:", "").strip()
212217
abstract = self.fetcher.fetch_pubmed_abstract(pmid)
213-
elif "doi" in reference.lower() or reference.startswith("10."):
214-
doi = reference.replace("doi:", "").replace("https://doi.org/", "").strip()
215-
abstract = self.fetcher.fetch_abstract_for_doi(doi)
216218
else:
217-
paper = self.fetcher.fetch_paper(reference, download_pdf=False)
218-
abstract = paper.get("abstract")
219+
# fetch_paper auto-detects PMID vs DOI and runs the full
220+
# DOI fallback chain (CrossRef → PMID via DOI lookup → PMC
221+
# full-text → OpenAlex → Semantic Scholar → Europe PMC →
222+
# publisher meta-tag scrape). Returns (abstract, pdf_url);
223+
# we don't need the pdf here.
224+
abstract, _ = self.fetcher.fetch_paper(reference)
219225

220226
if not abstract:
221227
if self.verbose:

0 commit comments

Comments
 (0)