Fix broken literature_enhanced imports in two writer scripts

realmarcin · claude · realmarcin · commit 7c555f659c79 · 2026-05-25T19:27:32.000-07:00
scripts/add_evidence_source.py and scripts/intelligent_snippet_fixer.py both import EnhancedLiteratureFetcher from communitymech.literature_enhanced — a module that was never committed to git (only a stale .pyc was shadowing the missing source locally). Both scripts have raised ModuleNotFoundError on import for as long as anyone has tried to run them, which was surfaced as a pre-existing-state heads-up by the recent writer-conversion PR #87.

Swap to LiteratureFetcher from communitymech.literature, which exposes the same fetch_pubmed_abstract + fetch_paper surface plus a richer DOI fallback chain (CrossRef → PubMed via DOI lookup → PMC full-text → OpenAlex → Semantic Scholar → Europe PMC → publisher meta-tag scrape) that subsumes what fetch_abstract_for_doi did.

API differences:

- fetch_paper returns (abstract, pdf_url), not a dict; tuple-unpack at call sites.
- LiteratureFetcher.fetch_paper has no download_pdf kwarg (the older version's flag was a no-op in the LiteratureFetcher pipeline; the pdf URL is just returned alongside the abstract).
- Title field is unavailable separately. In add_evidence_source.py's guess_evidence_source classifier the title was filter(None, …)-merged with snippet and abstract anyway; losing it degrades classification marginally (PubMed abstracts include the title in the abstract text, so PMID references are unaffected). If richer DOI classification is needed later, LiteratureFetcher.fetch_doi_metadata() returns CrossRef metadata with a title field.

After-state: both scripts now import and run their initialization paths cleanly. pytest tests/ still passes (136 passed, 9 skipped).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/scripts/add_evidence_source.py b/scripts/add_evidence_source.py
@@ -26,7 +26,7 @@
 
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 
-from communitymech.literature_enhanced import EnhancedLiteratureFetcher
+from communitymech.literature import LiteratureFetcher
 
 from communitymech.curate.curation_event import record_curation_event
 from communitymech.validation.write_validated import (
@@ -39,10 +39,13 @@ class EvidenceSourceAdder:
     """Add evidence_source to evidence items"""
 
     def __init__(self):
-        self.fetcher = EnhancedLiteratureFetcher(
-            cache_dir=".literature_cache",
-            use_fallback_pdf=False
-        )
+        # Previously imported a sibling EnhancedLiteratureFetcher class that
+        # was never committed to the repo; the LiteratureFetcher in
+        # communitymech.literature exposes the same fetch_pubmed_abstract +
+        # fetch_paper surface (plus a richer DOI fallback chain through
+        # CrossRef / PMC / OpenAlex / Semantic Scholar / Europe PMC) which
+        # is what these scripts actually need.
+        self.fetcher = LiteratureFetcher(cache_dir=".literature_cache")
         self.stats = {
             'total_evidence': 0,
             'already_has_source': 0,
@@ -148,12 +151,14 @@ def process_yaml(
 
                     # Try to fetch abstract for better classification
                     abstract = None
-                    title = None
+                    title = None  # LiteratureFetcher.fetch_paper returns
+                                  # (abstract, pdf_url); the title is embedded
+                                  # in PubMed abstracts and can be pulled from
+                                  # CrossRef metadata via fetch_doi_metadata()
+                                  # if richer classification is needed later.
                     try:
-                        paper = self.fetcher.fetch_paper(reference, download_pdf=False)
-                        abstract = paper.get('abstract')
-                        title = paper.get('title')
-                    except:
+                        abstract, _ = self.fetcher.fetch_paper(reference)
+                    except Exception:
                         pass
 
                     # Guess evidence source
@@ -221,12 +226,14 @@ def process_yaml(
                     reference = ev.get('reference', '')
 
                     abstract = None
-                    title = None
+                    title = None  # LiteratureFetcher.fetch_paper returns
+                                  # (abstract, pdf_url); the title is embedded
+                                  # in PubMed abstracts and can be pulled from
+                                  # CrossRef metadata via fetch_doi_metadata()
+                                  # if richer classification is needed later.
                     try:
-                        paper = self.fetcher.fetch_paper(reference, download_pdf=False)
-                        abstract = paper.get('abstract')
-                        title = paper.get('title')
-                    except:
+                        abstract, _ = self.fetcher.fetch_paper(reference)
+                    except Exception:
                         pass
 
                     guessed_source = self.guess_evidence_source(
diff --git a/scripts/intelligent_snippet_fixer.py b/scripts/intelligent_snippet_fixer.py
@@ -25,7 +25,7 @@
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 
 from communitymech.curate.curation_event import record_curation_event
-from communitymech.literature_enhanced import EnhancedLiteratureFetcher
+from communitymech.literature import LiteratureFetcher
 from communitymech.validation.write_validated import (
     ValidationFailedError,
     write_validated_community,
@@ -59,7 +59,12 @@ class IntelligentSnippetFixer:
     """Intelligent snippet fixer with context-aware abstract analysis."""
 
     def __init__(self, verbose: bool = False):
-        self.fetcher = EnhancedLiteratureFetcher()
+        # Previously imported a sibling EnhancedLiteratureFetcher class
+        # that was never committed; LiteratureFetcher exposes the same
+        # fetch_pubmed_abstract + fetch_paper surface plus a richer DOI
+        # fallback chain (CrossRef / PMC / OpenAlex / Semantic Scholar /
+        # Europe PMC) which subsumes what fetch_abstract_for_doi did.
+        self.fetcher = LiteratureFetcher()
         self.verbose = verbose
 
     def extract_relevant_sentences(
@@ -210,12 +215,13 @@ def suggest_snippets_for_evidence(
         if reference.upper().startswith("PMID:"):
             pmid = reference.replace("PMID:", "").replace("pmid:", "").strip()
             abstract = self.fetcher.fetch_pubmed_abstract(pmid)
-        elif "doi" in reference.lower() or reference.startswith("10."):
-            doi = reference.replace("doi:", "").replace("https://doi.org/", "").strip()
-            abstract = self.fetcher.fetch_abstract_for_doi(doi)
         else:
-            paper = self.fetcher.fetch_paper(reference, download_pdf=False)
-            abstract = paper.get("abstract")
+            # fetch_paper auto-detects PMID vs DOI and runs the full
+            # DOI fallback chain (CrossRef → PMID via DOI lookup → PMC
+            # full-text → OpenAlex → Semantic Scholar → Europe PMC →
+            # publisher meta-tag scrape). Returns (abstract, pdf_url);
+            # we don't need the pdf here.
+            abstract, _ = self.fetcher.fetch_paper(reference)
 
         if not abstract:
             if self.verbose: