Skip to content

Commit a213d9f

Browse files
committed
fix(comses): surface PDF/DOCX ODD docs as odd_doc_binary
Smoke test B (FNNR-ABM Python/Mesa model) showed the ODD protocol was a PDF (docs/FNNR ABM - ODD Protocol with References.pdf). Our old find_odd_doc only scanned .md/.txt, so open_comses_model returned odd_doc=null even though a real ODD existed. The AI ended up summarizing from metadata + DEM header bytes. - New find_odd_doc_binary: same name patterns as find_odd_doc, but for .pdf / .docx / .doc / .odt. Two-pass matching (name prefix, then substring) so "FNNR ABM - ODD ..." is caught. - DownloadOutcome gains odd_doc_binary; wired through _inspect_extracted and _outcome_to_payload so both open_comses_model and download_comses_model return the path. - read_comses_files still doesn't try to read PDFs (v1 scope limit) — the AI should surface the path from odd_doc_binary so the user can open it in a viewer. - 2 new unit tests + 1 assertion update on the non-NetLogo integration test (FNNR-style PDF-only docs). 89 tests pass, lint + mypy clean.
1 parent b624f83 commit a213d9f

3 files changed

Lines changed: 67 additions & 7 deletions

File tree

src/netlogo_mcp/comses.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -585,27 +585,55 @@ def rel_parts(p: Path) -> tuple[str, ...]:
585585
return candidates[-1]
586586

587587

588-
def find_odd_doc(extracted_dir: Path) -> Path | None:
589-
"""First matching ODD / documentation file, None if none found."""
588+
_ODD_NAME_PATTERNS = ("ODD", "odd", "documentation", "README", "readme")
589+
_ODD_TEXT_EXTS = (".md", ".txt")
590+
_ODD_BINARY_EXTS = (".pdf", ".docx", ".odt", ".doc")
591+
592+
593+
def _find_odd_by_exts(extracted_dir: Path, exts: tuple[str, ...]) -> Path | None:
594+
"""Shared scanner for `find_odd_doc` / `find_odd_doc_binary`."""
590595
docs_dir = extracted_dir / "docs"
591596
search_roots = [docs_dir, extracted_dir] if docs_dir.is_dir() else [extracted_dir]
592-
# Priority patterns (case-insensitive).
593-
patterns = ("ODD", "odd", "documentation", "README", "readme")
594597
for root in search_roots:
595598
if not root.is_dir():
596599
continue
597600
for p in sorted(root.iterdir(), key=lambda x: x.name):
598601
if not p.is_file():
599602
continue
600-
if p.suffix.lower() not in (".md", ".txt"):
603+
if p.suffix.lower() not in exts:
601604
continue
602605
name = p.name
603-
for pat in patterns:
606+
for pat in _ODD_NAME_PATTERNS:
604607
if name.startswith(pat):
605608
return p
609+
# Second pass: match anywhere in the filename (e.g. "FNNR ABM - ODD.pdf").
610+
for root in search_roots:
611+
if not root.is_dir():
612+
continue
613+
for p in sorted(root.iterdir(), key=lambda x: x.name):
614+
if not p.is_file() or p.suffix.lower() not in exts:
615+
continue
616+
lower = p.name.lower()
617+
if any(pat.lower() in lower for pat in _ODD_NAME_PATTERNS):
618+
return p
606619
return None
607620

608621

622+
def find_odd_doc(extracted_dir: Path) -> Path | None:
623+
"""First matching text ODD / documentation file, None if none found."""
624+
return _find_odd_by_exts(extracted_dir, _ODD_TEXT_EXTS)
625+
626+
627+
def find_odd_doc_binary(extracted_dir: Path) -> Path | None:
628+
"""First matching ODD / documentation file in a binary format (PDF/DOCX/etc.).
629+
630+
Useful so the tool can tell the AI "an ODD exists but is a PDF — the
631+
user will need to open it in a viewer." The file itself is NOT read
632+
by read_comses_files (v1 scope limit).
633+
"""
634+
return _find_odd_by_exts(extracted_dir, _ODD_BINARY_EXTS)
635+
636+
609637
@dataclass
610638
class DownloadOutcome:
611639
"""Result of a high-level download_release call.
@@ -622,6 +650,7 @@ class DownloadOutcome:
622650
selected_netlogo_file: Path | None
623651
code_files: list[Path]
624652
odd_doc: Path | None
653+
odd_doc_binary: Path | None
625654
license_name: str | None
626655
title: str | None
627656

@@ -737,6 +766,7 @@ def _inspect_extracted(
737766
selected = select_netlogo_file(netlogo, extracted_path)
738767
code = find_code_files(extracted_path)
739768
odd = find_odd_doc(extracted_path)
769+
odd_bin = find_odd_doc_binary(extracted_path)
740770
return DownloadOutcome(
741771
identifier=identifier,
742772
resolved_version=resolved_version,
@@ -747,6 +777,7 @@ def _inspect_extracted(
747777
selected_netlogo_file=selected,
748778
code_files=code,
749779
odd_doc=odd,
780+
odd_doc_binary=odd_bin,
750781
license_name=license_name,
751782
title=title,
752783
)

src/netlogo_mcp/tools.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,9 @@ def _outcome_to_payload(outcome: _comses.DownloadOutcome) -> dict:
720720
"loaded_netlogo_file": rel(outcome.selected_netlogo_file),
721721
"code_files": [rel(p) for p in outcome.code_files],
722722
"odd_doc": rel(outcome.odd_doc),
723+
# Non-text ODD (PDF/DOCX) — read_comses_files cannot read it, but
724+
# the AI should surface the path so the user can open it manually.
725+
"odd_doc_binary": rel(outcome.odd_doc_binary),
723726
}
724727

725728

tests/test_comses.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,26 @@ def test_find_odd_doc_prefers_odd_then_readme(tmp_path):
473473
assert found is not None and found.name == "ODD.md"
474474

475475

476+
def test_find_odd_doc_binary_picks_up_pdf_with_odd_in_name(tmp_path):
477+
"""Real-world case from FNNR-ABM smoke test — PDF ODD is the only doc."""
478+
(tmp_path / "docs").mkdir()
479+
(tmp_path / "docs" / "FNNR ABM - ODD Protocol with References.pdf").write_bytes(
480+
b"%PDF-1.4\n"
481+
)
482+
# Text scanner finds nothing.
483+
assert comses.find_odd_doc(tmp_path) is None
484+
# Binary scanner finds the PDF.
485+
found = comses.find_odd_doc_binary(tmp_path)
486+
assert found is not None and found.suffix == ".pdf"
487+
assert "ODD" in found.name
488+
489+
490+
def test_find_odd_doc_binary_none_when_only_text_docs(tmp_path):
491+
(tmp_path / "docs").mkdir()
492+
(tmp_path / "docs" / "ODD.md").write_text("odd")
493+
assert comses.find_odd_doc_binary(tmp_path) is None
494+
495+
476496
# ── High-level download_release + MCP tool ───────────────────────────────────
477497

478498

@@ -788,7 +808,8 @@ async def test_open_comses_model_non_netlogo_returns_structured_json(
788808
{
789809
"code/model.py": b"# python\n",
790810
"codemeta.json": b'{"programmingLanguage": {"name": "Python"}}',
791-
"docs/ODD.md": b"# ODD",
811+
# Simulate the FNNR-ABM real case: ODD is a PDF only.
812+
"docs/FNNR ABM - ODD Protocol.pdf": b"%PDF-1.4\n",
792813
},
793814
)
794815

@@ -801,6 +822,11 @@ async def test_open_comses_model_non_netlogo_returns_structured_json(
801822
mock_context, identifier=identifier, version=version
802823
)
803824
data = json.loads(raw)
825+
# Binary ODD is surfaced so the AI can tell the user where to read it.
826+
assert data["odd_doc"] is None
827+
assert data["odd_doc_binary"] is not None
828+
assert "ODD" in data["odd_doc_binary"]
829+
assert data["odd_doc_binary"].endswith(".pdf")
804830

805831
assert data["status"] == "not_runnable_in_netlogo"
806832
assert data["language"] == "Python"

0 commit comments

Comments
 (0)