Skip to content

Commit 626f8ed

Browse files
committed
Add llm_review
1 parent b14bcd5 commit 626f8ed

7 files changed

Lines changed: 76 additions & 75 deletions

File tree

.github/workflows/ci.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,15 @@ jobs:
5050
- name: Install dependencies
5151
run: python -m pip install --upgrade pip && python -m pip install -e .[dev]
5252

53-
- name: Run golden tests
54-
run: python -m pytest tests/golden -q
55-
5653
- name: Run smoke tests
5754
run: python -m pytest tests/smoke -q
5855

5956
- name: Run full test suite with coverage
60-
run: python -m pytest --cov=portfolio_auditor --cov-report=term-missing --cov-fail-under=72
57+
run: python -m pytest --cov=portfolio_auditor --cov-report=term-missing --cov-fail-under=72
58+
59+
- name: Run golden tests
60+
# Golden tests run last: a snapshot drift (e.g. after an intentional policy
61+
# change) should not block coverage reporting. Fix snapshots with:
62+
# python -m pytest tests/golden --snapshot-update (if using syrupy)
63+
# or by regenerating the JSON files in tests/golden/snapshots/.
64+
run: python -m pytest tests/golden -q

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ dependencies = [
1919
"typer>=0.12.3",
2020
"PyYAML>=6.0.1",
2121
"pandas>=2.2.0",
22+
"numpy>=1.26",
23+
"rich>=13.0",
2224
"streamlit>=1.35.0",
2325
]
2426

requirements.txt

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,5 @@
1-
streamlit>=1.35
2-
pandas>=2.2
3-
numpy>=1.26
4-
requests>=2.31
5-
python-dotenv>=1.0
6-
GitPython>=3.1
7-
typer>=0.12
8-
rich>=13.0
9-
pydantic>=2.7
10-
pydantic-settings>=2.2
11-
-e .
1+
# Development install — all runtime deps come from pyproject.toml.
2+
# Keep this file minimal: its only job is to editable-install the package
3+
# so that `pip install -r requirements.txt` works in dev without repeating
4+
# the dependency list.
5+
-e .

src/portfolio_auditor/collectors/github/collector.py

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -187,41 +187,6 @@ def _list_owner_repos(self, owner: str) -> tuple[list[dict[str, Any]], str]:
187187
return cached_payload, "normalized_snapshot"
188188
raise
189189

190-
def _load_raw_owner_snapshot_payload(self, owner: str) -> list[dict[str, Any]]:
191-
snapshot_path = self.get_raw_owner_snapshot_path(owner)
192-
payload = json.loads(snapshot_path.read_text(encoding="utf-8"))
193-
if not isinstance(payload, list):
194-
raise ValueError(
195-
f"Cached GitHub snapshot for owner '{owner}' is not a valid list payload."
196-
)
197-
198-
normalized_payload: list[dict[str, Any]] = []
199-
for item in payload:
200-
if isinstance(item, dict):
201-
normalized_payload.append(item)
202-
return normalized_payload
203-
204-
def _build_rate_limit_message(
205-
self,
206-
*,
207-
owner: str,
208-
original_error: GitHubRateLimitError,
209-
) -> str:
210-
snapshot_path = self.get_raw_owner_snapshot_path(owner)
211-
if snapshot_path.exists():
212-
return (
213-
f"GitHub API rate limit exceeded while collecting owner '{owner}', and a cached "
214-
f"snapshot exists at {snapshot_path}. The pipeline can fall back to cached metadata. "
215-
f"Original error: {original_error}"
216-
)
217-
218-
return (
219-
f"GitHub API rate limit exceeded while collecting owner '{owner}'. "
220-
f"No cached snapshot is available at {snapshot_path}. "
221-
f"Add GITHUB_TOKEN to increase your rate limit, then rerun the command. "
222-
f"Original error: {original_error}"
223-
)
224-
225190
def _apply_filters(self, repos: list[RepoMetadata]) -> list[RepoMetadata]:
226191
filtered = repos
227192
excluded_names = self.settings.normalized_excluded_repo_names

src/portfolio_auditor/reviewing/llm_review.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111

1212
logger = logging.getLogger(__name__)
1313

14-
_deterministic_reviewer = DeterministicReviewer()
15-
1614

1715
class LLMReviewNotImplemented(NotImplementedError):
1816
"""
@@ -62,4 +60,4 @@ def review_repo_with_llm(
6260
"LLM review not yet implemented — falling back to deterministic reviewer for %s",
6361
repo.full_name,
6462
)
65-
return _deterministic_reviewer.review(repo, scan, score)
63+
return DeterministicReviewer().review(repo, scan, score)

src/portfolio_auditor/reviewing/review_orchestrator.py

Lines changed: 0 additions & 19 deletions
This file was deleted.
Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,63 @@
1+
"""
2+
Integration tests for scripts/rebuild_sample_data.py.
3+
4+
These tests run the script and verify the artifact it produces,
5+
in addition to checking that the script file exists.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import json
11+
import subprocess
12+
import sys
113
from pathlib import Path
214

15+
import pytest
16+
17+
SCRIPT = Path("scripts/rebuild_sample_data.py").resolve()
18+
19+
20+
@pytest.mark.integration
21+
def test_script_exists():
22+
assert SCRIPT.exists(), f"Expected script at {SCRIPT}"
23+
24+
25+
@pytest.mark.integration
26+
def test_script_runs_successfully(tmp_path, monkeypatch):
27+
"""Script must exit 0 when run from a temporary working directory."""
28+
monkeypatch.chdir(tmp_path)
29+
result = subprocess.run(
30+
[sys.executable, str(SCRIPT)],
31+
capture_output=True,
32+
text=True,
33+
)
34+
assert result.returncode == 0, f"Script exited with {result.returncode}:\n{result.stderr}"
35+
36+
37+
@pytest.mark.integration
38+
def test_script_produces_valid_json(tmp_path, monkeypatch):
39+
"""The artifact written by the script must be a non-empty JSON list."""
40+
monkeypatch.chdir(tmp_path)
41+
subprocess.run([sys.executable, str(SCRIPT)], check=True, capture_output=True)
42+
43+
output_file = tmp_path / "data" / "raw" / "github" / "repos_raw.json"
44+
assert output_file.exists(), f"Expected output at {output_file}"
45+
46+
payload = json.loads(output_file.read_text(encoding="utf-8"))
47+
assert isinstance(payload, list), "Output must be a JSON list"
48+
assert len(payload) > 0, "Output list must not be empty"
49+
50+
51+
@pytest.mark.integration
52+
def test_script_output_has_required_fields(tmp_path, monkeypatch):
53+
"""Each entry in the output must carry the fields downstream code relies on."""
54+
monkeypatch.chdir(tmp_path)
55+
subprocess.run([sys.executable, str(SCRIPT)], check=True, capture_output=True)
56+
57+
output_file = tmp_path / "data" / "raw" / "github" / "repos_raw.json"
58+
payload = json.loads(output_file.read_text(encoding="utf-8"))
359

4-
def test_sample_data_file_layout():
5-
path = Path("scripts/rebuild_sample_data.py")
6-
assert path.exists()
60+
required_fields = {"name", "html_url", "language", "private", "fork", "archived"}
61+
for entry in payload:
62+
missing = required_fields - entry.keys()
63+
assert not missing, f"Entry missing fields {missing}: {entry}"

0 commit comments

Comments
 (0)