Skip to content

Commit 7b36d43

Browse files
committed
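change app and live_audit [editorial summary of the diff below: the dashboard now runs fresh audits through AuditRunner directly, replacing the dynamic-import and CLI-subprocess fallbacks, and the success message reports the number of repositories analyzed]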
1 parent ad6f07f commit 7b36d43

2 files changed

Lines changed: 80 additions & 145 deletions

File tree

src/portfolio_auditor/dashboard/app.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ def _render_refresh_controls(owner: str) -> None:
131131
"Refresh local clones during audit",
132132
value=False,
133133
help=(
134-
"Enable this only if your pipeline supports a deeper refresh mode that "
135-
"updates local clones before scanning."
134+
"Enable this only if you want to force repository re-cloning before scanning. "
135+
"This is slower but can refresh outdated local mirrors."
136136
),
137137
)
138138

@@ -146,14 +146,17 @@ def _render_refresh_controls(owner: str) -> None:
146146
except Exception as exc:
147147
st.error(f"Fresh audit failed: {exc}")
148148
else:
149-
success_message = result.message
149+
message = (
150+
f"{result.message} "
151+
f"{result.repo_count} repositories analyzed."
152+
)
150153
if result.history_dir is not None:
151-
success_message += f" Previous snapshot saved to {result.history_dir.name}."
154+
message += f" Previous snapshot saved to {result.history_dir.name}."
152155
if result.used_token:
153-
success_message += " Authenticated GitHub access was used."
156+
message += " Authenticated GitHub access was used."
154157
else:
155-
success_message += " No GitHub token was detected."
156-
st.success(success_message)
158+
message += " No GitHub token was detected."
159+
st.success(message)
157160
st.rerun()
158161

159162

@@ -169,7 +172,7 @@ def main() -> None:
169172
st.title("GitHub Portfolio Auditor · Dashboard V2")
170173
st.caption(
171174
"Deterministic portfolio decision dashboard powered only by processed JSON artifacts. "
172-
"V2 adds impact simulation and optimization views without rescanning repositories."
175+
"V2 adds impact simulation and optimization views."
173176
)
174177

175178
discovered_owners = discover_owners()
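[Second changed file. Its path header is missing from this capture; presumably src/portfolio_auditor/dashboard/live_audit.py, to be confirmed against the repository.]

Lines changed: 69 additions & 137 deletions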
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,19 @@
11
from __future__ import annotations
22

3-
import inspect
43
import os
54
import shutil
6-
import subprocess
7-
import sys
85
from dataclasses import dataclass
96
from datetime import datetime, timezone
10-
from importlib import import_module
117
from pathlib import Path
12-
from typing import Any, Callable
138

149
import streamlit as st
1510

11+
from portfolio_auditor.audit_runner import AuditRunner
12+
from portfolio_auditor.collectors.github.client import GitHubApiError, GitHubRateLimitError
13+
from portfolio_auditor.settings import get_settings
14+
1615

1716
ROOT_DIR = Path(__file__).resolve().parents[3]
18-
SRC_DIR = ROOT_DIR / "src"
1917
DATA_DIR = ROOT_DIR / "data"
2018
PROCESSED_DIR = DATA_DIR / "processed"
2119
PROCESSED_HISTORY_DIR = DATA_DIR / "processed_history"
@@ -29,17 +27,24 @@ class AuditRunResult:
2927
output_dir: Path
3028
history_dir: Path | None = None
3129
used_token: bool = False
30+
repo_count: int = 0
3231

3332

3433
def resolve_github_token() -> str | None:
34+
"""
35+
Resolve the GitHub token from Streamlit secrets first, then environment variables.
36+
Works both:
37+
- locally via .env / environment variables
38+
- on Streamlit Community Cloud via Secrets
39+
"""
3540
try:
3641
secret_token = st.secrets.get("GITHUB_TOKEN")
3742
except Exception:
3843
secret_token = None
3944

4045
env_token = os.getenv("GITHUB_TOKEN")
41-
token = secret_token or env_token
4246

47+
token = secret_token or env_token
4348
if not token:
4449
return None
4550

@@ -60,6 +65,9 @@ def _owner_history_dir(owner: str) -> Path:
6065

6166

6267
def _snapshot_existing_processed_dir(owner: str) -> Path | None:
68+
"""
69+
Keep a timestamped backup of the current processed artifacts before refresh.
70+
"""
6371
current_dir = _owner_output_dir(owner)
6472
if not current_dir.exists():
6573
return None
@@ -72,13 +80,18 @@ def _snapshot_existing_processed_dir(owner: str) -> Path | None:
7280
return snapshot_dir
7381

7482

75-
def _build_excluded_repo_names(owner: str) -> str:
76-
default_names = {
77-
owner.lower(),
83+
def _build_excluded_repo_names(owner: str) -> set[str]:
84+
"""
85+
Default exclusions:
86+
- profile README repo: owner/owner
87+
- this project itself, to avoid self-bias
88+
Plus optional additions from env or Streamlit secrets.
89+
"""
90+
excluded = {
91+
owner.strip().lower(),
7892
"github-portfolio-auditor",
7993
}
8094

81-
extra = ""
8295
try:
8396
extra = str(st.secrets.get("GITHUB_EXCLUDED_REPO_NAMES", "")).strip()
8497
except Exception:
@@ -88,139 +101,42 @@ def _build_excluded_repo_names(owner: str) -> str:
88101
for value in extra.split(","):
89102
cleaned = value.strip().lower()
90103
if cleaned:
91-
default_names.add(cleaned)
92-
93-
return ",".join(sorted(default_names))
94-
95-
96-
def _try_call_python_runner(
97-
owner: str,
98-
token: str | None,
99-
refresh_local_clones: bool,
100-
) -> bool:
101-
candidates: list[tuple[str, str]] = [
102-
("portfolio_auditor.audit_runner", "run_full_audit"),
103-
("portfolio_auditor.cli", "run_full_audit"),
104-
("portfolio_auditor.runner", "run_full_audit"),
105-
("portfolio_auditor.main", "run_full_audit"),
106-
]
107-
108-
for module_name, function_name in candidates:
109-
try:
110-
module = import_module(module_name)
111-
fn = getattr(module, function_name, None)
112-
if fn is None or not callable(fn):
113-
continue
114-
115-
_call_runner_function(
116-
fn=fn,
117-
owner=owner,
118-
token=token,
119-
refresh_local_clones=refresh_local_clones,
120-
)
121-
return True
122-
except Exception:
123-
continue
124-
125-
return False
126-
127-
128-
def _call_runner_function(
129-
fn: Callable[..., Any],
130-
owner: str,
131-
token: str | None,
132-
refresh_local_clones: bool,
133-
) -> Any:
134-
signature = inspect.signature(fn)
135-
kwargs: dict[str, Any] = {}
136-
137-
for param_name in signature.parameters:
138-
lowered = param_name.lower()
139-
140-
if lowered in {"owner", "github_owner", "username", "account"}:
141-
kwargs[param_name] = owner
142-
elif lowered in {"output_dir", "output_path", "output_root"}:
143-
kwargs[param_name] = str(PROCESSED_DIR)
144-
elif lowered in {"github_token", "token", "access_token"} and token:
145-
kwargs[param_name] = token
146-
elif lowered in {"refresh_local_clones", "refresh_clones", "refresh_clone"}:
147-
kwargs[param_name] = refresh_local_clones
148-
elif lowered in {"excluded_repo_names", "excluded_names"}:
149-
kwargs[param_name] = _build_excluded_repo_names(owner)
150-
151-
return fn(**kwargs)
104+
excluded.add(cleaned)
152105

106+
return excluded
153107

154-
def _build_subprocess_env(token: str | None, owner: str) -> dict[str, str]:
155-
env = os.environ.copy()
156108

109+
def _apply_runtime_github_env(owner: str, token: str | None) -> None:
110+
"""
111+
Inject runtime environment variables so the existing settings / GitHub client
112+
can pick them up without changing the whole project architecture.
113+
"""
157114
if token:
158-
env["GITHUB_TOKEN"] = token
115+
os.environ["GITHUB_TOKEN"] = token
159116

160-
env["GITHUB_EXCLUDED_REPO_NAMES"] = _build_excluded_repo_names(owner)
161-
162-
existing_pythonpath = env.get("PYTHONPATH", "").strip()
163-
src_path = str(SRC_DIR)
164-
165-
if existing_pythonpath:
166-
env["PYTHONPATH"] = f"{src_path}{os.pathsep}{existing_pythonpath}"
167-
else:
168-
env["PYTHONPATH"] = src_path
169-
170-
return env
171-
172-
173-
def _run_cli_subprocess(
174-
owner: str,
175-
token: str | None,
176-
refresh_local_clones: bool,
177-
) -> None:
178-
env = _build_subprocess_env(token=token, owner=owner)
179-
180-
cmd = [
181-
sys.executable,
182-
"-m",
183-
"portfolio_auditor.cli",
184-
"--owner",
185-
owner,
186-
"--output",
187-
str(PROCESSED_DIR),
188-
]
189-
190-
if refresh_local_clones:
191-
cmd.append("--refresh-local-clones")
192-
193-
completed = subprocess.run(
194-
cmd,
195-
cwd=str(ROOT_DIR),
196-
env=env,
197-
text=True,
198-
capture_output=True,
199-
)
200-
201-
if completed.returncode != 0:
202-
stderr = (completed.stderr or "").strip()
203-
stdout = (completed.stdout or "").strip()
204-
message = stderr or stdout or "Unknown CLI execution failure."
205-
raise RuntimeError(
206-
"Fresh audit failed via CLI fallback.\n\n"
207-
f"Command: {' '.join(cmd)}\n\n"
208-
f"Details: {message}"
209-
)
117+
excluded_names = ",".join(sorted(_build_excluded_repo_names(owner)))
118+
os.environ["GITHUB_EXCLUDED_REPO_NAMES"] = excluded_names
210119

211120

212121
def _validate_output(owner: str) -> Path:
213122
owner_dir = _owner_output_dir(owner)
214-
ranking_path = owner_dir / "ranking.json"
123+
required_files = [
124+
owner_dir / "ranking.json",
125+
owner_dir / "ranking_summary.json",
126+
owner_dir / "portfolio_selection.json",
127+
owner_dir / "redundancy_analysis.json",
128+
]
215129

216130
if not owner_dir.exists():
217131
raise RuntimeError(
218132
f"Audit finished without creating the expected owner directory: {owner_dir}"
219133
)
220134

221-
if not ranking_path.exists():
135+
missing = [str(path.name) for path in required_files if not path.exists()]
136+
if missing:
222137
raise RuntimeError(
223-
f"Audit finished but ranking.json was not found at: {ranking_path}"
138+
"Audit finished but some required processed artifacts are missing: "
139+
+ ", ".join(missing)
224140
)
225141

226142
return owner_dir
@@ -230,27 +146,42 @@ def run_fresh_audit(
230146
owner: str,
231147
refresh_local_clones: bool = False,
232148
) -> AuditRunResult:
149+
"""
150+
Launch a fresh GitHub audit directly from Streamlit using the existing Python
151+
orchestration layer (AuditRunner), not the CLI.
152+
153+
This is the correct integration point for your project.
154+
"""
233155
normalized_owner = owner.strip()
234156
if not normalized_owner:
235157
raise ValueError("Owner cannot be empty.")
236158

237159
token = resolve_github_token()
160+
_apply_runtime_github_env(normalized_owner, token)
161+
238162
history_dir = _snapshot_existing_processed_dir(normalized_owner)
239163

240164
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)
241165

242-
python_runner_worked = _try_call_python_runner(
243-
owner=normalized_owner,
244-
token=token,
245-
refresh_local_clones=refresh_local_clones,
246-
)
166+
settings = get_settings()
167+
runner = AuditRunner(settings)
247168

248-
if not python_runner_worked:
249-
_run_cli_subprocess(
169+
try:
170+
artifacts = runner.run(
250171
owner=normalized_owner,
251-
token=token,
252-
refresh_local_clones=refresh_local_clones,
172+
refresh_clones=refresh_local_clones,
173+
enrich=True,
174+
export=True,
253175
)
176+
except GitHubRateLimitError as exc:
177+
raise RuntimeError(
178+
"GitHub API rate limit exceeded during fresh audit. "
179+
"Make sure GITHUB_TOKEN is configured correctly."
180+
) from exc
181+
except GitHubApiError as exc:
182+
raise RuntimeError(f"GitHub collection failed: {exc}") from exc
183+
finally:
184+
runner.close()
254185

255186
output_dir = _validate_output(normalized_owner)
256187

@@ -261,4 +192,5 @@ def run_fresh_audit(
261192
output_dir=output_dir,
262193
history_dir=history_dir,
263194
used_token=bool(token),
195+
repo_count=len(artifacts.repos),
264196
)

0 commit comments

Comments
 (0)