11from __future__ import annotations
22
3- import inspect
43import os
54import shutil
6- import subprocess
7- import sys
85from dataclasses import dataclass
96from datetime import datetime , timezone
10- from importlib import import_module
117from pathlib import Path
12- from typing import Any , Callable
138
149import streamlit as st
1510
11+ from portfolio_auditor .audit_runner import AuditRunner
12+ from portfolio_auditor .collectors .github .client import GitHubApiError , GitHubRateLimitError
13+ from portfolio_auditor .settings import get_settings
14+
1615
1716ROOT_DIR = Path (__file__ ).resolve ().parents [3 ]
18- SRC_DIR = ROOT_DIR / "src"
1917DATA_DIR = ROOT_DIR / "data"
2018PROCESSED_DIR = DATA_DIR / "processed"
2119PROCESSED_HISTORY_DIR = DATA_DIR / "processed_history"
@@ -29,17 +27,24 @@ class AuditRunResult:
2927 output_dir : Path
3028 history_dir : Path | None = None
3129 used_token : bool = False
30+ repo_count : int = 0
3231
3332
3433def resolve_github_token () -> str | None :
34+ """
35+ Resolve the GitHub token from Streamlit secrets first, then environment variables.
36+ Works both:
37+ - locally via .env / environment variables
38+ - on Streamlit Community Cloud via Secrets
39+ """
3540 try :
3641 secret_token = st .secrets .get ("GITHUB_TOKEN" )
3742 except Exception :
3843 secret_token = None
3944
4045 env_token = os .getenv ("GITHUB_TOKEN" )
41- token = secret_token or env_token
4246
47+ token = secret_token or env_token
4348 if not token :
4449 return None
4550
@@ -60,6 +65,9 @@ def _owner_history_dir(owner: str) -> Path:
6065
6166
6267def _snapshot_existing_processed_dir (owner : str ) -> Path | None :
68+ """
69+ Keep a timestamped backup of the current processed artifacts before refresh.
70+ """
6371 current_dir = _owner_output_dir (owner )
6472 if not current_dir .exists ():
6573 return None
@@ -72,13 +80,18 @@ def _snapshot_existing_processed_dir(owner: str) -> Path | None:
7280 return snapshot_dir
7381
7482
75- def _build_excluded_repo_names (owner : str ) -> str :
76- default_names = {
77- owner .lower (),
83+ def _build_excluded_repo_names (owner : str ) -> set [str ]:
84+ """
85+ Default exclusions:
86+ - profile README repo: owner/owner
87+ - this project itself, to avoid self-bias
88+ Plus optional additions from env or Streamlit secrets.
89+ """
90+ excluded = {
91+ owner .strip ().lower (),
7892 "github-portfolio-auditor" ,
7993 }
8094
81- extra = ""
8295 try :
8396 extra = str (st .secrets .get ("GITHUB_EXCLUDED_REPO_NAMES" , "" )).strip ()
8497 except Exception :
@@ -88,139 +101,42 @@ def _build_excluded_repo_names(owner: str) -> str:
88101 for value in extra .split ("," ):
89102 cleaned = value .strip ().lower ()
90103 if cleaned :
91- default_names .add (cleaned )
92-
93- return "," .join (sorted (default_names ))
94-
95-
96- def _try_call_python_runner (
97- owner : str ,
98- token : str | None ,
99- refresh_local_clones : bool ,
100- ) -> bool :
101- candidates : list [tuple [str , str ]] = [
102- ("portfolio_auditor.audit_runner" , "run_full_audit" ),
103- ("portfolio_auditor.cli" , "run_full_audit" ),
104- ("portfolio_auditor.runner" , "run_full_audit" ),
105- ("portfolio_auditor.main" , "run_full_audit" ),
106- ]
107-
108- for module_name , function_name in candidates :
109- try :
110- module = import_module (module_name )
111- fn = getattr (module , function_name , None )
112- if fn is None or not callable (fn ):
113- continue
114-
115- _call_runner_function (
116- fn = fn ,
117- owner = owner ,
118- token = token ,
119- refresh_local_clones = refresh_local_clones ,
120- )
121- return True
122- except Exception :
123- continue
124-
125- return False
126-
127-
128- def _call_runner_function (
129- fn : Callable [..., Any ],
130- owner : str ,
131- token : str | None ,
132- refresh_local_clones : bool ,
133- ) -> Any :
134- signature = inspect .signature (fn )
135- kwargs : dict [str , Any ] = {}
136-
137- for param_name in signature .parameters :
138- lowered = param_name .lower ()
139-
140- if lowered in {"owner" , "github_owner" , "username" , "account" }:
141- kwargs [param_name ] = owner
142- elif lowered in {"output_dir" , "output_path" , "output_root" }:
143- kwargs [param_name ] = str (PROCESSED_DIR )
144- elif lowered in {"github_token" , "token" , "access_token" } and token :
145- kwargs [param_name ] = token
146- elif lowered in {"refresh_local_clones" , "refresh_clones" , "refresh_clone" }:
147- kwargs [param_name ] = refresh_local_clones
148- elif lowered in {"excluded_repo_names" , "excluded_names" }:
149- kwargs [param_name ] = _build_excluded_repo_names (owner )
150-
151- return fn (** kwargs )
104+ excluded .add (cleaned )
152105
106+ return excluded
153107
154- def _build_subprocess_env (token : str | None , owner : str ) -> dict [str , str ]:
155- env = os .environ .copy ()
156108
def _apply_runtime_github_env(owner: str, token: str | None) -> None:
    """
    Inject runtime environment variables so the existing settings / GitHub client
    can pick them up without changing the whole project architecture.

    This mutates ``os.environ`` of the current process:
    - ``GITHUB_TOKEN`` is set only when a non-empty token is given; an existing
      value is left untouched otherwise.
    - ``GITHUB_EXCLUDED_REPO_NAMES`` is always overwritten with the sorted,
      comma-separated names from ``_build_excluded_repo_names(owner)``.

    Args:
        owner: GitHub owner used to derive the default exclusions.
        token: GitHub token to expose to downstream code, or ``None``.
    """
    if token:
        os.environ["GITHUB_TOKEN"] = token

    # Sorting keeps the exported value deterministic regardless of set order.
    excluded_names = ",".join(sorted(_build_excluded_repo_names(owner)))
    os.environ["GITHUB_EXCLUDED_REPO_NAMES"] = excluded_names
210119
211120
def _validate_output(owner: str) -> Path:
    """
    Check that a finished audit left its processed artifacts on disk.

    Args:
        owner: GitHub owner whose output directory (as resolved by
            ``_owner_output_dir``) is inspected.

    Returns:
        The owner's output directory, once every required file is present.

    Raises:
        RuntimeError: If the owner directory does not exist, or if any of the
            required JSON artifacts is missing from it.
    """
    owner_dir = _owner_output_dir(owner)

    if not owner_dir.exists():
        raise RuntimeError(
            f"Audit finished without creating the expected owner directory: {owner_dir}"
        )

    # Artifacts the caller relies on after a fresh audit; reported by file name.
    required_names = (
        "ranking.json",
        "ranking_summary.json",
        "portfolio_selection.json",
        "redundancy_analysis.json",
    )
    missing = [name for name in required_names if not (owner_dir / name).exists()]
    if missing:
        raise RuntimeError(
            "Audit finished but some required processed artifacts are missing: "
            + ", ".join(missing)
        )

    return owner_dir
@@ -230,27 +146,42 @@ def run_fresh_audit(
230146 owner : str ,
231147 refresh_local_clones : bool = False ,
232148) -> AuditRunResult :
149+ """
150+ Launch a fresh GitHub audit directly from Streamlit using the existing Python
151+ orchestration layer (AuditRunner), not the CLI.
152+
153+ This is the correct integration point for your project.
154+ """
233155 normalized_owner = owner .strip ()
234156 if not normalized_owner :
235157 raise ValueError ("Owner cannot be empty." )
236158
237159 token = resolve_github_token ()
160+ _apply_runtime_github_env (normalized_owner , token )
161+
238162 history_dir = _snapshot_existing_processed_dir (normalized_owner )
239163
240164 PROCESSED_DIR .mkdir (parents = True , exist_ok = True )
241165
242- python_runner_worked = _try_call_python_runner (
243- owner = normalized_owner ,
244- token = token ,
245- refresh_local_clones = refresh_local_clones ,
246- )
166+ settings = get_settings ()
167+ runner = AuditRunner (settings )
247168
248- if not python_runner_worked :
249- _run_cli_subprocess (
169+ try :
170+ artifacts = runner . run (
250171 owner = normalized_owner ,
251- token = token ,
252- refresh_local_clones = refresh_local_clones ,
172+ refresh_clones = refresh_local_clones ,
173+ enrich = True ,
174+ export = True ,
253175 )
176+ except GitHubRateLimitError as exc :
177+ raise RuntimeError (
178+ "GitHub API rate limit exceeded during fresh audit. "
179+ "Make sure GITHUB_TOKEN is configured correctly."
180+ ) from exc
181+ except GitHubApiError as exc :
182+ raise RuntimeError (f"GitHub collection failed: { exc } " ) from exc
183+ finally :
184+ runner .close ()
254185
255186 output_dir = _validate_output (normalized_owner )
256187
@@ -261,4 +192,5 @@ def run_fresh_audit(
261192 output_dir = output_dir ,
262193 history_dir = history_dir ,
263194 used_token = bool (token ),
195+ repo_count = len (artifacts .repos ),
264196 )
0 commit comments