Skip to content

Commit 1a3d438

Browse files
committed
Enhance GitHub repository synchronization: add support for modified repos, improve error handling, and implement live verification status
1 parent a101444 commit 1a3d438

4 files changed

Lines changed: 221 additions & 35 deletions

File tree

src/portfolio_auditor/collectors/github/collector.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,11 +168,15 @@ def _list_owner_repos(self, owner: str) -> tuple[list[dict[str, Any]], str]:
168168
# Authenticated owner — /user/repos returns private repos too.
169169
payloads = self.client.list_authenticated_user_repos()
170170
else:
171-
# Unknown owner: try org first, fall back to public user listing.
172-
try:
173-
self.client.get_org(owner)
174-
payloads = self.client.list_org_repos(owner)
175-
except GitHubApiError:
171+
# Unknown owner: try org first when the client supports org probing,
172+
# otherwise fall back directly to the public user listing.
173+
if hasattr(self.client, "get_org") and hasattr(self.client, "list_org_repos"):
174+
try:
175+
self.client.get_org(owner)
176+
payloads = self.client.list_org_repos(owner)
177+
except GitHubApiError:
178+
payloads = self.client.list_user_repos(owner)
179+
else:
176180
payloads = self.client.list_user_repos(owner)
177181

178182
return payloads, "github_api"

src/portfolio_auditor/dashboard/app.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import os
44
from datetime import datetime, timezone
55

6-
76
import streamlit as st
87

98
from portfolio_auditor.dashboard.components.optimizer_view import render_optimizer_view
@@ -129,7 +128,7 @@ def _render_staleness_indicator(base_dir_mtime: float | None) -> None:
129128
if base_dir_mtime is None:
130129
return
131130

132-
age_seconds = (datetime.now(timezone.utc).timestamp() - base_dir_mtime)
131+
age_seconds = datetime.now(timezone.utc).timestamp() - base_dir_mtime
133132
age_hours = age_seconds / 3600
134133

135134
if age_hours < 1:
@@ -147,9 +146,10 @@ def _render_staleness_indicator(base_dir_mtime: float | None) -> None:
147146
unsafe_allow_html=True,
148147
)
149148
if age_hours >= 24:
150-
st.warning("Artifacts are more than 24 h old. Run a fresh audit to pick up new repositories.", icon="⚠️")
151-
152-
149+
st.warning(
150+
"Artifacts are more than 24 h old. Run a fresh audit to pick up new repositories.",
151+
icon="⚠️",
152+
)
153153

154154

155155
def _format_relative_timestamp(moment: datetime | None) -> str:
@@ -172,6 +172,11 @@ def _render_repo_sync_status(sync_result: RepoSyncResult) -> None:
172172
delta = sync_result.delta
173173

174174
st.markdown("### GitHub sync status")
175+
if sync_result.verified_live:
176+
st.success("Live verification: GitHub API reachable", icon="✅")
177+
else:
178+
st.warning("Live verification: unavailable, fallback-only view", icon="⚠️")
179+
175180
st.caption(
176181
f"Checked {_format_relative_timestamp(delta.checked_at)} · Source: {sync_result.source.replace('_', ' ')}"
177182
)
@@ -183,23 +188,30 @@ def _render_repo_sync_status(sync_result: RepoSyncResult) -> None:
183188
if sync_result.warning:
184189
st.info(sync_result.warning)
185190

186-
if decision.should_refresh:
187-
st.warning(decision.reason, icon="⚠️")
188-
else:
189-
st.success(decision.reason)
190-
191191
if delta.latest_live_push_at or delta.latest_cached_push_at:
192192
st.caption(
193193
"Latest live push: "
194194
f"{_format_relative_timestamp(delta.latest_live_push_at)} · "
195195
"Latest cached push: "
196196
f"{_format_relative_timestamp(delta.latest_cached_push_at)}"
197197
)
198+
if delta.latest_processed_audit_at:
199+
st.caption(
200+
f"Latest processed audit: {_format_relative_timestamp(delta.latest_processed_audit_at)}"
201+
)
202+
203+
if decision.should_refresh:
204+
st.warning(decision.reason, icon="⚠️")
205+
elif sync_result.verified_live:
206+
st.success(decision.reason)
207+
else:
208+
st.info(decision.reason)
198209

199210
sections = [
200211
("New repositories", delta.new_repos),
201212
("Removed repositories", delta.removed_repos),
202-
("Changed repositories", delta.changed_repos),
213+
("Modified after processed audit", delta.modified_since_processed),
214+
("Changed since cached raw snapshot", delta.changed_repos),
203215
]
204216
for title, repo_names in sections:
205217
if not repo_names:
@@ -228,16 +240,13 @@ def main() -> None:
228240
owners = discover_owners()
229241
default_owner = _resolve_default_owner()
230242

231-
# Build owner options: always include the resolved default so the sidebar
232-
# is not empty on a fresh Streamlit Cloud deploy (no processed/ on disk yet).
233243
if owners:
234244
owner_options = owners
235245
if default_owner and default_owner not in owner_options:
236246
owner_options = [default_owner] + owner_options
237247
else:
238248
owner_options = [default_owner] if default_owner else ["MatALass"]
239249

240-
# Pre-select the default owner when possible.
241250
default_index = 0
242251
if default_owner and default_owner in owner_options:
243252
default_index = owner_options.index(default_owner)
@@ -250,7 +259,7 @@ def main() -> None:
250259
"Use refresh when repositories changed recently or when you want the latest "
251260
"local scan evidence."
252261
)
253-
if st.button("Run fresh audit now", type="primary", use_container_width=True):
262+
if st.button("Run fresh audit now", type="primary", width='stretch'):
254263
with st.spinner(
255264
"Refreshing portfolio artifacts from GitHub. This can take a while for "
256265
"larger portfolios."
@@ -281,17 +290,17 @@ def main() -> None:
281290
else:
282291
sync_error = None
283292

284-
# Staleness indicator — use mtime of ranking.json as the audit timestamp proxy.
285293
ranking_path = data.base_dir / "ranking.json"
286294
mtime = ranking_path.stat().st_mtime if ranking_path.exists() else None
287295
with st.sidebar:
288296
_render_staleness_indicator(mtime)
289297
st.markdown("---")
290298
if sync_result is not None:
291299
_render_repo_sync_status(sync_result)
292-
if should_refresh_audit(sync_result).should_refresh and st.button(
300+
refresh_decision = should_refresh_audit(sync_result)
301+
if refresh_decision.should_refresh and st.button(
293302
"Refresh audit to sync GitHub changes",
294-
use_container_width=True,
303+
width='stretch',
295304
):
296305
with st.spinner(
297306
"Refreshing portfolio artifacts to include the latest GitHub changes."
@@ -384,4 +393,4 @@ def main() -> None:
384393

385394

386395
if __name__ == "__main__":
387-
main()
396+
main()

src/portfolio_auditor/dashboard/repo_sync.py

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from dataclasses import dataclass
44
from datetime import datetime, timezone
55

6-
from portfolio_auditor.audit_runner import AuditRunner
76
from portfolio_auditor.collectors.github.client import GitHubApiError, GitHubClient
87
from portfolio_auditor.collectors.github.collector import GitHubCollector
98
from portfolio_auditor.models.repo_metadata import RepoMetadata
@@ -28,24 +27,38 @@ class RepoSyncDelta:
2827
new_repos: tuple[str, ...]
2928
removed_repos: tuple[str, ...]
3029
changed_repos: tuple[str, ...]
30+
modified_since_processed: tuple[str, ...]
3131
latest_live_push_at: datetime | None
3232
latest_cached_push_at: datetime | None
33+
latest_processed_audit_at: datetime | None
3334
checked_at: datetime
3435

3536
@property
3637
def has_changes(self) -> bool:
37-
return bool(self.new_repos or self.removed_repos or self.changed_repos)
38+
return bool(
39+
self.new_repos
40+
or self.removed_repos
41+
or self.changed_repos
42+
or self.modified_since_processed
43+
)
3844

3945
@property
4046
def total_changed_count(self) -> int:
41-
return len(self.new_repos) + len(self.removed_repos) + len(self.changed_repos)
47+
repo_names = {
48+
*self.new_repos,
49+
*self.removed_repos,
50+
*self.changed_repos,
51+
*self.modified_since_processed,
52+
}
53+
return len(repo_names)
4254

4355

4456
@dataclass(frozen=True)
4557
class RepoSyncResult:
4658
owner: str
4759
delta: RepoSyncDelta
4860
source: str
61+
verified_live: bool
4962
warning: str | None = None
5063

5164

@@ -74,31 +87,42 @@ def fetch_live_repo_sync_result(owner: str, settings: Settings) -> RepoSyncResul
7487
client = GitHubClient(settings)
7588
collector = GitHubCollector(client, settings)
7689
checked_at = datetime.now(timezone.utc)
90+
latest_processed_at = latest_processed_audit_timestamp(owner, settings)
7791

7892
try:
7993
payloads, payload_kind = collector._list_owner_repos(owner)
8094
if payload_kind == "normalized_snapshot":
8195
live_repos = [RepoMetadata.model_validate(item) for item in payloads]
96+
live_repos = collector._apply_filters(live_repos)
8297
source = "cached_snapshot_fallback"
98+
verified_live = False
8399
warning = (
84-
"GitHub live sync fell back to the cached raw snapshot because the API could not be queried live."
100+
"GitHub API was not queried live. The sync view is using the cached raw snapshot only, "
101+
"so brand-new repositories cannot be confirmed until live access succeeds."
85102
)
86103
else:
87104
live_repos = [collector._parse_repo_payload(item) for item in payloads]
105+
live_repos = collector._apply_filters(live_repos)
88106
source = "github_api"
107+
verified_live = True
89108
warning = None
90109
except GitHubApiError as exc:
91-
cached_repos = collector.load_raw_owner_snapshot(owner)
110+
cached_repos = collector.load_raw_owner_snapshot(owner) if collector.has_raw_owner_snapshot(owner) else []
92111
delta = compare_repo_states(
93112
live_repos=cached_repos,
94113
cached_repos=cached_repos,
95114
checked_at=checked_at,
115+
latest_processed_audit_at=latest_processed_at,
96116
)
97117
return RepoSyncResult(
98118
owner=owner,
99119
delta=delta,
100120
source="cached_snapshot_fallback",
101-
warning=f"GitHub live sync unavailable: {exc}",
121+
verified_live=False,
122+
warning=(
123+
"GitHub live sync unavailable. Using the cached raw snapshot only. "
124+
f"Underlying error: {exc}"
125+
),
102126
)
103127
finally:
104128
client.close()
@@ -108,15 +132,23 @@ def fetch_live_repo_sync_result(owner: str, settings: Settings) -> RepoSyncResul
108132
live_repos=live_repos,
109133
cached_repos=cached_repos,
110134
checked_at=checked_at,
135+
latest_processed_audit_at=latest_processed_at,
136+
)
137+
return RepoSyncResult(
138+
owner=owner,
139+
delta=delta,
140+
source=source,
141+
verified_live=verified_live,
142+
warning=warning,
111143
)
112-
return RepoSyncResult(owner=owner, delta=delta, source=source, warning=warning)
113144

114145

115146
def compare_repo_states(
116147
*,
117148
live_repos: list[RepoMetadata],
118149
cached_repos: list[RepoMetadata],
119150
checked_at: datetime,
151+
latest_processed_audit_at: datetime | None = None,
120152
) -> RepoSyncDelta:
121153
live_map = build_repo_snapshot_state_map(live_repos)
122154
cached_map = build_repo_snapshot_state_map(cached_repos)
@@ -137,27 +169,50 @@ def compare_repo_states(
137169
):
138170
changed_repos.append(repo_name)
139171

172+
modified_since_processed = _compute_modified_since_processed(
173+
live_map=live_map,
174+
latest_processed_audit_at=latest_processed_audit_at,
175+
)
176+
140177
return RepoSyncDelta(
141178
live_repo_count=len(live_map),
142179
cached_repo_count=len(cached_map),
143180
new_repos=tuple(new_repos),
144181
removed_repos=tuple(removed_repos),
145182
changed_repos=tuple(changed_repos),
183+
modified_since_processed=tuple(modified_since_processed),
146184
latest_live_push_at=_latest_push_at(live_map.values()),
147185
latest_cached_push_at=_latest_push_at(cached_map.values()),
186+
latest_processed_audit_at=latest_processed_audit_at,
148187
checked_at=checked_at,
149188
)
150189

151190

152191
def should_refresh_audit(sync_result: RepoSyncResult) -> AuditDecision:
153192
delta = sync_result.delta
193+
if not sync_result.verified_live:
194+
return AuditDecision(
195+
False,
196+
"Live GitHub state could not be verified. Refresh decisions are disabled until API access succeeds.",
197+
)
154198
if delta.new_repos:
155199
return AuditDecision(True, f"{len(delta.new_repos)} new repositories detected on GitHub.")
156200
if delta.removed_repos:
157-
return AuditDecision(True, f"{len(delta.removed_repos)} repositories were removed from GitHub or are no longer visible.")
201+
return AuditDecision(
202+
True,
203+
f"{len(delta.removed_repos)} repositories were removed from GitHub or are no longer visible.",
204+
)
205+
if delta.modified_since_processed:
206+
return AuditDecision(
207+
True,
208+
f"{len(delta.modified_since_processed)} repositories were updated after the latest processed audit.",
209+
)
158210
if delta.changed_repos:
159-
return AuditDecision(True, f"{len(delta.changed_repos)} repositories changed since the last cached snapshot.")
160-
return AuditDecision(False, "GitHub metadata matches the current cached snapshot.")
211+
return AuditDecision(
212+
True,
213+
f"{len(delta.changed_repos)} repositories changed since the last cached raw snapshot.",
214+
)
215+
return AuditDecision(False, "Live GitHub metadata matches the current processed snapshot.")
161216

162217

163218
def latest_processed_audit_timestamp(owner: str, settings: Settings) -> datetime | None:
@@ -172,7 +227,24 @@ def latest_live_repo_push_timestamp(owner: str, settings: Settings) -> datetime
172227
return sync_result.delta.latest_live_push_at
173228

174229

175-
def _latest_push_at(items: list[RepoSnapshotState] | tuple[RepoSnapshotState, ...] | object) -> datetime | None:
230+
def _compute_modified_since_processed(
    *,
    live_map: dict[str, RepoSnapshotState],
    latest_processed_audit_at: datetime | None,
) -> list[str]:
    """Return the names of repos changed after the latest processed audit.

    Args:
        live_map: Repository name mapped to its snapshot state. Each state is
            read for ``pushed_at`` and, when that is falsy, ``updated_at``.
        latest_processed_audit_at: Timestamp of the latest processed audit,
            or ``None`` when no processed audit is known.

    Returns:
        Repository names, in sorted order, whose last known change is strictly
        later than ``latest_processed_audit_at``. Empty when no audit
        timestamp is available.
    """
    if latest_processed_audit_at is None:
        # Without a reference point nothing can be called "modified since".
        return []

    modified_repo_names: list[str] = []
    for repo_name in sorted(live_map):
        repo = live_map[repo_name]
        last_change = repo.pushed_at or repo.updated_at
        if last_change is None:
            # No timestamp at all: comparing None with a datetime would raise
            # TypeError, so treat the repo as not provably modified.
            continue
        # NOTE(review): assumes both datetimes are timezone-aware (or both
        # naive); mixing the two raises TypeError — confirm against callers.
        if last_change > latest_processed_audit_at:
            modified_repo_names.append(repo_name)
    return modified_repo_names
245+
246+
247+
def _latest_push_at(items: object) -> datetime | None:
176248
timestamps: list[datetime] = []
177249
for item in items:
178250
if isinstance(item, RepoSnapshotState):

0 commit comments

Comments
 (0)