33from dataclasses import dataclass
44from datetime import datetime , timezone
55
6- from portfolio_auditor .audit_runner import AuditRunner
76from portfolio_auditor .collectors .github .client import GitHubApiError , GitHubClient
87from portfolio_auditor .collectors .github .collector import GitHubCollector
98from portfolio_auditor .models .repo_metadata import RepoMetadata
@@ -28,24 +27,38 @@ class RepoSyncDelta:
2827 new_repos : tuple [str , ...]
2928 removed_repos : tuple [str , ...]
3029 changed_repos : tuple [str , ...]
30+ modified_since_processed : tuple [str , ...]
3131 latest_live_push_at : datetime | None
3232 latest_cached_push_at : datetime | None
33+ latest_processed_audit_at : datetime | None
3334 checked_at : datetime
3435
# Method of RepoSyncDelta (class header is outside this hunk).
@property
def has_changes(self) -> bool:
    """Return True when any delta bucket is non-empty.

    Covers newly seen repos, removed repos, metadata-changed repos, and
    repos modified after the latest processed audit.
    """
    delta_buckets = (
        self.new_repos,
        self.removed_repos,
        self.changed_repos,
        self.modified_since_processed,
    )
    return any(delta_buckets)
3844
# Method of RepoSyncDelta (class header is outside this hunk).
@property
def total_changed_count(self) -> int:
    """Return the number of DISTINCT repo names across all delta buckets.

    A repo appearing in several buckets (e.g. both changed and
    modified-since-processed) is counted once.
    """
    distinct_names: set[str] = set(self.new_repos)
    distinct_names.update(self.removed_repos)
    distinct_names.update(self.changed_repos)
    distinct_names.update(self.modified_since_processed)
    return len(distinct_names)
4254
4355
@dataclass(frozen=True)
class RepoSyncResult:
    """Immutable result of a repository sync check for a single owner."""

    # GitHub owner (user or organization) the sync was run for.
    owner: str
    # Computed difference between live and cached repository state.
    delta: RepoSyncDelta
    # Where the live side of the comparison came from, e.g. "github_api"
    # or "cached_snapshot_fallback" — see fetch_live_repo_sync_result.
    source: str
    # True only when the GitHub API was actually queried live; False when
    # the result is based on the cached raw snapshot.
    verified_live: bool
    # Human-readable caveat about the result (e.g. fallback in use), or None.
    warning: str | None = None
5063
5164
@@ -74,31 +87,42 @@ def fetch_live_repo_sync_result(owner: str, settings: Settings) -> RepoSyncResul
7487 client = GitHubClient (settings )
7588 collector = GitHubCollector (client , settings )
7689 checked_at = datetime .now (timezone .utc )
90+ latest_processed_at = latest_processed_audit_timestamp (owner , settings )
7791
7892 try :
7993 payloads , payload_kind = collector ._list_owner_repos (owner )
8094 if payload_kind == "normalized_snapshot" :
8195 live_repos = [RepoMetadata .model_validate (item ) for item in payloads ]
96+ live_repos = collector ._apply_filters (live_repos )
8297 source = "cached_snapshot_fallback"
98+ verified_live = False
8399 warning = (
84- "GitHub live sync fell back to the cached raw snapshot because the API could not be queried live."
100+ "GitHub API was not queried live. The sync view is using the cached raw snapshot only, "
101+ "so brand-new repositories cannot be confirmed until live access succeeds."
85102 )
86103 else :
87104 live_repos = [collector ._parse_repo_payload (item ) for item in payloads ]
105+ live_repos = collector ._apply_filters (live_repos )
88106 source = "github_api"
107+ verified_live = True
89108 warning = None
90109 except GitHubApiError as exc :
91- cached_repos = collector .load_raw_owner_snapshot (owner )
110+ cached_repos = collector .load_raw_owner_snapshot (owner ) if collector . has_raw_owner_snapshot ( owner ) else []
92111 delta = compare_repo_states (
93112 live_repos = cached_repos ,
94113 cached_repos = cached_repos ,
95114 checked_at = checked_at ,
115+ latest_processed_audit_at = latest_processed_at ,
96116 )
97117 return RepoSyncResult (
98118 owner = owner ,
99119 delta = delta ,
100120 source = "cached_snapshot_fallback" ,
101- warning = f"GitHub live sync unavailable: { exc } " ,
121+ verified_live = False ,
122+ warning = (
123+ "GitHub live sync unavailable. Using the cached raw snapshot only. "
124+ f"Underlying error: { exc } "
125+ ),
102126 )
103127 finally :
104128 client .close ()
@@ -108,15 +132,23 @@ def fetch_live_repo_sync_result(owner: str, settings: Settings) -> RepoSyncResul
108132 live_repos = live_repos ,
109133 cached_repos = cached_repos ,
110134 checked_at = checked_at ,
135+ latest_processed_audit_at = latest_processed_at ,
136+ )
137+ return RepoSyncResult (
138+ owner = owner ,
139+ delta = delta ,
140+ source = source ,
141+ verified_live = verified_live ,
142+ warning = warning ,
111143 )
112- return RepoSyncResult (owner = owner , delta = delta , source = source , warning = warning )
113144
114145
115146def compare_repo_states (
116147 * ,
117148 live_repos : list [RepoMetadata ],
118149 cached_repos : list [RepoMetadata ],
119150 checked_at : datetime ,
151+ latest_processed_audit_at : datetime | None = None ,
120152) -> RepoSyncDelta :
121153 live_map = build_repo_snapshot_state_map (live_repos )
122154 cached_map = build_repo_snapshot_state_map (cached_repos )
@@ -137,27 +169,50 @@ def compare_repo_states(
137169 ):
138170 changed_repos .append (repo_name )
139171
172+ modified_since_processed = _compute_modified_since_processed (
173+ live_map = live_map ,
174+ latest_processed_audit_at = latest_processed_audit_at ,
175+ )
176+
140177 return RepoSyncDelta (
141178 live_repo_count = len (live_map ),
142179 cached_repo_count = len (cached_map ),
143180 new_repos = tuple (new_repos ),
144181 removed_repos = tuple (removed_repos ),
145182 changed_repos = tuple (changed_repos ),
183+ modified_since_processed = tuple (modified_since_processed ),
146184 latest_live_push_at = _latest_push_at (live_map .values ()),
147185 latest_cached_push_at = _latest_push_at (cached_map .values ()),
186+ latest_processed_audit_at = latest_processed_audit_at ,
148187 checked_at = checked_at ,
149188 )
150189
151190
def should_refresh_audit(sync_result: RepoSyncResult) -> AuditDecision:
    """Decide whether a new audit run is warranted for this sync result.

    Refuses to recommend a refresh when the live GitHub state could not be
    verified; otherwise the first non-empty delta bucket (checked in
    priority order) triggers a positive decision.
    """
    if not sync_result.verified_live:
        return AuditDecision(
            False,
            "Live GitHub state could not be verified. Refresh decisions are disabled until API access succeeds.",
        )

    delta = sync_result.delta
    # (bucket, message template) pairs, checked in priority order.
    refresh_triggers = (
        (delta.new_repos, "{count} new repositories detected on GitHub."),
        (delta.removed_repos, "{count} repositories were removed from GitHub or are no longer visible."),
        (delta.modified_since_processed, "{count} repositories were updated after the latest processed audit."),
        (delta.changed_repos, "{count} repositories changed since the last cached raw snapshot."),
    )
    for bucket, template in refresh_triggers:
        if bucket:
            return AuditDecision(True, template.format(count=len(bucket)))

    return AuditDecision(False, "Live GitHub metadata matches the current processed snapshot.")
161216
162217
163218def latest_processed_audit_timestamp (owner : str , settings : Settings ) -> datetime | None :
@@ -172,7 +227,24 @@ def latest_live_repo_push_timestamp(owner: str, settings: Settings) -> datetime
172227 return sync_result .delta .latest_live_push_at
173228
174229
175- def _latest_push_at (items : list [RepoSnapshotState ] | tuple [RepoSnapshotState , ...] | object ) -> datetime | None :
230+ def _compute_modified_since_processed (
231+ * ,
232+ live_map : dict [str , RepoSnapshotState ],
233+ latest_processed_audit_at : datetime | None ,
234+ ) -> list [str ]:
235+ if latest_processed_audit_at is None :
236+ return []
237+
238+ modified_repo_names : list [str ] = []
239+ for repo_name in sorted (live_map ):
240+ repo = live_map [repo_name ]
241+ last_change = repo .pushed_at or repo .updated_at
242+ if last_change > latest_processed_audit_at :
243+ modified_repo_names .append (repo_name )
244+ return modified_repo_names
245+
246+
247+ def _latest_push_at (items : object ) -> datetime | None :
176248 timestamps : list [datetime ] = []
177249 for item in items :
178250 if isinstance (item , RepoSnapshotState ):
0 commit comments