1212
1313
class RepoMovedException(Exception):
    """
    Raised when a repository has moved to a different location.

    The ``new_url`` attribute holds whatever was passed as ``new_url`` to the
    constructor, or None when it was omitted.
    """

    def __init__(self, message, new_url=None):
        # Record the destination first, then hand the message to Exception
        # so that str(exc) yields the message.
        self.new_url = new_url
        super().__init__(message)
18+
1819
class RepoGoneException(Exception):
    """Raised when a repository check returns 404 with no redirect."""
2122
2223
23- def update_repo_with_dict (repo ,new_dict ,logger ):
24+ def update_repo_with_dict (repo , new_dict , logger ):
2425 """
25- Update a repository record in the database using a dictionary tagged with
26- the appropriate table fields
27-
28- Args:
29- repo: orm repo object to update
30- new_dict: dict of new values to add to the repo record
31- logger: logging object
32- db: db object
26+ Update a repository record in the database using a dictionary
27+ tagged with the appropriate table fields.
3328 """
3429 to_insert = dict (repo .__dict__ )
3530 del to_insert ['_sa_instance_state' ]
@@ -40,66 +35,86 @@ def update_repo_with_dict(repo,new_dict,logger):
4035 with DatabaseSession (logger ) as session :
4136 previous_alias = HistoricalRepoURLs (repo_id = repo_id , git_url = old_url )
4237 try :
43- result = session .add (previous_alias )
38+ session .add (previous_alias )
4439 session .commit ()
45- except IntegrityError as e : #Unique violation
46- session .rollback ()
40+ except IntegrityError :
41+ session .rollback ()
4742
4843 to_insert .update (new_dict )
4944
50- result = bulk_insert_dicts (logger , to_insert , Repo , ['repo_id' ])
45+ bulk_insert_dicts (logger , to_insert , Repo , ['repo_id' ])
5146
5247 url = to_insert ['repo_git' ]
53- logger .info (f"Updated repo { old_url } to { url } and set alias\n " )
48+ logger .info (f"Updated repo { old_url } to { url } and set alias" )
5449 return url
5550
5651
57-
def extract_owner_and_repo_from_endpoint(key_auth, url, logger):
    """
    Look up the owner and repository name reported by a GitHub API endpoint.

    Args:
        key_auth: authentication object passed through to hit_api
        url: API url to query
        logger: logging object

    Returns:
        A tuple ``(owner, repo)`` taken from the payload's 'full_name' field,
        or ``(None, None)`` when no usable 'owner/repo' value could be
        obtained. Callers must check for the ``(None, None)`` case.
    """
    response_from_gh = hit_api(key_auth, url, logger)

    # hit_api can come back empty (callers in this module retry on a falsy
    # result), so do not hand a missing response to the parser.
    if response_from_gh is None:
        logger.warning(
            f"GitHub repo move task: no response received from {url}"
        )
        return None, None

    page_data = parse_json_response(logger, response_from_gh)

    # The parsed payload is only useful if it is a JSON object; anything else
    # (None, a list, a bare string) has no 'full_name' to read.
    if not isinstance(page_data, dict):
        logger.warning(
            f"GitHub repo move task: unexpected payload type "
            f"{type(page_data).__name__} from {url}"
        )
        return None, None

    full_repo_name = page_data.get('full_name')

    if not full_repo_name or not isinstance(full_repo_name, str):
        logger.warning(
            f"GitHub repo move task: 'full_name' missing in page_data. "
            f"Keys present: {list(page_data.keys())}"
        )
        return None, None

    # Unpacking str.split('/') directly raises ValueError unless there is
    # exactly one slash; validate the shape and use the same (None, None)
    # failure path instead of crashing.
    parts = full_repo_name.split('/')
    if len(parts) != 2 or not all(parts):
        logger.warning(
            f"GitHub repo move task: 'full_name' is not in 'owner/repo' "
            f"form: {full_repo_name!r}"
        )
        return None, None

    owner, repo = parts
    return owner, repo
6868
69- def ping_github_for_repo_move (session , key_auth , repo , logger ,collection_hook = 'core' ):
7069
70+ def ping_github_for_repo_move (session , key_auth , repo , logger , collection_hook = 'core' ):
7171 owner , name = get_owner_repo (repo .repo_git )
7272 url = f"https://api.github.com/repos/{ owner } /{ name } "
7373
7474 attempts = 0
7575 while attempts < 10 :
7676 response_from_gh = hit_api (key_auth , url , logger , follow_redirects = False )
77-
7877 if response_from_gh :
7978 break
80-
8179 attempts += 1
8280
8381 if attempts >= 10 :
8482 logger .error (f"Could not check if repo moved because the api timed out 10 times. Url: { url } " )
8583 raise Exception (f"ERROR: Could not get api response for repo: { url } " )
8684
87- #Update Url and retry if 301
88- #301 moved permanently
85+ # --------------------
86+ # 301: Repo moved
87+ # --------------------
8988 if response_from_gh .status_code == 301 :
90- redirect_location = response_from_gh .headers .get ('location' ) or response_from_gh .headers .get ('Location' )
89+ redirect_location = (
90+ response_from_gh .headers .get ('location' )
91+ or response_from_gh .headers .get ('Location' )
92+ )
93+
9194 if not redirect_location :
92- logger .error (f"Could not check if repo moved because the redirect location is not present. Url : { url } " )
95+ logger .error (f"Redirect location missing for repo : { url } " )
9396 raise Exception (f"ERROR: Could not get redirect location for repo: { url } " )
9497
95- owner , name = extract_owner_and_repo_from_endpoint (key_auth , redirect_location , logger )
98+ owner , name = extract_owner_and_repo_from_endpoint (
99+ key_auth , redirect_location , logger
100+ )
101+
102+ # ✅ SAFETY CHECK (KeyError fix)
103+ if not owner or not name :
104+ logger .error (
105+ f"GitHub repo move task: Unable to extract owner/repo "
106+ f"from redirect URL { redirect_location } "
107+ )
108+ raise RepoMovedException (
109+ "Repo moved but new location could not be determined" ,
110+ new_url = redirect_location
111+ )
96112
97113 try :
98114 old_description = str (repo .description )
99115 except Exception :
100116 old_description = ""
101117
102- #Create new repo object to update existing
103118 repo_update_dict = {
104119 'repo_git' : f"https://github.com/{ owner } /{ name } " ,
105120 'repo_path' : None ,
@@ -109,46 +124,61 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c
109124
110125 new_url = update_repo_with_dict (repo , repo_update_dict , logger )
111126
112- raise RepoMovedException ("ERROR: Repo has moved! Resetting Collection!" , new_url = new_url )
113-
114- #Mark as ignore if 404
127+ raise RepoMovedException (
128+ "ERROR: Repo has moved! Resetting Collection!" ,
129+ new_url = new_url
130+ )
131+
132+ # --------------------
133+ # 404: Repo deleted
134+ # --------------------
115135 if response_from_gh .status_code == 404 :
116136 repo_update_dict = {
117137 'repo_git' : repo .repo_git ,
118138 'repo_path' : None ,
119139 'repo_name' : None ,
120- 'description' : f"During our check for this repo on { datetime .today ().strftime ('%Y-%m-%d' )} , a 404 error was returned. The repository does not appear to have moved. Instead, it appears to be deleted" ,
140+ 'description' : (
141+ f"During our check for this repo on "
142+ f"{ datetime .today ().strftime ('%Y-%m-%d' )} , a 404 error was returned. "
143+ f"The repository does not appear to have moved. Instead, it appears to be deleted"
144+ ),
121145 'data_collection_date' : datetime .today ().strftime ('%Y-%m-%dT%H:%M:%SZ' )
122- }
146+ }
123147
124148 update_repo_with_dict (repo , repo_update_dict , logger )
125149
126- statusQuery = session .query (CollectionStatus ).filter (CollectionStatus .repo_id == repo .repo_id )
127-
128- collectionRecord = execute_session_query (statusQuery ,'one' )
150+ status_query = session .query (CollectionStatus ).filter (
151+ CollectionStatus .repo_id == repo .repo_id
152+ )
153+ collection_record = execute_session_query (status_query , 'one' )
129154
130- collectionRecord .core_status = CollectionState .IGNORE .value
131- collectionRecord .core_task_id = None
132- collectionRecord .core_data_last_collected = datetime .today ().strftime ('%Y-%m-%dT%H:%M:%SZ' )
155+ now = datetime .today ().strftime ('%Y-%m-%dT%H:%M:%SZ' )
133156
134- collectionRecord . secondary_status = CollectionState .IGNORE .value
135- collectionRecord . secondary_task_id = None
136- collectionRecord . secondary_data_last_collected = datetime . today (). strftime ( '%Y-%m-%dT%H:%M:%SZ' )
157+ collection_record . core_status = CollectionState .IGNORE .value
158+ collection_record . core_task_id = None
159+ collection_record . core_data_last_collected = now
137160
138- collectionRecord . facade_status = CollectionState .IGNORE .value
139- collectionRecord . facade_task_id = None
140- collectionRecord . facade_data_last_collected = datetime . today (). strftime ( '%Y-%m-%dT%H:%M:%SZ' )
161+ collection_record . secondary_status = CollectionState .IGNORE .value
162+ collection_record . secondary_task_id = None
163+ collection_record . secondary_data_last_collected = now
141164
142- collectionRecord . ml_status = CollectionState .IGNORE .value
143- collectionRecord . ml_task_id = None
144- collectionRecord . ml_data_last_collected = datetime . today (). strftime ( '%Y-%m-%dT%H:%M:%SZ' )
165+ collection_record . facade_status = CollectionState .IGNORE .value
166+ collection_record . facade_task_id = None
167+ collection_record . facade_data_last_collected = now
145168
169+ collection_record .ml_status = CollectionState .IGNORE .value
170+ collection_record .ml_task_id = None
171+ collection_record .ml_data_last_collected = now
146172
147173 session .commit ()
148- raise RepoGoneException ("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!" )
149174
150-
151- #skip if not 404
175+ raise RepoGoneException (
176+ "ERROR: Repo has moved, and there is no redirection! "
177+ "404 returned, not 301. Resetting Collection!"
178+ )
179+
180+ # --------------------
181+ # Repo still exists
182+ # --------------------
152183 logger .info (f"Repo found at url: { url } " )
153184 return
154-
0 commit comments