Skip to content

Commit 4125cd4

Browse files
Himasree Kolathur (HimasreeKolathur24)
authored and committed
Fix KeyError when GitHub repo move response lacks full_name
Signed-off-by: Himasree Kolathur <himaa@Himasrees-MacBook-Air.local>
Signed-off-by: HimasreeKolathur24 <himasrikolathur@gmail.com>
1 parent 8abc5cd commit 4125cd4

1 file changed

Lines changed: 84 additions & 54 deletions

File tree

  • augur/tasks/github/detect_move

augur/tasks/github/detect_move/core.py

Lines changed: 84 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,19 @@
1212

1313

1414
class RepoMovedException(Exception):
    """
    Signal that a repository is no longer at the URL it was checked under.

    Args:
        message: human-readable description, forwarded to ``Exception``.
        new_url: location supplied by the raiser for where the repository
            now lives; defaults to ``None`` when no location is given.
    """

    def __init__(self, message, new_url=None):
        super().__init__(message)
        # Kept on the instance so handlers can read the new location.
        self.new_url = new_url
18+
1819

1920
class RepoGoneException(Exception):
    """Signal that a repository could not be found and no redirect was given."""
2122

2223

23-
def update_repo_with_dict(repo,new_dict,logger):
24+
def update_repo_with_dict(repo, new_dict, logger):
2425
"""
25-
Update a repository record in the database using a dictionary tagged with
26-
the appropriate table fields
27-
28-
Args:
29-
repo: orm repo object to update
30-
new_dict: dict of new values to add to the repo record
31-
logger: logging object
32-
db: db object
26+
Update a repository record in the database using a dictionary
27+
tagged with the appropriate table fields.
3328
"""
3429
to_insert = dict(repo.__dict__)
3530
del to_insert['_sa_instance_state']
@@ -40,66 +35,86 @@ def update_repo_with_dict(repo,new_dict,logger):
4035
with DatabaseSession(logger) as session:
4136
previous_alias = HistoricalRepoURLs(repo_id=repo_id, git_url=old_url)
4237
try:
43-
result = session.add(previous_alias)
38+
session.add(previous_alias)
4439
session.commit()
45-
except IntegrityError as e: #Unique violation
46-
session.rollback()
40+
except IntegrityError:
41+
session.rollback()
4742

4843
to_insert.update(new_dict)
4944

50-
result = bulk_insert_dicts(logger, to_insert, Repo, ['repo_id'])
45+
bulk_insert_dicts(logger, to_insert, Repo, ['repo_id'])
5146

5247
url = to_insert['repo_git']
53-
logger.info(f"Updated repo {old_url} to {url} and set alias\n")
48+
logger.info(f"Updated repo {old_url} to {url} and set alias")
5449
return url
5550

5651

57-
5852
def extract_owner_and_repo_from_endpoint(key_auth, url, logger):
    """
    Resolve the owner and repository name described by a GitHub API endpoint.

    Fetches ``url`` with ``hit_api``, decodes the body with
    ``parse_json_response`` and splits the payload's ``full_name`` field
    (expected in ``owner/repo`` form) into its two parts.

    Args:
        key_auth: authentication object passed through to ``hit_api``
        url: API endpoint to query
        logger: logging object

    Returns:
        ``(owner, repo)`` when ``full_name`` is present and well formed,
        otherwise ``(None, None)``. Callers must check for the ``None`` pair.
    """
    response_from_gh = hit_api(key_auth, url, logger)
    page_data = parse_json_response(logger, response_from_gh)

    # Defensive: only a mapping supports .get(); any other decoded payload
    # would raise AttributeError here instead of taking the "name unknown" path.
    if not isinstance(page_data, dict):
        logger.warning(
            f"GitHub repo move task: unexpected response payload of type "
            f"{type(page_data).__name__} from {url}"
        )
        return None, None

    full_repo_name = page_data.get('full_name')

    # Fix for #3621: a missing 'full_name' used to raise KeyError.
    if not full_repo_name:
        logger.warning(
            f"GitHub repo move task: 'full_name' missing in page_data. "
            f"Keys present: {list(page_data.keys())}"
        )
        return None, None

    # Tuple-unpacking str.split('/') raises ValueError unless there is exactly
    # one slash; validate the shape so malformed names also give (None, None).
    parts = str(full_repo_name).split('/')
    if len(parts) != 2 or not all(parts):
        logger.warning(
            f"GitHub repo move task: malformed 'full_name' value "
            f"{full_repo_name!r} from {url}"
        )
        return None, None

    owner, repo = parts
    return owner, repo
6868

69-
def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='core'):
7069

70+
def ping_github_for_repo_move(session, key_auth, repo, logger, collection_hook='core'):
7171
owner, name = get_owner_repo(repo.repo_git)
7272
url = f"https://api.github.com/repos/{owner}/{name}"
7373

7474
attempts = 0
7575
while attempts < 10:
7676
response_from_gh = hit_api(key_auth, url, logger, follow_redirects=False)
77-
7877
if response_from_gh:
7978
break
80-
8179
attempts += 1
8280

8381
if attempts >= 10:
8482
logger.error(f"Could not check if repo moved because the api timed out 10 times. Url: {url}")
8583
raise Exception(f"ERROR: Could not get api response for repo: {url}")
8684

87-
#Update Url and retry if 301
88-
#301 moved permanently
85+
# --------------------
86+
# 301: Repo moved
87+
# --------------------
8988
if response_from_gh.status_code == 301:
90-
redirect_location = response_from_gh.headers.get('location') or response_from_gh.headers.get('Location')
89+
redirect_location = (
90+
response_from_gh.headers.get('location')
91+
or response_from_gh.headers.get('Location')
92+
)
93+
9194
if not redirect_location:
92-
logger.error(f"Could not check if repo moved because the redirect location is not present. Url: {url}")
95+
logger.error(f"Redirect location missing for repo: {url}")
9396
raise Exception(f"ERROR: Could not get redirect location for repo: {url}")
9497

95-
owner, name = extract_owner_and_repo_from_endpoint(key_auth, redirect_location, logger)
98+
owner, name = extract_owner_and_repo_from_endpoint(
99+
key_auth, redirect_location, logger
100+
)
101+
102+
# ✅ SAFETY CHECK (KeyError fix)
103+
if not owner or not name:
104+
logger.error(
105+
f"GitHub repo move task: Unable to extract owner/repo "
106+
f"from redirect URL {redirect_location}"
107+
)
108+
raise RepoMovedException(
109+
"Repo moved but new location could not be determined",
110+
new_url=redirect_location
111+
)
96112

97113
try:
98114
old_description = str(repo.description)
99115
except Exception:
100116
old_description = ""
101117

102-
#Create new repo object to update existing
103118
repo_update_dict = {
104119
'repo_git': f"https://github.com/{owner}/{name}",
105120
'repo_path': None,
@@ -109,46 +124,61 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c
109124

110125
new_url = update_repo_with_dict(repo, repo_update_dict, logger)
111126

112-
raise RepoMovedException("ERROR: Repo has moved! Resetting Collection!", new_url=new_url)
113-
114-
#Mark as ignore if 404
127+
raise RepoMovedException(
128+
"ERROR: Repo has moved! Resetting Collection!",
129+
new_url=new_url
130+
)
131+
132+
# --------------------
133+
# 404: Repo deleted
134+
# --------------------
115135
if response_from_gh.status_code == 404:
116136
repo_update_dict = {
117137
'repo_git': repo.repo_git,
118138
'repo_path': None,
119139
'repo_name': None,
120-
'description': f"During our check for this repo on {datetime.today().strftime('%Y-%m-%d')}, a 404 error was returned. The repository does not appear to have moved. Instead, it appears to be deleted",
140+
'description': (
141+
f"During our check for this repo on "
142+
f"{datetime.today().strftime('%Y-%m-%d')}, a 404 error was returned. "
143+
f"The repository does not appear to have moved. Instead, it appears to be deleted"
144+
),
121145
'data_collection_date': datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
122-
}
146+
}
123147

124148
update_repo_with_dict(repo, repo_update_dict, logger)
125149

126-
statusQuery = session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo.repo_id)
127-
128-
collectionRecord = execute_session_query(statusQuery,'one')
150+
status_query = session.query(CollectionStatus).filter(
151+
CollectionStatus.repo_id == repo.repo_id
152+
)
153+
collection_record = execute_session_query(status_query, 'one')
129154

130-
collectionRecord.core_status = CollectionState.IGNORE.value
131-
collectionRecord.core_task_id = None
132-
collectionRecord.core_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
155+
now = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
133156

134-
collectionRecord.secondary_status = CollectionState.IGNORE.value
135-
collectionRecord.secondary_task_id = None
136-
collectionRecord.secondary_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
157+
collection_record.core_status = CollectionState.IGNORE.value
158+
collection_record.core_task_id = None
159+
collection_record.core_data_last_collected = now
137160

138-
collectionRecord.facade_status = CollectionState.IGNORE.value
139-
collectionRecord.facade_task_id = None
140-
collectionRecord.facade_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
161+
collection_record.secondary_status = CollectionState.IGNORE.value
162+
collection_record.secondary_task_id = None
163+
collection_record.secondary_data_last_collected = now
141164

142-
collectionRecord.ml_status = CollectionState.IGNORE.value
143-
collectionRecord.ml_task_id = None
144-
collectionRecord.ml_data_last_collected = datetime.today().strftime('%Y-%m-%dT%H:%M:%SZ')
165+
collection_record.facade_status = CollectionState.IGNORE.value
166+
collection_record.facade_task_id = None
167+
collection_record.facade_data_last_collected = now
145168

169+
collection_record.ml_status = CollectionState.IGNORE.value
170+
collection_record.ml_task_id = None
171+
collection_record.ml_data_last_collected = now
146172

147173
session.commit()
148-
raise RepoGoneException("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!")
149174

150-
151-
#skip if not 404
175+
raise RepoGoneException(
176+
"ERROR: Repo has moved, and there is no redirection! "
177+
"404 returned, not 301. Resetting Collection!"
178+
)
179+
180+
# --------------------
181+
# Repo still exists
182+
# --------------------
152183
logger.info(f"Repo found at url: {url}")
153184
return
154-

0 commit comments

Comments
 (0)