Skip to content

Commit 9cc205b

Browse files
committed
Handle 429 errors correctly in link verifier
Previously, the link verifier did not properly handle 429 (Too Many Requests) responses, preventing proper backoff behavior. This fix implements correct throttling logic to respect rate limits. Signed-off-by: Gaurav Aggarwal <aggarg@amazon.com>
1 parent 5eabaeb commit 9cc205b

1 file changed

Lines changed: 27 additions & 12 deletions

File tree

link-verifier/verify-links.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -193,23 +193,41 @@ def create_html(markdown_file):
193193
)
194194
return process
195195

196+
def _retry_after_seconds(headers, default=60):
    """Return the delay, in seconds, requested by a ``Retry-After`` header.

    Args:
        headers: Mapping of response headers (anything with ``.get``), or
            ``None`` when no headers are available.
        default: Delay to use when the header is missing or is not a plain
            integer. ``Retry-After`` may also carry an HTTP-date; that form
            is not parsed here and falls back to *default*.

    Returns:
        A non-negative number of seconds to wait.
    """
    value = headers.get("Retry-After") if headers is not None else None
    try:
        # Clamp at zero: time.sleep() raises ValueError on negative input.
        return max(0, int(value))
    except (TypeError, ValueError):
        return default


def issue_request(method, url, **kwargs):
    """Call ``method(url, **kwargs)`` and back off when throttled (HTTP 429).

    Args:
        method: Callable that performs the request, e.g. ``requests.head``
            or ``requests.get``. It must return an object exposing
            ``status_code`` and ``headers``.
        url: URL passed as the first positional argument to *method*.
        **kwargs: Forwarded unchanged to *method*.

    Returns:
        The first response whose status code is not 429.

    Raises:
        Exception: ``"Max retries exceeded"`` when every attempt was
            throttled.
        Any exception raised by *method* that does not carry ``code == 429``
        is propagated unchanged.
    """
    max_attempts = 3

    for attempt in range(max_attempts):
        try:
            response = method(url, **kwargs)
        except Exception as error:
            # Only errors that expose an HTTP 429 status through a ``code``
            # attribute are retried. getattr() keeps ordinary errors (which
            # have no ``code``) from being masked by an AttributeError.
            if getattr(error, "code", None) != 429:
                raise
            headers = getattr(error, "headers", None)
        else:
            if response.status_code != 429:
                return response
            headers = response.headers

        # No point in waiting after the final attempt; we are about to give up.
        if attempt < max_attempts - 1:
            time.sleep(_retry_after_seconds(headers))

    raise Exception("Max retries exceeded")
216+
196217
def access_url(url):
197218
global http_headers
198219
status = ''
199220
is_broken = False
200221
try_with_trusted_ca_bundle = False
201222

202223
try:
203-
r = requests.head(url, allow_redirects=True, headers=http_headers)
224+
r = issue_request(requests.head, url, allow_redirects=True, headers=http_headers)
204225
# Some sites may return 404 for head but not get, e.g.
205226
# https://tls.mbed.org/kb/development/thread-safety-and-multi-threading
206227
if r.status_code >= 400:
207228
# Allow redirects is already enabled by default for GET.
208-
r = requests.get(url, headers=http_headers)
209-
# It's likely we will run into GitHub's rate-limiting if there are many links.
210-
if r.status_code == 429:
211-
time.sleep(int(r.headers['Retry-After']))
212-
r = requests.head(url, allow_redirects=True)
229+
r = issue_request(requests.get, url, headers=http_headers)
230+
213231
if r.status_code >= 400:
214232
is_broken = True
215233
status = r.status_code
@@ -223,16 +241,13 @@ def access_url(url):
223241

224242
if try_with_trusted_ca_bundle == True:
225243
try:
226-
r = requests.head(url, allow_redirects=True, headers=http_headers, verify=TRUSTED_CA_BUNDLE)
244+
r = issue_request(requests.head, url, allow_redirects=True, headers=http_headers, verify=TRUSTED_CA_BUNDLE)
227245
# Some sites may return 404 for head but not get, e.g.
228246
# https://tls.mbed.org/kb/development/thread-safety-and-multi-threading
229247
if r.status_code >= 400:
230248
# Allow redirects is already enabled by default for GET.
231-
r = requests.get(url, headers=http_headers, verify=TRUSTED_CA_BUNDLE)
232-
# It's likely we will run into GitHub's rate-limiting if there are many links.
233-
if r.status_code == 429:
234-
time.sleep(int(r.headers['Retry-After']))
235-
r = requests.head(url, allow_redirects=True, verify=TRUSTED_CA_BUNDLE)
249+
r = issue_request(requests.get, url, headers=http_headers, verify=TRUSTED_CA_BUNDLE)
250+
236251
if r.status_code >= 400:
237252
is_broken = True
238253
status = r.status_code

0 commit comments

Comments
 (0)