Skip to content

Commit fe2ce06

Browse files
Improve huggingface robustness (#19311)
Ideally, the web requests would be retried inside optimum itself rather than retrying the whole export here.
1 parent 10a0c91 commit fe2ce06

1 file changed

Lines changed: 26 additions & 5 deletions

File tree

.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
import gc
33
import logging
44
import math
5+
import shutil
56
import subprocess
67
import tempfile
8+
import time
79
from pathlib import Path
810
from typing import List
911

@@ -25,6 +27,17 @@
2527
)
2628

2729

30+
# Total number of times cli_export runs the export command before re-raising the failure.
EXPORT_RETRIES = 3
31+
32+
33+
def _clear_export_dir(model_dir):
34+
for path in Path(model_dir).iterdir():
35+
if path.is_dir() and not path.is_symlink():
36+
shutil.rmtree(path)
37+
else:
38+
path.unlink()
39+
40+
2841
def cli_export(command, model_dir):
2942
p = Path(model_dir)
3043
if p.exists():
@@ -34,11 +47,19 @@ def cli_export(command, model_dir):
3447
raise Exception(
3548
f"Existing directory {model_dir} is non-empty. Please remove it first."
3649
)
37-
try:
38-
subprocess.run(command, check=True)
39-
print("Export completed successfully.")
40-
except subprocess.CalledProcessError as e:
41-
print(f"Export failed with error: {e}")
50+
51+
for attempt in range(1, EXPORT_RETRIES + 1):
52+
try:
53+
subprocess.run(command, check=True)
54+
print("Export completed successfully.")
55+
return
56+
except subprocess.CalledProcessError as e:
57+
print(f"Export attempt {attempt}/{EXPORT_RETRIES} failed with error: {e}")
58+
if attempt == EXPORT_RETRIES:
59+
raise
60+
if p.exists():
61+
_clear_export_dir(model_dir)
62+
time.sleep(attempt * 10)
4263

4364

4465
def check_causal_lm_output_quality(

0 commit comments

Comments
 (0)