2121from contextlib import contextmanager
2222
2323import docker
24+ import filelock
2425import pytest
2526from docker .errors import BuildError
2627
@@ -149,45 +150,52 @@ def gpu_instance_type():
149150
150151
@pytest.fixture(scope="session")
def dummy_container_without_error(
    sagemaker_session, compatible_python_version, sagemaker_sdk_tar_path, tmp_path_factory
):
    """Return the image URI of the default dummy container.

    The image is rendered from ``DOCKERFILE_TEMPLATE`` for the compatible
    Python version and obtained through ``_build_container_once``, which
    caches the result under this fixture's name.
    """
    return _build_container_once(
        fixture_name="dummy_container_without_error",
        sagemaker_session=sagemaker_session,
        py_version=compatible_python_version,
        docker_template=DOCKERFILE_TEMPLATE,
        sdk_tar_path=sagemaker_sdk_tar_path,
        tmp_path_factory=tmp_path_factory,
    )
155158
156159
@pytest.fixture(scope="session")
def dummy_container_with_user_and_workdir(
    sagemaker_session, compatible_python_version, sagemaker_sdk_tar_path, tmp_path_factory
):
    """Return the image URI of the dummy container with a custom user and workdir.

    The image is rendered from ``DOCKERFILE_TEMPLATE_WITH_USER_AND_WORKDIR``
    for the compatible Python version and obtained through
    ``_build_container_once``, which caches the result under this fixture's name.
    """
    return _build_container_once(
        fixture_name="dummy_container_with_user_and_workdir",
        sagemaker_session=sagemaker_session,
        py_version=compatible_python_version,
        docker_template=DOCKERFILE_TEMPLATE_WITH_USER_AND_WORKDIR,
        sdk_tar_path=sagemaker_sdk_tar_path,
        tmp_path_factory=tmp_path_factory,
    )
165166
166167
@pytest.fixture(scope="session")
def dummy_container_incompatible_python_runtime(
    sagemaker_session, incompatible_python_version, sagemaker_sdk_tar_path, tmp_path_factory
):
    """Return the image URI of a dummy container on the incompatible Python version.

    Uses the same ``DOCKERFILE_TEMPLATE`` as the default dummy container but
    renders it with ``incompatible_python_version``; the image is obtained
    through ``_build_container_once`` under this fixture's name.
    """
    return _build_container_once(
        fixture_name="dummy_container_incompatible_python_runtime",
        sagemaker_session=sagemaker_session,
        py_version=incompatible_python_version,
        docker_template=DOCKERFILE_TEMPLATE,
        sdk_tar_path=sagemaker_sdk_tar_path,
        tmp_path_factory=tmp_path_factory,
    )
171174
172175
@pytest.fixture(scope="session")
def dummy_container_with_conda(
    sagemaker_session, compatible_python_version, sagemaker_sdk_tar_path, tmp_path_factory
):
    """Return the image URI of the conda-based dummy container.

    The image is rendered from ``DOCKERFILE_TEMPLATE_WITH_CONDA`` for the
    compatible Python version and obtained through ``_build_container_once``,
    which caches the result under this fixture's name.
    """
    return _build_container_once(
        fixture_name="dummy_container_with_conda",
        sagemaker_session=sagemaker_session,
        py_version=compatible_python_version,
        docker_template=DOCKERFILE_TEMPLATE_WITH_CONDA,
        sdk_tar_path=sagemaker_sdk_tar_path,
        tmp_path_factory=tmp_path_factory,
    )
179182
180183
@pytest.fixture(scope="session")
def auto_capture_test_container(sagemaker_session, sagemaker_sdk_tar_path, tmp_path_factory):
    """Return the image reference of the auto-capture client container (Python 3.10).

    The image is rendered from ``AUTO_CAPTURE_CLIENT_DOCKER_TEMPLATE``.
    ``is_auto_capture=True`` makes ``_build_container_once`` route the build
    to ``_build_auto_capture_client_container`` instead of ``_build_container``.
    """
    return _build_container_once(
        fixture_name="auto_capture_test_container",
        sagemaker_session=sagemaker_session,
        py_version="3.10",
        docker_template=AUTO_CAPTURE_CLIENT_DOCKER_TEMPLATE,
        sdk_tar_path=sagemaker_sdk_tar_path,
        tmp_path_factory=tmp_path_factory,
        is_auto_capture=True,
    )
185191
186192
@pytest.fixture(scope="session")
def spark_test_container(sagemaker_session, sagemaker_sdk_tar_path, tmp_path_factory):
    """Return the image URI of the container used by the Spark tests (Python 3.9).

    The image is rendered from ``DOCKERFILE_TEMPLATE`` and obtained through
    ``_build_container_once``, which caches the result under this fixture's name.
    """
    return _build_container_once(
        fixture_name="spark_test_container",
        sagemaker_session=sagemaker_session,
        py_version="3.9",
        docker_template=DOCKERFILE_TEMPLATE,
        sdk_tar_path=sagemaker_sdk_tar_path,
        tmp_path_factory=tmp_path_factory,
    )
191199
192200
193201@pytest .fixture (scope = "session" )
@@ -208,6 +216,27 @@ def conda_env_yml():
208216 os .remove (conda_yml_file_name )
209217
210218
@pytest.fixture(scope="session")
def sagemaker_sdk_tar_path(tmp_path_factory):
    """Build the sagemaker-core sdist once and share it across all xdist workers.

    Uses a file lock so only one worker runs the build; others wait and reuse
    the already-built tar.gz from the shared temp directory.

    Args:
        tmp_path_factory: pytest's session-scoped temporary path factory.

    Returns:
        str: Path of the built ``*.tar.gz`` source archive.

    Raises:
        FileNotFoundError: If the build finished without leaving a ``*.tar.gz``
            in the archive directory.
    """
    base_tmp = tmp_path_factory.getbasetemp()
    # Only inside a pytest-xdist worker is the parent of basetemp the per-run
    # directory shared by all workers. Without xdist the parent is the
    # per-user pytest root, which outlives the session, so caching there would
    # silently reuse an sdist built from older sources on the next run.
    if os.environ.get("PYTEST_XDIST_WORKER"):
        root_tmp = base_tmp.parent
    else:
        root_tmp = base_tmp
    tar_dir = root_tmp / "sagemaker_sdk_tar"
    tar_dir.mkdir(exist_ok=True)
    lock_file = root_tmp / "sagemaker_sdk_tar.lock"

    with filelock.FileLock(str(lock_file)):
        # sorted() makes the selected archive deterministic.
        archives = sorted(tar_dir.glob("*.tar.gz"))
        if not archives:
            _generate_sagemaker_sdk_tar(str(tar_dir))
            archives = sorted(tar_dir.glob("*.tar.gz"))

    if not archives:
        raise FileNotFoundError(f"No sdist (*.tar.gz) was produced in {tar_dir}")
    return str(archives[0])
238+
239+
211240def _tmpdir ():
212241 """Create a temporary directory context manager."""
213242 import tempfile
@@ -222,7 +251,33 @@ def _tmpdir():
222251_tmpdir = contextmanager (_tmpdir )
223252
224253
225- def _build_container (sagemaker_session , py_version , docker_template ):
def _build_container_once(
    fixture_name,
    sagemaker_session,
    py_version,
    docker_template,
    sdk_tar_path,
    tmp_path_factory,
    is_auto_capture=False,
):
    """Build and push a container image exactly once across all xdist workers.

    Uses a file lock keyed by fixture_name so parallel workers wait for the
    first worker to finish, then reuse the image URI written to a shared file.

    Args:
        fixture_name: Unique key used to name the lock file and the cache file.
        sagemaker_session: Session forwarded to ``_build_container``.
        py_version: Python version string substituted into the Dockerfile template.
        docker_template: Dockerfile template to render.
        sdk_tar_path: Path of the pre-built SDK source archive to copy into the
            build context.
        tmp_path_factory: pytest's session-scoped temporary path factory.
        is_auto_capture: When true, build with
            ``_build_auto_capture_client_container`` instead of ``_build_container``.

    Returns:
        The image URI returned by the selected build helper, or the value
        cached by the worker that built it first.
    """
    base_tmp = tmp_path_factory.getbasetemp()
    # Only inside a pytest-xdist worker is the parent of basetemp the per-run
    # directory shared by all workers. Without xdist the parent is the
    # per-user pytest root, which outlives the session; a cache file there
    # would hand back an image URI from a previous run instead of building
    # from the current sources.
    if os.environ.get("PYTEST_XDIST_WORKER"):
        root_tmp = base_tmp.parent
    else:
        root_tmp = base_tmp
    uri_file = root_tmp / f"{fixture_name}.ecr_uri"
    lock_file = root_tmp / f"{fixture_name}.lock"

    with filelock.FileLock(str(lock_file)):
        if uri_file.exists():
            cached_uri = uri_file.read_text().strip()
            # An empty file means an earlier write was interrupted; rebuild
            # rather than returning "" as an image URI.
            if cached_uri:
                return cached_uri
        if is_auto_capture:
            ecr_uri = _build_auto_capture_client_container(
                py_version, docker_template, sdk_tar_path
            )
        else:
            ecr_uri = _build_container(
                sagemaker_session, py_version, docker_template, sdk_tar_path
            )
        uri_file.write_text(ecr_uri)
        return ecr_uri
278+
279+
280+ def _build_container (sagemaker_session , py_version , docker_template , sdk_tar_path ):
226281 """Build a dummy test container locally and push to ECR."""
227282 region = sagemaker_session .boto_region_name
228283 image_tag = f"{ py_version .replace ('.' , '-' )} -{ sagemaker_timestamp ()} "
@@ -231,7 +286,8 @@ def _build_container(sagemaker_session, py_version, docker_template):
231286
232287 with _tmpdir () as tmpdir :
233288 print ("building docker image locally in " , tmpdir )
234- source_archive = _generate_sagemaker_sdk_tar (tmpdir )
289+ source_archive = os .path .basename (sdk_tar_path )
290+ shutil .copy2 (sdk_tar_path , os .path .join (tmpdir , source_archive ))
235291 with open (os .path .join (tmpdir , "Dockerfile" ), "w" ) as file :
236292 content = docker_template .format (py_version = py_version , source_archive = source_archive )
237293 print (f"Dockerfile contents: \n { content } \n " )
@@ -267,10 +323,11 @@ def _build_container(sagemaker_session, py_version, docker_template):
267323 return ecr_image
268324
269325
270- def _build_auto_capture_client_container (py_version , docker_template ):
326+ def _build_auto_capture_client_container (py_version , docker_template , sdk_tar_path ):
271327 """Build a test docker container for auto_capture tests."""
272328 with _tmpdir () as tmpdir :
273- source_archive = _generate_sdk_tar_with_public_version (tmpdir )
329+ source_archive = os .path .basename (sdk_tar_path )
330+ shutil .copy2 (sdk_tar_path , os .path .join (tmpdir , source_archive ))
274331 _move_auto_capture_test_file (tmpdir )
275332 with open (os .path .join (tmpdir , "Dockerfile" ), "w" ) as file :
276333 content = docker_template .format (py_version = py_version , source_archive = source_archive )
@@ -304,7 +361,11 @@ def _ecr_image_uri(account, region, image_name, tag):
def _generate_sagemaker_sdk_tar(destination_folder):
    """Run build to generate the SDK tar file.

    Invokes ``python -m build --sdist`` in the current working directory and
    writes the archive into ``destination_folder``.

    Args:
        destination_folder: Directory that receives the built sdist.

    Returns:
        str: File name (not the full path) of the ``tar.gz`` archive found in
        ``destination_folder``.

    Raises:
        subprocess.CalledProcessError: If the build command exits non-zero;
            its stderr is printed before re-raising.
        FileNotFoundError: If the build succeeds but no ``tar.gz`` is present.
    """
    import sys

    # An argument list (no shell) keeps a destination path containing spaces or
    # shell metacharacters intact. sys.executable runs the build with the same
    # interpreter/environment as the tests rather than whichever "python" is
    # first on PATH.
    command = [sys.executable, "-m", "build", "--sdist", "-o", str(destination_folder)]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error when building sagemaker-core sdist: {e.stderr}")
        raise

    # os.listdir order is arbitrary; sort so the selected archive is deterministic.
    archives = sorted(f for f in os.listdir(destination_folder) if f.endswith("tar.gz"))
    if not archives:
        raise FileNotFoundError(
            f"Build finished but no tar.gz archive was found in {destination_folder}"
        )
    return archives[0]
0 commit comments