def unzip_to_temporary_file(job_definition: JobBaseData, zip_content: Any) -> Path:
    """Unpack an in-memory zip archive into a per-job temporary directory.

    The destination is ``<system temp dir>/AZUREML_RUNS_DIR/<job name>`` and is
    created if it does not exist yet. Every archive entry is checked before
    anything is written: an entry whose resolved location is neither the
    destination itself nor somewhere beneath it aborts the whole operation.

    :param job_definition: Job whose ``name`` selects the destination folder.
    :type job_definition: JobBaseData
    :param zip_content: Raw bytes of the zip archive.
    :type zip_content: Any
    :return: The (unresolved) destination directory the archive was extracted to.
    :rtype: Path
    :raises ValueError: If an archive entry would be written outside the
        destination directory.
    """
    extract_root = Path(tempfile.gettempdir()) / AZUREML_RUNS_DIR / job_definition.name
    extract_root.mkdir(parents=True, exist_ok=True)

    # Compare fully resolved paths so ".." segments and symlinks cannot hide an escape.
    canonical_root = extract_root.resolve()
    # The trailing separator stops a sibling such as "<root>-evil" from matching.
    allowed_prefix = str(canonical_root) + os.sep

    with zipfile.ZipFile(io.BytesIO(zip_content)) as archive:
        for entry_name in archive.namelist():
            candidate = (canonical_root / entry_name).resolve()
            # The root itself is acceptable: directory entries may resolve to it.
            if candidate == canonical_root or str(candidate).startswith(allowed_prefix):
                continue
            raise ValueError(
                f"Zip archive contains a path traversal entry and cannot be extracted safely: {entry_name}"
            )
        # Only reached once every entry has been validated.
        archive.extractall(extract_root)
    return extract_root
4755
@@ -142,7 +150,7 @@ def get_execution_service_response(
142150 try :
143151 local = job_definition .properties .services .get ("Local" , None )
144152
145- ( url , encodedBody ) = local .endpoint .split (EXECUTION_SERVICE_URL_KEY )
153+ url , encodedBody = local .endpoint .split (EXECUTION_SERVICE_URL_KEY )
146154 body = urllib .parse .unquote_plus (encodedBody )
147155 body_dict : Dict = json .loads (body )
148156 response = requests_pipeline .post (url , json = body_dict , headers = {"Authorization" : "Bearer " + token })
@@ -167,6 +175,51 @@ def is_local_run(job_definition: JobBaseData) -> bool:
167175 return local is not None and EXECUTION_SERVICE_URL_KEY in local .endpoint
168176
169177
def _safe_tar_extractall(tar: tarfile.TarFile, dest_dir: str) -> None:
    """Extract every member of a tar archive without escaping ``dest_dir`` (TarSlip).

    Two strategies are used, chosen by whether the running interpreter's
    ``tarfile`` module exposes ``data_filter``:

    * If it does, extraction is delegated to ``extractall(..., filter="data")``
      and any ``tarfile.TarError`` it raises is re-raised as ``ValueError``.
    * Otherwise every member is validated up front -- links, special entries
      (anything that is neither a regular file nor a directory) and names that
      resolve outside the destination are rejected -- and only then is the
      archive extracted.

    :param tar: An opened tarfile.TarFile object.
    :type tar: tarfile.TarFile
    :param dest_dir: The destination directory for extraction.
    :type dest_dir: str
    :raises ValueError: If filtered extraction fails, or (manual path) if a
        member is a symbolic/hard link, an unsupported special entry, or
        would be written outside the destination directory.
    """
    extraction_root = os.path.realpath(dest_dir)

    # Preferred path: let the standard library's "data" extraction filter do the checks.
    if hasattr(tarfile, "data_filter"):
        try:
            tar.extractall(extraction_root, filter="data")
        except tarfile.TarError as exc:
            raise ValueError(f"Failed to safely extract tar archive: {exc}") from exc
        return

    # Manual path: vet the whole archive first so nothing is written if any member is bad.
    # The trailing separator stops a sibling such as "<root>-evil" from matching.
    root_prefix = extraction_root + os.sep
    for entry in tar.getmembers():
        entry_name = entry.name

        # Links could redirect a later write to an arbitrary location.
        if entry.issym() or entry.islnk():
            raise ValueError(
                "Tar archive contains a symbolic or hard link and cannot be extracted safely: "
                f"{entry_name}"
            )

        # Devices, FIFOs and similar nodes are never expected here.
        is_plain_entry = entry.isfile() or entry.isdir()
        if not is_plain_entry:
            raise ValueError(
                "Tar archive contains an unsupported special entry type and cannot be extracted safely: "
                f"{entry_name}"
            )

        landing_path = os.path.realpath(os.path.join(extraction_root, entry_name))
        # The root itself is acceptable: directory entries may resolve to it.
        if landing_path == extraction_root or landing_path.startswith(root_prefix):
            continue
        raise ValueError(
            f"Tar archive contains a path traversal entry and cannot be extracted safely: {entry_name}"
        )

    # All members validated; safe to extract
    tar.extractall(extraction_root)
222+
170223class CommonRuntimeHelper :
171224 COMMON_RUNTIME_BOOTSTRAPPER_INFO = "common_runtime_bootstrapper_info.json"
172225 COMMON_RUNTIME_JOB_SPEC = "common_runtime_job_spec.json"
@@ -208,10 +261,14 @@ def __init__(self, job_name: str):
208261 CommonRuntimeHelper .VM_BOOTSTRAPPER_FILE_NAME ,
209262 )
210263 self .stdout = open ( # pylint: disable=consider-using-with
211- os .path .join (self .common_runtime_temp_folder , "stdout" ), "w+" , encoding = DefaultOpenEncoding .WRITE
264+ os .path .join (self .common_runtime_temp_folder , "stdout" ),
265+ "w+" ,
266+ encoding = DefaultOpenEncoding .WRITE ,
212267 )
213268 self .stderr = open ( # pylint: disable=consider-using-with
214- os .path .join (self .common_runtime_temp_folder , "stderr" ), "w+" , encoding = DefaultOpenEncoding .WRITE
269+ os .path .join (self .common_runtime_temp_folder , "stderr" ),
270+ "w+" ,
271+ encoding = DefaultOpenEncoding .WRITE ,
215272 )
216273
217274 # Bug Item number: 2885723
@@ -266,8 +323,7 @@ def copy_bootstrapper_from_container(self, container: "docker.models.containers.
266323 for chunk in data_stream :
267324 f .write (chunk )
268325 with tarfile .open (tar_file , mode = "r" ) as tar :
269- for file_name in tar .getnames ():
270- tar .extract (file_name , os .path .dirname (path_in_host ))
326+ _safe_tar_extractall (tar , os .path .dirname (path_in_host ))
271327 os .remove (tar_file )
272328 except docker .errors .APIError as e :
273329 msg = f"Copying { path_in_container } from container has failed. Detailed message: { e } "
@@ -408,7 +464,7 @@ def start_run_if_local(
408464 :rtype: str
409465 """
410466 token = credential .get_token (ws_base_url + "/.default" ).token
411- ( zip_content , snapshot_id ) = get_execution_service_response (job_definition , token , requests_pipeline )
467+ zip_content , snapshot_id = get_execution_service_response (job_definition , token , requests_pipeline )
412468
413469 try :
414470 temp_dir = unzip_to_temporary_file (job_definition , zip_content )
0 commit comments