Skip to content

Commit 2796003

Browse files
committed
Use chmod -R 777 via Docker
1 parent de81247 commit 2796003

3 files changed

Lines changed: 14 additions & 38 deletions

File tree

sagemaker-core/src/sagemaker/core/modules/local_core/local_container.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def _rmtree(path, image=None, is_studio=False):
6363
shutil.rmtree(path)
6464
except PermissionError:
6565
# Files created by Docker containers are owned by root.
66-
# Use a Docker container to remove them since os.chmod will also fail.
66+
# Use docker to chmod as root, then retry shutil.rmtree.
6767
if image is None:
6868
logger.warning(
6969
"Failed to clean up root-owned files in %s. "
@@ -75,11 +75,9 @@ def _rmtree(path, image=None, is_studio=False):
7575
cmd = ["docker", "run", "--rm"]
7676
if is_studio:
7777
cmd += ["--network", "sagemaker"]
78-
cmd += ["-v", f"{path}:/delete", image, "sh", "-c", "find /delete -mindepth 1 -delete"]
78+
cmd += ["-v", f"{path}:/delete", image, "chmod", "-R", "777", "/delete"]
7979
subprocess.run(cmd, check=True, capture_output=True)
80-
# The mount point directory itself may remain — clean it up
81-
if os.path.exists(path):
82-
shutil.rmtree(path, ignore_errors=True)
80+
shutil.rmtree(path)
8381
except Exception:
8482
logger.warning(
8583
"Failed to clean up root-owned files in %s. "

sagemaker-train/src/sagemaker/train/local/local_container.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _rmtree(path, image=None, is_studio=False):
7171
shutil.rmtree(path)
7272
except PermissionError:
7373
# Files created by Docker containers are owned by root.
74-
# Use a Docker container to remove them since os.chmod will also fail.
74+
# Use docker to chmod as root, then retry shutil.rmtree.
7575
if image is None:
7676
logger.warning(
7777
"Failed to clean up root-owned files in %s. "
@@ -83,11 +83,9 @@ def _rmtree(path, image=None, is_studio=False):
8383
cmd = ["docker", "run", "--rm"]
8484
if is_studio:
8585
cmd += ["--network", "sagemaker"]
86-
cmd += ["-v", f"{path}:/delete", image, "sh", "-c", "find /delete -mindepth 1 -delete"]
86+
cmd += ["-v", f"{path}:/delete", image, "chmod", "-R", "777", "/delete"]
8787
subprocess.run(cmd, check=True, capture_output=True)
88-
# The mount point directory itself may remain — clean it up
89-
if os.path.exists(path):
90-
shutil.rmtree(path, ignore_errors=True)
88+
shutil.rmtree(path)
9189
except Exception:
9290
logger.warning(
9391
"Failed to clean up root-owned files in %s. "

sagemaker-train/tests/unit/train/local/test_local_container.py

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -29,29 +29,24 @@ def test_rmtree_success(self, mock_rmtree):
2929

3030
@patch("sagemaker.train.local.local_container.shutil.rmtree")
3131
@patch("sagemaker.train.local.local_container.subprocess.run")
32-
@patch("sagemaker.train.local.local_container.os.path.exists", return_value=False)
33-
def test_rmtree_permission_error_docker_fallback(self, mock_exists, mock_run, mock_rmtree):
34-
"""PermissionError triggers docker fallback using the training image."""
35-
mock_rmtree.side_effect = PermissionError("Permission denied")
32+
def test_rmtree_permission_error_docker_chmod_fallback(self, mock_run, mock_rmtree):
33+
"""PermissionError triggers docker chmod then retry."""
34+
mock_rmtree.side_effect = [PermissionError("Permission denied"), None]
3635

3736
_rmtree("/tmp/test", IMAGE)
3837

3938
mock_run.assert_called_once_with(
40-
[
41-
"docker", "run", "--rm",
42-
"-v", "/tmp/test:/delete", IMAGE,
43-
"sh", "-c", "find /delete -mindepth 1 -delete",
44-
],
39+
["docker", "run", "--rm", "-v", "/tmp/test:/delete", IMAGE, "chmod", "-R", "777", "/delete"],
4540
check=True,
4641
capture_output=True,
4742
)
43+
assert mock_rmtree.call_count == 2
4844

4945
@patch("sagemaker.train.local.local_container.shutil.rmtree")
5046
@patch("sagemaker.train.local.local_container.subprocess.run")
51-
@patch("sagemaker.train.local.local_container.os.path.exists", return_value=False)
52-
def test_rmtree_studio_adds_network(self, mock_exists, mock_run, mock_rmtree):
47+
def test_rmtree_studio_adds_network(self, mock_run, mock_rmtree):
5348
"""In Studio, docker run includes --network sagemaker."""
54-
mock_rmtree.side_effect = PermissionError("Permission denied")
49+
mock_rmtree.side_effect = [PermissionError("Permission denied"), None]
5550

5651
_rmtree("/tmp/test", IMAGE, is_studio=True)
5752

@@ -60,27 +55,12 @@ def test_rmtree_studio_adds_network(self, mock_exists, mock_run, mock_rmtree):
6055
"docker", "run", "--rm",
6156
"--network", "sagemaker",
6257
"-v", "/tmp/test:/delete", IMAGE,
63-
"sh", "-c", "find /delete -mindepth 1 -delete",
58+
"chmod", "-R", "777", "/delete",
6459
],
6560
check=True,
6661
capture_output=True,
6762
)
6863

69-
@patch("sagemaker.train.local.local_container.shutil.rmtree")
70-
@patch("sagemaker.train.local.local_container.subprocess.run")
71-
@patch("sagemaker.train.local.local_container.os.path.exists", return_value=True)
72-
def test_rmtree_cleans_up_mount_point(self, mock_exists, mock_run, mock_rmtree):
73-
"""After docker cleanup, remaining mount point directory is removed."""
74-
mock_rmtree.side_effect = [PermissionError("Permission denied"), None]
75-
76-
_rmtree("/tmp/test", IMAGE)
77-
78-
assert mock_rmtree.call_count == 2
79-
mock_rmtree.assert_has_calls([
80-
call("/tmp/test"),
81-
call("/tmp/test", ignore_errors=True),
82-
])
83-
8464
@patch("sagemaker.train.local.local_container.shutil.rmtree")
8565
@patch("sagemaker.train.local.local_container.subprocess.run")
8666
def test_rmtree_docker_fallback_fails_raises(self, mock_run, mock_rmtree):

0 commit comments

Comments
 (0)