Skip to content

Commit a7e8eb5

Browse files
author
Namrata Madan
committed
chore: migrate remote_function integ tests from V2
1 parent c91dc79 commit a7e8eb5

23 files changed

Lines changed: 1324 additions & 34 deletions

File tree

requirements/extras/test_requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,4 +12,5 @@ omegaconf
1212
graphene
1313
typing_extensions>=4.9.0
1414
tensorflow>=2.16.2,<=2.19.0
15-
build
15+
build
16+
docker>=5.0.2,<8.0.0

sagemaker-core/src/sagemaker/core/remote_function/job.py

Lines changed: 29 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -179,12 +179,12 @@
179179
fi
180180
181181
printf "INFO: Invoking remote function inside conda environment: $conda_env.\\n"
182-
printf "INFO: $conda_exe run -n $conda_env python -m sagemaker.train.remote_function.invoke_function \\n"
183-
$conda_exe run -n $conda_env python -m sagemaker.train.remote_function.invoke_function "$@"
182+
printf "INFO: $conda_exe run -n $conda_env python -m sagemaker.core.remote_function.invoke_function \\n"
183+
$conda_exe run -n $conda_env python -m sagemaker.core.remote_function.invoke_function "$@"
184184
else
185185
printf "INFO: No conda env provided. Invoking remote function\\n"
186-
printf "INFO: python -m sagemaker.train.remote_function.invoke_function \\n"
187-
python -m sagemaker.train.remote_function.invoke_function "$@"
186+
printf "INFO: python -m sagemaker.core.remote_function.invoke_function \\n"
187+
python -m sagemaker.core.remote_function.invoke_function "$@"
188188
fi
189189
"""
190190

@@ -238,14 +238,14 @@
238238
-mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \
239239
-x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \
240240
241-
python -m mpi4py -m sagemaker.train.remote_function.invoke_function \\n"
241+
python -m mpi4py -m sagemaker.core.remote_function.invoke_function \\n"
242242
$conda_exe run -n $conda_env mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \
243243
--allow-run-as-root --display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \
244244
-mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \
245245
-mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \
246246
-x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \
247247
$SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \
248-
python -m mpi4py -m sagemaker.train.remote_function.invoke_function "$@"
248+
python -m mpi4py -m sagemaker.core.remote_function.invoke_function "$@"
249249
250250
python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} --job_ended 1
251251
else
@@ -263,15 +263,15 @@
263263
-mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \
264264
-x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \
265265
$SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \
266-
python -m mpi4py -m sagemaker.train.remote_function.invoke_function \\n"
266+
python -m mpi4py -m sagemaker.core.remote_function.invoke_function \\n"
267267
268268
mpirun --host $SM_HOSTS_LIST -np $SM_NPROC_PER_NODE \
269269
--allow-run-as-root --display-map --tag-output -mca btl_tcp_if_include $SM_NETWORK_INTERFACE_NAME \
270270
-mca plm_rsh_no_tree_spawn 1 -mca pml ob1 -mca btl ^openib -mca orte_abort_on_non_zero_status 1 \
271271
-mca btl_vader_single_copy_mechanism none -mca plm_rsh_num_concurrent $SM_HOST_COUNT \
272272
-x NCCL_SOCKET_IFNAME=$SM_NETWORK_INTERFACE_NAME -x LD_LIBRARY_PATH -x PATH \
273273
$SM_FI_PROVIDER $SM_NCCL_PROTO $SM_FI_EFA_USE_DEVICE_RDMA \
274-
python -m mpi4py -m sagemaker.train.remote_function.invoke_function "$@"
274+
python -m mpi4py -m sagemaker.core.remote_function.invoke_function "$@"
275275
276276
python /opt/ml/input/data/{RUNTIME_SCRIPTS_CHANNEL_NAME}/{MPI_UTILS_SCRIPT_NAME} --job_ended 1
277277
else
@@ -324,18 +324,18 @@
324324
printf "INFO: Invoking remote function with torchrun inside conda environment: $conda_env.\\n"
325325
printf "INFO: $conda_exe run -n $conda_env torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE \
326326
--master_addr $SM_MASTER_ADDR --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK \
327-
-m sagemaker.train.remote_function.invoke_function \\n"
327+
-m sagemaker.core.remote_function.invoke_function \\n"
328328
329329
$conda_exe run -n $conda_env torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE \
330330
--master_addr $SM_MASTER_ADDR --master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK \
331-
-m sagemaker.train.remote_function.invoke_function "$@"
331+
-m sagemaker.core.remote_function.invoke_function "$@"
332332
else
333333
printf "INFO: No conda env provided. Invoking remote function with torchrun\\n"
334334
printf "INFO: torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE --master_addr $SM_MASTER_ADDR \
335-
--master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK -m sagemaker.train.remote_function.invoke_function \\n"
335+
--master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK -m sagemaker.core.remote_function.invoke_function \\n"
336336
337337
torchrun --nnodes $SM_HOST_COUNT --nproc_per_node $SM_NPROC_PER_NODE --master_addr $SM_MASTER_ADDR \
338-
--master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK -m sagemaker.train.remote_function.invoke_function "$@"
338+
--master_port $SM_MASTER_PORT --node_rank $SM_CURRENT_HOST_RANK -m sagemaker.core.remote_function.invoke_function "$@"
339339
fi
340340
"""
341341

@@ -728,7 +728,7 @@ def __init__(
728728
sagemaker_session=self.sagemaker_session,
729729
)
730730
if _role:
731-
self.role = expand_role(self.sagemaker_session.boto_session, _role)
731+
self.role = expand_role(self.sagemaker_session, _role)
732732
else:
733733
self.role = get_execution_role(self.sagemaker_session)
734734

@@ -941,16 +941,24 @@ def compile(
941941
# generate asymmetric key pair for integrity check
942942
if step_compilation_context is None:
943943
private_key = ec.generate_private_key(ec.SECP256R1())
944-
public_key_pem = private_key.public_key().public_bytes(
945-
crypto_serialization.Encoding.PEM,
946-
crypto_serialization.PublicFormat.SubjectPublicKeyInfo,
947-
).decode("utf-8")
944+
public_key_pem = (
945+
private_key.public_key()
946+
.public_bytes(
947+
crypto_serialization.Encoding.PEM,
948+
crypto_serialization.PublicFormat.SubjectPublicKeyInfo,
949+
)
950+
.decode("utf-8")
951+
)
948952
else:
949953
private_key = step_compilation_context.function_step_secret_token
950-
public_key_pem = private_key.public_key().public_bytes(
951-
crypto_serialization.Encoding.PEM,
952-
crypto_serialization.PublicFormat.SubjectPublicKeyInfo,
953-
).decode("utf-8")
954+
public_key_pem = (
955+
private_key.public_key()
956+
.public_bytes(
957+
crypto_serialization.Encoding.PEM,
958+
crypto_serialization.PublicFormat.SubjectPublicKeyInfo,
959+
)
960+
.decode("utf-8")
961+
)
954962

955963
# serialize function and arguments
956964
if step_compilation_context is None:

sagemaker-core/tests/__init__.py

Whitespace-only changes.
Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
SchemaVersion: '1.0'
2+
SageMaker:
3+
PythonSDK:
4+
Modules:
5+
RemoteFunction:
6+
Dependencies: "path/to/requirements.txt"
7+
PreExecutionCommands: ["command_1", "command_2"]
8+
EnableInterContainerTrafficEncryption: true
9+
EnvironmentVariables: {"EnvVarKey": "EnvVarValue"}
10+
IncludeLocalWorkDir: true
11+
CustomFileFilter:
12+
IgnoreNamePatterns:
13+
- "data"
14+
- "test"
15+
InstanceType: "ml.m5.large"
16+
JobCondaEnvironment: "my_conda_env"
17+
S3KmsKeyId: "someS3KmsKey"
18+
VpcConfig:
19+
SecurityGroupIds: ["sg123"]
20+
Subnets: ["subnet-1234"]
21+
Tags: [{"Key": "someTagKey", "Value":"someTagValue"}, {"Key":"someTagKey2", "Value":"someTagValue2"}]
22+
VolumeKmsKeyId: "someVolumeKmsKey"
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
does_not_exist
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
pandas==1.3.4
Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
echo "test-content-1" > test_file_1
2+
echo "test-content-2" > test_file_2
3+
echo "test-content-3" > test_file_3
4+
rm ./test_file_2
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
echo "test-content-1" > test_file_1
2+
bws sagemaker describe-training-job
3+
echo "test-content-3" > test_file_3
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
scipy==1.13.0

sagemaker-core/tests/data/remote_function/workdir/data/data.csv

Whitespace-only changes.

0 commit comments

Comments
 (0)