Skip to content

Commit 95c9824

Browse files
author
Syed Jafri
committed
fix: add unique base job names to sagemaker-train integ tests to avoid "Resource already exists" errors in CI
1 parent 877225c commit 95c9824

4 files changed

Lines changed: 36 additions & 11 deletions

File tree

sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,16 @@
2424

2525
def test_dpo_trainer_lora_complete_workflow(sagemaker_session):
2626
"""Test complete DPO training workflow with LORA."""
27+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
2728
# Create DPOTrainer instance with comprehensive configuration
2829
trainer = DPOTrainer(
2930
model="meta-textgeneration-llama-3-2-1b-instruct",
3031
training_type=TrainingType.LORA,
3132
model_package_group="sdk-test-finetuned-models",
3233
training_dataset="s3://mc-flows-sdk-testing/input_data/dpo/preference_dataset_train_256.jsonl",
3334
s3_output_path="s3://mc-flows-sdk-testing/output/",
34-
accept_eula=True
35+
accept_eula=True,
36+
base_job_name=f"dpo-lora-integ-{unique_id}",
3537
)
3638

3739
# Customize hyperparameters for quick training
@@ -62,6 +64,7 @@ def test_dpo_trainer_lora_complete_workflow(sagemaker_session):
6264

6365
def test_dpo_trainer_with_validation_dataset(sagemaker_session):
6466
"""Test DPO trainer with both training and validation datasets."""
67+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
6568

6669
dpo_trainer = DPOTrainer(
6770
model="meta-textgeneration-llama-3-2-1b-instruct",
@@ -70,7 +73,8 @@ def test_dpo_trainer_with_validation_dataset(sagemaker_session):
7073
training_dataset="s3://mc-flows-sdk-testing/input_data/dpo/preference_dataset_train_256.jsonl",
7174
validation_dataset="s3://mc-flows-sdk-testing/input_data/dpo/preference_dataset_train_256.jsonl",
7275
s3_output_path="s3://mc-flows-sdk-testing/output/",
73-
accept_eula=True
76+
accept_eula=True,
77+
base_job_name=f"dpo-val-integ-{unique_id}",
7478
)
7579

7680
# Customize hyperparameters for quick training

sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from __future__ import absolute_import
1515

1616
import time
17+
import random
1718
import boto3
1819
from sagemaker.core.helper.session_helper import Session
1920
from sagemaker.train.rlaif_trainer import RLAIFTrainer
@@ -23,6 +24,7 @@
2324

2425
def test_rlaif_trainer_lora_complete_workflow(sagemaker_session):
2526
"""Test complete RLAIF training workflow with LORA."""
27+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
2628

2729
rlaif_trainer = RLAIFTrainer(
2830
model="meta-textgeneration-llama-3-2-1b-instruct",
@@ -34,7 +36,8 @@ def test_rlaif_trainer_lora_complete_workflow(sagemaker_session):
3436
mlflow_run_name="test-rlaif-finetuned-models-run",
3537
training_dataset="s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl",
3638
s3_output_path="s3://mc-flows-sdk-testing/output/",
37-
accept_eula=True
39+
accept_eula=True,
40+
base_job_name=f"rlaif-lora-integ-{unique_id}",
3841
)
3942

4043
# Create training job
@@ -62,6 +65,7 @@ def test_rlaif_trainer_lora_complete_workflow(sagemaker_session):
6265

6366
def test_rlaif_trainer_with_custom_reward_settings(sagemaker_session):
6467
"""Test RLAIF trainer with different reward model and prompt."""
68+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
6569

6670
rlaif_trainer = RLAIFTrainer(
6771
model="meta-textgeneration-llama-3-2-1b-instruct",
@@ -73,7 +77,8 @@ def test_rlaif_trainer_with_custom_reward_settings(sagemaker_session):
7377
mlflow_run_name="test-rlaif-finetuned-models-run",
7478
training_dataset="s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl",
7579
s3_output_path="s3://mc-flows-sdk-testing/output/",
76-
accept_eula=True
80+
accept_eula=True,
81+
base_job_name=f"rlaif-rwd-integ-{unique_id}",
7782
)
7883

7984
training_job = rlaif_trainer.train(wait=False)
@@ -100,6 +105,7 @@ def test_rlaif_trainer_with_custom_reward_settings(sagemaker_session):
100105

101106
def test_rlaif_trainer_continued_finetuning(sagemaker_session):
102107
"""Test RLAIF continued fine-tuning starting from an existing model package."""
108+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
103109

104110
rlaif_trainer = RLAIFTrainer(
105111
model="arn:aws:sagemaker:us-west-2:729646638167:model-package/sdk-test-finetuned-models/1",
@@ -111,7 +117,8 @@ def test_rlaif_trainer_continued_finetuning(sagemaker_session):
111117
mlflow_run_name="test-rlaif-finetuned-models-run",
112118
training_dataset="s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl",
113119
s3_output_path="s3://mc-flows-sdk-testing/output/",
114-
accept_eula=True
120+
accept_eula=True,
121+
base_job_name=f"rlaif-cont-integ-{unique_id}",
115122
)
116123

117124
# Create training job

sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from __future__ import absolute_import
1515

1616
import time
17+
import random
1718
import pytest
1819
import boto3
1920
from sagemaker.core.helper.session_helper import Session
@@ -23,6 +24,7 @@
2324

2425
def test_rlvr_trainer_lora_complete_workflow(sagemaker_session):
2526
"""Test complete RLVR training workflow with LORA."""
27+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
2628

2729
rlvr_trainer = RLVRTrainer(
2830
model="meta-textgeneration-llama-3-2-1b-instruct",
@@ -32,7 +34,8 @@ def test_rlvr_trainer_lora_complete_workflow(sagemaker_session):
3234
mlflow_run_name="test-rlvr-finetuned-models-run",
3335
training_dataset="s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl",
3436
s3_output_path="s3://mc-flows-sdk-testing/output/",
35-
accept_eula=True
37+
accept_eula=True,
38+
base_job_name=f"rlvr-lora-integ-{unique_id}",
3639
)
3740

3841
# Create training job
@@ -60,6 +63,7 @@ def test_rlvr_trainer_lora_complete_workflow(sagemaker_session):
6063

6164
def test_rlvr_trainer_with_custom_reward_function(sagemaker_session):
6265
"""Test RLVR trainer with custom reward function."""
66+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
6367

6468
rlvr_trainer = RLVRTrainer(
6569
model="meta-textgeneration-llama-3-2-1b-instruct",
@@ -70,7 +74,8 @@ def test_rlvr_trainer_with_custom_reward_function(sagemaker_session):
7074
training_dataset="s3://mc-flows-sdk-testing/input_data/rlvr-rlaif-test-data/train_285.jsonl",
7175
s3_output_path="s3://mc-flows-sdk-testing/output/",
7276
custom_reward_function="arn:aws:sagemaker:us-west-2:729646638167:hub-content/sdktest/JsonDoc/rlvr-test-rf/0.0.1",
73-
accept_eula=True
77+
accept_eula=True,
78+
base_job_name=f"rlvr-rf-integ-{unique_id}",
7479
)
7580

7681
training_job = rlvr_trainer.train(wait=False)
@@ -100,6 +105,7 @@ def test_rlvr_trainer_nova_workflow(sagemaker_session_us_east_1):
100105
"""Test RLVR training workflow with Nova model."""
101106
# sagemaker_session_us_east_1 fixture is defined in conftest.py (us-east-1 region)
102107

108+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
103109
rlvr_trainer = RLVRTrainer(
104110
model="nova-textgeneration-lite-v2",
105111
model_package_group="sdk-test-finetuned-models",
@@ -110,7 +116,8 @@ def test_rlvr_trainer_nova_workflow(sagemaker_session_us_east_1):
110116
s3_output_path="s3://mc-flows-sdk-testing-us-east-1/output/",
111117
custom_reward_function="arn:aws:sagemaker:us-east-1:729646638167:hub-content/sdktest/JsonDoc/rlvr-nova-test-rf/0.0.1",
112118
accept_eula=True,
113-
sagemaker_session=sagemaker_session_us_east_1
119+
sagemaker_session=sagemaker_session_us_east_1,
120+
base_job_name=f"rlvr-nova-integ-{unique_id}",
114121
)
115122
training_job = rlvr_trainer.train(wait=False)
116123

sagemaker-train/tests/integ/train/test_sft_trainer_integration.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from __future__ import absolute_import
1515

1616
import time
17+
import random
1718
import pytest
1819
import boto3
1920
from sagemaker.core.helper.session_helper import Session
@@ -23,14 +24,16 @@
2324

2425
def test_sft_trainer_lora_complete_workflow(sagemaker_session):
2526
"""Test complete SFT training workflow with LORA."""
27+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
2628

2729
sft_trainer = SFTTrainer(
2830
model="meta-textgeneration-llama-3-2-1b-instruct",
2931
training_type=TrainingType.LORA,
3032
model_package_group="arn:aws:sagemaker:us-west-2:729646638167:model-package-group/sdk-test-finetuned-models",
3133
training_dataset="s3://mc-flows-sdk-testing/input_data/sft/sample_data_256_final.jsonl",
3234
s3_output_path="s3://mc-flows-sdk-testing/output/",
33-
accept_eula=True
35+
accept_eula=True,
36+
base_job_name=f"sft-lora-integ-{unique_id}",
3437
)
3538

3639
# Create training job
@@ -58,14 +61,16 @@ def test_sft_trainer_lora_complete_workflow(sagemaker_session):
5861

5962
def test_sft_trainer_with_validation_dataset(sagemaker_session):
6063
"""Test SFT trainer with both training and validation datasets."""
64+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
6165

6266
sft_trainer = SFTTrainer(
6367
model="meta-textgeneration-llama-3-2-1b-instruct",
6468
training_type=TrainingType.LORA,
6569
model_package_group="arn:aws:sagemaker:us-west-2:729646638167:model-package-group/sdk-test-finetuned-models",
6670
training_dataset="s3://mc-flows-sdk-testing/input_data/sft/sample_data_256_final.jsonl",
6771
validation_dataset="s3://mc-flows-sdk-testing/input_data/sft/sample_data_256_final.jsonl",
68-
accept_eula=True
72+
accept_eula=True,
73+
base_job_name=f"sft-val-integ-{unique_id}",
6974
)
7075

7176
training_job = sft_trainer.train(wait=False)
@@ -94,6 +99,7 @@ def test_sft_trainer_nova_workflow(sagemaker_session_us_east_1):
9499
"""Test SFT trainer with Nova model."""
95100
# sagemaker_session_us_east_1 fixture is defined in conftest.py (us-east-1 region)
96101

102+
unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}"
97103
sft_trainer_nova = SFTTrainer(
98104
model="nova-textgeneration-lite-v2",
99105
training_type=TrainingType.LORA,
@@ -102,7 +108,8 @@ def test_sft_trainer_nova_workflow(sagemaker_session_us_east_1):
102108
mlflow_run_name="test-nova-finetuned-models-run",
103109
training_dataset="s3://mc-flows-sdk-testing-us-east-1/input_data/sft-nova/sft_200_samples.jsonl",
104110
s3_output_path="s3://mc-flows-sdk-testing-us-east-1/output/",
105-
sagemaker_session=sagemaker_session_us_east_1
111+
sagemaker_session=sagemaker_session_us_east_1,
112+
base_job_name=f"sft-nova-integ-{unique_id}",
106113
)
107114

108115
# Create training job

0 commit comments

Comments
 (0)