Skip to content

Commit 181df71

Browse files
committed
tests: add gpu_intensive marker and a GitHub Actions workflow that runs GPU-intensive integ tests three times per day
1 parent f61a1ea commit 181df71

11 files changed

Lines changed: 43 additions & 0 deletions
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
name: GPU Integ Tests
2+
on:
3+
schedule:
4+
- cron: "0 */8 * * *"
5+
workflow_dispatch:
6+
7+
permissions:
8+
id-token: write # This is required for requesting the JWT
9+
10+
jobs:
11+
gpu-integ-tests:
12+
runs-on: ubuntu-latest
13+
steps:
14+
- name: Configure AWS Credentials
15+
uses: aws-actions/configure-aws-credentials@v4
16+
with:
17+
role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }}
18+
aws-region: us-west-2
19+
role-duration-seconds: 10800
20+
- name: Run GPU Integ Tests
21+
uses: aws-actions/aws-codebuild-run-build@v1
22+
with:
23+
project-name: sagemaker-python-sdk-ci-health-gpu-integ-tests
24+
source-version: refs/heads/master

sagemaker-train/tests/integ/train/test_benchmark_evaluator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
EvaluationPipelineExecution,
2424
)
2525

26+
pytestmark = pytest.mark.gpu_intensive
27+
2628
# Configure logging
2729
logging.basicConfig(
2830
level=logging.INFO,

sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
EvaluationPipelineExecution,
2323
)
2424

25+
pytestmark = pytest.mark.gpu_intensive
26+
2527
# Configure logging
2628
logging.basicConfig(
2729
level=logging.INFO,

sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
from sagemaker.train.common import TrainingType
2222
import pytest
2323

24+
pytestmark = pytest.mark.gpu_intensive
25+
2426

2527
def test_dpo_trainer_lora_complete_workflow(sagemaker_session):
2628
"""Test complete DPO training workflow with LORA."""

sagemaker-train/tests/integ/train/test_llm_as_judge_base_model_fix.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
EvaluationPipelineExecution,
2929
)
3030

31+
pytestmark = pytest.mark.gpu_intensive
32+
3133
# Configure logging
3234
logging.basicConfig(
3335
level=logging.INFO,

sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
EvaluationPipelineExecution,
2323
)
2424

25+
pytestmark = pytest.mark.gpu_intensive
26+
2527
# Configure logging
2628
logging.basicConfig(
2729
level=logging.INFO,

sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
from sagemaker.train.common import TrainingType
2222
import pytest
2323

24+
pytestmark = pytest.mark.gpu_intensive
25+
2426

2527
def test_rlaif_trainer_lora_complete_workflow(sagemaker_session):
2628
"""Test complete RLAIF training workflow with LORA."""

sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
from sagemaker.train.rlvr_trainer import RLVRTrainer
2222
from sagemaker.train.common import TrainingType
2323

24+
pytestmark = pytest.mark.gpu_intensive
25+
2426

2527
def test_rlvr_trainer_lora_complete_workflow(sagemaker_session):
2628
"""Test complete RLVR training workflow with LORA."""

sagemaker-train/tests/integ/train/test_sft_trainer_integration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
from sagemaker.train.sft_trainer import SFTTrainer
2222
from sagemaker.train.common import TrainingType
2323

24+
pytestmark = pytest.mark.gpu_intensive
25+
2426

2527
def test_sft_trainer_lora_complete_workflow(sagemaker_session):
2628
"""Test complete SFT training workflow with LORA."""

sagemaker-train/tests/integ/train/test_tuner_distributed.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
from sagemaker.train.configs import SourceCode, Compute
2929
from sagemaker.train.distributed import Torchrun
3030
from sagemaker.train.tuner import HyperparameterTuner
31+
32+
pytestmark = pytest.mark.gpu_intensive
3133
from sagemaker.core.parameter import ContinuousParameter
3234

3335
logger = logging.getLogger(__name__)

0 commit comments

Comments
 (0)