torchtitan/tests/integration_tests/h100.py at main · githubsgi/torchtitan · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import logging

from tests.integration_tests import OverrideDefinitions

# Configure the root logger at INFO level. Note that basicConfig is a no-op
# when the root logger already has handlers configured.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def build_h100_tests_list() -> list[OverrideDefinitions]:
    """
    Assemble the H100 integration test flavors.

    Every flavor is an OverrideDefinitions built from a list of override
    groups (each group being a list of command-line style override strings),
    followed by a description string and a short identifier string. Some
    flavors additionally pass ``ngpu`` and/or ``skip_rocm_test``.

    Returns:
        The OverrideDefinitions instances, in the order they are declared
        below.
    """
    # Override string shared by every flavor that runs the llama3 Float8
    # debug model config.
    llama3_float8 = "--module llama3 --config llama3_debugmodel_float8"

    async_tp_compile = OverrideDefinitions(
        [
            [
                "--compile.enable",
                "--parallelism.tensor_parallel_degree 2",
                "--parallelism.enable_async_tensor_parallel",
            ],
        ],
        "2D async TP compile",
        "2d_asynctp_compile",
    )

    float8_only = OverrideDefinitions(
        [[llama3_float8]],
        "Float8 test",
        "float8",
    )

    fsdp_symm_mem = OverrideDefinitions(
        [
            [
                "--parallelism.spmd_backend full_dtensor",
                "--parallelism.enable-fsdp-symm-mem",
            ],
        ],
        "FSDP symmetric memory",
        "fsdp_symm_mem",
        ngpu=2,
        skip_rocm_test=True,
    )

    # NOTE(review): the identifier below says "cp" while the overrides and the
    # description use pipeline parallelism (PP) -- confirm whether the name is
    # intentional before renaming, since it may be referenced elsewhere.
    fsdp_tp_pp_compile_float8 = OverrideDefinitions(
        [
            [
                llama3_float8,
                "--compile.enable",
                "--parallelism.data_parallel_shard_degree 2",
                "--parallelism.tensor_parallel_degree 2",
                "--parallelism.pipeline_parallel_degree 2",
                "--parallelism.enable_async_tensor_parallel",
            ],
        ],
        "FSDP+async TP+PP+torch.compile+Float8",
        "fsdp+tp+cp+compile+float8",
        ngpu=8,
    )

    hsdp_cp_compile_float8 = OverrideDefinitions(
        [
            [
                llama3_float8,
                "--compile.enable",
                "--parallelism.data_parallel_shard_degree 2",
                "--parallelism.data_parallel_replicate_degree 2",
                "--parallelism.context_parallel_degree 2",
            ],
        ],
        "HSDP+CP+torch.compile+Float8",
        "hsdp+cp+compile+float8",
        ngpu=8,
    )

    deepseek_v3_hybridep = OverrideDefinitions(
        [
            [
                "--module deepseek_v3 --config deepseek_v3_debugmodel_hybridep",
                "--parallelism.data_parallel_shard_degree 4",
                "--parallelism.expert_parallel_degree 2",
                "--compile.enable",
                "--compile.components model,loss",
            ],
        ],
        "DeepSeek V3 FSDP+HybridEP+compile",
        "deepseek_v3_fsdp+hybridep+compile",
        ngpu=4,
        # Skipped on ROCm because deep_ep/NVSHMEM is CUDA-only.
        skip_rocm_test=True,
    )

    return [
        async_tp_compile,
        float8_only,
        fsdp_symm_mem,
        fsdp_tp_pp_compile_float8,
        hsdp_cp_compile_float8,
        deepseek_v3_hybridep,
    ]