forked from pytorch/torchtitan
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathh100.py
More file actions
executable file
·100 lines (95 loc) · 3.26 KB
/
Copy pathh100.py
File metadata and controls
executable file
·100 lines (95 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import logging
from tests.integration_tests import OverrideDefinitions
# Configure the root logger at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def build_h100_tests_list() -> list[OverrideDefinitions]:
    """
    Build the H100 integration test flavors.

    Returns:
        A list of OverrideDefinitions, one per test flavor. Each entry is
        constructed from a nested list of command-line override strings, a
        description string, and a short identifier string; some entries
        additionally pass ``ngpu`` and ``skip_rocm_test``.
    """
    # Override strings shared by several flavors, spelled out once.
    float8_debug_model = "--module llama3 --config llama3_debugmodel_float8"
    compile_on = "--compile.enable"
    async_tp_on = "--parallelism.enable_async_tensor_parallel"

    return [
        # Tensor parallel (degree 2) with async TP under torch.compile.
        OverrideDefinitions(
            [[compile_on, "--parallelism.tensor_parallel_degree 2", async_tp_on]],
            "2D async TP compile",
            "2d_asynctp_compile",
        ),
        # Float8 debug-model config with no further overrides.
        OverrideDefinitions(
            [[float8_debug_model]],
            "Float8 test",
            "float8",
        ),
        # FSDP symmetric memory on the full_dtensor SPMD backend.
        OverrideDefinitions(
            [
                [
                    "--parallelism.spmd_backend full_dtensor",
                    "--parallelism.enable-fsdp-symm-mem",
                ],
            ],
            "FSDP symmetric memory",
            "fsdp_symm_mem",
            ngpu=2,
            skip_rocm_test=True,
        ),
        # NOTE(review): the identifier below says "cp", but the overrides
        # enable pipeline parallelism (PP), matching the description. The
        # identifier is a runtime string, so it is left unchanged here.
        OverrideDefinitions(
            [
                [
                    float8_debug_model,
                    compile_on,
                    "--parallelism.data_parallel_shard_degree 2",
                    "--parallelism.tensor_parallel_degree 2",
                    "--parallelism.pipeline_parallel_degree 2",
                    async_tp_on,
                ],
            ],
            "FSDP+async TP+PP+torch.compile+Float8",
            "fsdp+tp+cp+compile+float8",
            ngpu=8,
        ),
        # Sharded + replicated data parallel with context parallel.
        OverrideDefinitions(
            [
                [
                    float8_debug_model,
                    compile_on,
                    "--parallelism.data_parallel_shard_degree 2",
                    "--parallelism.data_parallel_replicate_degree 2",
                    "--parallelism.context_parallel_degree 2",
                ],
            ],
            "HSDP+CP+torch.compile+Float8",
            "hsdp+cp+compile+float8",
            ngpu=8,
        ),
        # DeepSeek V3 HybridEP debug model, compiling the model and loss.
        OverrideDefinitions(
            [
                [
                    "--module deepseek_v3 --config deepseek_v3_debugmodel_hybridep",
                    "--parallelism.data_parallel_shard_degree 4",
                    "--parallelism.expert_parallel_degree 2",
                    compile_on,
                    "--compile.components model,loss",
                ],
            ],
            "DeepSeek V3 FSDP+HybridEP+compile",
            "deepseek_v3_fsdp+hybridep+compile",
            ngpu=4,
            # deep_ep/NVSHMEM is CUDA-only, so skip on ROCm.
            skip_rocm_test=True,
        ),
    ]