Commit bf9aebe

luoyueyuguangchen2021673 authored and committed
fix: make distributed labels selectable
1 parent a8a3cc5 commit bf9aebe

17 files changed: +304 −312 lines

tests/autograd/CMakeLists.txt

Lines changed: 48 additions & 50 deletions
@@ -1,56 +1,54 @@
 # ============================================================================
 # Autograd tests
 # ============================================================================
-# Refactored: use the infini_train_add_test macro to simplify configuration
-#
-# Adding a new test takes just one line:
-#   infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu)
+# Refactored: aggregate the test sources into a single test binary to reduce target bloat
 # ============================================================================

 # -----------------------------------------------------------------------------
-# Elementwise tests
-# -----------------------------------------------------------------------------
-infini_train_add_test(test_autograd_elementwise_forward SOURCES test_autograd_elementwise_forward.cc LABELS cpu)
-infini_train_add_test(test_autograd_elementwise_backward SOURCES test_autograd_elementwise_backward.cc LABELS cpu)
-
-# -----------------------------------------------------------------------------
-# Matmul tests
-# -----------------------------------------------------------------------------
-infini_train_add_test(test_autograd_matmul_forward SOURCES test_autograd_matmul_forward.cc LABELS cpu)
-infini_train_add_test(test_autograd_matmul_backward SOURCES test_autograd_matmul_backward.cc LABELS cpu)
-
-# -----------------------------------------------------------------------------
-# Reduction tests
-# -----------------------------------------------------------------------------
-infini_train_add_test(test_autograd_reduction_forward SOURCES test_autograd_reduction_forward.cc LABELS cpu)
-infini_train_add_test(test_autograd_reduction_backward SOURCES test_autograd_reduction_backward.cc LABELS cpu)
-
-# -----------------------------------------------------------------------------
-# Linear tests
-# -----------------------------------------------------------------------------
-infini_train_add_test(test_autograd_linear_forward SOURCES test_autograd_linear_forward.cc LABELS cpu)
-infini_train_add_test(test_autograd_linear_backward SOURCES test_autograd_linear_backward.cc LABELS cpu)
-
-# -----------------------------------------------------------------------------
-# Softmax tests
-# -----------------------------------------------------------------------------
-infini_train_add_test(test_autograd_softmax_forward SOURCES test_autograd_softmax_forward.cc LABELS cpu)
-infini_train_add_test(test_autograd_softmax_backward SOURCES test_autograd_softmax_backward.cc LABELS cpu)
-
-# -----------------------------------------------------------------------------
-# Transform tests
-# -----------------------------------------------------------------------------
-infini_train_add_test(test_autograd_transform_forward SOURCES test_autograd_transform_forward.cc LABELS cpu)
-infini_train_add_test(test_autograd_transform_backward SOURCES test_autograd_transform_backward.cc LABELS cpu)
-
-# -----------------------------------------------------------------------------
-# Normalization tests
-# -----------------------------------------------------------------------------
-infini_train_add_test(test_autograd_normalization_forward SOURCES test_autograd_normalization_forward.cc LABELS cpu)
-infini_train_add_test(test_autograd_normalization_backward SOURCES test_autograd_normalization_backward.cc LABELS cpu)
-
-# -----------------------------------------------------------------------------
-# Legacy combined tests
-# Note: with gtest_discover_tests, every TEST_F is discovered automatically
-# -----------------------------------------------------------------------------
-infini_train_add_test(test_autograd_legacy SOURCES test_autograd.cc LABELS cpu cuda distributed)
+# Autograd tests (single binary)
+# -----------------------------------------------------------------------------
+set(AUTOGRAD_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+set(AUTOGRAD_TEST_SOURCES
+    ${AUTOGRAD_TEST_DIR}/test_autograd_elementwise_forward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_elementwise_backward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_matmul_forward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_matmul_backward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_reduction_forward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_reduction_backward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_linear_forward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_linear_backward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_softmax_forward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_softmax_backward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_transform_forward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_transform_backward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_normalization_forward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd_normalization_backward.cc
+    ${AUTOGRAD_TEST_DIR}/test_autograd.cc
+)
+
+add_executable(test_autograd ${AUTOGRAD_TEST_SOURCES})
+target_compile_options(test_autograd PRIVATE -Wno-error)
+link_infini_train_exe(test_autograd)
+target_link_libraries(test_autograd PRIVATE GTest::gtest GTest::gtest_main)
+target_include_directories(test_autograd
+    PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}/../common
+        ${glog_SOURCE_DIR}/src
+)
+
+include(GoogleTest)
+gtest_discover_tests(test_autograd
+    EXTRA_ARGS --gtest_output=xml:%T.xml
+    TEST_FILTER "-AutogradCudaTest.*:AutogradDistributedTest.*"
+    PROPERTIES LABELS "cpu"
+)
+gtest_discover_tests(test_autograd
+    EXTRA_ARGS --gtest_output=xml:%T.xml
+    TEST_FILTER "AutogradCudaTest.*"
+    PROPERTIES LABELS "cuda"
+)
+gtest_discover_tests(test_autograd
+    EXTRA_ARGS --gtest_output=xml:%T.xml
+    TEST_FILTER "AutogradDistributedTest.*"
+    PROPERTIES LABELS "distributed"
+)
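
Note: the three gtest_discover_tests calls above register the same binary three times, split by GoogleTest name filter and tagged with distinct CTest labels, which is what makes ctest -L cpu, ctest -L cuda, and ctest -L distributed select disjoint groups. Grouping is driven purely by the fixture name. A minimal sketch of how a test lands in a label group (the fixture definition here is a placeholder; the real fixtures live in tests/common/test_utils.h):

#include <gtest/gtest.h>

// Placeholder fixture for illustration only.
class AutogradDistributedTest : public ::testing::Test {};

TEST_F(AutogradDistributedTest, IsSelectableByLabel) {
    // Matched by TEST_FILTER "AutogradDistributedTest.*", so discovery
    // attaches the "distributed" label and ctest -L distributed picks it up.
    SUCCEED();
}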

tests/autograd/test_autograd.cc

Lines changed: 23 additions & 29 deletions
@@ -15,6 +15,7 @@
 #include "infini_train/include/autograd/linear.h"
 #include "infini_train/include/autograd/outer.h"
 #include "infini_train/include/autograd/misc.h"
+#include "test_utils.h"

 using namespace infini_train;

@@ -380,17 +381,16 @@ TEST_F(AutogradForwardTest, NoOpForward) {

 #ifdef USE_CUDA
 TEST_F(AutogradCudaTest, AddForwardCUDA) {
+    REQUIRE_CUDA();
     auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     a->set_requires_grad(true);
-    auto a_data = static_cast<float*>(a->DataPtr());
-    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(a, 1.0f);

     auto b = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     b->set_requires_grad(true);
-    auto b_data = static_cast<float*>(b->DataPtr());
-    for (int i = 0; i < 6; ++i) b_data[i] = 2.0f;
+    infini_train::test::FillConstantTensor(b, 2.0f);

     auto add_fn = std::make_shared<autograd::Add>();
     auto result = add_fn->Apply({a, b});
@@ -399,17 +399,16 @@ TEST_F(AutogradCudaTest, AddForwardCUDA) {
 }

 TEST_F(AutogradCudaTest, MatmulForwardCUDA) {
+    REQUIRE_CUDA();
     auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     a->set_requires_grad(true);
-    auto a_data = static_cast<float*>(a->DataPtr());
-    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(a, 1.0f);

     auto b = std::make_shared<Tensor>(std::vector<int64_t>{3, 4}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     b->set_requires_grad(true);
-    auto b_data = static_cast<float*>(b->DataPtr());
-    for (int i = 0; i < 12; ++i) b_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(b, 1.0f);

     auto matmul_fn = std::make_shared<autograd::Matmul>();
     auto result = matmul_fn->Apply({a, b});
@@ -418,23 +417,23 @@ TEST_F(AutogradCudaTest, MatmulForwardCUDA) {
 }

 TEST_F(AutogradCudaTest, SumForwardCUDA) {
+    REQUIRE_CUDA();
     auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     a->set_requires_grad(true);
-    auto a_data = static_cast<float*>(a->DataPtr());
-    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(a, 1.0f);

     auto sum_fn = std::make_shared<autograd::Sum>(1, false);
     auto result = sum_fn->Apply({a});
     EXPECT_EQ(result.size(), 1);
 }

 TEST_F(AutogradCudaTest, SoftmaxForwardCUDA) {
+    REQUIRE_CUDA();
     auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     a->set_requires_grad(true);
-    auto a_data = static_cast<float*>(a->DataPtr());
-    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(a, 1.0f);

     auto softmax_fn = std::make_shared<autograd::Softmax>(1);
     auto result = softmax_fn->Apply({a});
@@ -443,23 +442,21 @@ TEST_F(AutogradCudaTest, SoftmaxForwardCUDA) {
 }

 TEST_F(AutogradCudaTest, LinearForwardCUDA) {
+    REQUIRE_CUDA();
     auto input = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                           Device(Device::DeviceType::kCUDA, 0));
     input->set_requires_grad(true);
-    auto input_data = static_cast<float*>(input->DataPtr());
-    for (int i = 0; i < 6; ++i) input_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(input, 1.0f);

     auto weight = std::make_shared<Tensor>(std::vector<int64_t>{4, 3}, DataType::kFLOAT32,
                                            Device(Device::DeviceType::kCUDA, 0));
     weight->set_requires_grad(true);
-    auto weight_data = static_cast<float*>(weight->DataPtr());
-    for (int i = 0; i < 12; ++i) weight_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(weight, 1.0f);

     auto bias = std::make_shared<Tensor>(std::vector<int64_t>{4}, DataType::kFLOAT32,
                                          Device(Device::DeviceType::kCUDA, 0));
     bias->set_requires_grad(true);
-    auto bias_data = static_cast<float*>(bias->DataPtr());
-    for (int i = 0; i < 4; ++i) bias_data[i] = 0.0f;
+    infini_train::test::FillConstantTensor(bias, 0.0f);

     auto linear_fn = std::make_shared<autograd::Linear>();
     auto result = linear_fn->Apply({input, weight, bias});
@@ -480,10 +477,9 @@ TEST_F(AutogradDistributedTest, AllReduceDistributed) {
     auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     a->set_requires_grad(true);
-    auto a_data = static_cast<float*>(a->DataPtr());
-    for (int i = 0; i < 6; ++i) a_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(a, 1.0f);

-    EXPECT_TRUE(a->IsCUDA());
+    EXPECT_TRUE(a->GetDevice().IsCUDA());
     EXPECT_TRUE(a->requires_grad());
 }

@@ -494,10 +490,9 @@ TEST_F(AutogradDistributedTest, AllGatherDistributed) {
     auto a = std::make_shared<Tensor>(std::vector<int64_t>{4, 4}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     a->set_requires_grad(true);
-    auto a_data = static_cast<float*>(a->DataPtr());
-    for (int i = 0; i < 16; ++i) a_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(a, 1.0f);

-    EXPECT_TRUE(a->IsCUDA());
+    EXPECT_TRUE(a->GetDevice().IsCUDA());
     EXPECT_EQ(a->Dims(), (std::vector<int64_t>{4, 4}));
 }

@@ -508,10 +503,9 @@ TEST_F(AutogradDistributedTest, ReduceScatterDistributed) {
     auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 8}, DataType::kFLOAT32,
                                       Device(Device::DeviceType::kCUDA, 0));
     a->set_requires_grad(true);
-    auto a_data = static_cast<float*>(a->DataPtr());
-    for (int i = 0; i < 16; ++i) a_data[i] = 1.0f;
+    infini_train::test::FillConstantTensor(a, 1.0f);

-    EXPECT_TRUE(a->IsCUDA());
+    EXPECT_TRUE(a->GetDevice().IsCUDA());
     EXPECT_EQ(a->Dims(), (std::vector<int64_t>{2, 8}));
 }

@@ -530,7 +524,7 @@ TEST_F(AutogradDistributedTest, DistributedMatmul) {
     auto result = matmul_fn->Apply({a, b});

     EXPECT_EQ(result.size(), 1);
-    EXPECT_TRUE(result[0]->IsCUDA());
+    EXPECT_TRUE(result[0]->GetDevice().IsCUDA());
 }

 TEST_F(AutogradDistributedTest, DistributedLinear) {
@@ -552,6 +546,6 @@ TEST_F(AutogradDistributedTest, DistributedLinear) {

     EXPECT_EQ(result.size(), 1);
     EXPECT_EQ(result[0]->Dims(), (std::vector<int64_t>{2, 4}));
-    EXPECT_TRUE(result[0]->IsCUDA());
+    EXPECT_TRUE(result[0]->GetDevice().IsCUDA());
 }
 #endif // USE_NCCL
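
Note: after this change every CUDA test follows the same shape: a REQUIRE_CUDA() guard up front, then FillConstantTensor instead of a raw DataPtr() loop (presumably the motivation: writing through DataPtr() from host code is not valid for a device allocation, while the helper stages through a CPU tensor). A sketch of a new test in that style, reusing only APIs shown in this diff; the test name is hypothetical:

TEST_F(AutogradCudaTest, AddForwardCUDAConstants) {  // hypothetical example, not part of the commit
    REQUIRE_CUDA();  // skips (rather than fails) on machines without a CUDA runtime
    auto a = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                      Device(Device::DeviceType::kCUDA, 0));
    a->set_requires_grad(true);
    infini_train::test::FillConstantTensor(a, 4.0f);  // fills on CPU, then copies to device

    auto b = std::make_shared<Tensor>(std::vector<int64_t>{2, 3}, DataType::kFLOAT32,
                                      Device(Device::DeviceType::kCUDA, 0));
    b->set_requires_grad(true);
    infini_train::test::FillConstantTensor(b, 5.0f);

    auto add_fn = std::make_shared<autograd::Add>();
    auto result = add_fn->Apply({a, b});
    EXPECT_EQ(result.size(), 1);
}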

tests/common/test_macros.cmake

Lines changed: 2 additions & 7 deletions
@@ -74,13 +74,8 @@ macro(infini_train_add_test)
         ${glog_SOURCE_DIR}/src
     )

-    # 5. Link the project libraries (whole-archive resolves static-library symbol dependencies)
-    target_link_libraries(${ARG_TEST_NAME} PRIVATE
-        "-Wl,--whole-archive"
-        infini_train
-        infini_train_cpu_kernels
-        "-Wl,--no-whole-archive"
-    )
+    # 5. Link the project libraries (reuse the framework's linking strategy, which handles CUDA and static-library dependencies)
+    link_infini_train_exe(${ARG_TEST_NAME})

     # 6. Use gtest_discover_tests to discover test cases automatically
     #    It creates one ctest entry for every TEST_F()
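
Note: for context on the replaced block, kernel libraries are commonly linked --whole-archive because they register themselves through static initializers, and the linker otherwise drops archive members that nothing references directly; the deleted comment says the flag resolved static-library symbol dependencies, and link_infini_train_exe now centralizes that logic. A hypothetical sketch of the self-registration pattern (not code from this repository):

#include <functional>
#include <map>
#include <string>

// Global dispatch table, illustrative only.
std::map<std::string, std::function<void()>>& KernelRegistry() {
    static std::map<std::string, std::function<void()>> registry;
    return registry;
}

struct KernelRegistrar {
    KernelRegistrar(const std::string& name, std::function<void()> fn) {
        KernelRegistry().emplace(name, std::move(fn));
    }
};

// This static object's constructor is the only "use" of the translation unit;
// without --whole-archive, a static-library build may never link it in.
static KernelRegistrar add_kernel_registrar{"add_cpu", [] { /* launch kernel */ }};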

tests/common/test_utils.h

Lines changed: 48 additions & 12 deletions
@@ -55,6 +55,52 @@ inline bool HasDistributedSupport() {
     return HasCudaRuntime() && HasNCCL() && GetCudaDeviceCount() >= 2;
 }

+inline void FillSequentialTensor(const std::shared_ptr<Tensor>& tensor, float start = 0.0f) {
+    size_t size = 1;
+    for (auto dim : tensor->Dims()) {
+        size *= static_cast<size_t>(dim);
+    }
+
+    if (tensor->GetDevice().IsCUDA()) {
+        auto cpu_tensor = std::make_shared<Tensor>(tensor->Dims(), tensor->Dtype(),
+                                                   Device(Device::DeviceType::kCPU, 0));
+        auto* cpu_data = static_cast<float*>(cpu_tensor->DataPtr());
+        for (size_t i = 0; i < size; ++i) {
+            cpu_data[i] = start + static_cast<float>(i);
+        }
+        tensor->CopyFrom(cpu_tensor);
+        return;
+    }
+
+    auto* data = static_cast<float*>(tensor->DataPtr());
+    for (size_t i = 0; i < size; ++i) {
+        data[i] = start + static_cast<float>(i);
+    }
+}
+
+inline void FillConstantTensor(const std::shared_ptr<Tensor>& tensor, float value) {
+    size_t size = 1;
+    for (auto dim : tensor->Dims()) {
+        size *= static_cast<size_t>(dim);
+    }
+
+    if (tensor->GetDevice().IsCUDA()) {
+        auto cpu_tensor = std::make_shared<Tensor>(tensor->Dims(), tensor->Dtype(),
+                                                   Device(Device::DeviceType::kCPU, 0));
+        auto* cpu_data = static_cast<float*>(cpu_tensor->DataPtr());
+        for (size_t i = 0; i < size; ++i) {
+            cpu_data[i] = value;
+        }
+        tensor->CopyFrom(cpu_tensor);
+        return;
+    }
+
+    auto* data = static_cast<float*>(tensor->DataPtr());
+    for (size_t i = 0; i < size; ++i) {
+        data[i] = value;
+    }
+}
+
 #define REQUIRE_CUDA() \
     do { \
         if (!infini_train::test::HasCudaRuntime()) { \
@@ -106,12 +152,7 @@ class TensorTestBase : public InfiniTrainTest {
     }

     void fillTensor(std::shared_ptr<Tensor> tensor, float value) {
-        auto data = static_cast<float*>(tensor->DataPtr());
-        size_t size = 1;
-        for (auto dim : tensor->Dims()) size *= dim;
-        for (size_t i = 0; i < size; ++i) {
-            data[i] = value + static_cast<float>(i);
-        }
+        FillSequentialTensor(tensor, value);
     }
 };

@@ -140,12 +181,7 @@ class AutogradTestBase : public InfiniTrainTest {
         auto tensor = std::make_shared<Tensor>(shape, DataType::kFLOAT32,
                                                Device(device, device_id));
         tensor->set_requires_grad(true);
-        auto data = static_cast<float*>(tensor->DataPtr());
-        size_t size = 1;
-        for (auto dim : shape) size *= dim;
-        for (size_t i = 0; i < size; ++i) {
-            data[i] = value + static_cast<float>(i);
-        }
+        FillSequentialTensor(tensor, value);
         return tensor;
     }
 };
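
Note: a minimal usage sketch for the two new helpers, assuming the Tensor API shown above. Both write directly for CPU tensors and stage through a temporary CPU tensor plus CopyFrom for CUDA tensors:

auto t = std::make_shared<Tensor>(std::vector<int64_t>{2, 2}, DataType::kFLOAT32,
                                  Device(Device::DeviceType::kCPU, 0));
infini_train::test::FillSequentialTensor(t, 10.0f);  // writes 10, 11, 12, 13
infini_train::test::FillConstantTensor(t, 0.0f);     // overwrites every element with 0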
