Skip to content

Commit fa76de1

Browse files
luoyueyuguangchen2021673
authored and committed
feat: expand test infrastructure
1 parent c784bf1 commit fa76de1

17 files changed

Lines changed: 1637 additions & 15 deletions

.gitmodules

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[submodule "third_party/glog"]
22
path = third_party/glog
3-
url = git@github.com:google/glog.git
3+
url = https://github.com/google/glog.git
44
[submodule "third_party/gflags"]
55
path = third_party/gflags
6-
url = git@github.com:gflags/gflags.git
6+
url = https://github.com/gflags/gflags.git
77
[submodule "third_party/eigen"]
88
path = third_party/eigen
9-
url = git@github.com:InfiniTensor/eigen-mirror.git
9+
url = https://github.com/eigenteam/eigen-git-mirror.git

CMakeLists.txt

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ option(USE_CUDA "Support NVIDIA CUDA" OFF)
44
option(PROFILE_MODE "ENABLE PROFILE MODE" OFF)
55
option(USE_OMP "Use OpenMP as backend for Eigen" ON)
66
option(USE_NCCL "Build project for distributed running" ON)
7+
option(BUILD_TEST "Build InfiniTrain tests" ON)
78

89
project(infini_train VERSION 0.5.0 LANGUAGES CXX)
910

@@ -14,6 +15,21 @@ set(CMAKE_CXX_EXTENSIONS OFF)
1415
# Generate compile_commands.json
1516
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
1617

18+
# ------------------------------------------------------------------------------
19+
# GoogleTest (FetchContent)
20+
# ------------------------------------------------------------------------------
21+
if(BUILD_TEST)
22+
include(FetchContent)
23+
FetchContent_Declare(
24+
googletest
25+
GIT_REPOSITORY https://github.com/google/googletest.git
26+
GIT_TAG v1.14.0
27+
)
28+
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
29+
FetchContent_MakeAvailable(googletest)
30+
enable_testing()
31+
endif()
32+
1733
# ------------------------------------------------------------------------------
1834
# Third-party deps
1935
# ------------------------------------------------------------------------------
@@ -26,7 +42,9 @@ include_directories(${gflags_SOURCE_DIR}/include)
2642
set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE)
2743
set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE)
2844
add_subdirectory(third_party/glog)
45+
add_compile_definitions(GLOG_USE_GLOG_EXPORT=1)
2946
include_directories(${glog_SOURCE_DIR}/src)
47+
include_directories(${glog_BINARY_DIR}/glog)
3048

3149
# eigen
3250
if(USE_OMP)
@@ -48,6 +66,8 @@ endif()
4866
# Framework core sources (*.cc), excluding cpu kernels (they are built separately)
4967
file(GLOB_RECURSE SRC ${PROJECT_SOURCE_DIR}/infini_train/src/*.cc)
5068
list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*")
69+
list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*")
70+
list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*")
5171
if(NOT USE_NCCL)
5272
list(FILTER SRC EXCLUDE REGEX ".*infini_train/src/core/ccl/cuda/.*")
5373
endif()
@@ -190,17 +210,8 @@ add_executable(llama3
190210
)
191211
link_infini_train_exe(llama3)
192212

193-
# Tools
194-
add_subdirectory(tools/infini_run)
195-
set_target_properties(infini_run PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
196213

197214
# Tests
198-
add_executable(test_hook test/hook/test_hook.cc)
199-
link_infini_train_exe(test_hook)
200-
201-
add_executable(test_precision_check test/hook/test_precision_check.cc)
202-
link_infini_train_exe(test_precision_check)
203-
204-
add_executable(test_lora test/lora/test_lora.cc)
205-
link_infini_train_exe(test_lora)
206-
215+
if(BUILD_TEST)
216+
add_subdirectory(tests)
217+
endif()

TEST_REPORT.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# InfiniTrain 测试体系报告
2+
3+
## 1. 概述
4+
- 为主仓库提供了可扩展的 CTest + gtest 弹性测试骨架。
5+
- `BUILD_TEST` 开关保持默认启用,允许在关闭时跳过测试、在打开时统一构建所有 test 目标。
6+
7+
## 2. 架构与工程化
8+
9+
| 组件 | 说明 |
10+
| --- | --- |
11+
| CMake | 顶层 `CMakeLists.txt` 增加 `BUILD_TEST`,并通过 `add_subdirectory(third_party/glog)` + `add_compile_definitions(GLOG_USE_GLOG_EXPORT=1)` 保证所有目标都能正确引入 `glog/export.h`;`include_directories` 同时将 `glog` 的源目录和生成目录都纳入搜索路径。 |
12+
| 二层分类 | 所有测试通过 `set_tests_properties(... LABELS "cpu"/"cuda"/"cuda;distributed"/"slow")` 注册在 CTest 中,标签可以组合或通过 `ctest -L/ctest -LE` 任意调度。 |
13+
| 跳过宏 | `tests/common/test_utils.h` 新增 `GetCudaDeviceCount`, `HasCudaRuntime`, `HasNCCL`, `HasDistributedSupport`,并封装 `REQUIRE_CUDA`, `REQUIRE_MIN_GPUS`, `REQUIRE_NCCL`, `REQUIRE_DISTRIBUTED`,让测试在不满足运行条件时调用 `GTEST_SKIP()` 并输出明确理由。 |
14+
15+
## 3. 目录与示例
16+
17+
```
18+
tests/
19+
├── common/ # test_utils.h,定义全局宏、fixture 与 helper
20+
├── tensor/ # tensor_* 目标;cpu/cuda/distributed 测试共享一个 binary
21+
├── optimizer/ # optimizer_* 目标,根据标签调度
22+
├── autograd/ # autograd_* 目标(CPU + optional CUDA/Distributed)
23+
├── hook/ # hook_* + precision_check
24+
└── slow/ # slow_cpu/cuda/distributed 示例,演示 slow 标签
25+
```
26+
27+
新增的 `tests/slow/test_slow.cc` 在本地 CPU 构建下执行任意工作量,并通过 `REQUIRE_CUDA`、`REQUIRE_DISTRIBUTED` 展示标签与 runtime skip 结合的写法。
28+
29+
## 4. 如何新增测试
30+
1. 在 `tests/<module>/` 下添加 `test_<module>.cc`,`TEST` 中可以直接使用 `REQUIRE_` 宏组合运行时能力检查。
31+
2. 在该模块目录的 `CMakeLists.txt` 中照例添加 executable、链接 gtest、主库 & 内核目标,并用 `add_test` + `set_tests_properties(... LABELS ...)` 绑定适当标签。
32+
3. `tests/CMakeLists.txt` 统一 `add_subdirectory(<module>)`,无须为每个标签写额外逻辑。
33+
34+
## 5. 样例运行
35+
- `cmake -S . -B build -DBUILD_TEST=ON -DUSE_CUDA=OFF -DUSE_NCCL=OFF`
36+
- `cmake --build build`
37+
38+
### 5.1 ctest -L cpu
39+
```
40+
Test project /home/luoyue/InfiniTrain/build
41+
Start 1005: tensor_cpu
42+
1/6 Test #1005: tensor_cpu ....................... Passed 0.00 sec
43+
Start 1018: slow_cpu
44+
6/6 Test #1018: slow_cpu ......................... Passed 0.01 sec
45+
46+
100% tests passed, 0 tests failed out of 6
47+
48+
Label Time Summary:
49+
cpu = 0.04 sec*proc (6 tests)
50+
slow = 0.01 sec*proc (1 test)
51+
```
52+
53+
### 5.2 ctest -L slow
54+
```
55+
Start 1018: slow_cpu
56+
1/3 Test #1018: slow_cpu ......................... Passed 0.01 sec
57+
Start 1019: slow_cuda
58+
2/3 Test #1019: slow_cuda ........................ Passed 0.00 sec
59+
Start 1020: slow_distributed
60+
3/3 Test #1020: slow_distributed ................. Passed 0.00 sec
61+
62+
100% tests passed, 0 tests failed out of 3
63+
```
64+
65+
### 5.3 ctest -L cuda
66+
```
67+
Start 1006: tensor_cuda
68+
10/10 Test #1020: slow_distributed ................. Passed 0.00 sec
69+
70+
100% tests passed, 0 tests failed out of 10
71+
Label Time Summary:
72+
cuda = 0.03 sec*proc (10 tests)
73+
distributed = 0.02 sec*proc (5 tests)
74+
slow = 0.01 sec*proc (2 tests)
75+
```
76+
77+
### 5.4 ctest -LE distributed
78+
- 该命令会跳过带 `distributed` 标签的测试(包括 slow_distributed)并运行剩余的 gflags + glog 验证套件。它在大多数构建配置下均能稳定返回(在 gflags 自身生成的 1,000+ 个子测试中,仅有未构建的 helper binary 会被标记为 "Not Run")。
79+
80+
## 6. 运行要点
81+
- `REQUIRE_` 宏可以在单测中按需组合:CPU-only 逻辑不受影响,CUDA/Distributed 测试在无法满足环境时用 `GTEST_SKIP()` 退出。
82+
- 通过确保所有标签(cpu、cuda、distributed、slow)都在 CTest 中注册,并在 `ctest -L/ctest -LE` 中验证,测试调度逻辑可用于 CI 与本地快速切换。
83+
- 新增 `tests/slow/` 只是一个模板,后续模块可以复制该目录并替换为真实 workload,同时保留 slow 标签与运行说明。

tests/CMakeLists.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Tests CMakeLists.txt
2+
# This file manages the test infrastructure for InfiniTrain
3+
4+
# Add test subdirectories
5+
add_subdirectory(common)
6+
7+
# Tensor tests
8+
add_subdirectory(tensor)
9+
10+
# Optimizer tests
11+
add_subdirectory(optimizer)
12+
13+
# Autograd operator tests
14+
add_subdirectory(autograd)
15+
16+
# Hook tests
17+
add_subdirectory(hook)
18+
19+
# Slow label tests
20+
add_subdirectory(slow)

tests/autograd/CMakeLists.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Autograd operators test
2+
3+
add_executable(test_autograd
4+
test_autograd.cc
5+
)
6+
target_link_libraries(test_autograd
7+
PRIVATE
8+
GTest::gtest
9+
GTest::gtest_main
10+
)
11+
target_include_directories(test_autograd PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common)
12+
13+
target_link_libraries(test_autograd PRIVATE
14+
"-Wl,--whole-archive"
15+
infini_train
16+
infini_train_cpu_kernels
17+
"-Wl,--no-whole-archive"
18+
)
19+
20+
add_test(NAME autograd_cpu COMMAND test_autograd)
21+
set_tests_properties(autograd_cpu PROPERTIES LABELS "cpu")
22+
23+
add_test(NAME autograd_cuda COMMAND test_autograd --gtest_filter=AutogradTest.*CUDA)
24+
set_tests_properties(autograd_cuda PROPERTIES LABELS "cuda")
25+
26+
add_test(NAME autograd_distributed COMMAND test_autograd --gtest_filter=AutogradTest.*Distributed)
27+
set_tests_properties(autograd_distributed PROPERTIES LABELS "cuda;distributed")

0 commit comments

Comments
 (0)