Skip to content

Commit 6cd0638

Browse files
shumwayassistant-librarian[bot]
authored andcommitted
[rocm-libraries] ROCm/rocm-libraries#7090 (commit 316fded)
[CK] Add rocm_ck directory structure with feature flag (#7090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds initial rocm_ck directory structure, #7119. - Establishes production `rocm_ck/` directory at `composablekernel/rocm_ck/`, peer to `tile_engine/` and `dispatcher/` - Adds `CK_ENABLE_ROCM_CK` option (default OFF) as a CK-internal feature flag — no superbuild or TheRock changes needed - Creates `rocm_ck` INTERFACE library, `ck_tile_headers` target, GTest integration with builder-style convenience targets (`smoke-rocm-ck`, `check-rocm-ck`) - Adds Jenkins `RUN_ROCM_CK_TESTS` parameter for CI, following the `RUN_BUILDER_TESTS` pattern - README explains the constexpr schema model: host-device separation via constexpr data rather than template parameters, enabling multi-arch distribution through kpack archives ## Test plan - [x] `cmake -DCK_ENABLE_ROCM_CK=ON` configures without errors - [x] `ninja check-rocm-ck` passes (4 host-only index type tests) - [x] Default build (`CK_ENABLE_ROCM_CK=OFF`) is unaffected — no rocm_ck targets present - [x] Jenkins `RUN_ROCM_CK_TESTS=true` enables the flag and runs `check-rocm-ck` 🤖 Generated with [Claude Code](https://claude.com/claude-code)
1 parent d931e87 commit 6cd0638

7 files changed

Lines changed: 279 additions & 0 deletions

File tree

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ option(FORCE_DISABLE_WMMA "Skip compiling WMMA specific instances (even if suppo
5555
option(BUILD_CK_TILE_ENGINE "Build the tile_engine subdirectory" OFF)
5656
option(BUILD_CK_EXAMPLES "Build the example subdirectory" ON)
5757
option(BUILD_CK_TUTORIALS "Build the tutorial subdirectory" ON)
58+
option(CK_ENABLE_ROCM_CK "Build rocm_ck API" OFF)
5859

5960
if(CK_EXPERIMENTAL_BUILDER)
6061
add_definitions(-DCK_EXPERIMENTAL_BUILDER)
@@ -771,6 +772,12 @@ if(NOT GPU_ARCHS AND USER_GPU_TARGETS AND NOT MIOPEN_REQ_LIBS_ONLY AND NOT HIPTE
771772
if(BUILD_CK_TILE_ENGINE)
772773
add_subdirectory(tile_engine)
773774
endif()
775+
if(CK_ENABLE_ROCM_CK)
776+
add_subdirectory(rocm_ck)
777+
if(TARGET check)
778+
add_dependencies(check build-smoke-rocm-ck)
779+
endif()
780+
endif()
774781
if(BUILD_TESTING)
775782
rocm_package_setup_component(tests
776783
LIBRARY_NAME composablekernel

Jenkinsfile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,9 @@ def cmake_build(Map conf=[:]){
721721
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
722722
setup_args = " -D CK_EXPERIMENTAL_BUILDER=ON " + setup_args
723723
}
724+
if (params.RUN_ROCM_CK_TESTS) {
725+
setup_args = " -D CK_ENABLE_ROCM_CK=ON " + setup_args
726+
}
724727
setup_cmd = conf.get(
725728
"setup_cmd",
726729
"""${cmake_envs} cmake -G Ninja ${setup_args} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS=" -O3 " .. """
@@ -837,6 +840,9 @@ def cmake_build(Map conf=[:]){
837840
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
838841
sh 'ninja check-builder'
839842
}
843+
if (params.RUN_ROCM_CK_TESTS) {
844+
sh 'ninja check-rocm-ck'
845+
}
840846
if(params.BUILD_PACKAGES){
841847
echo "Build ckProfiler packages"
842848
sh 'ninja -j64 package'
@@ -876,6 +882,9 @@ def cmake_build(Map conf=[:]){
876882
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
877883
sh 'ninja check-builder'
878884
}
885+
if (params.RUN_ROCM_CK_TESTS) {
886+
sh 'ninja check-rocm-ck'
887+
}
879888
if(params.BUILD_PACKAGES){
880889
echo "Build ckProfiler packages"
881890
sh 'ninja -j64 package'
@@ -1425,6 +1434,10 @@ pipeline {
14251434
name: "RUN_BUILDER_TESTS",
14261435
defaultValue: false,
14271436
description: "Run CK_BUILDER tests (default: OFF)")
1437+
booleanParam(
1438+
name: "RUN_ROCM_CK_TESTS",
1439+
defaultValue: true,
1440+
description: "Run rocm_ck tests (default: ON)")
14281441
booleanParam(
14291442
name: "RUN_ALL_UNIT_TESTS",
14301443
defaultValue: false,

rocm_ck/CMakeLists.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
#
4+
# rocm_ck — constexpr schema API over CK Tile device kernels.
5+
#
6+
# Build from CK root:
7+
# cmake -B build -S . -G Ninja -DCK_ENABLE_ROCM_CK=ON
8+
# ninja -C build smoke-rocm-ck
9+
10+
# rocm_ck — header-only INTERFACE library
11+
add_library(rocm_ck INTERFACE)
# Namespaced alias: linking `rocm_ck::rocm_ck` makes CMake report a typo as a
# missing target instead of silently treating the name as a plain library.
add_library(rocm_ck::rocm_ck ALIAS rocm_ck)

# Public headers live in <this directory>/include and are exposed to every
# target that links rocm_ck.
target_include_directories(rocm_ck
    INTERFACE
        "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
)

# rocm_ck requires C++20, while the rest of the CK library is still migrating
# from C++17. The standard is therefore raised per target, and the
# C++20-compatibility warning is switched off for rocm_ck consumers.
# These per-target settings should be revisited once the library has fully
# migrated to C++20.
target_compile_features(rocm_ck INTERFACE cxx_std_20)
target_compile_options(rocm_ck INTERFACE -Wno-c++20-compat)

# CK Tile headers — required for device code compilation.
# The guard keeps this file from redefining the target when it already exists.
if(NOT TARGET ck_tile_headers)
    add_library(ck_tile_headers INTERFACE)
    target_include_directories(ck_tile_headers
        INTERFACE
            "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>"
    )
endif()

# Enable CTest for this directory tree, then add the rocm_ck test suite.
enable_testing()
add_subdirectory(tests)

rocm_ck/README.md

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# rocm_ck
2+
3+
A C++20 constexpr API for configuring and distributing
4+
[CK Tile](../include/ck_tile/) GPU kernels across multiple architectures.
5+
6+
> **Status**: Early development. The current code establishes the directory
7+
> structure, build integration, and CI pipeline. A single unit test verifies
8+
> that the build and test infrastructure works end-to-end in Jenkins.
9+
> The schema types, device bridge, and kernel tests described below are
10+
> under active development.
11+
12+
## Why rocm_ck exists
13+
14+
CK Tile kernels are C++ templates. A GEMM kernel's tile size, pipeline
15+
strategy, data types, and epilogue are all template parameters — fixed at
16+
compile time. This is excellent for performance (zero-overhead abstraction,
17+
full inlining), but it creates a problem for multi-architecture distribution:
18+
the host program must be compiled separately from device code, and the host
19+
compiler must never see CK Tile headers.
20+
21+
rocm_ck solves this by introducing a **host-device boundary** built on
22+
constexpr data rather than template parameters:
23+
24+
1. **On the host side**, kernel configurations are plain C++20 structs
25+
(`Signature`, `Algorithm`, `GemmSpec`). These are constexpr data —
26+
they describe *what* to compute and *how*, without instantiating any
27+
templates. Host code reasons about kernels using values, not types.
28+
29+
2. **On the device side**, a thin bridge layer lowers these constexpr
30+
descriptions into CK Tile template instantiations. Each `GemmSpec`
31+
maps to exactly one `ck_tile::GemmPipeline<...>` specialization.
32+
33+
3. **At the boundary**, pre-compiled kernels are packaged into
34+
[kpack archives](https://github.com/ROCm/TheRock/blob/main/docs/rfcs/RFC0008-Multi-Arch-Packaging.md) —
35+
self-describing, compressed, multi-architecture bundles. The host loads kernels at runtime
36+
by matching a `GemmSpec` against the kpack table of contents. No
37+
recompilation, no template instantiation on the host.
38+
39+
This separation is what makes CK Tile viable in
40+
[TheRock](https://github.com/ROCm/TheRock)'s multi-arch build system,
41+
where a single host binary must work with device code compiled for
42+
many GPU targets (e.g. gfx90a, gfx942, gfx1151).
43+
44+
## The constexpr schema model
45+
46+
Traditional GPU kernel libraries select kernels through template
47+
parameters or runtime enums. rocm_ck uses a third approach: **constexpr
48+
structs that are validated at compile time and lowered to templates on
49+
the device side.**
50+
51+
A kernel configuration has two axes:
52+
53+
- **Signature** — *what* the kernel computes: a directed graph of
54+
operators (`GemmOp`, `AddOp`, `ReluOp`, ...) connecting named tensor
55+
slots. Data types, layouts, and batch dimensions are part of the
56+
signature.
57+
58+
- **Algorithm** — *how* the kernel computes it: tile geometry, pipeline
59+
strategy, warp layout, padding, and scheduling. These are tuning
60+
parameters that don't change the mathematical result.
61+
62+
The `Signature` and `Algorithm` are plain aggregate structs with
63+
designated initializers — no constructors, no inheritance, no runtime
64+
polymorphism. Validation happens in `consteval` functions: invalid
65+
configurations (unsupported tile size, incompatible data types, missing
66+
tensor slots) fail at compile time with actionable error messages.
67+
68+
Here is a preview of the API direction (not yet implemented):
69+
70+
```cpp
71+
// Host side — pure constexpr, any C++20 compiler, no CK headers
72+
constexpr Signature sig = {
73+
.dtype = DataType::FP16,
74+
.ops = {
75+
GemmOp{.lhs = "A", .rhs = "B", .out = "C"},
76+
AddOp{.lhs = "C", .rhs = "bias", .out = "D"},
77+
ReluOp{.in = "D", .out = "E"},
78+
},
79+
};
80+
81+
// Device side — make_kernel lowers to a CK Tile template instantiation.
82+
// Compiled separately per architecture, packaged into .kpack archives.
83+
```
84+
85+
## Directory layout
86+
87+
```text
88+
rocm_ck/
89+
├── CMakeLists.txt # INTERFACE library, C++20, ck_tile_headers target
90+
├── include/rocm_ck/ # Public headers — host-safe, no CK/HIP deps
91+
├── src/ # (planned) Device bridge, kpack loading
92+
└── tests/
93+
├── CMakeLists.txt # Test tiers: ROCM_CK_SMOKE, ROCM_CK_KERNEL
94+
├── unit/ # Fast host-only tests (< 1s, no GPU)
95+
└── kernel/ # (planned) GPU kernel tests
96+
```
97+
98+
## Build
99+
100+
rocm_ck is a CK feature, gated by `CK_ENABLE_ROCM_CK`:
101+
102+
```bash
103+
cd composablekernel
104+
cmake -B build -S . -G Ninja \
105+
-DCK_ENABLE_ROCM_CK=ON \
106+
-DCMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++
107+
108+
ninja -C build smoke-rocm-ck # host-only smoke tests
109+
ninja -C build check-rocm-ck # all rocm_ck tests
110+
ctest --test-dir build -L ROCM_CK_SMOKE --output-on-failure
111+
```
112+
113+
Default CK builds (`CK_ENABLE_ROCM_CK=OFF`) are unaffected.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
// SPDX-License-Identifier: MIT
3+
// Role: types — index_t, long_index_t. No runtime, no CK deps.
4+
5+
#pragma once
6+
7+
#include <cstdint>
8+
9+
namespace rocm_ck {
10+
11+
// 32-bit signed index type.
// Declared here as a plain <cstdint> alias so that it matches
// ck_tile::index_t without pulling in CK Tile headers.
12+
using index_t = std::int32_t;
13+
14+
// 64-bit signed index type for values that may not fit in index_t:
// batch_stride * nhead can exceed int32. Matches ck_tile::long_index_t.
15+
using long_index_t = std::int64_t;
16+
17+
} // namespace rocm_ck

rocm_ck/tests/CMakeLists.txt

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
# SPDX-License-Identifier: MIT
3+
#
4+
# rocm_ck tests
5+
#
6+
# Test tiers:
7+
# ROCM_CK_SMOKE — Fast host-only tests (< 1s total). No GPU, no HIP.
8+
# ROCM_CK_KERNEL — GPU kernel tests. Require HIP and a GPU.
9+
#
10+
# Usage:
11+
# ninja smoke-rocm-ck # build + run smoke tests
12+
# ninja build-smoke-rocm-ck # build only (no run)
13+
# ninja check-rocm-ck # run all rocm_ck tests
14+
#
15+
# ctest -L ROCM_CK_SMOKE --output-on-failure
16+
17+
# Google Test (via CK's FetchContent wrapper)
18+
include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/gtest.cmake)
19+
20+
# ---------------------------------------------------------------------------
21+
# Helper function — reusable per-test setup
22+
# ---------------------------------------------------------------------------
23+
# add_rocm_ck_test(<test_name> <source>...)
#
# Creates one GoogleTest executable for rocm_ck.
#
# Arguments:
#   test_name - name of the executable target to create
#   <source>  - one or more source files compiled into the executable
#
# The target links `rocm_ck` and `GTest::gtest_main` privately and disables two
# warnings for the test sources.
#
# Example:
#   add_rocm_ck_test(rocm_ck_unit_index_t unit/unit_index_t.cpp)
function(add_rocm_ck_test test_name)
    # Fail at configure time with a clear message instead of creating an
    # executable target that has no sources.
    if(ARGC LESS 2)
        message(FATAL_ERROR
            "add_rocm_ck_test(${test_name}): at least one source file is required")
    endif()

    add_executable(${test_name} ${ARGN})
    target_link_libraries(${test_name} PRIVATE rocm_ck GTest::gtest_main)
    target_compile_options(${test_name}
        PRIVATE
            -Wno-global-constructors # GTest registration macros
            -Wno-undef               # GTest internal headers
    )
endfunction()
31+
32+
# ---------------------------------------------------------------------------
33+
# Smoke tests (fast, host-only, no GPU)
34+
# ---------------------------------------------------------------------------
35+
# Smoke-test sources. Each file becomes one executable and one CTest test named
# rocm_ck_<file name without its last extension>, labelled ROCM_CK_SMOKE.
set(ROCM_CK_SMOKE_TESTS
    unit/unit_index_t.cpp
)

# Collects the names of the targets created by the loop below.
unset(ROCM_CK_SMOKE_TARGETS)
foreach(smoke_source IN LISTS ROCM_CK_SMOKE_TESTS)
    get_filename_component(smoke_stem "${smoke_source}" NAME_WLE)
    set(smoke_target "rocm_ck_${smoke_stem}")

    add_rocm_ck_test(${smoke_target} ${smoke_source})
    add_test(NAME ${smoke_target} COMMAND ${smoke_target})
    set_property(TEST ${smoke_target} PROPERTY LABELS "ROCM_CK_SMOKE")

    list(APPEND ROCM_CK_SMOKE_TARGETS ${smoke_target})
endforeach()

# rocm_ck_unit_index_t verifies that the rocm_ck index types match ck_tile, so
# it also needs the include directory two levels above this one.
target_include_directories(rocm_ck_unit_index_t
    PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/../../include"
)
51+
52+
# ---------------------------------------------------------------------------
53+
# Convenience targets
54+
# ---------------------------------------------------------------------------
55+
# build-smoke-rocm-ck: build every smoke-test executable without running it.
add_custom_target(build-smoke-rocm-ck DEPENDS ${ROCM_CK_SMOKE_TARGETS})

# smoke-rocm-ck: build the smoke tests, then run the tests labelled
# ROCM_CK_SMOKE. ctest is started in this directory's build tree, and VERBATIM
# makes the argument escaping identical on every platform.
add_custom_target(smoke-rocm-ck
    COMMAND "${CMAKE_CTEST_COMMAND}" --output-on-failure -L "ROCM_CK_SMOKE"
    DEPENDS build-smoke-rocm-ck
    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
    USES_TERMINAL
    VERBATIM
    COMMENT "Running rocm_ck smoke tests...")

# check-rocm-ck: run all rocm_ck tests. `ctest -L` takes a regular expression,
# so "ROCM_CK" selects every test whose label contains it (ROCM_CK_SMOKE
# included). Only the smoke-test executables are built as a prerequisite.
add_custom_target(check-rocm-ck
    COMMAND "${CMAKE_CTEST_COMMAND}" --output-on-failure -L "ROCM_CK"
    DEPENDS build-smoke-rocm-ck
    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
    USES_TERMINAL
    VERBATIM
    COMMENT "Running all rocm_ck tests...")
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2+
// SPDX-License-Identifier: MIT
3+
4+
#include <rocm_ck/index_t.hpp>

#include <ck_tile/core/numeric/integer.hpp>

#include <gtest/gtest.h>

#include <type_traits>
9+
10+
using ::rocm_ck::index_t;
11+
using ::rocm_ck::long_index_t;
12+
13+
namespace {
14+
15+
TEST(IndexTypes, IndexTypeIs32Bit) { EXPECT_EQ(sizeof(index_t), 4); }
16+
17+
TEST(IndexTypes, LongIndexTypeIs64Bit) { EXPECT_EQ(sizeof(long_index_t), 8); }
18+
19+
TEST(IndexTypes, IndexTypeIsSigned) { EXPECT_TRUE(index_t(-1) < 0); }
20+
21+
TEST(IndexTypes, LongIndexTypeIsSigned) { EXPECT_TRUE(long_index_t(-1) < 0); }
22+
23+
TEST(IndexTypes, MatchesCkTileIndexType)
24+
{
25+
EXPECT_TRUE((std::is_same_v<index_t, ck_tile::index_t>));
26+
}
27+
28+
TEST(IndexTypes, MatchesCkTileLongIndexType)
29+
{
30+
EXPECT_TRUE((std::is_same_v<long_index_t, ck_tile::long_index_t>));
31+
}
32+
33+
} // namespace

0 commit comments

Comments
 (0)