Skip to content

Commit 79aae7c

Browse files
tenpercent, amd-khushbu, DDEle
authored
[CK Tile] enable building examples by default (#3259)
* remove EXCLUDE_FROM_ALL from ck-tile examples -> +15 min build time w/ 64 threads for a single arch
* fix cpp17 compile error in the ck-tile examples

---------

Co-authored-by: khuagarw <khuagarw@amd.com>
Co-authored-by: Ding, Yi <yi.ding@amd.com>
1 parent 40d7217 commit 79aae7c

39 files changed

Lines changed: 173 additions & 172 deletions

File tree

example/ck_tile/01_fmha/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ set(FMHA_FWD_INSTANCES "tile_fmha_fwd_instances")
109109
set(FMHA_BWD_INSTANCES "tile_fmha_bwd_instances")
110110

111111
message(DEBUG "adding instances ${FMHA_FWD_INSTANCES}")
112+
# to save build time, exclude the target from "all" target of "01_fmha" directory and its ancestors
112113
add_library(${FMHA_FWD_INSTANCES} OBJECT EXCLUDE_FROM_ALL)
113114
target_include_directories(${FMHA_FWD_INSTANCES} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
114115
target_sources(${FMHA_FWD_INSTANCES} PRIVATE ${FMHA_FWD_GEN_BLOBS})

example/ck_tile/02_layernorm2d/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ add_custom_command(
2626
set(EXAMPLE_LAYERNORM2D_FWD "tile_example_layernorm2d_fwd")
2727

2828
message(DEBUG "adding example ${EXAMPLE_LAYERNORM2D_FWD}")
29-
add_executable(${EXAMPLE_LAYERNORM2D_FWD} EXCLUDE_FROM_ALL layernorm2d_fwd.cpp)
29+
add_executable(${EXAMPLE_LAYERNORM2D_FWD} layernorm2d_fwd.cpp)
3030
target_include_directories(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
3131
target_sources(${EXAMPLE_LAYERNORM2D_FWD} PRIVATE ${LAYERNORM2D_FWD_GEN_BLOBS})
3232

example/ck_tile/03_gemm/CMakeLists.txt

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
1-
add_executable(tile_example_gemm_basic EXCLUDE_FROM_ALL gemm_basic.cpp)
2-
add_executable(tile_example_gemm_universal EXCLUDE_FROM_ALL universal_gemm.cpp)
3-
add_executable(tile_example_gemm_weight_preshuffle EXCLUDE_FROM_ALL gemm_weight_preshuffle.cpp)
4-
add_executable(tile_example_gemm_reduce EXCLUDE_FROM_ALL gemm_splitk_two_stage_reduce.cpp)
5-
add_executable(tile_example_gemm_splitk_two_stage EXCLUDE_FROM_ALL gemm_splitk_two_stage.cpp)
6-
set(EXAMPLE_GEMM_COMPILE_OPTIONS)
7-
set(EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS)
8-
if(CK_USE_OCP_FP8)
9-
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
1+
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a")
2+
add_executable(tile_example_gemm_basic gemm_basic.cpp)
3+
add_executable(tile_example_gemm_universal universal_gemm.cpp)
4+
add_executable(tile_example_gemm_weight_preshuffle gemm_weight_preshuffle.cpp)
5+
add_executable(tile_example_gemm_reduce gemm_splitk_two_stage_reduce.cpp)
6+
add_executable(tile_example_gemm_splitk_two_stage gemm_splitk_two_stage.cpp)
7+
set(EXAMPLE_GEMM_COMPILE_OPTIONS)
8+
set(EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS)
9+
if(CK_USE_OCP_FP8)
10+
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
11+
endif()
12+
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -mllvm -enable-noalias-to-md-conversion=0)
13+
list(APPEND EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS -Wno-unused-local-typedef)
14+
list(APPEND EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS -Wno-gnu-line-marker)
15+
list(APPEND EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS --save-temps)
16+
list(APPEND EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm -enable-noalias-to-md-conversion=0")
17+
target_compile_options(tile_example_gemm_basic PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
18+
target_compile_options(tile_example_gemm_universal PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
19+
target_compile_options(tile_example_gemm_weight_preshuffle PRIVATE ${EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS})
20+
target_compile_options(tile_example_gemm_reduce PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
21+
target_compile_options(tile_example_gemm_splitk_two_stage PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
1022
endif()
11-
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -mllvm -enable-noalias-to-md-conversion=0)
12-
list(APPEND EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS -Wno-unused-local-typedef)
13-
list(APPEND EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS -Wno-gnu-line-marker)
14-
list(APPEND EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS --save-temps)
15-
list(APPEND EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllvm -enable-noalias-to-md-conversion=0")
16-
target_compile_options(tile_example_gemm_basic PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
17-
target_compile_options(tile_example_gemm_universal PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
18-
target_compile_options(tile_example_gemm_weight_preshuffle PRIVATE ${EXAMPLE_WEIGHT_PRESHUFFLE_COMPILE_OPTIONS})
19-
target_compile_options(tile_example_gemm_reduce PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
20-
target_compile_options(tile_example_gemm_splitk_two_stage PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})

example/ck_tile/03_gemm/gemm_splitk_two_stage_reduce.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -683,7 +683,7 @@ int run_gemm_example_with_layouts_two_stage(ck_tile::ArgParser& arg_parser,
683683

684684
if constexpr(preshuffle)
685685
{
686-
ck_tile::HostTensor<BDataType> b_shuffle_host = shuffle_b<GemmConfig>(b_k_n);
686+
ck_tile::HostTensor<BDataType> b_shuffle_host = ck_tile::shuffle_b<GemmConfig>(b_k_n);
687687
// shuffled buffer B for device implementation
688688
b_k_n_dev_buf.ToDevice(b_shuffle_host.data());
689689
}

example/ck_tile/03_gemm/run_gemm_example.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,12 +284,12 @@ int run_gemm_example_with_layouts(ck_tile::ArgParser& arg_parser,
284284
if constexpr(GemmConfig::TiledMMAPermuteN)
285285
{
286286
std::cout << "Run with PermuteN" << std::endl;
287-
return shuffle_b_permuteN<GemmConfig>(b_k_n);
287+
return ck_tile::shuffle_b_permuteN<GemmConfig>(b_k_n);
288288
}
289289
else
290290
{
291291
std::cout << "Run without PermuteN" << std::endl;
292-
return shuffle_b<GemmConfig>(b_k_n);
292+
return ck_tile::shuffle_b<GemmConfig>(b_k_n);
293293
}
294294
}();
295295
// shuffled buffer B for device implementation

example/ck_tile/04_img2col/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# not using add_example_executable() to add this target, since we don't want this to have
22
# to be included in "make all/install/check"
3-
add_executable(tile_example_img2col EXCLUDE_FROM_ALL image_to_column.cpp)
3+
add_executable(tile_example_img2col image_to_column.cpp)

example/ck_tile/05_reduce/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ set(EXAMPLE_REDUCE "tile_example_reduce")
33
# to be included in "make all/install/check"
44
message(DEBUG "adding example ${EXAMPLE_REDUCE}")
55

6-
add_executable(${EXAMPLE_REDUCE} EXCLUDE_FROM_ALL reduce.cpp)
6+
add_executable(${EXAMPLE_REDUCE} reduce.cpp)
77
target_include_directories(${EXAMPLE_REDUCE} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
88
set(EXAMPLE_REDUCE_COMPILE_OPTIONS)
99

@@ -16,4 +16,4 @@ target_compile_options(${EXAMPLE_REDUCE} PRIVATE ${EXAMPLE_REDUCE_COMPILE_OPTION
1616
# by cmake will print too many files, execvp: /bin/sh: Argument list too long
1717
# however, this property may affect global
1818
# TODO: consider codegen a makefile by us
19-
set_property(GLOBAL PROPERTY RULE_MESSAGES OFF)
19+
set_property(GLOBAL PROPERTY RULE_MESSAGES OFF)

example/ck_tile/06_permute/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# not using add_example_executable() to add this target, since we don't want this to have
22
# to be included in "make all/install/check"
3-
add_executable(tile_example_permute EXCLUDE_FROM_ALL permute.cpp)
3+
add_executable(tile_example_permute permute.cpp)
44

55
if(NOT DEFINED PERMUTE_USE_ALTERNATIVE_IMPL)
66
# set(PERMUTE_USE_ALTERNATIVE_IMPL false)

example/ck_tile/09_topk_softmax/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
add_executable(tile_example_topk_softmax EXCLUDE_FROM_ALL topk_softmax.cpp topk_softmax_api.cpp)
1+
add_executable(tile_example_topk_softmax topk_softmax.cpp topk_softmax_api.cpp)
22
target_include_directories(tile_example_topk_softmax PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/)
33

44
set(EXAMPLE_TOPK_SOFTMAX_COMPILE_OPTIONS)

example/ck_tile/10_rmsnorm2d/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ add_custom_command(
2626
set(TILE_RMSNORM2D_FWD "tile_rmsnorm2d_fwd")
2727

2828
message(DEBUG "adding ${TILE_RMSNORM2D_FWD}")
29-
add_executable(${TILE_RMSNORM2D_FWD} EXCLUDE_FROM_ALL rmsnorm2d_fwd.cpp)
29+
add_executable(${TILE_RMSNORM2D_FWD} rmsnorm2d_fwd.cpp)
3030
target_include_directories(${TILE_RMSNORM2D_FWD} PRIVATE ${CMAKE_CURRENT_LIST_DIR})
3131
target_sources(${TILE_RMSNORM2D_FWD} PRIVATE ${RMSNORM2D_FWD_GEN_BLOBS})
3232

@@ -38,7 +38,7 @@ list(APPEND TILE_RMSNORM2D_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -Wno
3838
target_compile_options(${TILE_RMSNORM2D_FWD} PRIVATE ${TILE_RMSNORM2D_FWD_COMPILE_OPTIONS})
3939

4040
set(EXAMPLE_RMSNORM2D_FWD "tile_example_rmsnorm2d_fwd")
41-
add_executable(${EXAMPLE_RMSNORM2D_FWD} EXCLUDE_FROM_ALL example_rmsnorm2d_fwd.cpp)
41+
add_executable(${EXAMPLE_RMSNORM2D_FWD} example_rmsnorm2d_fwd.cpp)
4242
target_compile_options(${EXAMPLE_RMSNORM2D_FWD} PRIVATE ${TILE_RMSNORM2D_FWD_COMPILE_OPTIONS})
4343

4444
# TODO: we have to turn off this global prop, otherwise the progress bar generated

0 commit comments

Comments
 (0)