@@ -52,6 +52,9 @@ option(CK_EXPERIMENTAL_BUILDER "Enable experimental builder" OFF)
5252option (BUILD_MHA_LIB "Build the static library for flash attention" OFF )
5353option (FORCE_DISABLE_XDL "Skip compiling XDL specific instances (even if supported GPUs are included in GPU_TARGETS)" OFF )
5454option (FORCE_DISABLE_WMMA "Skip compiling WMMA specific instances (even if supported GPUs are included in GPU_TARGETS)" OFF )
55+ option (BUILD_CK_TILE_ENGINE "Build the tile_engine subdirectory" ON )
56+ option (BUILD_CK_EXAMPLES "Build the example subdirectory" ON )
57+ option (BUILD_CK_TUTORIALS "Build the tutorial subdirectory" ON )
5558
5659if (CK_EXPERIMENTAL_BUILDER)
5760 add_definitions (-DCK_EXPERIMENTAL_BUILDER )
@@ -668,94 +671,106 @@ if(NOT MIOPEN_REQ_LIBS_ONLY AND NOT HIPTENSOR_REQ_LIBS_ONLY)
668671endif ()
669672
670673
671-
672- # Optimization: Search only in library/src where all instance files actually live
673- # (was searching entire source tree, taking ~40s instead of <1s)
674- file (GLOB_RECURSE INSTANCE_FILES "${PROJECT_SOURCE_DIR} /library/src/*/device_*_instance.cpp" )
675- file (GLOB dir_list RELATIVE ${PROJECT_SOURCE_DIR} /library/src/tensor_operation_instance/gpu ${PROJECT_SOURCE_DIR} /library/src/tensor_operation_instance/gpu/* )
676- set (CK_DEVICE_INSTANCES)
677- FOREACH (subdir_path ${dir_list} )
678- set (target_dir)
679- IF (IS_DIRECTORY "${PROJECT_SOURCE_DIR} /library/src/tensor_operation_instance/gpu/${subdir_path} " )
680- set (cmake_instance)
681- file (READ "${PROJECT_SOURCE_DIR} /library/src/tensor_operation_instance/gpu/${subdir_path} /CMakeLists.txt" cmake_instance )
682- set (add_inst 0)
683- if (("${cmake_instance} " MATCHES "fp8" OR "${cmake_instance} " MATCHES "_f8" ) AND DTYPES MATCHES "fp8" )
684- set (add_inst 1)
685- endif ()
686- if (("${cmake_instance} " MATCHES "bf8" OR "${cmake_instance} " MATCHES "_b8" ) AND DTYPES MATCHES "bf8" )
687- set (add_inst 1)
688- endif ()
689- if (("${cmake_instance} " MATCHES "fp16" OR "${cmake_instance} " MATCHES "_f16" ) AND DTYPES MATCHES "fp16" )
690- set (add_inst 1)
691- endif ()
692- if (("${cmake_instance} " MATCHES "fp32" OR "${cmake_instance} " MATCHES "_f32" ) AND DTYPES MATCHES "fp32" )
693- set (add_inst 1)
694- endif ()
695- if (("${cmake_instance} " MATCHES "tf32" OR "${cmake_instance} " MATCHES "_tf32" ) AND DTYPES MATCHES "tf32" )
696- set (add_inst 1)
697- endif ()
698- if (("${cmake_instance} " MATCHES "fp64" OR "${cmake_instance} " MATCHES "_f64" ) AND DTYPES MATCHES "fp64" )
699- set (add_inst 1)
700- endif ()
701- if (("${cmake_instance} " MATCHES "bf16" OR "${cmake_instance} " MATCHES "_b16" ) AND DTYPES MATCHES "bf16" )
702- set (add_inst 1)
703- endif ()
704- if (("${cmake_instance} " MATCHES "int8" OR "${cmake_instance} " MATCHES "_i8" ) AND DTYPES MATCHES "int8" )
705- set (add_inst 1)
706- endif ()
707- if (NOT "${cmake_instance} " MATCHES "DTYPES" )
708- set (add_inst 1)
709- endif ()
710- if (add_inst EQUAL 1 OR NOT DEFINED DTYPES)
711- list (APPEND CK_DEVICE_INSTANCES device_${subdir_path} _instance)
712- endif ()
713- ENDIF ()
714- ENDFOREACH ()
715-
716- add_custom_target (instances DEPENDS utility;${CK_DEVICE_INSTANCES} SOURCES ${INSTANCE_FILES} )
717-
718674option (MIOPEN_REQ_LIBS_ONLY "Build only the MIOpen required libraries" OFF )
719675option (HIPTENSOR_REQ_LIBS_ONLY "Build only the HipTensor required libraries" OFF )
720676option (DISABLE_OFFLOAD_COMPRESS "Disable offload compress compiler flag when building instances" OFF )
721677option (BUILD_MHA_LIB "Build the static library for flash attention" OFF )
722-
723- add_subdirectory (library )
678+ option (BUILD_CK_DEVICE_INSTANCES "Build device operation instances in library/" ON )
679+ option (BUILD_CK_PROFILER "Build the CK profiler in profiler/" ON )
680+ option (BUILD_CK_TILE_ENGINE_TESTS "Build tile engine tests" ON )
681+ option (BUILD_CK_TILE_FMHA_TESTS "Build FMHA tests" ON )
682+ option (BUILD_CK_TILE_CSHUFFLE_LDS_BENCHMARKS "Build CShuffleLds microbenchmarks (requires BUILD_CK_EXAMPLES=ON)" OFF )
683+
684+ if (BUILD_CK_DEVICE_INSTANCES)
685+ # Optimization: Search only in library/src where all instance files actually live
686+ # (was searching entire source tree, taking ~40s instead of <1s)
687+ file (GLOB_RECURSE INSTANCE_FILES "${PROJECT_SOURCE_DIR} /library/src/*/device_*_instance.cpp" )
688+ file (GLOB dir_list RELATIVE ${PROJECT_SOURCE_DIR} /library/src/tensor_operation_instance/gpu ${PROJECT_SOURCE_DIR} /library/src/tensor_operation_instance/gpu/* )
689+ set (CK_DEVICE_INSTANCES)
690+ FOREACH (subdir_path ${dir_list} )
691+ set (target_dir)
692+ IF (IS_DIRECTORY "${PROJECT_SOURCE_DIR} /library/src/tensor_operation_instance/gpu/${subdir_path} " )
693+ set (cmake_instance)
694+ file (READ "${PROJECT_SOURCE_DIR} /library/src/tensor_operation_instance/gpu/${subdir_path} /CMakeLists.txt" cmake_instance )
695+ set (add_inst 0)
696+ if (("${cmake_instance} " MATCHES "fp8" OR "${cmake_instance} " MATCHES "_f8" ) AND DTYPES MATCHES "fp8" )
697+ set (add_inst 1)
698+ endif ()
699+ if (("${cmake_instance} " MATCHES "bf8" OR "${cmake_instance} " MATCHES "_b8" ) AND DTYPES MATCHES "bf8" )
700+ set (add_inst 1)
701+ endif ()
702+ if (("${cmake_instance} " MATCHES "fp16" OR "${cmake_instance} " MATCHES "_f16" ) AND DTYPES MATCHES "fp16" )
703+ set (add_inst 1)
704+ endif ()
705+ if (("${cmake_instance} " MATCHES "fp32" OR "${cmake_instance} " MATCHES "_f32" ) AND DTYPES MATCHES "fp32" )
706+ set (add_inst 1)
707+ endif ()
708+ if (("${cmake_instance} " MATCHES "tf32" OR "${cmake_instance} " MATCHES "_tf32" ) AND DTYPES MATCHES "tf32" )
709+ set (add_inst 1)
710+ endif ()
711+ if (("${cmake_instance} " MATCHES "fp64" OR "${cmake_instance} " MATCHES "_f64" ) AND DTYPES MATCHES "fp64" )
712+ set (add_inst 1)
713+ endif ()
714+ if (("${cmake_instance} " MATCHES "bf16" OR "${cmake_instance} " MATCHES "_b16" ) AND DTYPES MATCHES "bf16" )
715+ set (add_inst 1)
716+ endif ()
717+ if (("${cmake_instance} " MATCHES "int8" OR "${cmake_instance} " MATCHES "_i8" ) AND DTYPES MATCHES "int8" )
718+ set (add_inst 1)
719+ endif ()
720+ if (NOT "${cmake_instance} " MATCHES "DTYPES" )
721+ set (add_inst 1)
722+ endif ()
723+ if (add_inst EQUAL 1 OR NOT DEFINED DTYPES)
724+ list (APPEND CK_DEVICE_INSTANCES device_${subdir_path} _instance)
725+ endif ()
726+ ENDIF ()
727+ ENDFOREACH ()
728+
729+ add_custom_target (instances DEPENDS utility;${CK_DEVICE_INSTANCES} SOURCES ${INSTANCE_FILES} )
730+ add_subdirectory (library )
731+ endif ()
724732
725733if (CK_EXPERIMENTAL_BUILDER)
726734 add_subdirectory (experimental/builder )
727735 add_subdirectory (experimental/grouped_convolution_tile_instances )
728736endif ()
729737
730738if (NOT GPU_ARCHS AND USER_GPU_TARGETS AND NOT MIOPEN_REQ_LIBS_ONLY AND NOT HIPTENSOR_REQ_LIBS_ONLY)
731- rocm_package_setup_component (tests
732- LIBRARY_NAME composablekernel
733- PACKAGE_NAME tests # Prevent -static suffix on package name
734- )
735-
736- rocm_package_setup_component (examples
737- LIBRARY_NAME composablekernel
738- PACKAGE_NAME examples
739- )
740- add_subdirectory (example )
741-
742- add_subdirectory (tutorial )
743- rocm_package_setup_component (tutorials
744- LIBRARY_NAME composablekernel
745- PACKAGE_NAME tutorials
746- )
747- add_subdirectory (tile_engine )
739+ if (BUILD_CK_EXAMPLES)
740+ rocm_package_setup_component (examples
741+ LIBRARY_NAME composablekernel
742+ PACKAGE_NAME examples
743+ )
744+ add_subdirectory (example )
745+ endif ()
746+
747+ if (BUILD_CK_TUTORIALS)
748+ add_subdirectory (tutorial )
749+ rocm_package_setup_component (tutorials
750+ LIBRARY_NAME composablekernel
751+ PACKAGE_NAME tutorials
752+ )
753+ endif ()
754+ if (BUILD_CK_TILE_ENGINE)
755+ add_subdirectory (tile_engine )
756+ endif ()
748757 if (BUILD_TESTING)
758+ rocm_package_setup_component (tests
759+ LIBRARY_NAME composablekernel
760+ PACKAGE_NAME tests # Prevent -static suffix on package name
761+ )
749762 add_subdirectory (test )
750763 endif ()
751764endif ()
752765
753- if (NOT MIOPEN_REQ_LIBS_ONLY AND NOT HIPTENSOR_REQ_LIBS_ONLY)
754- rocm_package_setup_component (profiler
755- LIBRARY_NAME composablekernel
756- PACKAGE_NAME ckprofiler
757- )
758- add_subdirectory (profiler )
766+ if (BUILD_CK_PROFILER)
767+ if (NOT MIOPEN_REQ_LIBS_ONLY AND NOT HIPTENSOR_REQ_LIBS_ONLY)
768+ rocm_package_setup_component (profiler
769+ LIBRARY_NAME composablekernel
770+ PACKAGE_NAME ckprofiler
771+ )
772+ add_subdirectory (profiler )
773+ endif ()
759774endif ()
760775
761776if (CK_USE_CODEGEN AND (SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR GPU_ARCHS))
0 commit comments