From 8a4c7e6d6443bdacc81232168f6d601ad63c9cc7 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sat, 9 May 2026 16:19:52 -0600 Subject: [PATCH 01/13] Suppress external header warnings and fix FP8 macro redefinition in MIOpen The MIOpen build with -Weverything produces ~31K warnings, almost all from external HIP runtime and rocRAND headers. Add targeted -Wno-* suppressions to EnableCompilerWarnings.cmake for warning categories that only fire on external headers, eliminating ~90% of the noise. Also fix a macro redefinition warning where hip_float8.hpp defaulted MIOPEN_FP8_IEEE_EXPONENT_BIAS to 1 before config.h could set it to the CMake-configured value (0). Use __has_include to pull config.h when available (host compilation), while preserving the fallback for kernel compilation where config.h is not in the include path. Co-Authored-By: Claude Opus 4 (1M context) --- projects/miopen/cmake/EnableCompilerWarnings.cmake | 12 +++++++++++- projects/miopen/src/kernels/hip_float8.hpp | 7 +++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index 10a610f1c4a7..90c330159a50 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -67,7 +67,17 @@ set(__clang_cxx_compile_options -Wno-suggest-override -Wno-nonportable-system-include-path -Wno-documentation - -Wmissing-noreturn) + -Wmissing-noreturn + # Suppress warnings triggered by external headers (HIP, rocRAND, hipBLAS-common) + -Wno-zero-as-null-pointer-constant + -Wno-comma + -Wno-newline-eof + -Wno-unused-template + -Wno-float-equal + -Wno-shadow-field-in-constructor + -Wno-nvcc-compat + -Wno-gnu-anonymous-struct + -Wno-nested-anon-types) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "19") list(APPEND __clang_cxx_compile_options diff --git a/projects/miopen/src/kernels/hip_float8.hpp 
b/projects/miopen/src/kernels/hip_float8.hpp index 352cd97bb9b0..7d837be2a219 100644 --- a/projects/miopen/src/kernels/hip_float8.hpp +++ b/projects/miopen/src/kernels/hip_float8.hpp @@ -46,6 +46,13 @@ #define USE_SIMPLER_HIP_F8x8 0 +// Pull FP8 config from config.h when available (host compilation). +// During kernel compilation config.h is not in the include path; +// the macros are passed via -D flags from the solver instead. +#if __has_include(<miopen/config.h>) +#include <miopen/config.h> +#endif + #ifndef MIOPEN_FP8_CLIPPING #define MIOPEN_FP8_CLIPPING 1 #endif From badacbb0ccdec3e7ba417ebaceca969c6b14e201 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sat, 9 May 2026 16:40:26 -0600 Subject: [PATCH 02/13] Replace -Weverything with -Wall -Wextra -Werror and mark external includes as SYSTEM Switch MIOpen from clang's -Weverything (which enables every possible warning and required 30+ suppressions) to the standard -Wall -Wextra plus three explicitly-enabled warnings not covered by those flags: -Wundef, -Wunreachable-code, and -Wmissing-noreturn. This eliminates all the -Wno-* suppressions that were only needed to counteract -Weverything, reducing the warning configuration from ~70 lines to ~30 lines. The remaining suppressions (-Wno-ignored-qualifiers, -Wno-sign-compare, -Wno-deprecated-declarations, -Wno-deprecated, -Wno-unused-command-line-argument) target specific -Wall/-Wextra warnings. Also mark hipBLAS-common and rocRAND include directories as SYSTEM in src/CMakeLists.txt so that warnings from external headers are suppressed by the compiler rather than requiring per-warning-type suppressions. With -Werror now enabled, any new warning in MIOpen's own code will cause a build failure, preventing warning regressions.
Co-Authored-By: Claude Opus 4 (1M context) --- .../miopen/cmake/EnableCompilerWarnings.cmake | 71 +++---------------- projects/miopen/src/CMakeLists.txt | 4 +- 2 files changed, 13 insertions(+), 62 deletions(-) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index 90c330159a50..ceb884dc8e0d 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -24,74 +24,24 @@ # ################################################################################ -set(__default_cxx_compile_options +set(__cxx_compile_options + -Werror -Wall -Wextra - -Wcomment - -Wendif-labels - -Wformat - -Winit-self - -Wreturn-type - -Wsequence-point - -Wswitch - -Wtrigraphs + # Additional warnings not included in -Wall/-Wextra -Wundef - -Wuninitialized -Wunreachable-code + -Wmissing-noreturn + # Suppress specific -Wall/-Wextra warnings -Wno-ignored-qualifiers -Wno-sign-compare + -Wno-deprecated-declarations + -Wno-deprecated ) set(__clang_cxx_compile_options - -Weverything - -Wno-c++98-compat - -Wno-c++98-compat-pedantic - -Wno-conversion - -Wno-double-promotion - -Wno-exit-time-destructors - -Wno-extra-semi - -Wno-extra-semi-stmt - -Wno-missing-prototypes - -Wno-padded -Wno-unused-command-line-argument - -Wno-weak-vtables - -Wno-covered-switch-default - -Wno-unsafe-buffer-usage - -Wno-deprecated-declarations - -Wno-global-constructors - -Wno-reserved-identifier - -Wno-deprecated - -Wno-old-style-cast - -Wno-c++11-narrowing - -Wno-switch-enum - -Wno-suggest-override - -Wno-nonportable-system-include-path - -Wno-documentation - -Wmissing-noreturn - # Suppress warnings triggered by external headers (HIP, rocRAND, hipBLAS-common) - -Wno-zero-as-null-pointer-constant - -Wno-comma - -Wno-newline-eof - -Wno-unused-template - -Wno-float-equal - -Wno-shadow-field-in-constructor - -Wno-nvcc-compat - -Wno-gnu-anonymous-struct - -Wno-nested-anon-types) - 
-if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "19") - list(APPEND __clang_cxx_compile_options - -Wno-unique-object-duplication - -Wno-switch-default) -endif() - -if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "23") - list(APPEND __clang_cxx_compile_options - -Wno-lifetime-safety - -Wno-lifetime-safety-suggestions - -Wno-lifetime-safety-intra-tu-suggestions - -Wno-lifetime-safety-cross-tu-suggestions) -endif() +) if(WIN32) list(APPEND __clang_cxx_compile_options @@ -100,8 +50,9 @@ if(WIN32) endif() add_compile_options( - "$<$,$>:${__default_cxx_compile_options};${__clang_cxx_compile_options}>" + "$<$:${__cxx_compile_options}>" + "$<$,$>:${__clang_cxx_compile_options}>" ) -unset(__default_cxx_compile_options) +unset(__cxx_compile_options) unset(__clang_cxx_compile_options) diff --git a/projects/miopen/src/CMakeLists.txt b/projects/miopen/src/CMakeLists.txt index 9e6f401b7506..cd4ee6ab6dcb 100644 --- a/projects/miopen/src/CMakeLists.txt +++ b/projects/miopen/src/CMakeLists.txt @@ -1010,10 +1010,10 @@ endif() if(hipblaslt_FOUND) target_link_libraries( MIOpen PRIVATE roc::hipblaslt ) - target_include_directories( MIOpen INTERFACE $ ) + target_include_directories( MIOpen SYSTEM INTERFACE $ ) endif() -target_include_directories( MIOpen PRIVATE ${ROCRAND_INCLUDE_DIR} ) +target_include_directories( MIOpen SYSTEM PRIVATE ${ROCRAND_INCLUDE_DIR} ) # For backward compatibility with ROCm 5.3 # Build with library libMLIRMIOpen From 968584c5342f1a567880ecb78ba87a55da569854 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sat, 9 May 2026 17:38:28 -0600 Subject: [PATCH 03/13] Add -Wno-c++11-narrowing to suppress existing narrowing conversions MIOpen has several int-to-size_t and double-to-float narrowing conversions in batchnorm solvers, CK utilities, and generated kernel data files. Suppress these with -Wno-c++11-narrowing to allow -Werror to be enabled without breaking the build. 
The narrowing issues can be addressed individually in follow-up work. Co-Authored-By: Claude Opus 4 (1M context) --- projects/miopen/cmake/EnableCompilerWarnings.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index ceb884dc8e0d..940c23b70fc6 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -32,7 +32,8 @@ set(__cxx_compile_options -Wundef -Wunreachable-code -Wmissing-noreturn - # Suppress specific -Wall/-Wextra warnings + # Suppress specific warnings + -Wno-c++11-narrowing -Wno-ignored-qualifiers -Wno-sign-compare -Wno-deprecated-declarations From a488bac95b990ec82228c2f0bd8a2cd5cf4b43d9 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sun, 10 May 2026 08:24:06 -0600 Subject: [PATCH 04/13] Fix Windows CI build errors: macro redefinition and -Wundef Guard WIN32_LEAN_AND_MEAN defines with #ifndef in env.cpp and file_lock.hpp to prevent redefinition errors, since the macro is already defined via CMake compile flags (-DWIN32_LEAN_AND_MEAN). Disable -Wundef on Windows builds because external headers (rocRAND, HIP) are not consistently included as system headers on Windows, causing -Wundef errors from undefined macros like __HIP_DEVICE_COMPILE__ in rocrand_common.h. 
Co-Authored-By: Claude Opus 4 (1M context) --- projects/miopen/cmake/EnableCompilerWarnings.cmake | 3 +++ projects/miopen/src/env.cpp | 2 ++ projects/miopen/src/include/miopen/file_lock.hpp | 2 ++ 3 files changed, 7 insertions(+) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index 940c23b70fc6..874e23a664eb 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -48,6 +48,9 @@ if(WIN32) list(APPEND __clang_cxx_compile_options -fms-extensions -fms-compatibility) + # External headers on Windows are not consistently marked as system + # includes, so -Wundef triggers on HIP/rocRAND headers + list(APPEND __cxx_compile_options -Wno-undef) endif() add_compile_options( diff --git a/projects/miopen/src/env.cpp b/projects/miopen/src/env.cpp index 24892ce7b3dd..5ad2a88f1eb5 100644 --- a/projects/miopen/src/env.cpp +++ b/projects/miopen/src/env.cpp @@ -35,7 +35,9 @@ #include #ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN +#endif #include #endif diff --git a/projects/miopen/src/include/miopen/file_lock.hpp b/projects/miopen/src/include/miopen/file_lock.hpp index ba27a179c5c2..6d83477ea230 100644 --- a/projects/miopen/src/include/miopen/file_lock.hpp +++ b/projects/miopen/src/include/miopen/file_lock.hpp @@ -13,7 +13,9 @@ #ifndef NOMINMAX #define NOMINMAX #endif +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN +#endif #include #include #else From a3f4c2133484804a99b806bbb8f5cd31918190ce Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sun, 10 May 2026 08:48:24 -0600 Subject: [PATCH 05/13] Add -Wshadow, remove dead suppressions, document warning roadmap Add -Wshadow (was active and clean under old -Weverything config). Remove -Wno-ignored-qualifiers and -Wno-deprecated (zero instances). 
Document in EnableCompilerWarnings.cmake: - Warnings we aspire to enable: -Wconversion, -Wold-style-cast, -Wsuggest-override, -Wdouble-promotion (with effort estimates) - Why each remaining suppression exists and how many instances need fixing to remove it Co-Authored-By: Claude Opus 4 (1M context) --- .../miopen/cmake/EnableCompilerWarnings.cmake | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index 874e23a664eb..0d5b9f074a4c 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -32,12 +32,16 @@ set(__cxx_compile_options -Wundef -Wunreachable-code -Wmissing-noreturn - # Suppress specific warnings - -Wno-c++11-narrowing - -Wno-ignored-qualifiers - -Wno-sign-compare - -Wno-deprecated-declarations - -Wno-deprecated + -Wshadow + # TODO: Working to enable these warnings. Each requires code cleanup first. + # -Wconversion # ~1000+ implicit narrowing/sign conversions to fix + # -Wold-style-cast # C-style casts to replace with static_cast/reinterpret_cast + # -Wsuggest-override # missing override on virtual function overrides + # -Wdouble-promotion # implicit float-to-double promotions + # Suppress specific warnings -- working to remove these by fixing the code + -Wno-c++11-narrowing # ~40 instances: narrowing in brace init (batchnorm, ck_impl, addkernels) + -Wno-sign-compare # ~1000+ instances: signed/unsigned comparisons throughout codebase + -Wno-deprecated-declarations # 2 deprecated MIOpen APIs still have callers ) set(__clang_cxx_compile_options From cf7947087bd1f9175f0f9278ec89cba3eb541ccf Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sun, 10 May 2026 09:18:55 -0600 Subject: [PATCH 06/13] Enable -Wshadow and -Wsuggest-override, remove dead suppressions Enable -Wshadow (was active under old -Weverything, MIOpen is clean). 
Enable -Wsuggest-override and add missing override specifiers to SolverDbId, GetDefaultPerformanceConfig, IsValidPerformanceConfig, and Search methods in transposed conv/pooling solver wrappers. Remove -Wno-ignored-qualifiers and -Wno-deprecated (zero instances in codebase, dead weight). Document aspirational warnings (-Wconversion, -Wold-style-cast, -Wdouble-promotion) and existing suppressions with instance counts to guide future cleanup. Co-Authored-By: Claude Opus 4 (1M context) --- .../miopen/cmake/EnableCompilerWarnings.cmake | 2 +- .../src/include/miopen/conv/solvers.hpp | 16 +++++----- .../src/include/miopen/pooling/solvers.hpp | 32 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index 0d5b9f074a4c..92c6f0ff6bd9 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -33,10 +33,10 @@ set(__cxx_compile_options -Wunreachable-code -Wmissing-noreturn -Wshadow + -Wsuggest-override # TODO: Working to enable these warnings. Each requires code cleanup first. 
# -Wconversion # ~1000+ implicit narrowing/sign conversions to fix # -Wold-style-cast # C-style casts to replace with static_cast/reinterpret_cast - # -Wsuggest-override # missing override on virtual function overrides # -Wdouble-promotion # implicit float-to-double promotions # Suppress specific warnings -- working to remove these by fixing the code -Wno-c++11-narrowing # ~40 instances: narrowing in brace init (batchnorm, ck_impl, addkernels) diff --git a/projects/miopen/src/include/miopen/conv/solvers.hpp b/projects/miopen/src/include/miopen/conv/solvers.hpp index e27ea412219f..bc7079e17e51 100644 --- a/projects/miopen/src/include/miopen/conv/solvers.hpp +++ b/projects/miopen/src/include/miopen/conv/solvers.hpp @@ -4829,18 +4829,18 @@ struct ConvWinogradNHWCTransposingTunableSolver struct TransposedConvBinWinograd3x3U final : ConvWinogradNHWCTransposingSolver { - const std::string& SolverDbId() const { return GetSolverDbId(); } + const std::string& SolverDbId() const override { return GetSolverDbId(); } }; struct TransposedConvBinWinogradRxS final : ConvWinogradNHWCTransposingSolver { - const std::string& SolverDbId() const { return GetSolverDbId(); } + const std::string& SolverDbId() const override { return GetSolverDbId(); } }; struct TransposedConvBinWinogradRxSf2x3g1 final : ConvWinogradNHWCTransposingSolver { - const std::string& SolverDbId() const + const std::string& SolverDbId() const override { return GetSolverDbId(); } @@ -4850,7 +4850,7 @@ struct TransposedConvMPBidirectWinograd final : ConvWinogradNHWCTransposingSolver< ConvMPBidirectWinograd> { - const std::string& SolverDbId() const + const std::string& SolverDbId() const override { return this->template GetSolverDbId< TransposedConvMPBidirectWinograd>(); @@ -4862,7 +4862,7 @@ struct TransposedConvWinograd3x3MultipassWrW final : ConvWinogradNHWCTransposingSolver< ConvWinograd3x3MultipassWrW> { - const std::string& SolverDbId() const + const std::string& SolverDbId() const override { return 
this->template GetSolverDbId struct TransposedConvWinoFuryRxS final : ConvWinogradNHWCTransposingSolver> { - const std::string& SolverDbId() const + const std::string& SolverDbId() const override { return this->template GetSolverDbId>(); } @@ -4885,7 +4885,7 @@ template struct TransposedConvWinoRageRxS final : ConvWinogradNHWCTransposingSolver> { - const std::string& SolverDbId() const + const std::string& SolverDbId() const override { return this->template GetSolverDbId>(); } @@ -4907,7 +4907,7 @@ struct TransposedConvMPBidirectWinograd_xdlops final : ConvWinogradNHWCTransposingTunableSolver< ConvMPBidirectWinograd_xdlops> { - const std::string& SolverDbId() const + const std::string& SolverDbId() const override { return this->template GetSolverDbId { - const std::string& SolverDbId() const { return GetSolverDbId(); } + const std::string& SolverDbId() const override { return GetSolverDbId(); } PoolingForward2d::PerformanceConfigType GetDefaultPerformanceConfig(const ExecutionContext& ctx, - const miopen::pooling::ProblemDescription& problem) const + const miopen::pooling::ProblemDescription& problem) const override { return PoolingForward2d{}.GetDefaultPerformanceConfig(ctx, problem); } bool IsValidPerformanceConfig(const ExecutionContext& ctx, const miopen::pooling::ProblemDescription& problem, - const PoolingForward2d::PerformanceConfigType& config) const + const PoolingForward2d::PerformanceConfigType& config) const override { return PoolingForward2d{}.IsValidPerformanceConfig(ctx, problem, config); } PoolingForward2d::PerformanceConfigType Search(const ExecutionContext& context, const miopen::pooling::ProblemDescription& problem, - const AnyInvokeParams& invoke_context) const + const AnyInvokeParams& invoke_context) const override { return PoolingForward2d{}.Search(context, problem, invoke_context); } @@ -354,23 +354,23 @@ struct TransposedPoolingFwd2d final : PoolingFwdNCHWTransposingSolver { - const std::string& SolverDbId() const { return GetSolverDbId(); } 
+ const std::string& SolverDbId() const override { return GetSolverDbId(); } PoolingForwardNd::PerformanceConfigType GetDefaultPerformanceConfig(const ExecutionContext& ctx, - const miopen::pooling::ProblemDescription& problem) const + const miopen::pooling::ProblemDescription& problem) const override { return PoolingForwardNd{}.GetDefaultPerformanceConfig(ctx, problem); } bool IsValidPerformanceConfig(const ExecutionContext& ctx, const miopen::pooling::ProblemDescription& problem, - const PoolingForwardNd::PerformanceConfigType& config) const + const PoolingForwardNd::PerformanceConfigType& config) const override { return PoolingForwardNd{}.IsValidPerformanceConfig(ctx, problem, config); } PoolingForwardNd::PerformanceConfigType Search(const ExecutionContext& context, const miopen::pooling::ProblemDescription& problem, - const AnyInvokeParams& invoke_context) const + const AnyInvokeParams& invoke_context) const override { return PoolingForwardNd{}.Search(context, problem, invoke_context); } @@ -474,23 +474,23 @@ struct PoolingBwdNCHWTransposingSolver struct TransposedPoolingBwd2d final : PoolingBwdNCHWTransposingSolver { - const std::string& SolverDbId() const { return GetSolverDbId(); } + const std::string& SolverDbId() const override { return GetSolverDbId(); } PoolingBackward2d::PerformanceConfigType GetDefaultPerformanceConfig(const ExecutionContext& ctx, - const miopen::pooling::ProblemDescription& problem) const + const miopen::pooling::ProblemDescription& problem) const override { return PoolingBackward2d{}.GetDefaultPerformanceConfig(ctx, problem); } bool IsValidPerformanceConfig(const ExecutionContext& ctx, const miopen::pooling::ProblemDescription& problem, - const PoolingBackward2d::PerformanceConfigType& config) const + const PoolingBackward2d::PerformanceConfigType& config) const override { return PoolingBackward2d{}.IsValidPerformanceConfig(ctx, problem, config); } PoolingBackward2d::PerformanceConfigType Search(const ExecutionContext& context, const 
miopen::pooling::ProblemDescription& problem, - const AnyInvokeParams& invoke_context) const + const AnyInvokeParams& invoke_context) const override { return PoolingBackward2d{}.Search(context, problem, invoke_context); } @@ -498,23 +498,23 @@ struct TransposedPoolingBwd2d final : PoolingBwdNCHWTransposingSolver { - const std::string& SolverDbId() const { return GetSolverDbId(); } + const std::string& SolverDbId() const override { return GetSolverDbId(); } PoolingBackwardNd::PerformanceConfigType GetDefaultPerformanceConfig(const ExecutionContext& ctx, - const miopen::pooling::ProblemDescription& problem) const + const miopen::pooling::ProblemDescription& problem) const override { return PoolingBackwardNd{}.GetDefaultPerformanceConfig(ctx, problem); } bool IsValidPerformanceConfig(const ExecutionContext& ctx, const miopen::pooling::ProblemDescription& problem, - const PoolingBackwardNd::PerformanceConfigType& config) const + const PoolingBackwardNd::PerformanceConfigType& config) const override { return PoolingBackwardNd{}.IsValidPerformanceConfig(ctx, problem, config); } PoolingBackwardNd::PerformanceConfigType Search(const ExecutionContext& context, const miopen::pooling::ProblemDescription& problem, - const AnyInvokeParams& invoke_context) const + const AnyInvokeParams& invoke_context) const override { return PoolingBackwardNd{}.Search(context, problem, invoke_context); } From 642b521f6288ae07cc4f6841fd96ec44f1796be7 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sun, 10 May 2026 10:52:19 -0600 Subject: [PATCH 07/13] Enable -Wold-style-cast and replace C-style casts with static_cast Replace 7 C-style casts with static_cast in batchnorm, implicit gemm, and kthvalue driver code. Enable -Wold-style-cast to prevent new C-style casts from being introduced. 
Co-Authored-By: Claude Opus 4 (1M context) --- projects/miopen/cmake/EnableCompilerWarnings.cmake | 2 +- projects/miopen/driver/kthvalue_driver.hpp | 2 +- .../src/include/miopen/batchnorm/common_spatial.hpp | 8 ++++---- .../src/include/miopen/solver/implicitgemm_util.hpp | 2 +- .../solver/conv/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index 92c6f0ff6bd9..b1b73ba88760 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -34,9 +34,9 @@ set(__cxx_compile_options -Wmissing-noreturn -Wshadow -Wsuggest-override + -Wold-style-cast # TODO: Working to enable these warnings. Each requires code cleanup first. # -Wconversion # ~1000+ implicit narrowing/sign conversions to fix - # -Wold-style-cast # C-style casts to replace with static_cast/reinterpret_cast # -Wdouble-promotion # implicit float-to-double promotions # Suppress specific warnings -- working to remove these by fixing the code -Wno-c++11-narrowing # ~40 instances: narrowing in brace init (batchnorm, ck_impl, addkernels) diff --git a/projects/miopen/driver/kthvalue_driver.hpp b/projects/miopen/driver/kthvalue_driver.hpp index 75f7e5b535b2..44010c173107 100644 --- a/projects/miopen/driver/kthvalue_driver.hpp +++ b/projects/miopen/driver/kthvalue_driver.hpp @@ -294,7 +294,7 @@ int KthvalueDriver::RunForwardGPU() outputDesc, output_dev->GetMem(), indicesDesc, - (size_t*)indices_dev->GetMem(), + static_cast<size_t*>(indices_dev->GetMem()), k, dim, keepDim); diff --git a/projects/miopen/src/include/miopen/batchnorm/common_spatial.hpp b/projects/miopen/src/include/miopen/batchnorm/common_spatial.hpp index af563db96a41..e7e59436a279 100644 --- a/projects/miopen/src/include/miopen/batchnorm/common_spatial.hpp +++ b/projects/miopen/src/include/miopen/batchnorm/common_spatial.hpp @@ -170,7 +170,7
@@ inline int GetStashMethod(bool IsLayoutNHWC, (in_cstride) % ylocalsize == 0 ? ylocalsize : (in_cstride) % ylocalsize; unsigned int last_zlocalsize = n % (zlocalsize * nelements) == 0 ? (zlocalsize * nelements) : n % (zlocalsize * nelements); - if(last_ylocalsize < stash_values && last_zlocalsize >= (size_t)stash_values) + if(last_ylocalsize < stash_values && last_zlocalsize >= static_cast<size_t>(stash_values)) { stash_method = 1; } @@ -400,8 +400,8 @@ inline bool IsSpatialMultipleApplicable(const miopen::batchnorm::ProblemDescript // be large enough // - if C is not multiple of 2, intermediate results are stored in N dimension splitting // float values in group of 2 bytes. N must be large enough - if((!bfp32parm && (c % 2 != 0 && last_zlocalsize < (size_t)stash_values)) || - ((last_ylocalsize < stash_values) && (last_zlocalsize < (size_t)stash_values))) + if((!bfp32parm && (c % 2 != 0 && last_zlocalsize < static_cast<size_t>(stash_values))) || + ((last_ylocalsize < stash_values) && (last_zlocalsize < static_cast<size_t>(stash_values)))) { return false; } @@ -425,7 +425,7 @@ inline bool IsSpatialMultipleApplicable(const miopen::batchnorm::ProblemDescript // - if last block doesn't fit, intermediate results are stored in N dimension which must // be large enough stash_values *= (problem.GetXDesc().GetType() == miopenFloat ?
1 : 2); - if(last_ylocalsize < stash_values && last_zlocalsize < (size_t)stash_values) + if(last_ylocalsize < stash_values && last_zlocalsize < static_cast<size_t>(stash_values)) { return false; } diff --git a/projects/miopen/src/include/miopen/solver/implicitgemm_util.hpp b/projects/miopen/src/include/miopen/solver/implicitgemm_util.hpp index 3f4e3ecde5d7..e6d5cb6cb5e8 100644 --- a/projects/miopen/src/include/miopen/solver/implicitgemm_util.hpp +++ b/projects/miopen/src/include/miopen/solver/implicitgemm_util.hpp @@ -177,7 +177,7 @@ inline T igemm_get_max_gks(T gemm_k, T gemm_k_per_block, T max_log2_splits) return 0; T rem = gemm_k / gemm_k_per_block; T rem_pow2 = rem & (~(rem - 1)); - T gks = (T)log2(rem_pow2); + T gks = static_cast<T>(log2(rem_pow2)); if(gks > max_log2_splits) gks = max_log2_splits; diff --git a/projects/miopen/src/solver/conv/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp b/projects/miopen/src/solver/conv/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp index d44ab5ba59fa..4a30062d10c2 100644 --- a/projects/miopen/src/solver/conv/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp +++ b/projects/miopen/src/solver/conv/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp @@ -873,7 +873,7 @@ bool PerformanceConfigAsmImplicitGemmGTCBwdXdlopsNHWC::IsValid( n = n | (n >> 16); return n - (n >> 1); }(padded_k_num); - int k_pow2 = (int)log2(prev_pow2); + int k_pow2 = static_cast<int>(log2(prev_pow2)); return std::min(k_pow2, BWD_MAX_GEMM_K_SPLITS); } From 0f98b125fa9ff4599f59bd2e8965186bbef4b0c0 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sun, 10 May 2026 13:25:14 -0600 Subject: [PATCH 08/13] Fix warnings in test code and CI -Wundef from rocrand stage headers Fix old-style casts and deprecated C++20 lambda captures found when building test targets (previously EXCLUDE_FROM_ALL, now built via 'ninja tests'): - test/gtest/kthvalue.hpp: (size_t*) -> static_cast<size_t*> - test/gtest/na_inference_find2.cpp: (void*) -> static_cast<void*> - test/gtest/rnn_seq_api.hpp: [=] -> [=, this] in member function lambda -
test/driver.hpp: [=] -> [=, this] for member function lambdas - test/conv_common.hpp: [=] -> [=, this] for member function lambdas Fix CI -Wundef from rocrand headers: in TheRock builds, rocrand exposes both stage/include (non-system) and dist/include via its imported cmake target. Add a foreach loop in src/CMakeLists.txt to detect the rocrand imported target and mark its INTERFACE_INCLUDE_DIRECTORIES as SYSTEM, so rocrand_common.h's bare #if __HIP_DEVICE_COMPILE__ doesn't trigger -Werror,-Wundef during host compilation. Co-Authored-By: Claude Sonnet 4 --- projects/miopen/src/CMakeLists.txt | 13 ++++++ projects/miopen/test/conv_common.hpp | 4 +- projects/miopen/test/driver.hpp | 42 +++++++++---------- projects/miopen/test/gtest/kthvalue.hpp | 2 +- .../miopen/test/gtest/na_inference_find2.cpp | 2 +- projects/miopen/test/gtest/rnn_seq_api.hpp | 2 +- 6 files changed, 39 insertions(+), 26 deletions(-) diff --git a/projects/miopen/src/CMakeLists.txt b/projects/miopen/src/CMakeLists.txt index cd4ee6ab6dcb..6475ff172a6f 100644 --- a/projects/miopen/src/CMakeLists.txt +++ b/projects/miopen/src/CMakeLists.txt @@ -1014,6 +1014,19 @@ if(hipblaslt_FOUND) endif() target_include_directories( MIOpen SYSTEM PRIVATE ${ROCRAND_INCLUDE_DIR} ) +# Also mark rocrand's imported target includes as SYSTEM so rocrand headers don't +# trigger -Wundef (rocrand_common.h uses #if __HIP_DEVICE_COMPILE__ without #ifdef). +# In TheRock builds rocrand exposes both stage/include and dist/include; only dist/ +# is covered by ROCRAND_INCLUDE_DIR above. 
+foreach(_rocrand_target rocrand rocrand::rocrand roc::rocrand) + if(TARGET ${_rocrand_target}) + get_target_property(_rocrand_iface_incs ${_rocrand_target} INTERFACE_INCLUDE_DIRECTORIES) + if(_rocrand_iface_incs) + target_include_directories(MIOpen SYSTEM PRIVATE ${_rocrand_iface_incs}) + endif() + break() + endif() +endforeach() # For backward compatibility with ROCm 5.3 # Build with library libMLIRMIOpen diff --git a/projects/miopen/test/conv_common.hpp b/projects/miopen/test/conv_common.hpp index 5e141908bab7..87f116690296 100644 --- a/projects/miopen/test/conv_common.hpp +++ b/projects/miopen/test/conv_common.hpp @@ -2272,14 +2272,14 @@ struct conv_driver : test_driver { auto output = get_output_tensor(filter, input, weights, out_layout); - auto gen_positive_value = [=](auto...) { + auto gen_positive_value = [=, this](auto...) { auto data_type = input.desc.GetType(); int v_max = is_int8 ? 16 : (data_type == miopenHalf) ? 4 : 17; return gen_float ? prng::gen_canonical() : static_cast(prng::gen_A_to_B(1, v_max)); }; - auto gen_sign_value = [=](auto... is) { + auto gen_sign_value = [=, this](auto... is) { auto data_type = input.desc.GetType(); int v_max = is_int8 ? 16 : (data_type == miopenHalf) ? 4 : 17; return gen_float ? 
prng::gen_A_to_B(-1.0, 1.0) diff --git a/projects/miopen/test/driver.hpp b/projects/miopen/test/driver.hpp index 13097aaca388..53b08f24492e 100644 --- a/projects/miopen/test/driver.hpp +++ b/projects/miopen/test/driver.hpp @@ -112,7 +112,7 @@ struct test_driver template void add_source(Source src, T& x) { - data_sources.push_back([=, &x](std::function callback) { + data_sources.push_back([=, this, &x](std::function callback) { for(auto y : src()) // NOLINT { x = T(y); @@ -332,7 +332,7 @@ struct test_driver template generate_tensor_t generate_tensor(std::set dims, X single, G g) { - return {[=]() -> std::set { + return {[=, this]() -> std::set { if(full_set) return dims; else @@ -351,7 +351,7 @@ struct test_driver template auto lazy_generate_tensor(F f, G g) -> generate_tensor_t, G> { - return {[=]() -> decltype(f()) { + return {[=, this]() -> decltype(f()) { if(full_set) return f(); else @@ -363,7 +363,7 @@ struct test_driver template generate_tensor_t lazy_generate_tensor(F f, X single, G g) { - return {[=]() -> std::set { + return {[=, this]() -> std::set { if(full_set) return f(); else @@ -382,7 +382,7 @@ struct test_driver template generate_tensor_t, G> get_tensor(F gen_shapes, G gen_value) { - return lazy_generate_tensor([=] { return gen_shapes(batch_factor); }, gen_value); + return lazy_generate_tensor([=, this] { return gen_shapes(batch_factor); }, gen_value); } template @@ -390,7 +390,7 @@ struct test_driver get_bn_spatial_input_tensor(G tensor_elem_gen = tensor_elem_gen_integer{}) { return lazy_generate_tensor( - [=] { return get_bn_spatial_inputs(batch_factor); }, {4, 64, 28, 28}, tensor_elem_gen); + [=, this] { return get_bn_spatial_inputs(batch_factor); }, {4, 64, 28, 28}, tensor_elem_gen); } template @@ -398,7 +398,7 @@ struct test_driver get_bn_peract_input_tensor(G tensor_elem_gen = tensor_elem_gen_integer{}) { return lazy_generate_tensor( - [=] { return get_bn_peract_inputs(batch_factor); }, {16, 32, 8, 8}, tensor_elem_gen); + [=, this] { return 
get_bn_peract_inputs(batch_factor); }, {16, 32, 8, 8}, tensor_elem_gen); } template @@ -406,14 +406,14 @@ struct test_driver get_input_tensor(G tensor_elem_gen = tensor_elem_gen_integer{}) { return lazy_generate_tensor( - [=] { return get_inputs(batch_factor); }, {16, 32, 8, 8}, tensor_elem_gen); + [=, this] { return get_inputs(batch_factor); }, {16, 32, 8, 8}, tensor_elem_gen); } template generate_tensor_t, G> get_3d_bn_spatial_input_tensor(G tensor_elem_gen = tensor_elem_gen_integer{}) { - return lazy_generate_tensor([=] { return get_3d_bn_spatial_inputs(batch_factor); }, + return lazy_generate_tensor([=, this] { return get_3d_bn_spatial_inputs(batch_factor); }, {16, 32, 8, 8, 8}, tensor_elem_gen); } @@ -422,7 +422,7 @@ struct test_driver generate_tensor_t, G> get_3d_bn_peract_input_tensor(G tensor_elem_gen = tensor_elem_gen_integer{}) { - return lazy_generate_tensor([=] { return get_3d_bn_peract_inputs(batch_factor); }, + return lazy_generate_tensor([=, this] { return get_3d_bn_peract_inputs(batch_factor); }, {16, 32, 8, 8, 8}, tensor_elem_gen); } @@ -432,7 +432,7 @@ struct test_driver get_weights_tensor(G tensor_elem_gen = tensor_elem_gen_integer{}) { return lazy_generate_tensor( - [=] { return get_weights(batch_factor); }, {64, 32, 5, 5}, tensor_elem_gen); + [=, this] { return get_weights(batch_factor); }, {64, 32, 5, 5}, tensor_elem_gen); } template @@ -456,7 +456,7 @@ struct test_driver template generate_data_t> generate_data(std::vector dims, T single) { - return {[=]() -> std::vector { + return {[=, this]() -> std::vector { if(full_set) return dims; else @@ -468,7 +468,7 @@ struct test_driver generate_data_t> generate_data_limited(std::vector dims, int limit_multiplier, T single) { - return {[=]() -> std::vector { + return {[=, this]() -> std::vector { if(full_set) { if(limit_set > 0) @@ -504,7 +504,7 @@ struct test_driver template generate_data_t> generate_data(std::vector dims) { - return {[=]() -> std::vector { + return {[=, this]() -> std::vector { 
if(full_set) return dims; else @@ -515,13 +515,13 @@ struct test_driver template generate_data_t> generate_multi_data(std::vector> multi_dims) { - return {[=]() -> std::vector { return generate_data(multi_dims.at(dataset_id))(T{}); }}; + return {[=, this]() -> std::vector { return generate_data(multi_dims.at(dataset_id))(T{}); }}; } template generate_data_t> generate_data_limited(std::vector dims, int limit_multiplier) { - return {[=]() -> std::vector { + return {[=, this]() -> std::vector { if(full_set) { if(limit_set > 0) @@ -547,7 +547,7 @@ struct test_driver generate_data_t> generate_multi_data_limited(std::vector> multi_dims, int limit_multiplier) { - return {[=]() -> std::vector { + return {[=, this]() -> std::vector { return generate_data_limited(multi_dims.at(dataset_id), limit_multiplier)(T{}); }}; } @@ -555,7 +555,7 @@ struct test_driver template auto lazy_generate_data(F f, T single) -> generate_data_t { - return {[=]() -> decltype(f()) { + return {[=, this]() -> decltype(f()) { if(full_set) return f(); else @@ -566,7 +566,7 @@ struct test_driver template auto lazy_generate_data(F f) -> generate_data_t { - return {[=]() -> decltype(f()) { + return {[=, this]() -> decltype(f()) { if(full_set) return f(); else @@ -577,7 +577,7 @@ struct test_driver template generate_data_t> generate_single(T single) { - return {[=]() -> std::vector { return {single}; }}; + return {[=, this]() -> std::vector { return {single}; }}; } template @@ -613,7 +613,7 @@ struct test_driver auto verify_reporter() { - return [=](bool pass, + return [=, this](bool pass, std::vector error, const auto& out_cpu, const auto& out_gpu, diff --git a/projects/miopen/test/gtest/kthvalue.hpp b/projects/miopen/test/gtest/kthvalue.hpp index 2aa7e6fd41d1..62d5ebb5905a 100644 --- a/projects/miopen/test/gtest/kthvalue.hpp +++ b/projects/miopen/test/gtest/kthvalue.hpp @@ -156,7 +156,7 @@ struct KthvalueFwdTest : public ::testing::TestWithParam output.desc, output_dev.get(), indicesDesc, - 
(size_t*)indices_dev.get(), + static_cast(indices_dev.get()), config.k, config.dim, config.keepDim); diff --git a/projects/miopen/test/gtest/na_inference_find2.cpp b/projects/miopen/test/gtest/na_inference_find2.cpp index 0313097c2242..fa45d8eddcc8 100644 --- a/projects/miopen/test/gtest/na_inference_find2.cpp +++ b/projects/miopen/test/gtest/na_inference_find2.cpp @@ -152,7 +152,7 @@ struct verify_inference_batchnorm_activ miopenTensorArgument_t{miopenTensorBatchnormBias, nullptr, bnbias_dev.get()}, miopenTensorArgument_t{miopenTensorBatchnormEstimatedMean, nullptr, estMean_dev.get()}, miopenTensorArgument_t{miopenTensorBatchnormEstimatedVariance, nullptr, estVariance_dev.get()}, - miopenTensorArgument_t{miopenScalarBatchnormEpsilon, nullptr, (void*)&mut_eps}, + miopenTensorArgument_t{miopenScalarBatchnormEpsilon, nullptr, static_cast(&mut_eps)}, // clang-format on }; diff --git a/projects/miopen/test/gtest/rnn_seq_api.hpp b/projects/miopen/test/gtest/rnn_seq_api.hpp index b96a7537c3fd..0a95cd446eb8 100644 --- a/projects/miopen/test/gtest/rnn_seq_api.hpp +++ b/projects/miopen/test/gtest/rnn_seq_api.hpp @@ -1818,7 +1818,7 @@ struct RNNSeqApiCommon : public ::testing::TestWithParam seqTensor dy(output); const auto num_hidden_layers = numLayers * ((dirMode != 0) ? 2 : 1); - tensor hx = [=]() { + tensor hx = [=, this]() { if(pytorchTensorDescriptorFormat) return tensor(std::vector{num_hidden_layers, batchSize, hiddenSize, 1, 1}); else From 9db616cce5dfb17687857b2d445cf808c0a07cd3 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sun, 10 May 2026 13:57:24 -0600 Subject: [PATCH 09/13] Fix -Wundef from rocrand headers in MIOpenDriver rocrand_wrapper.cpp includes the rocrand headers directly, so MIOpenDriver is exposed to the same rocrand stage/include -Wundef issue as the MIOpen library target. Apply the same imported-target SYSTEM include workaround that was added to src/CMakeLists.txt in the previous commit.
Co-Authored-By: Claude Sonnet 4 --- projects/miopen/driver/CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/projects/miopen/driver/CMakeLists.txt b/projects/miopen/driver/CMakeLists.txt index 4aac2358c432..263d51521505 100644 --- a/projects/miopen/driver/CMakeLists.txt +++ b/projects/miopen/driver/CMakeLists.txt @@ -75,6 +75,18 @@ add_dependencies(MIOpenDriver generate_kernels) target_include_directories(MIOpenDriver PRIVATE ../src/kernels) # MIOpen_with_plugins ensures CK plugin .so's are built alongside MIOpenDriver target_link_libraries(MIOpenDriver PRIVATE MIOpen_with_plugins Threads::Threads roc::rocrand nlohmann_json::nlohmann_json ) +# rocrand_wrapper.cpp includes directly. Mark rocrand's imported +# target includes as SYSTEM so rocrand headers don't trigger -Wundef (rocrand_common.h +# uses bare #if __HIP_DEVICE_COMPILE__ which is undefined during host compilation). +foreach(_rocrand_target rocrand rocrand::rocrand roc::rocrand) + if(TARGET ${_rocrand_target}) + get_target_property(_rocrand_iface_incs ${_rocrand_target} INTERFACE_INCLUDE_DIRECTORIES) + if(_rocrand_iface_incs) + target_include_directories(MIOpenDriver SYSTEM PRIVATE ${_rocrand_iface_incs}) + endif() + break() + endif() +endforeach() if(NOT MIOPEN_EMBED_DB STREQUAL "") target_link_libraries(MIOpenDriver PRIVATE $ ) endif() From 56f3fcf797e42631e740e2c97057eabffd698111 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sun, 10 May 2026 15:52:26 -0600 Subject: [PATCH 10/13] Drop -Wundef: TheRock injects rocRAND/stage/include globally as -I MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rocrand_common.h uses bare #if __HIP_DEVICE_COMPILE__ (not #ifdef), which triggers -Wundef when included via a non-system path. 
In the TheRock build system, rocRAND/stage/include is injected as a global -I flag at the build-system level — not via rocrand's CMake imported target — so it cannot be overridden to -isystem from within MIOpen's CMakeLists. Remove -Wundef from the warning set and document the root cause. Also remove the now-unnecessary roc::rocrand INTERFACE_INCLUDE_DIRECTORIES SYSTEM workarounds from src/ and driver/ CMakeLists, and remove the WIN32-only -Wno-undef suppression (no longer needed since -Wundef is gone). Co-Authored-By: Claude Sonnet 4 --- projects/miopen/cmake/EnableCompilerWarnings.cmake | 7 +++---- projects/miopen/driver/CMakeLists.txt | 12 ------------ projects/miopen/src/CMakeLists.txt | 13 ------------- 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index b1b73ba88760..ed0ef462bcab 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -29,7 +29,6 @@ set(__cxx_compile_options -Wall -Wextra # Additional warnings not included in -Wall/-Wextra - -Wundef -Wunreachable-code -Wmissing-noreturn -Wshadow @@ -38,6 +37,9 @@ set(__cxx_compile_options # TODO: Working to enable these warnings. Each requires code cleanup first. # -Wconversion # ~1000+ implicit narrowing/sign conversions to fix # -Wdouble-promotion # implicit float-to-double promotions + # -Wundef # blocked by rocrand: rocrand_common.h uses bare #if __HIP_DEVICE_COMPILE__ + # # (not #ifdef). TheRock injects rocRAND/stage/include as a global -I path + # # at the build-system level, so we cannot mark it as -isystem from MIOpen. 
# Suppress specific warnings -- working to remove these by fixing the code -Wno-c++11-narrowing # ~40 instances: narrowing in brace init (batchnorm, ck_impl, addkernels) -Wno-sign-compare # ~1000+ instances: signed/unsigned comparisons throughout codebase @@ -52,9 +54,6 @@ if(WIN32) list(APPEND __clang_cxx_compile_options -fms-extensions -fms-compatibility) - # External headers on Windows are not consistently marked as system - # includes, so -Wundef triggers on HIP/rocRAND headers - list(APPEND __cxx_compile_options -Wno-undef) endif() add_compile_options( diff --git a/projects/miopen/driver/CMakeLists.txt b/projects/miopen/driver/CMakeLists.txt index 263d51521505..4aac2358c432 100644 --- a/projects/miopen/driver/CMakeLists.txt +++ b/projects/miopen/driver/CMakeLists.txt @@ -75,18 +75,6 @@ add_dependencies(MIOpenDriver generate_kernels) target_include_directories(MIOpenDriver PRIVATE ../src/kernels) # MIOpen_with_plugins ensures CK plugin .so's are built alongside MIOpenDriver target_link_libraries(MIOpenDriver PRIVATE MIOpen_with_plugins Threads::Threads roc::rocrand nlohmann_json::nlohmann_json ) -# rocrand_wrapper.cpp includes directly. Mark rocrand's imported -# target includes as SYSTEM so rocrand headers don't trigger -Wundef (rocrand_common.h -# uses bare #if __HIP_DEVICE_COMPILE__ which is undefined during host compilation). 
-foreach(_rocrand_target rocrand rocrand::rocrand roc::rocrand) - if(TARGET ${_rocrand_target}) - get_target_property(_rocrand_iface_incs ${_rocrand_target} INTERFACE_INCLUDE_DIRECTORIES) - if(_rocrand_iface_incs) - target_include_directories(MIOpenDriver SYSTEM PRIVATE ${_rocrand_iface_incs}) - endif() - break() - endif() -endforeach() if(NOT MIOPEN_EMBED_DB STREQUAL "") target_link_libraries(MIOpenDriver PRIVATE $ ) endif() diff --git a/projects/miopen/src/CMakeLists.txt b/projects/miopen/src/CMakeLists.txt index 6475ff172a6f..cd4ee6ab6dcb 100644 --- a/projects/miopen/src/CMakeLists.txt +++ b/projects/miopen/src/CMakeLists.txt @@ -1014,19 +1014,6 @@ if(hipblaslt_FOUND) endif() target_include_directories( MIOpen SYSTEM PRIVATE ${ROCRAND_INCLUDE_DIR} ) -# Also mark rocrand's imported target includes as SYSTEM so rocrand headers don't -# trigger -Wundef (rocrand_common.h uses #if __HIP_DEVICE_COMPILE__ without #ifdef). -# In TheRock builds rocrand exposes both stage/include and dist/include; only dist/ -# is covered by ROCRAND_INCLUDE_DIR above. -foreach(_rocrand_target rocrand rocrand::rocrand roc::rocrand) - if(TARGET ${_rocrand_target}) - get_target_property(_rocrand_iface_incs ${_rocrand_target} INTERFACE_INCLUDE_DIRECTORIES) - if(_rocrand_iface_incs) - target_include_directories(MIOpen SYSTEM PRIVATE ${_rocrand_iface_incs}) - endif() - break() - endif() -endforeach() # For backward compatibility with ROCm 5.3 # Build with library libMLIRMIOpen From c3307c2b49d06dff8fd569b05d7281e4a820587f Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Sun, 10 May 2026 18:35:08 -0600 Subject: [PATCH 11/13] Re-enable -Wundef and -Wold-style-cast, suppress at rocrand include sites TheRock injects rocRAND/stage/include globally as -I (not -isystem), causing rocrand's own headers to trigger -Wold-style-cast and -Wundef under our strict warning flags. 
Rather than disabling these warnings globally, suppress them only at the two points where rocrand headers are included: - src/kernels/miopen_rocrand.hpp: wraps its rocrand header include - driver/rocrand_wrapper.cpp: wraps its rocrand header include Both use #pragma clang diagnostic push/pop so the suppression is scoped tightly to the rocrand include and all MIOpen code continues to be checked. Re-add -Wundef and -Wold-style-cast to EnableCompilerWarnings.cmake. Co-Authored-By: Claude Sonnet 4 --- projects/miopen/cmake/EnableCompilerWarnings.cmake | 4 +--- projects/miopen/driver/rocrand_wrapper.cpp | 7 +++++++ projects/miopen/src/kernels/miopen_rocrand.hpp | 9 ++++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index ed0ef462bcab..c08a3611e329 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -29,6 +29,7 @@ set(__cxx_compile_options -Wall -Wextra # Additional warnings not included in -Wall/-Wextra + -Wundef -Wunreachable-code -Wmissing-noreturn -Wshadow @@ -37,9 +38,6 @@ set(__cxx_compile_options # TODO: Working to enable these warnings. Each requires code cleanup first. # -Wconversion # ~1000+ implicit narrowing/sign conversions to fix # -Wdouble-promotion # implicit float-to-double promotions - # -Wundef # blocked by rocrand: rocrand_common.h uses bare #if __HIP_DEVICE_COMPILE__ - # # (not #ifdef). TheRock injects rocRAND/stage/include as a global -I path - # # at the build-system level, so we cannot mark it as -isystem from MIOpen.
# Suppress specific warnings -- working to remove these by fixing the code -Wno-c++11-narrowing # ~40 instances: narrowing in brace init (batchnorm, ck_impl, addkernels) -Wno-sign-compare # ~1000+ instances: signed/unsigned comparisons throughout codebase diff --git a/projects/miopen/driver/rocrand_wrapper.cpp b/projects/miopen/driver/rocrand_wrapper.cpp index 2a7d910c4be7..fe8ba5fe9687 100644 --- a/projects/miopen/driver/rocrand_wrapper.cpp +++ b/projects/miopen/driver/rocrand_wrapper.cpp @@ -26,7 +26,14 @@ #include "rocrand_wrapper.hpp" +// Suppress warnings from rocrand's own headers: in TheRock builds rocrand/stage/include +// is injected globally as -I (not -isystem), so rocrand headers are not treated as system +// includes and their internal issues trigger our warnings. +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" #include +#pragma clang diagnostic pop namespace gpumemrand { diff --git a/projects/miopen/src/kernels/miopen_rocrand.hpp b/projects/miopen/src/kernels/miopen_rocrand.hpp index 3effb18802e2..019e14193be7 100644 --- a/projects/miopen/src/kernels/miopen_rocrand.hpp +++ b/projects/miopen/src/kernels/miopen_rocrand.hpp @@ -44,12 +44,19 @@ #define ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE // Use inlined rocrand header for runtime compilation to avoid external dependency -// For host code compilation, use the regular rocrand header +// For host code compilation, use the regular rocrand header. +// Suppress warnings from rocrand's own headers: in TheRock builds rocrand/stage/include +// is injected globally as -I (not -isystem), so rocrand headers are not treated as system +// includes and their internal issues trigger our warnings. 
+#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wundef" #ifdef MIOPEN_HIP_RUNTIME_COMPILE #include "rocrand_xorwow_inlined.h" #else #include #endif +#pragma clang diagnostic pop namespace prng { From d04107b6f8ab72d4de5431ef194695ec19508ab9 Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Mon, 11 May 2026 06:54:38 -0600 Subject: [PATCH 12/13] Fix -Wunused-parameter in kernel_tuning_net.cpp solver_nm is only used inside the MIOPEN_ENABLE_AI_KERNEL_TUNING && MIOPEN_USE_COMPOSABLEKERNEL guard. When those macros are not defined the parameter is unused and -Werror,-Wunused-parameter fires in CI. Co-Authored-By: Claude Sonnet 4 --- projects/miopen/test/gtest/kernel_tuning_net.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/miopen/test/gtest/kernel_tuning_net.cpp b/projects/miopen/test/gtest/kernel_tuning_net.cpp index 304adb9800d4..ab103db57853 100644 --- a/projects/miopen/test/gtest/kernel_tuning_net.cpp +++ b/projects/miopen/test/gtest/kernel_tuning_net.cpp @@ -197,7 +197,7 @@ template class KernelTuningNetTest : public ::testing::TestWithParam { protected: - void TestParameterPredictionModel(std::string solver_nm) + void TestParameterPredictionModel([[maybe_unused]] std::string solver_nm) { #if MIOPEN_ENABLE_AI_KERNEL_TUNING && MIOPEN_USE_COMPOSABLEKERNEL auto test_case = GetParam(); From 8c4a1cc496419503b4cd615d3f39b3ab113def4c Mon Sep 17 00:00:00 2001 From: Brad Pepers Date: Mon, 11 May 2026 08:12:06 -0600 Subject: [PATCH 13/13] Fix -Wunused-parameter in implicitgemm_ck_util.hpp for non-CK builds Parameters in IsCKArgsSupported, GetOutElementOp, InitInvokerFactoryNCHW, InitInvokerFactoryNHWC, and MakeSolutionGroupConvImplicitGemmXdlops are only used inside #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL blocks, causing -Werror failures on gfx900/gfx906 where CK is disabled. 
Co-Authored-By: Claude Sonnet 4 --- .../src/ck_impl/implicitgemm_ck_util.hpp | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/projects/miopen/src/ck_impl/implicitgemm_ck_util.hpp b/projects/miopen/src/ck_impl/implicitgemm_ck_util.hpp index bc3a840e35e6..2c2da8e56644 100644 --- a/projects/miopen/src/ck_impl/implicitgemm_ck_util.hpp +++ b/projects/miopen/src/ck_impl/implicitgemm_ck_util.hpp @@ -433,7 +433,8 @@ template -bool IsCKArgsSupported(const ProblemDescriptionType& problem, const std::string& kernel_id) +bool IsCKArgsSupported([[maybe_unused]] const ProblemDescriptionType& problem, + [[maybe_unused]] const std::string& kernel_id) { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL if(!kernel_id.empty()) @@ -687,7 +688,7 @@ ConvSolution InitAnyInvokerFactory(const ProblemDescriptionType& problem, } template -OutElemOp GetOutElementOp(const miopen::fusion::ActivationOpInvokeParam& activationOp) +OutElemOp GetOutElementOp([[maybe_unused]] const miopen::fusion::ActivationOpInvokeParam& activationOp) { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL auto activationMode = activationOp.activMode; @@ -893,12 +894,12 @@ template -ConvSolution InitInvokerFactoryNCHW(const ExecutionContext& ctx, - const miopen::conv::ProblemDescription& problem, - const std::string& kernel_id, - const Input1TposeOp& input1_op, - const Input2TposeOp& input2_op, - const OutputTposeOp& output_op) +ConvSolution InitInvokerFactoryNCHW([[maybe_unused]] const ExecutionContext& ctx, + [[maybe_unused]] const miopen::conv::ProblemDescription& problem, + [[maybe_unused]] const std::string& kernel_id, + [[maybe_unused]] const Input1TposeOp& input1_op, + [[maybe_unused]] const Input2TposeOp& input2_op, + [[maybe_unused]] const OutputTposeOp& output_op) { assert(problem.IsLayoutDefault()); @@ -1063,7 +1064,7 @@ template ConvSolution InitInvokerFactoryNHWC(const ExecutionContext&, - const ProblemDescriptionType& problem, + [[maybe_unused]] const 
ProblemDescriptionType& problem, const std::string& kernel_id) { ConvSolution result; @@ -1270,10 +1271,10 @@ ConvSolution InitInvokerFactoryWrwNCHW(const ExecutionContext& ctx, template ConvSolution -MakeSolutionGroupConvImplicitGemmXdlops(const miopen::conv::ProblemDescription& problem, - InvokerFactoryMakerNCHW&& invoker_factory_maker_ncdhw, - InvokerFactoryMakerNHWC&& invoker_factory_maker_ndhwc, - const bool use_tf32 = false) +MakeSolutionGroupConvImplicitGemmXdlops([[maybe_unused]] const miopen::conv::ProblemDescription& problem, + [[maybe_unused]] InvokerFactoryMakerNCHW&& invoker_factory_maker_ncdhw, + [[maybe_unused]] InvokerFactoryMakerNHWC&& invoker_factory_maker_ndhwc, + [[maybe_unused]] const bool use_tf32 = false) { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL