ROCm · BradPepersAMD · May 9, 2026 · May 9, 2026 · May 9, 2026 · May 9, 2026
@@ -110,6 +110,13 @@ if(MIOPEN_INCBIN)
     enable_language(ASM)
 endif()
 
+# Selects how FP32 is rounded to bfloat16: ON (default) rounds to nearest even (RNE), OFF truncates.
+# This switch controls two related but different aspects of MIOpen behavior:
+#  1. How host code performs conversions of float to bfloat16 (important for testing).
+#  2. How BF16 kernels perform the final conversion (and rounding) of FP32 to BF16 results
+#     (affects the main functionality of the library).
+option(MIOPEN_USE_RNE_BFLOAT16 "Sets rounding scheme for bfloat16 type" ON)
+
 # Strip symbols for release
 if(MIOPEN_STRIP_SYMBOLS AND NOT WIN32 AND NOT APPLE)
     set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s")
@@ -894,8 +901,10 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
 if(NOT MIOPEN_USE_SQLITE_PERFDB)
     add_subdirectory(tools/sqlite2txt)  # configured only when the SQLite perf DB is not in use
 endif()
+add_subdirectory(common_utils)  # configured ahead of addkernels and src
 add_subdirectory(addkernels)
 add_subdirectory(src)
+add_subdirectory(miopen_utils)  # configured after src
 if(MIOPEN_BUILD_DRIVER)
     add_subdirectory(driver)  # optional, gated by MIOPEN_BUILD_DRIVER
 endif()

@@ -0,0 +1,46 @@
+################################################################################
+#
+# MIT License
+#
+# Copyright (c) 2025 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+################################################################################
+
+# INTERNAL BUILD-ONLY library — not installed, not exported, not part of the public MIOpen API.
+# Header-only pure C++ utilities shared by MIOpen, MIOpenDriver, and tests.
+# Contains NO MIOpen or GPU dependencies.
+# Do NOT add install(TARGETS miopen_common_utils ...) — headers live in the build tree only.
+
+add_library(miopen_common_utils INTERFACE)
+set_property(TARGET miopen_common_utils PROPERTY EXCLUDE_FROM_ALL TRUE)
+
+# Consumers pick up the header directory straight from the source tree. Only a
+# BUILD_INTERFACE entry is given because these headers are never installed.
+target_include_directories(miopen_common_utils
+    INTERFACE
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+)
+
+# bfloat16.hpp reads MIOPEN_USE_RNE_BFLOAT16 at compile time, so every consumer
+# receives it as 1 or 0 according to the option declared in the top-level
+# CMakeLists.txt.
+target_compile_definitions(miopen_common_utils
+    INTERFACE
+        MIOPEN_USE_RNE_BFLOAT16=$<BOOL:${MIOPEN_USE_RNE_BFLOAT16}>
+)
@@ -0,0 +1,47 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#ifndef GUARD_MLOPEN_ALGORITHM_HPP
+#define GUARD_MLOPEN_ALGORITHM_HPP
+
+#include <algorithm>
+
+namespace miopen {
+
+// Range-based wrapper around std::any_of: true when the predicate holds for at
+// least one element of r (false for an empty range).
+template <typename Range, typename Predicate>
+bool any_of(const Range& r, Predicate p)
+{
+    const auto first = r.begin();
+    const auto last  = r.end();
+    return std::any_of(first, last, p);
+}
+
+// Range-based wrapper around std::all_of: true when the predicate holds for
+// every element of r (true for an empty range).
+template <typename Range, typename Predicate>
+bool all_of(const Range& r, Predicate p)
+{
+    const auto first = r.begin();
+    const auto last  = r.end();
+    return std::all_of(first, last, p);
+}
+
+} // namespace miopen
+
+#endif
@@ -0,0 +1,179 @@
+// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier: MIT
+
+#ifndef BFLOAT16_H_
+#define BFLOAT16_H_
+
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <limits>
+// MIOPEN_USE_RNE_BFLOAT16 is provided via CMake compile definitions.
+
+// Host-side bfloat16 value: stores the upper 16 bits of a 32-bit float.
+//
+// Conversion from float either rounds to nearest even or truncates, selected at
+// compile time by MIOPEN_USE_RNE_BFLOAT16 (1 selects rounding, anything else
+// truncation). Arithmetic is carried out in float and converted back.
+class bfloat16
+{
+public:
+    // Zero-initialized value (+0.0).
+    bfloat16() : data_{0} {}
+
+    // Converts a float to bfloat16, preserving Inf and NaN (including signaling NaN).
+    explicit bfloat16(float rhs)
+    {
+        // Copy the object representation with memcpy; reading an inactive union
+        // member to reinterpret the bits is undefined behavior in C++.
+        static_assert(sizeof(std::uint32_t) == sizeof(float), "float must be 32 bits wide");
+        std::uint32_t bits = 0;
+        std::memcpy(&bits, &rhs, sizeof(bits));
+
+        // BF16 round and NaN preservation code matches
+        // https://github.com/ROCm/rocBLAS/blob/develop/library/include/rocblas_bfloat16.h
+        if((~bits & 0x7f800000) == 0) // Inf or NaN
+        {
+            // When all of the exponent bits are 1, the value is Inf or NaN.
+            // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
+            // mantissa bit. Quiet NaN is indicated by the most significant mantissa
+            // bit being 1. Signaling NaN is indicated by the most significant
+            // mantissa bit being 0 but some other bit(s) being 1. If any of the
+            // lower 16 bits of the mantissa are 1, we set the least significant bit
+            // of the bfloat16 mantissa, in order to preserve signaling NaN in case
+            // the bfloat16's mantissa bits are all 0.
+            if((bits & 0xffff) != 0)
+            {
+                bits |= 0x10000; // Preserve signaling NaN
+            }
+        }
+        else
+        {
+#if MIOPEN_USE_RNE_BFLOAT16 == 1
+            // When the exponent bits are not all 1s, then the value is zero, normal,
+            // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
+            // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
+            // This causes the bfloat16's mantissa to be incremented by 1 if the 16
+            // least significant bits of the float mantissa are greater than 0x8000,
+            // or if they are equal to 0x8000 and the least significant bit of the
+            // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
+            // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
+            // has the value 0x7f, then incrementing it causes it to become 0x00 and
+            // the exponent is incremented by one, which is the next higher FP value
+            // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
+            // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
+            // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
+            // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
+            // incrementing it causes it to become an exponent of 0xFF and a mantissa
+            // of 0x00, which is Inf, the next higher value to the unrounded value.
+            bits += (0x7fff + ((bits >> 16) & 1)); // Round to nearest, round to even
+#else                                              // truncation
+// do nothing
+#endif
+        }
+        // Only the upper half survives; the cast makes the narrowing explicit.
+        data_ = static_cast<std::uint16_t>(bits >> 16);
+    }
+
+    // Widens back to float by placing the stored bits in the upper half of a
+    // 32-bit pattern (the lower 16 bits are zero).
+    operator float() const
+    {
+        const std::uint32_t bits = static_cast<std::uint32_t>(data_) << 16;
+        float result;
+        std::memcpy(&result, &bits, sizeof(result));
+        return result;
+    }
+
+    bfloat16 operator-() const { return bfloat16(-static_cast<float>(*this)); }
+    bfloat16 operator+() const { return *this; }
+
+    // Assignment from float goes through the converting constructor.
+    bfloat16& operator=(const float rhs)
+    {
+        *this = bfloat16(rhs);
+        return *this;
+    }
+
+    // Compound operators compute in float and convert the result back.
+    bfloat16& operator+=(bfloat16 rhs)
+    {
+        *this = bfloat16(static_cast<float>(*this) + static_cast<float>(rhs));
+        return *this;
+    }
+
+    bfloat16& operator+=(float rhs)
+    {
+        *this = bfloat16(static_cast<float>(*this) + rhs);
+        return *this;
+    }
+
+    bfloat16& operator-=(bfloat16 rhs)
+    {
+        *this += -rhs;
+        return *this;
+    }
+    bfloat16& operator*=(bfloat16 rhs)
+    {
+        *this = bfloat16(static_cast<float>(*this) * static_cast<float>(rhs));
+        return *this;
+    }
+    bfloat16& operator*=(float rhs)
+    {
+        *this = bfloat16(static_cast<float>(*this) * rhs);
+        return *this;
+    }
+
+    bfloat16& operator/=(bfloat16 rhs)
+    {
+        *this = bfloat16(static_cast<float>(*this) / static_cast<float>(rhs));
+        return *this;
+    }
+    bool operator<(bfloat16 rhs) const
+    {
+        return static_cast<float>(*this) < static_cast<float>(rhs);
+    }
+    // Compares the float values of both operands through std::equal_to<float>.
+    bool operator==(bfloat16 rhs) const { return std::equal_to<float>()(*this, rhs); }
+
+    // Builds a value directly from its raw 16-bit pattern, with no rounding.
+    static constexpr bfloat16 generate(std::uint16_t val) { return bfloat16{val, true}; }
+
+private:
+    // Raw-bits constructor; the bool only distinguishes it from the float overload.
+    constexpr bfloat16(std::uint16_t val, bool) : data_{val} {}
+
+    std::uint16_t data_;
+};
+
+// Binary addition, implemented with bfloat16::operator+= on the by-value left operand.
+inline bfloat16 operator+(bfloat16 a, const bfloat16& b)
+{
+    return a += b;
+}
+
+// Binary subtraction, implemented with bfloat16::operator-= on the by-value left operand.
+inline bfloat16 operator-(bfloat16 a, const bfloat16& b)
+{
+    return a -= b;
+}
+
+// Binary multiplication, implemented with bfloat16::operator*= on the by-value left operand.
+inline bfloat16 operator*(bfloat16 a, const bfloat16& b)
+{
+    return a *= b;
+}
+
+// Binary division, implemented with bfloat16::operator/= on the by-value left operand.
+inline bfloat16 operator/(bfloat16 a, const bfloat16& b)
+{
+    return a /= b;
+}
+
+namespace std {
+// numeric_limits for bfloat16. The format keeps the upper 16 bits of a 32-bit
+// float: 1 sign bit, 8 exponent bits and 7 stored mantissa bits.
+template <>
+class numeric_limits<bfloat16>
+{
+public:
+    static constexpr bool is_specialized = true;
+
+    // Trait constants, so generic code querying them compiles for bfloat16.
+    static constexpr bool is_signed         = true;
+    static constexpr bool is_integer        = false;
+    static constexpr bool is_exact          = false;
+    static constexpr bool is_bounded        = true;
+    static constexpr bool is_modulo         = false;
+    static constexpr bool has_infinity      = true;
+    static constexpr bool has_quiet_NaN     = true;
+    static constexpr bool has_signaling_NaN = true;
+    static constexpr int radix              = 2;
+    static constexpr int digits             = 8; // 7 stored mantissa bits + implicit leading 1
+
+    static constexpr bfloat16 min() noexcept { return bfloat16::generate(0x0080); } // 0x1.00p-126
+    static constexpr bfloat16 max() noexcept { return bfloat16::generate(0x7F7F); }
+    static constexpr bfloat16 lowest() noexcept { return bfloat16::generate(0xFF7F); }
+    static constexpr bfloat16 epsilon() noexcept { return bfloat16::generate(0x3C00); } // 2^-7
+    static constexpr bfloat16 infinity() noexcept { return bfloat16::generate(0x7F80); }
+    static constexpr bfloat16 quiet_NaN() noexcept { return bfloat16::generate(0x7FC0); } // qnan(0)
+    static constexpr bfloat16 signaling_NaN() noexcept
+    {
+        return bfloat16::generate(0x7F81); // snan(1)
+    }
+    static constexpr bfloat16 denorm_min() noexcept
+    {
+        return bfloat16::generate(0x0001); // 0x0.02p-126
+    }
+};
+} // namespace std
+#endif
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#ifndef GUARD_MIOPEN_EACH_ARGS_HPP
+#define GUARD_MIOPEN_EACH_ARGS_HPP
+
+#include <initializer_list>
+#include <type_traits>
+#include <utility>
+
+namespace miopen {
+namespace detail {
+
+// Calls f(std::integral_constant<std::size_t, N>{}, arg) for each argument from
+// left to right, where N is the argument's zero-based position.
+template <class F, std::size_t... Ns, class... Ts>
+void each_args_i_impl(F f, std::index_sequence<Ns...>, Ts&&... xs)
+{
+    // Braced initialization evaluates its elements left to right; the leading 0
+    // keeps the array non-empty when the pack is empty.
+    using swallow = int[];
+    (void)swallow{0, (f(std::integral_constant<std::size_t, Ns>{}, std::forward<Ts>(xs)), 0)...};
+}
+
+// Calls f with the elements of x selected by Ns and returns its result.
+template <class F, std::size_t... Ns, class T>
+auto unpack_impl(F f, std::index_sequence<Ns...>, T&& x)
+{
+    return f(std::get<Ns>(x)...);
+}
+
+} // namespace detail
+
+// Invokes f once per argument, passing the argument's index (as a
+// std::integral_constant) followed by the argument itself.
+template <class F, class... Ts>
+void each_args_i(F f, Ts&&... xs)
+{
+    detail::each_args_i_impl(f, std::index_sequence_for<Ts...>{}, std::forward<Ts>(xs)...);
+}
+
+// Invokes f once per argument, in left-to-right order.
+template <class F, class... Ts>
+void each_args(F f, Ts&&... xs)
+{
+    using swallow = int[];
+    (void)swallow{0, (f(std::forward<Ts>(xs)), 0)...};
+}
+
+// Workaround for gcc warnings
+template <class F>
+void each_args(F)
+{
+}
+
+// Calls f with the elements of the tuple-like object x as individual arguments
+// and returns whatever f returns.
+template <class F, std::size_t... Ns, class T>
+auto unpack(F f, T&& x)
+{
+    using tuple_type = std::remove_cv_t<std::remove_reference_t<T>>;
+    constexpr std::size_t arity = std::tuple_size<tuple_type>::value;
+    return detail::unpack_impl(f, std::make_index_sequence<arity>(), std::forward<T>(x));
+}
+
+} // namespace miopen
+
+#endif