From 94db5ffabcaab5a7d31217398f50845c99a47a8b Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Tue, 7 Oct 2025 09:51:08 -0700
Subject: [PATCH 01/25] Dot fp32 precision

---
 tools/clang/unittests/HLSLExec/LongVectorTestData.h |  4 ----
 tools/clang/unittests/HLSLExec/LongVectors.cpp      | 12 +++++++++---
 2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
index 24a4301299..c6170acdef 100644
--- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h
+++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
@@ -184,10 +184,6 @@ struct HLSLHalf_t {
     return FromHALF((DirectX::PackedVector::XMConvertFloatToHalf(A + B)));
   }
 
-  HLSLHalf_t &operator+=(const HLSLHalf_t &Other) {
-    return *this = *this + Other;
-  }
-
   HLSLHalf_t operator-(const HLSLHalf_t &Other) const {
     const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val);
     const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val);
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 93a46d02fb..ecb73bd65e 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1100,11 +1100,17 @@ REDUCTION_OP(OpType::All_Zero, (std::all_of));
 template <typename T> struct Op<OpType::Dot, T, 2> : DefaultValidation<T> {};
 template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
   static std::vector<T> buildExpected(Op<OpType::Dot, T, 2>,
-                                      const InputSets<T> &Inputs) {
-    T DotProduct = T();
+                                      const InputSets<T> &Inputs,
+                                      uint16_t ScalarInputFlags) {
+    UNREFERENCED_PARAMETER(ScalarInputFlags);
+
+    // Accumulate in fp32 to improve precision.
+    float DotProduct = 0.0f;
 
     for (size_t I = 0; I < Inputs[0].size(); ++I) {
-      DotProduct += Inputs[0][I] * Inputs[1][I];
+      const float A = Inputs[0][I];
+      const float B = Inputs[1][I];
+      DotProduct += A * B;
     }
 
     std::vector<T> Expected;

From 97691fbeb72b25c0b882bbba5df6637fc7a0aa3c Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Mon, 20 Oct 2025 19:24:58 -0700
Subject: [PATCH 02/25] Change error tolerance for dot

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index ecb73bd65e..5949975506 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -593,7 +593,8 @@ template <typename T> struct DefaultValidation {
   }
 };
 
-// Strict Validation - require exact matches for all types
+// Strict Validation - Defaults to exact matches.
+// Tolerance can be set to a non-zero value to allow for a wider range.
 struct StrictValidation {
   ValidationConfig ValidationConfig;
 };
@@ -1097,22 +1098,30 @@ REDUCTION_OP(OpType::All_Zero, (std::all_of));
 
 #undef REDUCTION_OP
 
-template <typename T> struct Op<OpType::Dot, T, 2> : DefaultValidation<T> {};
+template <typename T> struct Op<OpType::Dot, T, 2> : StrictValidation {};
 template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
-  static std::vector<T> buildExpected(Op<OpType::Dot, T, 2>,
+  static std::vector<T> buildExpected(Op<OpType::Dot, T, 2> Op,
                                       const InputSets<T> &Inputs,
                                       uint16_t ScalarInputFlags) {
     UNREFERENCED_PARAMETER(ScalarInputFlags);
 
     // Accumulate in fp32 to improve precision.
     float DotProduct = 0.0f;
+    float DotProductAbs = 0.0f;
 
-    for (size_t I = 0; I < Inputs[0].size(); ++I) {
+    const size_t VectorSize = Inputs[0].size();
+
+    for (size_t I = 0; I < VectorSize; ++I) {
       const float A = Inputs[0][I];
       const float B = Inputs[1][I];
       DotProduct += A * B;
+
+      DotProductAbs += std::fabs(A) * std::fabs(B);
     }
 
+    Op.ValidationConfig.Tolerance =
+        (VectorSize + 1) * std::numeric_limits<T>::epsilon() * DotProductAbs;
+
     std::vector<T> Expected;
     Expected.push_back(DotProduct);
     return Expected;

From 856e1ff46011b3ac700156d1360c22fc0b7ef0a0 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Tue, 21 Oct 2025 22:31:09 -0700
Subject: [PATCH 03/25] Added tolerance for dot based on discussion with Tex.
 Needs some cleanup

---
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 83 +++++++++++++++----
 1 file changed, 68 insertions(+), 15 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 5949975506..2f576b7f3d 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -20,6 +20,8 @@
 #include <string>
 #include <type_traits>
 #include <vector>
+#include <algorithm> // For sort
+#include <numeric>   // For accumulate
 
 namespace LongVector {
 
@@ -247,6 +249,11 @@ bool doVectorsMatch(const std::vector<T> &ActualValues,
   if (VerboseLogging) {
     logLongVector(ActualValues, L"ActualValues");
     logLongVector(ExpectedValues, L"ExpectedValues");
+
+    hlsl_test::LogCommentFmt(
+        L"ValidationType: %s, Tolerance: %f",
+        ValidationType == ValidationType::Epsilon ? L"Epsilon" : L"ULP",
+        Tolerance);
   }
 
   // Stash mismatched indexes for easy failure logging later
@@ -936,8 +943,9 @@ struct Op<OpType::AsUint_SplitDouble, double, 1> : StrictValidation {};
 // values.
 template <> struct ExpectedBuilder<OpType::AsUint_SplitDouble, double> {
   static std::vector<uint32_t>
-  buildExpected(Op<OpType::AsUint_SplitDouble, double, 1>,
-                const InputSets<double> &Inputs) {
+  buildExpected(Op<OpType::AsUint_SplitDouble, double, 1>&,
+                const InputSets<double> &Inputs, uint16_t ScalarInputFlags) {
+    DXASSERT_NOMSG(ScalarInputFlags == 0);
     DXASSERT_NOMSG(Inputs.size() == 1);
 
     size_t VectorSize = Inputs[0].size();
@@ -1010,8 +1018,9 @@ DEFAULT_OP_1(OpType::Log2, (std::log2(A)));
 template <> struct Op<OpType::Frexp, float, 1> : DefaultValidation<float> {};
 
 template <> struct ExpectedBuilder<OpType::Frexp, float> {
-  static std::vector<float> buildExpected(Op<OpType::Frexp, float, 1>,
-                                          const InputSets<float> &Inputs) {
+  static std::vector<float> buildExpected(Op<OpType::Frexp, float, 1>&,
+                                          const InputSets<float> &Inputs,
+                                          uint32_t) {
     DXASSERT_NOMSG(Inputs.size() == 1);
 
     // Expected values size is doubled. In the first half we store the
@@ -1080,8 +1089,8 @@ OP_3(OpType::Select, StrictValidation, (static_cast<bool>(A) ? B : C));
 #define REDUCTION_OP(OP, STDFUNC)                                              \
   template <typename T> struct Op<OP, T, 1> : StrictValidation {};             \
   template <typename T> struct ExpectedBuilder<OP, T> {                        \
-    static std::vector<HLSLBool_t> buildExpected(Op<OP, T, 1>,                 \
-                                                 const InputSets<T> &Inputs) { \
+    static std::vector<HLSLBool_t>                                             \
+    buildExpected(Op<OP, T, 1>&, const InputSets<T> &Inputs, uint16_t) {       \
       const bool Res = STDFUNC(Inputs[0].begin(), Inputs[0].end(),             \
                                [](T A) { return A != static_cast<T>(0); });    \
       return std::vector<HLSLBool_t>{Res};                                     \
@@ -1100,27 +1109,33 @@ REDUCTION_OP(OpType::All_Zero, (std::all_of));
 
 template <typename T> struct Op<OpType::Dot, T, 2> : StrictValidation {};
 template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
-  static std::vector<T> buildExpected(Op<OpType::Dot, T, 2> Op,
+  static std::vector<T> buildExpected(Op<OpType::Dot, T, 2> &Op,
                                       const InputSets<T> &Inputs,
                                       uint16_t ScalarInputFlags) {
     UNREFERENCED_PARAMETER(ScalarInputFlags);
 
     // Accumulate in fp32 to improve precision.
     float DotProduct = 0.0f;
-    float DotProductAbs = 0.0f;
+
+    std::vector<float> PositiveProducts;
+    std::vector<float> NegativeProducts;
 
     const size_t VectorSize = Inputs[0].size();
 
     for (size_t I = 0; I < VectorSize; ++I) {
       const float A = Inputs[0][I];
       const float B = Inputs[1][I];
-      DotProduct += A * B;
+      const float Product = A * B;
+      DotProduct += Product;
 
-      DotProductAbs += std::fabs(A) * std::fabs(B);
+      if(Product >= 0.0f)
+        PositiveProducts.push_back(Product);
+      else
+        NegativeProducts.push_back(Product);
     }
 
-    Op.ValidationConfig.Tolerance =
-        (VectorSize + 1) * std::numeric_limits<T>::epsilon() * DotProductAbs;
+    const DataType &OpDataType = getDataType<T>();
+    computeDotTolerance(PositiveProducts, NegativeProducts, Op.ValidationConfig, OpDataType.Is16Bit);
 
     std::vector<T> Expected;
     Expected.push_back(DotProduct);
@@ -1154,6 +1169,40 @@ STRICT_OP_1(OpType::LoadAndStore_DT_SB_UAV, (A));
 STRICT_OP_1(OpType::LoadAndStore_DT_SB_SRV, (A));
 STRICT_OP_1(OpType::LoadAndStore_RD_SB_UAV, (A));
 STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
+static void computeDotTolerance(std::vector<float> &PositiveProducts, std::vector<float> &NegativeProducts, ValidationConfig &ValidationConfig, bool Is16Bit) {
+
+    std::sort(PositiveProducts.begin(), PositiveProducts.end(), std::greater_equal<float>());
+    std::sort(NegativeProducts.begin(), NegativeProducts.end(), std::less_equal<float>());
+
+    // Stash the ULPs for the result of each subsequent addition.
+    float A = PositiveProducts.empty() ? 0.0f : PositiveProducts.front();
+    std::vector<float> ULP;
+    for(size_t I = 1; I < PositiveProducts.size(); ++I) {
+      A += PositiveProducts[I];
+      ULP.push_back(std::nexttowardf(A, std::numeric_limits<float>::infinity()) - A);
+    }
+
+    // Stash the ULPs of each subsequent addition.
+    A = NegativeProducts.empty() ? 0.0f : NegativeProducts.front();
+    for(size_t I = 1; I < NegativeProducts.size(); ++I) {
+      A += NegativeProducts[I];
+      ULP.push_back(A - std::nexttowardf(A, -std::numeric_limits<float>::infinity()));
+    }
+
+    std::sort(ULP.begin(), ULP.end(), std::greater_equal<float>());
+
+    // Sum up all of the ULPs.
+    float EpsilonA = std::accumulate(ULP.begin(), ULP.end(), 0.0f);
+
+    // And add half an ULP of the final result to get our tolerance.
+    const float ULPTolerance = Is16Bit ? 0.5f : 1.0f;
+
+    const float EpsULP = ((std::nexttowardf(EpsilonA, std::numeric_limits<float>::infinity() - EpsilonA)));
+
+    EpsilonA += (EpsULP * ULPTolerance);
+
+    ValidationConfig.Tolerance = EpsilonA;
+}
 
 //
 // dispatchTest
@@ -1161,7 +1210,9 @@ STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
 
 template <OpType OP, typename T> struct ExpectedBuilder {
 
-  static auto buildExpected(Op<OP, T, 1> Op, const InputSets<T> &Inputs) {
+  static auto buildExpected(Op<OP, T, 1> &Op, const InputSets<T> &Inputs,
+                            uint16_t ScalarInputFlags) {
+    UNREFERENCED_PARAMETER(ScalarInputFlags);
     DXASSERT_NOMSG(Inputs.size() == 1);
 
     std::vector<decltype(Op(T()))> Expected;
@@ -1173,7 +1224,8 @@ template <OpType OP, typename T> struct ExpectedBuilder {
     return Expected;
   }
 
-  static auto buildExpected(Op<OP, T, 2> Op, const InputSets<T> &Inputs) {
+  static auto buildExpected(Op<OP, T, 2> &Op, const InputSets<T> &Inputs,
+                            uint16_t ScalarInputFlags) {
     DXASSERT_NOMSG(Inputs.size() == 2);
 
     std::vector<decltype(Op(T(), T()))> Expected;
@@ -1185,7 +1237,8 @@ template <OpType OP, typename T> struct ExpectedBuilder {
     return Expected;
   }
 
-  static auto buildExpected(Op<OP, T, 3> Op, const InputSets<T> &Inputs) {
+  static auto buildExpected(Op<OP, T, 3> &Op, const InputSets<T> &Inputs,
+                            uint16_t ScalarInputFlags) {
     DXASSERT_NOMSG(Inputs.size() == 3);
 
     std::vector<decltype(Op(T(), T(), T()))> Expected;

From 06f022370627385da8bc3ebbf4897bb2d34d1155 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Tue, 21 Oct 2025 22:31:23 -0700
Subject: [PATCH 04/25] Clang format

---
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 74 +++++++++++--------
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 2f576b7f3d..a534e96d7d 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -12,16 +12,16 @@
 
 #include "HlslExecTestUtils.h"
 
+#include <algorithm> // For sort
 #include <array>
 #include <bitset>
 #include <iomanip>
+#include <numeric> // For accumulate
 #include <optional>
 #include <sstream>
 #include <string>
 #include <type_traits>
 #include <vector>
-#include <algorithm> // For sort
-#include <numeric>   // For accumulate
 
 namespace LongVector {
 
@@ -943,7 +943,7 @@ struct Op<OpType::AsUint_SplitDouble, double, 1> : StrictValidation {};
 // values.
 template <> struct ExpectedBuilder<OpType::AsUint_SplitDouble, double> {
   static std::vector<uint32_t>
-  buildExpected(Op<OpType::AsUint_SplitDouble, double, 1>&,
+  buildExpected(Op<OpType::AsUint_SplitDouble, double, 1> &,
                 const InputSets<double> &Inputs, uint16_t ScalarInputFlags) {
     DXASSERT_NOMSG(ScalarInputFlags == 0);
     DXASSERT_NOMSG(Inputs.size() == 1);
@@ -1018,7 +1018,7 @@ DEFAULT_OP_1(OpType::Log2, (std::log2(A)));
 template <> struct Op<OpType::Frexp, float, 1> : DefaultValidation<float> {};
 
 template <> struct ExpectedBuilder<OpType::Frexp, float> {
-  static std::vector<float> buildExpected(Op<OpType::Frexp, float, 1>&,
+  static std::vector<float> buildExpected(Op<OpType::Frexp, float, 1> &,
                                           const InputSets<float> &Inputs,
                                           uint32_t) {
     DXASSERT_NOMSG(Inputs.size() == 1);
@@ -1090,7 +1090,7 @@ OP_3(OpType::Select, StrictValidation, (static_cast<bool>(A) ? B : C));
   template <typename T> struct Op<OP, T, 1> : StrictValidation {};             \
   template <typename T> struct ExpectedBuilder<OP, T> {                        \
     static std::vector<HLSLBool_t>                                             \
-    buildExpected(Op<OP, T, 1>&, const InputSets<T> &Inputs, uint16_t) {       \
+    buildExpected(Op<OP, T, 1> &, const InputSets<T> &Inputs, uint16_t) {      \
       const bool Res = STDFUNC(Inputs[0].begin(), Inputs[0].end(),             \
                                [](T A) { return A != static_cast<T>(0); });    \
       return std::vector<HLSLBool_t>{Res};                                     \
@@ -1128,14 +1128,15 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
       const float Product = A * B;
       DotProduct += Product;
 
-      if(Product >= 0.0f)
+      if (Product >= 0.0f)
         PositiveProducts.push_back(Product);
       else
         NegativeProducts.push_back(Product);
     }
 
     const DataType &OpDataType = getDataType<T>();
-    computeDotTolerance(PositiveProducts, NegativeProducts, Op.ValidationConfig, OpDataType.Is16Bit);
+    computeDotTolerance(PositiveProducts, NegativeProducts, Op.ValidationConfig,
+                        OpDataType.Is16Bit);
 
     std::vector<T> Expected;
     Expected.push_back(DotProduct);
@@ -1169,39 +1170,48 @@ STRICT_OP_1(OpType::LoadAndStore_DT_SB_UAV, (A));
 STRICT_OP_1(OpType::LoadAndStore_DT_SB_SRV, (A));
 STRICT_OP_1(OpType::LoadAndStore_RD_SB_UAV, (A));
 STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
-static void computeDotTolerance(std::vector<float> &PositiveProducts, std::vector<float> &NegativeProducts, ValidationConfig &ValidationConfig, bool Is16Bit) {
 
-    std::sort(PositiveProducts.begin(), PositiveProducts.end(), std::greater_equal<float>());
-    std::sort(NegativeProducts.begin(), NegativeProducts.end(), std::less_equal<float>());
-
-    // Stash the ULPs for the result of each subsequent addition.
-    float A = PositiveProducts.empty() ? 0.0f : PositiveProducts.front();
-    std::vector<float> ULP;
-    for(size_t I = 1; I < PositiveProducts.size(); ++I) {
-      A += PositiveProducts[I];
-      ULP.push_back(std::nexttowardf(A, std::numeric_limits<float>::infinity()) - A);
-    }
+static void computeDotTolerance(std::vector<float> &PositiveProducts,
+                                std::vector<float> &NegativeProducts,
+                                ValidationConfig &ValidationConfig,
+                                bool Is16Bit) {
+
+  std::sort(PositiveProducts.begin(), PositiveProducts.end(),
+            std::greater_equal<float>());
+  std::sort(NegativeProducts.begin(), NegativeProducts.end(),
+            std::less_equal<float>());
+
+  // Stash the ULPs for the result of each subsequent addition.
+  float A = PositiveProducts.empty() ? 0.0f : PositiveProducts.front();
+  std::vector<float> ULP;
+  for (size_t I = 1; I < PositiveProducts.size(); ++I) {
+    A += PositiveProducts[I];
+    ULP.push_back(std::nexttowardf(A, std::numeric_limits<float>::infinity()) -
+                  A);
+  }
 
-    // Stash the ULPs of each subsequent addition.
-    A = NegativeProducts.empty() ? 0.0f : NegativeProducts.front();
-    for(size_t I = 1; I < NegativeProducts.size(); ++I) {
-      A += NegativeProducts[I];
-      ULP.push_back(A - std::nexttowardf(A, -std::numeric_limits<float>::infinity()));
-    }
+  // Stash the ULPs of each subsequent addition.
+  A = NegativeProducts.empty() ? 0.0f : NegativeProducts.front();
+  for (size_t I = 1; I < NegativeProducts.size(); ++I) {
+    A += NegativeProducts[I];
+    ULP.push_back(A -
+                  std::nexttowardf(A, -std::numeric_limits<float>::infinity()));
+  }
 
-    std::sort(ULP.begin(), ULP.end(), std::greater_equal<float>());
+  std::sort(ULP.begin(), ULP.end(), std::greater_equal<float>());
 
-    // Sum up all of the ULPs.
-    float EpsilonA = std::accumulate(ULP.begin(), ULP.end(), 0.0f);
+  // Sum up all of the ULPs.
+  float EpsilonA = std::accumulate(ULP.begin(), ULP.end(), 0.0f);
 
-    // And add half an ULP of the final result to get our tolerance.
-    const float ULPTolerance = Is16Bit ? 0.5f : 1.0f;
+  // And add half an ULP of the final result to get our tolerance.
+  const float ULPTolerance = Is16Bit ? 0.5f : 1.0f;
 
-    const float EpsULP = ((std::nexttowardf(EpsilonA, std::numeric_limits<float>::infinity() - EpsilonA)));
+  const float EpsULP = ((std::nexttowardf(
+      EpsilonA, std::numeric_limits<float>::infinity() - EpsilonA)));
 
-    EpsilonA += (EpsULP * ULPTolerance);
+  EpsilonA += (EpsULP * ULPTolerance);
 
-    ValidationConfig.Tolerance = EpsilonA;
+  ValidationConfig.Tolerance = EpsilonA;
 }
 
 //

From 547db3cd53395c1d5829afe0eda7863bd4b41295 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Wed, 22 Oct 2025 16:59:46 -0700
Subject: [PATCH 05/25] Working with updates from talking with Tex. Need to
 clean-up and do something for corner cases like subnormals

---
 include/dxc/Test/HlslTestUtils.h              |  23 ++++
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 105 ++++++++++--------
 2 files changed, 79 insertions(+), 49 deletions(-)

diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h
index 5a84f5cfbc..b6dad40cd8 100644
--- a/include/dxc/Test/HlslTestUtils.h
+++ b/include/dxc/Test/HlslTestUtils.h
@@ -641,6 +641,29 @@ inline bool CompareFloatRelativeEpsilon(
   return CompareFloatULP(fsrc, fref, 23 - nRelativeExp, mode);
 }
 
+//inline HALF GetULPForHalf(HALF &Value) {
+//  HALF NextValue = Value;
+//
+//  //const uint16_t Exp = (NextValue >> 10) & 0x1F;
+//  //if(Exp == 0x1F)
+//  //  return std::numeric_limits<HALF>::infinity();
+//
+//  if(Value & FLOAT16_BIT_SIGN)
+//  {
+//    if(Value == FLOAT16_NEG_ZERO)
+//      NextValue = 0x0000;
+//    else
+//      --NextValue;
+//  }
+//  else {
+//    ++NextValue;
+//  }
+//
+//  float A = DirectX::PackedVector::XMConvertHalfToFloat(Value);
+//  float B = DirectX::PackedVector::XMConvertHalfToFloat(NextValue);
+//  return B - A;
+//}
+
 inline bool CompareHalfULP(const uint16_t &fsrc, const uint16_t &fref,
                            float ULPTolerance) {
   // Treat +0 and -0 as equal
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index a534e96d7d..c688dacb18 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -251,7 +251,7 @@ bool doVectorsMatch(const std::vector<T> &ActualValues,
     logLongVector(ExpectedValues, L"ExpectedValues");
 
     hlsl_test::LogCommentFmt(
-        L"ValidationType: %s, Tolerance: %f",
+        L"ValidationType: %s, Tolerance: %17g",
         ValidationType == ValidationType::Epsilon ? L"Epsilon" : L"ULP",
         Tolerance);
   }
@@ -1114,32 +1114,43 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
                                       uint16_t ScalarInputFlags) {
     UNREFERENCED_PARAMETER(ScalarInputFlags);
 
-    // Accumulate in fp32 to improve precision.
-    float DotProduct = 0.0f;
+    // Accumulate in fp64 to improve precision.
+    double DotProduct = 0.0f;
 
-    std::vector<float> PositiveProducts;
-    std::vector<float> NegativeProducts;
+    std::vector<double> PositiveProducts;
+    std::vector<double> NegativeProducts;
 
     const size_t VectorSize = Inputs[0].size();
 
+    double AbsoluteEpsilon = 0.0f;
+    const float ULPTolerance = 0.5f;
+
     for (size_t I = 0; I < VectorSize; ++I) {
-      const float A = Inputs[0][I];
-      const float B = Inputs[1][I];
-      const float Product = A * B;
+      const double Product = Inputs[0][I] * Inputs[1][I];
+      AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Product, ULPTolerance);
+
       DotProduct += Product;
 
-      if (Product >= 0.0f)
+      if (Product >= 0.0)
         PositiveProducts.push_back(Product);
       else
         NegativeProducts.push_back(Product);
     }
 
-    const DataType &OpDataType = getDataType<T>();
-    computeDotTolerance(PositiveProducts, NegativeProducts, Op.ValidationConfig,
-                        OpDataType.Is16Bit);
+    PositiveProducts.insert(PositiveProducts.end(), NegativeProducts.begin(), NegativeProducts.end());
+
+    double A = PositiveProducts.empty() ? 0.0 : PositiveProducts.front();
+    for(size_t I = 1; I < PositiveProducts.size(); ++I) {
+      A += PositiveProducts[I];
+      AbsoluteEpsilon += computeAbsoluteEpsilon<T>(AbsoluteEpsilon, ULPTolerance);
+    }
+
+    AbsoluteEpsilon += computeAbsoluteEpsilon<T>(AbsoluteEpsilon, ULPTolerance);
+    AbsoluteEpsilon = AbsoluteEpsilon * VectorSize;
+    Op.ValidationConfig.Tolerance = static_cast<float>(AbsoluteEpsilon);
 
     std::vector<T> Expected;
-    Expected.push_back(DotProduct);
+    Expected.push_back(static_cast<T>(DotProduct));
     return Expected;
   }
 };
@@ -1171,49 +1182,45 @@ STRICT_OP_1(OpType::LoadAndStore_DT_SB_SRV, (A));
 STRICT_OP_1(OpType::LoadAndStore_RD_SB_UAV, (A));
 STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
 
-static void computeDotTolerance(std::vector<float> &PositiveProducts,
-                                std::vector<float> &NegativeProducts,
-                                ValidationConfig &ValidationConfig,
-                                bool Is16Bit) {
-
-  std::sort(PositiveProducts.begin(), PositiveProducts.end(),
-            std::greater_equal<float>());
-  std::sort(NegativeProducts.begin(), NegativeProducts.end(),
-            std::less_equal<float>());
-
-  // Stash the ULPs for the result of each subsequent addition.
-  float A = PositiveProducts.empty() ? 0.0f : PositiveProducts.front();
-  std::vector<float> ULP;
-  for (size_t I = 1; I < PositiveProducts.size(); ++I) {
-    A += PositiveProducts[I];
-    ULP.push_back(std::nexttowardf(A, std::numeric_limits<float>::infinity()) -
-                  A);
-  }
-
-  // Stash the ULPs of each subsequent addition.
-  A = NegativeProducts.empty() ? 0.0f : NegativeProducts.front();
-  for (size_t I = 1; I < NegativeProducts.size(); ++I) {
-    A += NegativeProducts[I];
-    ULP.push_back(A -
-                  std::nexttowardf(A, -std::numeric_limits<float>::infinity()));
-  }
-
-  std::sort(ULP.begin(), ULP.end(), std::greater_equal<float>());
+static double computeAbsoluteEpsilon(double A, float ULPTolerance)
+{
+  // TODO: We will need to handle denormals, infinities, and NaNs here.
+  // But none of the test cases should be generating those right now.
 
-  // Sum up all of the ULPs.
-  float EpsilonA = std::accumulate(ULP.begin(), ULP.end(), 0.0f);
+  if (A < 0.0)
+    A = -A;
 
-  // And add half an ULP of the final result to get our tolerance.
-  const float ULPTolerance = Is16Bit ? 0.5f : 1.0f;
+  double ULP = 0.0;
 
-  const float EpsULP = ((std::nexttowardf(
-      EpsilonA, std::numeric_limits<float>::infinity() - EpsilonA)));
+  if constexpr (std::is_same_v<T, HLSLHalf_t>) {
+    // TODO: I dont think the 0 case is handled properly here.
+    // But none of the inputs should be 0 right now.
+    HLSLHalf_t Next = A;
+    HLSLHalf_t Current = A;
+    if(Next.Val && 0x8000) {
+      // Negative
+      if(Next.Val == 0x8000)
+        Next.Val = 0x0000;
+      else
+        --Next.Val;
+    }
+    else {
+      ++Next.Val;
+    }
 
-  EpsilonA += (EpsULP * ULPTolerance);
+    float NextF = Next;
+    float CurrentF = Current;
+    ULP = std::abs(NextF - CurrentF);
+  }
+  else {
+    ULP =
+     (std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) - static_cast<T>(A));
+  }
 
-  ValidationConfig.Tolerance = EpsilonA;
+  return ULP * ULPTolerance;
 }
 
+
 //
 // dispatchTest
 //

From 2443751c55fee8dbf5b69c953abad775cb3eb1b4 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Wed, 22 Oct 2025 18:30:31 -0700
Subject: [PATCH 06/25] Some minor cleanup

---
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 71 +++++++++----------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index c688dacb18..2170642d05 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1109,44 +1109,57 @@ REDUCTION_OP(OpType::All_Zero, (std::all_of));
 
 template <typename T> struct Op<OpType::Dot, T, 2> : StrictValidation {};
 template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
+  // For Dot, buildExpected is a special case: it also computes an absolute
+  // epsilon for validation because Dot is a compound operation. Expected value
+  // is computed by multiplying and accumulating in fp64 for higher precision.
+  // Absolute epsilon is computed by reordering the accumulation into a
+  // worst-case sequence, then summing the per-step epsilons to produce a
+  // conservative error tolerance for the entire Dot operation.
   static std::vector<T> buildExpected(Op<OpType::Dot, T, 2> &Op,
                                       const InputSets<T> &Inputs,
                                       uint16_t ScalarInputFlags) {
     UNREFERENCED_PARAMETER(ScalarInputFlags);
 
-    // Accumulate in fp64 to improve precision.
-    double DotProduct = 0.0f;
-
-    std::vector<double> PositiveProducts;
+    std::vector<double> Products;
     std::vector<double> NegativeProducts;
 
     const size_t VectorSize = Inputs[0].size();
 
-    double AbsoluteEpsilon = 0.0f;
     const float ULPTolerance = 0.5f;
 
-    for (size_t I = 0; I < VectorSize; ++I) {
-      const double Product = Inputs[0][I] * Inputs[1][I];
+    // Accumulate in fp64 to improve precision.
+    double Product = Inputs[0][0] * Inputs[1][0];
+    double DotProduct = Product;
+    double AbsoluteEpsilon = computeAbsoluteEpsilon<T>(Product, ULPTolerance);
+    for (size_t I = 1; I < VectorSize; ++I) {
+      Product = Inputs[0][I] * Inputs[1][I];
       AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Product, ULPTolerance);
 
       DotProduct += Product;
 
       if (Product >= 0.0)
-        PositiveProducts.push_back(Product);
+        Products.push_back(Product);
       else
         NegativeProducts.push_back(Product);
     }
 
-    PositiveProducts.insert(PositiveProducts.end(), NegativeProducts.begin(), NegativeProducts.end());
+    // Sort each by magnitude so that we can accumulate in worst case order.
+    std::sort(Products.begin(), Products.end(), std::greater<double>());
+    std::sort(NegativeProducts.begin(), NegativeProducts.end());
+
+    // Put them together for final accumulation.
+    Products.reserve(Products.size() + NegativeProducts.size());
+    Products.insert(Products.end(), NegativeProducts.begin(), NegativeProducts.end());
 
-    double A = PositiveProducts.empty() ? 0.0 : PositiveProducts.front();
+    // Accumulate products in the worst case order while computing the absolute
+    // epsilon error for each intermediate step. And accumulate that error.
+    double Sum = PositiveProducts.empty() ? 0.0 : PositiveProducts.front();
     for(size_t I = 1; I < PositiveProducts.size(); ++I) {
-      A += PositiveProducts[I];
-      AbsoluteEpsilon += computeAbsoluteEpsilon<T>(AbsoluteEpsilon, ULPTolerance);
+      Sum += PositiveProducts[I];
+      AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Sum, ULPTolerance);
     }
 
     AbsoluteEpsilon += computeAbsoluteEpsilon<T>(AbsoluteEpsilon, ULPTolerance);
-    AbsoluteEpsilon = AbsoluteEpsilon * VectorSize;
     Op.ValidationConfig.Tolerance = static_cast<float>(AbsoluteEpsilon);
 
     std::vector<T> Expected;
@@ -1184,38 +1197,22 @@ STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
 
 static double computeAbsoluteEpsilon(double A, float ULPTolerance)
 {
-  // TODO: We will need to handle denormals, infinities, and NaNs here.
-  // But none of the test cases should be generating those right now.
-
-  if (A < 0.0)
-    A = -A;
+  A = std::abs(A);
 
   double ULP = 0.0;
 
   if constexpr (std::is_same_v<T, HLSLHalf_t>) {
-    // TODO: I dont think the 0 case is handled properly here.
-    // But none of the inputs should be 0 right now.
     HLSLHalf_t Next = A;
-    HLSLHalf_t Current = A;
-    if(Next.Val && 0x8000) {
-      // Negative
-      if(Next.Val == 0x8000)
-        Next.Val = 0x0000;
-      else
-        --Next.Val;
-    }
-    else {
-      ++Next.Val;
-    }
+    ++Next.Val;
 
+    // float is good enough to represent the ULP of a half.
+    // And we don't have an overridden cast for half to double because
+    // it creates ambiguity in many places and isn't necessary.
     float NextF = Next;
-    float CurrentF = Current;
-    ULP = std::abs(NextF - CurrentF);
-  }
-  else {
-    ULP =
-     (std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) - static_cast<T>(A));
+    ULP = NextF - static_cast<float>(A);
   }
+  else
+    ULP = (std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) - static_cast<T>(A));
 
   return ULP * ULPTolerance;
 }

From 4e5c969c0292ef15e5c30e2f8ffe9eea5cd42878 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Thu, 23 Oct 2025 11:16:35 -0700
Subject: [PATCH 07/25] Fix error tolerance for dot

---
 include/dxc/Test/HlslTestUtils.h              | 23 -------------
 .../unittests/HLSLExec/LongVectorTestData.h   |  9 +++++
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 33 ++++++++++---------
 3 files changed, 27 insertions(+), 38 deletions(-)

diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h
index b6dad40cd8..5a84f5cfbc 100644
--- a/include/dxc/Test/HlslTestUtils.h
+++ b/include/dxc/Test/HlslTestUtils.h
@@ -641,29 +641,6 @@ inline bool CompareFloatRelativeEpsilon(
   return CompareFloatULP(fsrc, fref, 23 - nRelativeExp, mode);
 }
 
-//inline HALF GetULPForHalf(HALF &Value) {
-//  HALF NextValue = Value;
-//
-//  //const uint16_t Exp = (NextValue >> 10) & 0x1F;
-//  //if(Exp == 0x1F)
-//  //  return std::numeric_limits<HALF>::infinity();
-//
-//  if(Value & FLOAT16_BIT_SIGN)
-//  {
-//    if(Value == FLOAT16_NEG_ZERO)
-//      NextValue = 0x0000;
-//    else
-//      --NextValue;
-//  }
-//  else {
-//    ++NextValue;
-//  }
-//
-//  float A = DirectX::PackedVector::XMConvertHalfToFloat(Value);
-//  float B = DirectX::PackedVector::XMConvertHalfToFloat(NextValue);
-//  return B - A;
-//}
-
 inline bool CompareHalfULP(const uint16_t &fsrc, const uint16_t &fref,
                            float ULPTolerance) {
   // Treat +0 and -0 as equal
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
index c6170acdef..82c92160a4 100644
--- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h
+++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
@@ -118,6 +118,15 @@ struct HLSLHalf_t {
   // float.
   HLSLHalf_t(DirectX::PackedVector::HALF) = delete;
 
+  static double GetULP(HLSLHalf_t A) {
+    HLSLHalf_t Next = A;
+    ++Next.Val;
+
+    double NextD = Next;
+    double AD = A;
+    return NextD - AD;
+  }
+
   static HLSLHalf_t FromHALF(DirectX::PackedVector::HALF Half) {
     HLSLHalf_t H;
     H.Val = Half;
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 2170642d05..0b5d081148 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1125,6 +1125,8 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
     const size_t VectorSize = Inputs[0].size();
 
+    // Floating point ops have a tolerance of 0.5 ULPs per operation as per the
+    // DX spec.
     const float ULPTolerance = 0.5f;
 
     // Accumulate in fp64 to improve precision.
@@ -1143,7 +1145,8 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
         NegativeProducts.push_back(Product);
     }
 
-    // Sort each by magnitude so that we can accumulate in worst case order.
+    // Sort each by magnitude so that we can accumulate them in worst case
+    // order.
     std::sort(Products.begin(), Products.end(), std::greater<double>());
     std::sort(NegativeProducts.begin(), NegativeProducts.end());
 
@@ -1153,13 +1156,15 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
     // Accumulate products in the worst case order while computing the absolute
     // epsilon error for each intermediate step. And accumulate that error.
-    double Sum = PositiveProducts.empty() ? 0.0 : PositiveProducts.front();
-    for(size_t I = 1; I < PositiveProducts.size(); ++I) {
-      Sum += PositiveProducts[I];
+    double Sum = Products.empty() ? 0.0 : Products.front();
+    for(size_t I = 1; I < Products.size(); ++I) {
+      Sum += Products[I];
       AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Sum, ULPTolerance);
     }
 
+    // Finally, compute and add in the ULP on our final sum of epsilons.
     AbsoluteEpsilon += computeAbsoluteEpsilon<T>(AbsoluteEpsilon, ULPTolerance);
+
     Op.ValidationConfig.Tolerance = static_cast<float>(AbsoluteEpsilon);
 
     std::vector<T> Expected;
@@ -1197,22 +1202,20 @@ STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
 
 static double computeAbsoluteEpsilon(double A, float ULPTolerance)
 {
+  if(isinf(A) || isnan(A))
+    // None of the existing input values should produce inf or nan results.
+    DXASSERT_NOMSG(false);
+
+  // ULP is a positive value by definition. So, working with abs(A) simplifies
+  // our logic for computing ULP in the first place.
   A = std::abs(A);
 
   double ULP = 0.0;
 
-  if constexpr (std::is_same_v<T, HLSLHalf_t>) {
-    HLSLHalf_t Next = A;
-    ++Next.Val;
-
-    // float is good enough to represent the ULP of a half.
-    // And we don't have an overridden cast for half to double because
-    // it creates ambiguity in many places and isn't necessary.
-    float NextF = Next;
-    ULP = NextF - static_cast<float>(A);
-  }
+  if constexpr (std::is_same_v<T, HLSLHalf_t>)
+    ULP = HLSLHalf_t::GetULP(A);
   else
-    ULP = (std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) - static_cast<T>(A));
+    ULP = std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) - static_cast<T>(A);
 
   return ULP * ULPTolerance;
 }

From 85936736f544956fcf60334462471c7e307b3cef Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Thu, 23 Oct 2025 13:02:24 -0700
Subject: [PATCH 08/25] Revert CIndexDiagnostic.h

---
 tools/clang/tools/libclang/CIndexDiagnostic.h | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/tools/clang/tools/libclang/CIndexDiagnostic.h b/tools/clang/tools/libclang/CIndexDiagnostic.h
index 491db31f55..05c63ff57c 100644
--- a/tools/clang/tools/libclang/CIndexDiagnostic.h
+++ b/tools/clang/tools/libclang/CIndexDiagnostic.h
@@ -29,11 +29,8 @@ class CXDiagnosticSetImpl {
   std::vector<std::unique_ptr<CXDiagnosticImpl>> Diagnostics;
   const bool IsExternallyManaged;
 public:
-  // HLSL Change start: definition moved out-of-line because CXDiagnosticImpl
-  // must be a complete type when ~unique_ptr<CXDiagnosticImpl> is instantiated
-  // in C++23 (Diagnostics member).
-  inline CXDiagnosticSetImpl(bool isManaged = false);
-  // HLSL Change end
+  CXDiagnosticSetImpl(bool isManaged = false)
+    : IsExternallyManaged(isManaged) {}
 
   virtual ~CXDiagnosticSetImpl();
 
@@ -110,14 +107,7 @@ class CXDiagnosticImpl {
 private:
   Kind K;
 };
-
-// HLSL Change start: definition moved out-of-line because CXDiagnosticImpl must
-// be a complete type when ~unique_ptr<CXDiagnosticImpl> is instantiated in
-// C++23 (Diagnostics member).
-CXDiagnosticSetImpl::CXDiagnosticSetImpl(bool isManaged)
-    : IsExternallyManaged(isManaged) {}
-// HLSL Change end
-
+  
 /// \brief The storage behind a CXDiagnostic
 struct CXStoredDiagnostic : public CXDiagnosticImpl {
   const StoredDiagnostic &Diag;

From a7ef0a41f1be87d363cc7b994120c052ab008403 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 23 Oct 2025 18:21:18 +0000
Subject: [PATCH 09/25] chore: autopublish 2025-10-23T18:21:18Z

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 0b5d081148..79186587af 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1152,12 +1152,13 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
     // Put them together for final accumulation.
     Products.reserve(Products.size() + NegativeProducts.size());
-    Products.insert(Products.end(), NegativeProducts.begin(), NegativeProducts.end());
+    Products.insert(Products.end(), NegativeProducts.begin(),
+                    NegativeProducts.end());
 
     // Accumulate products in the worst case order while computing the absolute
     // epsilon error for each intermediate step. And accumulate that error.
     double Sum = Products.empty() ? 0.0 : Products.front();
-    for(size_t I = 1; I < Products.size(); ++I) {
+    for (size_t I = 1; I < Products.size(); ++I) {
       Sum += Products[I];
       AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Sum, ULPTolerance);
     }
@@ -1203,6 +1204,8 @@ STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
 static double computeAbsoluteEpsilon(double A, float ULPTolerance)
 {
   if(isinf(A) || isnan(A))
+static double computeAbsoluteEpsilon(double A, float ULPTolerance) {
+  if (isinf(A) || isnan(A))
     // None of the existing input values should produce inf or nan results.
     DXASSERT_NOMSG(false);
 
@@ -1215,12 +1218,13 @@ static double computeAbsoluteEpsilon(double A, float ULPTolerance)
   if constexpr (std::is_same_v<T, HLSLHalf_t>)
     ULP = HLSLHalf_t::GetULP(A);
   else
-    ULP = std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) - static_cast<T>(A);
+    ULP =
+        std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) -
+        static_cast<T>(A);
 
   return ULP * ULPTolerance;
 }
 
-
 //
 // dispatchTest
 //

From aa8805f9c2dd92d56720229ffd5a04ce2e53f814 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Thu, 23 Oct 2025 17:22:42 -0700
Subject: [PATCH 10/25] revert argument type on op when reference not needed

---
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 38 ++++++++++++++-----
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 79186587af..201ee115af 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -12,11 +12,11 @@
 
 #include "HlslExecTestUtils.h"
 
-#include <algorithm> // For sort
+#include <algorithm>
 #include <array>
 #include <bitset>
 #include <iomanip>
-#include <numeric> // For accumulate
+#include <numeric>
 #include <optional>
 #include <sstream>
 #include <string>
@@ -601,7 +601,7 @@ template <typename T> struct DefaultValidation {
 };
 
 // Strict Validation - Defaults to exact matches.
-// Tolerance cant be set to a non-zero value to allow for a wider range.
+// Tolerance can be set to a non-zero value to allow for a wider range.
 struct StrictValidation {
   ValidationConfig ValidationConfig;
 };
@@ -1174,6 +1174,28 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
   }
 };
 
+template <typename T>
+static double computeAbsoluteEpsilon(double A, float ULPTolerance) {
+  if (isinf(A) || isnan(A))
+    // None of the existing input values should produce inf or nan results.
+    DXASSERT_NOMSG(false);
+
+  // ULP is a positive value by definition. So, working with abs(A) simplifies
+  // our logic for computing ULP in the first place.
+  A = std::abs(A);
+
+  double ULP = 0.0;
+
+  if constexpr (std::is_same_v<T, HLSLHalf_t>)
+    ULP = HLSLHalf_t::GetULP(A);
+  else
+    ULP =
+        std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) -
+        static_cast<T>(A);
+
+  return ULP * ULPTolerance;
+}
+
 template <typename T>
 struct Op<OpType::ShuffleVector, T, 1> : DefaultValidation<T> {};
 template <typename T> struct ExpectedBuilder<OpType::ShuffleVector, T> {
@@ -1231,9 +1253,7 @@ static double computeAbsoluteEpsilon(double A, float ULPTolerance) {
 
 template <OpType OP, typename T> struct ExpectedBuilder {
 
-  static auto buildExpected(Op<OP, T, 1> &Op, const InputSets<T> &Inputs,
-                            uint16_t ScalarInputFlags) {
-    UNREFERENCED_PARAMETER(ScalarInputFlags);
+  static auto buildExpected(Op<OP, T, 1> Op, const InputSets<T> &Inputs) {
     DXASSERT_NOMSG(Inputs.size() == 1);
 
     std::vector<decltype(Op(T()))> Expected;
@@ -1245,8 +1265,7 @@ template <OpType OP, typename T> struct ExpectedBuilder {
     return Expected;
   }
 
-  static auto buildExpected(Op<OP, T, 2> &Op, const InputSets<T> &Inputs,
-                            uint16_t ScalarInputFlags) {
+  static auto buildExpected(Op<OP, T, 2> Op, const InputSets<T> &Inputs) {
     DXASSERT_NOMSG(Inputs.size() == 2);
 
     std::vector<decltype(Op(T(), T()))> Expected;
@@ -1258,8 +1277,7 @@ template <OpType OP, typename T> struct ExpectedBuilder {
     return Expected;
   }
 
-  static auto buildExpected(Op<OP, T, 3> &Op, const InputSets<T> &Inputs,
-                            uint16_t ScalarInputFlags) {
+  static auto buildExpected(Op<OP, T, 3> Op, const InputSets<T> &Inputs) {
     DXASSERT_NOMSG(Inputs.size() == 3);
 
     std::vector<decltype(Op(T(), T(), T()))> Expected;

From 45914bf019b49c4525ef891506e90bb4730046b8 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Mon, 27 Oct 2025 13:26:40 -0700
Subject: [PATCH 11/25] Some code review updates for Tex

---
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 35 ++++++++++---------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 201ee115af..1c1cce076c 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1120,7 +1120,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
                                       uint16_t ScalarInputFlags) {
     UNREFERENCED_PARAMETER(ScalarInputFlags);
 
-    std::vector<double> Products;
+    std::vector<double> PositiveProducts;
     std::vector<double> NegativeProducts;
 
     const size_t VectorSize = Inputs[0].size();
@@ -1140,31 +1140,34 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
       DotProduct += Product;
 
       if (Product >= 0.0)
-        Products.push_back(Product);
+        PositiveProducts.push_back(Product);
       else
         NegativeProducts.push_back(Product);
     }
 
     // Sort each by magnitude so that we can accumulate them in worst case
     // order.
-    std::sort(Products.begin(), Products.end(), std::greater<double>());
+    std::sort(PositiveProducts.begin(), PositiveProducts.end(), std::greater<double>());
     std::sort(NegativeProducts.begin(), NegativeProducts.end());
 
-    // Put them together for final accumulation.
-    Products.reserve(Products.size() + NegativeProducts.size());
-    Products.insert(Products.end(), NegativeProducts.begin(),
-                    NegativeProducts.end());
+    // Helper to sum the products and compute/add to the running absolute
+    // epsilon total.
+    auto SumProducts = [&](const std::vector<double> &Values){
+      double Sum = 0;
+      for (size_t I = 1; I < Values.size(); ++I) {
+        Sum += Values[I];
+        AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Sum, ULPTolerance);
+      }
+      return Sum;
+    };
 
     // Accumulate products in the worst case order while computing the absolute
     // epsilon error for each intermediate step. And accumulate that error.
-    double Sum = Products.empty() ? 0.0 : Products.front();
-    for (size_t I = 1; I < Products.size(); ++I) {
-      Sum += Products[I];
-      AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Sum, ULPTolerance);
-    }
+    const double SumPos = SumProducts(PositiveProducts);
+    const double SumNeg = SumProducts(NegativeProducts);
 
-    // Finally, compute and add in the ULP on our final sum of epsilons.
-    AbsoluteEpsilon += computeAbsoluteEpsilon<T>(AbsoluteEpsilon, ULPTolerance);
+    if(!PositiveProducts.empty() && !NegativeProducts.empty())
+      AbsoluteEpsilon += computeAbsoluteEpsilon<T>((SumPos + SumNeg), ULPTolerance);
 
     Op.ValidationConfig.Tolerance = static_cast<float>(AbsoluteEpsilon);
 
@@ -1176,9 +1179,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
 template <typename T>
 static double computeAbsoluteEpsilon(double A, float ULPTolerance) {
-  if (isinf(A) || isnan(A))
-    // None of the existing input values should produce inf or nan results.
-    DXASSERT_NOMSG(false);
+  DXASSERT((!isinf(A) && !isnan(A)), "Input values should not produce inf or nan results");
 
   // ULP is a positive value by definition. So, working with abs(A) simplifies
   // our logic for computing ULP in the first place.

From 542fd0c26b931aebb3c63a41e59f29919ff76a56 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Mon, 27 Oct 2025 20:30:01 +0000
Subject: [PATCH 12/25] chore: autopublish 2025-10-27T20:30:01Z

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 1c1cce076c..063bc15bcd 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1147,12 +1147,13 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
     // Sort each by magnitude so that we can accumulate them in worst case
     // order.
-    std::sort(PositiveProducts.begin(), PositiveProducts.end(), std::greater<double>());
+    std::sort(PositiveProducts.begin(), PositiveProducts.end(),
+              std::greater<double>());
     std::sort(NegativeProducts.begin(), NegativeProducts.end());
 
     // Helper to sum the products and compute/add to the running absolute
     // epsilon total.
-    auto SumProducts = [&](const std::vector<double> &Values){
+    auto SumProducts = [&](const std::vector<double> &Values) {
       double Sum = 0;
       for (size_t I = 1; I < Values.size(); ++I) {
         Sum += Values[I];
@@ -1166,8 +1167,9 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
     const double SumPos = SumProducts(PositiveProducts);
     const double SumNeg = SumProducts(NegativeProducts);
 
-    if(!PositiveProducts.empty() && !NegativeProducts.empty())
-      AbsoluteEpsilon += computeAbsoluteEpsilon<T>((SumPos + SumNeg), ULPTolerance);
+    if (!PositiveProducts.empty() && !NegativeProducts.empty())
+      AbsoluteEpsilon +=
+          computeAbsoluteEpsilon<T>((SumPos + SumNeg), ULPTolerance);
 
     Op.ValidationConfig.Tolerance = static_cast<float>(AbsoluteEpsilon);
 
@@ -1179,7 +1181,8 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
 template <typename T>
 static double computeAbsoluteEpsilon(double A, float ULPTolerance) {
-  DXASSERT((!isinf(A) && !isnan(A)), "Input values should not produce inf or nan results");
+  DXASSERT((!isinf(A) && !isnan(A)),
+           "Input values should not produce inf or nan results");
 
   // ULP is a positive value by definition. So, working with abs(A) simplifies
   // our logic for computing ULP in the first place.

From f90fc5694e59e026b2dfdba44ec57f31080635a8 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Mon, 27 Oct 2025 16:56:53 -0700
Subject: [PATCH 13/25] Some more feedback from Tex

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 063bc15bcd..734dee2cfc 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1155,7 +1155,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
     // epsilon total.
     auto SumProducts = [&](const std::vector<double> &Values) {
       double Sum = 0;
-      for (size_t I = 1; I < Values.size(); ++I) {
+      for (size_t I = 0; I < Values.size(); ++I) {
         Sum += Values[I];
         AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Sum, ULPTolerance);
       }

From 7e1f9ea2cba3d2bfa1cae1bae8095fec375f6958 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alexsepkowski@gmail.com>
Date: Mon, 27 Oct 2025 16:46:17 -0700
Subject: [PATCH 14/25] Update tools/clang/unittests/HLSLExec/LongVectors.cpp

Co-authored-by: Tex Riddell <texr@microsoft.com>
---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 734dee2cfc..ebae66c7c9 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1130,11 +1130,10 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
     const float ULPTolerance = 0.5f;
 
     // Accumulate in fp64 to improve precision.
-    double Product = Inputs[0][0] * Inputs[1][0];
-    double DotProduct = Product;
-    double AbsoluteEpsilon = computeAbsoluteEpsilon<T>(Product, ULPTolerance);
-    for (size_t I = 1; I < VectorSize; ++I) {
-      Product = Inputs[0][I] * Inputs[1][I];
+    double DotProduct = 0.0;      // computed reference result
+    double AbsoluteEpsilon = 0.0; // computed tolerance
+    for (size_t I = 0; I < VectorSize; ++I) {
+      double Product = Inputs[0][I] * Inputs[1][I];
       AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Product, ULPTolerance);
 
       DotProduct += Product;

From ad6cbde9193ff0d7493b04cc0a5c88d1ad356c41 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alexsepkowski@gmail.com>
Date: Mon, 27 Oct 2025 16:46:32 -0700
Subject: [PATCH 15/25] Update tools/clang/unittests/HLSLExec/LongVectors.cpp

Co-authored-by: Tex Riddell <texr@microsoft.com>
---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index ebae66c7c9..d8d6865f5e 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1152,7 +1152,8 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
     // Helper to sum the products and compute/add to the running absolute
     // epsilon total.
-    auto SumProducts = [&](const std::vector<double> &Values) {
+    auto SumProducts =
+        [&AbsoluteEpsilon, ULPTolerance](const std::vector<double> &Values) {
       double Sum = 0;
       for (size_t I = 0; I < Values.size(); ++I) {
         Sum += Values[I];

From 93c08eeb298950f654db0eff1bbdb61677284216 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Mon, 27 Oct 2025 17:03:09 -0700
Subject: [PATCH 16/25] Fix minor issue with sum loop

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index d8d6865f5e..c56dfe8cef 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1154,8 +1154,8 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
     // epsilon total.
     auto SumProducts =
         [&AbsoluteEpsilon, ULPTolerance](const std::vector<double> &Values) {
-      double Sum = 0;
-      for (size_t I = 0; I < Values.size(); ++I) {
+      double Sum = Values.empty() ? 0.0 : Values[0];
+      for (size_t I = 1; I < Values.size(); ++I) {
         Sum += Values[I];
         AbsoluteEpsilon += computeAbsoluteEpsilon<T>(Sum, ULPTolerance);
       }

From 435b670a2185da039cf546bdcba1e40349b92f58 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Mon, 27 Oct 2025 17:28:57 -0700
Subject: [PATCH 17/25] Add assert to prevent inf and nan in HLSLHalf_t::GetULP

---
 include/dxc/Test/HlslTestUtils.h                    | 4 ++++
 tools/clang/unittests/HLSLExec/LongVectorTestData.h | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h
index 5a84f5cfbc..7617874202 100644
--- a/include/dxc/Test/HlslTestUtils.h
+++ b/include/dxc/Test/HlslTestUtils.h
@@ -527,6 +527,10 @@ static const uint16_t Float16NegDenorm = 0x8008;
 static const uint16_t Float16PosZero = 0x0000;
 static const uint16_t Float16NegZero = 0x8000;
 
+inline bool IsInfFloat16(uint16_t val) {
+  return (val == Float16PosInf) || (val == Float16NegInf);
+}
+
 inline bool GetSign(float x) { return std::signbit(x); }
 
 inline int GetMantissa(float x) {
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
index 82c92160a4..a4113cca2a 100644
--- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h
+++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
@@ -119,6 +119,9 @@ struct HLSLHalf_t {
   HLSLHalf_t(DirectX::PackedVector::HALF) = delete;
 
   static double GetULP(HLSLHalf_t A) {
+    DXASSERT(!IsInfFloat16(A.Val), "ULP of infinity is undefined");
+    DXASSERT(!IsNanFloat16(A.Val), "ULP of NaN is undefined");
+
     HLSLHalf_t Next = A;
     ++Next.Val;
 

From a4f4472cf61ed61aaba7e7ce4b5233073e9f516b Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Mon, 27 Oct 2025 17:48:35 -0700
Subject: [PATCH 18/25] Can use std functions

---
 include/dxc/Test/HlslTestUtils.h                    | 4 ----
 tools/clang/unittests/HLSLExec/LongVectorTestData.h | 6 ++++--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h
index 7617874202..5a84f5cfbc 100644
--- a/include/dxc/Test/HlslTestUtils.h
+++ b/include/dxc/Test/HlslTestUtils.h
@@ -527,10 +527,6 @@ static const uint16_t Float16NegDenorm = 0x8008;
 static const uint16_t Float16PosZero = 0x0000;
 static const uint16_t Float16NegZero = 0x8000;
 
-inline bool IsInfFloat16(uint16_t val) {
-  return (val == Float16PosInf) || (val == Float16NegInf);
-}
-
 inline bool GetSign(float x) { return std::signbit(x); }
 
 inline int GetMantissa(float x) {
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
index a4113cca2a..1aaca6a467 100644
--- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h
+++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
@@ -12,6 +12,8 @@
 #include <DirectXMath.h>
 #include <DirectXPackedVector.h>
 
+#include "dxc/Support/Global.h"
+
 namespace LongVector {
 
 // A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes.
@@ -119,8 +121,8 @@ struct HLSLHalf_t {
   HLSLHalf_t(DirectX::PackedVector::HALF) = delete;
 
   static double GetULP(HLSLHalf_t A) {
-    DXASSERT(!IsInfFloat16(A.Val), "ULP of infinity is undefined");
-    DXASSERT(!IsNanFloat16(A.Val), "ULP of NaN is undefined");
+    DXASSERT(!std::isnan(A) && !std::isinf(A),
+             "ULP of NaN or infinity is undefined");
 
     HLSLHalf_t Next = A;
     ++Next.Val;

From a9ddd02a52a3cbee78fbe0323ef9d3886dd7f4e6 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Tue, 28 Oct 2025 00:31:17 +0000
Subject: [PATCH 19/25] chore: autopublish 2025-10-28T00:31:17Z

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index c56dfe8cef..2a014bfe28 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1152,8 +1152,8 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
     // Helper to sum the products and compute/add to the running absolute
     // epsilon total.
-    auto SumProducts =
-        [&AbsoluteEpsilon, ULPTolerance](const std::vector<double> &Values) {
+    auto SumProducts = [&AbsoluteEpsilon,
+                        ULPTolerance](const std::vector<double> &Values) {
       double Sum = Values.empty() ? 0.0 : Values[0];
       for (size_t I = 1; I < Values.size(); ++I) {
         Sum += Values[I];

From a5fa085d6c790a0139798e2ff4e7a3e34d579e8b Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Mon, 27 Oct 2025 17:55:27 -0700
Subject: [PATCH 20/25] Don't need to include numeric anymore

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 2a014bfe28..dbcce531fd 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -16,7 +16,6 @@
 #include <array>
 #include <bitset>
 #include <iomanip>
-#include <numeric>
 #include <optional>
 #include <sstream>
 #include <string>

From 09bc6cbb66efb3b559ef73c4612ed5ccd0f1e62f Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Tue, 28 Oct 2025 20:38:11 -0700
Subject: [PATCH 21/25] Change validation config to use double instead of
 float. Higher precision.

---
 include/dxc/Test/HlslTestUtils.h              |  2 +-
 .../clang/unittests/HLSLExec/LongVectors.cpp  | 66 ++++++-------------
 2 files changed, 20 insertions(+), 48 deletions(-)

diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h
index 5a84f5cfbc..f7b941a1bb 100644
--- a/include/dxc/Test/HlslTestUtils.h
+++ b/include/dxc/Test/HlslTestUtils.h
@@ -580,7 +580,7 @@ inline bool CompareDoubleULP(
 }
 
 inline bool CompareDoubleEpsilon(const double &Src, const double &Ref,
-                                 float Epsilon) {
+                                 double Epsilon) {
   if (Src == Ref) {
     return true;
   }
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index dbcce531fd..eea3c39a88 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -176,25 +176,25 @@ enum class ValidationType {
 };
 
 template <typename T>
-bool doValuesMatch(T A, T B, float Tolerance, ValidationType) {
-  if (Tolerance == 0.0f)
+bool doValuesMatch(T A, T B, double Tolerance, ValidationType) {
+  if (Tolerance == 0.0)
     return A == B;
 
   T Diff = A > B ? A - B : B - A;
   return Diff <= Tolerance;
 }
 
-bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, float, ValidationType) {
+bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) {
   return A == B;
 }
 
-bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, float Tolerance,
+bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance,
                    ValidationType ValidationType) {
   switch (ValidationType) {
   case ValidationType::Epsilon:
-    return CompareHalfEpsilon(A.Val, B.Val, Tolerance);
+    return CompareHalfEpsilon(A.Val, B.Val, static_cast<float>(Tolerance));
   case ValidationType::Ulp:
-    return CompareHalfULP(A.Val, B.Val, Tolerance);
+    return CompareHalfULP(A.Val, B.Val, static_cast<float>(Tolerance));
   default:
     hlsl_test::LogErrorFmt(
         L"Invalid ValidationType. Expecting Epsilon or ULP.");
@@ -202,11 +202,11 @@ bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, float Tolerance,
   }
 }
 
-bool doValuesMatch(float A, float B, float Tolerance,
+bool doValuesMatch(float A, float B, double Tolerance,
                    ValidationType ValidationType) {
   switch (ValidationType) {
   case ValidationType::Epsilon:
-    return CompareFloatEpsilon(A, B, Tolerance);
+    return CompareFloatEpsilon(A, B, static_cast<float>(Tolerance));
   case ValidationType::Ulp: {
     // Tolerance is in ULPs. Convert to int for the comparison.
     const int IntTolerance = static_cast<int>(Tolerance);
@@ -219,7 +219,7 @@ bool doValuesMatch(float A, float B, float Tolerance,
   }
 }
 
-bool doValuesMatch(double A, double B, float Tolerance,
+bool doValuesMatch(double A, double B, double Tolerance,
                    ValidationType ValidationType) {
   switch (ValidationType) {
   case ValidationType::Epsilon:
@@ -238,7 +238,7 @@ bool doValuesMatch(double A, double B, float Tolerance,
 
 template <typename T>
 bool doVectorsMatch(const std::vector<T> &ActualValues,
-                    const std::vector<T> &ExpectedValues, float Tolerance,
+                    const std::vector<T> &ExpectedValues, double Tolerance,
                     ValidationType ValidationType, bool VerboseLogging) {
 
   DXASSERT(
@@ -540,14 +540,14 @@ InputSets<T> buildTestInputs(size_t VectorSize, const InputSet OpInputSets[3],
 }
 
 struct ValidationConfig {
-  float Tolerance = 0.0f;
+  double Tolerance = 0.0;
   ValidationType Type = ValidationType::Epsilon;
 
-  static ValidationConfig Epsilon(float Tolerance) {
+  static ValidationConfig Epsilon(double Tolerance) {
     return ValidationConfig{Tolerance, ValidationType::Epsilon};
   }
 
-  static ValidationConfig Ulp(float Tolerance) {
+  static ValidationConfig Ulp(double Tolerance) {
     return ValidationConfig{Tolerance, ValidationType::Ulp};
   }
 };
@@ -943,8 +943,7 @@ struct Op<OpType::AsUint_SplitDouble, double, 1> : StrictValidation {};
 template <> struct ExpectedBuilder<OpType::AsUint_SplitDouble, double> {
   static std::vector<uint32_t>
   buildExpected(Op<OpType::AsUint_SplitDouble, double, 1> &,
-                const InputSets<double> &Inputs, uint16_t ScalarInputFlags) {
-    DXASSERT_NOMSG(ScalarInputFlags == 0);
+                const InputSets<double> &Inputs) {
     DXASSERT_NOMSG(Inputs.size() == 1);
 
     size_t VectorSize = Inputs[0].size();
@@ -1018,8 +1017,7 @@ template <> struct Op<OpType::Frexp, float, 1> : DefaultValidation<float> {};
 
 template <> struct ExpectedBuilder<OpType::Frexp, float> {
   static std::vector<float> buildExpected(Op<OpType::Frexp, float, 1> &,
-                                          const InputSets<float> &Inputs,
-                                          uint32_t) {
+                                          const InputSets<float> &Inputs) {
     DXASSERT_NOMSG(Inputs.size() == 1);
 
     // Expected values size is doubled. In the first half we store the
@@ -1089,7 +1087,7 @@ OP_3(OpType::Select, StrictValidation, (static_cast<bool>(A) ? B : C));
   template <typename T> struct Op<OP, T, 1> : StrictValidation {};             \
   template <typename T> struct ExpectedBuilder<OP, T> {                        \
     static std::vector<HLSLBool_t>                                             \
-    buildExpected(Op<OP, T, 1> &, const InputSets<T> &Inputs, uint16_t) {      \
+    buildExpected(Op<OP, T, 1> &, const InputSets<T> &Inputs) {                \
       const bool Res = STDFUNC(Inputs[0].begin(), Inputs[0].end(),             \
                                [](T A) { return A != static_cast<T>(0); });    \
       return std::vector<HLSLBool_t>{Res};                                     \
@@ -1115,9 +1113,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
   // worst-case sequence, then summing the per-step epsilons to produce a
   // conservative error tolerance for the entire Dot operation.
   static std::vector<T> buildExpected(Op<OpType::Dot, T, 2> &Op,
-                                      const InputSets<T> &Inputs,
-                                      uint16_t ScalarInputFlags) {
-    UNREFERENCED_PARAMETER(ScalarInputFlags);
+                                      const InputSets<T> &Inputs) {
 
     std::vector<double> PositiveProducts;
     std::vector<double> NegativeProducts;
@@ -1126,7 +1122,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 
     // Floating point ops have a tolerance of 0.5 ULPs per operation as per the
     // DX spec.
-    const float ULPTolerance = 0.5f;
+    const double ULPTolerance = 0.5;
 
     // Accumulate in fp64 to improve precision.
     double DotProduct = 0.0;      // computed reference result
@@ -1179,7 +1175,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
 };
 
 template <typename T>
-static double computeAbsoluteEpsilon(double A, float ULPTolerance) {
+static double computeAbsoluteEpsilon(double A, double ULPTolerance) {
   DXASSERT((!isinf(A) && !isnan(A)),
            "Input values should not produce inf or nan results");
 
@@ -1226,30 +1222,6 @@ STRICT_OP_1(OpType::LoadAndStore_DT_SB_SRV, (A));
 STRICT_OP_1(OpType::LoadAndStore_RD_SB_UAV, (A));
 STRICT_OP_1(OpType::LoadAndStore_RD_SB_SRV, (A));
 
-static double computeAbsoluteEpsilon(double A, float ULPTolerance)
-{
-  if(isinf(A) || isnan(A))
-static double computeAbsoluteEpsilon(double A, float ULPTolerance) {
-  if (isinf(A) || isnan(A))
-    // None of the existing input values should produce inf or nan results.
-    DXASSERT_NOMSG(false);
-
-  // ULP is a positive value by definition. So, working with abs(A) simplifies
-  // our logic for computing ULP in the first place.
-  A = std::abs(A);
-
-  double ULP = 0.0;
-
-  if constexpr (std::is_same_v<T, HLSLHalf_t>)
-    ULP = HLSLHalf_t::GetULP(A);
-  else
-    ULP =
-        std::nextafter(static_cast<T>(A), std::numeric_limits<T>::infinity()) -
-        static_cast<T>(A);
-
-  return ULP * ULPTolerance;
-}
-
 //
 // dispatchTest
 //

From 3391ef617cc89bd2f885b2aded44d1744f436a37 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Tue, 28 Oct 2025 20:39:25 -0700
Subject: [PATCH 22/25] Clang format

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index eea3c39a88..0f52b58171 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1086,8 +1086,8 @@ OP_3(OpType::Select, StrictValidation, (static_cast<bool>(A) ? B : C));
 #define REDUCTION_OP(OP, STDFUNC)                                              \
   template <typename T> struct Op<OP, T, 1> : StrictValidation {};             \
   template <typename T> struct ExpectedBuilder<OP, T> {                        \
-    static std::vector<HLSLBool_t>                                             \
-    buildExpected(Op<OP, T, 1> &, const InputSets<T> &Inputs) {                \
+    static std::vector<HLSLBool_t> buildExpected(Op<OP, T, 1> &,               \
+                                                 const InputSets<T> &Inputs) { \
       const bool Res = STDFUNC(Inputs[0].begin(), Inputs[0].end(),             \
                                [](T A) { return A != static_cast<T>(0); });    \
       return std::vector<HLSLBool_t>{Res};                                     \

From 44d7bc1beda31fe530931beed0ddd38dee419666 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Wed, 29 Oct 2025 03:42:42 +0000
Subject: [PATCH 23/25] chore: autopublish 2025-10-29T03:42:42Z

---
 tools/clang/tools/libclang/CIndexDiagnostic.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/clang/tools/libclang/CIndexDiagnostic.h b/tools/clang/tools/libclang/CIndexDiagnostic.h
index 05c63ff57c..4d8badd893 100644
--- a/tools/clang/tools/libclang/CIndexDiagnostic.h
+++ b/tools/clang/tools/libclang/CIndexDiagnostic.h
@@ -30,7 +30,7 @@ class CXDiagnosticSetImpl {
   const bool IsExternallyManaged;
 public:
   CXDiagnosticSetImpl(bool isManaged = false)
-    : IsExternallyManaged(isManaged) {}
+      : IsExternallyManaged(isManaged) {}
 
   virtual ~CXDiagnosticSetImpl();
 
@@ -107,7 +107,7 @@ class CXDiagnosticImpl {
 private:
   Kind K;
 };
-  
+
 /// \brief The storage behind a CXDiagnostic
 struct CXStoredDiagnostic : public CXDiagnosticImpl {
   const StoredDiagnostic &Diag;

From e0d0a2b97b691a1b103d324439cc6d2e94e0b373 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Tue, 28 Oct 2025 20:43:05 -0700
Subject: [PATCH 24/25] Restore CIndexDiagnostic.h, again, again

---
 tools/clang/tools/libclang/CIndexDiagnostic.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/tools/clang/tools/libclang/CIndexDiagnostic.h b/tools/clang/tools/libclang/CIndexDiagnostic.h
index 05c63ff57c..491db31f55 100644
--- a/tools/clang/tools/libclang/CIndexDiagnostic.h
+++ b/tools/clang/tools/libclang/CIndexDiagnostic.h
@@ -29,8 +29,11 @@ class CXDiagnosticSetImpl {
   std::vector<std::unique_ptr<CXDiagnosticImpl>> Diagnostics;
   const bool IsExternallyManaged;
 public:
-  CXDiagnosticSetImpl(bool isManaged = false)
-    : IsExternallyManaged(isManaged) {}
+  // HLSL Change start: definition moved out-of-line because CXDiagnosticImpl
+  // must be a complete type when ~unique_ptr<CXDiagnosticImpl> is instantiated
+  // in C++23 (Diagnostics member).
+  inline CXDiagnosticSetImpl(bool isManaged = false);
+  // HLSL Change end
 
   virtual ~CXDiagnosticSetImpl();
 
@@ -107,7 +110,14 @@ class CXDiagnosticImpl {
 private:
   Kind K;
 };
-  
+
+// HLSL Change start: definition moved out-of-line because CXDiagnosticImpl must
+// be a complete type when ~unique_ptr<CXDiagnosticImpl> is instantiated in
+// C++23 (Diagnostics member).
+CXDiagnosticSetImpl::CXDiagnosticSetImpl(bool isManaged)
+    : IsExternallyManaged(isManaged) {}
+// HLSL Change end
+
 /// \brief The storage behind a CXDiagnostic
 struct CXStoredDiagnostic : public CXDiagnosticImpl {
   const StoredDiagnostic &Diag;

From 00b6b2f3770d52d4798180152cc3286027365822 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com>
Date: Wed, 29 Oct 2025 13:54:01 -0700
Subject: [PATCH 25/25] Last minor feedback from Tex

---
 tools/clang/unittests/HLSLExec/LongVectors.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 0f52b58171..f80925740f 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -1166,7 +1166,7 @@ template <typename T> struct ExpectedBuilder<OpType::Dot, T> {
       AbsoluteEpsilon +=
           computeAbsoluteEpsilon<T>((SumPos + SumNeg), ULPTolerance);
 
-    Op.ValidationConfig.Tolerance = static_cast<float>(AbsoluteEpsilon);
+    Op.ValidationConfig = ValidationConfig::Epsilon(AbsoluteEpsilon);
 
     std::vector<T> Expected;
     Expected.push_back(static_cast<T>(DotProduct));