ssh4net
diff --git a/src/libOpenImageIO/imagebufalgo_addsub.cpp b/src/libOpenImageIO/imagebufalgo_addsub.cpp
Lines changed: 46 additions & 35 deletions
@@ -64,8 +64,8 @@ add_impl_scalar(ImageBuf& R, const ImageBuf& A, cspan<float> b, ROI roi,
 // Native integer add using SaturatedAdd (scale-invariant, no float conversion)
 template<class T>
 static bool
-add_impl_hwy_native_int(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
-                        int nthreads)
+add_impl_hwy_native_int(ImageBuf& R, const ImageBuf& A, const ImageBuf& B,
+                        ROI roi, int nthreads)
 {
     ImageBufAlgo::parallel_image(roi, nthreads, [&](ROI roi) {
         const ImageSpec& Rspec = R.spec();
@@ -103,31 +103,34 @@ add_impl_hwy_native_int(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI r
             if (contig) {
                 // Native integer saturated add - much faster than float conversion!
                 size_t n = static_cast<size_t>(roi.width()) * nchannels;
-                RunHwyBinaryNativeInt<T>(
-                    reinterpret_cast<T*>(r_row),
-                    reinterpret_cast<const T*>(a_row),
-                    reinterpret_cast<const T*>(b_row), n,
-                    [](auto d, auto a, auto b) { return hn::SaturatedAdd(a, b); });
+                RunHwyBinaryNativeInt<T>(reinterpret_cast<T*>(r_row),
+                                         reinterpret_cast<const T*>(a_row),
+                                         reinterpret_cast<const T*>(b_row), n,
+                                         [](auto d, auto a, auto b) {
+                                             return hn::SaturatedAdd(a, b);
+                                         });
             } else {
                 // Scalar fallback
                 for (int x = 0; x < roi.width(); ++x) {
                     T* r_ptr = reinterpret_cast<T*>(r_row)
-                                   + x * r_pixel_bytes / sizeof(T);
+                               + x * r_pixel_bytes / sizeof(T);
                     const T* a_ptr = reinterpret_cast<const T*>(a_row)
-                                         + x * a_pixel_bytes / sizeof(T);
+                                     + x * a_pixel_bytes / sizeof(T);
                     const T* b_ptr = reinterpret_cast<const T*>(b_row)
-                                         + x * b_pixel_bytes / sizeof(T);
+                                     + x * b_pixel_bytes / sizeof(T);
                     for (int c = 0; c < nchannels; ++c) {
                         // Saturating add in scalar
                         int64_t sum = (int64_t)a_ptr[c] + (int64_t)b_ptr[c];
                         if constexpr (std::is_unsigned_v<T>) {
                             r_ptr[c] = (sum > std::numeric_limits<T>::max())
-                                ? std::numeric_limits<T>::max() : (T)sum;
+                                           ? std::numeric_limits<T>::max()
+                                           : (T)sum;
                         } else {
                             r_ptr[c] = (sum > std::numeric_limits<T>::max())
-                                ? std::numeric_limits<T>::max()
-                                : (sum < std::numeric_limits<T>::min())
-                                    ? std::numeric_limits<T>::min() : (T)sum;
+                                           ? std::numeric_limits<T>::max()
+                                       : (sum < std::numeric_limits<T>::min())
+                                           ? std::numeric_limits<T>::min()
+                                           : (T)sum;
                         }
                     }
                 }
@@ -193,8 +196,9 @@ add_impl_hwy(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
                     const Btype* b_ptr = reinterpret_cast<const Btype*>(b_row)
                                          + x * b_pixel_bytes / sizeof(Btype);
                     for (int c = 0; c < nchannels; ++c) {
-                        r_ptr[c] = static_cast<Rtype>(static_cast<float>(a_ptr[c]) +
-                                                       static_cast<float>(b_ptr[c]));
+                        r_ptr[c] = static_cast<Rtype>(
+                            static_cast<float>(a_ptr[c])
+                            + static_cast<float>(b_ptr[c]));
                     }
                 }
             }
@@ -248,7 +252,8 @@ add_impl(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
         && B.localpixels()) {
         // Use native integer path for scale-invariant add when all types match
         // and are integer types (much faster: 6-12x vs 3-5x with float conversion)
-        constexpr bool all_same = std::is_same_v<Rtype, Atype> && std::is_same_v<Atype, Btype>;
+        constexpr bool all_same = std::is_same_v<Rtype, Atype>
+                                  && std::is_same_v<Atype, Btype>;
         constexpr bool is_integer = std::is_integral_v<Rtype>;
         if constexpr (all_same && is_integer) {
             return add_impl_hwy_native_int<Rtype>(R, A, B, roi, nthreads);
@@ -270,8 +275,8 @@ add_impl(ImageBuf& R, const ImageBuf& A, cspan<float> b, ROI roi, int nthreads)
 // Native integer sub using SaturatedSub (scale-invariant, no float conversion)
 template<class T>
 static bool
-sub_impl_hwy_native_int(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
-                        int nthreads)
+sub_impl_hwy_native_int(ImageBuf& R, const ImageBuf& A, const ImageBuf& B,
+                        ROI roi, int nthreads)
 {
     ImageBufAlgo::parallel_image(roi, nthreads, [&](ROI roi) {
         const ImageSpec& Rspec = R.spec();
@@ -309,31 +314,35 @@ sub_impl_hwy_native_int(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI r
             if (contig) {
                 // Native integer saturated sub - much faster than float conversion!
                 size_t n = static_cast<size_t>(roi.width()) * nchannels;
-                RunHwyBinaryNativeInt<T>(
-                    reinterpret_cast<T*>(r_row),
-                    reinterpret_cast<const T*>(a_row),
-                    reinterpret_cast<const T*>(b_row), n,
-                    [](auto d, auto a, auto b) { return hn::SaturatedSub(a, b); });
+                RunHwyBinaryNativeInt<T>(reinterpret_cast<T*>(r_row),
+                                         reinterpret_cast<const T*>(a_row),
+                                         reinterpret_cast<const T*>(b_row), n,
+                                         [](auto d, auto a, auto b) {
+                                             return hn::SaturatedSub(a, b);
+                                         });
             } else {
                 // Scalar fallback
                 for (int x = 0; x < roi.width(); ++x) {
                     T* r_ptr = reinterpret_cast<T*>(r_row)
-                                   + x * r_pixel_bytes / sizeof(T);
+                               + x * r_pixel_bytes / sizeof(T);
                     const T* a_ptr = reinterpret_cast<const T*>(a_row)
-                                         + x * a_pixel_bytes / sizeof(T);
+                                     + x * a_pixel_bytes / sizeof(T);
                     const T* b_ptr = reinterpret_cast<const T*>(b_row)
-                                         + x * b_pixel_bytes / sizeof(T);
+                                     + x * b_pixel_bytes / sizeof(T);
                     for (int c = 0; c < nchannels; ++c) {
                         // Saturating sub in scalar
                         if constexpr (std::is_unsigned_v<T>) {
                             r_ptr[c] = (a_ptr[c] > b_ptr[c])
-                                ? (a_ptr[c] - b_ptr[c]) : T(0);
+                                           ? (a_ptr[c] - b_ptr[c])
+                                           : T(0);
                         } else {
-                            int64_t diff = (int64_t)a_ptr[c] - (int64_t)b_ptr[c];
+                            int64_t diff = (int64_t)a_ptr[c]
+                                           - (int64_t)b_ptr[c];
                             r_ptr[c] = (diff > std::numeric_limits<T>::max())
-                                ? std::numeric_limits<T>::max()
-                                : (diff < std::numeric_limits<T>::min())
-                                    ? std::numeric_limits<T>::min() : (T)diff;
+                                           ? std::numeric_limits<T>::max()
+                                       : (diff < std::numeric_limits<T>::min())
+                                           ? std::numeric_limits<T>::min()
+                                           : (T)diff;
                         }
                     }
                 }
@@ -396,8 +405,9 @@ sub_impl_hwy(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
                     const Btype* b_ptr = reinterpret_cast<const Btype*>(b_row)
                                          + x * b_pixel_bytes / sizeof(Btype);
                     for (int c = 0; c < nchannels; ++c) {
-                        r_ptr[c] = static_cast<Rtype>(static_cast<float>(a_ptr[c]) -
-                                                       static_cast<float>(b_ptr[c]));
+                        r_ptr[c] = static_cast<Rtype>(
+                            static_cast<float>(a_ptr[c])
+                            - static_cast<float>(b_ptr[c]));
                     }
                 }
             }
@@ -415,7 +425,8 @@ sub_impl(ImageBuf& R, const ImageBuf& A, const ImageBuf& B, ROI roi,
         && B.localpixels()) {
         // Use native integer path for scale-invariant sub when all types match
         // and are integer types (much faster: 6-12x vs 3-5x with float conversion)
-        constexpr bool all_same = std::is_same_v<Rtype, Atype> && std::is_same_v<Atype, Btype>;
+        constexpr bool all_same = std::is_same_v<Rtype, Atype>
+                                  && std::is_same_v<Atype, Btype>;
         constexpr bool is_integer = std::is_integral_v<Rtype>;
         if constexpr (all_same && is_integer) {
             return sub_impl_hwy_native_int<Rtype>(R, A, B, roi, nthreads);