build: Remove support for deprecated Intel icc compiler (#2075)

lgritz · web-flow · commit 11739968de16 · 2026-03-02T09:26:21.000-08:00
Intel icc is deprecated and hasn't had a release for a few years. It's
holding us back, both by making us work around an ever-growing number
of icc bugs and limitations that will never be fixed, and by not
allowing us to upgrade the minimum versions of certain dependencies,
because icc can't correctly compile newer versions (as an example, it
cannot use a 'fmt' library newer than the oldest we support, 7.0).

So it's time to thank icc for its service and put it on the ice floe
for the polar bears to eat. This is of course in main (future OSL
1.16), and will not be backported to release branches, since we never
drop support for a dependency or toolchain in existing releases. People
requiring icc for whatever reason may keep using OSL 1.15 or older.

We will continue to support and test icx, the fully supported Intel
LLVM-based compiler.

This PR also removes some test reference output that was only needed
for icc.

Signed-off-by: Larry Gritz <lg@larrygritz.com>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -342,31 +342,6 @@ jobs:
                             OPENIMAGEIO_CMAKE_FLAGS="-DUSE_PYTHON=0"
                             CMAKE_BUILD_TYPE=RelWithDebInfo
 
-          - desc: icc/C++17 llvm14 py3.9 oiio-2.5 avx2
-            nametag: linux-icc
-            runner: ubuntu-latest
-            container: aswf/ci-osl:2023-clang15
-            cc_compiler: icc
-            cxx_compiler: icpc
-            cxx_std: 17
-            fmt_ver: 7.1.3
-            opencolorio_ver: v2.3.2
-            openimageio_ver: v2.5.17.0
-            # Changes to OIIO's simd.h starting in commit 68666db9 (from PR
-            # #4187) seem to trigger compiler bugs in icc and generate wrong
-            # SIMD code. It's probably not worth tracking down for just this
-            # obsolete compiler. Just lock down to OIIO 2.5 for icc builds to
-            # avoid the problem.
-            # openimageio_ver: e41ac03c0b21  # works
-            # openimageio_ver: 68666db994d5 # broken
-            python_ver: "3.10"
-            pybind11_ver: v2.10.0
-            # simd: avx2,f16c
-            batched: b8_AVX2_noFMA
-            setenvs: export OSL_CMAKE_FLAGS="-DSTOP_ON_WARNING=OFF -DEXTRA_CPP_ARGS=-fp-model=consistent"
-                            OPENIMAGEIO_CMAKE_FLAGS=-DBUILD_FMT_VERSION=7.1.3
-                            USE_OPENVDB=0
-                            OPENCOLORIO_CMAKE_FLAGS="-DCMAKE_CXX_COMPILER=g++"
           - desc: icx/C++17 llvm14 py3.10 oiio-3.0 avx2
             nametag: linux-icx
             runner: ubuntu-latest
@@ -379,7 +354,7 @@ jobs:
             cxx_std: 17
             fmt_ver: 7.1.3
             opencolorio_ver: v2.3.2
-            openimageio_ver: v3.0.11.0
+            openimageio_ver: v3.0.15.0
             python_ver: "3.10"
             pybind11_ver: v2.10.0
             simd: avx2,f16c
diff --git a/src/include/OSL/Imathx/Imathx.h b/src/include/OSL/Imathx/Imathx.h
@@ -248,9 +248,7 @@ affineInverse(const Matrix44 &m)
 // differently than the LLVM IR version.
 // NOTE:  only using "inline" to get ODR (One Definition Rule) behavior
 static inline OSL_HOSTDEVICE Matrix44
-#if !OSL_INTEL_CLASSIC_COMPILER_VERSION
     OSL_GNUC_ATTRIBUTE(optimize("fp-contract=off"))
-#endif
 nonAffineInverse(const Matrix44 &source);
 
 Matrix44 OSL_HOSTDEVICE nonAffineInverse(const Matrix44 &source)
diff --git a/src/include/OSL/mask.h b/src/include/OSL/mask.h
@@ -20,19 +20,6 @@ OSL_NAMESPACE_BEGIN
 using std::popcount;
 using std::countr_zero;
 
-#elif OSL_INTEL_CLASSIC_COMPILER_VERSION
-
-#include <immintrin.h>
-
-OSL_FORCEINLINE int popcount(uint32_t x) noexcept { return _mm_popcnt_u32(x);}
-OSL_FORCEINLINE int popcount(uint64_t x) noexcept { return _mm_popcnt_u64(x); }
-OSL_FORCEINLINE int countr_zero(uint32_t x) noexcept { return _bit_scan_forward(x); }
-OSL_FORCEINLINE int countr_zero(uint64_t x) noexcept {
-    unsigned __int32 index;
-    _BitScanForward64(&index, x);
-    return static_cast<int>(index);
-}
-
 #elif defined(__GNUC__) || defined(__clang__)
 
 OSL_FORCEINLINE int popcount(uint32_t x) noexcept { return __builtin_popcount(x); }
diff --git a/src/include/OSL/oslnoise.h b/src/include/OSL/oslnoise.h
@@ -647,7 +647,7 @@ OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<float> select(const bool b, const Dual2<flo
     // versus requiring a stack location.
     // Without this work per component, gathers & scatters were being emitted
     // when used inside SIMD loops.
-#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION && !OSL_INTEL_LLVM_COMPILER_VERSION
+#if OSL_ANY_CLANG && !OSL_INTEL_LLVM_COMPILER_VERSION
     // Clang's vectorizor was really insistent that a select operation could not be replaced
     // with control flow, so had to re-introduce the ? operator to make it happy
     return Dual2<float> (
@@ -2254,7 +2254,6 @@ OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,
 
     // With Dual2<Vec3> data types, a lot of code is generated below
     // which caused some runaway compiler memory consumption when vectorizing
-#if !OSL_INTEL_CLASSIC_COMPILER_VERSION
     auto l_result = OIIO::lerp (
                OIIO::trilerp (grad (hash (X  , Y  , Z  , W  ), fx     , fy     , fz     , fw     ),
                               grad (hash (X+1, Y  , Z  , W  ), fx-1.0f, fy     , fz     , fw     ),
@@ -2275,40 +2274,6 @@ OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,
                               grad (hash (X+1, Y+1, Z+1, W+1), fx-1.0f, fy-1.0f, fz-1.0f, fw-1.0f),
                               u, v, t),
                s);
-#else
-    // Use a loop to avoid repeating code gen twice
-    Dual2<Vec3> v0, v1;
-    // GCC emits -Wmaybe-uninitialized errors for v0,v1.
-    // To avoid, GCC uses reference version above
-
-    // Clang doesn't want to vectorize with the vIndex loop
-    // To enable vectorization, Clang uses reference version above
-    OSL_INTEL_PRAGMA(nounroll_and_jam)
-    for(int vIndex=0; vIndex < 2;++vIndex) {
-        int vW = W + vIndex;
-        Dual2<float> vfw = fw - float(vIndex);
-
-        Dual2<Vec3> vResult = OIIO::trilerp (
-            grad (hash (X  , Y  , Z  , vW  ), fx     , fy     , fz     , vfw     ),
-            grad (hash (X+1, Y  , Z  , vW  ), fx-1.0f, fy     , fz     , vfw     ),
-            grad (hash (X  , Y+1, Z  , vW  ), fx     , fy-1.0f, fz     , vfw     ),
-            grad (hash (X+1, Y+1, Z  , vW  ), fx-1.0f, fy-1.0f, fz     , vfw     ),
-            grad (hash (X  , Y  , Z+1, vW  ), fx     , fy     , fz-1.0f, vfw     ),
-            grad (hash (X+1, Y  , Z+1, vW  ), fx-1.0f, fy     , fz-1.0f, vfw     ),
-            grad (hash (X  , Y+1, Z+1, vW  ), fx     , fy-1.0f, fz-1.0f, vfw     ),
-            grad (hash (X+1, Y+1, Z+1, vW  ), fx-1.0f, fy-1.0f, fz-1.0f, vfw     ),
-            u, v, t);
-        // Rather than dynamic indexing array,
-        // use masking to store outputs,
-        // to better enable SROA (Scalar Replacement of Aggregates) optimizations
-        if (vIndex == 0) {
-            v0 = vResult;
-        } else {
-            v1 = vResult;
-        }
-    }
-    auto l_result = OIIO::lerp (v0, v1, s);
-#endif
 
     result = scale4 (l_result);
     }
diff --git a/src/include/OSL/sfmath.h b/src/include/OSL/sfmath.h
@@ -68,23 +68,7 @@ namespace sfm
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     */
 
-#if OSL_INTEL_CLASSIC_COMPILER_VERSION
-    // std::isinf wasn't vectorizing and was branchy. This slightly
-    // perturbed version fairs better and is branch free when vectorized
-    // with the Intel compiler.
-    OSL_FORCEINLINE OSL_HOSTDEVICE int isinf (float x) {
-        int r = 0;
-        // NOTE: using bitwise | to avoid branches
-        if (!(std::isfinite(x)|std::isnan(x))) {
-            r = static_cast<int>(copysignf(1.0f,x));
-        }
-        return r;
-    }
-#else
-    // Other compilers don't seem to vectorize well no matter what, so just
-    // use the standard version.
     using std::isinf;
-#endif
 
     template<typename T>
     OSL_FORCEINLINE OSL_HOSTDEVICE T
@@ -191,7 +175,7 @@ namespace sfm
         }
     }
 
-#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION && !OSL_INTEL_LLVM_COMPILER_VERSION
+#if OSL_ANY_CLANG && !OSL_INTEL_LLVM_COMPILER_VERSION
 
     // To make clang's loop vectorizor happy
     // we need to make sure result of min and max
diff --git a/src/include/OSL/wide.h b/src/include/OSL/wide.h
@@ -1904,7 +1904,7 @@ struct WideImpl<const Dual2<ElementT>[], WidthT, true /*IsConstT */> {
 }  // namespace pvt
 
 
-#if OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_GNUC_VERSION
+#if OSL_GNUC_VERSION
 // Workaround for error #3466: inheriting constructors must be inherited from a direct base class
 #    define __OSL_INHERIT_BASE_CTORS(DERIVED, BASE) \
         using Base = typename DERIVED::BASE;        \
@@ -3210,8 +3210,7 @@ template<typename DataT, int WidthT>
 OSL_FORCEINLINE bool
 testIfAnyLaneIsNonZero(const Wide<DataT, WidthT>& wvalues)
 {
-#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION \
-    && !OSL_INTEL_LLVM_COMPILER_VERSION
+#if OSL_ANY_CLANG && !OSL_INTEL_LLVM_COMPILER_VERSION
     int anyLaneIsOn = 0;
     OSL_OMP_PRAGMA(omp simd simdlen(WidthT) reduction(max : anyLaneIsOn))
     for (int i = 0; i < WidthT; ++i) {
diff --git a/src/liboslexec/opcolor_impl.h b/src/liboslexec/opcolor_impl.h
@@ -266,8 +266,7 @@ hsv_to_rgb(const COLOR3& hsv)
         // Avoid switch statement vectorizor doesn't like
         // Also avoid if/else nest which some optimizers might
         // convert back into a switch statement
-#    if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION \
-        && !OSL_INTEL_LLVM_COMPILER_VERSION
+#    if OSL_ANY_CLANG && !OSL_INTEL_LLVM_COMPILER_VERSION
         // Clang was still transforming series of if's back into a switch.
         // Alternate between == and <= comparisons to avoid
 #        define __OSL_ASC_EQ <=
diff --git a/src/liboslexec/wide/wide_opcolor.cpp b/src/liboslexec/wide/wide_opcolor.cpp
@@ -302,7 +302,7 @@ namespace {
 
 // Note: Clang 14 seems to no longer allow vectorizing these loops
 #if ((OSL_CLANG_VERSION && OSL_CLANG_VERSION < 140000) \
-     || OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_INTEL_LLVM_COMPILER_VERSION)
+     || OSL_INTEL_LLVM_COMPILER_VERSION)
 #    define WIDE_TRANSFORMC_OMP_SIMD_LOOP(...) OSL_OMP_SIMD_LOOP(__VA_ARGS__)
 #else
 #    define WIDE_TRANSFORMC_OMP_SIMD_LOOP(...)
diff --git a/src/liboslexec/wide/wide_opnoise_periodic_perlin_deriv_Vec3.cpp b/src/liboslexec/wide/wide_opnoise_periodic_perlin_deriv_Vec3.cpp
@@ -24,17 +24,7 @@ template<> struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WV> {
 };
 template<>
 struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WDF, Param::WV, Param::WF> {
-#if ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
-    // Avoid stack overflow on windows build because compiler has hard coded stack limit.
-    // By not forcing everything to recursively inline and not explicitly vectorizing,
-    // the compiler uses less stack space at the cost of NOT creating a properly SIMD optimized function.
-    // Linux & OSX can increase stacksize before building
-
-    static constexpr int simd_threshold
-        = __OSL_WIDTH + 1;  // Make SIMD code path unreachable
-#else
     static constexpr int simd_threshold = 6;
-#endif
 };
 }  // namespace
 
diff --git a/src/liboslexec/wide/wide_opnoise_periodic_uperlin_deriv_Vec3.cpp b/src/liboslexec/wide/wide_opnoise_periodic_uperlin_deriv_Vec3.cpp
@@ -24,17 +24,7 @@ template<> struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WV> {
 };
 template<>
 struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WDF, Param::WV, Param::WF> {
-#if ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
-    // Avoid stack overflow on windows build because compiler has hard coded stack limit.
-    // By not forcing everything to recursively inline and not explicitly vectorizing,
-    // the compiler uses less stack space at the cost of NOT creating a properly SIMD optimized function.
-    // Linux & OSX can increase stacksize before building
-
-    static constexpr int simd_threshold
-        = __OSL_WIDTH + 1;  // Make SIMD code path unreachable
-#else
     static constexpr int simd_threshold = 6;
-#endif
 };
 }  // namespace
 
diff --git a/src/liboslexec/wide/wide_opnoise_perlin_deriv_Vec3.cpp b/src/liboslexec/wide/wide_opnoise_perlin_deriv_Vec3.cpp
@@ -22,17 +22,7 @@ template<> struct BatchedCGPolicy<Param::WDV, Param::WDV> {
     static constexpr int simd_threshold = 5;
 };
 template<> struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WDF> {
-#if ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
-    // Avoid stack overflow on windows build because compiler has hard coded stack limit.
-    // By not forcing everything to recursively inline and not explicitly vectorizing,
-    // the compiler uses less stack space at the cost of NOT creating a properly SIMD optimized function.
-    // Linux & OSX can increase stacksize before building
-
-    static constexpr int simd_threshold
-        = __OSL_WIDTH + 1;  // Make SIMD code path unreachable
-#else
     static constexpr int simd_threshold = 6;
-#endif
 };
 }  // namespace
 
diff --git a/src/liboslexec/wide/wide_opnoise_uperlin_deriv_Vec3.cpp b/src/liboslexec/wide/wide_opnoise_uperlin_deriv_Vec3.cpp
@@ -22,17 +22,7 @@ template<> struct BatchedCGPolicy<Param::WDV, Param::WDV> {
     static constexpr int simd_threshold = 5;
 };
 template<> struct BatchedCGPolicy<Param::WDV, Param::WDV, Param::WDF> {
-#if ((defined(_WIN32) || defined(_WIN64)) && defined(__INTEL_COMPILER))
-    // Avoid stack overflow on windows build because compiler has hard coded stack limit.
-    // By not forcing everything to recursively inline and not explicitly vectorizing,
-    // the compiler uses less stack space at the cost of NOT creating a properly SIMD optimized function.
-    // Linux & OSX can increase stacksize before building
-
-    static constexpr int simd_threshold
-        = __OSL_WIDTH + 1;  // Make SIMD code path unreachable
-#else
     static constexpr int simd_threshold = 6;
-#endif
 };
 }  // namespace
 
diff --git a/src/liboslexec/wide/wide_opstring.cpp b/src/liboslexec/wide/wide_opstring.cpp
@@ -75,7 +75,7 @@ __OSL_MASKED_OP2(strlen, Wi, Ws)(void* wr_, void* ws_, unsigned int mask_value)
 
     OSL_FORCEINLINE_BLOCK
     {
-#if (!OSL_CLANG_VERSION || OSL_INTEL_CLASSIC_COMPILER_VERSION)
+#if !OSL_CLANG_VERSION
         // Clang 11 generated SIMD crashes at runtime
         // TODO: investigate clang crash when vectorizing
         OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH))
diff --git a/src/liboslnoise/sfm_gabornoise.h b/src/liboslnoise/sfm_gabornoise.h
@@ -356,19 +356,7 @@ gabor_cell(const sfm::GaborUniformParams& gup, const sfm::GaborParams& gp,
                 Dual2<float> gk     = gabor_kernel(w_i_t_s_f, omega_i_t_s_f,
                                                    phi_i_t_s_f, a_i_t_s_f,
                                                    x_k_i_t);  // 2D
-#if defined(__AVX512F__) && defined(__INTEL_COMPILER) \
-    && (__INTEL_COMPILER < 1800)
-                // icc17 with AVX512 had some incorrect results
-                // due to the not_finite code path executing even
-                // when the value was finite.  Workaround: using isnan | isinf
-                // instead of isfinite avoided the issue.
-                // icc18u3 doesn't exhibit the problem
-                // NOTE: tried using bitwise | to avoid branches and got internal compiler error
-                //bool not_finite = std::isnan(gk.val()) | std::isinf(gk.val());
-                bool not_finite = std::isnan(gk.val()) || std::isinf(gk.val());
-#else
-                bool not_finite = !std::isfinite(gk.val());
-#endif
+                bool not_finite     = !std::isfinite(gk.val());
                 if (OSL_UNLIKELY(not_finite)) {
                     // Numeric failure of the filtered version.  Fall
                     // back on the unfiltered.
diff --git a/src/testshade/batched_simplerend.cpp b/src/testshade/batched_simplerend.cpp
@@ -180,17 +180,7 @@ BatchedSimpleRenderer<WidthT>::get_matrix(BatchedShaderGlobals* bsg,
 
         OSL_OMP_PRAGMA(omp simd simdlen(WidthT))
         for (int lane = 0; lane < WidthT; ++lane) {
-#    if __INTEL_COMPILER >= 1900
-            // Used load + blend + store instead of masked store to temporarily work around
-            // an icc19u5 issue when automatic ISA dispatch is used causing scatters to be generated
-            Matrix44 m = result[lane];
-            if (result.mask()[lane]) {
-                m = uniformTransform;
-            }
-            result[ActiveLane(lane)] = m;
-#    else
             result[lane] = uniformTransform;
-#    endif
         }
 #endif
     }
diff --git a/testsuite/blackbody/ref/out.alt.exr b/testsuite/blackbody/ref/out.alt.exr
diff --git a/testsuite/derivs/ref/out.icc.txt b/testsuite/derivs/ref/out.icc.txt
diff --git a/testsuite/exponential/ref/out.alt-icc.txt b/testsuite/exponential/ref/out.alt-icc.txt
diff --git a/testsuite/geomath/ref/out.icc.txt b/testsuite/geomath/ref/out.icc.txt
diff --git a/testsuite/trig/ref/out.alt-icc.txt b/testsuite/trig/ref/out.alt-icc.txt
diff --git a/testsuite/vector/ref/out-icc.txt b/testsuite/vector/ref/out-icc.txt