Implement uint8 casting functions

guyuqi · serge-sans-paille · commit 54128491e069 · 2021-06-09T19:28:22.000Z
Add casts from a SIMD batch of uint8 (uint8 * N_u8) to batches of uint16
(uint16 * N_u16), uint32 (uint32 * N_u32) and uint64 (uint64 * N_u64), and back.

Change-Id: Iaea18face98fc9490fa24c205c4c265fd784e4d4
Signed-off-by: Yuqi Gu <guyuqi@apache.org>
diff --git a/include/xsimd/types/xsimd_avx512_conversion.hpp b/include/xsimd/types/xsimd_avx512_conversion.hpp
@@ -31,6 +31,13 @@ namespace xsimd
     batch<float, 16> to_float(const batch<int32_t, 16>& x);
     batch<double, 8> to_float(const batch<int64_t, 8>& x);
 
+    batch<uint16_t, 32> u8_to_u16(const batch<uint8_t, 64>& x);
+    batch<uint8_t, 64> u16_to_u8(const batch<uint16_t, 32>& x);
+    batch<uint32_t, 16> u8_to_u32(const batch<uint8_t, 64>& x);
+    batch<uint8_t, 64> u32_to_u8(const batch<uint32_t, 16>& x);
+    batch<uint64_t, 8> u8_to_u64(const batch<uint8_t, 64>& x);
+    batch<uint8_t, 64> u64_to_u8(const batch<uint64_t, 8>& x);
+
     /**************************
      * boolean cast functions *
      **************************/
@@ -179,6 +186,34 @@ namespace xsimd
     XSIMD_BATCH_CAST_INTRINSIC(double, uint64_t, 8, _mm512_cvttpd_epu64)
 #endif
 
+    inline batch<uint16_t, 32> u8_to_u16(const batch<uint8_t, 64>& x)
+    {
+        return static_cast<batch<uint16_t, 32>>(x);
+    }
+    inline batch<uint8_t, 64> u16_to_u8(const batch<uint16_t, 32>& x)
+    {
+        return static_cast<batch<uint8_t, 64>>(x);
+    }
+
+    inline batch<uint32_t, 16> u8_to_u32(const batch<uint8_t, 64>& x)
+    {
+        return static_cast<batch<uint32_t, 16>>(x);
+    }
+    inline batch<uint8_t, 64> u32_to_u8(const batch<uint32_t, 16>& x)
+    {
+        return static_cast<batch<uint8_t, 64>>(x);
+    }
+
+    inline batch<uint64_t, 8> u8_to_u64(const batch<uint8_t, 64>& x)
+    {
+        return static_cast<batch<uint64_t, 8>>(x);
+    }
+
+    inline batch<uint8_t, 64> u64_to_u8(const batch<uint64_t, 8>& x)
+    {
+        return static_cast<batch<uint8_t, 64>>(x);
+    }
+
     /**************************
      * boolean cast functions *
      **************************/
diff --git a/include/xsimd/types/xsimd_avx_conversion.hpp b/include/xsimd/types/xsimd_avx_conversion.hpp
@@ -31,6 +31,13 @@ namespace xsimd
     batch<float, 8> to_float(const batch<int32_t, 8>& x);
     batch<double, 4> to_float(const batch<int64_t, 4>& x);
 
+    batch<uint16_t, 16> u8_to_u16(const batch<uint8_t, 32>& x);
+    batch<uint8_t, 32> u16_to_u8(const batch<uint16_t, 16>& x);
+    batch<uint32_t, 8> u8_to_u32(const batch<uint8_t, 32>& x);
+    batch<uint8_t, 32> u32_to_u8(const batch<uint32_t, 8>& x);
+    batch<uint64_t, 4> u8_to_u64(const batch<uint8_t, 32>& x);
+    batch<uint8_t, 32> u64_to_u8(const batch<uint64_t, 4>& x);
+
     /**************************
      * boolean cast functions *
      **************************/
@@ -78,6 +85,34 @@ namespace xsimd
 #endif
     }
 
+    inline batch<uint16_t, 16> u8_to_u16(const batch<uint8_t, 32>& x)
+    {
+        return static_cast<batch<uint16_t, 16>>(x);
+    }
+    inline batch<uint8_t, 32> u16_to_u8(const batch<uint16_t, 16>& x)
+    {
+        return static_cast<batch<uint8_t, 32>>(x);
+    }
+
+    inline batch<uint32_t, 8> u8_to_u32(const batch<uint8_t, 32>& x)
+    {
+        return static_cast<batch<uint32_t, 8>>(x);
+    }
+    inline batch<uint8_t, 32> u32_to_u8(const batch<uint32_t, 8>& x)
+    {
+        return static_cast<batch<uint8_t, 32>>(x);
+    }
+
+    inline batch<uint64_t, 4> u8_to_u64(const batch<uint8_t, 32>& x)
+    {
+        return static_cast<batch<uint64_t, 4>>(x);
+    }
+
+    inline batch<uint8_t, 32> u64_to_u8(const batch<uint64_t, 4>& x)
+    {
+        return static_cast<batch<uint8_t, 32>>(x);
+    }
+
     /*****************************************
      * batch cast functions implementation *
      *****************************************/
diff --git a/include/xsimd/types/xsimd_fallback.hpp b/include/xsimd/types/xsimd_fallback.hpp
@@ -1199,6 +1199,24 @@ namespace xsimd
     /*****************************************
      * bitwise cast functions implementation *
      *****************************************/
+    template <std::size_t in_N, std::size_t out_N>
+    batch<uint16_t, out_N> u8_to_u16(const batch<uint8_t, in_N>& x);
+
+    template <std::size_t in_N, std::size_t out_N>
+    batch<uint8_t, out_N> u16_to_u8(const batch<uint16_t, in_N>& x);
+
+    template <std::size_t in_N, std::size_t out_N>
+    batch<uint32_t, out_N> u8_to_u32(const batch<uint8_t, in_N>& x);
+
+    template <std::size_t in_N, std::size_t out_N>
+    batch<uint8_t, out_N> u32_to_u8(const batch<uint32_t, in_N>& x);
+
+    template <std::size_t in_N, std::size_t out_N>
+    batch<uint64_t, out_N> u8_to_u64(const batch<uint8_t, in_N>& x);
+
+    template <std::size_t in_N, std::size_t out_N>
+    batch<uint8_t, out_N> u64_to_u8(const batch<uint64_t, in_N>& x);
+
 
     template <class T_in, class T_out, std::size_t N_in>
     struct bitwise_cast_impl<batch<T_in, N_in>,
@@ -1221,6 +1239,46 @@ namespace xsimd
             return batch<T_out, N_out>(caster.out);
         }
     };
+
+    /***********************************************
+     * uint8 bitwise casts using bitwise_cast_impl *
+     ***********************************************/
+    template <std::size_t in_N, std::size_t out_N>
+    inline batch<uint16_t, out_N> u8_to_u16(const batch<uint8_t, in_N>& x)
+    {
+        return bitwise_cast_impl<batch<uint8_t, in_N>, batch<uint16_t, out_N>>::run(x);
+    }
+
+    template <std::size_t in_N, std::size_t out_N>
+    inline batch<uint8_t, out_N> u16_to_u8(const batch<uint16_t, in_N>& x)
+    {
+        return bitwise_cast_impl<batch<uint16_t, in_N>, batch<uint8_t, out_N>>::run(x);
+    }
+
+    template <std::size_t in_N, std::size_t out_N>
+    inline batch<uint32_t, out_N> u8_to_u32(const batch<uint8_t, in_N>& x)
+    {
+        return bitwise_cast_impl<batch<uint8_t, in_N>, batch<uint32_t, out_N>>::run(x);
+    }
+
+    template <std::size_t in_N, std::size_t out_N>
+    inline batch<uint8_t, out_N> u32_to_u8(const batch<uint32_t, in_N>& x)
+    {
+        return bitwise_cast_impl<batch<uint32_t, in_N>, batch<uint8_t, out_N>>::run(x);
+    }
+
+    template <std::size_t in_N, std::size_t out_N>
+    inline batch<uint64_t, out_N> u8_to_u64(const batch<uint8_t, in_N>& x)
+    {
+        return bitwise_cast_impl<batch<uint8_t, in_N>, batch<uint64_t, out_N>>::run(x);
+    }
+
+    template <std::size_t in_N, std::size_t out_N>
+    inline batch<uint8_t, out_N> u64_to_u8(const batch<uint64_t, in_N>& x)
+    {
+       return bitwise_cast_impl<batch<uint64_t, in_N>, batch<uint8_t, out_N>>::run(x);
+    }
+
 }
 
 #endif
diff --git a/include/xsimd/types/xsimd_neon_conversion.hpp b/include/xsimd/types/xsimd_neon_conversion.hpp
@@ -35,6 +35,13 @@ namespace xsimd
     batch<int32_t, 4> to_int(const batch<float, 4>& x);
     batch<float, 4> to_float(const batch<int32_t, 4>& x);
 
+    batch<uint16_t, 8> u8_to_u16(const batch<uint8_t, 16>& x);
+    batch<uint8_t, 16> u16_to_u8(const batch<uint16_t, 8>& x);
+    batch<uint32_t, 4> u8_to_u32(const batch<uint8_t, 16>& x);
+    batch<uint8_t, 16> u32_to_u8(const batch<uint32_t, 4>& x);
+    batch<uint64_t, 2> u8_to_u64(const batch<uint8_t, 16>& x);
+    batch<uint8_t, 16> u64_to_u8(const batch<uint64_t, 2>& x);
+
 #if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
     batch<int64_t, 2> to_int(const batch<double, 2>& x);
     batch<double, 2> to_float(const batch<int64_t, 2>& x);
@@ -83,6 +90,36 @@ namespace xsimd
         return vcvtq_f32_s32(x);
     }
 
+    inline batch<uint16_t, 8> u8_to_u16(const batch<uint8_t, 16>& x)
+    {
+        return vreinterpretq_u16_u8(x);
+    }
+
+    inline batch<uint8_t, 16> u16_to_u8(const batch<uint16_t, 8>& x)
+    {
+        return vreinterpretq_u8_u16(x);
+    }
+
+    inline batch<uint32_t, 4> u8_to_u32(const batch<uint8_t, 16>& x)
+    {
+        return vreinterpretq_u32_u8(x);
+    }
+
+    inline batch<uint8_t, 16> u32_to_u8(const batch<uint32_t, 4>& x)
+    {
+        return vreinterpretq_u8_u32(x);
+    }
+
+    inline batch<uint64_t, 2> u8_to_u64(const batch<uint8_t, 16>& x)
+    {
+        return vreinterpretq_u64_u8(x);
+    }
+
+    inline batch<uint8_t, 16> u64_to_u8(const batch<uint64_t, 2>& x)
+    {
+        return vreinterpretq_u8_u64(x);
+    }
+
 #if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
     inline batch<int64_t, 2> to_int(const batch<double, 2>& x)
     {
diff --git a/include/xsimd/types/xsimd_sse_conversion.hpp b/include/xsimd/types/xsimd_sse_conversion.hpp
@@ -31,6 +31,13 @@ namespace xsimd
     batch<float, 4> to_float(const batch<int32_t, 4>& x);
     batch<double, 2> to_float(const batch<int64_t, 2>& x);
 
+    batch<uint16_t, 8> u8_to_u16(const batch<uint8_t, 16>& x);
+    batch<uint8_t, 16> u16_to_u8(const batch<uint16_t, 8>& x);
+    batch<uint32_t, 4> u8_to_u32(const batch<uint8_t, 16>& x);
+    batch<uint8_t, 16> u32_to_u8(const batch<uint32_t, 4>& x);
+    batch<uint64_t, 2> u8_to_u64(const batch<uint8_t, 16>& x);
+    batch<uint8_t, 16> u64_to_u8(const batch<uint64_t, 2>& x);
+
     /**************************
      * boolean cast functions *
      **************************/
@@ -72,6 +79,36 @@ namespace xsimd
 #endif
     }
 
+    inline batch<uint16_t, 8> u8_to_u16(const batch<uint8_t, 16>& x)
+    {
+        return static_cast<batch<uint16_t, 8>>(x);
+    }
+
+    inline batch<uint8_t, 16> u16_to_u8(const batch<uint16_t, 8>& x)
+    {
+        return static_cast<batch<uint8_t, 16>>(x);
+    }
+
+    inline batch<uint32_t, 4> u8_to_u32(const batch<uint8_t, 16>& x)
+    {
+        return static_cast<batch<uint32_t, 4>>(x);
+    }
+
+    inline batch<uint8_t, 16> u32_to_u8(const batch<uint32_t, 4>& x)
+    {
+        return static_cast<batch<uint8_t, 16>>(x);
+    }
+
+    inline batch<uint64_t, 2> u8_to_u64(const batch<uint8_t, 16>& x)
+    {
+        return static_cast<batch<uint64_t, 2>>(x);
+    }
+
+    inline batch<uint8_t, 16> u64_to_u8(const batch<uint64_t, 2>& x)
+    {
+        return static_cast<batch<uint8_t, 16>>(x);
+    }
+
     /*****************************************
      * batch cast functions implementation *
      *****************************************/