@@ -75,7 +75,7 @@ constexpr T max_value(const std::array<T, N>& arr) {
/// Expand the compile-time array `kArr` into an `xsimd::batch_constant` for `Arch`,
/// passing `kArr[Is]` for every index in the `Is` pack as the constant's values.
/// The element type of the resulting constant is the array's `value_type`.
7575template <std::array kArr , typename Arch, std::size_t ... Is>
7676constexpr auto array_to_batch_constant_impl (std::index_sequence<Is...>) {
7777 using Array = std::decay_t <decltype (kArr )>;
78- using value_type = typename Array::value_type;
78+ using value_type = Array::value_type;
7979
8080 return xsimd::batch_constant<value_type, Arch, kArr [Is]...>{};
8181}
@@ -364,7 +364,7 @@ struct KernelTraits {
364364 SizedUint<sizeof (bool )>, unpacked_type>;
365365 using simd_batch = xsimd::make_sized_batch_t <uint_type, kShape .unpacked_per_simd()>;
366366 using simd_bytes = xsimd::make_sized_batch_t <uint8_t , kShape .simd_byte_size()>;
367- using arch_type = typename simd_batch::arch_type;
367+ using arch_type = simd_batch::arch_type;
368368};
369369
370370/// Return similar kernel traits but with a different integer unpacking type.
@@ -487,7 +487,7 @@ constexpr int adjust_bytes_per_read(int bits_per_read, int simd_byte_size) {
487487template <typename KerTraits, MediumKernelOptions kOptions >
488488struct MediumKernelPlan {
489489 using Traits = KerTraits;
490- using uint_type = typename Traits::uint_type;
490+ using uint_type = Traits::uint_type;
491491 static constexpr auto kShape = Traits::kShape ;
492492 static constexpr auto kPlanSize = MediumKernelPlanSize::Build(kShape , kOptions );
493493
@@ -497,8 +497,8 @@ struct MediumKernelPlan {
497497 using SwizzlesPerRead = std::array<Swizzle, kPlanSize .swizzles_per_read()>;
498498 using SwizzlesPerKernel = std::array<SwizzlesPerRead, kPlanSize .reads_per_kernel()>;
499499
500- using Shift = std::array<uint_type, kShape .unpacked_per_simd()>;
501- using ShiftsPerSwizzle = std::array<Shift , kPlanSize .shifts_per_swizzle()>;
500+ using Shifts = std::array<uint_type, kShape .unpacked_per_simd()>;
501+ using ShiftsPerSwizzle = std::array<Shifts , kPlanSize .shifts_per_swizzle()>;
502502 using ShiftsPerRead = std::array<ShiftsPerSwizzle, kPlanSize .swizzles_per_read()>;
503503 using ShiftsPerKernel = std::array<ShiftsPerRead, kPlanSize .reads_per_kernel()>;
504504
@@ -545,8 +545,7 @@ constexpr auto MediumKernelPlan<KerTraits, kOptions>::Build()
545545
546546 for (int sw = 0 ; sw < kPlanSize .swizzles_per_read (); ++sw) {
547547 constexpr int kUndefined = -1 ;
548- plan.swizzles .at (r).at (sw) =
549- BuildConstantArrayLike<typename Plan::Swizzle>(kUndefined );
548+ plan.swizzles .at (r).at (sw) = BuildConstantArrayLike<Plan::Swizzle>(kUndefined );
550549 for (int sh = 0 ; sh < kPlanSize .shifts_per_swizzle (); ++sh) {
551550 const int sh_offset_bytes = sh * kShape .packed_max_spread_bytes ();
552551 const int sh_offset_bits = 8 * sh_offset_bytes;
@@ -644,12 +643,12 @@ struct MediumKernel {
644643 static constexpr auto kPlan = MediumKernelPlan<KerTraits, kOptions >::Build();
645644 static constexpr auto kPlanSize = kPlan .kPlanSize ;
646645 static constexpr auto kShape = kPlan .kShape ;
647- using Traits = typename decltype (kPlan )::Traits;
648- using unpacked_type = typename Traits::unpacked_type;
649- using uint_type = typename Traits::uint_type;
650- using simd_batch = typename Traits::simd_batch;
651- using simd_bytes = typename Traits::simd_bytes;
652- using arch_type = typename Traits::arch_type;
646+ using Traits = decltype (kPlan )::Traits;
647+ using unpacked_type = Traits::unpacked_type;
648+ using uint_type = Traits::uint_type;
649+ using simd_batch = Traits::simd_batch;
650+ using simd_bytes = Traits::simd_bytes;
651+ using arch_type = Traits::arch_type;
653652
654653 static constexpr int kValuesUnpacked = kPlan .unpacked_per_kernel();
655654 static constexpr int kBytesRead = kPlan .total_bytes_read();
@@ -751,7 +750,7 @@ struct LargeKernelPlanSize {
751750template <typename KerTraits>
752751struct LargeKernelPlan {
753752 using Traits = KerTraits;
754- using uint_type = typename Traits::uint_type;
753+ using uint_type = Traits::uint_type;
755754 static constexpr auto kShape = Traits::kShape ;
756755 static constexpr auto kPlanSize = LargeKernelPlanSize::Build(kShape );
757756
@@ -760,8 +759,8 @@ struct LargeKernelPlan {
760759 using Swizzle = std::array<uint8_t , kShape .simd_byte_size()>;
761760 using SwizzlesPerKernel = std::array<Swizzle, kPlanSize .reads_per_kernel()>;
762761
763- using Shift = std::array<uint_type, kShape .unpacked_per_simd()>;
764- using ShitsPerKernel = std::array<Shift , kPlanSize .reads_per_kernel()>;
762+ using Shifts = std::array<uint_type, kShape .unpacked_per_simd()>;
763+ using ShitsPerKernel = std::array<Shifts , kPlanSize .reads_per_kernel()>;
765764
766765 static constexpr LargeKernelPlan Build ();
767766
@@ -783,7 +782,7 @@ struct LargeKernelPlan {
783782template <typename KerTraits>
784783constexpr auto LargeKernelPlan<KerTraits>::Build() -> LargeKernelPlan<KerTraits> {
785784 using Plan = LargeKernelPlan<KerTraits>;
786- using uint_type = typename Plan::Traits::uint_type;
785+ using uint_type = Plan::Traits::uint_type;
787786 constexpr auto kShape = Plan::kShape ;
788787 constexpr auto kPlanSize = Plan::kPlanSize ;
789788 static_assert (kShape .is_large ());
@@ -798,8 +797,8 @@ constexpr auto LargeKernelPlan<KerTraits>::Build() -> LargeKernelPlan<KerTraits>
798797 plan.reads .at (r) = read_start_byte;
799798
800799 constexpr int kUndefined = -1 ;
801- plan.low_swizzles .at (r) = BuildConstantArrayLike<typename Plan::Swizzle>(kUndefined );
802- plan.high_swizzles .at (r) = BuildConstantArrayLike<typename Plan::Swizzle>(kUndefined );
800+ plan.low_swizzles .at (r) = BuildConstantArrayLike<Plan::Swizzle>(kUndefined );
801+ plan.high_swizzles .at (r) = BuildConstantArrayLike<Plan::Swizzle>(kUndefined );
803802
804803 for (int u = 0 ; u < kShape .unpacked_per_simd (); ++u) {
805804 const int packed_start_byte = packed_start_bit / 8 ;
@@ -903,10 +902,10 @@ struct LargeKernel {
903902 static constexpr auto kPlanSize = kPlan .kPlanSize ;
904903 static constexpr auto kShape = kPlan .kShape ;
905904 using Traits = typename decltype (kPlan )::Traits;
906- using unpacked_type = typename Traits::unpacked_type;
907- using simd_batch = typename Traits::simd_batch;
908- using simd_bytes = typename Traits::simd_bytes;
909- using arch_type = typename Traits::arch_type;
905+ using unpacked_type = Traits::unpacked_type;
906+ using simd_batch = Traits::simd_batch;
907+ using simd_bytes = Traits::simd_bytes;
908+ using arch_type = Traits::arch_type;
910909
911910 static constexpr int kValuesUnpacked = kPlanSize .unpacked_per_kernel();
912911 static constexpr int kBytesRead = kPlan .total_bytes_read();
@@ -966,7 +965,7 @@ struct LargeKernel {
966965/// A Kernel that does not extract anything, leaving all work to the naive implementation.
967966template <typename KernelTraits>
968967struct NoOpKernel {
969- using unpacked_type = typename KernelTraits::unpacked_type;
968+ using unpacked_type = KernelTraits::unpacked_type;
970969
971970 static constexpr int kValuesUnpacked = 0 ;
972971 static constexpr int kBytesRead = 0 ;
@@ -976,12 +975,12 @@ struct NoOpKernel {
976975
977976template <typename KernelTraits, typename WorkingKernel>
978977struct ForwardToKernel : WorkingKernel {
979- using unpacked_type = typename KernelTraits::unpacked_type;
978+ using unpacked_type = KernelTraits::unpacked_type;
980979
981980 static constexpr int kValuesUnpacked = WorkingKernel::kValuesUnpacked ;
982981
983982 static const uint8_t * unpack (const uint8_t * in, unpacked_type* out) {
984- using working_type = typename WorkingKernel::unpacked_type;
983+ using working_type = WorkingKernel::unpacked_type;
985984
986985 working_type buffer[kValuesUnpacked ] = {};
987986 in = WorkingKernel::unpack (in, buffer);
@@ -997,15 +996,15 @@ struct ForwardToKernel : WorkingKernel {
997996 *******************************/
998997
999998// Benchmarking shows unpack to uint64_t is underperforming on SSE4.2 and Avx2
// NOTE(review): both operands of the `||` below are HasSse2<Arch>, so the second one is
// redundant. Given the comment above, one side was presumably meant to test a different
// architecture (Avx2?) -- confirm against the available Has* traits.
1000- template <typename KerTraits, typename Arch = typename KerTraits::arch_type>
999+ template <typename KerTraits, typename Arch = KerTraits::arch_type>
10011000constexpr bool kMediumShouldUseUint32 =
10021001 (HasSse2<Arch> || HasSse2<Arch>) && //
10031002 (KerTraits::kShape .unpacked_byte_size() == sizeof (uint64_t )) &&
10041003 (KerTraits::kShape .packed_bit_size() < 32 ) &&
10051004 KernelTraitsWithUnpackUint<KerTraits, uint32_t >::kShape .is_medium();
10061005
10071006// Benchmarking shows large unpack to uint8_t is underperforming on SSE4.2
// True when HasSse2<Arch> holds and the shape's unpacked element size is one byte.
// NOTE(review): presumably used to pick a uint16_t working type instead; the selection
// logic is not visible here -- confirm at the use site.
1008- template <typename KerTraits, typename Arch = typename KerTraits::arch_type>
1007+ template <typename KerTraits, typename Arch = KerTraits::arch_type>
10091008constexpr bool kLargeShouldUseUint16 =
10101009 HasSse2<Arch> && (KerTraits::kShape .unpacked_byte_size() == sizeof (uint8_t ));
10111010
0 commit comments