google
diff --git a/ynnpack/base/simd/arm_neon_base.h b/ynnpack/base/simd/arm_neon_base.h
Lines changed: 18 additions & 20 deletions
diff --git a/ynnpack/base/simd/arm_neonfp16.h b/ynnpack/base/simd/arm_neonfp16.h
Lines changed: 1 addition & 1 deletion
diff --git a/ynnpack/base/simd/byte_vec.h b/ynnpack/base/simd/byte_vec.h
Lines changed: 5 additions & 3 deletions
diff --git a/ynnpack/base/simd/generic.inc b/ynnpack/base/simd/generic.inc
Lines changed: 43 additions & 43 deletions
diff --git a/ynnpack/base/simd/vec.h b/ynnpack/base/simd/vec.h
Lines changed: 20 additions & 5 deletions
@@ -61,9 +61,6 @@ struct vec<float, 4> {
   vec(vec<float, 2> lo, vec<float, 2> hi) : v(vcombine_f32(lo.v, hi.v)) {}
 
   float32x4_t v;
-
-  vec<float, 2> lo() const { return vec<float, 2>{vget_low_f32(v)}; }
-  vec<float, 2> hi() const { return vec<float, 2>{vget_high_f32(v)}; }
 };
 
 #ifdef YNN_ARCH_ARM64
@@ -127,9 +124,6 @@ struct vec<bfloat16, 8> {
   vec(vec<bfloat16, 4> lo, vec<bfloat16, 4> hi) : v(vcombine_u16(lo.v, hi.v)) {}
 
   uint16x8_t v;
-
-  vec<bfloat16, 4> lo() const { return vec<bfloat16, 4>{vget_low_u16(v)}; }
-  vec<bfloat16, 4> hi() const { return vec<bfloat16, 4>{vget_high_u16(v)}; }
 };
 
 template <>
@@ -190,9 +184,6 @@ struct vec<uint8_t, 16> {
   vec(u8x8 lo, u8x8 hi) : v(vcombine_u8(lo.v, hi.v)) {}
   vec(uint8_t x) : v(vdupq_n_u8(x)) {}  // NOLINT
 
-  u8x8 lo() const { return u8x8{vget_low_u8(v)}; }
-  u8x8 hi() const { return u8x8{vget_high_u8(v)}; }
-
   uint8x16_t v;
 };
 
@@ -224,6 +215,13 @@ using s16x8 = vec<int16_t, 8>;
 using u8x16 = vec<uint8_t, 16>;
 using s8x16 = vec<int8_t, 16>;
 
+YNN_ALWAYS_INLINE f32x2 lo(f32x4 x) { return f32x2{vget_low_f32(x.v)}; }
+YNN_ALWAYS_INLINE f32x2 hi(f32x4 x) { return f32x2{vget_high_f32(x.v)}; }
+YNN_ALWAYS_INLINE bf16x4 lo(bf16x8 x) { return bf16x4{vget_low_u16(x.v)}; }
+YNN_ALWAYS_INLINE bf16x4 hi(bf16x8 x) { return bf16x4{vget_high_u16(x.v)}; }
+YNN_ALWAYS_INLINE u8x8 lo(u8x16 x) { return u8x8{vget_low_u8(x.v)}; }
+YNN_ALWAYS_INLINE u8x8 hi(u8x16 x) { return u8x8{vget_high_u8(x.v)}; }
+
 namespace internal {
 
 YNN_ALWAYS_INLINE int32x4x2_t vtrn(int32x4_t a, int32x4_t b) {
@@ -1205,15 +1203,15 @@ YNN_ALWAYS_INLINE f32x2 cast(f64x2 a, float) {
 #endif  // YNN_ARCH_ARM64
 
 YNN_ALWAYS_INLINE s16x8 cast(s32x8 a, int16_t) {
-  return s16x8{vcombine_s16(vqmovn_s32(a.lo().v), vqmovn_s32(a.hi().v))};
+  return s16x8{vcombine_s16(vqmovn_s32(lo(a).v), vqmovn_s32(hi(a).v))};
 }
 
 YNN_ALWAYS_INLINE s8x16 cast(s16x16 a, int8_t) {
-  return s8x16{vcombine_s8(vqmovn_s16(a.lo().v), vqmovn_s16(a.hi().v))};
+  return s8x16{vcombine_s8(vqmovn_s16(lo(a).v), vqmovn_s16(hi(a).v))};
 }
 
 YNN_ALWAYS_INLINE u8x16 cast(s16x16 a, uint8_t) {
-  return u8x16{vcombine_u8(vqmovun_s16(a.lo().v), vqmovun_s16(a.hi().v))};
+  return u8x16{vcombine_u8(vqmovun_s16(lo(a).v), vqmovun_s16(hi(a).v))};
 }
 
 YNN_ALWAYS_INLINE s32x4 cast(f32x4 f, int32_t) {
@@ -1226,27 +1224,27 @@ YNN_ALWAYS_INLINE s32x4 cast(f32x4 f, int32_t) {
 
 YNN_ALWAYS_INLINE s16x8 cast(f32x8 f, int16_t) {
 #if defined(__ARM_ARCH) && __ARM_ARCH < 8
-  s32x4 a1 = cast(round(f.lo()), int32_t{});
-  s32x4 a2 = cast(round(f.hi()), int32_t{});
+  s32x4 a1 = cast(round(lo(f)), int32_t{});
+  s32x4 a2 = cast(round(hi(f)), int32_t{});
   return cast(s32x8{a1, a2}, int16_t{});
 #else
-  return s16x8{vcombine_s16(vqmovn_s32(vcvtnq_s32_f32(f.lo().v)),
-                            vqmovn_s32(vcvtnq_s32_f32(f.hi().v)))};
+  return s16x8{vcombine_s16(vqmovn_s32(vcvtnq_s32_f32(lo(f).v)),
+                            vqmovn_s32(vcvtnq_s32_f32(hi(f).v)))};
 #endif
 }
 
 YNN_ALWAYS_INLINE s8x16 cast(f32x16 f, int8_t) {
   s16x16 f_s16 = {
-      cast(f.lo(), int16_t{}),
-      cast(f.hi(), int16_t{}),
+      cast(lo(f), int16_t{}),
+      cast(hi(f), int16_t{}),
   };
   return cast(f_s16, int8_t{});
 }
 
 YNN_ALWAYS_INLINE u8x16 cast(f32x16 f, uint8_t) {
   s16x16 f_s16 = {
-      cast(f.lo(), int16_t{}),
-      cast(f.hi(), int16_t{}),
+      cast(lo(f), int16_t{}),
+      cast(hi(f), int16_t{}),
   };
   return cast(f_s16, uint8_t{});
 }
 
@@ -34,7 +34,7 @@ YNN_ALWAYS_INLINE f16x4 cast(f32x4 a, half) {
 
 YNN_ALWAYS_INLINE f16x8 cast(f32x8 a, half) {
   return f16x8{vreinterpretq_u16_f16(
-      vcombine_f16(vcvt_f16_f32(a.lo().v), vcvt_f16_f32(a.hi().v)))};
+      vcombine_f16(vcvt_f16_f32(lo(a).v), vcvt_f16_f32(hi(a).v)))};
 }
 
 }  // namespace simd
 
@@ -46,14 +46,16 @@ struct vec<uint8_t, 8> {
   explicit vec(uint64_t v) : v(v) {}
   vec(u8x4 x0, u8x4 x1) : v((static_cast<uint64_t>(x1.v) << 32) | x0.v) {}
 
-  u8x4 lo() const { return u8x4{static_cast<uint32_t>(v)}; }
-  u8x4 hi() const { return u8x4{static_cast<uint32_t>(v >> 32)}; }
-
   uint64_t v;
 };
 
 using u8x8 = vec<uint8_t, 8>;
 
+YNN_ALWAYS_INLINE u8x4 lo(u8x8 x) { return u8x4{static_cast<uint32_t>(x.v)}; }
+YNN_ALWAYS_INLINE u8x4 hi(u8x8 x) {
+  return u8x4{static_cast<uint32_t>(x.v >> 32)};
+}
+
 YNN_ALWAYS_INLINE u8x4 load_aligned(const uint8_t* ptr, decltype(u8x4::N),
                                     u8x4 = {}) {
   return u8x4{*reinterpret_cast<const uint32_t*>(ptr)};
 
@@ -52,9 +52,9 @@ template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> load(const T* ptr, size_t n, vec<T, N> src) {
   std::integral_constant<size_t, N / 2> n2 = {};
   if (n < n2) {
-    return {load(ptr, n, src.lo()), src.hi()};
+    return {load(ptr, n, lo(src)), hi(src)};
   } else {
-    return {load(ptr, n2), load(ptr + n2, n - n2, src.hi())};
+    return {load(ptr, n2), load(ptr + n2, n - n2, hi(src))};
   }
 }
 template <typename T, size_t N>
@@ -81,43 +81,43 @@ template <typename T, size_t N>
 YNN_ALWAYS_INLINE void store(T* ptr, vec<T, N> value,
                              std::integral_constant<size_t, N> n) {
   std::integral_constant<size_t, N / 2> n2 = {};
-  store(ptr, value.lo(), n2);
-  store(ptr + n2, value.hi(), n2);
+  store(ptr, lo(value), n2);
+  store(ptr + n2, hi(value), n2);
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE void store_aligned(T* ptr, vec<T, N> value,
                                      std::integral_constant<size_t, N> n) {
   std::integral_constant<size_t, N / 2> n2 = {};
-  store_aligned(ptr, value.lo(), n2);
-  store_aligned(ptr + n2, value.hi(), n2);
+  store_aligned(ptr, lo(value), n2);
+  store_aligned(ptr + n2, hi(value), n2);
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE void store(T* ptr, vec<T, N> value, size_t n) {
   std::integral_constant<size_t, N / 2> n2 = {};
   if (n < n2) {
-    store(ptr, value.lo(), n);
+    store(ptr, lo(value), n);
   } else {
-    store(ptr, value.lo(), n2);
-    store(ptr + n2, value.hi(), n - n2);
+    store(ptr, lo(value), n2);
+    store(ptr + n2, hi(value), n - n2);
   }
 }
 
 // Arithmetic operators.
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator+(vec<T, N> a, vec<T, N> b) {
-  return {a.lo() + b.lo(), a.hi() + b.hi()};
+  return {lo(a) + lo(b), hi(a) + hi(b)};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator-(vec<T, N> a, vec<T, N> b) {
-  return {a.lo() - b.lo(), a.hi() - b.hi()};
+  return {lo(a) - lo(b), hi(a) - hi(b)};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator*(vec<T, N> a, vec<T, N> b) {
-  return {a.lo() * b.lo(), a.hi() * b.hi()};
+  return {lo(a) * lo(b), hi(a) * hi(b)};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator/(vec<T, N> a, vec<T, N> b) {
-  return {a.lo() / b.lo(), a.hi() / b.hi()};
+  return {lo(a) / lo(b), hi(a) / hi(b)};
 }
 
 template <typename T, size_t N>
@@ -144,23 +144,23 @@ YNN_ALWAYS_INLINE vec<T, N>& operator/=(vec<T, N>& a, vec<T, N> b) {
 // Boolean operators.
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator&(vec<T, N> a, vec<T, N> b) {
-  return {a.lo() & b.lo(), a.hi() & b.hi()};
+  return {lo(a) & lo(b), hi(a) & hi(b)};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator|(vec<T, N> a, vec<T, N> b) {
-  return {a.lo() | b.lo(), a.hi() | b.hi()};
+  return {lo(a) | lo(b), hi(a) | hi(b)};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator^(vec<T, N> a, vec<T, N> b) {
-  return {a.lo() ^ b.lo(), a.hi() ^ b.hi()};
+  return {lo(a) ^ lo(b), hi(a) ^ hi(b)};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator~(vec<T, N> a) {
-  return {~a.lo(), ~a.hi()};
+  return {~lo(a), ~hi(a)};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> operator<<(vec<T, N> a, int b) {
-  return {a.lo() << b, a.hi() << b};
+  return {lo(a) << b, hi(a) << b};
 }
 
 template <typename T, size_t N>
@@ -180,60 +180,60 @@ YNN_ALWAYS_INLINE vec<T, N>& operator^=(vec<T, N>& a, vec<T, N> b) {
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> min(vec<T, N> a, vec<T, N> b) {
-  return {min(a.lo(), b.lo()), min(a.hi(), b.hi())};
+  return {min(lo(a), lo(b)), min(hi(a), hi(b))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> max(vec<T, N> a, vec<T, N> b) {
-  return {max(a.lo(), b.lo()), max(a.hi(), b.hi())};
+  return {max(lo(a), lo(b)), max(hi(a), hi(b))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> copysign(vec<T, N> mag, vec<T, N> sgn) {
-  return {copysign(mag.lo(), sgn.lo()), copysign(mag.hi(), sgn.hi())};
+  return {copysign(lo(mag), lo(sgn)), copysign(hi(mag), hi(sgn))};
 };
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> abs(vec<T, N> a) {
-  return {abs(a.lo()), abs(a.hi())};
+  return {abs(lo(a)), abs(hi(a))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> add_sat(vec<T, N> a, vec<T, N> b) {
-  return {add_sat(a.lo(), b.lo()), add_sat(a.hi(), b.hi())};
+  return {add_sat(lo(a), lo(b)), add_sat(hi(a), hi(b))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> sub_sat(vec<T, N> a, vec<T, N> b) {
-  return {sub_sat(a.lo(), b.lo()), sub_sat(a.hi(), b.hi())};
+  return {sub_sat(lo(a), lo(b)), sub_sat(hi(a), hi(b))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> floor(vec<T, N> a) {
-  return {floor(a.lo()), floor(a.hi())};
+  return {floor(lo(a)), floor(hi(a))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> floor_log2(vec<T, N> a) {
-  return {floor_log2(a.lo()), floor_log2(a.hi())};
+  return {floor_log2(lo(a)), floor_log2(hi(a))};
 }
 
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> exp2_round(vec<T, N> a) {
-  return {exp2_round(a.lo()), exp2_round(a.hi())};
+  return {exp2_round(lo(a)), exp2_round(hi(a))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> copynan(vec<T, N> x, vec<T, N> nan) {
-  return {copynan(x.lo(), nan.lo()), copynan(x.hi(), nan.hi())};
+  return {copynan(lo(x), lo(nan)), copynan(hi(x), hi(nan))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> ceil(vec<T, N> a) {
-  return {ceil(a.lo()), ceil(a.hi())};
+  return {ceil(lo(a)), ceil(hi(a))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> round(vec<T, N> a) {
-  return {round(a.lo()), round(a.hi())};
+  return {round(lo(a)), round(hi(a))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> sqrt(vec<T, N> a) {
-  return {sqrt(a.lo()), sqrt(a.hi())};
+  return {sqrt(lo(a)), sqrt(hi(a))};
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N> fma(vec<T, N> a, vec<T, N> b, vec<T, N> acc) {
-  return {fma(a.lo(), b.lo(), acc.lo()), fma(a.hi(), b.hi(), acc.hi())};
+  return {fma(lo(a), lo(b), lo(acc)), fma(hi(a), hi(b), hi(acc))};
 }
 
 template <int Index, typename T, size_t N>
@@ -246,7 +246,7 @@ template <int Index, typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N / 2> extract(vec<T, N> x,
                                         std::integral_constant<size_t, N / 2>) {
   static_assert(Index == 0 || Index == 1, "");
-  return Index == 0 ? x.lo() : x.hi();
+  return Index == 0 ? lo(x) : hi(x);
 }
 template <int Index, typename T, size_t N>
 YNN_ALWAYS_INLINE vec<T, N / 4> extract(vec<T, N> x,
@@ -263,31 +263,31 @@ YNN_ALWAYS_INLINE vec<T, N*2> concat(vec<T, N> a, vec<T, N> b) {
 
 template <typename To, typename From, size_t N>
 YNN_ALWAYS_INLINE vec<To, N> cast(vec<From, N> from, To) {
-  return {cast(from.lo(), To()), cast(from.hi(), To())};
+  return {cast(lo(from), To()), cast(hi(from), To())};
 }
 
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE T horizontal_sum(vec<T, N> x) {
-  return horizontal_sum(x.lo() + x.hi());
+  return horizontal_sum(lo(x) + hi(x));
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE T horizontal_min(vec<T, N> x) {
-  return horizontal_min(min(x.lo(), x.hi()));
+  return horizontal_min(min(lo(x), hi(x)));
 }
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE T horizontal_max(vec<T, N> x) {
-  return horizontal_max(max(x.lo(), x.hi()));
+  return horizontal_max(max(lo(x), hi(x)));
 }
 
 template <typename T, size_t N>
 YNN_ALWAYS_INLINE void kahan_sum(vec<T, N> a, vec<T, N>& acc,
                                  vec<T, N>& error) {
-  vec<T, N / 2> acc_lo = acc.lo();
-  vec<T, N / 2> acc_hi = acc.hi();
-  vec<T, N / 2> error_lo = error.lo();
-  vec<T, N / 2> error_hi = error.hi();
-  kahan_sum(a.lo(), acc_lo, error_lo);
-  kahan_sum(a.hi(), acc_hi, error_hi);
+  vec<T, N / 2> acc_lo = lo(acc);
+  vec<T, N / 2> acc_hi = hi(acc);
+  vec<T, N / 2> error_lo = lo(error);
+  vec<T, N / 2> error_hi = hi(error);
+  kahan_sum(lo(a), acc_lo, error_lo);
+  kahan_sum(hi(a), acc_hi, error_hi);
   acc = concat(acc_lo, acc_hi);
   error = concat(error_lo, error_hi);
 }
 
@@ -68,11 +68,6 @@ struct vec {
 
   subvec v[2];
 
-  subvec& lo() { return v[0]; }
-  const subvec& lo() const { return v[0]; }
-  subvec& hi() { return v[1]; }
-  const subvec& hi() const { return v[1]; }
-
   vec() = default;
   YNN_ALWAYS_INLINE explicit vec(value_type x) : v{subvec{x}, subvec{x}} {}
   YNN_ALWAYS_INLINE vec(subvec v0, subvec v1) : v{v0, v1} {}
@@ -81,6 +76,26 @@ struct vec {
   YNN_ALWAYS_INLINE const subvec& operator[](size_t i) const { return v[i]; }
 };
 
+template <typename T, size_t N>
+YNN_ALWAYS_INLINE vec<T, N / 2>& lo(vec<T, N>& x) {
+  return x.v[0];
+}
+
+template <typename T, size_t N>
+YNN_ALWAYS_INLINE const vec<T, N / 2>& lo(const vec<T, N>& x) {
+  return x.v[0];
+}
+
+template <typename T, size_t N>
+YNN_ALWAYS_INLINE vec<T, N / 2>& hi(vec<T, N>& x) {
+  return x.v[1];
+}
+
+template <typename T, size_t N>
+YNN_ALWAYS_INLINE const vec<T, N / 2>& hi(const vec<T, N>& x) {
+  return x.v[1];
+}
+
 template <size_t N, typename T>
 YNN_ALWAYS_INLINE vec<T, N> broadcast(T x) {
   return vec<T, N>{x};
Original file line number	Diff line number	Diff line change
`@@ -34,7 +34,7 @@ YNN_ALWAYS_INLINE f16x4 cast(f32x4 a, half) {`
`34`	`34`
`35`	`35`	`YNN_ALWAYS_INLINE f16x8 cast(f32x8 a, half) {`
`36`	`36`	`return f16x8{vreinterpretq_u16_f16(`
`37`		`- vcombine_f16(vcvt_f16_f32(a.lo().v), vcvt_f16_f32(a.hi().v)))};`
	`37`	`+ vcombine_f16(vcvt_f16_f32(lo(a).v), vcvt_f16_f32(hi(a).v)))};`
`38`	`38`	`}`
`39`	`39`
`40`	`40`	`} // namespace simd`