Skip to content

Commit 8d45d98

Browse files
jan-wassenberg authored and copybara-github committed
Add+use CanLookup8 helper function
PiperOrigin-RevId: 910004461
1 parent dd4199b commit 8d45d98

3 files changed

Lines changed: 21 additions & 16 deletions

File tree

g3doc/quick_reference.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2568,7 +2568,8 @@ The following `ReverseN` must not be called if `Lanes(D()) < N`:
25682568
`T` denote `TFromD<D>`. Only available if `HWY_TARGET != HWY_SCALAR` and
25692569
`HWY_MIN_BYTES / sizeof(T) >= 4`. The latter is guaranteed if `T` is four
25702570
bytes and `D` is one of `FixedTag<T, 16/sizeof(T)>` or `ScalableTag<T>` or
2571-
`CappedTag<T, N/sizeof(T)>` (where `N >= 16`).
2571+
`CappedTag<T, N/sizeof(T)>` (where `N >= 16`). The constexpr function
2572+
`CanLookup8(d)` is the preferred way to check these conditions.
25722573
25732574
* <code>unspecified **IndicesFromVec**(D d, V idx)</code> prepares for
25742575
`TableLookupLanes` or `TwoTablesLookupLanes` with integer indices in `idx`,

hwy/contrib/math/fast_math-inl.h

Lines changed: 5 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -229,11 +229,9 @@ HWY_INLINE V FastTan(D d, V x) {
229229
V x_red, sign;
230230
impl::ReduceAngleTan(d, x, x_red, sign);
231231

232-
constexpr size_t kLanes = HWY_MAX_LANES_D(D);
233232
V b, c, d_val;
234233

235-
if constexpr ((kLanes >= 4 && !HWY_HAVE_SCALABLE) ||
236-
(HWY_HAVE_SCALABLE && sizeof(T) == 4 && detail::IsFull(d))) {
234+
if constexpr (CanLookup8(d)) {
237235
// --- Table Lookup ---
238236
const auto scale = Set(d, static_cast<T>(3.8197186342));
239237
auto idx_float = Floor(Mul(x_red, scale));
@@ -525,7 +523,6 @@ HWY_INLINE V FastTanh(D d, V val) {
525523
// Abs(val) and preserve sign for later
526524
auto y = Abs(val);
527525

528-
constexpr size_t kLanes = HWY_MAX_LANES_D(D);
529526
V a, b, c, d_val, e, f;
530527

531528
const auto t0 = Set(d, static_cast<T>(0.168236118310606));
@@ -536,8 +533,7 @@ HWY_INLINE V FastTanh(D d, V val) {
536533
const auto t5 = Set(d, static_cast<T>(2.969315202883957));
537534
const auto t6 = Set(d, static_cast<T>(4.734657601441978));
538535

539-
if constexpr ((kLanes >= 4 && !HWY_HAVE_SCALABLE) ||
540-
(HWY_HAVE_SCALABLE && sizeof(T) == 4 && detail::IsFull(d))) {
536+
if constexpr (CanLookup8(d)) {
541537
using DI = RebindToSigned<D>;
542538
auto idx_i = Zero(DI());
543539
const auto one_i = Set(DI(), 1);
@@ -919,7 +915,6 @@ HWY_INLINE V FastLog(D d, V x) {
919915
V y, exp;
920916
impl::FastLogRangeReduction<kHandleSubnormals>(d, x, y, exp);
921917

922-
constexpr size_t kLanes = HWY_MAX_LANES_D(D);
923918
V approx;
924919

925920
V a, b, c, d_val;
@@ -928,8 +923,7 @@ HWY_INLINE V FastLog(D d, V x) {
928923
// log(y) directly.
929924
const V z = Sub(y, Set(d, static_cast<T>(1.0)));
930925

931-
if constexpr ((kLanes >= 4 && !HWY_HAVE_SCALABLE) ||
932-
(HWY_HAVE_SCALABLE && sizeof(T) == 4 && detail::IsFull(d))) {
926+
if constexpr (CanLookup8(d)) {
933927
// --- Table Lookup ---
934928
const auto scale = Set(d, static_cast<T>(11.3137085));
935929
// Input is always non-negative, so Floor() + ConvertTo()
@@ -1390,7 +1384,6 @@ HWY_INLINE V FastLog2(D d, V x) {
13901384
V y, exp;
13911385
impl::FastLogRangeReduction<kHandleSubnormals>(d, x, y, exp);
13921386

1393-
constexpr size_t kLanes = HWY_MAX_LANES_D(D);
13941387
V approx;
13951388

13961389
V a, b, c, d_val;
@@ -1399,8 +1392,7 @@ HWY_INLINE V FastLog2(D d, V x) {
13991392
// log(y) directly.
14001393
const V z = Sub(y, Set(d, static_cast<T>(1.0)));
14011394

1402-
if constexpr ((kLanes >= 4 && !HWY_HAVE_SCALABLE) ||
1403-
(HWY_HAVE_SCALABLE && sizeof(T) == 4 && detail::IsFull(d))) {
1395+
if constexpr (CanLookup8(d)) {
14041396
// --- Table Lookup ---
14051397
const auto scale = Set(d, static_cast<T>(11.3137085));
14061398
auto idx_i = ConvertInRangeTo(
@@ -1628,7 +1620,6 @@ HWY_INLINE V FastLog10(D d, V x) {
16281620
V y, exp;
16291621
impl::FastLogRangeReduction<kHandleSubnormals>(d, x, y, exp);
16301622

1631-
constexpr size_t kLanes = HWY_MAX_LANES_D(D);
16321623
V approx;
16331624

16341625
V a, b, c, d_val;
@@ -1637,8 +1628,7 @@ HWY_INLINE V FastLog10(D d, V x) {
16371628
// log(y) directly.
16381629
const V z = Sub(y, Set(d, static_cast<T>(1.0)));
16391630

1640-
if constexpr ((kLanes >= 4 && !HWY_HAVE_SCALABLE) ||
1641-
(HWY_HAVE_SCALABLE && sizeof(T) == 4 && detail::IsFull(d))) {
1631+
if constexpr (CanLookup8(d)) {
16421632
// --- Table Lookup ---
16431633
const auto scale = Set(d, static_cast<T>(11.3137085));
16441634
auto idx_i = ConvertInRangeTo(

hwy/ops/shared-inl.h

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -560,6 +560,20 @@ HWY_API bool IsAligned(D d, T* ptr) {
560560
return reinterpret_cast<uintptr_t>(ptr) % (N * sizeof(T)) == 0;
561561
}
562562

563+
// Returns whether `Lookup8` can be used for vectors created from tag `d`.
// Declared `constexpr` so that callers can branch on the result at compile
// time via `if constexpr`.
564+
template <class D, typename T = TFromD<D>>
565+
HWY_API constexpr bool CanLookup8(D d) {
566+
// `Lookup8` can use two-register tables, so it is sufficient to ensure
567+
// vectors have at least four lanes (8/2). For fixed-length vectors: check
568+
// `MaxLanes` directly. For scalable vectors, first require full
569+
// (non-partial) vectors, which implies they are at least 128 bits. Then also
570+
// require 16 or 32-bit elements, which implies at least 128/{16,32} =
571+
// {8,4} lanes per vector. For 8-bit T, `TableLookupBytes` is more efficient.
// 64-bit T is not accepted on scalable targets: a 128-bit vector holds only
// two such lanes, fewer than the four required.
572+
return (!HWY_HAVE_SCALABLE && MaxLanes(d) >= 4) ||
573+
(HWY_HAVE_SCALABLE && detail::IsFull(d) &&
574+
(sizeof(T) == 2 || sizeof(T) == 4));
575+
}
576+
563577
// ------------------------------ Choosing overloads (SFINAE)
564578

565579
// Same as base.h macros but with a Simd<T, N, kPow2> argument instead of T.

0 commit comments

Comments
 (0)