@@ -362,7 +362,7 @@ static inline Vec8h convert4f_8h (Vec4f f) {
362362#else
363363
364364// extend precision: Vec8h -> Vec4f. upper half ignored
365- static Vec4f convert8h_4f (Vec8h x) {
365+ static inline Vec4f convert8h_4f (Vec8h x) {
366366 // __m128i a = _mm_cvtepu16_epi32(x); // SSE4.1
367367 __m128i a = _mm_unpacklo_epi16 (x, _mm_setzero_si128 ()); // zero extend
368368 __m128i b = _mm_slli_epi32 (a, 16 ); // left-justify
@@ -387,7 +387,7 @@ static Vec4f convert8h_4f (Vec8h x) {
387387}
388388
389389// reduce precision: Vec4f -> Vec8h. upper half zero
390- static Vec8h convert4f_8h (Vec4f x) {
390+ static inline Vec8h convert4f_8h (Vec4f x) {
391391 __m128i a = _mm_castps_si128 (x); // bit-cast to integer
392392 // 23 bit mantissa rounded to 10 bits - nearest or even
393393 __m128i r = _mm_srli_epi32 (a, 12 ); // get first discarded mantissa bit
@@ -449,7 +449,7 @@ static inline Vec8h to_float16 (Vec8f f) {
449449#elif INSTRSET >= 8 // __F16C__ not defined, AVX2 supported
450450
451451// extend precision: Vec8h -> Vec8f
452- static Vec8f to_float (Vec8h x) {
452+ static inline Vec8f to_float (Vec8h x) {
453453 __m256i a = _mm256_cvtepu16_epi32 (x); // zero-extend each element to 32 bits
454454 __m256i b = _mm256_slli_epi32 (a, 16 ); // left-justify
455455 __m256i c = _mm256_and_si256 (b, _mm256_set1_epi32 (0x80000000 )); // isolate sign bit
@@ -473,7 +473,7 @@ static Vec8f to_float (Vec8h x) {
473473}
474474
475475// reduce precision: Vec8f -> Vec8h
476- static Vec8h to_float16 (Vec8f x) {
476+ static inline Vec8h to_float16 (Vec8f x) {
477477 __m256i a = _mm256_castps_si256 (x); // bit-cast to integer
478478 // 23 bit mantissa rounded to 10 bits - nearest or even
479479 __m256i r = _mm256_srli_epi32 (a, 12 ); // get first discarded mantissa bit
@@ -516,7 +516,7 @@ static Vec8h to_float16 (Vec8f x) {
516516#else // __F16C__ not defined, AVX2 not supported
517517
518518// extend precision: Vec8h -> Vec8f
519- static Vec8f to_float (Vec8h x) {
519+ static inline Vec8f to_float (Vec8h x) {
520520 Vec8s xx = __m128i (x);
521521 Vec4ui a1 = _mm_unpacklo_epi16 (xx, _mm_setzero_si128 ());
522522 Vec4ui a2 = _mm_unpackhi_epi16 (xx, _mm_setzero_si128 ());
@@ -558,7 +558,7 @@ static Vec8f to_float (Vec8h x) {
558558}
559559
560560// reduce precision: Vec8f -> Vec8h
561- static Vec8h to_float16 (Vec8f x) {
561+ static inline Vec8h to_float16 (Vec8f x) {
562562 Vec4ui a1 = _mm_castps_si128 (x.get_low ()); // low half
563563 Vec4ui a2 = _mm_castps_si128 (x.get_high ()); // high half
564564 Vec4ui r1 = a1 >> 12 ; // get first discarded mantissa bit
0 commit comments