Skip to content

Commit 008637d

Browse files
authored
Update erf.cpp
1 parent f6b7058 commit 008637d

1 file changed

Lines changed: 19 additions & 19 deletions

File tree

onnxruntime/core/mlas/lib/erf.cpp

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -313,13 +313,13 @@ exp_neg_rational_approx_f16(MLAS_FLOAT16X8 x)
313313
MLAS_FLOAT16X8 d1v = MlasBroadcastF16Float16x8(d1);
314314
MLAS_FLOAT16X8 d2v = MlasBroadcastF16Float16x8(d2);
315315
MLAS_FLOAT16X8 x2 = MlasMultiplyFloat16(x, x);
316-
MLAS_FLOAT16X8 num = MlasMultiplyAddFloat16(c1v, x,c0v);
317-
num = MlasMultiplyAddFloat16(c2v, x2,num);
318-
MLAS_FLOAT16X8 den = MlasMultiplyAddFloat16(d1v, x,d0v);
319-
den = MlasMultiplyAddFloat16(d2v, x2,den);
320-
MLAS_FLOAT16X8 recip = MlasapproximatereciprocalFloat16(den);
321-
recip = MlasMultiplyFloat16(recip, MlasreciprocalsqrtFloat16(den, recip));
322-
recip = MlasMultiplyFloat16(recip, MlasreciprocalsqrtFloat16(den, recip));
316+
MLAS_FLOAT16X8 num = MlasMultiplyAddFloat16(c1v, x, c0v);
317+
num = MlasMultiplyAddFloat16(c2v, x2, num);
318+
MLAS_FLOAT16X8 den = MlasMultiplyAddFloat16(d1v, x, d0v);
319+
den = MlasMultiplyAddFloat16(d2v, x2, den);
320+
MLAS_FLOAT16X8 recip = MlasApproximateReciprocalFloat16(den);
321+
recip = MlasMultiplyFloat16(recip, MlasReciprocalSqrtFloat16(den, recip));
322+
recip = MlasMultiplyFloat16(recip, MlasReciprocalSqrtFloat16(den, recip));
323323
MLAS_FLOAT16X8 result = MlasMultiplyFloat16(num, recip);
324324
return result;
325325
}
@@ -354,32 +354,32 @@ MlasNeonErfKernelFp16(const _mlas_fp16_* Input, _mlas_fp16_* Output, size_t N)
354354
size_t i = 0;
355355
for (; i + 8 <= N; i += 8) {
356356
MLAS_FLOAT16X8 x = MlasLoadFloat16x8(&Input[i]);
357-
MLAS_UINT16X8 neg_mask = MlasComparelessthanFloat16(x, vzero);
358-
MLAS_FLOAT16X8 sign = MlasselectFloat16(neg_mask, vneg_one, vone);
357+
MLAS_UINT16X8 neg_mask = MlasCompareLessThanFloat16(x, vzero);
358+
MLAS_FLOAT16X8 sign = MlasSelectFloat16(neg_mask, vneg_one, vone);
359359
MLAS_FLOAT16X8 absx = MlasAbsFloat16(x);
360-
MLAS_UINT16X8 use_mask = MlasComparelessthanFloat16(absx, vth);
360+
MLAS_UINT16X8 use_mask = MlasCompareLessThanFloat16(absx, vth);
361361
MLAS_FLOAT16X8 absx_clamped = MlasMinimumFloat16(absx, vth);
362-
MLAS_FLOAT16X8 denom = MlasMultiplyAddFloat16(vp, absx_clamped,vone);
363-
MLAS_FLOAT16X8 t = MlasapproximatereciprocalFloat16(denom);
364-
t = MlasMultiplyFloat16(t, MlasreciprocalsqrtFloat16(denom, t));
365-
t = MlasMultiplyFloat16(t, MlasreciprocalsqrtFloat16(denom, t));
362+
MLAS_FLOAT16X8 denom = MlasMultiplyAddFloat16(vp, absx_clamped, vone);
363+
MLAS_FLOAT16X8 t = MlasApproximateReciprocalFloat16(denom);
364+
t = MlasMultiplyFloat16(t, MlasReciprocalSqrtFloat16(denom, t));
365+
t = MlasMultiplyFloat16(t, MlasReciprocalSqrtFloat16(denom, t));
366366
MLAS_FLOAT16X8 t2 = MlasMultiplyFloat16(t, t);
367367
MLAS_FLOAT16X8 t3 = MlasMultiplyFloat16(t2, t);
368368
MLAS_FLOAT16X8 t4 = MlasMultiplyFloat16(t3, t);
369369
MLAS_FLOAT16X8 t5 = MlasMultiplyFloat16(t4, t);
370370
MLAS_FLOAT16X8 poly = MlasMultiplyFloat16(va1, t);
371-
poly = MlasMultiplyAddFloat16(va2, t2,poly);
372-
poly = MlasMultiplyAddFloat16(va3, t3,poly);
373-
poly = MlasMultiplyAddFloat16(va4, t4,poly);
374-
poly = MlasMultiplyAddFloat16(va5, t5,poly);
371+
poly = MlasMultiplyAddFloat16(va2, t2, poly);
372+
poly = MlasMultiplyAddFloat16(va3, t3, poly);
373+
poly = MlasMultiplyAddFloat16(va4, t4, poly);
374+
poly = MlasMultiplyAddFloat16(va5, t5, poly);
375375
MLAS_FLOAT16X8 x2 = MlasMultiplyFloat16(absx_clamped, absx_clamped);
376376
MLAS_FLOAT16X8 exp_neg_x2 = exp_neg_rational_approx_f16(x2);
377377
MLAS_FLOAT16X8 poly_mul_exp = MlasMultiplyFloat16(poly, exp_neg_x2);
378378
MLAS_FLOAT16X8 one_minus_term = MlasSubtractFloat16(vone, poly_mul_exp);
379379
MLAS_FLOAT16X8 erf_approx = MlasMultiplyFloat16(sign, one_minus_term);
380380
erf_approx = MlasMinimumFloat16(erf_approx, vone);
381381
erf_approx = MlasMaximumFloat16(erf_approx, vneg_one);
382-
MLAS_FLOAT16X8 result = MlasselectFloat16(use_mask, erf_approx, sign);
382+
MLAS_FLOAT16X8 result = MlasSelectFloat16(use_mask, erf_approx, sign);
383383
MlasStoreFloat16x8(&Output[i], result);
384384
}
385385

0 commit comments

Comments
 (0)