@@ -704,7 +704,8 @@ fvec4 map_to_cone(float r1, float r2, fvec4 N, float radius) {
704704 theta = 0 .5f * PI * (1 .0f - 0 .5f * (offset.get <0 >() / offset.get <1 >()));
705705 }
706706
707- const fvec2 uv = fvec2 (radius * r * cosf (theta), radius * r * sinf (theta));
707+ const fvec2 sincos_theta = portable_sincos (theta);
708+ const fvec2 uv = fvec2 (radius * r * sincos_theta.get <1 >(), radius * r * sincos_theta.get <0 >());
708709
709710 fvec4 LT , LB ;
710711 create_tbn (normalize (N), LT , LB );
@@ -764,7 +765,7 @@ force_inline bool quadratic(float a, float b, float c, float &t0, float &t1) {
764765}
765766
766767force_inline float ngon_rad (const float theta, const float n) {
767- return cosf (PI / n) / cosf (theta - (2 .0f * PI / n) * floorf ((n * theta + PI ) / (2 .0f * PI )));
768+ return portable_cos (PI / n) / portable_cos (theta - (2 .0f * PI / n) * floorf ((n * theta + PI ) / (2 .0f * PI )));
768769}
769770
770771float approx_atan2 (const float y, const float x) { // max error is 0.000004f
@@ -1116,10 +1117,112 @@ force_inline fvec4 slerp(const fvec4 &start, const fvec4 &end, const float perce
11161117 // And multiplying that by percent returns the angle between
11171118 // start and the final result.
11181119 const float theta = acosf (cos_theta) * percent;
1119- fvec4 relative_vec = safe_normalize (end - start * cos_theta);
1120+ const fvec4 relative_vec = safe_normalize (end - start * cos_theta);
11201121 // Orthonormal basis
11211122 // The final result.
1122- return start * cosf (theta) + relative_vec * sinf (theta);
1123+ const fvec2 sincos_theta = portable_sincos (theta);
1124+ return start * sincos_theta.get <1 >() + relative_vec * sincos_theta.get <0 >();
1125+ }
1126+
1127+ //
1128+ // Polynomial approximation of cosine
1129+ // Max error : 6.987e-07
1130+ //
1131+ float portable_cos (float a) {
1132+ // Normalize angle to [0, 1] range, where 1 corresponds to 2*PI
1133+ a = fract (fabs (a) * 0 .15915494309189535f );
1134+
1135+ // Select between ranges [0; 0.25), [0.25; 0.75), [0.75; 1.0]
1136+ fvec4 selector = 0 .0f ;
1137+ selector.set <0 >(float (a < 0 .25f ));
1138+ selector.set <2 >(float (a >= 0 .75f ));
1139+ selector.set <1 >(-1 .0f + dot (selector, fvec4{1 , 0 , 1 , 0 }));
1140+
1141+ // Center around ranges
1142+ fvec4 arg = fvec4{a} - fvec4{0 .0f , 0 .5f , 1 .0f , 0 .0f };
1143+ // Squared value gives better precision
1144+ arg *= arg;
1145+
1146+ // Evaluate 5th-degree polynome
1147+ fvec4 res = fmadd (-25 .0407296503853054f , arg, 60 .1524123580209817f );
1148+ res = fmsub (res, arg, 85 .4539888046442542f );
1149+ res = fmadd (res, arg, 64 .9393549651994562f );
1150+ res = fmsub (res, arg, 19 .7392086060579359f );
1151+ res = fmadd (res, arg, 0 .9999999998415476f );
1152+
1153+ // Combine contributions based on selector to get final value
1154+ return dot (res, selector);
1155+ }
1156+
1157+ //
1158+ // Polynomial approximation of sine
1159+ // Max error : 8.482e-07
1160+ //
1161+ float portable_sin (float a) {
1162+ // Normalize angle to [0, 1] range, where 1 corresponds to 2*PI
1163+ a = fract (fabs (a - 1 .5707963267948966f ) * 0 .15915494309189535f );
1164+
1165+ // Select between ranges [0; 0.25), [0.25; 0.75), [0.75; 1.0]
1166+ fvec4 selector = 0 .0f ;
1167+ selector.set <0 >(float (a < 0 .25f ));
1168+ selector.set <2 >(float (a >= 0 .75f ));
1169+ selector.set <1 >(-1 .0f + dot (selector, fvec4{1 , 0 , 1 , 0 }));
1170+
1171+ // Center around ranges
1172+ fvec4 arg = fvec4{a} - fvec4{0 .0f , 0 .5f , 1 .0f , 0 .0f };
1173+ // Squared value gives better precision
1174+ arg *= arg;
1175+
1176+ // Evaluate 5th-degree polynome
1177+ fvec4 res = fmadd (-25 .0407296503853054f , arg, 60 .1524123580209817f );
1178+ res = fmsub (res, arg, 85 .4539888046442542f );
1179+ res = fmadd (res, arg, 64 .9393549651994562f );
1180+ res = fmsub (res, arg, 19 .7392086060579359f );
1181+ res = fmadd (res, arg, 0 .9999999998415476f );
1182+
1183+ // Combine contributions based on selector to get final value
1184+ return dot (res, selector);
1185+ }
1186+
1187+ //
1188+ // Combined approximation of sine/cosine
1189+ // Max error : 8.482e-07
1190+ //
1191+ fvec2 portable_sincos (const float a) {
1192+ // Normalize angle to [0, 1] range, where 1 corresponds to 2*PI
1193+ const float a_cos = fract (fabsf (a) * 0 .15915494309189535f );
1194+ const float a_sin = fract (fabsf (a - 1 .5707963267948966f ) * 0 .15915494309189535f );
1195+
1196+ // Select between ranges [0; 0.25), [0.25; 0.75), [0.75; 1.0]
1197+ fvec4 selector_cos = 0 .0f , selector_sin = 0 .0f ;
1198+ selector_cos.set <0 >(float (a_cos < 0 .25f ));
1199+ selector_cos.set <2 >(float (a_cos >= 0 .75f ));
1200+ selector_cos.set <1 >(-1 .0f + dot (selector_cos, fvec4{1 , 0 , 1 , 0 }));
1201+ selector_sin.set <0 >(float (a_sin < 0 .25f ));
1202+ selector_sin.set <2 >(float (a_sin >= 0 .75f ));
1203+ selector_sin.set <1 >(-1 .0f + dot (selector_sin, fvec4{1 , 0 , 1 , 0 }));
1204+
1205+ // Center around ranges
1206+ fvec4 arg_cos = fvec4{a_cos} - fvec4{0 .0f , 0 .5f , 1 .0f , 0 .0f };
1207+ fvec4 arg_sin = fvec4{a_sin} - fvec4{0 .0f , 0 .5f , 1 .0f , 0 .0f };
1208+ // Squared value gives better precision
1209+ arg_cos *= arg_cos;
1210+ arg_sin *= arg_sin;
1211+
1212+ // Evaluate 5th-degree polynome
1213+ fvec4 res_cos = fmadd (-25 .0407296503853054f , arg_cos, 60 .1524123580209817f );
1214+ fvec4 res_sin = fmadd (-25 .0407296503853054f , arg_sin, 60 .1524123580209817f );
1215+ res_cos = fmsub (res_cos, arg_cos, 85 .4539888046442542f );
1216+ res_sin = fmsub (res_sin, arg_sin, 85 .4539888046442542f );
1217+ res_cos = fmadd (res_cos, arg_cos, 64 .9393549651994562f );
1218+ res_sin = fmadd (res_sin, arg_sin, 64 .9393549651994562f );
1219+ res_cos = fmsub (res_cos, arg_cos, 19 .7392086060579359f );
1220+ res_sin = fmsub (res_sin, arg_sin, 19 .7392086060579359f );
1221+ res_cos = fmadd (res_cos, arg_cos, 0 .9999999998415476f );
1222+ res_sin = fmadd (res_sin, arg_sin, 0 .9999999998415476f );
1223+
1224+ // Combine contributions based on selector to get final value
1225+ return fvec2{dot (res_sin, selector_sin), dot (res_cos, selector_cos)};
11231226}
11241227
11251228//
@@ -1224,7 +1327,8 @@ float Ray::Ref::SampleSphericalRectangle(const fvec4 &P, const fvec4 &light_pos,
12241327 if (out_p) {
12251328 // compute cu
12261329 const float au = Xi.get <0 >() * area + k;
1227- const float fu = safe_div ((cosf (au) * b0 - b1), sinf (au));
1330+ const fvec2 sincos_au = portable_sincos (au);
1331+ const float fu = safe_div ((sincos_au.get <1 >() * b0 - b1), sincos_au.get <0 >());
12281332 float cu = 1 .0f / sqrtf (fu * fu + b0sq) * (fu > 0 .0f ? 1 .0f : -1 .0f );
12291333 cu = clamp (cu, -1 .0f , 1 .0f );
12301334 // compute xu
@@ -1281,17 +1385,20 @@ float Ray::Ref::SampleSphericalTriangle(const fvec4 &P, const fvec4 &p1, const f
12811385 const float area_S = Xi.get <0 >() * area;
12821386
12831387 // Save the sine and cosine of the angle delta
1284- const float p = sinf (area_S - alpha);
1285- const float q = cosf (area_S - alpha);
1388+ const fvec2 sincos_area = portable_sincos (area_S - alpha);
1389+ const float p = sincos_area.get <0 >();
1390+ const float q = sincos_area.get <1 >();
12861391
12871392 // Compute the pair(u; v) that determines sin(beta_s) and cos(beta_s)
1288- const float u = q - cosf (alpha);
1289- const float v = p + sinf (alpha) * cosf (c);
1393+ const fvec2 sincos_alpha = portable_sincos (alpha);
1394+ const float u = q - sincos_alpha.get <1 >();
1395+ const float v = p + sincos_alpha.get <0 >() * portable_cos (c);
12901396
12911397 // Compute the s coordinate as normalized arc length from A to C_s
1292- const float denom = ((v * p + u * q) * sinf (alpha));
1293- const float s = safe_div (1 .0f , b) *
1294- portable_acosf (clamp (safe_div (((v * q - u * p) * cosf (alpha) - v), denom), -1 .0f , 1 .0f ));
1398+ const float denom = ((v * p + u * q) * sincos_alpha.get <0 >());
1399+ const float s =
1400+ safe_div (1 .0f , b) *
1401+ portable_acosf (clamp (safe_div (((v * q - u * p) * sincos_alpha.get <1 >() - v), denom), -1 .0f , 1 .0f ));
12951402
12961403 // Compute the third vertex of the sub - triangle
12971404 const fvec4 C_s = slerp (A, C, s);
@@ -1403,8 +1510,9 @@ void Ray::Ref::GeneratePrimaryRays(const camera_t &cam, const rect_t &rect, cons
14031510
14041511 theta += cam.lens_rotation ;
14051512
1406- offset.set <0 >(0 .5f * r * cosf (theta) / cam.lens_ratio );
1407- offset.set <1 >(0 .5f * r * sinf (theta));
1513+ const fvec2 sincos_theta = portable_sincos (theta);
1514+ offset.set <0 >(0 .5f * r * sincos_theta.get <1 >() / cam.lens_ratio );
1515+ offset.set <1 >(0 .5f * r * sincos_theta.get <0 >());
14081516 }
14091517
14101518 const float coc = 0 .5f * (cam.focal_length / cam.fstop );
@@ -3263,10 +3371,10 @@ void Ray::Ref::SampleLightSource(const fvec4 &P, const fvec4 &T, const fvec4 &B,
32633371 ls.L = make_fvec3 (l.dir .dir );
32643372 ls.area = 0 .0f ;
32653373 ls.pdf = 1 .0f ;
3266- if (l.dir .angle != 0 .0f ) {
3374+ if (l.dir .tan_angle != 0 .0f ) {
32673375 const float r1 = rand_light_uv.get <0 >(), r2 = rand_light_uv.get <1 >();
32683376
3269- const float radius = tanf ( l.dir .angle ) ;
3377+ const float radius = l.dir .tan_angle ;
32703378 ls.L = normalize (map_to_cone (r1, r2, ls.L , radius));
32713379 ls.area = PI * radius * radius;
32723380
@@ -3332,8 +3440,10 @@ void Ray::Ref::SampleLightSource(const fvec4 &P, const fvec4 &T, const fvec4 &B,
33323440 theta = 0 .5f * PI - 0 .25f * PI * (offset.get <0 >() / offset.get <1 >());
33333441 }
33343442
3335- offset.set <0 >(0 .5f * r * cosf (theta));
3336- offset.set <1 >(0 .5f * r * sinf (theta));
3443+ const fvec2 sincos_theta = portable_sincos (theta);
3444+
3445+ offset.set <0 >(0 .5f * r * sincos_theta.get <1 >());
3446+ offset.set <1 >(0 .5f * r * sincos_theta.get <0 >());
33373447 }
33383448
33393449 const fvec4 lp = light_pos + light_u * offset.get <0 >() + light_v * offset.get <1 >();
@@ -3375,7 +3485,8 @@ void Ray::Ref::SampleLightSource(const fvec4 &P, const fvec4 &T, const fvec4 &B,
33753485 fvec4 light_v = cross (light_u, light_dir);
33763486
33773487 const float phi = PI * r1;
3378- const fvec4 normal = cosf (phi) * light_u + sinf (phi) * light_v;
3488+ const fvec2 sincos_phi = portable_sincos (phi);
3489+ const fvec4 normal = sincos_phi.get <1 >() * light_u + sincos_phi.get <0 >() * light_v;
33793490
33803491 const fvec4 lp = light_pos + normal * l.line .radius + (r2 - 0 .5f ) * light_dir * l.line .height ;
33813492
@@ -3473,7 +3584,8 @@ void Ray::Ref::SampleLightSource(const fvec4 &P, const fvec4 &T, const fvec4 &B,
34733584 } else {
34743585 // Sample environment as hemishpere
34753586 const float phi = 2 * PI * ry;
3476- const float cos_phi = cosf (phi), sin_phi = sinf (phi);
3587+ const fvec2 sincos_phi = portable_sincos (phi);
3588+ const float cos_phi = sincos_phi.get <1 >(), sin_phi = sincos_phi.get <0 >();
34773589
34783590 const float dir = sqrtf (1 .0f - rx * rx);
34793591 auto V = fvec4{dir * cos_phi, dir * sin_phi, rx, 0 .0f }; // in tangent-space
@@ -3652,7 +3764,7 @@ void Ray::Ref::IntersectAreaLights(Span<const ray_data_t> rays, Span<const light
36523764 } else if (l.type == LIGHT_TYPE_DIR ) {
36533765 const fvec4 light_dir = make_fvec3 (l.dir .dir );
36543766 const float cos_theta = dot (rd, light_dir);
3655- if ((inout_inter.v < 0 .0f || no_shadow) && cos_theta > cosf ( l.dir .angle ) ) {
3767+ if ((inout_inter.v < 0 .0f || no_shadow) && cos_theta > l.dir .cos_angle ) {
36563768 inout_inter.v = 0 .0f ;
36573769 inout_inter.obj_index = -int (light_index) - 1 ;
36583770 inout_inter.t = 1 .0f / cos_theta;
@@ -3898,7 +4010,7 @@ void Ray::Ref::IntersectAreaLights(Span<const ray_data_t> rays, Span<const light
38984010 } else if (l.type == LIGHT_TYPE_DIR ) {
38994011 const fvec4 light_dir = make_fvec3 (l.dir .dir );
39004012 const float cos_theta = dot (rd, light_dir);
3901- if ((inout_inter.v < 0 .0f || no_shadow) && cos_theta > cosf ( l.dir .angle ) ) {
4013+ if ((inout_inter.v < 0 .0f || no_shadow) && cos_theta > l.dir .cos_angle ) {
39024014 inout_inter.v = 0 .0f ;
39034015 inout_inter.obj_index = -int (light_index) - 1 ;
39044016 inout_inter.t = 1 .0f / cos_theta;
@@ -4098,7 +4210,7 @@ void Ray::Ref::IntersectAreaLights(Span<const ray_data_t> rays, Span<const light
40984210 } else if (l.type == LIGHT_TYPE_DIR ) {
40994211 const fvec4 light_dir = make_fvec3 (l.dir .dir );
41004212 const float cos_theta = dot (rd, light_dir);
4101- if ((inout_inter.v < 0 .0f || no_shadow) && cos_theta > cosf ( l.dir .angle ) ) {
4213+ if ((inout_inter.v < 0 .0f || no_shadow) && cos_theta > l.dir .cos_angle ) {
41024214 inout_inter.v = 0 .0f ;
41034215 inout_inter.obj_index = -int (light_index) - 1 ;
41044216 inout_inter.t = 1 .0f / cos_theta;
0 commit comments