1010#include "py/runtime.h"
1111#include <math.h>
1212
13- // Hardcoded constants for 98MHz register-speed access
1413static const float PI_F = 3.1415926535f ;
1514static const float TWO_PI_F = 6.2831853071f ;
1615static const float HALF_PI_F = 1.5707963267f ;
1716static const float INV_TWO_PI_F = 0.1591549431f ;
1817
1918// -----------------------------------------------------------------------------
20- // Core Math Engines (Optimized for ARM VSEL/VCMPE instructions )
19+ // Core Math Engines (Precision Tuned )
2120// -----------------------------------------------------------------------------
2221
23- static inline float fast_sin_poly (float x ) {
24- float x2 = x * x ;
25- // Horner's Method: 3 muls, 2 adds
26- return x * (1.0f + x2 * (-0.1666665f + x2 * 0.0083322f ));
27- }
28-
2922static inline float fast_sin_internal (float theta ) {
3023 // 1. Range Reduction to [-PI, PI]
31- // Manual round implementation to avoid undefined 'roundf' in some libm builds
3224 float quot = theta * INV_TWO_PI_F ;
3325 float x = theta - (float )((int )(quot + (quot > 0 ? 0.5f : -0.5f ))) * TWO_PI_F ;
3426
35- // 2. Branchless Symmetry Reduction to [-PI/2, PI/2]
36- float x_abs = fabsf (x );
37- float x_folded = (x_abs > HALF_PI_F ) ? PI_F - x_abs : x_abs ;
38-
39- // Restore sign branchlessly
40- float result_x = (x < 0.0f ) ? - x_folded : x_folded ;
27+ // 2. Symmetry Reduction to [-PI/2, PI/2]
28+ // Using simple branches here ensures the sign is handled perfectly at the poles
29+ if (x > HALF_PI_F ) { x = PI_F - x ; }
30+ else if (x < - HALF_PI_F ) { x = - PI_F - x ; }
4131
42- return fast_sin_poly (result_x );
32+ // 3. 5th-Degree Remez Minimax Polynomial
33+ // These coefficients minimize maximum absolute error to ~0.00005
34+ float x2 = x * x ;
35+ return x * (0.9999966f + x2 * (-0.1666482f + x2 * 0.0083062f ));
4336}
4437
4538static inline float fast_atan2_internal (float y , float x ) {
46- float ay = fabsf (y ) + 1e-10f ;
47- float ax = fabsf (x );
48-
49- // Determine which axis is dominant
50- float z = (ax >= ay ) ? y / ax : x / ay ;
51- float abs_z = fabsf (z );
52-
53- // Parabolic approximation for atan(z)
54- float angle = (0.7853982f + 0.273f * (1.0f - abs_z )) * z ;
55-
56- // Quadrant adjustment
57- angle = (ax < ay ) ? 1.5707963f - angle : angle ;
58-
59- if (x < 0.0f ) {
60- angle += (y >= 0.0f ) ? PI_F : - PI_F ;
39+ // Edge case: Origin
40+ if (x == 0.0f && y == 0.0f ) return 0.0f ;
41+
42+ float abs_y = fabsf (y ) + 1e-10f ; // Prevent div by zero
43+ float abs_x = fabsf (x );
44+ float angle ;
45+
46+ // Rational approximation: atan(z) approx z / (1 + 0.28086 * z^2)
47+ if (abs_x >= abs_y ) {
48+ float r = y / x ;
49+ angle = r / (1.0f + 0.28086f * r * r );
50+ // Correct for negative X
51+ if (x < 0.0f ) {
52+ angle += (y >= 0.0f ) ? PI_F : - PI_F ;
53+ }
54+ } else {
55+ float r = x / y ;
56+ angle = (y > 0.0f ? HALF_PI_F : - HALF_PI_F ) - r / (1.0f + 0.28086f * r * r );
6157 }
58+
6259 return angle ;
6360}
6461
@@ -82,7 +79,7 @@ static mp_obj_t experimental_atan2(mp_obj_t y_in, mp_obj_t x_in) {
8279static MP_DEFINE_CONST_FUN_OBJ_2 (experimental_atan2_obj , experimental_atan2 ) ;
8380
8481// -----------------------------------------------------------------------------
85- // Anti-Optimization Benchmark
82+ // Detailed Benchmark
8683// -----------------------------------------------------------------------------
8784
8885static mp_obj_t experimental_benchmark_detailed (mp_obj_t n_in ) {
@@ -118,7 +115,7 @@ static mp_obj_t experimental_benchmark_detailed(mp_obj_t n_in) {
118115static MP_DEFINE_CONST_FUN_OBJ_1 (experimental_benchmark_detailed_obj , experimental_benchmark_detailed ) ;
119116
120117// -----------------------------------------------------------------------------
121- // Registry
118+ // Module Registry
122119// -----------------------------------------------------------------------------
123120
124121static const mp_rom_map_elem_t experimental_globals_table [] = {
0 commit comments