1010#include "py/runtime.h"
1111#include <math.h>
1212
13+ // Hardcoded single-precision constants for the 98MHz target; INV_TWO_PI_F lets range reduction multiply instead of divide
1314static const float PI_F = 3.1415926535f ;
1415static const float TWO_PI_F = 6.2831853071f ;
1516static const float HALF_PI_F = 1.5707963267f ;
1617static const float INV_TWO_PI_F = 0.1591549431f ;
1718
1819// -----------------------------------------------------------------------------
19- // Internal Engines (Inlined )
20+ // Core Math Engines (Optimized for ARM VSEL/VCMPE instructions )
2021// -----------------------------------------------------------------------------
2122
2223static inline float fast_sin_poly (float x ) {
2324 float x2 = x * x ;
25+ // Degree-5 odd polynomial in Horner form: 3 muls + 2 adds here, plus the x * x above
2426 return x * (1.0f + x2 * (-0.1666665f + x2 * 0.0083322f ));
2527}
2628
2729static inline float fast_sin_internal (float theta ) {
28- float quot = theta * INV_TWO_PI_F ;
29- float x = theta - (float )((int )(quot + (quot > 0 ? 0.5f : -0.5f ))) * TWO_PI_F ;
30- if (x > HALF_PI_F ) { x = PI_F - x ; }
31- else if (x < - HALF_PI_F ) { x = - PI_F - x ; }
32- return fast_sin_poly (x );
30+ // 1. Range reduction to [-PI, PI]: subtract the nearest whole multiple of 2*PI
31+ // roundf picks that multiple directly, replacing the old int cast and sign test
32+ float x = theta - roundf (theta * INV_TWO_PI_F ) * TWO_PI_F ;
33+
34+ // 2. Symmetry reduction to [-PI/2, PI/2] using sin(PI - a) = sin(a)
35+ // The ternaries are written so the compiler can use conditional selects (e.g. VSEL)
36+ // instead of branches; whether it does depends on the target FPU and flags.
37+ float x_abs = fabsf (x );
38+ float x_folded = (x_abs > HALF_PI_F ) ? PI_F - x_abs : x_abs ;
39+
40+ // Restore sign (sin is odd), so x in [-PI, -PI/2) maps to -PI - x as before
41+ float result_x = (x < 0.0f ) ? - x_folded : x_folded ;
42+
43+ return fast_sin_poly (result_x );
3344}
3445
3546static inline float fast_atan2_internal (float y , float x ) {
3647 float ay = fabsf (y ) + 1e-10f ;
3748 float ax = fabsf (x );
38- float z , angle ;
39- if (ax >= ay ) {
40- z = y / ax ;
41- angle = (0.7853982f + 0.273f * (1.0f - fabsf (z ))) * z ;
42- } else {
43- z = x / ay ;
44- angle = 1.5707963f - (0.7853982f + 0.273f * (1.0f - fabsf (z ))) * z ;
45- }
49+
50+ // Determine which axis is dominant without standard branching
51+ float z = (ax >= ay ) ? y / ax : x / ay ;
52+ float abs_z = fabsf (z );
53+
54+ // Parabolic approximation for atan(z)
55+ float angle = (0.7853982f + 0.273f * (1.0f - abs_z )) * z ;
56+
57+ // When |y| dominates, z = x/|y|, so take the complementary angle PI/2 - atan(z) (candidate for VSEL)
58+ angle = (ax < ay ) ? 1.5707963f - angle : angle ;
59+
4660 if (x < 0.0f ) {
4761 angle += (y >= 0.0f ) ? PI_F : - PI_F ;
4862 }
@@ -69,7 +83,7 @@ static mp_obj_t experimental_atan2(mp_obj_t y_in, mp_obj_t x_in) {
6983static MP_DEFINE_CONST_FUN_OBJ_2 (experimental_atan2_obj , experimental_atan2 ) ;
7084
7185// -----------------------------------------------------------------------------
72- // Detailed Internal Benchmark (Defeats Compiler Shortcuts)
86+ // Anti-Optimization Benchmark
7387// -----------------------------------------------------------------------------
7488
7589static mp_obj_t experimental_benchmark_detailed (mp_obj_t n_in ) {
@@ -80,7 +94,6 @@ static mp_obj_t experimental_benchmark_detailed(mp_obj_t n_in) {
8094
8195 t0 = mp_hal_ticks_ms ();
8296 for (int32_t i = 0 ; i < n ; i ++ ) {
83- // Varying input prevents the compiler from pre-calculating
8497 result += fast_sin_internal ((float )i * inv_n );
8598 }
8699
@@ -91,7 +104,6 @@ static mp_obj_t experimental_benchmark_detailed(mp_obj_t n_in) {
91104
92105 t2 = mp_hal_ticks_ms ();
93106 for (int32_t i = 0 ; i < n ; i ++ ) {
94- // Changing y and x forces different branches in atan2
95107 result += fast_atan2_internal ((float )i , (float )(n - i ));
96108 }
97109 t3 = mp_hal_ticks_ms ();
@@ -106,6 +118,10 @@ static mp_obj_t experimental_benchmark_detailed(mp_obj_t n_in) {
106118}
107119static MP_DEFINE_CONST_FUN_OBJ_1 (experimental_benchmark_detailed_obj , experimental_benchmark_detailed ) ;
108120
121+ // -----------------------------------------------------------------------------
122+ // Registry
123+ // -----------------------------------------------------------------------------
124+
109125static const mp_rom_map_elem_t experimental_globals_table [] = {
110126 { MP_ROM_QSTR (MP_QSTR___name__ ), MP_ROM_QSTR (MP_QSTR_experimental ) },
111127 { MP_ROM_QSTR (MP_QSTR_sin ), MP_ROM_PTR (& experimental_sin_obj ) },
@@ -123,4 +139,5 @@ const mp_obj_module_t pb_module_experimental = {
123139#if !MICROPY_MODULE_BUILTIN_SUBPACKAGES
124140MP_REGISTER_MODULE (MP_QSTR_pybricks_dot_experimental , pb_module_experimental );
125141#endif
126- #endif
142+
143+ #endif // PYBRICKS_PY_EXPERIMENTAL
0 commit comments