|
11 | 11 |
|
12 | 12 | #include <math.h> |
13 | 13 |
|
14 | | -// Math Constants - Hardcoded for speed |
| 14 | +// Math Constants - Hardcoded for register-speed access |
15 | 15 | static const float PI_F = 3.1415926535f; |
16 | 16 | static const float TWO_PI_F = 6.2831853071f; |
17 | 17 | static const float HALF_PI_F = 1.5707963267f; |
18 | 18 | static const float INV_TWO_PI_F = 0.1591549431f; |
19 | 19 |
|
20 | 20 | // ----------------------------------------------------------------------------- |
21 | | -// Min-Maxed Internal Engines |
| 21 | +// Internal Math Engines (Inlined for zero call overhead) |
22 | 22 | // ----------------------------------------------------------------------------- |
23 | 23 |
|
24 | | -// Core Polynomial - Inlined to remove call overhead |
25 | 24 | static inline float fast_sin_poly(float x) { |
26 | 25 | float x2 = x * x; |
27 | 26 | // Horner's Method: reduces the number of multiplications |
28 | 27 | return x * (1.0f + x2 * (-0.1666665f + x2 * 0.0083322f)); |
29 | 28 | } |
30 | 29 |
|
31 | 30 | static inline float fast_sin_internal(float theta) { |
32 | | - // Fast range reduction |
33 | 31 | float quot = theta * INV_TWO_PI_F; |
34 | 32 | float x = theta - (float)((int)(quot + (quot > 0 ? 0.5f : -0.5f))) * TWO_PI_F; |
35 | 33 |
|
36 | | - // Symmetry reduction |
37 | 34 | if (x > HALF_PI_F) { x = PI_F - x; } |
38 | 35 | else if (x < -HALF_PI_F) { x = -PI_F - x; } |
39 | 36 |
|
@@ -73,46 +70,59 @@ static mp_obj_t experimental_cos(mp_obj_t theta_in) { |
73 | 70 | } |
74 | 71 | static MP_DEFINE_CONST_FUN_OBJ_1(experimental_cos_obj, experimental_cos); |
75 | 72 |
|
76 | | -// MIN-MAX: Using FUN_OBJ_2 instead of FUN_OBJ_KW to eliminate keyword parsing overhead |
77 | 73 | static mp_obj_t experimental_atan2(mp_obj_t y_in, mp_obj_t x_in) { |
78 | 74 | return mp_obj_new_float_from_f(fast_atan2_internal(mp_obj_get_float(y_in), mp_obj_get_float(x_in))); |
79 | 75 | } |
80 | 76 | static MP_DEFINE_CONST_FUN_OBJ_2(experimental_atan2_obj, experimental_atan2); |
81 | 77 |
|
82 | 78 | // ----------------------------------------------------------------------------- |
83 | | -// Benchmark |
| 79 | +// Detailed Granular Benchmark |
84 | 80 | // ----------------------------------------------------------------------------- |
85 | 81 |
|
86 | | -static mp_obj_t experimental_benchmark_internal(mp_obj_t n_in) { |
| 82 | +static mp_obj_t experimental_benchmark_detailed(mp_obj_t n_in) { |
87 | 83 | int32_t n = mp_obj_get_int(n_in); |
88 | 84 | volatile float result = 0.0f; |
89 | | - uint32_t start = mp_hal_ticks_ms(); |
| 85 | + uint32_t t0, t1, t2, t3; |
90 | 86 |
|
| 87 | + // Loop 1: Sine Only |
| 88 | + t0 = mp_hal_ticks_ms(); |
91 | 89 | for (int32_t i = 0; i < n; i++) { |
92 | | - result += fast_sin_internal(1.23f); |
93 | | - result += fast_sin_internal(1.23f + HALF_PI_F); |
94 | | - result += fast_atan2_internal(1.23f, 1.23f); |
| 90 | + result += fast_sin_internal(1.1f); |
95 | 91 | } |
96 | 92 |
|
97 | | - uint32_t end = mp_hal_ticks_ms(); |
98 | | - uint32_t total_ms = end - start; |
99 | | - float ns_per_op = (n > 0) ? ((float)total_ms * 1000000.0f) / n : 0; |
| 93 | + // Loop 2: Cosine Only (includes the HALF_PI addition) |
| 94 | + t1 = mp_hal_ticks_ms(); |
| 95 | + for (int32_t i = 0; i < n; i++) { |
| 96 | + result += fast_sin_internal(1.1f + HALF_PI_F); |
| 97 | + } |
100 | 98 |
|
101 | | - mp_obj_t tuple[2] = { mp_obj_new_int(total_ms), mp_obj_new_float(ns_per_op) }; |
102 | | - return mp_obj_new_tuple(2, tuple); |
| 99 | + // Loop 3: Atan2 Only (includes the division and branching) |
| 100 | + t2 = mp_hal_ticks_ms(); |
| 101 | + for (int32_t i = 0; i < n; i++) { |
| 102 | + result += fast_atan2_internal(1.1f, 1.1f); |
| 103 | + } |
| 104 | + t3 = mp_hal_ticks_ms(); |
| 105 | + |
| 106 | + mp_obj_t tuple[4] = { |
| 107 | + mp_obj_new_int(t1 - t0), // Sin ms |
| 108 | + mp_obj_new_int(t2 - t1), // Cos ms |
| 109 | + mp_obj_new_int(t3 - t2), // Atan2 ms |
| 110 | + mp_obj_new_int(t3 - t0) // Total ms |
| 111 | + }; |
| 112 | + return mp_obj_new_tuple(4, tuple); |
103 | 113 | } |
104 | | -static MP_DEFINE_CONST_FUN_OBJ_1(experimental_benchmark_internal_obj, experimental_benchmark_internal); |
| 114 | +static MP_DEFINE_CONST_FUN_OBJ_1(experimental_benchmark_detailed_obj, experimental_benchmark_detailed); |
105 | 115 |
|
106 | 116 | // ----------------------------------------------------------------------------- |
107 | 117 | // Module Registry |
108 | 118 | // ----------------------------------------------------------------------------- |
109 | 119 |
|
110 | 120 | static const mp_rom_map_elem_t experimental_globals_table[] = { |
111 | | - { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_experimental) }, |
112 | | - { MP_ROM_QSTR(MP_QSTR_sin), MP_ROM_PTR(&experimental_sin_obj) }, |
113 | | - { MP_ROM_QSTR(MP_QSTR_cos), MP_ROM_PTR(&experimental_cos_obj) }, |
114 | | - { MP_ROM_QSTR(MP_QSTR_atan2), MP_ROM_PTR(&experimental_atan2_obj) }, |
115 | | - { MP_ROM_QSTR(MP_QSTR_benchmark_internal), MP_ROM_PTR(&experimental_benchmark_internal_obj) }, |
| 121 | + { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_experimental) }, |
| 122 | + { MP_ROM_QSTR(MP_QSTR_sin), MP_ROM_PTR(&experimental_sin_obj) }, |
| 123 | + { MP_ROM_QSTR(MP_QSTR_cos), MP_ROM_PTR(&experimental_cos_obj) }, |
| 124 | + { MP_ROM_QSTR(MP_QSTR_atan2), MP_ROM_PTR(&experimental_atan2_obj) }, |
| 125 | + { MP_ROM_QSTR(MP_QSTR_benchmark_detailed), MP_ROM_PTR(&experimental_benchmark_detailed_obj) }, |
116 | 126 | }; |
117 | 127 | static MP_DEFINE_CONST_DICT(pb_module_experimental_globals, experimental_globals_table); |
118 | 128 |
|
|
0 commit comments