Skip to content

Commit afcc763

Browse files
committed
final speed increases...?
1 parent 08eede2 commit afcc763

1 file changed

Lines changed: 33 additions & 23 deletions

File tree

pybricks/experimental/pb_module_experimental.c

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,26 @@
1111

1212
#include <math.h>
1313

14-
// Math constants, hardcoded as single-precision literals. The `f` suffix gives
// each one type float, so expressions built only from floats are not promoted
// to double by these operands.
static const float PI_F = 3.1415926535f;          // pi
static const float TWO_PI_F = 6.2831853071f;      // 2 * pi
static const float HALF_PI_F = 1.5707963267f;     // pi / 2
static const float INV_TWO_PI_F = 0.1591549431f;  // 1 / (2 * pi)
1919

2020
// -----------------------------------------------------------------------------
21-
// Min-Maxed Internal Engines
21+
// Internal Math Engines (static inline so the compiler can remove call overhead)
2222
// -----------------------------------------------------------------------------
2323

24-
// Core Polynomial - Inlined to remove call overhead
2524
// Odd degree-5 polynomial approximation of sin(x):
//
//     x - 0.1666665 * x^3 + 0.0083322 * x^5
//
// x:       angle in radians.
// returns: the polynomial value; exactly 0 for x == 0 and odd-symmetric in x.
//
// NOTE(review): the coefficients are close to the Taylor series, so the error
// grows with |x| (about 2e-4 at x = 1). Presumably callers fold the angle into
// [-pi/2, pi/2] first - confirm against fast_sin_internal.
static inline float fast_sin_poly(float x) {
    float x2 = x * x;
    // Horner form: the whole odd polynomial costs four multiplications
    // (including x * x above) and two additions.
    return x * (1.0f + x2 * (-0.1666665f + x2 * 0.0083322f));
}
3029

3130
static inline float fast_sin_internal(float theta) {
32-
// Fast range reduction
3331
float quot = theta * INV_TWO_PI_F;
3432
float x = theta - (float)((int)(quot + (quot > 0 ? 0.5f : -0.5f))) * TWO_PI_F;
3533

36-
// Symmetry reduction
3734
if (x > HALF_PI_F) { x = PI_F - x; }
3835
else if (x < -HALF_PI_F) { x = -PI_F - x; }
3936

@@ -73,46 +70,59 @@ static mp_obj_t experimental_cos(mp_obj_t theta_in) {
7370
}
7471
static MP_DEFINE_CONST_FUN_OBJ_1(experimental_cos_obj, experimental_cos);
7572

76-
// MIN-MAX: Using FUN_OBJ_2 instead of FUN_OBJ_KW to eliminate keyword parsing overhead
7773
// Python-facing atan2(y, x): converts both arguments to floats, runs
// fast_atan2_internal on them and boxes the result as a MicroPython float.
//
// y_in, x_in: MicroPython objects accepted by mp_obj_get_float.
// returns:    new float object holding fast_atan2_internal(y, x).
static mp_obj_t experimental_atan2(mp_obj_t y_in, mp_obj_t x_in) {
    mp_float_t y = mp_obj_get_float(y_in);
    mp_float_t x = mp_obj_get_float(x_in);
    return mp_obj_new_float_from_f(fast_atan2_internal(y, x));
}
// Registered as a fixed two-positional-argument function object.
static MP_DEFINE_CONST_FUN_OBJ_2(experimental_atan2_obj, experimental_atan2);
8177

8278
// -----------------------------------------------------------------------------
83-
// Benchmark
79+
// Detailed Granular Benchmark
8480
// -----------------------------------------------------------------------------
8581

86-
// Times the three internal math engines separately, one loop per engine.
//
// n_in:    MicroPython integer, the number of iterations of each loop. A value
//          <= 0 skips every loop body, so all timings come out as (near) zero.
// returns: 4-tuple of integers, in milliseconds as reported by
//          mp_hal_ticks_ms(): (sine loop, cosine loop, atan2 loop, total).
static mp_obj_t experimental_benchmark_detailed(mp_obj_t n_in) {
    int32_t n = mp_obj_get_int(n_in);

    // Volatile sink: every iteration's result has to be stored, so the loops
    // cannot be discarded as dead code.
    volatile float result = 0.0f;

    // Volatile source: the operand is re-read on every iteration. With a
    // literal argument the inlined engines could be constant-folded at compile
    // time, leaving only the accumulate to be timed.
    volatile float arg = 1.1f;

    uint32_t t0, t1, t2, t3;

    // Loop 1: sine only.
    t0 = mp_hal_ticks_ms();
    for (int32_t i = 0; i < n; i++) {
        result += fast_sin_internal(arg);
    }

    // Loop 2: cosine, computed as sin(x + pi/2); the addition is now performed
    // at run time because `arg` is volatile.
    t1 = mp_hal_ticks_ms();
    for (int32_t i = 0; i < n; i++) {
        result += fast_sin_internal(arg + HALF_PI_F);
    }

    // Loop 3: atan2 only. One volatile read per iteration, used for both
    // operands, keeps the per-iteration load overhead equal to the other loops.
    t2 = mp_hal_ticks_ms();
    for (int32_t i = 0; i < n; i++) {
        float v = arg;
        result += fast_atan2_internal(v, v);
    }
    t3 = mp_hal_ticks_ms();

    // uint32_t subtraction is modular, so each difference stays correct even
    // if the tick value wraps between two samples.
    mp_obj_t tuple[4] = {
        mp_obj_new_int(t1 - t0), // Sin ms
        mp_obj_new_int(t2 - t1), // Cos ms
        mp_obj_new_int(t3 - t2), // Atan2 ms
        mp_obj_new_int(t3 - t0)  // Total ms
    };
    return mp_obj_new_tuple(4, tuple);
}
static MP_DEFINE_CONST_FUN_OBJ_1(experimental_benchmark_detailed_obj, experimental_benchmark_detailed);
105115

106116
// -----------------------------------------------------------------------------
107117
// Module Registry
108118
// -----------------------------------------------------------------------------
109119

110120
static const mp_rom_map_elem_t experimental_globals_table[] = {
111-
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_experimental) },
112-
{ MP_ROM_QSTR(MP_QSTR_sin), MP_ROM_PTR(&experimental_sin_obj) },
113-
{ MP_ROM_QSTR(MP_QSTR_cos), MP_ROM_PTR(&experimental_cos_obj) },
114-
{ MP_ROM_QSTR(MP_QSTR_atan2), MP_ROM_PTR(&experimental_atan2_obj) },
115-
{ MP_ROM_QSTR(MP_QSTR_benchmark_internal), MP_ROM_PTR(&experimental_benchmark_internal_obj) },
121+
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_experimental) },
122+
{ MP_ROM_QSTR(MP_QSTR_sin), MP_ROM_PTR(&experimental_sin_obj) },
123+
{ MP_ROM_QSTR(MP_QSTR_cos), MP_ROM_PTR(&experimental_cos_obj) },
124+
{ MP_ROM_QSTR(MP_QSTR_atan2), MP_ROM_PTR(&experimental_atan2_obj) },
125+
{ MP_ROM_QSTR(MP_QSTR_benchmark_detailed), MP_ROM_PTR(&experimental_benchmark_detailed_obj) },
116126
};
117127
static MP_DEFINE_CONST_DICT(pb_module_experimental_globals, experimental_globals_table);
118128

0 commit comments

Comments
 (0)