Skip to content

Commit 5f69eaa

Browse files
committed
ev3 support
1 parent 4789cad commit 5f69eaa

1 file changed

Lines changed: 70 additions & 38 deletions

File tree

pybricks/experimental/pb_module_experimental.c

Lines changed: 70 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,32 +10,51 @@
1010
#include "py/runtime.h"
1111
#include <math.h>
1212

13-
// ARM Cortex-M4 Hardware Cycle Counter registers
14-
#define DWT_CONTROL (*((volatile uint32_t*)0xE0001000))
15-
#define DWT_CYCCNT (*((volatile uint32_t*)0xE0001004))
16-
#define DEMCR (*((volatile uint32_t*)0xE000EDFC))
13+
// Architecture Detection
14+
#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
15+
#define IS_CORTEX_M 1
16+
#define DWT_CONTROL (*((volatile uint32_t*)0xE0001000))
17+
#define DWT_CYCCNT (*((volatile uint32_t*)0xE0001004))
18+
#define DEMCR (*((volatile uint32_t*)0xE000EDFC))
19+
#else
20+
#define IS_CORTEX_M 0
21+
#endif
1722

1823
static const float PI_F = 3.141592653589793f;
1924
static const float TWO_PI_F = 6.283185307179586f;
2025
static const float HALF_PI_F = 1.570796326794896f;
2126
static const float INV_TWO_PI_F = 0.159154943091895f;
2227

23-
// -----------------------------------------------------------------------------
24-
// Core Math Engines (High-Precision VMLA)
25-
// -----------------------------------------------------------------------------
28+
// ------------------------------------------------------------
29+
// Core Math Engines (Optimized per Architecture)
30+
// ------------------------------------------------------------
2631

2732
static inline float fast_sin_internal(float theta) {
33+
// Range Reduction (Common)
2834
float x = theta * INV_TWO_PI_F;
2935
x = theta - (float)((int)(x + (x > 0 ? 0.5f : -0.5f))) * TWO_PI_F;
3036

3137
if (x > HALF_PI_F) { x = PI_F - x; }
3238
else if (x < -HALF_PI_F) { x = -PI_F - x; }
3339

3440
float x2 = x * x;
35-
float res = -0.000195152f;
36-
res = 0.008332152f + (x2 * res);
37-
res = -0.166666567f + (x2 * res);
38-
res = 1.0f + (x2 * res);
41+
float res;
42+
43+
#if IS_CORTEX_M
44+
// Spike Optimization: Pattern for Hardware VMLA (Multiply-Accumulate)
45+
res = -0.000195152f;
46+
res = 0.008332152f + (x2 * res);
47+
res = -0.166666567f + (x2 * res);
48+
res = 1.0f + (x2 * res);
49+
#else
50+
// EV3 Optimization: Software-Float Register-Friendly Polynomial
51+
// Horner's method helps the ARM9 emulator keep the 'x2' value
52+
// in a CPU register (r4-r7) to avoid re-calculating or memory loads.
53+
float c3 = -0.000195152f;
54+
float c2 = 0.008332152f;
55+
float c1 = -0.166666567f;
56+
res = 1.0f + x2 * (c1 + x2 * (c2 + x2 * c3));
57+
#endif
3958

4059
return x * res;
4160
}
@@ -45,22 +64,32 @@ static inline float fast_atan2_internal(float y, float x) {
4564
float abs_y = fabsf(y) + 1e-10f;
4665
float abs_x = fabsf(x);
4766
float angle;
67+
68+
float r = (abs_x >= abs_y) ? (y / x) : (x / y);
69+
float r2 = r * r;
70+
71+
#if IS_CORTEX_M
72+
float den = 1.0f + (r2 * 0.28086f);
73+
#else
74+
// EV3: Avoid unnecessary float constants to save on pool loads
75+
float coeff = 0.28086f;
76+
float den = 1.0f + (r2 * coeff);
77+
#endif
78+
79+
float res_atan = r * (1.0f / den);
80+
4881
if (abs_x >= abs_y) {
49-
float r = y / x;
50-
float den = 1.0f + (r * r * 0.28086f);
51-
angle = r * (1.0f / den);
52-
if (x < 0.0f) { angle += (y >= 0.0f) ? PI_F : -PI_F; }
82+
angle = res_atan;
83+
if (x < 0.0f) angle += (y >= 0.0f) ? PI_F : -PI_F;
5384
} else {
54-
float r = x / y;
55-
float den = 1.0f + (r * r * 0.28086f);
56-
angle = (y > 0.0f ? HALF_PI_F : -HALF_PI_F) - r * (1.0f / den);
85+
angle = (y > 0.0f ? HALF_PI_F : -HALF_PI_F) - res_atan;
5786
}
5887
return angle;
5988
}
6089

61-
// -----------------------------------------------------------------------------
90+
// ------------------------------------------------------------
6291
// Wrappers & Hardware Benchmarks
63-
// -----------------------------------------------------------------------------
92+
// ----------------------------
6493

6594
static mp_obj_t experimental_sin(mp_obj_t theta_in) {
6695
return mp_obj_new_float_from_f(fast_sin_internal(mp_obj_get_float(theta_in)));
@@ -77,26 +106,29 @@ static mp_obj_t experimental_atan2(mp_obj_t y_in, mp_obj_t x_in) {
77106
}
78107
static MP_DEFINE_CONST_FUN_OBJ_2(experimental_atan2_obj, experimental_atan2);
79108

80-
// Benchmark with seed to prevent constant folding
81109
static mp_obj_t experimental_benchmark_hardware(mp_obj_t seed_in) {
82110
float seed = mp_obj_get_float(seed_in);
83-
DEMCR |= 0x01000000; DWT_CONTROL |= 1;
84-
85-
uint32_t start, cyc_sin;
86-
volatile float res;
87-
88-
DWT_CYCCNT = 0;
89-
start = DWT_CYCCNT;
90-
// Chain operations to force the FPU to wait for the previous result
91-
res = fast_sin_internal(seed);
92-
res = fast_sin_internal(res + 0.01f);
93-
__asm volatile ("dsb");
94-
cyc_sin = DWT_CYCCNT - start;
95-
96-
(void)res;
97-
98-
// Return avg cycles for one operation
99-
return mp_obj_new_int(cyc_sin / 2);
111+
112+
#if IS_CORTEX_M
113+
DEMCR |= 0x01000000; DWT_CONTROL |= 1;
114+
DWT_CYCCNT = 0;
115+
uint32_t start = DWT_CYCCNT;
116+
volatile float res = fast_sin_internal(seed);
117+
res = fast_sin_internal(res + 0.01f);
118+
__asm volatile ("dsb");
119+
uint32_t cyc = DWT_CYCCNT - start;
120+
return mp_obj_new_int(cyc / 2);
121+
#else
122+
// EV3: No Cycle Counter. Use microsecond-level timing via ticks_ms.
123+
uint32_t t0 = mp_hal_ticks_ms();
124+
volatile float res = seed;
125+
for(int i=0; i<20000; i++) {
126+
res = fast_sin_internal(res);
127+
}
128+
uint32_t dt = mp_hal_ticks_ms() - t0;
129+
// Return time in microseconds for 100 ops to keep scale
130+
return mp_obj_new_int((dt * 1000) / 20000);
131+
#endif
100132
}
101133
static MP_DEFINE_CONST_FUN_OBJ_1(experimental_benchmark_hardware_obj, experimental_benchmark_hardware);
102134

0 commit comments

Comments
 (0)