diff --git a/readme.md b/readme.md index e0acb7ef0..91bb5b26e 100644 --- a/readme.md +++ b/readme.md @@ -402,6 +402,12 @@ The Espressif (`target xtensa_esp32_s3`) port for NodeMCU ESP32-S3 features a bare-metal startup _without_ using any of the SDK. The bare-metal startup was taken from the work of [Chalandi/Baremetal_esp32s3_nosdk](https://github.com/Chalandi/Baremetal_esp32s3_nosdk). +The dual-core system first boots core0 which subsequently +starts up core1. Blinky runs in the standard `ref_app` +on core0 toggling `port7` while an endless timer loop on core1 +toggles `port6`. The LED ports toggle in near unison at $\frac{1}{2}~\text{Hz}$. +Self-procured LEDs and resistors need to be fitted in order to observe +blinky on this particular board. The NXP(R) OM13093 LPC11C24 board ARM(R) Cortex(R)-M0+ configuration called `target lpc11c24` toggles the LED on `port0.8`. @@ -460,9 +466,9 @@ The program toggles the GPIO status LED at GPIO index `0x47`. The `rpi_pico_rp2040` target configuration employs the RaspberryPi(R) Pico RP2040 with dual-core ARM(R) Cortex(R)-M0+ clocked at $133~\text{MHz}$. The low-level startup boots through -core 0. Core 0 then starts up core 1 (via a specific protocol). -Core 1 subsequently carries out the blinky application, -while core 0 enters an endless, idle loop. +core0. Core0 then starts up core1 (via a specific protocol). +Core1 subsequently carries out the blinky application, +while core0 enters an endless, idle loop. Ozone debug files are supplied for this system for those interested. 
Reverse engineering of the complicated (and scantly documented) dual-core startup originated in and have been taken from (with many thanks) diff --git a/ref_app/src/app/benchmark/readme.md b/ref_app/src/app/benchmark/readme.md index 475e9275e..90993db68 100644 --- a/ref_app/src/app/benchmark/readme.md +++ b/ref_app/src/app/benchmark/readme.md @@ -108,14 +108,14 @@ The $32$-bit RISC-V controller (having a novel _open-source_ core) on the `wch_ch32v307` board boasts a quite respectable time of $8.0~\text{ms}$. -Running on only one core (core 0) of the $32$-bit +Running on only one core (core0) of the $32$-bit controller of the `xtensa_esp32_s3` board results in a runtime of $9.1~\text{ms}$ for the calculation. -Using only one core (core 1) on the $32$-bit ARM(R) Cortex(R) M0+ +Using only one core (core1) on the $32$-bit ARM(R) Cortex(R) M0+ controller of the `rpi_pico_rp2040` board results in a calculation time of $19~\text{ms}$. The next generation `rpi_pico2_rp2350` with dual ARM(R) Cortex(R) M33 cores definitively improves on this -(still using only core 1) with a time of $6.3~\text{ms}$. +(still using only core1) with a time of $6.3~\text{ms}$. This is slightly more than $3~\text{ms}$ times faster than its predecessor. diff --git a/ref_app/src/mcal/rpi_pico2_rp2350/mcal_cpu_rp2350.cpp b/ref_app/src/mcal/rpi_pico2_rp2350/mcal_cpu_rp2350.cpp index 7df8dfd5d..0bc1cb5b0 100644 --- a/ref_app/src/mcal/rpi_pico2_rp2350/mcal_cpu_rp2350.cpp +++ b/ref_app/src/mcal/rpi_pico2_rp2350/mcal_cpu_rp2350.cpp @@ -106,26 +106,26 @@ auto mcal::cpu::rp2350::start_core1() -> bool mcal::reg::sio_fifo_st, UINT32_C(0)>::bit_get()); - // Send 0 to wake up core 1. + // Send 0 to wake up core1. local::sio_fifo_write_verify(std::uint32_t { UINT32_C(0) }); - // Send 1 to synchronize with core 1. + // Send 1 to synchronize with core1. 
local::sio_fifo_write_verify(std::uint32_t { UINT32_C(1) }); static_assert(sizeof(std::uint32_t) == sizeof(std::uintptr_t), "Error: Pointer/address size mismatch"); - // Send the VTOR address for core 1. + // Send the VTOR address for core1. local::sio_fifo_write_verify(reinterpret_cast(&__INTVECT_Core1[0U])); - // Send the stack pointer value for core 1. + // Send the stack pointer value for core1. local::sio_fifo_write_verify(__INTVECT_Core1[0U]); - // Send the reset handler address for core 1. + // Send the reset handler address for core1. local::sio_fifo_write_verify(__INTVECT_Core1[1U]); - // Clear the sticky bits of the FIFO_ST on core 0. - // Note: Core 0 has called us to get here so these are, - // in fact, the FIFO_ST sticky bits on core 0. + // Clear the sticky bits of the FIFO_ST on core0. + // Note: core0 has called us to get here so these are, + // in fact, the FIFO_ST sticky bits on core0. // HW_PER_SIO->FIFO_ST.reg = 0xFFu; mcal::reg::reg_access_static bool mcal::reg::sio_fifo_st, UINT32_C(0)>::bit_get()); - // Send 0 to wake up core 1. + // Send 0 to wake up core1. local::sio_fifo_write_verify(std::uint32_t { UINT32_C(0) }); - // Send 1 to synchronize with core 1. + // Send 1 to synchronize with core1. local::sio_fifo_write_verify(std::uint32_t { UINT32_C(1) }); static_assert(sizeof(std::uint32_t) == sizeof(std::uintptr_t), "Error: Pointer/address size mismatch"); - // Send the VTOR address for core 1. + // Send the VTOR address for core1. local::sio_fifo_write_verify(reinterpret_cast(&__INTVECT_Core1[0U])); - // Send the stack pointer value for core 1. + // Send the stack pointer value for core1. local::sio_fifo_write_verify(__INTVECT_Core1[0U]); - // Send the reset handler address for core 1. + // Send the reset handler address for core1. local::sio_fifo_write_verify(__INTVECT_Core1[1U]); - // Clear the sticky bits of the FIFO_ST on core 0. - // Note: Core 0 has called us to get here so these are, - // in fact, the FIFO_ST sticky bits on core 0. 
+ // Clear the sticky bits of the FIFO_ST on core0. + // Note: core0 has called us to get here so these are, + // in fact, the FIFO_ST sticky bits on core0. // SIO->FIFO_ST.reg = 0xFFU; mcal::reg::reg_access_staticOPTIONS0.bit.SW_STALL_APPCPU_C0 = 0; // RTC_CNTL->SW_CPU_STALL.bit.SW_STALL_APPCPU_C1 = 0; @@ -43,41 +45,52 @@ void Mcu_StartCore1() mcal::reg::reg_access_static(UINT8_C(0))>::bit_clr(); - // Enable the clock for core 1. + // Enable the clock for core1. // SYSTEM->CORE_1_CONTROL_0.bit.CONTROL_CORE_1_CLKGATE_EN = 1; mcal::reg::reg_access_static(UINT8_C(1))>::bit_set(); - // Reset core 1. + // Reset core1. // SYSTEM->CORE_1_CONTROL_0.bit.CONTROL_CORE_1_RESETING = 1; // SYSTEM->CORE_1_CONTROL_0.bit.CONTROL_CORE_1_RESETING = 0; mcal::reg::reg_access_static(UINT8_C(2))>::bit_set(); mcal::reg::reg_access_static(UINT8_C(2))>::bit_clr(); - // Note: In ESP32-S3, when the reset of the core1 is released, - // the core1 starts executing the bootROM code and it gets stuck - // in a trap waiting for the entry address to be received - // from core0. This is can be achieved by writing the core1 entry - // address to the register SYSTEM_CORE_1_CONTROL_1_REG from core0. + // Note: In ESP32-S3 when the reset of core1 is released, + // then core1 starts executing the bootROM code. Core1 + // subsequently gets stuck in a trap. It is waiting for + // the entry address to be received from core0. - // Set the core1 entry address. + // The send/receive transaction of the entry address is + // carried out via core0 deliberately writing the core1 + // entry address in the SYSTEM_CORE_1_CONTROL_1_REG register. - // SYSTEM->CORE_1_CONTROL_1.reg = (uint32_t) &_start; { - const std::uint32_t start_addr { reinterpret_cast(&_start) }; + // Set the core1 entry address. 
+ + using mcal_reg_access_dynamic_type = mcal::reg::reg_access_dynamic; + + // SYSTEM->CORE_1_CONTROL_1.reg = (uint32_t) &_start; - mcal::reg::reg_access_dynamic::reg_set(mcal::reg::system::core_1_control_1, start_addr); + mcal_reg_access_dynamic_type::reg_set + ( + mcal::reg::system::core_1_control_1, + static_cast(reinterpret_cast(&_start)) + ); } } extern "C" void main_c1() { - // Set the private cpu timer1 for core 1. + // Note: This subroutine executes in core1. It has been called + // by the core1 branch of the subroutine _start(). + + // Set the private cpu timer1 for core1. set_cpu_private_timer1(mcal::gpt::timer1_reload()); - // Enable all interrupts on core 1. + // Enable all interrupts on core1. mcal::irq::init(nullptr); // GPIO->OUT.reg |= CORE1_LED; @@ -88,10 +101,12 @@ void main_c1() auto mcal::cpu::post_init() noexcept -> void { - // Set the private cpu timer1 for core 0. + // Note: This subroutine is called from core0. + + // Set the private cpu timer1 for core0. set_cpu_private_timer1(mcal::gpt::timer1_reload()); - // Use core 0 to start core 1. + // Use core0 to start core1. Mcu_StartCore1(); } diff --git a/ref_app/target/micros/rpi_pico2_rp2350/startup/crt0.cpp b/ref_app/target/micros/rpi_pico2_rp2350/startup/crt0.cpp index 1052a8338..909505f0e 100644 --- a/ref_app/target/micros/rpi_pico2_rp2350/startup/crt0.cpp +++ b/ref_app/target/micros/rpi_pico2_rp2350/startup/crt0.cpp @@ -68,9 +68,9 @@ auto __my_startup() -> void mcal::wdg::secure::trigger(); // Jump to __main, which calls __main_core0, the main - // function of core 0. The main function of core 0 - // itself then subsequently starts up core 1 which - // is launched in __main_core1. Both of these core 0/1 + // function of core0. The main function of core0 + // itself then subsequently starts up core1 which + // is launched in __main_core1. Both of these core0/1 // subroutines will never return. 
::__main(); @@ -86,17 +86,17 @@ auto __my_startup() -> void extern "C" auto __main() -> void { - // Run the main function of core 0. - // This will subsequently start core 1. + // Run the main function of core0. + // This will subsequently start core1. ::__main_core0(); - // Synchronize with core 1. + // Synchronize with core1. mcal::cpu::rp2350::multicore_sync(local::get_cpuid()); // It is here that an actual application could - // be started and then executed on core 0. + // be started and then executed on core0. - // Execute an endless loop on core 0 (while the application runs on core 1). + // Execute an endless loop on core0 (while the application runs on core1). for(;;) { mcal::cpu::nop(); } // This point is never reached. @@ -105,13 +105,13 @@ auto __main() -> void extern "C" auto __main_core0() -> void { - // Disable interrupts on core 0. + // Disable interrupts on core0. mcal::irq::disable_all(); - // Start core 1 and verify successful initiaization of core 1. + // Start core1 and verify successful initialization of core1. if(!mcal::cpu::rp2350::start_core1()) { - // In case of error, loop forever (on core 0). + // In case of error, loop forever (on core0). for(;;) { // Replace with a loud error if desired. @@ -119,31 +119,31 @@ auto __main_core0() -> void } } - // This flag will be set by core 1 (which is now running). + // This flag will be set by core1 (which is now running). while(!core_1_run_flag_get()) { mcal::cpu::nop(); } - // This subroutine (running on core 0) *does* return + // This subroutine (running on core0) *does* return // at this point here. } extern "C" auto __main_core1() -> void { - // Disable interrupts on core 1. + // Disable interrupts on core1. mcal::irq::disable_all(); core_1_run_flag_set(true); - // Core 1 is started via interrupt enabled by the BootRom. - // But core 1 remains in an interrupt handler until core 0 - // actually manually starts core 1 in the subroutine - // mcal::cpu::rp2040::start_core1(). 
Execution on core 1 + // Core1 is started via interrupt enabled by the BootRom. + // But core1 remains in an interrupt handler until core0 + // actually manually starts core1 in the subroutine + // mcal::cpu::rp2350::start_core1(). Execution on core1 // begins here. - // Clear the sticky bits of the FIFO_ST on core 1. + // Clear the sticky bits of the FIFO_ST on core1. // HW_PER_SIO->FIFO_ST.reg = 0xFFu; mcal::reg::reg_access_static void asm volatile("dsb"); - // Clear all pending interrupts on core 1. + // Clear all pending interrupts on core1. // NVIC->ICPR[0U] = static_cast(UINT32_C(0xFFFFFFFF)); mcal::reg::reg_access_static void mcal::reg::nvic_icpr, std::uint32_t { UINT32_C(0xFFFFFFFF) }>::reg_set(); - // Synchronize with core 0. + // Synchronize with core0. mcal::cpu::rp2350::multicore_sync(local::get_cpuid()); - // Enable the hardware FPU on Core 1. + // Enable the hardware FPU on core1. mcal::reg::reg_access_static void mcal::reg::ppb_cpacr, std::uint32_t { (3UL << 20U) | (3UL << 22U) }>::reg_or(); - // Jump to main on core 1 (and never return). + // Jump to main on core1 (and never return). asm volatile("ldr r3, =main"); asm volatile("blx r3"); } diff --git a/ref_app/target/micros/rpi_pico_rp2040/startup/crt0.cpp b/ref_app/target/micros/rpi_pico_rp2040/startup/crt0.cpp index ea58f1e1d..a57497f33 100644 --- a/ref_app/target/micros/rpi_pico_rp2040/startup/crt0.cpp +++ b/ref_app/target/micros/rpi_pico_rp2040/startup/crt0.cpp @@ -62,9 +62,9 @@ auto __my_startup() -> void mcal::wdg::secure::trigger(); // Jump to __main, which calls __main_core0, the main - // function of core 0. The main function of core 0 - // itself then subsequently starts up core 1 which - // is launched in __main_core1. Both of these core 0/1 + // function of core0. The main function of core0 + // itself then subsequently starts up core1 which + // is launched in __main_core1. Both of these core0/1 // subroutines will never return. 
::__main(); @@ -80,17 +80,17 @@ auto __my_startup() -> void extern "C" auto __main() -> void { - // Run the main function of core 0. - // This will subsequently start core 1. + // Run the main function of core0. + // This will subsequently start core1. ::__main_core0(); - // Synchronize with core 1. + // Synchronize with core1. mcal::cpu::rp2040::multicore_sync(local::get_cpuid()); // It is here that an actual application could - // be started and then executed on core 0. + // be started and then executed on core0. - // Execute an endless loop on core 0 (while the application runs on core 1). + // Execute an endless loop on core0 (while the application runs on core1). for(;;) { mcal::cpu::nop(); } // This point is never reached. @@ -99,13 +99,13 @@ auto __main() -> void extern "C" auto __main_core0() -> void { - // Disable interrupts on core 0. + // Disable interrupts on core0. mcal::irq::disable_all(); - // Start core 1 and verify successful initiaization of core 1. + // Start core1 and verify successful initialization of core1. if(!mcal::cpu::rp2040::start_core1()) { - // In case of error, loop forever (on core 0). + // In case of error, loop forever (on core0). for(;;) { // Replace with a loud error if desired. @@ -113,20 +113,20 @@ auto __main_core0() -> void } } - // This subroutine (running on core 0) *does* return + // This subroutine (running on core0) *does* return // at this point here. } extern "C" auto __main_core1() -> void { - // Core 1 is started via interrupt enabled by the BootRom. - // But core 1 remains in an interrupt handler until core 0 - // actually manually starts core 1 in the subroutine - // mcal::cpu::rp2040::start_core1(). Execution on core 1 + // Core1 is started via interrupt enabled by the BootRom. + // But core1 remains in an interrupt handler until core0 + // actually manually starts core1 in the subroutine + // mcal::cpu::rp2040::start_core1(). Execution on core1 // begins here. - // Clear the sticky bits of the FIFO_ST on core 1. 
+ // Clear the sticky bits of the FIFO_ST on core1. // SIO->FIFO_ST.reg = 0xFFu; mcal::reg::reg_access_static void asm volatile("dsb"); - // Clear all pending interrupts on core 1. + // Clear all pending interrupts on core1. // NVIC->ICPR[0U] = static_cast(UINT32_C(0xFFFFFFFF)); mcal::reg::reg_access_static void mcal::reg::nvic_icpr, std::uint32_t { UINT32_C(0xFFFFFFFF) }>::reg_set(); - // Synchronize with core 0. + // Synchronize with core0. mcal::cpu::rp2040::multicore_sync(local::get_cpuid()); - // Jump to main on core 1 (and never return). + // Jump to main on core1 (and never return). asm volatile("ldr r3, =main"); asm volatile("blx r3"); } diff --git a/ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags.gmk b/ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags.gmk index 756e4230e..8c5d730bf 100644 --- a/ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags.gmk +++ b/ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags.gmk @@ -34,6 +34,7 @@ include $(PATH_TGT_MAKE)/$(TGT)_flags_extra.gmk TGT_ALLFLAGS = -O1 \ $(MOST_O2_FLAGS) \ + $(SOME_O3_FLAGS) \ -mabi=call0 \ -mno-text-section-literals \ -mstrict-align \ diff --git a/ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags_extra.gmk b/ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags_extra.gmk index 94887c2d2..fb6eb49b9 100644 --- a/ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags_extra.gmk +++ b/ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags_extra.gmk @@ -62,6 +62,9 @@ MOST_O2_FLAGS += -ftree-tail-merge MOST_O2_FLAGS += -ftree-vrp MOST_O2_FLAGS += -fvect-cost-model=very-cheap +SOME_O3_FLAGS := +SOME_O3_FLAGS += -fgcse-after-reload +SOME_O3_FLAGS += -fipa-cp-clone DEFS_IEEE754_SF := DEFS_IEEE754_SF += -DL_divsf3