Skip to content

Commit 50e15fe

Browse files
authored
Merge pull request #619 from ckormanyos/esp32s3_most_02_flags
Esp32s3 most O2 flags
2 parents cd2b243 + ece5cb7 commit 50e15fe

5 files changed

Lines changed: 95 additions & 19 deletions

File tree

ref_app/src/app/benchmark/readme.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ The benchmark used is a ${\sim}100$ decimal digit AGM $\pi$ calculation.
8787
| `stm32f446` | 5.1 | 3.4 |
8888
| `rpi_pico2_rp2350` | 6.3 | 4.2 |
8989
| `wch_ch32v307` | 8.0 | 5.3 |
90+
| `xtensa_esp32_s3` | 9.1 | 6.1 |
9091
| `rpi_pico_rp2040` | 19 | 13 |
9192
| `avr` | 420 | 280 |
9293

@@ -107,6 +108,10 @@ The $32$-bit RISC-V controller (having a novel _open-source_ core)
107108
on the `wch_ch32v307` board boasts a quite respectable
108109
time of $8.0~\text{ms}$.
109110

111+
Running on only one core (core 0) of the $32$-bit
112+
controller of the `xtensa_esp32_s3` board results in
113+
a runtime of $9.1~\text{ms}$ for the calculation.
114+
110115
Using only one core (core 1) on the $32$-bit ARM(R) Cortex(R) M0+
111116
controller of the `rpi_pico_rp2040` board results in a calculation
112117
time of $19~\text{ms}$. The next generation `rpi_pico2_rp2350`

ref_app/target.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,7 @@
10051005
<None Include="target\micros\xtensa_esp32_s3\make\xtensa_esp32_s3.ld" />
10061006
<None Include="target\micros\xtensa_esp32_s3\make\xtensa_esp32_s3_files.gmk" />
10071007
<None Include="target\micros\xtensa_esp32_s3\make\xtensa_esp32_s3_flags.gmk" />
1008+
<None Include="target\micros\xtensa_esp32_s3\make\xtensa_esp32_s3_flags_extra.gmk" />
10081009
<None Include="target\micros\xtensa_esp32_s3\startup\boot.s" />
10091010
<None Include="target\micros\xtensa_esp32_s3\startup\IntVectTable.s" />
10101011
<None Include="target\micros\xtensa_esp32_s3\startup\Std\ieee754-sf.S" />

ref_app/target.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,9 @@
648648
<None Include="target\micros\xtensa_esp32_s3\startup\Std\lib1funcs.S">
649649
<Filter>micros\xtensa_esp32_s3\startup\Std</Filter>
650650
</None>
651+
<None Include="target\micros\xtensa_esp32_s3\make\xtensa_esp32_s3_flags_extra.gmk">
652+
<Filter>micros\xtensa_esp32_s3\make</Filter>
653+
</None>
651654
</ItemGroup>
652655
<ItemGroup>
653656
<ClCompile Include="Target\Micros\AVR\Startup\int_vect.cpp">

ref_app/target/micros/xtensa_esp32_s3/make/xtensa_esp32_s3_flags.gmk

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,28 +25,15 @@ WARN_FLAGS := -Wall
2525
-Wno-unused-parameter \
2626
-Wno-unused-but-set-parameter \
2727
-Wno-missing-field-initializers \
28-
-Wno-sign-compare
28+
-Wno-sign-compare \
29+
-Wno-main
2930

3031

31-
WARN_FLAGS += -Wno-main
32+
include $(PATH_TGT_MAKE)/$(TGT)_flags_extra.gmk
3233

33-
DEFS_IEEE754_SF := -DL_divsf3 \
34-
-DL_negsf2 \
35-
-DL_addsubsf3 \
36-
-DL_mulsf3 \
37-
-DL_cmpsf2 \
38-
-DL_fixsfsi \
39-
-DL_fixsfdi \
40-
-DL_fixunssfsi \
41-
-DL_fixunssfdi \
42-
-DL_floatsisf \
43-
-DL_floatdisf \
44-
-DL_sqrtf \
45-
-DL_recipsf2 \
46-
-DL_rsqrtsf2
4734

48-
49-
TGT_ALLFLAGS = -O2 \
35+
TGT_ALLFLAGS = -O1 \
36+
$(MOST_O2_FLAGS) \
5037
-mabi=call0 \
5138
-mno-text-section-literals \
5239
-mstrict-align \
@@ -101,7 +88,7 @@ $(PATH_OBJ)/%.o : %.S
10188
# ...and reformat (using sed) any possible error/warning messages
10289
# for the VisualStudio(R) output window,
10390
# ...and create an assembly listing using objdump
104-
@-$(TGT_GCC) -O2 -fno-reorder-blocks-and-partition -fno-reorder-functions -mabi=call0 -mno-text-section-literals -mstrict-align -mlongcalls -fomit-frame-pointer -fstrict-volatile-bitfields -fno-jump-tables -fno-tree-switch-conversion -fno-stack-protector -DI_KNOW_WHAT_I_AM_DOING $(DEFS_IEEE754_SF) $(C_INCLUDES) $< -c -o $(PATH_OBJ)/$(basename $(@F)).o 2> $(PATH_ERR)/$(basename $(@F)).err
91+
@-$(TGT_GCC) -O1 -fno-reorder-blocks-and-partition -fno-reorder-functions -mabi=call0 -mno-text-section-literals -mstrict-align -mlongcalls -fomit-frame-pointer -fstrict-volatile-bitfields -fno-jump-tables -fno-tree-switch-conversion -fno-stack-protector -DI_KNOW_WHAT_I_AM_DOING $(DEFS_IEEE754_SF) $(C_INCLUDES) $< -c -o $(PATH_OBJ)/$(basename $(@F)).o 2> $(PATH_ERR)/$(basename $(@F)).err
10592
@-$(SED) -e 's|:\([0-9]*\):|(\1) :|' $(PATH_ERR)/$(basename $(@F)).err
10693
@-$(OBJDUMP) -S $(PATH_OBJ)/$(basename $(@F)).o > $(PATH_LST)/$(basename $(@F)).lst
10794

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#
2+
# Copyright Christopher Kormanyos 2025.
3+
# Distributed under the Boost Software License,
4+
# Version 1.0. (See accompanying file LICENSE_1_0.txt
5+
# or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
#
7+
8+
# ------------------------------------------------------------------------------
9+
# extra compiler flags for the target architecture
10+
# ------------------------------------------------------------------------------
11+
12+
# MOST_O2_FLAGS: a list of individual GCC optimization switches.
#
# xtensa_esp32_s3_flags.gmk includes this file and places $(MOST_O2_FLAGS)
# directly after -O1 in TGT_ALLFLAGS, at the position where a plain -O2 was
# used before. The list is therefore meant to be combined with -O1.
#
# The variable is first set to empty with ':=' (simply expanded), so every
# following '+=' appends its flag immediately, one flag per line.
#
# NOTE(review): the %.S rule in xtensa_esp32_s3_flags.gmk passes
# -fno-reorder-blocks-and-partition, -fno-reorder-functions and
# -fno-tree-switch-conversion, whereas the positive forms of these three
# switches are enabled in the list below. Confirm which setting is intended
# for the C/C++ sources (the remainder of TGT_ALLFLAGS decides this).
MOST_O2_FLAGS :=
13+
MOST_O2_FLAGS += -falign-functions
14+
MOST_O2_FLAGS += -falign-jumps
15+
MOST_O2_FLAGS += -falign-labels
16+
MOST_O2_FLAGS += -falign-loops
17+
MOST_O2_FLAGS += -fcaller-saves
18+
MOST_O2_FLAGS += -fcode-hoisting
19+
MOST_O2_FLAGS += -fcrossjumping
20+
MOST_O2_FLAGS += -fcse-follow-jumps
21+
MOST_O2_FLAGS += -fcse-skip-blocks
22+
MOST_O2_FLAGS += -fdelete-null-pointer-checks
23+
MOST_O2_FLAGS += -fdevirtualize
24+
MOST_O2_FLAGS += -fdevirtualize-speculatively
25+
MOST_O2_FLAGS += -fexpensive-optimizations
26+
MOST_O2_FLAGS += -ffinite-loops
27+
MOST_O2_FLAGS += -fgcse
28+
MOST_O2_FLAGS += -fgcse-lm
29+
MOST_O2_FLAGS += -fhoist-adjacent-loads
30+
MOST_O2_FLAGS += -finline-functions
31+
MOST_O2_FLAGS += -finline-small-functions
32+
MOST_O2_FLAGS += -findirect-inlining
33+
MOST_O2_FLAGS += -fipa-bit-cp
34+
MOST_O2_FLAGS += -fipa-cp
35+
MOST_O2_FLAGS += -fipa-icf
36+
MOST_O2_FLAGS += -fipa-ra
37+
MOST_O2_FLAGS += -fipa-sra
38+
MOST_O2_FLAGS += -fipa-vrp
39+
MOST_O2_FLAGS += -fisolate-erroneous-paths-dereference
40+
MOST_O2_FLAGS += -flra-remat
41+
MOST_O2_FLAGS += -foptimize-sibling-calls
42+
MOST_O2_FLAGS += -foptimize-strlen
43+
MOST_O2_FLAGS += -fpartial-inlining
44+
MOST_O2_FLAGS += -fpeephole2
45+
MOST_O2_FLAGS += -freorder-blocks-algorithm=stc
46+
MOST_O2_FLAGS += -freorder-blocks-and-partition
47+
MOST_O2_FLAGS += -freorder-functions
48+
MOST_O2_FLAGS += -frerun-cse-after-loop
49+
MOST_O2_FLAGS += -fschedule-insns
50+
MOST_O2_FLAGS += -fschedule-insns2
51+
MOST_O2_FLAGS += -fsched-interblock
52+
MOST_O2_FLAGS += -fsched-spec
53+
MOST_O2_FLAGS += -fstore-merging
54+
MOST_O2_FLAGS += -fstrict-aliasing
55+
MOST_O2_FLAGS += -fthread-jumps
56+
MOST_O2_FLAGS += -ftree-builtin-call-dce
57+
MOST_O2_FLAGS += -ftree-loop-vectorize
58+
MOST_O2_FLAGS += -ftree-pre
59+
MOST_O2_FLAGS += -ftree-slp-vectorize
60+
MOST_O2_FLAGS += -ftree-switch-conversion
61+
MOST_O2_FLAGS += -ftree-tail-merge
62+
MOST_O2_FLAGS += -ftree-vrp
63+
MOST_O2_FLAGS += -fvect-cost-model=very-cheap
64+
65+
66+
# DEFS_IEEE754_SF: preprocessor definitions of the form -DL_<name>.
#
# xtensa_esp32_s3_flags.gmk expands $(DEFS_IEEE754_SF) on the compiler
# command line of its '%.S' pattern rule, so these macros are visible when
# the preprocessed assembly sources are built.
#
# The variable is first set to empty with ':=' (simply expanded) and then
# extended with one definition per '+=' line.
#
# NOTE(review): presumably each L_<name> macro enables the routine of the
# same name in startup/Std/ieee754-sf.S -- confirm against that file.
DEFS_IEEE754_SF :=
67+
DEFS_IEEE754_SF += -DL_divsf3
68+
DEFS_IEEE754_SF += -DL_negsf2
69+
DEFS_IEEE754_SF += -DL_addsubsf3
70+
DEFS_IEEE754_SF += -DL_mulsf3
71+
DEFS_IEEE754_SF += -DL_cmpsf2
72+
DEFS_IEEE754_SF += -DL_fixsfsi
73+
DEFS_IEEE754_SF += -DL_fixsfdi
74+
DEFS_IEEE754_SF += -DL_fixunssfsi
75+
DEFS_IEEE754_SF += -DL_fixunssfdi
76+
DEFS_IEEE754_SF += -DL_floatsisf
77+
DEFS_IEEE754_SF += -DL_floatdisf
78+
DEFS_IEEE754_SF += -DL_sqrtf
79+
DEFS_IEEE754_SF += -DL_recipsf2
80+
DEFS_IEEE754_SF += -DL_rsqrtsf2

0 commit comments

Comments
 (0)