Skip to content

Commit caa13a2

Browse files
committed
Armv8.1-M: Add CFI directives for stack unwinding
Extend scripts/cfify with Armv8.1-M architecture support, handling push/pop, vpush/vpop, sub/add sp, and bx lr. Enable cfify for armv81m in scripts/autogen.

- Resolves #1517

Signed-off-by: Matthias J. Kannwischer <matthias@kannwischer.eu>
1 parent e231577 commit caa13a2

File tree

5 files changed

+330
-37
lines changed

5 files changed

+330
-37
lines changed

mlkem/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S

Lines changed: 47 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -79,9 +79,30 @@
7979
.global MLK_ASM_NAMESPACE(keccak_f1600_x4_mve_asm)
8080
MLK_ASM_FN_SYMBOL(keccak_f1600_x4_mve_asm)
8181

82+
.cfi_startproc
8283
push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
84+
.cfi_adjust_cfa_offset 0x24
85+
.cfi_rel_offset r4, 0x0
86+
.cfi_rel_offset r5, 0x4
87+
.cfi_rel_offset r6, 0x8
88+
.cfi_rel_offset r7, 0xc
89+
.cfi_rel_offset r8, 0x10
90+
.cfi_rel_offset r9, 0x14
91+
.cfi_rel_offset r10, 0x18
92+
.cfi_rel_offset r11, 0x1c
93+
.cfi_rel_offset lr, 0x20
8394
vpush {d8, d9, d10, d11, d12, d13, d14, d15}
95+
.cfi_adjust_cfa_offset 0x40
96+
.cfi_rel_offset d8, 0x0
97+
.cfi_rel_offset d9, 0x8
98+
.cfi_rel_offset d10, 0x10
99+
.cfi_rel_offset d11, 0x18
100+
.cfi_rel_offset d12, 0x20
101+
.cfi_rel_offset d13, 0x28
102+
.cfi_rel_offset d14, 0x30
103+
.cfi_rel_offset d15, 0x38
84104
sub sp, #0x80
105+
.cfi_adjust_cfa_offset 0x80
85106
mov r6, r2
86107
mov.w lr, #0x18
87108
mov r2, r0
@@ -90,9 +111,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_mve_asm)
90111
vldrw.u32 q0, [r3]
91112
vldrw.u32 q1, [r2]
92113
vldrw.u32 q2, [r2, #32]
93-
wls lr, lr, keccak_f1600_x4_mve_asm_roundend @ imm = #0x8c0
114+
wls lr, lr, Lkeccak_f1600_x4_mve_asm_roundend @ imm = #0x8c0
94115

95-
keccak_f1600_x4_mve_asm_roundstart:
116+
Lkeccak_f1600_x4_mve_asm_roundstart:
96117
vldrw.u32 q6, [r2, #112]
97118
veor q7, q6, q2
98119
vldrw.u32 q2, [r2, #80]
@@ -653,13 +674,34 @@ keccak_f1600_x4_mve_asm_roundstart:
653674
veor q0, q4, q6
654675
vstrw.32 q0, [r5]
655676

656-
keccak_f1600_x4_mve_asm_roundend_pre:
657-
le lr, keccak_f1600_x4_mve_asm_roundstart @ imm = #-0x8c0
677+
Lkeccak_f1600_x4_mve_asm_roundend_pre:
678+
le lr, Lkeccak_f1600_x4_mve_asm_roundstart @ imm = #-0x8c0
658679

659-
keccak_f1600_x4_mve_asm_roundend:
680+
Lkeccak_f1600_x4_mve_asm_roundend:
660681
add sp, #0x80
682+
.cfi_adjust_cfa_offset -0x80
661683
vpop {d8, d9, d10, d11, d12, d13, d14, d15}
684+
.cfi_restore d8
685+
.cfi_restore d9
686+
.cfi_restore d10
687+
.cfi_restore d11
688+
.cfi_restore d12
689+
.cfi_restore d13
690+
.cfi_restore d14
691+
.cfi_restore d15
692+
.cfi_adjust_cfa_offset -0x40
662693
pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
694+
.cfi_restore r4
695+
.cfi_restore r5
696+
.cfi_restore r6
697+
.cfi_restore r7
698+
.cfi_restore r8
699+
.cfi_restore r9
700+
.cfi_restore r10
701+
.cfi_restore r11
702+
.cfi_restore lr
703+
.cfi_adjust_cfa_offset -0x24
704+
.cfi_endproc
663705
nop
664706

665707
MLK_ASM_FN_SIZE(keccak_f1600_x4_mve_asm)

mlkem/src/fips202/native/armv81m/src/state_extract_bytes_x4_mve.S

Lines changed: 54 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -73,19 +73,39 @@
7373
.global MLK_ASM_NAMESPACE(keccak_f1600_x4_state_extract_bytes_asm)
7474
MLK_ASM_FN_SYMBOL(keccak_f1600_x4_state_extract_bytes_asm)
7575

76+
.cfi_startproc
7677
push.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
78+
.cfi_adjust_cfa_offset 0x28
79+
.cfi_rel_offset r4, 0x0
80+
.cfi_rel_offset r5, 0x4
81+
.cfi_rel_offset r6, 0x8
82+
.cfi_rel_offset r7, 0xc
83+
.cfi_rel_offset r8, 0x10
84+
.cfi_rel_offset r9, 0x14
85+
.cfi_rel_offset r10, 0x18
86+
.cfi_rel_offset r11, 0x1c
87+
.cfi_rel_offset lr, 0x24
7788
vpush {d8, d9, d10, d11, d12, d13, d14, d15}
89+
.cfi_adjust_cfa_offset 0x40
90+
.cfi_rel_offset d8, 0x0
91+
.cfi_rel_offset d9, 0x8
92+
.cfi_rel_offset d10, 0x10
93+
.cfi_rel_offset d11, 0x18
94+
.cfi_rel_offset d12, 0x20
95+
.cfi_rel_offset d13, 0x28
96+
.cfi_rel_offset d14, 0x30
97+
.cfi_rel_offset d15, 0x38
7898
ldr r4, [sp, #0x68]
7999
ldr.w r10, [sp, #0x6c]
80100
ldr r6, [sp, #0x70]
81101
cmp r6, #0x0
82-
beq.w keccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0x2ea
102+
beq.w Lkeccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0x2ea
83103
and r5, r10, #0x7
84104
bic r9, r10, #0x7
85105
add.w r8, r0, r9, lsl #1
86106
add.w r7, r8, #0x190
87107
cmp r5, #0x0
88-
beq.w keccak_f1600_x4_state_extract_bytes_asm_pre_main @ imm = #0x112
108+
beq.w Lkeccak_f1600_x4_state_extract_bytes_asm_pre_main @ imm = #0x112
89109
vldrw.u32 q0, [r8], #16
90110
vldrw.u32 q1, [r7], #16
91111
vrev32.16 q2, q0
@@ -148,29 +168,29 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_state_extract_bytes_asm)
148168
subs r2, r2, r5
149169
subs r3, r3, r5
150170
subs r4, r4, r5
151-
vpstttt
171+
vpstttt
152172
vstrbt.8 q0, [r1], #4
153173
vstrbt.8 q1, [r2], #4
154174
vstrbt.8 q2, [r3], #4
155175
vstrbt.8 q3, [r4], #4
156176
subs.w r6, r6, lr
157177
cmp r6, #0x0
158-
beq.w keccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0x1cc
178+
beq.w Lkeccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0x1cc
159179
vmov q7[2], q7[0], r1, r3
160180
vmov q7[3], q7[1], r2, r4
161-
b keccak_f1600_x4_state_extract_bytes_asm_main_body @ imm = #0xe
181+
b Lkeccak_f1600_x4_state_extract_bytes_asm_main_body @ imm = #0xe
162182

163-
keccak_f1600_x4_state_extract_bytes_asm_pre_main:
183+
Lkeccak_f1600_x4_state_extract_bytes_asm_pre_main:
164184
vmov q7[2], q7[0], r1, r3
165185
vmov q7[3], q7[1], r2, r4
166186
mov.w r12, #0x4
167187
vsub.i32 q7, q7, r12
168188

169-
keccak_f1600_x4_state_extract_bytes_asm_main_body:
189+
Lkeccak_f1600_x4_state_extract_bytes_asm_main_body:
170190
lsr.w lr, r6, #0x3
171-
wls lr, lr, keccak_f1600_x4_state_extract_bytes_asm_main_loop_end @ imm = #0xb4
191+
wls lr, lr, Lkeccak_f1600_x4_state_extract_bytes_asm_main_loop_end @ imm = #0xb4
172192

173-
keccak_f1600_x4_state_extract_bytes_asm_main_loop_start:
193+
Lkeccak_f1600_x4_state_extract_bytes_asm_main_loop_start:
174194
vldrw.u32 q0, [r8], #16
175195
vldrw.u32 q1, [r7], #16
176196
vrev32.16 q2, q0
@@ -215,11 +235,11 @@ keccak_f1600_x4_state_extract_bytes_asm_main_loop_start:
215235
vorr q1, q1, q3
216236
vstrw.32 q0, [q7, #4]!
217237
vstrw.32 q1, [q7, #4]!
218-
le lr, keccak_f1600_x4_state_extract_bytes_asm_main_loop_start @ imm = #-0xb4
238+
le lr, Lkeccak_f1600_x4_state_extract_bytes_asm_main_loop_start @ imm = #-0xb4
219239

220-
keccak_f1600_x4_state_extract_bytes_asm_main_loop_end:
240+
Lkeccak_f1600_x4_state_extract_bytes_asm_main_loop_end:
221241
ands r6, r6, #0x7
222-
beq keccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0xee
242+
beq Lkeccak_f1600_x4_state_extract_bytes_asm_exit @ imm = #0xee
223243
mov.w r12, #0x4
224244
vadd.i32 q7, q7, r12
225245
vmov r1, r3, q7[2], q7[0]
@@ -275,15 +295,35 @@ keccak_f1600_x4_state_extract_bytes_asm_main_loop_end:
275295
vmov.f64 d4, d1
276296
vmov.f64 d6, d3
277297
vctp.8 r6
278-
vpstttt
298+
vpstttt
279299
vstrbt.8 q0, [r1], #4
280300
vstrbt.8 q1, [r2], #4
281301
vstrbt.8 q2, [r3], #4
282302
vstrbt.8 q3, [r4], #4
283303

284-
keccak_f1600_x4_state_extract_bytes_asm_exit:
304+
Lkeccak_f1600_x4_state_extract_bytes_asm_exit:
285305
vpop {d8, d9, d10, d11, d12, d13, d14, d15}
306+
.cfi_restore d8
307+
.cfi_restore d9
308+
.cfi_restore d10
309+
.cfi_restore d11
310+
.cfi_restore d12
311+
.cfi_restore d13
312+
.cfi_restore d14
313+
.cfi_restore d15
314+
.cfi_adjust_cfa_offset -0x40
286315
pop.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
316+
.cfi_restore r4
317+
.cfi_restore r5
318+
.cfi_restore r6
319+
.cfi_restore r7
320+
.cfi_restore r8
321+
.cfi_restore r9
322+
.cfi_restore r10
323+
.cfi_restore r11
324+
.cfi_restore lr
325+
.cfi_adjust_cfa_offset -0x28
326+
.cfi_endproc
287327

288328
MLK_ASM_FN_SIZE(keccak_f1600_x4_state_extract_bytes_asm)
289329

mlkem/src/fips202/native/armv81m/src/state_xor_bytes_x4_mve.S

Lines changed: 54 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -72,19 +72,39 @@
7272
.global MLK_ASM_NAMESPACE(keccak_f1600_x4_state_xor_bytes_asm)
7373
MLK_ASM_FN_SYMBOL(keccak_f1600_x4_state_xor_bytes_asm)
7474

75+
.cfi_startproc
7576
push.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
77+
.cfi_adjust_cfa_offset 0x28
78+
.cfi_rel_offset r4, 0x0
79+
.cfi_rel_offset r5, 0x4
80+
.cfi_rel_offset r6, 0x8
81+
.cfi_rel_offset r7, 0xc
82+
.cfi_rel_offset r8, 0x10
83+
.cfi_rel_offset r9, 0x14
84+
.cfi_rel_offset r10, 0x18
85+
.cfi_rel_offset r11, 0x1c
86+
.cfi_rel_offset lr, 0x24
7687
vpush {d8, d9, d10, d11, d12, d13, d14, d15}
88+
.cfi_adjust_cfa_offset 0x40
89+
.cfi_rel_offset d8, 0x0
90+
.cfi_rel_offset d9, 0x8
91+
.cfi_rel_offset d10, 0x10
92+
.cfi_rel_offset d11, 0x18
93+
.cfi_rel_offset d12, 0x20
94+
.cfi_rel_offset d13, 0x28
95+
.cfi_rel_offset d14, 0x30
96+
.cfi_rel_offset d15, 0x38
7797
ldr r4, [sp, #0x68]
7898
ldr.w r10, [sp, #0x6c]
7999
ldr r6, [sp, #0x70]
80100
cmp r6, #0x0
81-
beq.w keccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x34c
101+
beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x34c
82102
and r5, r10, #0x7
83103
bic r9, r10, #0x7
84104
add.w r8, r0, r9, lsl #1
85105
add.w r7, r8, #0x190
86106
cmp r5, #0x0
87-
beq.w keccak_f1600_x4_state_xor_bytes_asm_pre_main @ imm = #0x132
107+
beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_pre_main @ imm = #0x132
88108
subs r1, r1, r5
89109
subs r2, r2, r5
90110
subs r3, r3, r5
@@ -98,7 +118,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_state_xor_bytes_asm)
98118
vmrs r11, p0
99119
lsl.w r11, r11, r5
100120
vmsr p0, r11
101-
vpstttt
121+
vpstttt
102122
vldrbt.u8 q0, [r1], #4
103123
vldrbt.u8 q1, [r2], #4
104124
vldrbt.u8 q2, [r3], #4
@@ -164,20 +184,20 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_state_xor_bytes_asm)
164184
vmov q7[2], q7[0], r1, r3
165185
vmov q7[3], q7[1], r2, r4
166186
cmp r6, #0x0
167-
beq.w keccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x206
168-
b keccak_f1600_x4_state_xor_bytes_asm_main_body @ imm = #0xe
187+
beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x206
188+
b Lkeccak_f1600_x4_state_xor_bytes_asm_main_body @ imm = #0xe
169189

170-
keccak_f1600_x4_state_xor_bytes_asm_pre_main:
190+
Lkeccak_f1600_x4_state_xor_bytes_asm_pre_main:
171191
vmov q7[2], q7[0], r1, r3
172192
vmov q7[3], q7[1], r2, r4
173193
mov.w r0, #0x4
174194
vsub.i32 q7, q7, r0
175195

176-
keccak_f1600_x4_state_xor_bytes_asm_main_body:
196+
Lkeccak_f1600_x4_state_xor_bytes_asm_main_body:
177197
lsr.w lr, r6, #0x3
178-
wls lr, lr, keccak_f1600_x4_state_xor_bytes_asm_main_loop_end @ imm = #0xd4
198+
wls lr, lr, Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_end @ imm = #0xd4
179199

180-
keccak_f1600_x4_state_xor_bytes_asm_main_loop_start:
200+
Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_start:
181201
vldrw.u32 q0, [q7, #4]!
182202
vldrw.u32 q1, [q7, #4]!
183203
vmov q2, q0
@@ -230,17 +250,17 @@ keccak_f1600_x4_state_xor_bytes_asm_main_loop_start:
230250
veor q5, q5, q1
231251
vstrw.32 q4, [r8], #16
232252
vstrw.32 q5, [r7], #16
233-
le lr, keccak_f1600_x4_state_xor_bytes_asm_main_loop_start @ imm = #-0xd4
253+
le lr, Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_start @ imm = #-0xd4
234254

235-
keccak_f1600_x4_state_xor_bytes_asm_main_loop_end:
255+
Lkeccak_f1600_x4_state_xor_bytes_asm_main_loop_end:
236256
ands r6, r6, #0x7
237-
beq.w keccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x110
257+
beq.w Lkeccak_f1600_x4_state_xor_bytes_asm_exit @ imm = #0x110
238258
mov.w r0, #0x4
239259
vadd.i32 q7, q7, r0
240260
vmov r1, r3, q7[2], q7[0]
241261
vmov r2, r4, q7[3], q7[1]
242262
vctp.8 r6
243-
vpstttt
263+
vpstttt
244264
vldrbt.u8 q0, [r1]
245265
vldrbt.u8 q1, [r2]
246266
vldrbt.u8 q2, [r3]
@@ -304,9 +324,29 @@ keccak_f1600_x4_state_xor_bytes_asm_main_loop_end:
304324
vstrw.32 q4, [r8], #16
305325
vstrw.32 q5, [r7], #16
306326

307-
keccak_f1600_x4_state_xor_bytes_asm_exit:
327+
Lkeccak_f1600_x4_state_xor_bytes_asm_exit:
308328
vpop {d8, d9, d10, d11, d12, d13, d14, d15}
329+
.cfi_restore d8
330+
.cfi_restore d9
331+
.cfi_restore d10
332+
.cfi_restore d11
333+
.cfi_restore d12
334+
.cfi_restore d13
335+
.cfi_restore d14
336+
.cfi_restore d15
337+
.cfi_adjust_cfa_offset -0x40
309338
pop.w {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
339+
.cfi_restore r4
340+
.cfi_restore r5
341+
.cfi_restore r6
342+
.cfi_restore r7
343+
.cfi_restore r8
344+
.cfi_restore r9
345+
.cfi_restore r10
346+
.cfi_restore r11
347+
.cfi_restore lr
348+
.cfi_adjust_cfa_offset -0x28
349+
.cfi_endproc
310350
nop
311351

312352
MLK_ASM_FN_SIZE(keccak_f1600_x4_state_xor_bytes_asm)

scripts/autogen

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2888,9 +2888,7 @@ def update_via_simpasm(
28882888
"-o",
28892889
tmp.name,
28902890
]
2891-
# TODO: Support CFI for Armv8.1-M
2892-
if arch != "armv81m":
2893-
cmd += ["--cfify"]
2891+
cmd += ["--cfify"]
28942892
if cross_prefix is not None:
28952893
# Stick with llvm-objdump for disassembly
28962894
cmd += ["--cc", cross_prefix + "gcc"]

0 commit comments

Comments
 (0)