Skip to content

Commit b3810aa

Browse files
committed
Armv8.1-M: Add CFI directives for stack unwinding
Extend scripts/cfify with Armv8.1-M architecture support, handling push/pop, vpush/vpop, sub/add sp, and bx lr. Enable cfify for armv81m in scripts/autogen. Also removes pushing/popping r3 and r12 in keccak_f1600_x4_mve_asm - they do not have to be preserved according to the AAPCS. - Ports pq-code-package/mlkem-native#1558 Signed-off-by: Matthias J. Kannwischer <matthias@kannwischer.eu>
1 parent 8283e45 commit b3810aa

File tree

4 files changed

+230
-17
lines changed

4 files changed

+230
-17
lines changed

dev/fips202/armv81m/src/keccak_f1600_x4_mve.S

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@
2929
c_parameter: const uint32_t *rc
3030
description: Keccak round constants in bit-interleaved form (24 pairs of 32-bit words)
3131
Stack:
32-
bytes: 236
33-
description: register preservation (44) + SIMD registers (64) + temporary storage (128)
32+
bytes: 228
33+
description: register preservation (36) + SIMD registers (64) + temporary storage (128)
3434
*/
3535

3636
#include "../../../../common.h"
@@ -431,7 +431,7 @@ qA20_l .req q2
431431
.global MLD_ASM_NAMESPACE(keccak_f1600_x4_mve_asm)
432432
MLD_ASM_FN_SYMBOL(keccak_f1600_x4_mve_asm)
433433

434-
push {r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
434+
push {r4,r5,r6,r7,r8,r9,r10,r11,lr}
435435
vpush {d8-d15}
436436
sub sp, #8*16
437437

@@ -1597,7 +1597,7 @@ keccak_f1600_x4_mve_asm_roundend:
15971597
add sp, #8*16
15981598

15991599
vpop {d8-d15}
1600-
ldmia.w sp!, {r3,r4,r5,r6,r7,r8,r9,r10,r11,r12, pc}
1600+
ldmia.w sp!, {r4,r5,r6,r7,r8,r9,r10,r11, pc}
16011601

16021602
/****************** REGISTER DEALLOCATIONS *******************/
16031603
.unreq qA00_h

mldsa/src/fips202/native/armv81m/src/keccak_f1600_x4_mve.S

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@
2929
c_parameter: const uint32_t *rc
3030
description: Keccak round constants in bit-interleaved form (24 pairs of 32-bit words)
3131
Stack:
32-
bytes: 236
33-
description: register preservation (44) + SIMD registers (64) + temporary storage (128)
32+
bytes: 228
33+
description: register preservation (36) + SIMD registers (64) + temporary storage (128)
3434
*/
3535

3636
#include "../../../../common.h"
@@ -50,9 +50,30 @@
5050
.global MLD_ASM_NAMESPACE(keccak_f1600_x4_mve_asm)
5151
MLD_ASM_FN_SYMBOL(keccak_f1600_x4_mve_asm)
5252

53-
push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
53+
.cfi_startproc
54+
push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
55+
.cfi_adjust_cfa_offset 0x24
56+
.cfi_rel_offset r4, 0x0
57+
.cfi_rel_offset r5, 0x4
58+
.cfi_rel_offset r6, 0x8
59+
.cfi_rel_offset r7, 0xc
60+
.cfi_rel_offset r8, 0x10
61+
.cfi_rel_offset r9, 0x14
62+
.cfi_rel_offset r10, 0x18
63+
.cfi_rel_offset r11, 0x1c
64+
.cfi_rel_offset lr, 0x20
5465
vpush {d8, d9, d10, d11, d12, d13, d14, d15}
66+
.cfi_adjust_cfa_offset 0x40
67+
.cfi_rel_offset d8, 0x0
68+
.cfi_rel_offset d9, 0x8
69+
.cfi_rel_offset d10, 0x10
70+
.cfi_rel_offset d11, 0x18
71+
.cfi_rel_offset d12, 0x20
72+
.cfi_rel_offset d13, 0x28
73+
.cfi_rel_offset d14, 0x30
74+
.cfi_rel_offset d15, 0x38
5575
sub sp, #0x80
76+
.cfi_adjust_cfa_offset 0x80
5677
mov r6, r2
5778
mov.w lr, #0x18
5879
mov r2, r0
@@ -61,9 +82,9 @@ MLD_ASM_FN_SYMBOL(keccak_f1600_x4_mve_asm)
6182
vldrw.u32 q0, [r3]
6283
vldrw.u32 q1, [r2]
6384
vldrw.u32 q2, [r2, #32]
64-
wls lr, lr, keccak_f1600_x4_mve_asm_roundend @ imm = #0x8c0
85+
wls lr, lr, Lkeccak_f1600_x4_mve_asm_roundend @ imm = #0x8c0
6586

66-
keccak_f1600_x4_mve_asm_roundstart:
87+
Lkeccak_f1600_x4_mve_asm_roundstart:
6788
vldrw.u32 q6, [r2, #112]
6889
veor q7, q6, q2
6990
vldrw.u32 q2, [r2, #80]
@@ -624,13 +645,34 @@ keccak_f1600_x4_mve_asm_roundstart:
624645
veor q0, q4, q6
625646
vstrw.32 q0, [r5]
626647

627-
keccak_f1600_x4_mve_asm_roundend_pre:
628-
le lr, keccak_f1600_x4_mve_asm_roundstart @ imm = #-0x8c0
648+
Lkeccak_f1600_x4_mve_asm_roundend_pre:
649+
le lr, Lkeccak_f1600_x4_mve_asm_roundstart @ imm = #-0x8c0
629650

630-
keccak_f1600_x4_mve_asm_roundend:
651+
Lkeccak_f1600_x4_mve_asm_roundend:
631652
add sp, #0x80
653+
.cfi_adjust_cfa_offset -0x80
632654
vpop {d8, d9, d10, d11, d12, d13, d14, d15}
633-
pop.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
655+
.cfi_restore d8
656+
.cfi_restore d9
657+
.cfi_restore d10
658+
.cfi_restore d11
659+
.cfi_restore d12
660+
.cfi_restore d13
661+
.cfi_restore d14
662+
.cfi_restore d15
663+
.cfi_adjust_cfa_offset -0x40
664+
pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
665+
.cfi_restore r4
666+
.cfi_restore r5
667+
.cfi_restore r6
668+
.cfi_restore r7
669+
.cfi_restore r8
670+
.cfi_restore r9
671+
.cfi_restore r10
672+
.cfi_restore r11
673+
.cfi_restore lr
674+
.cfi_adjust_cfa_offset -0x24
675+
.cfi_endproc
634676
nop
635677

636678
MLD_ASM_FN_SIZE(keccak_f1600_x4_mve_asm)

scripts/autogen

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2170,9 +2170,7 @@ def update_via_simpasm(
21702170
"-o",
21712171
tmp.name,
21722172
]
2173-
# TODO: Support CFI for Armv8.1-M
2174-
if arch != "armv81m":
2175-
cmd += ["--cfify"]
2173+
cmd += ["--cfify"]
21762174
if cross_prefix is not None:
21772175
# Stick with llvm-objdump for disassembly
21782176
cmd += ["--cc", cross_prefix + "gcc"]

scripts/cfify

Lines changed: 174 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,179 @@ def add_cfi_directives(text, arch):
226226
i += 1
227227
continue
228228

229+
elif arch == "armv81m":
230+
# Armv8.1-M callee-saved registers
231+
armv81m_callee_saved_gprs = {
232+
"r4",
233+
"r5",
234+
"r6",
235+
"r7",
236+
"r8",
237+
"r9",
238+
"r10",
239+
"r11",
240+
"lr",
241+
}
242+
armv81m_callee_saved_dregs = {
243+
"d8",
244+
"d9",
245+
"d10",
246+
"d11",
247+
"d12",
248+
"d13",
249+
"d14",
250+
"d15",
251+
}
252+
# Register aliases: numeric form -> canonical name
253+
gpr_aliases = {"r14": "lr", "r15": "pc", "r13": "sp"}
254+
255+
def parse_reg(s):
256+
"""Parse a single register name and return its canonical lowercase name (r13/r14/r15 map to sp/lr/pc); raise ValueError if it cannot be parsed."""
257+
s = s.strip().lower()
258+
# Named aliases
259+
if s in ("lr", "pc", "sp"):
260+
return s
261+
m = re.match(r"([a-z]+)(\d+)$", s)
262+
if not m:
263+
raise ValueError(f"Cannot parse register: {s}")
264+
return gpr_aliases.get(s, s)
265+
266+
def expand_reg_range(token):
267+
"""Expand 'r4-r11' or 'd8-d15' into a list. Single regs returned as-is."""
268+
m = re.match(r"^\s*([a-z]+)(\d+)\s*-\s*([a-z]+)(\d+)\s*$", token)
269+
if m:
270+
prefix1, lo, prefix2, hi = (
271+
m.group(1),
272+
int(m.group(2)),
273+
m.group(3),
274+
int(m.group(4)),
275+
)
276+
if prefix1 != prefix2:
277+
raise ValueError(
278+
f"Mismatched register prefixes in range: {token}"
279+
)
280+
return [
281+
gpr_aliases.get(f"{prefix1}{n}", f"{prefix1}{n}")
282+
for n in range(lo, hi + 1)
283+
]
284+
return [parse_reg(token)]
285+
286+
def parse_reglist(reglist_str):
287+
"""Parse a register list string, expanding ranges and normalizing aliases."""
288+
result = []
289+
for token in reglist_str.split(","):
290+
result.extend(expand_reg_range(token))
291+
return result
292+
293+
# push.w {reglist} / push {reglist}
294+
match = re.match(r"(\s*)push(?:\.w)?\s*\{([^}]+)\}", line, re.IGNORECASE)
295+
if match:
296+
indent = match.group(1)
297+
regs = parse_reglist(match.group(2))
298+
total_size = 4 * len(regs)
299+
result.append(line)
300+
result.append(f"{indent}.cfi_adjust_cfa_offset {total_size:#x}")
301+
for idx, reg in enumerate(regs):
302+
if reg in armv81m_callee_saved_gprs:
303+
offset = 4 * idx
304+
result.append(f"{indent}.cfi_rel_offset {reg}, {offset:#x}")
305+
i += 1
306+
continue
307+
308+
# vpush {d-regs}
309+
match = re.match(r"(\s*)vpush\s*\{([^}]+)\}", line, re.IGNORECASE)
310+
if match:
311+
indent = match.group(1)
312+
regs = parse_reglist(match.group(2))
313+
total_size = 8 * len(regs)
314+
result.append(line)
315+
result.append(f"{indent}.cfi_adjust_cfa_offset {total_size:#x}")
316+
for idx, reg in enumerate(regs):
317+
if reg in armv81m_callee_saved_dregs:
318+
offset = 8 * idx
319+
result.append(f"{indent}.cfi_rel_offset {reg}, {offset:#x}")
320+
i += 1
321+
continue
322+
323+
# vpop {d-regs}
324+
match = re.match(r"(\s*)vpop\s*\{([^}]+)\}", line, re.IGNORECASE)
325+
if match:
326+
indent = match.group(1)
327+
regs = parse_reglist(match.group(2))
328+
total_size = 8 * len(regs)
329+
result.append(line)
330+
for reg in regs:
331+
if reg in armv81m_callee_saved_dregs:
332+
result.append(f"{indent}.cfi_restore {reg}")
333+
result.append(f"{indent}.cfi_adjust_cfa_offset -{total_size:#x}")
334+
i += 1
335+
continue
336+
337+
# pop.w {reglist} / pop {reglist}
338+
match = re.match(r"(\s*)pop(?:\.w)?\s*\{([^}]+)\}", line, re.IGNORECASE)
339+
if match:
340+
indent = match.group(1)
341+
regs = parse_reglist(match.group(2))
342+
total_size = 4 * len(regs)
343+
has_pc = "pc" in regs
344+
result.append(line)
345+
for reg in regs:
346+
if reg in armv81m_callee_saved_gprs:
347+
result.append(f"{indent}.cfi_restore {reg}")
348+
elif reg == "pc":
349+
# pop into pc restores lr
350+
result.append(f"{indent}.cfi_restore lr")
351+
result.append(f"{indent}.cfi_adjust_cfa_offset -{total_size:#x}")
352+
if has_pc:
353+
result.append(f"{indent}.cfi_endproc")
354+
i += 1
355+
continue
356+
357+
# sub.w sp, #imm / sub sp, #imm / sub sp, sp, #imm
358+
match = re.match(
359+
r"(\s*)sub(?:\.w)?\s+sp,\s*(?:sp,\s*)?#(0x[0-9a-fA-F]+|\d+)",
360+
line,
361+
re.IGNORECASE,
362+
)
363+
if match:
364+
indent, offset_str = match.groups()
365+
offset = (
366+
int(offset_str, 16)
367+
if offset_str.lower().startswith("0x")
368+
else int(offset_str)
369+
)
370+
result.append(line)
371+
result.append(f"{indent}.cfi_adjust_cfa_offset {offset:#x}")
372+
i += 1
373+
continue
374+
375+
# add.w sp, #imm / add sp, #imm / add sp, sp, #imm
376+
match = re.match(
377+
r"(\s*)add(?:\.w)?\s+sp,\s*(?:sp,\s*)?#(0x[0-9a-fA-F]+|\d+)",
378+
line,
379+
re.IGNORECASE,
380+
)
381+
if match:
382+
indent, offset_str = match.groups()
383+
offset = (
384+
int(offset_str, 16)
385+
if offset_str.lower().startswith("0x")
386+
else int(offset_str)
387+
)
388+
result.append(line)
389+
result.append(f"{indent}.cfi_adjust_cfa_offset -{offset:#x}")
390+
i += 1
391+
continue
392+
393+
# bx lr — function return
394+
match = re.match(r"(\s*)bx\s+lr\s*$", line, re.IGNORECASE)
395+
if match:
396+
indent = match.group(1)
397+
result.append(line)
398+
result.append(f"{indent}.cfi_endproc")
399+
i += 1
400+
continue
401+
229402
result.append(line)
230403
i += 1
231404

@@ -246,7 +419,7 @@ def main():
246419
)
247420
parser.add_argument(
248421
"--arch",
249-
choices=["aarch64", "x86_64"],
422+
choices=["aarch64", "x86_64", "armv81m"],
250423
default="aarch64",
251424
help="Target architecture (default: aarch64)",
252425
)

0 commit comments

Comments
 (0)