Skip to content

Commit 392153b

Browse files
brandtbucherhugovk
authored andcommitted
[3.14] GH-146128: Remove the buggy AArch64 "33rx" relocation (GH-146263)
(cherry picked from commit 6bb7b33) Co-authored-by: Brandt Bucher <brandt@python.org>
1 parent 25369a8 commit 392153b

File tree

4 files changed

+21
-103
lines changed

4 files changed

+21
-103
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix a bug which could cause constant values to be partially corrupted in
2+
AArch64 JIT code. This issue is theoretical, and hasn't actually been
3+
observed in unmodified Python interpreters.

Python/jit.c

Lines changed: 11 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -248,18 +248,6 @@ patch_aarch64_12(unsigned char *location, uint64_t value)
248248
set_bits(loc32, 10, value, shift, 12);
249249
}
250250

251-
// Relaxable 12-bit low part of an absolute address. Pairs nicely with
252-
// patch_aarch64_21rx (below).
253-
void
254-
patch_aarch64_12x(unsigned char *location, uint64_t value)
255-
{
256-
// This can *only* be relaxed if it occurs immediately before a matching
257-
// patch_aarch64_21rx. If that happens, the JIT build step will replace both
258-
// calls with a single call to patch_aarch64_33rx. Otherwise, we end up
259-
// here, and the instruction is patched normally:
260-
patch_aarch64_12(location, value);
261-
}
262-
263251
// 16-bit low part of an absolute address.
264252
void
265253
patch_aarch64_16a(unsigned char *location, uint64_t value)
@@ -320,16 +308,19 @@ patch_aarch64_21r(unsigned char *location, uint64_t value)
320308
set_bits(loc32, 5, value, 2, 19);
321309
}
322310

323-
// Relaxable 21-bit count of pages between this page and an absolute address's
324-
// page. Pairs nicely with patch_aarch64_12x (above).
311+
// 21-bit relative branch.
325312
void
326-
patch_aarch64_21rx(unsigned char *location, uint64_t value)
313+
patch_aarch64_19r(unsigned char *location, uint64_t value)
327314
{
328-
// This can *only* be relaxed if it occurs immediately before a matching
329-
// patch_aarch64_12x. If that happens, the JIT build step will replace both
330-
// calls with a single call to patch_aarch64_33rx. Otherwise, we end up
331-
// here, and the instruction is patched normally:
332-
patch_aarch64_21r(location, value);
315+
uint32_t *loc32 = (uint32_t *)location;
316+
assert(IS_AARCH64_BRANCH_COND(*loc32) || IS_AARCH64_BRANCH_ZERO(*loc32));
317+
value -= (uintptr_t)location;
318+
// Check that we're not out of range of 21 signed bits:
319+
assert((int64_t)value >= -(1 << 20));
320+
assert((int64_t)value < (1 << 20));
321+
// Since instructions are 4-byte aligned, only use 19 bits:
322+
assert(get_bits(value, 0, 2) == 0);
323+
set_bits(loc32, 5, value, 2, 19);
333324
}
334325

335326
// 28-bit relative branch.
@@ -347,46 +338,6 @@ patch_aarch64_26r(unsigned char *location, uint64_t value)
347338
set_bits(loc32, 0, value, 2, 26);
348339
}
349340

350-
// A pair of patch_aarch64_21rx and patch_aarch64_12x.
351-
void
352-
patch_aarch64_33rx(unsigned char *location, uint64_t value)
353-
{
354-
uint32_t *loc32 = (uint32_t *)location;
355-
// Try to relax the pair of GOT loads into an immediate value:
356-
assert(IS_AARCH64_ADRP(*loc32));
357-
unsigned char reg = get_bits(loc32[0], 0, 5);
358-
assert(IS_AARCH64_LDR_OR_STR(loc32[1]));
359-
// There should be only one register involved:
360-
assert(reg == get_bits(loc32[1], 0, 5)); // ldr's output register.
361-
assert(reg == get_bits(loc32[1], 5, 5)); // ldr's input register.
362-
uint64_t relaxed = *(uint64_t *)value;
363-
if (relaxed < (1UL << 16)) {
364-
// adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; nop
365-
loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
366-
loc32[1] = 0xD503201F;
367-
return;
368-
}
369-
if (relaxed < (1ULL << 32)) {
370-
// adrp reg, AAA; ldr reg, [reg + BBB] -> movz reg, XXX; movk reg, YYY
371-
loc32[0] = 0xD2800000 | (get_bits(relaxed, 0, 16) << 5) | reg;
372-
loc32[1] = 0xF2A00000 | (get_bits(relaxed, 16, 16) << 5) | reg;
373-
return;
374-
}
375-
relaxed = value - (uintptr_t)location;
376-
if ((relaxed & 0x3) == 0 &&
377-
(int64_t)relaxed >= -(1L << 19) &&
378-
(int64_t)relaxed < (1L << 19))
379-
{
380-
// adrp reg, AAA; ldr reg, [reg + BBB] -> ldr reg, XXX; nop
381-
loc32[0] = 0x58000000 | (get_bits(relaxed, 2, 19) << 5) | reg;
382-
loc32[1] = 0xD503201F;
383-
return;
384-
}
385-
// Couldn't do it. Just patch the two instructions normally:
386-
patch_aarch64_21rx(location, value);
387-
patch_aarch64_12x(location + 4, value);
388-
}
389-
390341
// Relaxable 32-bit relative address.
391342
void
392343
patch_x86_64_32rx(unsigned char *location, uint64_t value)

Tools/jit/_stencils.py

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -55,29 +55,29 @@ class HoleValue(enum.Enum):
5555
_PATCH_FUNCS = {
5656
# aarch64-apple-darwin:
5757
"ARM64_RELOC_BRANCH26": "patch_aarch64_26r",
58-
"ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21rx",
59-
"ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12x",
58+
"ARM64_RELOC_GOT_LOAD_PAGE21": "patch_aarch64_21r",
59+
"ARM64_RELOC_GOT_LOAD_PAGEOFF12": "patch_aarch64_12",
6060
"ARM64_RELOC_PAGE21": "patch_aarch64_21r",
6161
"ARM64_RELOC_PAGEOFF12": "patch_aarch64_12",
6262
"ARM64_RELOC_UNSIGNED": "patch_64",
6363
# x86_64-pc-windows-msvc:
6464
"IMAGE_REL_AMD64_REL32": "patch_x86_64_32rx",
6565
# aarch64-pc-windows-msvc:
6666
"IMAGE_REL_ARM64_BRANCH26": "patch_aarch64_26r",
67-
"IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21rx",
67+
"IMAGE_REL_ARM64_PAGEBASE_REL21": "patch_aarch64_21r",
6868
"IMAGE_REL_ARM64_PAGEOFFSET_12A": "patch_aarch64_12",
69-
"IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12x",
69+
"IMAGE_REL_ARM64_PAGEOFFSET_12L": "patch_aarch64_12",
7070
# i686-pc-windows-msvc:
7171
"IMAGE_REL_I386_DIR32": "patch_32",
7272
"IMAGE_REL_I386_REL32": "patch_x86_64_32rx",
7373
# aarch64-unknown-linux-gnu:
7474
"R_AARCH64_ABS64": "patch_64",
7575
"R_AARCH64_ADD_ABS_LO12_NC": "patch_aarch64_12",
76-
"R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21rx",
76+
"R_AARCH64_ADR_GOT_PAGE": "patch_aarch64_21r",
7777
"R_AARCH64_ADR_PREL_PG_HI21": "patch_aarch64_21r",
7878
"R_AARCH64_CALL26": "patch_aarch64_26r",
7979
"R_AARCH64_JUMP26": "patch_aarch64_26r",
80-
"R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12x",
80+
"R_AARCH64_LD64_GOT_LO12_NC": "patch_aarch64_12",
8181
"R_AARCH64_MOVW_UABS_G0_NC": "patch_aarch64_16a",
8282
"R_AARCH64_MOVW_UABS_G1_NC": "patch_aarch64_16b",
8383
"R_AARCH64_MOVW_UABS_G2_NC": "patch_aarch64_16c",
@@ -140,34 +140,6 @@ class Hole:
140140
def __post_init__(self) -> None:
141141
self.func = _PATCH_FUNCS[self.kind]
142142

143-
def fold(self, other: typing.Self, body: bytearray) -> typing.Self | None:
144-
"""Combine two holes into a single hole, if possible."""
145-
instruction_a = int.from_bytes(
146-
body[self.offset : self.offset + 4], byteorder=sys.byteorder
147-
)
148-
instruction_b = int.from_bytes(
149-
body[other.offset : other.offset + 4], byteorder=sys.byteorder
150-
)
151-
reg_a = instruction_a & 0b11111
152-
reg_b1 = instruction_b & 0b11111
153-
reg_b2 = (instruction_b >> 5) & 0b11111
154-
155-
if (
156-
self.offset + 4 == other.offset
157-
and self.value == other.value
158-
and self.symbol == other.symbol
159-
and self.addend == other.addend
160-
and self.func == "patch_aarch64_21rx"
161-
and other.func == "patch_aarch64_12x"
162-
and reg_a == reg_b1 == reg_b2
163-
):
164-
# These can *only* be properly relaxed when they appear together and
165-
# patch the same value:
166-
folded = self.replace()
167-
folded.func = "patch_aarch64_33rx"
168-
return folded
169-
return None
170-
171143
def as_c(self, where: str) -> str:
172144
"""Dump this hole as a call to a patch_* function."""
173145
location = f"{where} + {self.offset:#x}"

Tools/jit/_writer.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Utilities for writing StencilGroups out to a C header file."""
22

3-
import itertools
43
import typing
54
import math
65

@@ -60,15 +59,8 @@ def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator
6059
for part, stencil in [("data", group.data), ("code", group.code)]:
6160
if stencil.body.rstrip(b"\x00"):
6261
yield f" memcpy({part}, {part}_body, sizeof({part}_body));"
63-
skip = False
6462
stencil.holes.sort(key=lambda hole: hole.offset)
65-
for hole, pair in itertools.zip_longest(stencil.holes, stencil.holes[1:]):
66-
if skip:
67-
skip = False
68-
continue
69-
if pair and (folded := hole.fold(pair, stencil.body)):
70-
skip = True
71-
hole = folded
63+
for hole in stencil.holes:
7264
yield f" {hole.as_c(part)}"
7365
yield "}"
7466
yield ""

0 commit comments

Comments
 (0)