Skip to content

Commit 127d6eb

Browse files
Merge branch 'main' into cythonize
2 parents 4fbc698 + 41c679b commit 127d6eb

7 files changed

Lines changed: 206 additions & 76 deletions

File tree

doc/changelog.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ unreleased: Version 0.18.0
88
- Replace string literal type annotations with postponed evaluation using
99
``from __future__ import annotations`` PR #191
1010

11+
Breaking changes:
12+
13+
- ``BasicBlock``, ``Bytecode``, and ``ConcreteBytecode`` now validate inserted
14+
instructions at insertion time (``append``, ``extend``, ``insert``,
15+
``__setitem__``) rather than during iteration. Code that relied on catching
16+
``ValueError`` from ``list(block)`` or ``for instr in block:`` must wrap the
17+
insertion call instead. PR #199
18+
1119
Bugfixes:
1220

1321
- Fix handling of END_ASYNC_FOR which is a backward jump PR #179

src/bytecode/bytecode.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,30 @@ def legalize(self) -> None:
167167
def _check_instr(self, instr):
168168
raise NotImplementedError()
169169

170+
def append(self, instr: U) -> None: # type: ignore[override]
171+
self._check_instr(instr)
172+
super().append(instr)
173+
174+
def insert(self, index: SupportsIndex, instr: U) -> None: # type: ignore[override]
175+
self._check_instr(instr)
176+
super().insert(index, instr)
177+
178+
def extend(self, instrs) -> None: # type: ignore[override]
179+
instrs = list(instrs)
180+
for instr in instrs:
181+
self._check_instr(instr)
182+
super().extend(instrs)
183+
184+
def __setitem__(self, index, value):
185+
if isinstance(index, slice):
186+
values = list(value)
187+
for v in values:
188+
self._check_instr(v)
189+
super().__setitem__(index, values)
190+
else:
191+
self._check_instr(value)
192+
super().__setitem__(index, value)
193+
170194

171195
V = TypeVar("V")
172196

@@ -236,15 +260,11 @@ def __init__(
236260
) -> None:
237261
BaseBytecode.__init__(self)
238262
self.argnames: List[str] = []
239-
for instr in instructions:
240-
self._check_instr(instr)
241263
self.extend(instructions)
242264

243265
def __iter__(self) -> Iterator[Union[Instr, Label, TryBegin, TryEnd, SetLineno]]:
244-
instructions = super().__iter__()
245266
seen_try_begin = False
246-
for instr in instructions:
247-
self._check_instr(instr)
267+
for instr in super().__iter__():
248268
if isinstance(instr, TryBegin):
249269
if seen_try_begin:
250270
raise RuntimeError("TryBegin pseudo instructions cannot be nested.")

src/bytecode/cfg.py

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,39 +42,75 @@ def __init__(
4242
if instructions:
4343
super().__init__(instructions)
4444

45-
def __iter__(self) -> Iterator[Union[Instr, SetLineno, TryBegin, TryEnd]]:
46-
index = 0
47-
while index < len(self):
48-
instr = self[index]
49-
index += 1
45+
_VALID_TYPES = (SetLineno, Instr, TryBegin, TryEnd)
46+
47+
@staticmethod
48+
def _check_instr(instr: Any) -> None:
49+
if not isinstance(instr, (SetLineno, Instr, TryBegin, TryEnd)):
50+
raise ValueError(
51+
"BasicBlock must only contain SetLineno and Instr objects, "
52+
"but %s was found" % instr.__class__.__name__
53+
)
5054

51-
if not isinstance(instr, (SetLineno, Instr, TryBegin, TryEnd)):
55+
def append(self, instr: Union[Instr, SetLineno, TryBegin, TryEnd]) -> None:
56+
self._check_instr(instr)
57+
if isinstance(instr, Instr):
58+
last = self.get_last_non_artificial_instruction()
59+
if last is not None and last.has_jump():
5260
raise ValueError(
53-
"BasicBlock must only contain SetLineno and Instr objects, "
54-
"but %s was found" % instr.__class__.__name__
61+
"Only the last instruction of a basic block can be a jump"
5562
)
63+
super().append(instr)
5664

57-
if isinstance(instr, Instr) and instr.has_jump():
58-
if index < len(self) and any(
59-
isinstance(self[i], Instr) for i in range(index, len(self))
65+
def insert(
66+
self, index: SupportsIndex, instr: Union[Instr, SetLineno, TryBegin, TryEnd]
67+
) -> None:
68+
self._check_instr(instr)
69+
super().insert(index, instr)
70+
71+
def extend(
72+
self, instrs: Iterable[Union[Instr, SetLineno, TryBegin, TryEnd]]
73+
) -> None:
74+
instrs = list(instrs)
75+
for instr in instrs:
76+
self._check_instr(instr)
77+
existing_last = self.get_last_non_artificial_instruction()
78+
last_new_instr: Optional[Instr] = None
79+
for instr in instrs:
80+
if isinstance(instr, Instr):
81+
if (existing_last is not None and existing_last.has_jump()) or (
82+
last_new_instr is not None and last_new_instr.has_jump()
6083
):
6184
raise ValueError(
6285
"Only the last instruction of a basic block can be a jump"
6386
)
87+
last_new_instr = instr
88+
super().extend(instrs)
6489

90+
def __setitem__(self, index, value):
91+
if isinstance(index, slice):
92+
values = list(value)
93+
for instr in values:
94+
self._check_instr(instr)
95+
super().__setitem__(index, values)
96+
else:
97+
self._check_instr(value)
98+
super().__setitem__(index, value)
99+
100+
def __iter__(self) -> Iterator[Union[Instr, SetLineno, TryBegin, TryEnd]]:
101+
for instr in super().__iter__():
102+
if isinstance(instr, Instr) and instr.has_jump():
65103
if not isinstance(instr.arg, BasicBlock):
66104
raise ValueError(
67-
"Jump target must a BasicBlock, got %s",
68-
type(instr.arg).__name__,
105+
"Jump target must a BasicBlock, got %s"
106+
% type(instr.arg).__name__
69107
)
70-
71-
if isinstance(instr, TryBegin):
108+
elif isinstance(instr, TryBegin):
72109
if not isinstance(instr.target, BasicBlock):
73110
raise ValueError(
74-
"TryBegin target must a BasicBlock, got %s",
75-
type(instr.target).__name__,
111+
"TryBegin target must a BasicBlock, got %s"
112+
% type(instr.target).__name__
76113
)
77-
78114
yield instr
79115

80116
@overload
@@ -94,10 +130,9 @@ def __getitem__(self, index):
94130
return value
95131

96132
def get_last_non_artificial_instruction(self) -> Optional[Instr]:
97-
for instr in reversed(self):
133+
for instr in super().__reversed__():
98134
if isinstance(instr, Instr):
99135
return instr
100-
101136
return None
102137

103138
def copy(self: T) -> T:

src/bytecode/concrete.py

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,30 @@ def _from_opcode(
209209
new._size = 2
210210
return new
211211

212+
@classmethod
213+
def _from_trusted(
214+
cls: Type[T],
215+
name: str,
216+
opcode: int,
217+
arg: int,
218+
location: Optional[InstrLocation],
219+
) -> T:
220+
"""Fast path for concrete_instructions: skip validation, compute size from arg."""
221+
new = object.__new__(cls)
222+
new._name = name
223+
new._opcode = opcode
224+
new._arg = arg
225+
new._location = location
226+
new._extended_args = None
227+
size = 2
228+
if arg is not UNSET:
229+
_arg = arg
230+
while _arg > 0xFF:
231+
size += 2
232+
_arg >>= 8
233+
new._size = size
234+
return new
235+
212236
@classmethod
213237
def disassemble(cls: Type[T], lineno: Optional[int], code: bytes, offset: int) -> T:
214238
index = 2 * offset
@@ -315,16 +339,8 @@ def __init__(
315339
self.names = list(names)
316340
self.varnames = list(varnames)
317341
self.exception_table = exception_table or []
318-
for instr in instructions:
319-
self._check_instr(instr)
320342
self.extend(instructions)
321343

322-
def __iter__(self) -> Iterator[Union[ConcreteInstr, SetLineno]]:
323-
instructions = super().__iter__()
324-
for instr in instructions:
325-
self._check_instr(instr)
326-
yield instr
327-
328344
def _check_instr(self, instr: Any) -> None:
329345
if not isinstance(instr, (ConcreteInstr, SetLineno)):
330346
raise ValueError(
@@ -800,16 +816,19 @@ def to_bytecode(
800816
c_instructions = self[:]
801817
self._remove_extended_args(c_instructions)
802818

803-
# Find jump targets
819+
# Find jump targets; stash (size, jump_target) to avoid recomputing in the main loop
804820
jump_targets: Set[int] = set()
821+
_instr_props: List[Tuple[int, Optional[int]]] = []
805822
offset = 0
806823
for c_instr in c_instructions:
807824
if isinstance(c_instr, SetLineno):
808825
continue
826+
size = c_instr.size
809827
target = c_instr.get_jump_target(offset)
828+
_instr_props.append((size, target))
810829
if target is not None:
811830
jump_targets.add(target)
812-
offset += c_instr.size // 2
831+
offset += size // 2
813832

814833
# On 3.11+ we need to also look at the exception table for jump targets
815834
for ex_entry in self.exception_table:
@@ -857,6 +876,7 @@ def to_bytecode(
857876
else:
858877
locals_lookup = self.varnames
859878

879+
_props_iter = iter(_instr_props)
860880
for lineno, c_instr in self._normalize_lineno(
861881
c_instructions, self.first_lineno
862882
):
@@ -882,8 +902,7 @@ def to_bytecode(
882902
tb_instrs[entry] = tb_instr
883903
instructions.append(tb_instr)
884904

885-
jump_target = c_instr.get_jump_target(offset)
886-
size = c_instr.size
905+
size, jump_target = next(_props_iter)
887906
# If an instruction uses extended args, those appear before the instruction
888907
# causing the instruction to appear at offset that accounts for extended
889908
# args. So we first update the offset to account for extended args, then
@@ -1046,7 +1065,9 @@ def __init__(self, code: _bytecode.Bytecode) -> None:
10461065
self.consts_indices: dict[bytes | Tuple[type, int], int] = {}
10471066
self.consts_list: list[Any] = []
10481067
self.names: list[str] = []
1068+
self.names_map: dict[str, int] = {}
10491069
self.varnames: list[str] = []
1070+
self.varnames_map: dict[str, int] = {}
10501071

10511072
def add_const(self, value: Any) -> int:
10521073
key = const_key(value)
@@ -1057,13 +1078,20 @@ def add_const(self, value: Any) -> int:
10571078
self.consts_list.append(value)
10581079
return index
10591080

1060-
@staticmethod
1061-
def add(names: list[str], name: str) -> int:
1062-
try:
1063-
index = names.index(name)
1064-
except ValueError:
1065-
index = len(names)
1066-
names.append(name)
1081+
def add_name(self, name: str) -> int:
1082+
index = self.names_map.get(name)
1083+
if index is None:
1084+
index = len(self.names)
1085+
self.names_map[name] = index
1086+
self.names.append(name)
1087+
return index
1088+
1089+
def add_varname(self, name: str) -> int:
1090+
index = self.varnames_map.get(name)
1091+
if index is None:
1092+
index = len(self.varnames)
1093+
self.varnames_map[name] = index
1094+
self.varnames.append(name)
10671095
return index
10681096

10691097
def concrete_instructions(self) -> None:
@@ -1084,7 +1112,7 @@ def concrete_instructions(self) -> None:
10841112
assert isinstance(binstr.arg, tuple)
10851113
for parg in binstr.arg:
10861114
assert isinstance(parg, str)
1087-
self.add(self.varnames, parg)
1115+
self.add_varname(parg)
10881116

10891117
# We use None as a sentinel to ensure caches for the last instruction are
10901118
# properly generated.
@@ -1151,12 +1179,14 @@ def concrete_instructions(self) -> None:
11511179

11521180
assert isinstance(instr, Instr)
11531181

1154-
if instr.location is not UNSET and instr.location is not None:
1155-
location = instr.location
1182+
# Access private slots directly — avoids property descriptor overhead on
1183+
# every iteration; safe because instr is a validated Instr at this point.
1184+
if instr._location is not UNSET and instr._location is not None:
1185+
location = instr._location
11561186

1157-
instr_name = instr.name
1187+
instr_name = instr._name
11581188
opcode = instr._opcode
1159-
arg = instr.arg
1189+
arg = instr._arg
11601190
is_jump = False
11611191
if isinstance(arg, Label):
11621192
label = arg
@@ -1168,8 +1198,8 @@ def concrete_instructions(self) -> None:
11681198
elif opcode in HAS_LOCAL:
11691199
if opcode in DUAL_ARG_OPCODES:
11701200
_arg2 = cast(Tuple[str, str], arg)
1171-
arg1_index = self.add(self.varnames, _arg2[0])
1172-
arg2_index = self.add(self.varnames, _arg2[1])
1201+
arg1_index = self.add_varname(_arg2[0])
1202+
arg2_index = self.add_varname(_arg2[1])
11731203
if arg1_index > 16 or arg2_index > 16:
11741204
n1, n2 = DUAL_ARG_OPCODES_SINGLE_OPS[opcode]
11751205
c_instr = ConcreteInstr(n1, arg1_index, location=location)
@@ -1186,7 +1216,7 @@ def concrete_instructions(self) -> None:
11861216
c_arg = self.bytecode.freevars.index(arg.name)
11871217
else:
11881218
assert isinstance(arg, str)
1189-
c_arg = self.add(self.varnames, arg)
1219+
c_arg = self.add_varname(arg)
11901220
elif opcode in HAS_NAME:
11911221
if opcode in BITFLAG_OPCODES:
11921222
assert (
@@ -1195,19 +1225,19 @@ def concrete_instructions(self) -> None:
11951225
and isinstance(arg[0], bool)
11961226
), arg
11971227
if isinstance(arg[1], str):
1198-
index = self.add(self.names, arg[1])
1228+
index = self.add_name(arg[1])
11991229
elif isinstance(arg, FormatValue):
12001230
index = int(arg)
12011231
else:
12021232
assert False, arg # noqa
12031233
c_arg = int(arg[0]) + (index << 1)
12041234
elif opcode in BITFLAG2_OPCODES:
12051235
_arg3 = cast(tuple[bool, bool, str], arg)
1206-
index = self.add(self.names, _arg3[2])
1236+
index = self.add_name(_arg3[2])
12071237
c_arg = int(_arg3[0]) + 2 * int(_arg3[1]) + (index << 2)
12081238
else:
12091239
assert isinstance(arg, str), f"Got {arg}, expected a str"
1210-
c_arg = self.add(self.names, arg)
1240+
c_arg = self.add_name(arg)
12111241
elif opcode in HAS_FREE:
12121242
if isinstance(arg, CellVar):
12131243
cell_instrs.append(len(self.instructions))
@@ -1240,7 +1270,7 @@ def concrete_instructions(self) -> None:
12401270
c_arg = arg
12411271

12421272
# The above should have performed all the necessary conversion
1243-
c_instr = ConcreteInstr(instr_name, c_arg, location=location)
1273+
c_instr = ConcreteInstr._from_trusted(instr_name, opcode, c_arg, location)
12441274
if is_jump:
12451275
self.jumps.append((len(self.instructions), label, c_instr))
12461276

@@ -1352,7 +1382,8 @@ def to_concrete_bytecode(
13521382
if first_const is not UNSET:
13531383
self.add_const(first_const)
13541384

1355-
self.varnames.extend(self.bytecode.argnames)
1385+
for name in self.bytecode.argnames:
1386+
self.add_varname(name)
13561387

13571388
self.concrete_instructions()
13581389
for _ in range(0, compute_jumps_passes):

0 commit comments

Comments
 (0)