From 7ac2792b0ab4d52d225b35af71014a361bf60609 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Fri, 17 Oct 2025 12:21:21 +0100 Subject: [PATCH 1/2] perf: improve general performance We try to use sets for quick lookups and cache the result of some repetitive operations to speed things up. We also remove a good deal of asserts embedded within the code that added extra overhead. --- src/bytecode/cfg.py | 8 +----- src/bytecode/concrete.py | 51 ++++++++++++++------------------------ src/bytecode/instr.py | 53 ++++++++++++++++++++-------------------- 3 files changed, 47 insertions(+), 65 deletions(-) diff --git a/src/bytecode/cfg.py b/src/bytecode/cfg.py index c39124b5..bb9bfcd5 100644 --- a/src/bytecode/cfg.py +++ b/src/bytecode/cfg.py @@ -588,7 +588,6 @@ def compute_stacksize( if compute_exception_stack_depths: for tb in common.try_begins: size = common.exception_block_startsize[id(tb.target)] - assert size >= 0 tb.stack_depth = size return args @@ -841,7 +840,6 @@ def from_bytecode(bytecode: _bytecode.Bytecode) -> "ControlFlowGraph": # The last instruction is final, if the current instruction is a # TryEnd insert it in the same block and move to the next instruction if last_instr.is_final() and isinstance(instr, TryEnd): - assert active_try_begin nte = instr.copy() nte.entry = try_begins[active_try_begin][-1] old_block.append(nte) @@ -888,7 +886,6 @@ def from_bytecode(bytecode: _bytecode.Bytecode) -> "ControlFlowGraph": if isinstance(instr, (Instr, TryBegin, TryEnd)): new = instr.copy() if isinstance(instr, TryBegin): - assert active_try_begin is None active_try_begin = instr try_begin_inserted_in_block = True assert isinstance(new, TryBegin) @@ -982,9 +979,7 @@ def to_bytecode(self) -> _bytecode.Bytecode: # If due to jumps and split TryBegin, we encounter a TryBegin # while we still have a TryBegin ensure they can be fused. 
if last_try_begin is not None: - cfg_tb, byt_tb = last_try_begin - assert instr.target is cfg_tb.target - assert instr.push_lasti == cfg_tb.push_lasti + _, byt_tb = last_try_begin byt_tb.stack_depth = min( byt_tb.stack_depth, instr.stack_depth ) @@ -1003,7 +998,6 @@ def to_bytecode(self) -> _bytecode.Bytecode: # If we did not yet compute the required stack depth # keep the value as UNSET if entry.stack_depth is UNSET: - assert instr.stack_depth is UNSET byt_te.entry.stack_depth = UNSET else: byt_te.entry.stack_depth = min( diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 32c64524..35a9b328 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -67,6 +67,12 @@ # - dis displays bytes OFFSET_AS_INSTRUCTION = PY310 +HAS_CONST = set(_opcode.hasconst) +HAS_LOCAL = set(_opcode.haslocal) +HAS_NAME = set(_opcode.hasname) +HAS_FREE = set(_opcode.hasfree) +HAS_COMPARE = set(_opcode.hascompare) + def _set_docstring(code: _bytecode.BaseBytecode, consts: Sequence) -> None: if not consts: @@ -478,9 +484,6 @@ def _assemble_lnotab( doff = 0 dlineno -= 127 - assert 0 <= doff <= 255 - assert -128 <= dlineno <= 127 - lnotab.append(struct.pack("Bb", doff, dlineno)) return b"".join(lnotab) @@ -500,7 +503,6 @@ def _pack_linetable( linetable.append(struct.pack("Bb", 0, 127)) dlineno -= 127 - assert -127 <= dlineno <= 127 else: dlineno = -128 @@ -520,8 +522,6 @@ def _pack_linetable( else: linetable.append(struct.pack("Bb", doff, dlineno)) - assert 0 <= doff <= 254 - # Used on 3.10 def _assemble_linestable( self, @@ -640,9 +640,6 @@ def _pack_location( # We enforce the end_lineno to be defined else: - assert end_lineno is not None - assert end_col_offset is not None - # Short forms if ( end_lineno == l_lineno @@ -812,7 +809,6 @@ def _parse_varint(except_table_iterator: Iterator[int]) -> int: def _parse_exception_table( self, exception_table: bytes ) -> List[ExceptionTableEntry]: - assert PY311 table = [] iterator = iter(exception_table) try: @@ -833,7 
+829,6 @@ def _encode_varint(value: int, set_begin_marker: bool = False) -> Iterator[int]: # Encode value as a varint on 7 bits (MSB should come first) and set # the begin marker if requested. temp: List[int] = [] - assert value >= 0 while value: temp.append(value & 63 | (64 if temp else 0)) value >>= 6 @@ -967,7 +962,6 @@ def to_bytecode( for entry in self.exception_table: # Ensure we do not have more than one entry with identical starting # offsets - assert entry.start_offset not in ex_start ex_start[entry.start_offset] = entry ex_end.setdefault(entry.stop_offset, []).append(entry) @@ -1046,7 +1040,9 @@ def to_bytecode( # We are careful to first advance the offset and check that the CACHE # is not a jump target. It should never be the case but we double check. if prune_caches and c_instr.name == "CACHE": - assert jump_target is None + if jump_target is not None: + msg = "cache instruction cannot have jump target" + raise ValueError(msg) # We may need to insert a TryEnd after a CACHE so we need to run the # through the last block. 
@@ -1055,14 +1051,14 @@ def to_bytecode( arg: InstrArg c_arg = c_instr.arg # FIXME: better error reporting - if opcode in _opcode.hasconst: + if opcode in HAS_CONST: arg = self.consts[c_arg] elif opcode in _opcode.haslocal: if opcode in DUAL_ARG_OPCODES: arg = (locals_lookup[c_arg >> 4], locals_lookup[c_arg & 15]) else: arg = locals_lookup[c_arg] - elif opcode in _opcode.hasname: + elif opcode in HAS_NAME: if opcode in BITFLAG_OPCODES: arg = ( bool(c_arg & 1), @@ -1072,7 +1068,7 @@ def to_bytecode( arg = (bool(c_arg & 1), bool(c_arg & 2), self.names[c_arg >> 2]) else: arg = self.names[c_arg] - elif opcode in _opcode.hasfree: + elif opcode in HAS_FREE: if c_arg < ncells: n_or_cell = cells_lookup[c_arg] arg = ( @@ -1083,7 +1079,7 @@ def to_bytecode( else: name = self.freevars[c_arg - ncells] arg = FreeVar(name) - elif opcode in _opcode.hascompare: + elif opcode in HAS_COMPARE: arg = Compare( (c_arg >> 5) + ((1 << 4) if (c_arg & 16) else 0) if PY313 @@ -1175,7 +1171,6 @@ class _ConvertBytecodeToConcrete: _compute_jumps_passes = 10 def __init__(self, code: _bytecode.Bytecode) -> None: - assert isinstance(code, _bytecode.Bytecode) self.bytecode = code # temporary variables @@ -1248,8 +1243,8 @@ def concrete_instructions(self) -> None: ConcreteInstr( "CACHE", 0, location=self.instructions[-1].location ) - for i in range(self.required_caches) ] + * self.required_caches ) self.required_caches = 0 self.seen_manual_cache = False @@ -1272,7 +1267,6 @@ def concrete_instructions(self) -> None: if isinstance(instr, TryBegin): # We expect the stack depth to have be provided or computed earlier - assert instr.stack_depth is not UNSET # NOTE here we store the index of the instruction at which the # exception table entry starts. 
This is not the final value we want, # we want the offset in the bytecode but that requires to compute @@ -1306,16 +1300,10 @@ def concrete_instructions(self) -> None: # fake value, real value is set in compute_jumps() c_arg = 0 is_jump = True - elif opcode in _opcode.hasconst: + elif opcode in HAS_CONST: c_arg = self.add_const(arg) - elif opcode in _opcode.haslocal: + elif opcode in HAS_LOCAL: if opcode in DUAL_ARG_OPCODES: - assert ( - isinstance(arg, tuple) - and len(arg) == 2 - and isinstance(arg[0], str) - and isinstance(arg[1], str) - ) arg1_index = self.add(self.varnames, arg[0]) arg2_index = self.add(self.varnames, arg[1]) if arg1_index > 16 or arg2_index > 16: @@ -1335,7 +1323,7 @@ def concrete_instructions(self) -> None: else: assert isinstance(arg, str) c_arg = self.add(self.varnames, arg) - elif opcode in _opcode.hasname: + elif opcode in HAS_NAME: if opcode in BITFLAG_OPCODES: assert ( isinstance(arg, tuple) @@ -1362,15 +1350,14 @@ def concrete_instructions(self) -> None: else: assert isinstance(arg, str), f"Got {arg}, expected a str" c_arg = self.add(self.names, arg) - elif opcode in _opcode.hasfree: + elif opcode in HAS_FREE: if isinstance(arg, CellVar): cell_instrs.append(len(self.instructions)) c_arg = self.bytecode.cellvars.index(arg.name) else: - assert isinstance(arg, FreeVar) free_instrs.append(len(self.instructions)) c_arg = self.bytecode.freevars.index(arg.name) - elif opcode in _opcode.hascompare: + elif opcode in HAS_COMPARE: if isinstance(arg, Compare): # In Python 3.13 the 4 lowest bits are used for caching # and the 5th one indicate a cast to bool diff --git a/src/bytecode/instr.py b/src/bytecode/instr.py index ac13f77c..80b1fc7d 100644 --- a/src/bytecode/instr.py +++ b/src/bytecode/instr.py @@ -4,8 +4,9 @@ import sys from abc import abstractmethod from dataclasses import dataclass +from functools import cache from marshal import dumps as _dumps -from typing import Any, Callable, Dict, Generic, Optional, Tuple, TypeVar, Union +from 
typing import Any, Callable, Dict, Generic, Optional, Set, Tuple, TypeVar, Union try: from typing import TypeGuard @@ -22,68 +23,68 @@ # Instructions relying on a bit to modify its behavior. # The lowest bit is used to encode custom behavior. BITFLAG_OPCODES = ( - ( + { _opcode.opmap["BUILD_INTERPOLATION"], _opcode.opmap["LOAD_GLOBAL"], _opcode.opmap["LOAD_ATTR"], - ) + } if PY314 else ( - (_opcode.opmap["LOAD_GLOBAL"], _opcode.opmap["LOAD_ATTR"]) + {_opcode.opmap["LOAD_GLOBAL"], _opcode.opmap["LOAD_ATTR"]} if PY312 - else ((_opcode.opmap["LOAD_GLOBAL"],) if PY311 else ()) + else ({_opcode.opmap["LOAD_GLOBAL"]} if PY311 else set()) ) ) -BITFLAG2_OPCODES = (_opcode.opmap["LOAD_SUPER_ATTR"],) if PY312 else () +BITFLAG2_OPCODES = {_opcode.opmap["LOAD_SUPER_ATTR"]} if PY312 else set() # Binary op opcode which has a dedicated arg -BINARY_OPS = (_opcode.opmap["BINARY_OP"],) if PY311 else () +BINARY_OPS = {_opcode.opmap["BINARY_OP"]} if PY311 else set() # Intrinsic related opcodes -INTRINSIC_1OP = (_opcode.opmap["CALL_INTRINSIC_1"],) if PY312 else () -INTRINSIC_2OP = (_opcode.opmap["CALL_INTRINSIC_2"],) if PY312 else () -INTRINSIC = INTRINSIC_1OP + INTRINSIC_2OP +INTRINSIC_1OP = {_opcode.opmap["CALL_INTRINSIC_1"]} if PY312 else set() +INTRINSIC_2OP = {_opcode.opmap["CALL_INTRINSIC_2"]} if PY312 else set() +INTRINSIC = INTRINSIC_1OP | INTRINSIC_2OP # Small integer related opcode -SMALL_INT_OPS = (_opcode.opmap["LOAD_SMALL_INT"],) if PY314 else () +SMALL_INT_OPS = {_opcode.opmap["LOAD_SMALL_INT"]} if PY314 else set() # Special method loading related opcodes -SPECIAL_OPS = (_opcode.opmap["LOAD_SPECIAL"],) if PY314 else () +SPECIAL_OPS = {_opcode.opmap["LOAD_SPECIAL"]} if PY314 else set() # Common constant loading related opcodes -COMMON_CONSTANT_OPS = (_opcode.opmap["LOAD_COMMON_CONSTANT"],) if PY314 else () +COMMON_CONSTANT_OPS = {_opcode.opmap["LOAD_COMMON_CONSTANT"]} if PY314 else set() # Value formatting related opcodes (only handle CONVERT_VALUE and BUILD_INTERPOLATION) 
FORMAT_VALUE_OPS = ( - ( + { _opcode.opmap["CONVERT_VALUE"], _opcode.opmap["BUILD_INTERPOLATION"], - ) + } if PY314 - else ((_opcode.opmap["CONVERT_VALUE"],) if PY313 else ()) + else ({_opcode.opmap["CONVERT_VALUE"]} if PY313 else set()) ) -HASJABS = () if PY313 else _opcode.hasjabs +HASJABS = set() if PY313 else set(_opcode.hasjabs) if sys.version_info >= (3, 13): - HASJREL = _opcode.hasjump + HASJREL = set(_opcode.hasjump) else: - HASJREL = _opcode.hasjrel + HASJREL = set(_opcode.hasjrel) #: Opcodes taking 2 arguments (highest 4 bits and lowest 4 bits) -DUAL_ARG_OPCODES: Tuple[int, ...] = () +DUAL_ARG_OPCODES: Set[int] = set() DUAL_ARG_OPCODES_SINGLE_OPS: Dict[int, Tuple[str, str]] = {} if PY313: - DUAL_ARG_OPCODES = ( + DUAL_ARG_OPCODES = { _opcode.opmap["LOAD_FAST_LOAD_FAST"], _opcode.opmap["STORE_FAST_LOAD_FAST"], _opcode.opmap["STORE_FAST_STORE_FAST"], - ) + } if PY314: - DUAL_ARG_OPCODES = ( + DUAL_ARG_OPCODES = { *DUAL_ARG_OPCODES, _opcode.opmap["LOAD_FAST_BORROW_LOAD_FAST_BORROW"], - ) + } DUAL_ARG_OPCODES_SINGLE_OPS = { _opcode.opmap["LOAD_FAST_LOAD_FAST"]: ("LOAD_FAST", "LOAD_FAST"), _opcode.opmap["STORE_FAST_LOAD_FAST"]: ("STORE_FAST", "LOAD_FAST"), @@ -345,11 +346,13 @@ def _check_arg_int(arg: Any, name: str) -> TypeGuard[int]: if sys.version_info >= (3, 12): + @cache def opcode_has_argument(opcode: int) -> bool: return opcode in dis.hasarg else: + @cache def opcode_has_argument(opcode: int) -> bool: return opcode >= dis.HAVE_ARGUMENT @@ -727,11 +730,9 @@ def stack_effect(self, jump: Optional[bool] = None) -> int: # 3.12 does the same for LOAD_ATTR # 3.14 does this for BUILD_INTERPOLATION elif self._opcode in BITFLAG_OPCODES and isinstance(self._arg, tuple): - assert len(self._arg) == 2 arg = self._arg[0] # 3.12 does a similar trick for LOAD_SUPER_ATTR elif self._opcode in BITFLAG2_OPCODES and isinstance(self._arg, tuple): - assert len(self._arg) == 3 arg = self._arg[0] elif not isinstance(self._arg, int) or self._opcode in _opcode.hasconst: # Argument 
is either a non-integer or an integer constant, From 094f42de73ed09809fb402bc06b7cc74d4cf3f4a Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Thu, 20 Nov 2025 12:05:58 +0000 Subject: [PATCH 2/2] fix linting --- src/bytecode/cfg.py | 1 + src/bytecode/concrete.py | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/bytecode/cfg.py b/src/bytecode/cfg.py index bb9bfcd5..2ef7e68c 100644 --- a/src/bytecode/cfg.py +++ b/src/bytecode/cfg.py @@ -840,6 +840,7 @@ def from_bytecode(bytecode: _bytecode.Bytecode) -> "ControlFlowGraph": # The last instruction is final, if the current instruction is a # TryEnd insert it in the same block and move to the next instruction if last_instr.is_final() and isinstance(instr, TryEnd): + assert active_try_begin is not None nte = instr.copy() nte.entry = try_begins[active_try_begin][-1] old_block.append(nte) diff --git a/src/bytecode/concrete.py b/src/bytecode/concrete.py index 35a9b328..0f60865c 100644 --- a/src/bytecode/concrete.py +++ b/src/bytecode/concrete.py @@ -19,6 +19,7 @@ Type, TypeVar, Union, + cast, ) # alias to keep the 'bytecode' variable free @@ -640,6 +641,8 @@ def _pack_location( # We enforce the end_lineno to be defined else: + assert end_col_offset is not None + # Short forms if ( end_lineno == l_lineno @@ -671,6 +674,8 @@ def _pack_location( # Long form else: + assert end_lineno is not None + packed.extend( ( self._pack_location_header(14, size), @@ -1304,8 +1309,9 @@ def concrete_instructions(self) -> None: c_arg = self.add_const(arg) elif opcode in HAS_LOCAL: if opcode in DUAL_ARG_OPCODES: - arg1_index = self.add(self.varnames, arg[0]) - arg2_index = self.add(self.varnames, arg[1]) + _arg2 = cast(Tuple[str, str], arg) + arg1_index = self.add(self.varnames, _arg2[0]) + arg2_index = self.add(self.varnames, _arg2[1]) if arg1_index > 16 or arg2_index > 16: n1, n2 = DUAL_ARG_OPCODES_SINGLE_OPS[opcode] c_instr = ConcreteInstr(n1, arg1_index, location=location) @@ -1338,15 
+1344,9 @@ def concrete_instructions(self) -> None: assert False, arg # noqa c_arg = int(arg[0]) + (index << 1) elif opcode in BITFLAG2_OPCODES: - assert ( - isinstance(arg, tuple) - and len(arg) == 3 - and isinstance(arg[0], bool) - and isinstance(arg[1], bool) - and isinstance(arg[2], str) - ), arg - index = self.add(self.names, arg[2]) - c_arg = int(arg[0]) + 2 * int(arg[1]) + (index << 2) + _arg3 = cast(Tuple[bool, bool, str], arg) + index = self.add(self.names, _arg3[2]) + c_arg = int(_arg3[0]) + 2 * int(_arg3[1]) + (index << 2) else: assert isinstance(arg, str), f"Got {arg}, expected a str" c_arg = self.add(self.names, arg) @@ -1355,6 +1355,7 @@ def concrete_instructions(self) -> None: cell_instrs.append(len(self.instructions)) c_arg = self.bytecode.cellvars.index(arg.name) else: + assert isinstance(arg, FreeVar) free_instrs.append(len(self.instructions)) c_arg = self.bytecode.freevars.index(arg.name) elif opcode in HAS_COMPARE: