Skip to content

Commit d118805

Browse files
committed
Fixed bytecode-clash edge case
line_profiler/_line_profiler.pyx::LineProfiler.add_function()
    Now stripping padded code objects and re-padding when appropriate, so that no two profiled code objects can end up with the same bytecode

tests/test_line_profiler.py
    test_multiple_profilers_identical_bytecode()
    - Updated comments and docstring
    - Added check against duplicate bytecodes
1 parent 703dd97 commit d118805

2 files changed

Lines changed: 37 additions & 23 deletions

File tree

line_profiler/_line_profiler.pyx

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -378,27 +378,36 @@ cdef class LineProfiler:
378378
co_code: bytes = code.co_code
379379
code_hashes = []
380380
if any(co_code): # Normal Python functions
381+
base_co_code: bytes = co_code
382+
# Figure out how much padding we need and strip the bytecode
383+
# TODO: is there a way to do this faster? `.rstrip()` doesn't
384+
# work (reliably) since `NOP_BYTES` should be 2-byte wide
385+
npad_code: int = 0
386+
nop_len: int = -len(NOP_BYTES)
387+
while base_co_code.endswith(NOP_BYTES):
388+
base_co_code = base_co_code[:nop_len]
389+
npad_code += 1
381390
try:
382-
npad = self._all_paddings[co_code]
383-
self._all_paddings[co_code] = npad + 1
391+
npad = self._all_paddings[base_co_code]
384392
except KeyError:
385393
npad = 0
386-
self._all_paddings[co_code] = 1
394+
self._all_paddings[base_co_code] = max(npad, npad_code) + 1
387395
try:
388396
neighbors = self._all_instances_by_funcs[func_id]
389397
neighbors.add(self)
390398
except KeyError:
391-
neighbors = self._all_instances_by_funcs[func_id] = WeakSet({self})
399+
neighbors = self._all_instances_by_funcs[func_id] = WeakSet(
400+
{self})
392401
# Maintain `.dupes_map` (legacy)
393402
try:
394-
self.dupes_map[co_code].append(code)
403+
self.dupes_map[base_co_code].append(code)
395404
except KeyError:
396-
self.dupes_map[co_code] = [code]
397-
if npad:
405+
self.dupes_map[base_co_code] = [code]
406+
if npad > npad_code:
398407
# Code hash already exists, so there must be a duplicate
399408
# function (on some instance);
400-
# add no-op
401-
co_code += NOP_BYTES * npad
409+
# (re-)pad with no-op
410+
co_code = base_co_code + NOP_BYTES * npad
402411
code = _code_replace(func, co_code=co_code)
403412
try:
404413
func.__code__ = code

tests/test_line_profiler.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -845,14 +845,12 @@ def func(n):
845845
'-func3:p3:p4:p1'
846846
'-func4:p2:p1:p4',
847847
# Misc. edge cases
848-
# - Note: while the following results in `func1()` and `func2()`
849-
# sharing the same bytecodes, the profiler `p3` is nonetheless
850-
# able to distinguish between the two (when the functions have
851-
# distinct line numbers), because they are defined on
852-
# different lines and thus hashed to different line hashes
853-
'func1:p1:p2' # `func1()` padded once
854-
'-func2:p3' # `func2()` padded twice
855-
'-func1:p4:p3', # `func1()` padded once (again)
848+
# - Naive padding of the following case would cause `func1()`
849+
# and `func2()` to end up with the same bytecode, so guard
850+
# against it
851+
'func1:p1:p2' # `func1()` padded once?
852+
'-func2:p3' # `func2()` padded twice?
853+
'-func1:p4:p3', # `func1()` padded once (again)?
856854
])
857855
def test_multiple_profilers_identical_bytecode(
858856
tmp_path, ops, force_same_line_numbers):
@@ -862,10 +860,12 @@ def test_multiple_profilers_identical_bytecode(
862860
863861
Notes
864862
-----
865-
`ops` should consist of chunks joined by hyphens, where each chunk
866-
has the format `<func_id>:<prof_name>[:<prof_name>[...]]`,
867-
indicating that the profilers are to be used in order to decorate
868-
the specified function.
863+
- `ops` should consist of chunks joined by hyphens, where each chunk
864+
has the format `<func_id>:<prof_name>[:<prof_name>[...]]`,
865+
indicating that the profilers are to be used in order to decorate
866+
the specified function.
867+
- `force_same_line_numbers` is used to coerce all functions to
868+
compile down to code objects with the same line numbers.
869869
"""
870870
def check_seen(name, output, func_id, expected):
871871
lines = [line for line in output.splitlines()
@@ -941,11 +941,11 @@ def func4():
941941
'func3': func3, 'func4': func4}
942942

943943
# Apply the decorators in order
944-
all_dec_names = {}
944+
all_dec_names = {f'func{i}': set() for i in [1, 2, 3, 4]}
945945
all_profs = {}
946946
for op in ops.split('-'):
947947
func_id, *profs = op.split(':')
948-
all_dec_names.setdefault(func_id, set()).update(profs)
948+
all_dec_names[func_id].update(profs)
949949
for name in profs:
950950
try:
951951
prof = all_profs[name]
@@ -957,6 +957,11 @@ def func4():
957957
assert funcs['func2']() == [2, 2]
958958
assert funcs['func3']() == [3, 3, 3]
959959
assert funcs['func4']() == [4, 4, 4, 4]
960+
# Check that the bytecodes of the profiled functions are distinct
961+
profiled_funcs = {funcs[name].__line_profiler_id__.func
962+
for name, decs in all_dec_names.items() if decs}
963+
assert len({func.__code__.co_code
964+
for func in profiled_funcs}) == len(profiled_funcs)
960965
# Check the profiling results
961966
for name, prof in sorted(all_profs.items()):
962967
with io.StringIO() as sio:

0 commit comments

Comments
 (0)