Skip to content

Commit c2a6a30

Browse files
authored
Merge branch 'main' into scoping-policy-typing-fix
2 parents 6a5db86 + 9aea7d8 commit c2a6a30

3 files changed

Lines changed: 129 additions & 41 deletions

File tree

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ Changes
1010
* FIX: ``ScopingPolicy`` members now type-check properly as instances
1111
thereof -- at least on Python versions (3.11+) where ``enum.StrEnum``
1212
is available (#427)
13+
* FIX: Bytecodes of profiled functions now always labeled to prevent
14+
confusion with non-profiled "twins" (#425)
1315

1416

1517
5.0.2

line_profiler/_line_profiler.pyx

Lines changed: 31 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ import opcode
3131
import os
3232
import types
3333
from warnings import warn
34-
from weakref import WeakSet
3534

3635
from line_profiler._diagnostics import (
3736
WRAP_TRACE, SET_FRAME_LOCAL_TRACE, USE_LEGACY_TRACE
@@ -1067,9 +1066,6 @@ cdef class LineProfiler:
10671066
_managers = {}
10681067
# type: ClassVar[dict[bytes, int]], bytes = bytecode
10691068
_all_paddings = {}
1070-
# type: ClassVar[dict[int, weakref.WeakSet[LineProfiler]]],
1071-
# int = func id
1072-
_all_instances_by_funcs = {}
10731069

10741070
def __init__(self, *functions,
10751071
wrap_trace=None, set_frame_local_trace=None):
@@ -1139,26 +1135,23 @@ datamodel.html#user-defined-functions
11391135
code_hashes = []
11401136
if any(co_code): # Normal Python functions
11411137
# Figure out how much padding we need and strip the bytecode
1138+
# Notes:
1139+
# - Profiled functions are always padded, so as to
1140+
# distinguish between them and unprofiled bytecode twins
1141+
# - `npad` is strictly increasing, except when the function
1142+
# has already been padded -- we assume that is solely
1143+
# because a (could be the same) profiler instance has seen
1144+
# it
11421145
base_co_code: bytes
1143-
npad_code: int
1144-
base_co_code, npad_code = multibyte_rstrip(co_code)
1145-
try:
1146-
npad = self._all_paddings[base_co_code]
1147-
except KeyError:
1148-
npad = 0
1149-
self._all_paddings[base_co_code] = max(npad, npad_code) + 1
1150-
try:
1151-
profilers_to_update = self._all_instances_by_funcs[func_id]
1152-
profilers_to_update.add(self)
1153-
except KeyError:
1154-
profilers_to_update = WeakSet({self})
1155-
self._all_instances_by_funcs[func_id] = profilers_to_update
1156-
# Maintain `.dupes_map` (legacy)
1157-
try:
1158-
self.dupes_map[base_co_code].append(code)
1159-
except KeyError:
1160-
self.dupes_map[base_co_code] = [code]
1161-
if npad > npad_code:
1146+
npad: int
1147+
is_padded: int
1148+
base_co_code, is_padded = multibyte_rstrip(co_code)
1149+
if not is_padded:
1150+
try:
1151+
npad = self._all_paddings[base_co_code]
1152+
except KeyError:
1153+
npad = 1
1154+
self._all_paddings[base_co_code] = npad + 1
11621155
# Code hash already exists, so there must be a duplicate
11631156
# function (on some instance);
11641157
# (re-)pad with no-op
@@ -1168,8 +1161,11 @@ datamodel.html#user-defined-functions
11681161
func.__code__ = code
11691162
except AttributeError as e:
11701163
func.__func__.__code__ = code
1171-
else: # No re-padding -> no need to update the other profs
1172-
profilers_to_update = {self}
1164+
# Maintain `.dupes_map` (legacy)
1165+
try:
1166+
self.dupes_map[base_co_code].append(code)
1167+
except KeyError:
1168+
self.dupes_map[base_co_code] = [code]
11731169
# TODO: Since each line can be many bytecodes, this is kinda
11741170
# inefficient
11751171
# See if this can be sped up by not needing to iterate over
@@ -1204,21 +1200,17 @@ datamodel.html#user-defined-functions
12041200
# because Cython shim code objects don't support local
12051201
# events
12061202
code = code.replace(co_filename=cython_source)
1207-
profilers_to_update = {self}
12081203
# Update `._c_code_map` and `.code_hash_map` with the new line
1209-
# hashes on `self` (and other instances profiling the same
1210-
# function if we padded the bytecode)
1211-
for instance in profilers_to_update:
1212-
prof = <LineProfiler>instance
1213-
try:
1214-
line_hashes = prof.code_hash_map[code]
1215-
except KeyError:
1216-
line_hashes = prof.code_hash_map[code] = []
1217-
for code_hash in code_hashes:
1218-
line_hash = <int64>code_hash
1219-
if not prof._c_code_map.count(line_hash):
1220-
line_hashes.append(line_hash)
1221-
prof._c_code_map[line_hash]
1204+
# hashes
1205+
try:
1206+
line_hashes = self.code_hash_map[code]
1207+
except KeyError:
1208+
line_hashes = self.code_hash_map[code] = []
1209+
for code_hash in code_hashes:
1210+
line_hash = <int64>code_hash
1211+
if not self._c_code_map.count(line_hash):
1212+
line_hashes.append(line_hash)
1213+
self._c_code_map[line_hash]
12221214

12231215
self.functions.append(func)
12241216

tests/test_line_profiler.py

Lines changed: 96 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
import io
88
import os
99
import pickle
10+
import subprocess
1011
import sys
1112
import textwrap
1213
import types
14+
from pathlib import Path
1315
from tempfile import TemporaryDirectory
1416
import pytest
17+
from ubelt import ChDir
1518
from line_profiler import _line_profiler, LineProfiler, LineStats
1619

1720

@@ -989,6 +992,94 @@ def func(n):
989992
assert entries[-2][1] == 10 + 20
990993

991994

995+
def test_nonprofiled_clashing_bytecodes(tmp_path_factory):
996+
"""
997+
Test that the profiler can distinguish between a profiled function
998+
and a non-profiled one compiling down to the same bytecode.
999+
"""
1000+
# See issue #424
1001+
template = textwrap.dedent("""
1002+
def {}(n): # Any function using this compiles to the same bytecode
1003+
x = 0
1004+
for n in range(1, n + 1):
1005+
x += n
1006+
return x
1007+
""").strip('\n')
1008+
module_name = 'my_module'
1009+
script_name = 'my-script.py'
1010+
outfile = 'out.lprof'
1011+
func_p = 'profiled_func'
1012+
func_no_p = 'nonprofiled_func'
1013+
1014+
# Note: bytecode padding depends on the existence of duplicates,
1015+
# which are counted throughout the lifetime of the `LineProfiler`
1016+
# class. To ensure that we start on a clean slate -- that
1017+
# `LineProfiler` isn't "polluted" by running prior tests -- run the
1018+
# profiling in a subprocess.
1019+
with ChDir(tmp_path_factory.mktemp('test_nonprofiled_clashing_bytecodes')):
1020+
syspath_annex = (Path.cwd() / 'syspath').resolve()
1021+
syspath_annex.mkdir()
1022+
with (syspath_annex / (module_name + '.py')).open('w') as fobj:
1023+
print(
1024+
textwrap.dedent("""
1025+
'''
1026+
This docstring is fluff to make the function definitions overlap in
1027+
line numbers.
1028+
'''
1029+
1030+
1031+
{fp_def}
1032+
""").strip('\n').format(fp_def=template.format(func_p)),
1033+
file=fobj,
1034+
)
1035+
with open(script_name, 'w') as fobj:
1036+
print(
1037+
textwrap.dedent("""
1038+
from line_profiler import LineProfiler
1039+
from {mod} import {fp_name}
1040+
1041+
1042+
{fnp_def}
1043+
1044+
1045+
if __name__ == '__main__':
1046+
prof = LineProfiler()
1047+
prof.add_callable({fp_name})
1048+
with prof:
1049+
# The context turns on profiling for the call, but it
1050+
# shouldn't do anything since the imported and profiled
1051+
# function is not called
1052+
{fnp_name}(10)
1053+
prof.dump_stats({out!r})
1054+
""").strip('\n').format(
1055+
mod=module_name,
1056+
fp_name=func_p,
1057+
fnp_name=func_no_p,
1058+
fnp_def=template.format(func_no_p),
1059+
out=outfile,
1060+
),
1061+
file=fobj,
1062+
)
1063+
1064+
syspath = os.environ.get('PYTHONPATH', '')
1065+
syspath = ('{}:{}' if syspath else '{}').format(syspath_annex, syspath)
1066+
subprocess.run(
1067+
[sys.executable, script_name],
1068+
check=True, env={**os.environ, 'PYTHONPATH': syspath},
1069+
)
1070+
1071+
assert os.path.exists(outfile)
1072+
stats = LineStats.from_files(outfile)
1073+
stats.print() # For debugging purposes
1074+
1075+
# There should only be one function profiled (`profiled_func()`),
1076+
# which however doesn't have any actual data because it was never
1077+
# called
1078+
((*_, func_name), data), = stats.timings.items()
1079+
assert (func_name == func_p), stats
1080+
assert (not data), stats
1081+
1082+
9921083
@pytest.mark.parametrize('force_same_line_numbers', [True, False])
9931084
@pytest.mark.parametrize(
9941085
'ops',
@@ -1157,6 +1248,11 @@ def test_aggregate_profiling_data_between_code_versions():
11571248
Test that profiling data from previous versions of the code object
11581249
are preserved when another profiler causes the code object of a
11591250
function to be overwritten.
1251+
1252+
Note
1253+
----
1254+
Now obsolete because we no longer double-pad/overwrite the function
1255+
bytecode if it has already been seen by another profiler instance.
11601256
"""
11611257

11621258
def func(n):
@@ -1171,10 +1267,8 @@ def func(n):
11711267
# Gather data with `@prof1`
11721268
wrapper1 = prof1(func)
11731269
assert wrapper1(10) == 10 * 11 // 2
1174-
code = func.__code__
11751270
# Gather data with `@prof2`; the code object is overwritten here
11761271
wrapper2 = prof2(wrapper1)
1177-
assert func.__code__ != code
11781272
assert wrapper2(15) == 15 * 16 // 2
11791273
# Despite the overwrite of the code object, the old data should
11801274
# still remain, and be aggregated with the new data when calling

0 commit comments

Comments
 (0)