Merge branch 'main' into scoping-policy-typing-fix

Erotemic · web-flow · commit c2a6a30326a8 · 2026-04-15T10:57:25.000-04:00
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -10,6 +10,8 @@ Changes
 * FIX: ``ScopingPolicy`` members now type-check properly as instances
   thereof -- at least on Python versions (3.11+) where ``enum.StrEnum``
   is available (#427)
+* FIX: Bytecodes of profiled functions are now always labeled to prevent
+  confusion with non-profiled "twins" (#425)
 
 
 5.0.2
diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx
@@ -31,7 +31,6 @@ import opcode
 import os
 import types
 from warnings import warn
-from weakref import WeakSet
 
 from line_profiler._diagnostics import (
     WRAP_TRACE, SET_FRAME_LOCAL_TRACE, USE_LEGACY_TRACE
@@ -1067,9 +1066,6 @@ cdef class LineProfiler:
     _managers = {}
     # type: ClassVar[dict[bytes, int]], bytes = bytecode
     _all_paddings = {}
-    # type: ClassVar[dict[int, weakref.WeakSet[LineProfiler]]],
-    # int = func id
-    _all_instances_by_funcs = {}
 
     def __init__(self, *functions,
                  wrap_trace=None, set_frame_local_trace=None):
@@ -1139,26 +1135,23 @@ datamodel.html#user-defined-functions
         code_hashes = []
         if any(co_code):  # Normal Python functions
             # Figure out how much padding we need and strip the bytecode
+            # Notes:
+            # - Profiled functions are always padded, so as to
+            #   distinguish between them and unprofiled bytecode twins
+            # - `npad` is strictly increasing, except when the function
+            #   has already been padded -- we assume that is solely
+            #   because a profiler instance (possibly this same one) has
+            #   already seen it
             base_co_code: bytes
-            npad_code: int
-            base_co_code, npad_code = multibyte_rstrip(co_code)
-            try:
-                npad = self._all_paddings[base_co_code]
-            except KeyError:
-                npad = 0
-            self._all_paddings[base_co_code] = max(npad, npad_code) + 1
-            try:
-                profilers_to_update = self._all_instances_by_funcs[func_id]
-                profilers_to_update.add(self)
-            except KeyError:
-                profilers_to_update = WeakSet({self})
-                self._all_instances_by_funcs[func_id] = profilers_to_update
-            # Maintain `.dupes_map` (legacy)
-            try:
-                self.dupes_map[base_co_code].append(code)
-            except KeyError:
-                self.dupes_map[base_co_code] = [code]
-            if npad > npad_code:
+            npad: int
+            is_padded: int
+            base_co_code, is_padded = multibyte_rstrip(co_code)
+            if not is_padded:
+                try:
+                    npad = self._all_paddings[base_co_code]
+                except KeyError:
+                    npad = 1
+                self._all_paddings[base_co_code] = npad + 1
                 # Code hash already exists, so there must be a duplicate
                 # function (on some instance);
                 # (re-)pad with no-op
@@ -1168,8 +1161,11 @@ datamodel.html#user-defined-functions
                     func.__code__ = code
                 except AttributeError as e:
                     func.__func__.__code__ = code
-            else:  # No re-padding -> no need to update the other profs
-                profilers_to_update = {self}
+            # Maintain `.dupes_map` (legacy)
+            try:
+                self.dupes_map[base_co_code].append(code)
+            except KeyError:
+                self.dupes_map[base_co_code] = [code]
             # TODO: Since each line can be many bytecodes, this is kinda
             # inefficient
             # See if this can be sped up by not needing to iterate over
@@ -1204,21 +1200,17 @@ datamodel.html#user-defined-functions
             # because Cython shim code objects don't support local
             # events
             code = code.replace(co_filename=cython_source)
-            profilers_to_update = {self}
         # Update `._c_code_map` and `.code_hash_map` with the new line
-        # hashes on `self` (and other instances profiling the same
-        # function if we padded the bytecode)
-        for instance in profilers_to_update:
-            prof = <LineProfiler>instance
-            try:
-                line_hashes = prof.code_hash_map[code]
-            except KeyError:
-                line_hashes = prof.code_hash_map[code] = []
-            for code_hash in code_hashes:
-                line_hash = <int64>code_hash
-                if not prof._c_code_map.count(line_hash):
-                    line_hashes.append(line_hash)
-                    prof._c_code_map[line_hash]
+        # hashes
+        try:
+            line_hashes = self.code_hash_map[code]
+        except KeyError:
+            line_hashes = self.code_hash_map[code] = []
+        for code_hash in code_hashes:
+            line_hash = <int64>code_hash
+            if not self._c_code_map.count(line_hash):
+                line_hashes.append(line_hash)
+                self._c_code_map[line_hash]
 
         self.functions.append(func)
 
diff --git a/tests/test_line_profiler.py b/tests/test_line_profiler.py
@@ -7,11 +7,14 @@
 import io
 import os
 import pickle
+import subprocess
 import sys
 import textwrap
 import types
+from pathlib import Path
 from tempfile import TemporaryDirectory
 import pytest
+from ubelt import ChDir
 from line_profiler import _line_profiler, LineProfiler, LineStats
 
 
@@ -989,6 +992,94 @@ def func(n):
     assert entries[-2][1] == 10 + 20
 
 
+def test_nonprofiled_clashing_bytecodes(tmp_path_factory):
+    """
+    Test that the profiler can distinguish between a profiled function
+    and a non-profiled one compiling down to the same bytecode.
+    """
+    # See issue #424
+    template = textwrap.dedent("""
+    def {}(n):  # Any function using this compiles to the same bytecode
+        x = 0
+        for n in range(1, n + 1):
+            x += n
+        return x
+    """).strip('\n')
+    module_name = 'my_module'
+    script_name = 'my-script.py'
+    outfile = 'out.lprof'
+    func_p = 'profiled_func'
+    func_no_p = 'nonprofiled_func'
+
+    # Note: bytecode padding depends on the existence of duplicates,
+    # which are counted throughout the lifetime of the `LineProfiler`
+    # class. To ensure that we start on a clean slate -- that
+    # `LineProfiler` isn't "polluted" by running prior tests -- run the
+    # profiling in a subprocess.
+    with ChDir(tmp_path_factory.mktemp('test_nonprofiled_clashing_bytecodes')):
+        syspath_annex = (Path.cwd() / 'syspath').resolve()
+        syspath_annex.mkdir()
+        with (syspath_annex / (module_name + '.py')).open('w') as fobj:
+            print(
+                textwrap.dedent("""
+    '''
+    This docstring is fluff to make the function definitions overlap in
+    line numbers.
+    '''
+
+
+    {fp_def}
+                """).strip('\n').format(fp_def=template.format(func_p)),
+                file=fobj,
+            )
+        with open(script_name, 'w') as fobj:
+            print(
+                textwrap.dedent("""
+    from line_profiler import LineProfiler
+    from {mod} import {fp_name}
+
+
+    {fnp_def}
+
+
+    if __name__ == '__main__':
+        prof = LineProfiler()
+        prof.add_callable({fp_name})
+        with prof:
+            # The context turns on profiling for the call, but it
+            # shouldn't do anything since the imported and profiled
+            # function is not called
+            {fnp_name}(10)
+        prof.dump_stats({out!r})
+                """).strip('\n').format(
+                    mod=module_name,
+                    fp_name=func_p,
+                    fnp_name=func_no_p,
+                    fnp_def=template.format(func_no_p),
+                    out=outfile,
+                ),
+                file=fobj,
+            )
+
+        syspath = os.environ.get('PYTHONPATH', '')
+        syspath = ('{}:{}' if syspath else '{}').format(syspath_annex, syspath)
+        subprocess.run(
+            [sys.executable, script_name],
+            check=True, env={**os.environ, 'PYTHONPATH': syspath},
+        )
+
+        assert os.path.exists(outfile)
+        stats = LineStats.from_files(outfile)
+        stats.print()  # For debugging purposes
+
+    # There should only be one function profiled (`profiled_func()`),
+    # which however doesn't have any actual data because it was never
+    # called
+    ((*_, func_name), data), = stats.timings.items()
+    assert (func_name == func_p), stats
+    assert (not data), stats
+
+
 @pytest.mark.parametrize('force_same_line_numbers', [True, False])
 @pytest.mark.parametrize(
     'ops',
@@ -1157,6 +1248,11 @@ def test_aggregate_profiling_data_between_code_versions():
     Test that profiling data from previous versions of the code object
     are preserved when another profiler causes the code object of a
     function to be overwritten.
+
+    Note
+    ----
+    Now obsolete because we no longer double-pad/overwrite the function
+    bytecode if it has already been seen by another profiler instance.
     """
 
     def func(n):
@@ -1171,10 +1267,8 @@ def func(n):
     # Gather data with `@prof1`
     wrapper1 = prof1(func)
     assert wrapper1(10) == 10 * 11 // 2
-    code = func.__code__
     # Gather data with `@prof2`; the code object is overwritten here
     wrapper2 = prof2(wrapper1)
-    assert func.__code__ != code
     assert wrapper2(15) == 15 * 16 // 2
     # Despite the overwrite of the code object, the old data should
     # still remain, and be aggregated with the new data when calling