Skip to content

Commit 61116f2

Browse files
avara1986 and claude committed
refactor(appsec): extract rel_path and get_caller_frame_info to _patch_utils
Move _rel_path() and the frame-walking logic (_compute_file_line) from VulnerabilityBase in _iast/taint_sinks/_base.py to the shared functions rel_path() and get_caller_frame_info() in _patch_utils.py, so both IAST and SCA can reuse them without depending on IAST internals. Split out from #17156 to keep PRs incremental and reviewable. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 76b1d8f commit 61116f2

2 files changed

Lines changed: 62 additions & 43 deletions

File tree

ddtrace/appsec/_iast/taint_sinks/_base.py

Lines changed: 2 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import os
2-
import sysconfig
31
from typing import Optional
42
from typing import Union
53

@@ -8,7 +6,7 @@
86
from ddtrace.appsec._iast._taint_tracking import get_ranges
97
from ddtrace.appsec._iast.sampling.vulnerability_detection import rollback_quota
108
from ddtrace.appsec._iast.sampling.vulnerability_detection import should_process_vulnerability
11-
from ddtrace.appsec._shared._stacktrace import get_info_frame
9+
from ddtrace.appsec._patch_utils import get_caller_frame_info
1210
from ddtrace.appsec._trace_utils import _asm_manual_keep
1311
from ddtrace.internal import core
1412
from ddtrace.internal.logger import get_logger
@@ -28,13 +26,8 @@
2826

2927
log = get_logger(__name__)
3028

31-
CWD = os.path.abspath(os.getcwd())
32-
3329
TEXT_TYPES = Union[str, bytes, bytearray]
3430

35-
PURELIB_PATH = sysconfig.get_path("purelib")
36-
STDLIB_PATH = sysconfig.get_path("stdlib")
37-
3831

3932
class taint_sink_deduplication(deduplication):
4033
def _check_deduplication(self):
@@ -111,40 +104,6 @@ def _prepare_report(
111104

112105
return True
113106

114-
@classmethod
def _compute_file_line(cls) -> tuple[Optional[str], Optional[int], Optional[str], Optional[str]]:
    """Return ``(file_name, line_number, function_name, class_name)`` for the reported location.

    The frame is obtained from ``get_info_frame()`` and its file name is
    shortened with ``cls._rel_path()``. A tuple of four ``None`` values is
    returned when no frame is available, when the frame has no usable file
    name, or when the file name cannot be relativized.
    """
    nothing = (None, None, None, None)

    info = get_info_frame()
    if not info:
        return nothing
    # "" and -1 in the first slot both mean "no usable frame".
    if info[0] in ("", -1):
        return nothing

    raw_path, lineno, func_name, klass_name = info
    if not raw_path:
        return nothing

    relative = cls._rel_path(raw_path)
    if relative:
        return relative, lineno, func_name, klass_name

    log.debug("Could not relativize vulnerability location path: %s", info[0])
    return nothing
131-
132-
@staticmethod
def _rel_path(file_name: str) -> str:
    """Shorten an absolute file path for vulnerability reporting.

    Backslashes in ``file_name`` are converted to forward slashes, then the
    path is made relative to the first root that contains it, tried in this
    order: ``PURELIB_PATH``, ``STDLIB_PATH``, ``CWD``. If none contains it but
    the path has a ``/site-packages/`` component, the suffix starting at that
    component (leading slash included) is returned.

    Returns an empty string when the path cannot be relativized.
    """
    file_name_norm = file_name.replace("\\", "/")

    for root in (PURELIB_PATH, STDLIB_PATH, CWD):
        # Skip unset roots: startswith(None) raises TypeError and an empty
        # root would match every path.
        if not root:
            continue
        # Match on a directory boundary so that e.g. "/home/app2/x.py" is not
        # treated as living under "/home/app".
        root_prefix = root if root.endswith("/") else root + "/"
        if file_name_norm == root or file_name_norm.startswith(root_prefix):
            return os.path.relpath(file_name_norm, start=root)

    # Fallback: keep everything from the "/site-packages/" component onwards.
    if (idx := file_name_norm.find("/site-packages/")) != -1:
        return file_name_norm[idx:]
    return ""
147-
148107
@classmethod
149108
def _create_evidence_and_report(
150109
cls,
@@ -177,7 +136,7 @@ def report(cls, evidence_value: TEXT_TYPES = "", dialect: Optional[str] = None)
177136
file_name = line_number = function_name = class_name = None
178137

179138
if not getattr(cls, "skip_location", False):
180-
file_name, line_number, function_name, class_name = cls._compute_file_line()
139+
file_name, line_number, function_name, class_name = get_caller_frame_info()
181140
if file_name is None:
182141
rollback_quota(cls.vulnerability_type)
183142
return result

ddtrace/appsec/_patch_utils.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import ctypes
2+
import os
3+
import sysconfig
24
from typing import Any
35
from typing import Callable
46
from typing import Optional
@@ -12,6 +14,64 @@
1214

1315

1416
log = get_logger(__name__)
17+
18+
# Cached paths for relativizing file paths (computed once at import time).
# The `or ""` fallback keeps the sysconfig-derived roots as strings; rel_path()
# skips any root that ends up empty.
_CWD = os.path.abspath(os.getcwd())
_PURELIB_PATH = sysconfig.get_path("purelib") or ""
_STDLIB_PATH = sysconfig.get_path("stdlib") or ""


def rel_path(file_name: str) -> str:
    """Relativize an absolute file path for vulnerability/reachability reporting.

    Used by both IAST and SCA to produce short, readable paths in telemetry
    payloads. Backslashes in ``file_name`` are converted to forward slashes,
    then the path is made relative to the first root that contains it, tried
    in this order: purelib, stdlib, the working directory captured at import
    time. If none contains it but the path has a ``/site-packages/``
    component, the suffix starting at that component (leading slash included)
    is returned.

    Returns an empty string if the path cannot be relativized.
    """
    file_name_norm = file_name.replace("\\", "/")

    for root in (_PURELIB_PATH, _STDLIB_PATH, _CWD):
        # An empty root would match every path via startswith("") and silently
        # relativize against the process CWD, so skip it.
        if not root:
            continue
        # Match on a directory boundary so that e.g. "/home/app2/x.py" is not
        # treated as living under "/home/app" (which would yield "../app2/x.py").
        root_prefix = root if root.endswith("/") else root + "/"
        if file_name_norm == root or file_name_norm.startswith(root_prefix):
            return os.path.relpath(file_name_norm, start=root)

    # Fallback: keep everything from the "/site-packages/" component onwards.
    if (idx := file_name_norm.find("/site-packages/")) != -1:
        return file_name_norm[idx:]
    return ""
44+
45+
46+
def get_caller_frame_info() -> tuple[Optional[str], Optional[int], Optional[str], Optional[str]]:
    """Walk the stack and return (file_name, line_number, function_name, class_name).

    Uses the native C get_info_frame() to skip ddtrace, stdlib, and special
    frames, then relativizes the path with rel_path(). Shared by IAST
    vulnerability reporting and SCA reachability detection.

    Returns (None, None, None, None) when the native helper cannot be
    imported, when no relevant frame is found, or when the frame's file path
    cannot be relativized.
    """
    no_frame = (None, None, None, None)

    # Imported lazily so this module stays importable when the native
    # stacktrace extension is unavailable.
    try:
        from ddtrace.appsec._shared._stacktrace import get_info_frame
    except ImportError:
        return no_frame

    frame_info = get_info_frame()
    if not frame_info:
        return no_frame

    # Inspect the first slot before unpacking: "", None and -1 all mean
    # "no usable frame".
    raw_file_name = frame_info[0]
    if not raw_file_name or raw_file_name == -1:
        return no_frame

    _, line_number, function_name, class_name = frame_info

    file_name = rel_path(raw_file_name)
    if not file_name:
        log.debug("Could not relativize caller frame path: %s", raw_file_name)
        return no_frame

    return file_name, line_number, function_name, class_name
73+
74+
1575
_DD_ORIGINAL_ATTRIBUTES: dict[Any, Any] = {}
1676

1777

0 commit comments

Comments
 (0)