Skip to content

Commit 3b4dff7

Browse files
chore: Script to find undocumented attributes
1 parent dee861e commit 3b4dff7

File tree

1 file changed

+286
-0
lines changed

1 file changed

+286
-0
lines changed
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
"""
2+
Best-effort script to find all attribute names set by the SDK that are not documented in sentry-conventions.
3+
Install both the `sentry_sdk` and `sentry_conventions` packages in your environment to run the script.
4+
"""
5+
6+
from dataclasses import dataclass
7+
8+
import ast
9+
import re
10+
from pathlib import Path
11+
from typing import Any
12+
13+
from sentry_sdk.consts import SPANDATA
14+
from sentry_conventions.attributes import ATTRIBUTE_NAMES
15+
16+
# Key expressions that are allowed to stay unresolved, grouped by the file they occur in.
# Each value is a list of regular expressions that `main()` matches with `re.search` against the
# source text of the unresolved key argument.
ALLOWED_EXPRESSION_PATTERNS_FOR_UNRESOLVED_KEYS = {
    # User-provided attributes in the `sentry_data` keyword argument of the `ai_track` decorator.
    Path("sentry_sdk") / "ai" / "monitoring.py": [r"^k$"],
    # Caller provides parameter name to `set_data_normalized()`.
    Path("sentry_sdk") / "ai" / "utils.py": [r"^key$"],
    # OTel span attributes from external instrumentation.
    Path("sentry_sdk") / "integrations" / "opentelemetry" / "span_processor.py": [
        r"^key$"
    ],
    # Rust extensions instrumented with the `tracing` crate.
    Path("sentry_sdk") / "integrations" / "rust_tracing.py": [r"^field$", r"^key$"],
    # Determined based on MCP tool parameters only known at runtime.
    Path("sentry_sdk") / "integrations" / "mcp.py": [r"mcp\.request\.argument"],
}
30+
31+
32+
@dataclass
class ResolvedSetDataKey:
    """
    An attribute key whose value could be evaluated statically.
    """

    # Evaluated key expression; compared against the sentry-conventions attribute names in `main()`.
    value: str
    # File that contains the call setting the attribute.
    path: Path
    # Line number of that call within `path`.
    line_number: int
37+
38+
39+
@dataclass
class UnresolvedSetDataKey:
    """
    An attribute key whose value could not be evaluated statically.
    """

    # Source text of the key argument, as produced by `ast.unparse`.
    argument_expression: str
    # File that contains the call setting the attribute.
    path: Path
    # Line number of that call within `path`.
    line_number: int
44+
45+
46+
def try_eval(node: "ast.expr", namespace: "dict[str, Any]", path: "Path") -> "Any":
    """
    Evaluate expressions that can be statically resolved with the namespace.

    Returns the value of the expression, or `None` if compiling or evaluating it raises.
    An expression that legitimately evaluates to `None` is therefore indistinguishable from one
    that could not be evaluated.
    """
    try:
        # NOTE: `eval` executes code taken from the scanned files, so only run this on trusted sources.
        return eval(compile(ast.Expression(body=node), path, "eval"), namespace)
    except Exception:
        # Best-effort: any failure means the expression is not statically resolvable.
        return None
54+
55+
56+
def evaluate_dictionary_keys(
    node: "ast.Dict", namespace: "dict[str, Any]", path: "Path"
) -> "dict[Any, None]":
    """
    Evaluate dict literal keys that can be statically resolved with the namespace.

    Returns a dictionary that maps every resolvable key to `None`. Keys that cannot be evaluated
    and `**` unpacking entries are left out, so the result may be empty but is never `None`.
    """
    partial = {}
    for key_node in node.keys:
        # `ast.Dict` stores `None` as the key of a `**mapping` unpacking entry.
        if key_node is None:
            continue

        resolved_key = try_eval(node=key_node, namespace=namespace, path=path)
        if resolved_key is None:
            continue

        # Dictionary values do not matter as attribute names only appear as dictionary keys.
        try:
            partial[resolved_key] = None
        except TypeError:
            # The best-effort namespace resolved the key to an unhashable value; skip it instead of
            # aborting the scan.
            continue

    return partial
75+
76+
77+
def build_file_namespace(
    tree: "ast.AST", namespace: "dict[str, Any]", path: "Path"
) -> "dict[str, Any]":
    """
    Walk tree and add assignment targets to the namespace, including annotated assignments and subscripted assignments.

    `namespace` is updated in place and also returned. Assignments from every scope in the file
    end up in this single namespace. `ast.walk` makes no ordering guarantee, so an assignment may
    be visited before the names it depends on were added; such values simply stay unresolved.
    """
    for node in ast.walk(tree):
        # Only single-target assignments and annotated assignments that carry a value are considered.
        if isinstance(node, ast.Assign) and len(node.targets) == 1:
            target, value = node.targets[0], node.value
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            target, value = node.target, node.value
        else:
            continue

        if isinstance(target, ast.Name):
            val = try_eval(node=value, namespace=namespace, path=path)
            if val is not None:
                namespace[target.id] = val
            elif isinstance(value, ast.Dict):
                # Store dictionary with the subset of keys that could be statically resolved in the namespace.
                namespace[target.id] = evaluate_dictionary_keys(value, namespace, path)

        elif (
            isinstance(target, ast.Subscript)
            and isinstance(target.value, ast.Name)
            and target.value.id in namespace
            and isinstance(namespace[target.value.id], dict)
        ):
            # `name[key] = ...` on a dictionary that is already tracked: record the additional key.
            key = try_eval(
                node=target.slice,
                namespace=namespace,
                path=path,
            )
            if key is None:
                continue

            try:
                namespace[target.value.id][key] = None
            except TypeError:
                # The key resolved to an unhashable value; skip it instead of aborting the scan.
                continue

    return namespace
116+
117+
118+
def get_key_node(node: "ast.Call") -> "ast.expr | None":
    """
    Get attribute key if the node is either a `Span.set_data()` method call or a `set_data_normalized()` function call.

    The key is taken from the first positional argument of `<obj>.set_data(...)` or from the second
    positional argument of `set_data_normalized(...)`. If the call has too few positional
    arguments, a `key=` keyword argument is used instead. Returns `None` for any other call and
    for matching calls in which no key argument can be found.
    """
    if isinstance(node.func, ast.Attribute) and node.func.attr == "set_data":
        key_position = 0
    elif isinstance(node.func, ast.Name) and node.func.id == "set_data_normalized":
        key_position = 1
    else:
        return None

    if len(node.args) > key_position:
        return node.args[key_position]

    # NOTE(review): assumes the parameter is named `key` in both callables; confirm against the SDK.
    for keyword in node.keywords:
        if keyword.arg == "key":
            return keyword.value

    return None
127+
128+
129+
class SetDataKeysCollector(ast.NodeVisitor):
    """
    AST traversal that collects all attribute keys passed to functions that set an attribute on a span.
    A best-effort name resolution evaluates expressions given the namespace and by tracing for loop
    variables.

    After `visit()` has run, `resolved` holds the keys that could be evaluated and `unresolved`
    holds the source expressions of the keys that could not.
    """

    def __init__(self, namespace: "dict[str, Any]", path: "Path") -> None:
        self.namespace = namespace
        self.path = path
        # `for` statements enclosing the node that is currently visited, outermost first.
        self.for_stack: "list[ast.For]" = []
        self.resolved: "list[ResolvedSetDataKey]" = []
        self.unresolved: "list[UnresolvedSetDataKey]" = []

    def visit_For(self, node: "ast.For") -> None:
        """Track the loop while its children are visited so loop variables can be resolved."""
        self.for_stack.append(node)
        self.generic_visit(node)
        self.for_stack.pop()

    def visit_Call(self, node: "ast.Call") -> None:
        """Record the key of calls that set span data, then continue into nested calls."""
        key_node = get_key_node(node)
        if key_node is not None:
            self._resolve(node, key_node)
        self.generic_visit(node)

    def _resolve(self, node: "ast.Call", key_node: "ast.expr") -> None:
        """Store the key of `node` as resolved if it can be evaluated, otherwise as unresolved."""
        direct = try_eval(key_node, self.namespace, self.path)
        if direct is not None:
            self.resolved.append(
                ResolvedSetDataKey(
                    value=direct,
                    path=self.path,
                    line_number=node.lineno,
                )
            )
            return

        if self.for_stack:
            set_data_keys = self._eval_via_loop(key_node)
            # An empty result carries no information, so the key falls through to unresolved.
            if set_data_keys:
                self.resolved += [
                    ResolvedSetDataKey(
                        value=value, path=self.path, line_number=node.lineno
                    )
                    for value in set_data_keys
                ]
                return

        # The key is considered unresolved as neither direct evaluation nor resolution via loop variables worked.
        self.unresolved.append(
            UnresolvedSetDataKey(
                argument_expression=ast.unparse(key_node),
                path=self.path,
                line_number=node.lineno,
            )
        )

    def _eval_via_loop(self, key_node: "ast.expr") -> "list[str] | None":
        """
        Evaluate the key once per iteration of the enclosing `for` loops.

        The innermost loop is tried on its own first. If that fails, enclosing loops are added one
        at a time so that keys depending on an outer loop variable can be resolved as well.
        Returns the list of evaluated keys, or `None` if no combination of loops can be evaluated.
        """
        for depth in range(1, len(self.for_stack) + 1):
            # Trick: build a list comprehension that mirrors the for loop statement(s).
            list_comprehension = ast.ListComp(
                elt=key_node,
                generators=[
                    ast.comprehension(
                        target=for_node.target,
                        iter=for_node.iter,
                        ifs=[],
                        is_async=0,
                    )
                    for for_node in self.for_stack[-depth:]
                ],
            )

            ast.fix_missing_locations(
                list_comprehension
            )  # Adds information required to call compile.
            try:
                return eval(
                    compile(ast.Expression(body=list_comprehension), self.path, "eval"),
                    self.namespace,
                )
            except Exception:
                # Best-effort, like `try_eval`: any failure (unknown name, non-iterable value,
                # expression that does not compile, ...) means these loops cannot resolve the key.
                continue

        return None
214+
215+
216+
def format_unknown_resolved_attributes(keys: "list[ResolvedSetDataKey]") -> "str":
    """
    Build the report for resolved attribute names that are not in sentry-conventions.

    The result consists of a header, an empty line and one `value (path:line)` entry per key, and
    ends with a newline.
    """
    lines = [
        "The following resolved string attribute names are not in sentry-conventions:\n"
    ]
    lines.extend(f"{key.value} ({key.path}:{key.line_number})" for key in keys)
    return "\n".join(lines) + "\n"
223+
224+
225+
def format_unresolved_attributes(keys: "list[UnresolvedSetDataKey]") -> "str":
    """
    Build the report for key expressions that could not be resolved to an attribute name.

    The result consists of a header, an empty line and one `expression (path:line)` entry per key,
    without a trailing newline.
    """
    lines = [
        "The following unresolved expressions for attribute names may not be in sentry-conventions:\n"
    ]
    lines.extend(
        f"{key.argument_expression} ({key.path}:{key.line_number})" for key in keys
    )
    return "\n".join(lines)
232+
233+
234+
def main():
    """
    Scan the SDK sources and raise if attribute names that are not in sentry-conventions are found.

    Every `*.py` file below the `sentry_sdk` directory (relative to the current working directory)
    is parsed, and the keys of all calls recognised by `get_key_node()` are collected.

    Raises:
        FileNotFoundError: If there is no `sentry_sdk` directory in the current working directory.
        Exception: If a resolved key is not a sentry-conventions attribute name, or if an unresolved
            key expression is not covered by `ALLOWED_EXPRESSION_PATTERNS_FOR_UNRESOLVED_KEYS`.
            The details are attached as notes.
    """
    sdk_root = Path("sentry_sdk")
    if not sdk_root.is_dir():
        # Without this guard nothing would be scanned and the check would pass vacuously.
        raise FileNotFoundError(
            f"Directory '{sdk_root}' not found relative to the current working directory."
        )

    # Includes special attributes (with double underscores), but only proper attributes should match.
    convention_keys = vars(ATTRIBUTE_NAMES).values()

    all_resolved: "list[ResolvedSetDataKey]" = []
    all_unresolved: "list[UnresolvedSetDataKey]" = []

    # Sorted so that the report order does not depend on filesystem enumeration order.
    for path in sorted(sdk_root.rglob("*.py")):
        # Bytes are passed so that the parser applies the Python source encoding rules instead of
        # the locale's default text encoding.
        tree = ast.parse(path.read_bytes(), path)

        # A limited namespace is used to resolve keys as resolution is best-effort and some keys depend on runtime values.
        # In practice, span attribute names are often stored as dictionary keys in the same file.
        namespace = build_file_namespace(
            tree=tree,
            namespace={"SPANDATA": SPANDATA},
            path=path,
        )

        collector = SetDataKeysCollector(namespace=namespace, path=path)
        collector.visit(tree)

        all_resolved += collector.resolved
        all_unresolved += collector.unresolved

    unknown_resolved_keys = [
        resolved for resolved in all_resolved if resolved.value not in convention_keys
    ]

    # Drop unresolved expressions that are explicitly allowed for the file they occur in.
    truly_unresolved = []
    for unresolved_set_data in all_unresolved:
        patterns = ALLOWED_EXPRESSION_PATTERNS_FOR_UNRESOLVED_KEYS.get(
            unresolved_set_data.path
        )
        if patterns and any(
            re.search(p, unresolved_set_data.argument_expression) for p in patterns
        ):
            continue
        truly_unresolved.append(unresolved_set_data)

    if unknown_resolved_keys or truly_unresolved:
        exc = Exception("Undocumented attributes not in the allow-list detected.")

        if unknown_resolved_keys:
            exc.add_note(format_unknown_resolved_attributes(unknown_resolved_keys))

        if truly_unresolved:
            exc.add_note(format_unresolved_attributes(truly_unresolved))

        raise exc
283+
284+
285+
# Run the check only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)