"""
Best-effort script to find all attribute names set by the SDK that are not documented in sentry-conventions.
Install both the `sentry_sdk` and `sentry_conventions` packages in your environment to run the script.
Run it from the directory that contains the `sentry_sdk` source tree, as files are discovered relative to the
current working directory.
"""
| 5 | + |
| 6 | +from dataclasses import dataclass |
| 7 | + |
| 8 | +import ast |
| 9 | +import re |
| 10 | +from pathlib import Path |
| 11 | +from typing import Any |
| 12 | + |
| 13 | +from sentry_sdk.consts import SPANDATA |
| 14 | +from sentry_conventions.attributes import ATTRIBUTE_NAMES |
| 15 | + |
# Allow-list for attribute key expressions that cannot be resolved statically.
# Each source file (as yielded by `Path("sentry_sdk").rglob("*.py")`) maps to regular expressions;
# an unresolved key expression from that file is tolerated when `re.search()` finds any of them in it.
ALLOWED_EXPRESSION_PATTERNS_FOR_UNRESOLVED_KEYS = {
    # The `ai_track` decorator accepts user-provided attributes in its `sentry_data` keyword argument.
    Path("sentry_sdk", "ai", "monitoring.py"): [r"^k$"],
    # The parameter name passed to `set_data_normalized()` is chosen by the caller.
    Path("sentry_sdk", "ai", "utils.py"): [r"^key$"],
    # Span attributes originate from external OTel instrumentation.
    Path("sentry_sdk", "integrations", "opentelemetry", "span_processor.py"): [
        r"^key$",
    ],
    # Attributes come from Rust extensions instrumented with the `tracing` crate.
    Path("sentry_sdk", "integrations", "rust_tracing.py"): [
        r"^field$",
        r"^key$",
    ],
    # Names are derived from MCP tool parameters that are only known at runtime.
    Path("sentry_sdk", "integrations", "mcp.py"): [r"mcp\.request\.argument"],
}
| 30 | + |
| 31 | + |
@dataclass
class ResolvedSetDataKey:
    """
    Attribute key that could be statically resolved, together with the location of the call that sets it.
    """

    # Result of evaluating the key expression.
    value: str
    # Source file that contains the call.
    path: Path
    # Line of the call within `path`.
    line_number: int
| 37 | + |
| 38 | + |
@dataclass
class UnresolvedSetDataKey:
    """
    Attribute key that could not be statically resolved, together with the location of the call that sets it.
    """

    # Source text of the key expression as it was passed to the call.
    argument_expression: str
    # Source file that contains the call.
    path: Path
    # Line of the call within `path`.
    line_number: int
| 44 | + |
| 45 | + |
def try_eval(node: "ast.expr", namespace: "dict[str, Any]", path: "Path") -> "Any":
    """
    Statically evaluate an expression node against the namespace on a best-effort basis.

    Returns the value of the expression, or `None` when compiling or evaluating it raises an exception.
    `path` is only used as the file name of the compiled code object.
    """
    try:
        code = compile(ast.Expression(body=node), path, "eval")
        result = eval(code, namespace)
    except Exception:
        # Anything that is not resolvable with the limited namespace counts as unresolved.
        result = None
    return result
| 54 | + |
| 55 | + |
def evaluate_dictionary_keys(
    node: "ast.Dict", namespace: "dict[str, Any]", path: "Path"
) -> "dict[str, None] | None":
    """
    Build a dictionary from the keys of a dict literal that can be statically resolved with the namespace.

    Entries without a key node (`**mapping` unpacking) and keys that cannot be resolved are left out.
    """
    # Attribute names only appear as dictionary keys, so every value is a `None` placeholder.
    return {
        resolved_key: None
        for key_node in node.keys
        if key_node is not None
        and (resolved_key := try_eval(node=key_node, namespace=namespace, path=path))
        is not None
    }
| 75 | + |
| 76 | + |
| 77 | +def build_file_namespace( |
| 78 | + tree: "ast.AST", namespace: "dict[str, Any]", path: "Path" |
| 79 | +) -> "dict[str, Any]": |
| 80 | + """ |
| 81 | + Walk tree and add assignment targets to the namespace, including annotated assignments and subscripted assignments. |
| 82 | + """ |
| 83 | + for node in ast.walk(tree): |
| 84 | + if isinstance(node, ast.Assign) and len(node.targets) == 1: |
| 85 | + target, value = node.targets[0], node.value |
| 86 | + elif isinstance(node, ast.AnnAssign) and node.value is not None: |
| 87 | + target, value = node.target, node.value |
| 88 | + else: |
| 89 | + continue |
| 90 | + |
| 91 | + if isinstance(target, ast.Name): |
| 92 | + val = try_eval(node=value, namespace=namespace, path=path) |
| 93 | + if val is not None: |
| 94 | + namespace[target.id] = val |
| 95 | + elif isinstance(value, ast.Dict): |
| 96 | + # Store dictionary with the subset of keys that could be statically resolved in the namespace. |
| 97 | + namespace[target.id] = evaluate_dictionary_keys(value, namespace, path) |
| 98 | + |
| 99 | + elif ( |
| 100 | + isinstance(target, ast.Subscript) |
| 101 | + and isinstance(target.value, ast.Name) |
| 102 | + and target.value.id in namespace |
| 103 | + and isinstance(namespace[target.value.id], dict) |
| 104 | + ): |
| 105 | + key = try_eval( |
| 106 | + node=target.slice, |
| 107 | + namespace=namespace, |
| 108 | + path=path, |
| 109 | + ) |
| 110 | + if key is None: |
| 111 | + continue |
| 112 | + |
| 113 | + namespace[target.value.id][key] = None |
| 114 | + |
| 115 | + return namespace |
| 116 | + |
| 117 | + |
def get_key_node(node: "ast.Call") -> "ast.expr | None":
    """
    Get attribute key if the node is either a `Span.set_data()` method call or a `set_data_normalized()` function call.

    The key is the first positional argument of `set_data()` and the second positional argument of
    `set_data_normalized()`. When the call has too few positional arguments, the `key` keyword argument is
    used instead.

    Returns `None` for every other call, and for matching calls in which the key cannot be located
    (for example `span.set_data(**kwargs)`).
    """
    if isinstance(node.func, ast.Attribute) and node.func.attr == "set_data":
        key_position = 0
    elif isinstance(node.func, ast.Name) and node.func.id == "set_data_normalized":
        key_position = 1
    else:
        return None

    if len(node.args) > key_position:
        return node.args[key_position]

    # The key was not passed positionally; indexing `node.args` here would raise IndexError.
    # NOTE(review): assumes both callables name the parameter `key` - confirm against the SDK signatures.
    return next(
        (keyword.value for keyword in node.keywords if keyword.arg == "key"),
        None,
    )
| 127 | + |
| 128 | + |
class SetDataKeysCollector(ast.NodeVisitor):
    """
    AST traversal that collects all attribute keys passed to functions that set an attribute on a span.
    A best-effort name resolution evaluates expressions given the namespace and by tracing for loop
    variables.

    After visiting a tree, `resolved` holds the keys that could be evaluated and `unresolved` holds the
    source expressions of the keys that could not.
    """

    def __init__(self, namespace: "dict[str, Any]", path: "Path") -> None:
        self.namespace = namespace
        self.path = path
        # Enclosing `for` statements of the node being visited, innermost last.
        self.for_stack: "list[ast.For]" = []
        self.resolved: "list[ResolvedSetDataKey]" = []
        self.unresolved: "list[UnresolvedSetDataKey]" = []

    def visit_For(self, node: "ast.For") -> None:
        """
        Track the loop while its children are visited so that keys depending on the loop variable can be resolved.
        """
        self.for_stack.append(node)
        self.generic_visit(node)
        self.for_stack.pop()

    def visit_Call(self, node: "ast.Call") -> None:
        """
        Record the attribute key if the call sets span data, then continue into nested calls.
        """
        key_node = get_key_node(node)
        if key_node is not None:
            self._resolve(node, key_node)
        self.generic_visit(node)

    def _resolve(self, node: "ast.Call", key_node: "ast.expr") -> None:
        """
        Classify the key of a call as resolved or unresolved.

        Direct evaluation with the namespace is tried first. Inside a `for` loop, evaluation over the values
        of the innermost loop is tried next. Otherwise the key expression is recorded as unresolved.
        """
        direct = try_eval(key_node, self.namespace, self.path)
        if direct is not None:
            self.resolved.append(
                ResolvedSetDataKey(
                    value=direct,
                    path=self.path,
                    line_number=node.lineno,
                )
            )
            return

        if self.for_stack:
            set_data_keys = self._eval_via_loop(key_node)
            # An empty result means that no key was found, so the expression stays unresolved.
            if set_data_keys:
                self.resolved += [
                    ResolvedSetDataKey(
                        value=value, path=self.path, line_number=node.lineno
                    )
                    for value in set_data_keys
                ]
                return

        # The key is considered unresolved as neither direct evaluation nor resolution via loop variables worked.
        self.unresolved.append(
            UnresolvedSetDataKey(
                argument_expression=ast.unparse(key_node),
                path=self.path,
                line_number=node.lineno,
            )
        )

    def _eval_via_loop(self, key_node: "ast.expr") -> "list[str] | None":
        """
        Evaluate the key expression once per iteration of the innermost enclosing `for` loop.

        Returns the list of values the key takes, or `None` when the loop cannot be evaluated statically.
        Only the innermost loop is mirrored; names bound by outer loops are not available.
        """
        for_node = self.for_stack[-1]

        # Trick: build a list comprehension that mirrors the for loop statement.
        list_comprehension = ast.ListComp(
            elt=key_node,
            generators=[
                ast.comprehension(
                    target=for_node.target,
                    iter=for_node.iter,
                    ifs=[],
                    is_async=0,
                )
            ],
        )

        ast.fix_missing_locations(
            list_comprehension
        )  # Adds information required to call compile.
        try:
            values = eval(
                compile(ast.Expression(body=list_comprehension), self.path, "eval"),
                self.namespace,
            )
        except Exception:
            # Resolution is best-effort, as in `try_eval()`: unknown names, non-iterable values or type
            # mismatches mark the key as unresolved so that it is reported instead of aborting the run.
            return None

        return values
| 214 | + |
| 215 | + |
def format_unknown_resolved_attributes(keys: "list[ResolvedSetDataKey]") -> "str":
    """
    Render the resolved attribute names that are missing from sentry-conventions.

    The report starts with a header followed by a blank line, lists one `value (path:line)` entry per key
    and ends with a newline.
    """
    header = "The following resolved string attribute names are not in sentry-conventions:\n"
    entries = [f"{key.value} ({key.path}:{key.line_number})" for key in keys]
    return "\n".join([header, *entries]) + "\n"
| 223 | + |
| 224 | + |
def format_unresolved_attributes(keys: "list[UnresolvedSetDataKey]") -> "str":
    """
    Render the key expressions that could not be resolved and are not covered by the allow-list.

    The report starts with a header followed by a blank line and lists one `expression (path:line)` entry
    per key, without a trailing newline.
    """
    header = "The following unresolved expressions for attribute names may not be in sentry-conventions:\n"
    entries = [
        f"{key.argument_expression} ({key.path}:{key.line_number})" for key in keys
    ]
    return "\n".join([header, *entries])
| 232 | + |
| 233 | + |
def main() -> None:
    """
    Check all Python files below `sentry_sdk` (relative to the working directory) for undocumented attributes.

    Raises:
        Exception: If a resolved attribute name is not in sentry-conventions, or if an unresolved key
            expression is not covered by the allow-list. The findings are attached as notes.
    """
    # Includes special attributes (with double underscores), but only proper attributes should match.
    convention_keys = vars(ATTRIBUTE_NAMES).values()

    all_resolved: "list[ResolvedSetDataKey]" = []
    all_unresolved: "list[UnresolvedSetDataKey]" = []

    # Sorted so that the order of the report does not depend on the file system's listing order.
    for path in sorted(Path("sentry_sdk").rglob("*.py")):
        # Pass bytes so that the parser decodes the source itself (UTF-8 unless the file declares another
        # encoding) instead of relying on the locale's preferred encoding.
        tree = ast.parse(path.read_bytes(), path)

        # A limited namespace is used to resolve keys as resolution is best-effort and some keys depend on runtime values.
        # In practice, span attribute names are often stored as dictionary keys in the same file.
        namespace = build_file_namespace(
            tree=tree,
            namespace={"SPANDATA": SPANDATA},
            path=path,
        )

        collector = SetDataKeysCollector(namespace=namespace, path=path)
        collector.visit(tree)

        all_resolved += collector.resolved
        all_unresolved += collector.unresolved

    unknown_resolved_keys = [
        resolved for resolved in all_resolved if resolved.value not in convention_keys
    ]

    # Drop unresolved expressions that are explicitly allowed for the file they appear in.
    truly_unresolved = []
    for unresolved_set_data in all_unresolved:
        patterns = ALLOWED_EXPRESSION_PATTERNS_FOR_UNRESOLVED_KEYS.get(
            unresolved_set_data.path
        )
        if patterns and any(
            re.search(p, unresolved_set_data.argument_expression) for p in patterns
        ):
            continue
        truly_unresolved.append(unresolved_set_data)

    if unknown_resolved_keys or truly_unresolved:
        exc = Exception("Undocumented attributes not in the allow-list detected.")

        if unknown_resolved_keys:
            exc.add_note(format_unknown_resolved_attributes(unknown_resolved_keys))

        if truly_unresolved:
            exc.add_note(format_unresolved_attributes(truly_unresolved))

        raise exc


if __name__ == "__main__":
    main()
0 commit comments