|
1 | | -import re |
2 | | -import ast |
3 | | -import textwrap |
4 | | - |
# Secret-detection rule table, keyed by human-readable rule name.
# A rule fires in one of two ways:
#   - "value_pattern": the string value itself looks like a secret,
#     whatever the variable is called
#   - "var_patterns" + "min_length": the variable name looks suspicious and
#     the assigned value is at least "min_length" characters long
# Every rule also carries a "severity" label that is reported with a finding.
REGEX_INFO = {
    "AWS Access Key": {
        "severity": "HIGH",
        "value_pattern": re.compile(r"(AKIA[0-9A-Z]{16})"),
    },
    "Password": {
        "severity": "HIGH",
        "min_length": 4,
        "var_patterns": [
            re.compile(name, re.IGNORECASE)
            for name in (r"password", r"pwd", r"passwd")
        ],
    },
    "API Key": {
        "severity": "HIGH",
        "min_length": 4,
        "var_patterns": [
            re.compile(name, re.IGNORECASE)
            for name in (r"api_key", r"apikey")
        ],
    },
    "Token": {
        "severity": "MEDIUM",
        "min_length": 4,
        "var_patterns": [re.compile(r"token", re.IGNORECASE)],
    },
    "Secret": {
        "severity": "MEDIUM",
        "min_length": 4,
        "var_patterns": [re.compile(r"secret", re.IGNORECASE)],
    },
}
44 | | - |
45 | | - |
def parse_ast(file):
    """
    Parse source code into an AST.

    The source is dedented first so that indented snippets (for example a
    method body copied out of a class) can still be parsed.

    Args:
        file (str): Raw Python source code.

    Returns:
        ast.AST | None: Parsed AST object, or None if parsing fails.
    """
    source = textwrap.dedent(file)

    try:
        return ast.parse(source)
    except (SyntaxError, ValueError):
        # SyntaxError: the text is not valid Python.
        # ValueError: raised by older interpreters when the source contains
        # null bytes; treat it as "cannot parse" instead of crashing the scan.
        return None
64 | | - |
65 | | - |
def get_assignments(tree):
    """
    Lazily produce every plain assignment statement found in an AST.

    Only ``ast.Assign`` nodes are produced; annotated (``x: int = 1``) and
    augmented (``x += 1``) assignments are different node types and are not
    included.

    Args:
        tree (ast.AST): Parsed syntax tree.

    Yields:
        ast.Assign: Assignment nodes, in the order ``ast.walk`` visits them.
    """
    yield from (
        candidate
        for candidate in ast.walk(tree)
        if isinstance(candidate, ast.Assign)
    )
79 | | - |
80 | | - |
def extract_node_value(node):
    """
    Return the string literal assigned by an assignment node, if there is one.

    Args:
        node (ast.Assign): Assignment node.

    Returns:
        str | None: The literal text when the right-hand side is a string
        constant; None for any other kind of expression or constant.
    """
    literal = node.value
    if isinstance(literal, ast.Constant) and isinstance(literal.value, str):
        return literal.value
    return None
95 | | - |
96 | | - |
def extract_variable_path(node):
    """
    Produce the dotted name path of each assignment target.

    Handles bare names (``key``) and attribute chains rooted at a name
    (``self.config.key``). Any other target shape, such as a subscript, a
    tuple, or an attribute chain rooted at a call, is skipped.

    Args:
        node (ast.Assign): Assignment node.

    Yields:
        list[str]: Lowercased path components, outermost name first.
    """
    for target in node.targets:
        components = []
        cursor = target

        # Peel attributes off from the right: self.config.key -> key, config
        while isinstance(cursor, ast.Attribute):
            components.append(cursor.attr.lower())
            cursor = cursor.value

        # Whatever remains must be a plain name; otherwise skip this target
        if not isinstance(cursor, ast.Name):
            continue

        components.append(cursor.id.lower())
        # Components were collected innermost-last; flip to reading order
        components.reverse()
        yield components
133 | | - |
134 | | - |
def detect_from_parts(var_name, val, *, rules=None):
    """
    Classify an extracted variable/value pair against secret detection rules.

    A rule contributes at most one finding per match kind: one if its
    "value_pattern" matches the whole value, and one if any of its
    "var_patterns" is found in the variable name and the value is at least
    "min_length" characters long. Several name patterns of the same rule
    matching the same variable do not produce duplicate findings.

    Args:
        var_name (str): Normalized variable name or final attribute name.
        val (str): Extracted string literal value.
        rules (dict | None): Optional rule table laid out like REGEX_INFO.
            When omitted, REGEX_INFO is used.

    Returns:
        list[tuple[str, str, str]] | None:
            A list of findings where each finding is:
            (rule_name, severity, extracted_value)

            Returns None if no rule matches.
    """
    if rules is None:
        rules = REGEX_INFO

    findings = []

    for rule, data in rules.items():
        # Match structured secret values, such as AWS access keys
        if "value_pattern" in data and data["value_pattern"].fullmatch(val):
            findings.append((rule, data["severity"], val))

        # Match suspicious variable names and enforce minimum value length.
        # any() stops at the first matching pattern, so a rule is reported
        # once even when several of its name patterns match.
        if "var_patterns" in data:
            name_matches = any(
                pattern.search(var_name) for pattern in data["var_patterns"]
            )
            if name_matches and len(val) >= data["min_length"]:
                findings.append((rule, data["severity"], val))

    return findings or None
166 | | - |
| 1 | +from detectors.ast_analyzer import parse_ast, get_assignments, extract_node_value, extract_variable_path |
| 2 | +from detectors.rule_engine import apply_rules |
167 | 3 |
|
168 | 4 | def detect_ast_secrets(code): |
169 | 5 | """ |
@@ -196,7 +32,7 @@ def detect_ast_secrets(code): |
196 | 32 | for full_path in extract_variable_path(node): |
197 | 33 | var_name = full_path[-1] |
198 | 34 |
|
199 | | - vulnerabilities = detect_from_parts(var_name, val) |
| 35 | + vulnerabilities = apply_rules(var_name, val) |
200 | 36 |
|
201 | 37 | if vulnerabilities: |
202 | 38 | for pattern_name, severity, value in vulnerabilities: |
|
0 commit comments