|
2 | 2 | import ast |
3 | 3 | import textwrap |
4 | 4 |
|
5 | | -# Precompiled regex rules for detecting hardcoded secrets in source code. |
6 | | -# Each rule contains: |
7 | | -# - pattern: compiled regex used to identify the secret |
8 | | -# - severity: classification of the finding (e.g., HIGH, MEDIUM) |
9 | | -# |
10 | | -# Notes: |
11 | | -# - Patterns use capture groups to extract only the secret value. |
12 | | -# - Length constraints ({4,}) help reduce false positives. |
13 | | -# - Case-insensitive matching is applied where appropriate. |
14 | 5 | REGEX_INFO = { |
15 | 6 | "AWS Access Key": { |
16 | | - "pattern": re.compile(r"(AKIA[0-9A-Z]{16})"), |
| 7 | + "value_pattern": re.compile(r"(AKIA[0-9A-Z]{16})"), |
17 | 8 | "severity": "HIGH" |
18 | 9 | }, |
19 | 10 | "Password": { |
20 | | - "pattern": re.compile(r"password\s*=\s*['\"]([^'\"]{4,})['\"]", re.IGNORECASE), |
| 11 | + "var_patterns": [ |
| 12 | + re.compile(r"password", re.IGNORECASE), |
| 13 | + re.compile(r"pwd", re.IGNORECASE), |
| 14 | + re.compile(r"passwd", re.IGNORECASE) |
| 15 | + ], |
| 16 | + "min_length": 4, |
21 | 17 | "severity": "HIGH" |
22 | 18 | }, |
23 | 19 | "API Key": { |
24 | | - "pattern": re.compile(r"api_key\s*=\s*['\"]([^'\"]{4,})['\"]", re.IGNORECASE), |
| 20 | + "var_patterns": [ |
| 21 | + re.compile(r"api_key", re.IGNORECASE), |
| 22 | + re.compile(r"apikey", re.IGNORECASE) |
| 23 | + ], |
| 24 | + "min_length": 4, |
25 | 25 | "severity": "HIGH" |
26 | 26 | }, |
27 | 27 | "Token": { |
28 | | - "pattern": re.compile(r"token\s*=\s*['\"]([^'\"]{4,})['\"]", re.IGNORECASE), |
| 28 | + "var_patterns": [re.compile(r"token", re.IGNORECASE)], |
| 29 | + "min_length": 4, |
29 | 30 | "severity": "MEDIUM" |
30 | 31 | }, |
31 | 32 | "Secret": { |
32 | | - "pattern": re.compile(r"secret\s*=\s*['\"]([^'\"]{4,})['\"]", re.IGNORECASE), |
| 33 | + "var_patterns": [re.compile(r"secret", re.IGNORECASE),], |
| 34 | + "min_length": 4, |
33 | 35 | "severity": "MEDIUM" |
34 | 36 | } |
35 | 37 | } |
36 | 38 |
|
37 | 39 |
|
38 | | -def detect_secrets(line): |
39 | | - """ |
40 | | - Scan a single line of source code for hardcoded secrets. |
41 | | -
|
42 | | - This function applies all predefined regex detection rules to the given line |
43 | | - and returns any matches found. It ignores commented portions of the line and |
44 | | - skips empty or non-executable content. |
45 | | -
|
46 | | - Args: |
47 | | - line (str): A raw line of source code. |
48 | | -
|
49 | | - Returns: |
50 | | - list[tuple[str, str, str]] | None: |
51 | | - A list of findings, where each finding is a tuple: |
52 | | - (rule_name, severity, extracted_secret_value) |
53 | | -
|
54 | | - - rule_name (str): Type of secret detected (e.g., "API Key") |
55 | | - - severity (str): Risk level associated with the finding |
56 | | - - extracted_secret_value (str): The actual secret value captured |
57 | | -
|
58 | | - Returns None if no secrets are detected in the line. |
59 | | -
|
60 | | - Behavior: |
61 | | - - Strips inline comments using '#' delimiter |
62 | | - - Ignores empty or whitespace-only lines |
63 | | - - Supports multiple detections per line |
64 | | - - Uses precompiled regex for efficiency |
65 | | - """ |
66 | | - line = line.strip() |
67 | | - if not line or line.startswith("#"): |
68 | | - return None |
69 | | - |
70 | | - findings = [] |
71 | | - |
72 | | - # Apply each detection rule to the line |
73 | | - for pattern_name, data in REGEX_INFO.items(): |
74 | | - for match in data["pattern"].finditer(line): |
75 | | - findings.append( |
76 | | - (pattern_name, data["severity"], match.group(1)) |
77 | | - ) |
78 | | - |
79 | | - return findings or None |
80 | | - |
81 | | - |
82 | 40 | def parse_ast(file): |
83 | 41 | """ |
84 | 42 | Parse source code into an AST. |
@@ -170,6 +128,21 @@ def extract_variable_path(node): |
170 | 128 | if len(full_path) != 0: |
171 | 129 | yield full_path |
172 | 130 |
|
def detect_from_parts(var_name, val):
    """
    Check one assignment (variable name plus assigned value) against REGEX_INFO.

    Two kinds of rule entry are understood:
      - "value_pattern": the rule fires when the entire value matches the
        compiled pattern, whatever the variable is called.
      - "var_patterns": the rule fires when any of the compiled patterns is
        found in the variable name and the value has at least "min_length"
        characters (no minimum is applied when the key is absent).

    Args:
        var_name (str): Name of the variable or attribute being assigned.
        val: The assigned value. Only strings are inspected.

    Returns:
        list[tuple[str, str, str]] | None:
            A list of (rule_name, severity, value) tuples, or None when
            nothing matched or the value is not a string.
    """
    # Regex matching and len() only make sense for text; anything else
    # (numbers, None, bytes, ...) cannot be reported as a secret here.
    if not isinstance(val, str):
        return None

    findings = []
    for rule, data in REGEX_INFO.items():
        severity = data["severity"]

        value_pattern = data.get("value_pattern")
        if value_pattern is not None and value_pattern.fullmatch(val):
            findings.append((rule, severity, val))

        var_patterns = data.get("var_patterns")
        if var_patterns and len(val) >= data.get("min_length", 0):
            # Report the rule once even if several of its name patterns
            # match the same variable (e.g. "pwd_password").
            if any(pattern.search(var_name) for pattern in var_patterns):
                findings.append((rule, severity, val))

    return findings or None
| 145 | + |
173 | 146 |
|
174 | 147 | def detect_ast_secrets(code): |
175 | 148 | """ |
@@ -201,10 +174,7 @@ def detect_ast_secrets(code): |
201 | 174 | for full_path in extract_variable_path(node): |
202 | 175 | var_name = full_path[-1] |
203 | 176 |
|
204 | | - # Normalize AST data into a regex-compatible assignment string |
205 | | - fake_line = f"{var_name} = \"{val}\"" |
206 | | - |
207 | | - vulnerabilities = detect_secrets(fake_line) |
| 177 | + vulnerabilities = detect_from_parts(var_name, val) |
208 | 178 |
|
209 | 179 | if vulnerabilities: |
210 | 180 | for pattern_name, severity, value in vulnerabilities: |
|
0 commit comments