Skip to content

Commit e8d6b9a

Browse files
committed
refactor: remove fake_line and old regex function and implement AST native detection
1 parent e5afa76 commit e8d6b9a

2 files changed

Lines changed: 33 additions & 63 deletions

File tree

detectors/find_secrets.py

Lines changed: 32 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2,83 +2,41 @@
22
import ast
33
import textwrap
44

5-
# Precompiled regex rules for detecting hardcoded secrets in source code.
6-
# Each rule contains:
7-
# - pattern: compiled regex used to identify the secret
8-
# - severity: classification of the finding (e.g., HIGH, MEDIUM)
9-
#
10-
# Notes:
11-
# - Patterns use capture groups to extract only the secret value.
12-
# - Length constraints ({4,}) help reduce false positives.
13-
# - Case-insensitive matching is applied where appropriate.
145
REGEX_INFO = {
156
"AWS Access Key": {
16-
"pattern": re.compile(r"(AKIA[0-9A-Z]{16})"),
7+
"value_pattern": re.compile(r"(AKIA[0-9A-Z]{16})"),
178
"severity": "HIGH"
189
},
1910
"Password": {
20-
"pattern": re.compile(r"password\s*=\s*['\"]([^'\"]{4,})['\"]", re.IGNORECASE),
11+
"var_patterns": [
12+
re.compile(r"password", re.IGNORECASE),
13+
re.compile(r"pwd", re.IGNORECASE),
14+
re.compile(r"passwd", re.IGNORECASE)
15+
],
16+
"min_length": 4,
2117
"severity": "HIGH"
2218
},
2319
"API Key": {
24-
"pattern": re.compile(r"api_key\s*=\s*['\"]([^'\"]{4,})['\"]", re.IGNORECASE),
20+
"var_patterns": [
21+
re.compile(r"api_key", re.IGNORECASE),
22+
re.compile(r"apikey", re.IGNORECASE)
23+
],
24+
"min_length": 4,
2525
"severity": "HIGH"
2626
},
2727
"Token": {
28-
"pattern": re.compile(r"token\s*=\s*['\"]([^'\"]{4,})['\"]", re.IGNORECASE),
28+
"var_patterns": [re.compile(r"token", re.IGNORECASE)],
29+
"min_length": 4,
2930
"severity": "MEDIUM"
3031
},
3132
"Secret": {
32-
"pattern": re.compile(r"secret\s*=\s*['\"]([^'\"]{4,})['\"]", re.IGNORECASE),
33+
"var_patterns": [re.compile(r"secret", re.IGNORECASE),],
34+
"min_length": 4,
3335
"severity": "MEDIUM"
3436
}
3537
}
3638

3739

38-
def detect_secrets(line):
39-
"""
40-
Scan a single line of source code for hardcoded secrets.
41-
42-
This function applies all predefined regex detection rules to the given line
43-
and returns any matches found. It ignores commented portions of the line and
44-
skips empty or non-executable content.
45-
46-
Args:
47-
line (str): A raw line of source code.
48-
49-
Returns:
50-
list[tuple[str, str, str]] | None:
51-
A list of findings, where each finding is a tuple:
52-
(rule_name, severity, extracted_secret_value)
53-
54-
- rule_name (str): Type of secret detected (e.g., "API Key")
55-
- severity (str): Risk level associated with the finding
56-
- extracted_secret_value (str): The actual secret value captured
57-
58-
Returns None if no secrets are detected in the line.
59-
60-
Behavior:
61-
- Strips inline comments using '#' delimiter
62-
- Ignores empty or whitespace-only lines
63-
- Supports multiple detections per line
64-
- Uses precompiled regex for efficiency
65-
"""
66-
line = line.strip()
67-
if not line or line.startswith("#"):
68-
return None
69-
70-
findings = []
71-
72-
# Apply each detection rule to the line
73-
for pattern_name, data in REGEX_INFO.items():
74-
for match in data["pattern"].finditer(line):
75-
findings.append(
76-
(pattern_name, data["severity"], match.group(1))
77-
)
78-
79-
return findings or None
80-
81-
8240
def parse_ast(file):
8341
"""
8442
Parse source code into an AST.
@@ -170,6 +128,21 @@ def extract_variable_path(node):
170128
if len(full_path) != 0:
171129
yield full_path
172130

131+
def detect_from_parts(var_name, val, rules=None):
    """
    Check one assigned variable name/value pair against the secret rules.

    A rule matches when either:
      - its "value_pattern" fully matches the value, or
      - any of its "var_patterns" is found in the variable name and the
        value is at least "min_length" characters long.

    Each rule contributes at most one finding, even when several of its
    variable-name patterns match the same name (e.g. "pwd_password").

    Args:
        var_name: Name of the variable being assigned.
        val: The assigned value. Non-string values are never reported.
        rules: Optional rule mapping with the same shape as REGEX_INFO.
            Defaults to REGEX_INFO when omitted.

    Returns:
        A list of (rule_name, severity, value) tuples, or None when no
        rule matched.
    """
    # Regex matching and len() below only make sense for text; anything
    # else (numbers, None, bytes, ...) is not treated as a secret here.
    if not isinstance(val, str):
        return None

    if rules is None:
        rules = REGEX_INFO

    findings = []
    for rule, data in rules.items():
        value_pattern = data.get("value_pattern")
        value_hit = (
            value_pattern is not None
            and value_pattern.fullmatch(val) is not None
        )

        # any() stops at the first matching name pattern, so a rule with
        # overlapping patterns cannot report the same value twice.
        name_hit = (
            len(val) >= data.get("min_length", 0)
            and any(
                pattern.search(var_name)
                for pattern in data.get("var_patterns", ())
            )
        )

        if value_hit or name_hit:
            findings.append((rule, data["severity"], val))

    return findings or None
145+
173146

174147
def detect_ast_secrets(code):
175148
"""
@@ -201,10 +174,7 @@ def detect_ast_secrets(code):
201174
for full_path in extract_variable_path(node):
202175
var_name = full_path[-1]
203176

204-
# Normalize AST data into a regex-compatible assignment string
205-
fake_line = f"{var_name} = \"{val}\""
206-
207-
vulnerabilities = detect_secrets(fake_line)
177+
vulnerabilities = detect_from_parts(var_name, val)
208178

209179
if vulnerabilities:
210180
for pattern_name, severity, value in vulnerabilities:

scanner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pathlib import Path
2-
from detectors.find_secrets import detect_secrets, detect_ast_secrets
2+
from detectors.find_secrets import detect_ast_secrets
33

44

55
def check_path(input_path):

0 commit comments

Comments
 (0)