Skip to content

Commit 3e259a9

Browse files
committed
refactor: split detection rules into modular rule engine
1 parent 8530e0c commit 3e259a9

8 files changed

Lines changed: 272 additions & 266 deletions

File tree

detectors/ast_analyzer.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import ast
2+
import textwrap
3+
4+
def parse_ast(file):
    """
    Parse source code into an AST.

    The source is dedented first so that indented snippets (for example a
    method body pasted on its own) can still be parsed.

    Args:
        file (str): Raw Python source code.

    Returns:
        ast.AST | None: Parsed AST object, or None if parsing fails.
    """
    source = textwrap.dedent(file)

    try:
        return ast.parse(source)
    except (SyntaxError, ValueError):
        # SyntaxError: the text is not valid Python.
        # ValueError: raised by older interpreters when the source contains
        # null bytes; treat it as "unparseable" instead of crashing the scan.
        return None
22+
23+
24+
def get_assignments(tree):
    """
    Iterate over every plain assignment statement in an AST.

    The whole tree is traversed with ``ast.walk``, so assignments nested
    inside functions, classes, or other blocks are included.

    Args:
        tree (ast.AST): Parsed syntax tree.

    Yields:
        ast.Assign: Each assignment node, in ``ast.walk`` traversal order.
    """
    yield from (
        candidate
        for candidate in ast.walk(tree)
        if isinstance(candidate, ast.Assign)
    )
37+
38+
39+
def extract_node_value(node):
    """
    Return the string literal assigned by an assignment node, if any.

    Args:
        node (ast.Assign): Assignment node.

    Returns:
        str | None: The literal text when the right-hand side is a string
        constant; None for any other kind of expression or constant.
    """
    rhs = node.value
    if isinstance(rhs, ast.Constant):
        literal = rhs.value
        if isinstance(literal, str):
            return literal
    return None
53+
54+
55+
def extract_variable_path(node):
    """
    Produce the dotted name path of each assignment target.

    Handles bare names (``key``) and attribute chains rooted at a bare name
    (``self.config.key``). Any other target shape, such as tuples,
    subscripts, or attribute chains rooted at a call, is skipped.

    Args:
        node (ast.Assign): Assignment node.

    Yields:
        list[str]: Path components from root to leaf, each lowercased.
    """
    for target in node.targets:
        # Plain variable: the path is just its own name.
        if isinstance(target, ast.Name):
            yield [target.id.lower()]
            continue

        if not isinstance(target, ast.Attribute):
            continue

        # Walk from the outermost attribute down to the root object,
        # collecting attribute names leaf-first.
        leaf_first = []
        cursor = target
        while isinstance(cursor, ast.Attribute):
            leaf_first.append(cursor.attr.lower())
            cursor = cursor.value

        # Only chains that bottom out in a simple name are reported.
        if isinstance(cursor, ast.Name):
            yield [cursor.id.lower(), *reversed(leaf_first)]

detectors/find_secrets.py

Lines changed: 3 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -1,169 +1,5 @@
1-
import re
2-
import ast
3-
import textwrap
4-
5-
# Rule definitions for secret detection.
6-
# Rules can match either:
7-
# - a secret-like value pattern, regardless of variable name
8-
# - a suspicious variable name combined with a minimum value length
9-
REGEX_INFO = {
10-
"AWS Access Key": {
11-
"value_pattern": re.compile(r"(AKIA[0-9A-Z]{16})"),
12-
"severity": "HIGH",
13-
},
14-
"Password": {
15-
"var_patterns": [
16-
re.compile(r"password", re.IGNORECASE),
17-
re.compile(r"pwd", re.IGNORECASE),
18-
re.compile(r"passwd", re.IGNORECASE),
19-
],
20-
"min_length": 4,
21-
"severity": "HIGH",
22-
},
23-
"API Key": {
24-
"var_patterns": [
25-
re.compile(r"api_key", re.IGNORECASE),
26-
re.compile(r"apikey", re.IGNORECASE),
27-
],
28-
"min_length": 4,
29-
"severity": "HIGH",
30-
},
31-
"Token": {
32-
"var_patterns": [re.compile(r"token", re.IGNORECASE)],
33-
"min_length": 4,
34-
"severity": "MEDIUM",
35-
},
36-
"Secret": {
37-
"var_patterns": [
38-
re.compile(r"secret", re.IGNORECASE),
39-
],
40-
"min_length": 4,
41-
"severity": "MEDIUM",
42-
},
43-
}
44-
45-
46-
def parse_ast(file):
47-
"""
48-
Parse source code into an AST.
49-
50-
Args:
51-
file (str): Raw Python source code.
52-
53-
Returns:
54-
ast.AST | None: Parsed AST object, or None if parsing fails.
55-
"""
56-
file = textwrap.dedent(file)
57-
58-
try:
59-
tree = ast.parse(file)
60-
except SyntaxError:
61-
return None
62-
63-
return tree
64-
65-
66-
def get_assignments(tree):
67-
"""
68-
Yield all assignment nodes from an AST.
69-
70-
Args:
71-
tree (ast.AST): Parsed syntax tree.
72-
73-
Yields:
74-
ast.Assign: Assignment nodes found during traversal.
75-
"""
76-
for node in ast.walk(tree):
77-
if isinstance(node, ast.Assign):
78-
yield node
79-
80-
81-
def extract_node_value(node):
82-
"""
83-
Extract a string literal value from an assignment node.
84-
85-
Args:
86-
node (ast.Assign): Assignment node.
87-
88-
Returns:
89-
str | None: String value if valid, otherwise None.
90-
"""
91-
val = node.value
92-
if not (isinstance(val, ast.Constant) and isinstance(val.value, str)):
93-
return None
94-
return val.value
95-
96-
97-
def extract_variable_path(node):
98-
"""
99-
Extract variable name paths from assignment targets.
100-
101-
Supports simple variables and nested attributes (e.g., self.config.key).
102-
103-
Args:
104-
node (ast.Assign): Assignment node.
105-
106-
Yields:
107-
list[str]: Normalized variable path (lowercased components).
108-
"""
109-
for var in node.targets:
110-
full_path = []
111-
temp_node = var
112-
113-
# Traverse nested attribute chain (e.g., self.config.key)
114-
if isinstance(temp_node, ast.Attribute):
115-
while isinstance(temp_node, ast.Attribute):
116-
full_path.append(temp_node.attr.lower())
117-
temp_node = temp_node.value
118-
119-
# Ensure root is a valid variable name
120-
if not isinstance(temp_node, ast.Name):
121-
continue
122-
123-
full_path.append(temp_node.id.lower())
124-
full_path.reverse()
125-
126-
# Handle simple variable assignment
127-
elif isinstance(var, ast.Name):
128-
full_path.append(var.id.lower())
129-
130-
# Yield only valid, non-empty paths
131-
if len(full_path) != 0:
132-
yield full_path
133-
134-
135-
def detect_from_parts(var_name, val):
136-
"""
137-
Classify an extracted variable/value pair against secret detection rules.
138-
139-
Args:
140-
var_name (str): Normalized variable name or final attribute name.
141-
val (str): Extracted string literal value.
142-
143-
Returns:
144-
list[tuple[str, str, str]] | None:
145-
A list of findings where each finding is:
146-
(rule_name, severity, extracted_value)
147-
148-
Returns None if no rule matches.
149-
"""
150-
findings = []
151-
152-
for rule, data in REGEX_INFO.items():
153-
# Match structured secret values, such as AWS access keys
154-
if "value_pattern" in data:
155-
if data["value_pattern"].fullmatch(val):
156-
findings.append((rule, data["severity"], val))
157-
158-
# Match suspicious variable names and enforce minimum value length
159-
if "var_patterns" in data:
160-
for pattern in data["var_patterns"]:
161-
match = pattern.search(var_name)
162-
if match and len(val) >= data["min_length"]:
163-
findings.append((rule, data["severity"], val))
164-
165-
return findings or None
166-
1+
from detectors.ast_analyzer import parse_ast, get_assignments, extract_node_value, extract_variable_path
2+
from detectors.rule_engine import apply_rules
1673

1684
def detect_ast_secrets(code):
1695
"""
@@ -196,7 +32,7 @@ def detect_ast_secrets(code):
19632
for full_path in extract_variable_path(node):
19733
var_name = full_path[-1]
19834

199-
vulnerabilities = detect_from_parts(var_name, val)
35+
vulnerabilities = apply_rules(var_name, val)
20036

20137
if vulnerabilities:
20238
for pattern_name, severity, value in vulnerabilities:

detectors/rule_engine.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from detectors.rules import RULES
2+
3+
4+
def apply_rules(var_name, val, rules=None):
    """
    Classify an extracted variable/value pair against secret detection rules.

    A rule matches when its ``value_pattern`` fully matches the value, or
    when any of its ``var_patterns`` is found in the variable name and the
    value is at least ``min_length`` characters long. Each rule contributes
    at most one finding, no matter how many of its patterns match.

    Args:
        var_name (str): Normalized variable name or final attribute name.
        val (str): Extracted string literal value.
        rules (dict | None): Optional rule table with the same layout as the
            module-level ``RULES``. Defaults to ``RULES`` when omitted.

    Returns:
        list[tuple[str, str, str]] | None:
            A list of findings where each finding is:
            (rule_name, severity, extracted_value)

            Returns None if no rule matches.
    """
    if rules is None:
        rules = RULES

    findings = []

    for rule, data in rules.items():
        # Match structured secret values, such as AWS access keys
        value_pattern = data.get("value_pattern")
        matched = (
            value_pattern is not None
            and value_pattern.fullmatch(val) is not None
        )

        # Match suspicious variable names and enforce minimum value length.
        # A missing "min_length" means no length requirement.
        if not matched and len(val) >= data.get("min_length", 0):
            matched = any(
                pattern.search(var_name)
                for pattern in data.get("var_patterns", ())
            )

        # One finding per rule: several patterns of the same rule hitting
        # the same name must not produce duplicate reports.
        if matched:
            findings.append((rule, data["severity"], val))

    return findings or None

detectors/rules.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import re
2+
3+
4+
# Rule definitions for secret detection.
5+
# Rules can match either:
6+
# - a secret-like value pattern, regardless of variable name
7+
# - a suspicious variable name combined with a minimum value length
8+
RULES = {
    # Value-based rule: the literal itself must look like the secret.
    "AWS Access Key": dict(
        value_pattern=re.compile(r"(AKIA[0-9A-Z]{16})"),
        severity="HIGH",
    ),
    # Name-based rules: a suspicious variable name plus a minimum value length.
    "Password": dict(
        var_patterns=[
            re.compile(fragment, re.IGNORECASE)
            for fragment in (r"password", r"pwd", r"passwd")
        ],
        min_length=4,
        severity="HIGH",
    ),
    "API Key": dict(
        var_patterns=[
            re.compile(fragment, re.IGNORECASE)
            for fragment in (r"api_key", r"apikey")
        ],
        min_length=4,
        severity="HIGH",
    ),
    "Token": dict(
        var_patterns=[re.compile(r"token", re.IGNORECASE)],
        min_length=4,
        severity="MEDIUM",
    ),
    "Secret": dict(
        var_patterns=[re.compile(r"secret", re.IGNORECASE)],
        min_length=4,
        severity="MEDIUM",
    ),
}

main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
"""
1414

1515
from cli import input_path, chosen_severity, use_json
16-
from scanner import check_path, scan, list_python_files, output, filter_results
16+
from scanner import check_path, scan, list_python_files
17+
from output import filter_results, output
1718

1819

1920
if __name__ == "__main__":

0 commit comments

Comments
 (0)