Skip to content

Commit d3e5264

Browse files
committed
feat: add secret detection for annotated assignments
1 parent 8dfcdc4 commit d3e5264

4 files changed

Lines changed: 320 additions & 39 deletions

File tree

detectors/ast_analyzer.py

Lines changed: 51 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def get_assignments(tree):
3737
ast.Assign | ast.AnnAssign: Plain and annotated assignment nodes found during traversal.
3838
"""
3939
for node in ast.walk(tree):
40-
if isinstance(node, ast.Assign):
40+
if isinstance(node, ast.Assign) or isinstance(node, ast.AnnAssign):
4141
yield node
4242

4343

@@ -60,7 +60,16 @@ def extract_node_value(node):
6060
return val.value
6161

6262

63-
def extract_variable_path(node):
63+
def extract_target_nodes(node):
    """
    Yield every assignment target carried by an assignment node.

    Args:
        node (ast.AST): Usually an ``ast.Assign`` or ``ast.AnnAssign`` node.

    Yields:
        ast.expr: The single ``target`` of an annotated assignment, or each
            entry of ``targets`` for a plain (possibly chained) assignment.
            Any other node type yields nothing.
    """
    if isinstance(node, ast.AnnAssign):
        # Annotated assignments (``x: T = v``) always have exactly one target.
        yield node.target

    elif isinstance(node, ast.Assign):
        # Plain assignments may be chained (``a = b = v``): one target each.
        yield from node.targets
70+
71+
72+
def extract_variable_path_from_target(target):
    """
    Extract the normalized variable path from a single assignment target.

    Supported target shapes:
        - ``name``              -> ["name"]
        - ``root.attr1.attr2``  -> ["root", "attr1", "attr2"]
        - ``anything["key"]``   -> ["key"]

    Args:
        target (ast.expr): One assignment target node, e.g. an entry of
            ``ast.Assign.targets`` or the ``target`` of an ``ast.AnnAssign``.

    Yields:
        list[str]: Lowercased variable path components. At most one path is
            yielded; unsupported targets yield nothing.
    """
    full_path = []

    if isinstance(target, ast.Attribute):
        temp_node = target

        # Walk nested attributes from right to left.
        while isinstance(temp_node, ast.Attribute):
            full_path.append(temp_node.attr.lower())
            temp_node = temp_node.value

        # Ignore unsupported roots such as function calls or subscripts:
        # only a chain that bottoms out in a plain name is reported.
        if not isinstance(temp_node, ast.Name):
            return

        full_path.append(temp_node.id.lower())
        full_path.reverse()

    # Handle direct variable assignment.
    elif isinstance(target, ast.Name):
        full_path.append(target.id.lower())

    elif isinstance(target, ast.Subscript):
        key = target.slice

        # Only constant string keys (``cfg["password"]``) name a variable.
        if not (isinstance(key, ast.Constant) and isinstance(key.value, str)):
            return

        full_path.append(key.value.lower())

    if full_path:
        yield full_path
107116

108117

109118
def extract_candidates(code):
@@ -130,11 +139,14 @@ def extract_candidates(code):
130139

131140
line_number = node.lineno
132141

133-
for full_path in extract_variable_path(node):
134-
var_name = full_path[-1]
135-
136-
yield Candidate(
137-
line_number=line_number,
138-
var_name=var_name,
139-
value=val,
140-
)
142+
targets = extract_target_nodes(node)
143+
144+
for target in targets:
145+
for full_path in extract_variable_path_from_target(target):
146+
var_name = full_path[-1]
147+
148+
yield Candidate(
149+
line_number=line_number,
150+
var_name=var_name,
151+
value=val,
152+
)

test_dirs/test_repo/open_vulns.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
api_key = "12dwdqwdqwdqw3"
77
token = "xyzgggggg" # noqa: E702
88
TOKEN = "abc1234567890j"
9+
password: str = "nbvyhbgyu1903"
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
from tests.test_ast.ast_helpers import (
2+
API_KEY_REASON,
3+
PASSWORD_REASON,
4+
SECRET_REASON,
5+
TOKEN_REASON,
6+
assert_single_finding,
7+
detect_ast_secrets,
8+
)
9+
10+
11+
def test_ast_annotated_password_assignment():
    """
    An annotated assignment is scanned just like a plain assignment.

    Example:
        password: str = "abcdef"
    """
    expected = dict(
        line_number=1,
        var_name="password",
        value="abcdef",
        rule_id="PASSWORD",
        rule_name="Password",
        severity="HIGH",
        reason=PASSWORD_REASON,
        confidence="LOW",
    )

    findings = detect_ast_secrets('password: str = "abcdef"')

    assert_single_finding(findings, **expected)
32+
33+
34+
def test_ast_annotated_api_key_assignment():
    """
    An annotated ``api_key`` assignment is reported as an API key finding.
    """
    expected = dict(
        line_number=1,
        var_name="api_key",
        value="abc1234567890j",
        rule_id="API_KEY",
        rule_name="API Key",
        severity="HIGH",
        reason=API_KEY_REASON,
        confidence="HIGH",
    )

    findings = detect_ast_secrets('api_key: str = "abc1234567890j"')

    assert_single_finding(findings, **expected)
52+
53+
54+
def test_ast_annotated_token_assignment():
    """
    An annotated ``token`` assignment is reported as a token finding.
    """
    expected = dict(
        line_number=1,
        var_name="token",
        value="abc1234567890j",
        rule_id="TOKEN",
        rule_name="Token",
        severity="MEDIUM",
        reason=TOKEN_REASON,
        confidence="HIGH",
    )

    findings = detect_ast_secrets('token: str = "abc1234567890j"')

    assert_single_finding(findings, **expected)
72+
73+
74+
def test_ast_annotated_secret_assignment():
    """
    An annotated ``*_secret`` assignment is reported as a secret finding.
    """
    expected = dict(
        line_number=1,
        var_name="client_secret",
        value="abcdef",
        rule_id="SECRET",
        rule_name="Secret",
        severity="MEDIUM",
        reason=SECRET_REASON,
        confidence="LOW",
    )

    findings = detect_ast_secrets('client_secret: str = "abcdef"')

    assert_single_finding(findings, **expected)
92+
93+
94+
def test_ast_annotated_uppercase_variable_is_normalized():
    """
    Upper-case annotated variable names are reported in lowercase.
    """
    expected = dict(
        line_number=1,
        var_name="password",
        value="abcdef",
        rule_id="PASSWORD",
        rule_name="Password",
        severity="HIGH",
        reason=PASSWORD_REASON,
        confidence="LOW",
    )

    findings = detect_ast_secrets('PASSWORD: str = "abcdef"')

    assert_single_finding(findings, **expected)
112+
113+
114+
def test_ast_annotated_irrelevant_variable_is_ignored():
    """
    A safe variable name produces no finding, even when annotated.
    """
    source = 'username: str = "abcdef"'

    assert detect_ast_secrets(source) == []
122+
123+
124+
def test_ast_annotated_short_value_is_ignored():
    """
    Minimum length rules still apply to annotated assignments.
    """
    source = 'password: str = "abc"'

    assert detect_ast_secrets(source) == []
132+
133+
134+
def test_ast_annotated_assignment_without_value_is_ignored():
    """
    A bare annotation with no assigned value produces no finding.

    Example:
        password: str
    """
    source = "password: str"

    assert detect_ast_secrets(source) == []
145+
146+
147+
def test_ast_annotated_non_string_value_is_ignored():
    """
    An annotated assignment whose value is not a string produces no finding.
    """
    source = "password: str = 123456"

    assert detect_ast_secrets(source) == []
155+
156+
157+
def test_ast_annotated_assignment_preserves_line_number():
    """
    The reported line number is the AST line of the annotated assignment.
    """
    # The literal opens with a newline, so the password sits on line 3.
    source = """
username: str = "safe"
password: str = "abcdef"
"""
    expected = dict(
        line_number=3,
        var_name="password",
        value="abcdef",
        rule_id="PASSWORD",
        rule_name="Password",
        severity="HIGH",
        reason=PASSWORD_REASON,
        confidence="LOW",
    )

    findings = detect_ast_secrets(source)

    assert_single_finding(findings, **expected)

tests/test_scanner.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from scanner import scan, scan_file
2+
from tests.helpers import write_python_file
3+
4+
5+
def test_scan_file_returns_findings_for_single_file(tmp_path):
    """
    scan_file scans one Python file and attaches the file path to its findings.
    """
    target = write_python_file(tmp_path, "findings.py", 'password = "abcdef"\n')

    findings = scan_file(target)

    assert len(findings) == 1

    only = findings[0]
    expected = {
        "file_path": str(target),
        "line_number": 1,
        "var_name": "password",
        "rule_id": "PASSWORD",
        "rule_name": "Password",
        "severity": "HIGH",
        "value": "abcdef",
    }

    # Check the attributes one by one, in the original order.
    for attr, wanted in expected.items():
        assert getattr(only, attr) == wanted
28+
29+
30+
def test_scan_file_respects_inline_ignore(tmp_path):
    """
    scan_file drops findings on lines carrying a sentinelscan inline ignore comment.
    """
    source = (
        'password = "abcdef" # sentinelscan: ignore\n'
        'token = "abc1234567890j"\n'
    )
    target = write_python_file(tmp_path, "findings.py", source)

    findings = scan_file(target)

    # Only the un-ignored token line should remain.
    assert len(findings) == 1

    only = findings[0]
    expected = {
        "file_path": str(target),
        "line_number": 2,
        "var_name": "token",
        "rule_id": "TOKEN",
        "rule_name": "Token",
        "severity": "MEDIUM",
        "value": "abc1234567890j",
    }

    for attr, wanted in expected.items():
        assert getattr(only, attr) == wanted
54+
55+
56+
def test_scan_returns_flat_list_across_multiple_files(tmp_path):
    """
    scan merges the findings of several files into a single flat list.

    Guards against a nested result such as:
        [[finding1], [finding2]]
    """
    password_file = write_python_file(
        tmp_path, "password_file.py", 'password = "abcdef"\n'
    )
    token_file = write_python_file(
        tmp_path, "token_file.py", 'token = "abc1234567890j"\n'
    )

    findings = scan([password_file, token_file])

    assert len(findings) == 2
    assert not any(isinstance(entry, list) for entry in findings)

    rule_ids = [entry.rule_id for entry in findings]
    file_paths = [entry.file_path for entry in findings]

    assert rule_ids == ["PASSWORD", "TOKEN"]
    assert file_paths == [str(password_file), str(token_file)]
85+
86+
87+
def test_scan_returns_empty_list_when_no_files():
    """
    scan returns an empty list when it is given no files.
    """
    findings = scan([])

    assert findings == []

0 commit comments

Comments
 (0)