feat: add secret detection for annotated assignments

Saharsh1123 · Saharsh1123 · commit d3e5264f7cae · 2026-05-23T16:04:16.000-04:00
diff --git a/detectors/ast_analyzer.py b/detectors/ast_analyzer.py
@@ -37,7 +37,7 @@ def get_assignments(tree):
         ast.Assign: Assignment nodes found during traversal.
     """
     for node in ast.walk(tree):
-        if isinstance(node, ast.Assign):
+        if isinstance(node, ast.Assign) or isinstance(node, ast.AnnAssign):
             yield node
 
 
@@ -60,7 +60,16 @@ def extract_node_value(node):
     return val.value
 
 
-def extract_variable_path(node):
+def extract_target_nodes(node):
+    if isinstance(node, ast.AnnAssign):
+        yield node.target
+    
+    elif isinstance(node, ast.Assign):
+        for target in node.targets:
+            yield target
+
+
+def extract_variable_path_from_target(target):
     """
     Extract normalized variable paths from assignment targets.
 
@@ -73,37 +82,37 @@ def extract_variable_path(node):
     Yields:
         list[str]: Lowercased variable path components.
     """
-    for var in node.targets:
-        full_path = []
-        temp_node = var
-
-        # Walk nested attributes from right to left.
-        if isinstance(temp_node, ast.Attribute):
-            while isinstance(temp_node, ast.Attribute):
-                full_path.append(temp_node.attr.lower())
-                temp_node = temp_node.value
-
-            # Ignore unsupported roots such as function calls or subscripts.
-            if not isinstance(temp_node, ast.Name) or isinstance(temp_node, ast.Subscript):
-                continue
-
-            full_path.append(temp_node.id.lower())
-            full_path.reverse()
-
-        # Handle direct variable assignment.
-        elif isinstance(var, ast.Name):
-            full_path.append(var.id.lower())
-        
-        elif isinstance(var, ast.Subscript):
-            key = var.slice
+    
+    full_path = []
+    temp_node = target
+
+    # Walk nested attributes from right to left.
+    if isinstance(temp_node, ast.Attribute):
+        while isinstance(temp_node, ast.Attribute):
+            full_path.append(temp_node.attr.lower())
+            temp_node = temp_node.value
+
+        # Ignore unsupported roots such as function calls or subscripts.
+        if not isinstance(temp_node, ast.Name) or isinstance(temp_node, ast.Subscript):
+            return
 
-            if not (isinstance(key, ast.Constant) and isinstance(key.value, str)):
-                continue
+        full_path.append(temp_node.id.lower())
+        full_path.reverse()
 
-            full_path.append(key.value.lower())
-            
-        if full_path:
-            yield full_path
+    # Handle direct variable assignment.
+    elif isinstance(target, ast.Name):
+        full_path.append(target.id.lower())
+    
+    elif isinstance(target, ast.Subscript):
+        key = target.slice
+
+        if not (isinstance(key, ast.Constant) and isinstance(key.value, str)):
+            return
+
+        full_path.append(key.value.lower())
+
+    if full_path:
+        yield full_path
 
 
 def extract_candidates(code):
@@ -130,11 +139,14 @@ def extract_candidates(code):
 
         line_number = node.lineno
 
-        for full_path in extract_variable_path(node):
-            var_name = full_path[-1]
-
-            yield Candidate(
-                line_number=line_number,
-                var_name=var_name,
-                value=val,
-            )
+        targets = extract_target_nodes(node)
+        
+        for target in targets:
+            for full_path in extract_variable_path_from_target(target):
+                var_name = full_path[-1]
+
+                yield Candidate(
+                    line_number=line_number,
+                    var_name=var_name,
+                    value=val,
+                )
diff --git a/test_dirs/test_repo/open_vulns.py b/test_dirs/test_repo/open_vulns.py
@@ -6,3 +6,4 @@
 api_key = "12dwdqwdqwdqw3"
 token = "xyzgggggg"  # noqa: E702
 TOKEN = "abc1234567890j"
+password: str = "nbvyhbgyu1903"
diff --git a/tests/test_ast/test_ast_annotations.py b/tests/test_ast/test_ast_annotations.py
@@ -0,0 +1,177 @@
+from tests.test_ast.ast_helpers import (
+    API_KEY_REASON,
+    PASSWORD_REASON,
+    SECRET_REASON,
+    TOKEN_REASON,
+    assert_single_finding,
+    detect_ast_secrets,
+)
+
+
+def test_ast_annotated_password_assignment():
+    """
+    Annotated assignments should be treated like normal assignments.
+
+    Example:
+        password: str = "abcdef"
+    """
+    code = 'password: str = "abcdef"'
+    result = detect_ast_secrets(code)
+
+    assert_single_finding(
+        result,
+        line_number=1,
+        var_name="password",
+        value="abcdef",
+        rule_id="PASSWORD",
+        rule_name="Password",
+        severity="HIGH",
+        reason=PASSWORD_REASON,
+        confidence="LOW",
+    )
+
+
+def test_ast_annotated_api_key_assignment():
+    """
+    Annotated api_key assignments should produce API key findings.
+    """
+    code = 'api_key: str = "abc1234567890j"'
+    result = detect_ast_secrets(code)
+
+    assert_single_finding(
+        result,
+        line_number=1,
+        var_name="api_key",
+        value="abc1234567890j",
+        rule_id="API_KEY",
+        rule_name="API Key",
+        severity="HIGH",
+        reason=API_KEY_REASON,
+        confidence="HIGH",
+    )
+
+
+def test_ast_annotated_token_assignment():
+    """
+    Annotated token assignments should produce token findings.
+    """
+    code = 'token: str = "abc1234567890j"'
+    result = detect_ast_secrets(code)
+
+    assert_single_finding(
+        result,
+        line_number=1,
+        var_name="token",
+        value="abc1234567890j",
+        rule_id="TOKEN",
+        rule_name="Token",
+        severity="MEDIUM",
+        reason=TOKEN_REASON,
+        confidence="HIGH",
+    )
+
+
+def test_ast_annotated_secret_assignment():
+    """
+    Annotated secret assignments should produce secret findings.
+    """
+    code = 'client_secret: str = "abcdef"'
+    result = detect_ast_secrets(code)
+
+    assert_single_finding(
+        result,
+        line_number=1,
+        var_name="client_secret",
+        value="abcdef",
+        rule_id="SECRET",
+        rule_name="Secret",
+        severity="MEDIUM",
+        reason=SECRET_REASON,
+        confidence="LOW",
+    )
+
+
+def test_ast_annotated_uppercase_variable_is_normalized():
+    """
+    Annotated variable names should still be normalized to lowercase.
+    """
+    code = 'PASSWORD: str = "abcdef"'
+    result = detect_ast_secrets(code)
+
+    assert_single_finding(
+        result,
+        line_number=1,
+        var_name="password",
+        value="abcdef",
+        rule_id="PASSWORD",
+        rule_name="Password",
+        severity="HIGH",
+        reason=PASSWORD_REASON,
+        confidence="LOW",
+    )
+
+
+def test_ast_annotated_irrelevant_variable_is_ignored():
+    """
+    Annotated assignments should not create findings for safe variable names.
+    """
+    code = 'username: str = "abcdef"'
+    result = detect_ast_secrets(code)
+
+    assert result == []
+
+
+def test_ast_annotated_short_value_is_ignored():
+    """
+    Annotated assignments should still respect minimum length rules.
+    """
+    code = 'password: str = "abc"'
+    result = detect_ast_secrets(code)
+
+    assert result == []
+
+
+def test_ast_annotated_assignment_without_value_is_ignored():
+    """
+    Bare annotations without assigned values should be ignored.
+
+    Example:
+        password: str
+    """
+    code = "password: str"
+    result = detect_ast_secrets(code)
+
+    assert result == []
+
+
+def test_ast_annotated_non_string_value_is_ignored():
+    """
+    Annotated assignments with non-string values should be ignored.
+    """
+    code = "password: str = 123456"
+    result = detect_ast_secrets(code)
+
+    assert result == []
+
+
+def test_ast_annotated_assignment_preserves_line_number():
+    """
+    Annotated assignment findings should preserve AST line numbers.
+    """
+    code = """
+    username: str = "safe"
+    password: str = "abcdef"
+    """
+    result = detect_ast_secrets(code)
+
+    assert_single_finding(
+        result,
+        line_number=3,
+        var_name="password",
+        value="abcdef",
+        rule_id="PASSWORD",
+        rule_name="Password",
+        severity="HIGH",
+        reason=PASSWORD_REASON,
+        confidence="LOW",
+    )
diff --git a/tests/test_scanner.py b/tests/test_scanner.py
@@ -0,0 +1,91 @@
+from scanner import scan, scan_file
+from tests.helpers import write_python_file
+
+
+def test_scan_file_returns_findings_for_single_file(tmp_path):
+    """
+    scan_file should scan one Python file and return findings with file metadata attached.
+    """
+    findings_file = write_python_file(
+        tmp_path,
+        "findings.py",
+        'password = "abcdef"\n',
+    )
+
+    result = scan_file(findings_file)
+
+    assert len(result) == 1
+
+    finding = result[0]
+
+    assert finding.file_path == str(findings_file)
+    assert finding.line_number == 1
+    assert finding.var_name == "password"
+    assert finding.rule_id == "PASSWORD"
+    assert finding.rule_name == "Password"
+    assert finding.severity == "HIGH"
+    assert finding.value == "abcdef"
+
+
+def test_scan_file_respects_inline_ignore(tmp_path):
+    """
+    scan_file should suppress findings marked with sentinelscan inline ignore comments.
+    """
+    findings_file = write_python_file(
+        tmp_path,
+        "findings.py",
+        'password = "abcdef"  # sentinelscan: ignore\n'
+        'token = "abc1234567890j"\n',
+    )
+
+    result = scan_file(findings_file)
+
+    assert len(result) == 1
+
+    finding = result[0]
+
+    assert finding.file_path == str(findings_file)
+    assert finding.line_number == 2
+    assert finding.var_name == "token"
+    assert finding.rule_id == "TOKEN"
+    assert finding.rule_name == "Token"
+    assert finding.severity == "MEDIUM"
+    assert finding.value == "abc1234567890j"
+
+
+def test_scan_returns_flat_list_across_multiple_files(tmp_path):
+    """
+    scan should combine findings from multiple files into one flat list.
+
+    This protects against accidentally returning a nested list like:
+    [[finding1], [finding2]]
+    """
+    password_file = write_python_file(
+        tmp_path,
+        "password_file.py",
+        'password = "abcdef"\n',
+    )
+
+    token_file = write_python_file(
+        tmp_path,
+        "token_file.py",
+        'token = "abc1234567890j"\n',
+    )
+
+    result = scan([password_file, token_file])
+
+    assert len(result) == 2
+    assert all(not isinstance(item, list) for item in result)
+
+    assert [finding.rule_id for finding in result] == ["PASSWORD", "TOKEN"]
+    assert [finding.file_path for finding in result] == [
+        str(password_file),
+        str(token_file),
+    ]
+
+
+def test_scan_returns_empty_list_when_no_files():
+    """
+    scan should return an empty list when given no files.
+    """
+    assert scan([]) == []