Skip to content

Commit 7f1f639

Browse files
committed
feat: add confidence ratings using entropy and length calculations
1 parent 0f1385b commit 7f1f639

4 files changed

Lines changed: 69 additions & 2 deletions

File tree

detectors/confidence.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from collections import Counter
2+
import math
3+
4+
# Placeholder / well-known weak values that should never be reported with
# more than "LOW" confidence, regardless of their length or entropy.
# Entries are lowercase; callers are expected to lowercase the candidate
# before testing membership. A frozenset keeps this shared lookup table
# immutable while preserving O(1) membership checks.
COMMON_LOW_CONFIDENCE_VALUES = frozenset(
    {
        "password",
        "admin",
        "secret",
        "token",
        "apikey",
        "api_key",
        "test",
        "testing",
        "example",
        "changeme",
        "default",
        "letmein",
        "qwerty",
        "abcdef",
        "abc123",
        "password123",
    }
)
22+
23+
def calculate_entropy(value):
    """Return the Shannon entropy of ``value`` in bits per symbol.

    The entropy is computed from the relative frequency of each distinct
    symbol in ``value``: ``-sum(p * log2(p))``.

    Args:
        value: The sequence to measure (typically a string). Any falsy
            value, such as ``""`` or ``None``, is treated as empty.

    Returns:
        The entropy as a float; ``0.0`` for empty input or for input made
        of a single repeated symbol.
    """
    if not value:
        return 0.0

    total = len(value)
    # Accumulate -p * log2(p) for the relative frequency p of each symbol.
    # The explicit 0.0 start value keeps the result a float in every case.
    return sum(
        (
            -(count / total) * math.log2(count / total)
            for count in Counter(value).values()
        ),
        0.0,
    )
36+
37+
def calculate_confidence(value):
    """Rate how likely ``value`` is a real secret.

    The rating is decided by the first rule that matches:

    1. Empty or missing value                          -> "LOW"
    2. Lowercased value is a known placeholder
       (``COMMON_LOW_CONFIDENCE_VALUES``)               -> "LOW"
    3. Shorter than 8 characters                       -> "LOW"
    4. Entropy below 2.5                               -> "LOW"
    5. Entropy below 3.5, or shorter than 12 characters -> "MEDIUM"
    6. Anything else                                   -> "HIGH"

    Args:
        value: The candidate secret value (a string, or ``None``).

    Returns:
        One of the strings ``"LOW"``, ``"MEDIUM"`` or ``"HIGH"``.
    """
    # Guard first: calling ``.lower()`` on None would raise AttributeError,
    # and an empty value can never be a credible secret.
    if not value:
        return "LOW"

    if value.lower() in COMMON_LOW_CONFIDENCE_VALUES:
        return "LOW"

    length = len(value)
    if length < 8:
        return "LOW"

    # Entropy is only computed once the cheap rejections above have passed.
    entropy = calculate_entropy(value)
    if entropy < 2.5:
        return "LOW"

    if entropy < 3.5 or length < 12:
        return "MEDIUM"

    return "HIGH"

detectors/models.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,6 @@ class Finding:
5050
rule_id: str
5151
rule_name: str
5252
severity: str
53-
reason: str
53+
reason: str
54+
entropy: int | None = None
55+
confidence: str

detectors/rule_engine.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from detectors.rules import RULES
22
from detectors.models import Finding
3-
3+
from detectors.confidence import calculate_confidence, calculate_entropy
44

55
def apply_rules(candidate):
66
"""
@@ -17,6 +17,7 @@ def apply_rules(candidate):
1717
for rule in RULES:
1818
val = candidate.value
1919
var_name = candidate.var_name
20+
entropy = calculate_entropy(val)
2021

2122
# Match structured secret values, such as AWS access keys.
2223
if rule.value_pattern is not None:
@@ -30,13 +31,17 @@ def apply_rules(candidate):
3031
severity=rule.severity,
3132
value=val,
3233
reason=rule.reason,
34+
entropy=entropy,
35+
confidence="HIGH",
3336
)
3437
)
3538

3639
# Match suspicious variable names and enforce minimum value length.
3740
if rule.var_patterns:
3841
for var_pattern in rule.var_patterns:
3942
match = var_pattern.search(var_name)
43+
entropy = calculate_entropy(val)
44+
confidence = calculate_confidence(val)
4045

4146
if (
4247
match
@@ -52,6 +57,8 @@ def apply_rules(candidate):
5257
severity=rule.severity,
5358
value=val,
5459
reason=rule.reason,
60+
entropy=entropy,
61+
confidence=confidence,
5562
)
5663
)
5764

output.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def output_json(filtered_findings, redact_secrets):
6464
"severity": filtered_finding.severity,
6565
"value": value,
6666
"reason": filtered_finding.reason,
67+
"entropy": filtered_finding.entropy,
68+
"confidence": filtered_finding.confidence,
6769
}
6870
json_results.append(finding)
6971

@@ -104,6 +106,7 @@ def output(filtered_findings, use_json, redact_secrets, files):
104106
f"{finding.file_path}:{finding.line_number} "
105107
f"{finding.rule_name}{display_value}"
106108
)
109+
print(f" Confidence: {finding.confidence}")
107110
print(f" Reason: {finding.reason}\n")
108111

109112
print(f"\nTotal findings: {len(filtered_findings)}")

0 commit comments

Comments
 (0)