feat: add confidence ratings using entropy and length calculations

Saharsh1123 · Saharsh1123 · commit 7f1f639f150e · 2026-05-09T17:29:25.000-04:00
diff --git a/detectors/confidence.py b/detectors/confidence.py
@@ -0,0 +1,55 @@
+from collections import Counter
+import math
+
+COMMON_LOW_CONFIDENCE_VALUES = {  # all lowercase: compared against value.lower()
+    "password",
+    "admin",
+    "secret",
+    "token",
+    "apikey",
+    "api_key",
+    "test",
+    "testing",
+    "example",
+    "changeme",
+    "default",
+    "letmein",
+    "qwerty",
+    "abcdef",
+    "abc123",
+    "password123",
+}
+
+def calculate_entropy(value):
+    """Return the Shannon entropy of value in bits per symbol (0.0 if falsy)."""
+    if not value:
+        return 0.0
+
+    length = len(value)
+    entropy = 0.0
+    # Subtract p * log2(p) for each distinct symbol's relative frequency p.
+    for occurrences in Counter(value).values():
+        share = occurrences / length
+        entropy -= share * math.log2(share)
+
+    return entropy
+
+def calculate_confidence(value):
+    """Return a confidence rating for value: "LOW", "MEDIUM" or "HIGH".
+
+    The rating combines a case-insensitive lookup in
+    COMMON_LOW_CONFIDENCE_VALUES with the value's length and entropy.
+    """
+    # Listed common values and anything under 8 characters rate LOW outright.
+    if value.lower() in COMMON_LOW_CONFIDENCE_VALUES or len(value) < 8:
+        return "LOW"
+
+    bits = calculate_entropy(value)
+    if bits < 2.5:
+        return "LOW"
+    if bits < 3.5:
+        return "MEDIUM"
+
+    # NOTE(review): entropy never exceeds log2(len(value)), so values shorter
+    # than 12 characters (log2(11) ~ 3.46) have already returned MEDIUM above.
+    return "MEDIUM" if len(value) < 12 else "HIGH"
diff --git a/detectors/models.py b/detectors/models.py
@@ -50,4 +50,6 @@ class Finding:
     rule_id: str
     rule_name: str
     severity: str
-    reason: str
+    reason: str
+    confidence: str  # no default, so it is declared before the defaulted field
+    entropy: float | None = None  # calculate_entropy returns a float, not an int
diff --git a/detectors/rule_engine.py b/detectors/rule_engine.py
@@ -1,6 +1,6 @@
 from detectors.rules import RULES
 from detectors.models import Finding
-
+from detectors.confidence import calculate_confidence, calculate_entropy
 
 def apply_rules(candidate):
     """
@@ -17,6 +17,7 @@ def apply_rules(candidate):
     for rule in RULES:
         val = candidate.value
         var_name = candidate.var_name
+        entropy = calculate_entropy(val)
 
         # Match structured secret values, such as AWS access keys.
         if rule.value_pattern is not None:
@@ -30,13 +31,17 @@ def apply_rules(candidate):
                         severity=rule.severity,
                         value=val,
                         reason=rule.reason,
+                        entropy=entropy,
+                        confidence="HIGH",
                     )
                 )
 
         # Match suspicious variable names and enforce minimum value length.
         if rule.var_patterns:
             for var_pattern in rule.var_patterns:
                 match = var_pattern.search(var_name)
+                entropy = calculate_entropy(val)
+                confidence = calculate_confidence(val)
 
                 if (
                     match
@@ -52,6 +57,8 @@ def apply_rules(candidate):
                             severity=rule.severity,
                             value=val,
                             reason=rule.reason,
+                            entropy=entropy,
+                            confidence=confidence,
                         )
                     )
 
diff --git a/output.py b/output.py
@@ -64,6 +64,8 @@ def output_json(filtered_findings, redact_secrets):
             "severity": filtered_finding.severity,
             "value": value,
             "reason": filtered_finding.reason,
+            "entropy": filtered_finding.entropy,
+            "confidence": filtered_finding.confidence,
         }
         json_results.append(finding)
 
@@ -104,6 +106,7 @@ def output(filtered_findings, use_json, redact_secrets, files):
                 f"{finding.file_path}:{finding.line_number} "
                 f"{finding.rule_name} → {display_value}"
             )
+            print(f"       Confidence: {finding.confidence}")
             print(f"       Reason: {finding.reason}\n")
 
         print(f"\nTotal findings: {len(filtered_findings)}")

Original file line number	Diff line number	Diff line change
`@@ -64,6 +64,8 @@ def output_json(filtered_findings, redact_secrets):`
`64`	`64`	`"severity": filtered_finding.severity,`
`65`	`65`	`"value": value,`
`66`	`66`	`"reason": filtered_finding.reason,`
	`67`	`+ "entropy": filtered_finding.entropy,`
	`68`	`+ "confidence": filtered_finding.confidence,`
`67`	`69`	`}`
`68`	`70`	`json_results.append(finding)`
`69`	`71`
`@@ -104,6 +106,7 @@ def output(filtered_findings, use_json, redact_secrets, files):`
`104`	`106`	`f"{finding.file_path}:{finding.line_number} "`
`105`	`107`	`f"{finding.rule_name} → {display_value}"`
`106`	`108`	`)`
	`109`	`+ print(f" Confidence: {finding.confidence}")`
`107`	`110`	`print(f" Reason: {finding.reason}\n")`
`108`	`111`
`109`	`112`	`print(f"\nTotal findings: {len(filtered_findings)}")`