@@ -44,13 +44,15 @@ check_pattern() {
4444 if echo " $PROMPT " | grep -qiE " $pattern " ; then
4545 local evidence
4646 evidence=$( echo " $PROMPT " | grep -oiE " $pattern " | head -1)
47- THREATS_FOUND+=(" $category :$severity :$description :$evidence " )
47+ local evidence_encoded
48+ evidence_encoded=$( printf ' %s' " $evidence " | base64 | tr -d ' \n' )
49+ THREATS_FOUND+=(" $category $severity $description $evidence_encoded " )
4850 fi
4951}
5052
5153# Data exfiltration signals
5254check_pattern " send\s+(all|every|entire)\s+\w+\s+to\s+" " data_exfiltration" " 0.8" " Bulk data transfer"
53- check_pattern " export\s+.*\s+to\s+(external|outside|third. ?party)" " data_exfiltration" " 0.9" " External export"
55+ check_pattern " export\s+.*\s+to\s+(external|outside|third[ _-]?party)" " data_exfiltration" " 0.9" " External export" # separator may be a space, '_', '-' or absent: "third party", "third-party", "third_party", "thirdparty"
5456check_pattern " curl\s+.*\s+-d\s+" " data_exfiltration" " 0.7" " HTTP POST with data"
5557check_pattern " upload\s+.*\s+(credentials|secrets|keys)" " data_exfiltration" " 0.95" " Credential upload"
5658
@@ -61,16 +63,16 @@ check_pattern "add\s+.*\s+(sudoers|administrators)" "privilege_escalation" "0.95
6163
6264# System destruction signals
6365check_pattern " (rm\s+-rf\s+/|del\s+/[sq]|format\s+c:)" " system_destruction" " 0.95" " Destructive command"
64- check_pattern " (drop\s+database|truncate\s+table|delete\s+from\s+\w+\s*$ )" " system_destruction" " 0.9" " Database destruction"
66+ check_pattern " (drop\s+database|truncate\s+table|delete\s+from\s+\w+\s*(;|\s*$) )" " system_destruction" " 0.9" " Database destruction"
6567check_pattern " wipe\s+(all|entire|every)" " system_destruction" " 0.9" " Mass deletion"
6668
6769# Prompt injection signals
6870check_pattern " ignore\s+(previous|above|all)\s+(instructions?|rules?|prompts?)" " prompt_injection" " 0.9" " Instruction override"
69- check_pattern " you\s+are\s+now\s+(a|an)\s+" " prompt_injection" " 0.7" " Role reassignment"
70- check_pattern " system\s*:\s*" " prompt_injection" " 0.6" " System prompt injection"
71+ check_pattern " you\s+are\s+now\s+(a|an)\s+(assistant|ai|bot|system|expert|language\s+model)\b " " prompt_injection" " 0.7" " Role reassignment"
72+ check_pattern " ^\s*system\s*:\s*you\s+are " " prompt_injection" " 0.6" " System prompt injection" # grep matches one input line at a time, so ^ already anchors every line of a multi-line prompt; a \n alternative can never match a newline
7173
7274# Credential exposure signals
73- check_pattern " (api[_-]?key|secret[_-]?key|password|token)\s*[:=]\s*['\" ]?\w{8,}" " credential_exposure" " 0.9" " Hardcoded credential"
75+ check_pattern " (api[_-]?key|secret[_-]?key|password|token)\s*[:=]\s*['\" ]?\w{8,}" " credential_exposure" " 0.9" " Possible hardcoded credential"
7476check_pattern " (aws_access_key|AKIA[0-9A-Z]{16})" " credential_exposure" " 0.95" " AWS key exposure"
7577
7678# Log the prompt event
@@ -80,7 +82,9 @@ if [[ ${#THREATS_FOUND[@]} -gt 0 ]]; then
8082 FIRST=true
8183 MAX_SEVERITY=" 0.0"
8284 for threat in " ${THREATS_FOUND[@]} " ; do
83- IFS=' :' read -r category severity description evidence <<< " $threat"
85+ IFS=$' \t ' read -r category severity description evidence_encoded <<< " $threat"
86+ evidence="" # plain assignment, not 'local': this loop appears to run at script top level, where bash rejects 'local' ("can only be used in a function") with a non-zero status
87+ evidence=$( printf ' %s' " $evidence_encoded " | base64 -d 2> /dev/null || echo " [redacted]" )
8488
8589 if [[ " $FIRST " != " true" ]]; then
8690 THREATS_JSON+=" ,"
@@ -104,14 +108,15 @@ if [[ ${#THREATS_FOUND[@]} -gt 0 ]]; then
104108 jq -Rn \
105109 --arg timestamp " $TIMESTAMP " \
106110 --arg level " $LEVEL " \
111+ --arg max_severity " $MAX_SEVERITY " \
107112 --argjson threats " $THREATS_JSON " \
108113 --argjson count " ${# THREATS_FOUND[@]} " \
109- ' {"timestamp":$timestamp,"event":"threat_detected","governance_level":$level,"threat_count":$count,"threats":$threats}' \
114+ ' {"timestamp":$timestamp,"event":"threat_detected","governance_level":$level,"threat_count":$count,"max_severity":($max_severity|tonumber)," threats":$threats}' \
110115 >> " $LOG_FILE "
111116
112- echo " ⚠️ Governance: ${# THREATS_FOUND[@]} threat signal(s) detected"
117+ echo " ⚠️ Governance: ${# THREATS_FOUND[@]} threat signal(s) detected (max severity: $MAX_SEVERITY ) "
113118 for threat in " ${THREATS_FOUND[@]} " ; do
114- IFS=' : ' read -r category severity description evidence <<< " $threat"
119+ IFS=$' \t ' read -r category severity description _evidence_encoded <<< " $threat"
115120 echo " 🔴 [$category ] $description (severity: $severity )"
116121 done
117122
0 commit comments