feat: implement session-based tracking for malicious behavior

Jazzcort · Jazzcort · commit 0951ca091c2a · 2026-05-21T15:19:17.000-04:00
Adds `BehaviorRecord` and `BehaviorRecordManager` to monitor gatekeeper verdicts per
session. If a session accumulates 4 total or 3 consecutive malicious actions, it is
permanently flagged as security compromised. Once flagged, all subsequent scripts
executed in that session will be forced to require user confirmation, regardless of
the script's default requirements.

Also, if the previous script was marked as malicious, the subsequent execution
request will be forced to use either run_script_with_confirmation or
run_script_interactive so that a human reviews it before it runs.

The security compromised flag is emitted through the tool call result for mcp-app.
A warning message will be shown in the mcp-app to notify users about the malicious
behavior.
diff --git a/mcp-app/src/global.css b/mcp-app/src/global.css
@@ -111,6 +111,10 @@
   .execution-state-tag {
     @apply border border-app-border-primary rounded-md w-fit px-2 text-sm
   }
+
+  .security-warning {
+    @apply bg-red-200 text-red-600 rounded-lg p-4 mb-4
+  }
 }
 
 * {
diff --git a/mcp-app/src/run-script-app.tsx b/mcp-app/src/run-script-app.tsx
@@ -251,6 +251,15 @@ function RunScriptAppInner({
     <div className="app-container">
       <div className="script-main-box">
         <div className="mb-4">
+          {validatedToolResult.isSecurityCompromised && (
+            <p className="security-warning">
+              Suspicious activity detected - your chat client may be under
+              attack. Please examine previous tool calls in detail, and if you
+              have any doubts, do not approve this command and terminate this
+              chat session.
+            </p>
+          )}
+
           {/* TODO: we can dynamically inject the platform that users are using here */}
           <p>
             Goose wants to perform the following action on{" "}
diff --git a/mcp-app/src/types.ts b/mcp-app/src/types.ts
@@ -33,19 +33,27 @@ export const ExecuteScriptResultSchema = z.object({
 
 export type ExecuteScriptResult = z.infer<typeof ExecuteScriptResultSchema>;
 
-export const McpAppToolResultSchema = z.object({
-  status: z.enum([
-    "OK",
-    "BAD_DESCRIPTION",
-    "POLICY",
-    "MODIFIES_SYSTEM",
-    "UNCLEAR",
-    "DANGEROUS",
-    "MALICIOUS",
-  ]),
-  detail: z.string(),
-  id: z.string(),
-});
+export const McpAppToolResultSchema = z
+  .object({
+    status: z.enum([
+      "OK",
+      "BAD_DESCRIPTION",
+      "POLICY",
+      "MODIFIES_SYSTEM",
+      "UNCLEAR",
+      "DANGEROUS",
+      "MALICIOUS",
+    ]),
+    detail: z.string(),
+    id: z.string(),
+    is_security_compromised: z.boolean(),
+  })
+  .transform((data) => ({
+    status: data.status,
+    detail: data.detail,
+    id: data.id,
+    isSecurityCompromised: data.is_security_compromised,
+  }));
 
 export type McpAppToolResult = z.infer<typeof McpAppToolResultSchema>;
 
diff --git a/src/linux_mcp_server/tools/run_script.py b/src/linux_mcp_server/tools/run_script.py
@@ -154,8 +154,77 @@ def set_script_state(self, id: str, new_state: ExecutionState):
 
 
 script_store = ScriptStore()
+MAX_CONSECUTIVE_MALICIOUS_ACTIONS = 3
+MAX_TOTAL_MALICIOUS_ACTIONS = 4
 
 
+class BehaviorRecord:
+    """
+    Tracks gatekeeper verdicts for a single session to detect malicious behavior.
+
+    A session is flagged as security compromised if it accumulates MAX_TOTAL_MALICIOUS_ACTIONS
+    total malicious actions or MAX_CONSECUTIVE_MALICIOUS_ACTIONS consecutive malicious actions.
+    Once flagged, the session is permanently marked and all subsequent scripts require confirmation.
+    """
+
+    def __init__(self):
+        self._consecutive_malicious_action_counts = 0
+        self._total_malicious_action_counts = 0
+        self._previous_action_status = None
+        self._current_action_status = None
+        self._is_security_compromised: bool = False
+
+    def add_record(self, status: GatekeeperStatus):
+        """
+        Record a gatekeeper verdict and update the security compromised flag if
+        thresholds are met.
+        """
+        # No need to update the record if it's already considered security compromised
+        if self._is_security_compromised:
+            return
+
+        self._previous_action_status = self._current_action_status
+        self._current_action_status = status
+
+        if status == GatekeeperStatus.MALICIOUS:
+            self._consecutive_malicious_action_counts += 1
+            self._total_malicious_action_counts += 1
+        else:
+            self._consecutive_malicious_action_counts = 0
+
+        # Check if the record matches the conditions of being considered as malicious
+        if self._total_malicious_action_counts >= MAX_TOTAL_MALICIOUS_ACTIONS:
+            self._is_security_compromised = True
+            return
+
+        if self._consecutive_malicious_action_counts >= MAX_CONSECUTIVE_MALICIOUS_ACTIONS:
+            self._is_security_compromised = True
+
+    @property
+    def is_security_compromised(self) -> bool:
+        return self._is_security_compromised
+
+    @property
+    def is_previous_action_malicious(self) -> bool:
+        return self._previous_action_status == GatekeeperStatus.MALICIOUS
+
+
+class BehaviorRecordManager:
+    """Manages per-session BehaviorRecords, creating them on first access."""
+
+    def __init__(self):
+        self._records: dict[str, BehaviorRecord] = dict()
+
+    def get_record_by_session_id(self, session_id: str) -> BehaviorRecord:
+        """Return the BehaviorRecord for a session, creating one if it doesn't exist."""
+        if session_id not in self._records:
+            self._records[session_id] = BehaviorRecord()
+
+        return self._records[session_id]
+
+
+behavior_record_manager = BehaviorRecordManager()
+
 BASH_STRICT_PREAMBLE = "set -euo pipefail; "
 
 SYSTEMD_RUN_ARGS = [
@@ -209,6 +278,7 @@ class RunScriptInteractiveResult(BaseModel):
     id: str
     status: GatekeeperStatus
     detail: str
+    is_security_compromised: bool
 
 
 # class UserInfo(BaseModel):
@@ -314,9 +384,16 @@ async def run_script_interactive(
     host: Host = None,
 ) -> ToolResult:
     script_details = script_store.get_script_details(token)
+    behavior_record = behavior_record_manager.get_record_by_session_id(ctx.session_id)
+
+    needs_confirmation = (
+        script_details.needs_confirmation
+        or behavior_record.is_security_compromised
+        or behavior_record.is_previous_action_malicious
+    )
 
     # Verify that this script requires confirmation
-    if not script_details.needs_confirmation:
+    if not needs_confirmation:
         raise ToolError("This script does not require confirmation. Use run_script instead of run_script_interactive.")
 
     # Check if the passed parameters match the stored script details
@@ -338,6 +415,8 @@ async def run_script_interactive(
             (BASH_STRICT_PREAMBLE + script) if script_type == SCRIPT_TYPE_BASH else script,
             readonly=readonly,
         )
+        behavior_record.add_record(gatekeeper_result.status)
+
         if gatekeeper_result.status != GatekeeperStatus.OK:
             script_store.set_script_state(token, "rejected-gatekeeper")
             raise ToolError(gatekeeper_result.description)
@@ -352,7 +431,12 @@ async def run_script_interactive(
         )
     ]
 
-    structured_content_obj = RunScriptInteractiveResult(id=result_id, status=GatekeeperStatus.OK, detail="")
+    structured_content_obj = RunScriptInteractiveResult(
+        id=result_id,
+        status=GatekeeperStatus.OK,
+        detail="",
+        is_security_compromised=behavior_record.is_security_compromised,
+    )
 
     return ToolResult(content=content, structured_content=structured_content_obj.model_dump())
 
@@ -413,23 +497,32 @@ async def validate_script(
         readonly=readonly,
     )
 
+    behavior_record = behavior_record_manager.get_record_by_session_id(ctx.session_id)
+    behavior_record.add_record(gatekeeper_result.status)
+
     id = script_store.add_script(description, script, script_type, host, readonly)
     script_details = script_store.get_script_details(id)
 
     if gatekeeper_result.status != GatekeeperStatus.OK:
         script_store.set_script_state(id, "rejected-gatekeeper")
         raise ToolError(gatekeeper_result.description)
 
+    needs_confirmation = (
+        script_details.needs_confirmation
+        or behavior_record.is_security_compromised
+        or behavior_record.is_previous_action_malicious
+    )
+
     result = ToolResult(
         content=[
             TextContent(
                 type="text",
-                text=f"Script passed gatekeeper validation and is stored with ID {id}. Please use {_pick_execution_tool(script_details.needs_confirmation)} to execute the validated script.",
+                text=f"Script passed gatekeeper validation and is stored with ID {id}. Please use {_pick_execution_tool(needs_confirmation)} to execute the validated script.",
             )
         ],
         structured_content={
             "token": id,
-            "needs_confirmation": script_details.needs_confirmation,
+            "needs_confirmation": needs_confirmation,
         },
     )
     return result
@@ -448,10 +541,17 @@ async def run_script(
     token: t.Annotated[str, Field(description="The token returned by the validate_script tool.")],
 ) -> str:
     script_details = script_store.get_script_details(token)
+    behavior_record = behavior_record_manager.get_record_by_session_id(ctx.session_id)
+
+    needs_confirmation = (
+        script_details.needs_confirmation
+        or behavior_record.is_security_compromised
+        or behavior_record.is_previous_action_malicious
+    )
 
     # Verify that this script doesn't require confirmation
-    if script_details.needs_confirmation:
-        raise ToolError(f"This script requires confirmation. Use {_pick_execution_tool(True)} instead of run_script.")
+    if needs_confirmation:
+        raise ToolError("This script requires confirmation. Use run_script_with_confirmation instead of run_script.")
 
     script_store.set_script_state(token, "executing")
     try:
@@ -498,9 +598,16 @@ async def run_script_with_confirmation(
     host: Host = None,
 ) -> str:
     script_details = script_store.get_script_details(token)
+    behavior_record = behavior_record_manager.get_record_by_session_id(ctx.session_id)
+
+    needs_confirmation = (
+        script_details.needs_confirmation
+        or behavior_record.is_security_compromised
+        or behavior_record.is_previous_action_malicious
+    )
 
     # Verify that this script requires confirmation
-    if not script_details.needs_confirmation:
+    if not needs_confirmation:
         raise ToolError(
             "This script does not require confirmation. Use run_script instead of run_script_with_confirmation."
         )
@@ -527,6 +634,8 @@ async def run_script_with_confirmation(
             (BASH_STRICT_PREAMBLE + script) if script_type == SCRIPT_TYPE_BASH else script,
             readonly=readonly,
         )
+        behavior_record.add_record(gatekeeper_result.status)
+
         if gatekeeper_result.status != GatekeeperStatus.OK:
             script_store.set_script_state(token, "rejected-gatekeeper")
             raise ToolError(gatekeeper_result.description)

Original file line number	Diff line number	Diff line change
`@@ -111,6 +111,10 @@`
`111`	`111`	`.execution-state-tag {`
`112`	`112`	`@apply border border-app-border-primary rounded-md w-fit px-2 text-sm`
`113`	`113`	`}`
	`114`	`+`
	`115`	`+ .security-warning {`
	`116`	`+ @apply bg-red-200 text-red-600 rounded-lg p-4 mb-4`
	`117`	`+ }`
`114`	`118`	`}`
`115`	`119`
`116`	`120`	`* {`