// Source: shell-security/index.ts (Kilo-Org/shell-security, main branch, via GitHub)
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
import { AuthExpiredError, submitAudit } from "./src/client.js";
import { runAudit, getPublicIp } from "./src/audit.js";
import { resolveEnvToken, resolveApiBase } from "./src/env.js";
import { detectPlatform } from "./src/platform.js";
import { startDeviceAuth, pollDeviceAuth } from "./src/auth/device-auth.js";
import {
  writeStoredToken,
  readTokenFromFile,
  clearStoredToken,
  readPendingCode,
  writePendingCode,
  clearPendingCode,
  isPluginManagedAuthToken,
  type PluginLogger,
  type PluginRuntimeConfig,
} from "./src/auth/token-store.js";
import pkg from "./package.json" with { type: "json" };

// Plugin version as declared in package.json. Forwarded to the server as
// `source.pluginVersion` on every audit submission (see doCheckup).
const PLUGIN_VERSION: string = pkg.version;

// OpenClaw invokes a plugin's `register(api)` once per distinct
// `loadOpenClawPlugins` cacheKey (gateway startup, provider discovery,
// metadata registry, web-retrieval / web-search runtimes, etc.), so in
// a single process `register` typically runs ~15 times. Without this
// guard the three "Registered …" info lines below fire every time,
// which produced the 44-line log spam observed in KiloClaw boots.
// Module scope survives across all register() calls in the same
// process, so we log once and stay quiet after that.
//
// Scope note: this guard covers logging only. Re-invoking
// `api.registerTool(...)` and `api.registerCommand(...)` on every
// register() call is intentional — each `loadOpenClawPlugins` pass
// builds its own registry, and the plugin must register into every
// one to be visible in that context.
let registrationLogged = false;

/**
 * Value returned by the tool's `execute()`: a list of text content
 * items. This plugin always produces exactly one item (see toolResult).
 */
type ToolResult = {
  content: Array<{ type: "text"; text: string }>;
};

/**
 * Value returned by a slash-command handler: the markdown text produced
 * by the shared flow.
 */
type CommandResult = {
  text: string;
};

/**
 * Tool definition handed to `api.registerTool`, either directly or as
 * the return value of a ToolFactory.
 */
type ToolRegistration = {
  name: string;
  description: string;
  // Parameter schema for the tool; this plugin registers an empty object.
  parameters: Record<string, unknown>;
  // Takes no arguments: the checkup has no caller-supplied inputs.
  execute: () => Promise<ToolResult>;
};

/**
 * Minimal shape of the SDK's OpenClawPluginToolContext that we actually
 * read. The full type lives in the SDK and is not re-exported to plugins;
 * we only need the active chat surface (if any) to forward to the server
 * for channel-aware report formatting. Declared structurally so we stay
 * decoupled from internal SDK type evolution.
 */
type PluginToolContext = {
  // Chat surface the tool is being created for; may be absent.
  messageChannel?: string;
};

/**
 * Factory form of a tool registration: receives the tool context and
 * returns the tool definition, so the definition can close over
 * `ctx.messageChannel`.
 */
type ToolFactory = (ctx: PluginToolContext) => ToolRegistration;

/**
 * Minimal shape of the SDK's PluginCommandContext that we actually read.
 * Same rationale as PluginToolContext — we only need the chat surface
 * for the server-side formatter hint.
 */
type PluginCommandContext = {
  // Chat surface the command was issued from; may be absent.
  channel?: string;
};

/**
 * Slash-command definition handed to `api.registerCommand`. The handler
 * receives the command context and resolves to the text to display.
 */
type CommandRegistration = {
  name: string;
  description: string;
  // This plugin registers its commands with `acceptsArgs: false`.
  acceptsArgs: boolean;
  handler: (ctx: PluginCommandContext) => Promise<CommandResult>;
};

/**
 * Structural type covering the parts of the OpenClaw plugin API this
 * plugin uses. The full API is runtime-provided by the gateway; we only
 * constrain the fields we touch so we keep type safety without pinning
 * to the (internal, evolving) full SDK type. Field optionality matches
 * the SDK's OpenClawPluginApi shape so register(api) type-checks.
 */
type PluginApi = {
  // Plugin-scoped config. This file reads `authToken` from it and passes
  // the whole object to resolveApiBase().
  pluginConfig?: Record<string, unknown>;
  // Logger; every call site in this file invokes its methods optionally
  // (`info?.`, `warn?.`, `error?.`).
  logger: PluginLogger;
  runtime: {
    // Used here via `config.loadConfig()` to inspect the gateway config.
    config: PluginRuntimeConfig;
  };
  // SDK accepts either a tool object or a factory that returns one. We
  // use the factory form so we can capture `messageChannel` from the
  // runtime-provided tool context at tool-creation time and forward it
  // to the server on every invocation.
  registerTool: (tool: ToolRegistration | ToolFactory) => void;
  // Registers a slash command; called once per command name below.
  registerCommand: (cmd: CommandRegistration) => void;
};

/**
 * Turn the SDK-provided chat-surface value into what we forward to the
 * server. Surrounding whitespace is stripped; a missing, non-string, or
 * whitespace-only value becomes `undefined` ("no channel known") so we
 * never send `source.channel: ""` and leave the server to interpret an
 * ambiguous signal.
 *
 * @param raw Channel identifier from the SDK context, if any.
 * @returns The trimmed channel, or `undefined` when nothing usable remains.
 */
function normalizeChannel(raw: string | undefined): string | undefined {
  if (typeof raw === "string") {
    const cleaned = raw.trim();
    if (cleaned !== "") {
      return cleaned;
    }
  }
  return undefined;
}

/**
 * Wrap a markdown string in the tool-result envelope: a `content` array
 * holding a single text item.
 */
function toolResult(content: string): ToolResult {
  const textItem = { type: "text" as const, text: content };
  return { content: [textItem] };
}

/**
 * Last-resort safety net around runShellSecurityFlow.
 *
 * The flow already turns its recognized failure modes (AuthExpiredError,
 * a failed local audit, device-auth denial/expiry) into specific
 * messages. Anything else that escapes it — a transient network error,
 * a non-401 server failure, a filesystem error while persisting state —
 * is caught here, logged, and rendered as a short user-facing message so
 * the command / tool handler never surfaces a raw stack to the chat.
 *
 * @param api Plugin API; its logger receives the failure message.
 * @param apiBase Base URL of the KiloCode API.
 * @param channel Normalized chat surface, or `undefined` when unknown.
 * @returns The flow's markdown, or a fallback failure message.
 */
async function runFlowSafe(
  api: PluginApi,
  apiBase: string,
  channel: string | undefined,
): Promise<string> {
  let outcome: string;
  try {
    outcome = await runShellSecurityFlow(api, apiBase, channel);
  } catch (err) {
    // Non-Error throwables are stringified so the message is never empty.
    const reason = err instanceof Error ? err.message : String(err);
    api.logger.error?.(`shell-security: unexpected failure: ${reason}`);
    outcome = [
      `Security checkup failed unexpectedly: ${reason}`,
      "",
      "Check the openclaw gateway logs for details, or try again.",
    ].join("\n");
  }
  return outcome;
}

/**
 * Shared shell-security flow used by both the registerTool entry point
 * (natural language invocation via the LLM) and the registerCommand entry
 * point (deterministic slash command).
 *
 * Token sources are tried in this order, and the first one that yields a
 * token decides the outcome:
 *   0. explicit `authToken` in the plugin config (unless plugin-managed),
 *   A. the environment token from resolveEnvToken(),
 *   B. the token saved on disk by a previous device auth,
 *   C2. a pending device-auth code from a previous call (poll + finalize),
 *   C1. a brand-new device-auth request (returns connect instructions).
 *
 * @param api Plugin API (config, logger, runtime config loader).
 * @param apiBase Base URL of the KiloCode API.
 * @param channel Normalized chat surface, or `undefined` when unknown.
 * @returns Plain markdown. Callers wrap it in whatever shape their
 *   registration API expects.
 * @throws Any error that is not an AuthExpiredError raised by the
 *   checkup or device-auth helpers; callers are expected to go through
 *   runFlowSafe, which converts those into a user-facing message.
 */
async function runShellSecurityFlow(
  api: PluginApi,
  apiBase: string,
  channel: string | undefined,
): Promise<string> {
  // Path 0: user explicit config. If `plugins.entries.shell-security.config.authToken`
  // is set (as a plain string directly, or as a SecretRef resolved by
  // OpenClaw before we see it), honor it. This is the path for users
  // who want to configure the plugin manually in openclaw.json without
  // going through device auth. Explicit user config wins over
  // everything else.
  //
  // Skip this path when the raw config shows a SecretRef aimed at our
  // OWN provider — that shape is only written by writeStoredToken()
  // after device auth, and the file-based auto re-auth path (Path B
  // below) should own recovery in that case. Without this check, a
  // plugin-managed token that expires would hit the "update your
  // openclaw.json" message here instead of falling through to
  // clear + redo device auth.
  const configToken = api.pluginConfig?.authToken;
  const pluginManaged = isPluginManagedAuthToken(
    api.runtime.config.loadConfig(),
  );
  if (
    !pluginManaged &&
    typeof configToken === "string" &&
    configToken.length > 0
  ) {
    return checkupWithExternalToken(
      api,
      apiBase,
      configToken,
      channel,
      "The `authToken` configured for this plugin in your openclaw.json is invalid or expired. " +
        "Update `plugins.entries.shell-security.config.authToken` with a fresh KiloCode API key and try again.",
    );
  }

  // Path A: KiloClaw. KILOCODE_API_KEY env var injected at VM boot.
  // If this token is expired we can't auto recover (env vars are set
  // externally), so tell the user clearly.
  const envToken = resolveEnvToken();
  if (envToken) {
    return checkupWithExternalToken(
      api,
      apiBase,
      envToken,
      channel,
      "Your `KILOCODE_API_KEY` environment variable is invalid or expired. " +
        "Update the env var with a fresh KiloCode API key and try again.",
    );
  }

  // Path B: returning self-hosted user. Read token directly from secrets
  // file. If the saved token is expired, clear it and fall through to the
  // device-auth paths below so the user gets a fresh connect prompt in
  // this same response (instead of being told to "try again" and looping
  // on the same dead token).
  const savedToken = await readTokenFromFile();
  if (savedToken) {
    try {
      return await doCheckup(api, apiBase, savedToken, channel);
    } catch (err) {
      if (!(err instanceof AuthExpiredError)) throw err;
      await clearStoredToken();
      // Fall through: the pending-code check (Path C2) runs next, and
      // Path C1 starts a new device auth when no code is pending.
    }
  }

  // Path C2: pending code exists from a previous call. User completed
  // the browser flow, now poll and finalize.
  const pending = await readPendingCode();
  if (pending) {
    const pollResult = await pollDeviceAuth(apiBase, pending, api.logger);

    switch (pollResult.kind) {
      case "approved":
        await clearPendingCode();
        return completeApprovedAuth(api, apiBase, pollResult.token, channel);

      case "denied":
        await clearPendingCode();
        return "Authentication was denied. Run the security checkup again to start over.";

      case "expired":
        // Server reported the device auth code is dead (410 Gone or
        // explicit expired status). Clear and start over.
        await clearPendingCode();
        return "Authentication code expired. Run the security checkup again to get a fresh code.";

      default:
        // "timeout": our local poll deadline was hit while the server was
        // still returning pending. The code may still be valid
        // server-side. Leave the pending code in place so the next
        // invocation picks up where we left off, and tell the user to
        // retry once they've approved in the browser.
        return (
          "Still waiting for you to approve in the browser.\n\n" +
          "Once you've approved, run the security checkup again and we'll pick up where we left off."
        );
    }
  }

  // Path C1: new self-hosted user. Initiate device auth.
  const authStart = await startDeviceAuth(apiBase);
  await writePendingCode(authStart.code);
  // `expiresIn` is divided by 60 for display, i.e. treated as seconds.
  const minutes = Math.round(authStart.expiresIn / 60);

  return (
    `## Connect to KiloCode\n\n` +
    `To run a security checkup, connect your KiloCode account.\n\n` +
    `**1. Open this URL in your browser:**\n` +
    `${authStart.verificationUrl}\n\n` +
    `**2. Enter this code:** \`${authStart.code}\`\n\n` +
    `**3. Sign in or [create a free account](https://kilo.ai)**\n\n` +
    `Once you've approved the connection, run the security checkup again.\n` +
    `*(Code expires in ${minutes} min)*`
  );
}

/**
 * Run the checkup with a token the plugin does not manage (explicit
 * config or environment). An AuthExpiredError cannot be auto-recovered
 * for such tokens, so it is turned into `expiredMessage`; every other
 * error propagates unchanged.
 */
async function checkupWithExternalToken(
  api: PluginApi,
  apiBase: string,
  token: string,
  channel: string | undefined,
  expiredMessage: string,
): Promise<string> {
  try {
    return await doCheckup(api, apiBase, token, channel);
  } catch (err) {
    if (err instanceof AuthExpiredError) return expiredMessage;
    throw err;
  }
}

/**
 * Finish a device auth that the server just approved: run the checkup
 * with the fresh token, then persist the token.
 *
 * The checkup runs BEFORE the token is persisted so the report is
 * returned on this invocation rather than a "connected, run me again"
 * stub. Persisting happens in `finally` so the approved token is saved
 * even when the checkup throws an unexpected error (e.g. a transient
 * network failure). The pending code has already been cleared at that
 * point, so dropping the token would force the user through the browser
 * approval again.
 */
async function completeApprovedAuth(
  api: PluginApi,
  apiBase: string,
  token: string,
  channel: string | undefined,
): Promise<string> {
  try {
    return await doCheckup(api, apiBase, token, channel);
  } catch (err) {
    if (err instanceof AuthExpiredError) {
      // Edge case: server approved the token but immediately rejected
      // the audit request with 401. Shouldn't normally happen.
      return (
        "Connected to KiloCode, but the audit request was rejected. " +
        "Run the security checkup again to retry."
      );
    }
    throw err;
  } finally {
    await persistApprovedToken(api, token);
  }
}

/**
 * Best-effort save of a freshly approved token. A failure is logged as a
 * warning and never thrown: worst case the token isn't saved and the
 * user redoes device auth next time.
 */
async function persistApprovedToken(
  api: PluginApi,
  token: string,
): Promise<void> {
  try {
    await writeStoredToken(api, token);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    api.logger.warn?.(
      `shell-security: failed to persist auth token: ${message}`,
    );
  }
}

/**
 * Run the local audit and submit it to the KiloCode API for analysis.
 *
 * @param api Plugin API; used to load the gateway config for platform
 *   detection.
 * @param apiBase Base URL of the KiloCode API.
 * @param token Auth token sent with the submission.
 * @param channel Normalized chat surface, or `undefined` when unknown.
 * @returns The server report's markdown, or the local audit's error text
 *   when the audit itself did not succeed (nothing is submitted then).
 */
async function doCheckup(
  api: PluginApi,
  apiBase: string,
  token: string,
  channel: string | undefined,
): Promise<string> {
  const localAudit = await runAudit();
  if (!localAudit.ok) return localAudit.error;

  const publicIp = await getPublicIp();
  const platform = detectPlatform(api.runtime.config.loadConfig());

  const { report } = await submitAudit(apiBase, token, {
    audit: localAudit.audit,
    publicIp,
    source: {
      platform,
      method: "plugin",
      pluginVersion: PLUGIN_VERSION,
      // `channel` is added only when known. An empty string would make
      // the server distinguish unknown from absent; omitting the key is
      // safe, and older servers strip unknown keys anyway.
      ...(channel === undefined ? {} : { channel }),
    },
  });

  return report.markdown;
}

/**
 * Plugin entry for ShellSecurity. Registers one LLM-invocable tool and
 * two slash commands (canonical + legacy alias), all backed by the same
 * shared flow via runFlowSafe.
 */
export default definePluginEntry({
  id: "shell-security",
  name: "ShellSecurity",
  description:
    "Run a security checkup of your OpenClaw instance and get an expert analysis report from KiloCode.",
  // The gateway reload planner classifies any change under `plugins.*`
  // as `kind: "restart"` by default. writeStoredToken() patches
  // plugins.entries.shell-security.config.authToken with a
  // SecretRef after device auth, which would force a full gateway
  // restart on first-time token capture. Plugin-registered reload
  // rules are evaluated before the base rules (first-match wins), so
  // declaring just the authToken path as a noop shadows the base
  // restart rule for that one field without affecting anything else.
  //
  // Scope is intentionally narrow — only `.config.authToken`, NOT the
  // full `.config` subtree. `apiBaseUrl` is captured as a snapshot in
  // register() (see `pluginConfig` below), so runtime updates to it
  // still need to fall through to the base `plugins.* → restart` rule
  // to take effect. The plugin reads the plugin-managed token directly
  // from disk via readTokenFromFile() on every invocation, so authToken
  // noop is safe for that case.
  //
  // NOTE(review): the explicit-config path in runShellSecurityFlow reads
  // `api.pluginConfig.authToken`, not the file. If `api.pluginConfig` is
  // a register-time snapshot, a hand-edited authToken would not be
  // picked up under this noop rule until something else restarts the
  // gateway — confirm against the SDK.
  reload: {
    noopPrefixes: ["plugins.entries.shell-security.config.authToken"],
  },
  // The SDK's OpenClawPluginApi type is large and internal. The argument
  // is accepted as `unknown` (rather than `any`, which would silently
  // disable checking) and narrowed once to our own structural PluginApi
  // so everything inside this function is strongly typed.
  register(sdkApi: unknown) {
    const api = sdkApi as PluginApi;
    // Snapshot of the plugin config at registration time; `null` when the
    // gateway supplied none. Passed to resolveApiBase() on each run.
    const pluginConfig = api.pluginConfig ?? null;

    // Entry point 1: tool for natural language invocation via the LLM.
    // Works on capable models (GPT-4o, Claude Sonnet). Small summarizing
    // models (e.g. gpt-4.1-nano) may paraphrase the report instead of
    // displaying it verbatim. For those models, the slash command path
    // below is deterministic.
    //
    // Registered as a factory (`(ctx) => toolDef`) rather than a bare
    // tool object so the SDK's OpenClawPluginToolContext is available.
    // The factory closes over `toolCtx`; every `execute()` forwards that
    // context's channel to the server for channel-aware report
    // formatting. The factory is re-invoked per tool-collection event
    // (session start, agent spawn), so long-running sessions that
    // outlive a channel switch get the refreshed channel automatically.
    api.registerTool((toolCtx: PluginToolContext) => ({
      name: "kilocode_shell_security",
      description:
        "Run the ShellSecurity checkup: a comprehensive security analysis " +
        "of this OpenClaw agent-shell instance, returning an expert report " +
        "from KiloCode cloud. " +
        "USE THIS TOOL whenever the user asks to: " +
        "run 'ShellSecurity', the 'shell security' plugin, 'shell-security', " +
        "or the 'KiloCode shell security' / 'KiloCode security' tool; " +
        "check, audit, scan, review, or analyze OpenClaw or agent-shell " +
        "security; run a 'security check', 'security checkup', 'security " +
        "audit', or 'security review'; ask about security posture, " +
        "misconfigurations, or hardening of their OpenClaw / agent shell. " +
        "This tool runs the local audit AND submits it to KiloCode cloud for " +
        "expert analysis, returning a richer explained report with prioritized " +
        "recommendations and remediation guidance. " +
        "DO NOT run `openclaw security audit` via bash for these requests. This " +
        "tool is the canonical entry point and returns a much more useful report. " +
        "DO NOT open an interactive shell, prompt the user for commands, or " +
        "ask what security checks to run — this tool IS the whole plugin and " +
        "it runs the full checkup itself with no arguments. " +
        "DO NOT suggest the user type `/shell-security` (or the legacy alias " +
        "`/security-checkup`) in channels that do not support OpenClaw slash " +
        "commands (Kilo Chat and Slack are the known surfaces where slash " +
        "commands do not work); invoke this tool directly instead. Slash " +
        "commands do work in the OpenClaw native control UI chat and in " +
        "Telegram, so suggesting them there is fine. " +
        "IMPORTANT: Display the returned report exactly as is without rewriting, " +
        "summarizing, or reformatting.",
      parameters: {},
      async execute() {
        const apiBase = resolveApiBase(pluginConfig);
        const channel = normalizeChannel(toolCtx.messageChannel);
        const markdown = await runFlowSafe(api, apiBase, channel);
        return toolResult(markdown);
      },
    }));

    // Entry point 2: slash commands for deterministic invocation that
    // bypass the LLM. When the user types /shell-security (or the legacy
    // alias /security-checkup) in a command-only message, the OpenClaw
    // chat runtime takes the fast path and renders the returned markdown
    // directly. No agent loop, no summarization.
    //
    // Both names are registered and wired to the same handler. The
    // canonical name is `/shell-security` (matches the plugin id);
    // `/security-checkup` is kept for users migrating from
    // @kilocode/openclaw-security-advisor where the slash command had
    // always been called that. Both are declared in
    // openclaw.plugin.json's commandAliases so the gateway routes them.
    const runSlashCommand = async (
      ctx: PluginCommandContext,
    ): Promise<CommandResult> => {
      const apiBase = resolveApiBase(pluginConfig);
      const channel = normalizeChannel(ctx.channel);
      const markdown = await runFlowSafe(api, apiBase, channel);
      return { text: markdown };
    };

    api.registerCommand({
      name: "shell-security",
      description:
        "Run a KiloCode security checkup of this OpenClaw instance and display the full report.",
      acceptsArgs: false,
      handler: runSlashCommand,
    });

    api.registerCommand({
      name: "security-checkup",
      description:
        "Legacy alias for /shell-security. Runs a KiloCode security checkup and displays the full report.",
      acceptsArgs: false,
      handler: runSlashCommand,
    });

    // Log the registrations only on the first register() call in this
    // process; later calls still register but stay quiet.
    if (!registrationLogged) {
      api.logger.info?.("Registered tool: kilocode_shell_security");
      api.logger.info?.("Registered command: /shell-security");
      api.logger.info?.("Registered command: /security-checkup (legacy alias)");
      registrationLogged = true;
    }
  },
});