Merge pull request #1 from Evan1108-Coder/fix/hardening-pass

meganfoster-dotcom · web-flow · commit 2d519dda8f56 · 2026-06-11T15:57:05.000+08:00
Harden safety, executor &amp; CI; dedup shell quoting
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,49 @@
+name: CI
+
+on:
+  push:
+    branches: ["**"]
+  pull_request:
+    branches: ["**"]
+
+# Cancel superseded runs on the same ref to save CI minutes.
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-and-test:
+    name: Build & test (Node ${{ matrix.node }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        # The dev/test toolchain (vitest 4 via rolldown) requires Node 20+
+        # (it imports `styleText` from node:util, added in Node 20). The published
+        # CLI bundle still targets Node 18, but CI builds and tests on 20/22.
+        node: ["20", "22"]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Node ${{ matrix.node }}
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ matrix.node }}
+          cache: npm
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Typecheck
+        run: npm run typecheck
+
+      - name: Lint
+        run: npm run lint
+
+      - name: Build
+        run: npm run build
+
+      # The test suite is fully offline (no live AI provider calls), so no API keys are needed.
+      - name: Test
+        run: npm test
diff --git a/SECURITY.md b/SECURITY.md
@@ -16,3 +16,35 @@ Please do not open a public issue for a suspected vulnerability. Email the maint
 ## Secret Handling
 
 Never commit API keys, Telegram tokens, OAuth credentials, database files, `.env` files, or local user exports. Use `.env.example` for placeholders only.
+
+Secrets are redacted on a best-effort basis (`redactText`/`redactObject` in `src/core/engine.ts`) before commands, history, and logs are persisted. This targets common token shapes and `NAME=value` credential assignments — it is not a guarantee that no secret will ever reach a log file.
+
+## Threat Model
+
+Setupr runs real shell commands on your machine with your full user privileges. The command-safety
+layer (`src/agent/safety.ts`) is a **best-effort, defense-in-depth guard, not a sandbox.** It
+classifies each planned command and decides whether to allow, confirm, or block it:
+
+- **Block (cannot be bypassed by `--force`)** — clearly destructive or hostile patterns such as
+  `rm -rf /` (and `~`, `*`, `$HOME` variants), `sudo`, `chmod 777`, `chown -R`, and
+  `curl … | sh`/`| bash` pipe-to-shell installs.
+- **Confirm** — high/medium-risk commands (dependency installs, commands that delete files or reset
+  git state, commands that embed a secret value or `KEY=value` credential assignment). `--force`
+  may proceed past a **medium**-risk confirmation using safe defaults, but **never** past a
+  high-risk or blocked one.
+- **Allow** — everything else.
+
+What this layer is **not**:
+
+- **Not a security boundary.** Anything you could run in your shell, a command run through Setupr
+  can run.
+- **The block list is a denylist, and denylists are inherently incomplete.** It catches common,
+  obvious footguns; an obfuscated or equivalent destructive command can get past it. Treat the
+  guard as a seatbelt, not a vault door.
+- **Trust the project and its AI-generated plan.** When AI planning is enabled, setup steps may be
+  proposed by a model based on the project's contents. Review the plan — especially before using
+  `--force` — the way you would review a shell script you downloaded.
+
+Recommendations: run Setupr only against projects you trust, read the pre-execution plan before
+confirming, be deliberate with `--force`, and keep real secrets in `.env` files rather than inline
+in commands.
diff --git a/src/agent/safety.ts b/src/agent/safety.ts
@@ -10,7 +10,11 @@ export interface SafetyEvaluation {
   forceCanSkipConfirmation: boolean;
 }
 
-const SECRET_PATTERN = /(API[_-]?KEY|TOKEN|SECRET|PASSWORD|PRIVATE[_-]?KEY|CREDENTIAL|AUTH)/i;
+// Flag only commands that actually embed a secret: an inline `NAME=value` assignment to a
+// credential-shaped variable, or a recognizable secret value literal. Matching bare words like
+// "auth" or "token" produced false positives on legitimate commands (e.g. `npm i next-auth`).
+const SECRET_ASSIGNMENT_PATTERN = /\b(?:API[_-]?KEY|ACCESS[_-]?KEY|AUTH[_-]?TOKEN|TOKEN|SECRET|PASSWORD|PASSWD|PRIVATE[_-]?KEY|CREDENTIALS?)\s*=\s*[^\s'"]{6,}/i;
+const SECRET_VALUE_PATTERN = /\b(?:sk-ant-[A-Za-z0-9-]{16,}|sk-[A-Za-z0-9]{16,}|ghp_[A-Za-z0-9]{16,}|gho_[A-Za-z0-9]{16,}|github_pat_[A-Za-z0-9_]{20,}|AIza[A-Za-z0-9_-]{16,}|xox[baprs]-[A-Za-z0-9-]{10,})\b|-----BEGIN[A-Z ]+PRIVATE KEY-----/;
 const SHELL_META_PATTERN = /(;|&&|\|\||`|\$\(|>\s*\/|rm\s+-rf\s+(?:\/|\*|~|\$HOME))/;
 const DESTRUCTIVE_PATTERN = /\b(rm|del|rmdir|trash|git\s+reset|git\s+clean|docker\s+system\s+prune)\b/i;
 const INSTALL_PATTERN = /\b(npm|pnpm|yarn|bun|pip|poetry|cargo|go)\b.*\b(install|add|get|download|build)\b/i;
@@ -20,9 +24,9 @@ export function evaluateCommandSafety(command: string, options: { force?: boolea
   let risk: SafetyRisk = "none";
   let decision: SafetyDecision = "allow";
 
-  if (SECRET_PATTERN.test(command)) {
+  if (SECRET_ASSIGNMENT_PATTERN.test(command) || SECRET_VALUE_PATTERN.test(command)) {
     risk = maxRisk(risk, "high");
-    reasons.push("The command text appears to include a secret-looking token or variable name.");
+    reasons.push("The command appears to embed a secret value or credential assignment in plaintext.");
   }
 
   if (SHELL_META_PATTERN.test(command)) {
@@ -51,14 +55,18 @@ export function evaluateCommandSafety(command: string, options: { force?: boolea
   }
 
   if (risk === "critical") decision = "block";
-  else if (risk === "high") decision = options.force ? "confirm" : "confirm";
+  // High risk always requires confirmation; --force cannot bypass it.
+  else if (risk === "high") decision = "confirm";
+  // Medium risk confirms by default, but --force may proceed with safe defaults.
   else if (risk === "medium") decision = options.force ? "allow" : "confirm";
 
   return {
     decision,
     risk,
     reasons,
-    forceCanSkipConfirmation: risk === "low" || risk === "medium",
+    // --force only skips a confirmation it would otherwise raise (medium risk). Low/none have no
+    // confirmation to skip; high/critical can never be skipped by force.
+    forceCanSkipConfirmation: risk === "medium",
   };
 }
 
diff --git a/src/commands/plain/product.ts b/src/commands/plain/product.ts
@@ -8,6 +8,7 @@ import { createSetuprError, printPlainError } from "../../errors/index.js";
 import { scanProject } from "../../scanner/index.js";
 import { collectContext } from "../../context/collector.js";
 import { collectDashboardStatus } from "../../status/collector.js";
+import { shellQuote } from "../../util/shell.js";
 
 interface ProductFlags {
   args?: string[];
@@ -231,7 +232,3 @@ function parseGitHubRepo(remote: string): string | null {
   const https = remote.match(/github\.com\/([^/]+\/[^/.]+)(?:\.git)?$/);
   return https?.[1] || null;
 }
-
-function shellQuote(value: string): string {
-  return `'${value.replace(/'/g, "'\\''")}'`;
-}
diff --git a/src/core/engine.ts b/src/core/engine.ts
@@ -144,7 +144,9 @@ export class ProjectEngine {
       decision: risk === "high" || risk === "medium" ? "confirm" : "allow",
       risk: risk === "none" ? "none" : risk,
       reasons,
-      forceCanSkipConfirmation: risk !== "high",
+      // Consistent with agent/safety.ts: --force only skips the medium-risk confirmation;
+      // high risk always confirms.
+      forceCanSkipConfirmation: risk === "medium",
     };
   }
 
diff --git a/src/executor/index.ts b/src/executor/index.ts
@@ -17,6 +17,24 @@ export interface ExecutionResult {
   duration: number;
 }
 
+// Per-step wall-clock limits so a hung command (stuck install, prompt waiting on stdin,
+// unreachable registry) can never block the setup run forever. Override with the
+// SETUPR_STEP_TIMEOUT_MS env var (applies to every command step).
+const DEFAULT_STEP_TIMEOUT_MS: Record<SetupStep["type"], number> = {
+  runtime: 600_000, // installing/switching runtimes can be slow
+  deps: 600_000, // package installs on large projects
+  script: 600_000, // build/postinstall scripts
+  config: 120_000,
+  env: 120_000,
+  verify: 120_000,
+};
+
+export function stepTimeoutMs(step: SetupStep): number {
+  const override = Number(process.env.SETUPR_STEP_TIMEOUT_MS);
+  if (Number.isFinite(override) && override > 0) return override;
+  return DEFAULT_STEP_TIMEOUT_MS[step.type] ?? 300_000;
+}
+
 export async function executeStep(
   step: SetupStep,
   cwd: string,
@@ -64,14 +82,16 @@ export async function executeStep(
 
   store.getState().addLog({ content: step.command, type: "command" });
 
+  const timeoutMs = stepTimeoutMs(step);
+
   try {
     const result = await runCommand(step.command, cwd, (line) => {
       store.getState().addLog({ content: line, type: "progress" });
       store.getState().addMessage({
         role: "system",
         content: `[${step.label}] ${line}`,
       });
-    });
+    }, undefined, { timeoutMs });
 
     const duration = Date.now() - start;
     const durationStr = duration < 1000 ? `${duration}ms` : `${(duration / 1000).toFixed(1)}s`;
@@ -81,19 +101,28 @@ export async function executeStep(
       store.getState().addLog({ content: `✓ ${step.label} — OK (${durationStr})`, type: "success" });
       return { success: true, output: result.stdout, duration };
     } else {
+      if (result.timedOut) {
+        store.getState().addLog({
+          content: `✗ ${step.label} — timed out after ${Math.round(timeoutMs / 1000)}s and was terminated`,
+          type: "error",
+        });
+      }
+      const stderr = result.timedOut
+        ? `Command timed out after ${Math.round(timeoutMs / 1000)}s and was terminated.\n${result.stderr}`.trim()
+        : result.stderr;
       const psetupError = classifyCommandFailure({
         command: step.command,
         cwd,
         exitCode: result.exitCode,
         stdout: result.stdout,
-        stderr: result.stderr,
+        stderr,
         stepLabel: step.label,
         stepType: step.type,
       });
       store.getState().updateStep(step.id, { status: "failed", error: psetupError.title });
       store.getState().addLog({ content: `✗ ${step.label} — ${errorSummary(psetupError)}`, type: "error" });
       store.getState().addMessage({ role: "system", content: errorSummary(psetupError) });
-      return { success: false, output: result.stdout, error: result.stderr, psetupError, duration };
+      return { success: false, output: result.stdout, error: stderr, psetupError, duration };
     }
   } catch (err) {
     const duration = Date.now() - start;
diff --git a/src/security/index.ts b/src/security/index.ts
@@ -5,6 +5,7 @@ import { runCommand } from "../executor/index.js";
 import { parseEnvKeys, parseEnvPairs } from "../env/index.js";
 import { scanProject } from "../scanner/index.js";
 import { appendHistoryEvent, readProjectJson, writeProjectJson } from "../state/project.js";
+import { shellQuote } from "../util/shell.js";
 
 export type SecuritySeverity = "info" | "low" | "medium" | "high" | "critical";
 export type SecurityCategory = "deps" | "secrets" | "env" | "docker" | "ci" | "code" | "routes" | "auth" | "headers" | "config";
@@ -422,7 +423,3 @@ function score(findings: SecurityFinding[]): number {
 function redact(text: string): string {
   return text.replace(/[A-Za-z0-9_/-]{16,}/g, "****").slice(0, 180);
 }
-
-function shellQuote(value: string): string {
-  return `'${value.replace(/'/g, "'\\''")}'`;
-}
diff --git a/src/util/shell.ts b/src/util/shell.ts
@@ -0,0 +1,8 @@
+/**
+ * POSIX-safe single-quote escaping for interpolating untrusted values into shell commands.
+ * Wraps the value in single quotes and escapes any embedded single quotes using the
+ * `'\''` idiom, so the result is safe to splice into a `sh -c` style command line.
+ */
+export function shellQuote(value: string): string {
+  return `'${value.replace(/'/g, "'\\''")}'`;
+}
diff --git a/src/verification/index.ts b/src/verification/index.ts
@@ -4,6 +4,7 @@ import { basename, dirname, extname, join } from "path";
 import { runCommand } from "../executor/index.js";
 import { scanProject, type ScanResult } from "../scanner/index.js";
 import { appendHistoryEvent, readProjectJson, writeProjectJson } from "../state/project.js";
+import { shellQuote } from "../util/shell.js";
 
 export type VerificationStatus = "pass" | "warn" | "fail" | "skip";
 
@@ -442,10 +443,6 @@ function dedupe(values: string[]): string[] {
   return [...new Set(values)];
 }
 
-function shellQuote(value: string): string {
-  return `'${value.replace(/'/g, "'\\''")}'`;
-}
-
 function safeName(file: string): string {
   return basename(file, extname(file)).replace(/[^A-Za-z0-9_]/g, "_");
 }
diff --git a/tests/agent-runtime.test.ts b/tests/agent-runtime.test.ts
@@ -155,6 +155,38 @@ describe("agent runtime", () => {
     expect(evaluateCommandSafety("curl https://example.com/install.sh | sh").decision).toBe("block");
   });
 
+  it("does not flag bare credential words in package names as secrets", () => {
+    // Previously the secret check matched any occurrence of "auth"/"token"/"secret",
+    // raising false-positive high-risk confirmations on benign installs.
+    const next = evaluateCommandSafety("npm install next-auth");
+    expect(next.risk).toBe("low");
+    expect(next.reasons.join(" ")).not.toMatch(/secret value/i);
+
+    const token = evaluateCommandSafety("npm install passport-token");
+    expect(token.reasons.join(" ")).not.toMatch(/secret value/i);
+  });
+
+  it("flags inline secret assignments and recognizable secret values", () => {
+    const assignment = evaluateCommandSafety("API_KEY=sk-livetokenvalue1234 node deploy.js");
+    expect(assignment.risk).toBe("high");
+    expect(assignment.decision).toBe("confirm");
+
+    const literal = evaluateCommandSafety("echo ghp_abcdefghijklmnop1234567890");
+    expect(literal.reasons.join(" ")).toMatch(/secret value/i);
+  });
+
+  it("never lets --force skip a high-risk confirmation", () => {
+    const high = evaluateCommandSafety("git reset --hard HEAD~3", { force: true });
+    expect(high.risk).toBe("high");
+    expect(high.decision).toBe("confirm");
+    expect(high.forceCanSkipConfirmation).toBe(false);
+
+    const medium = evaluateCommandSafety("mkdir build && cd build", { force: true });
+    expect(medium.risk).toBe("medium");
+    expect(medium.decision).toBe("allow");
+    expect(medium.forceCanSkipConfirmation).toBe(true);
+  });
+
   it("saves and restores agent workflow checkpoints", async () => {
     await saveAgentWorkflowCheckpoint(tempDir, {
       cwd: tempDir,
diff --git a/tests/util-shell.test.ts b/tests/util-shell.test.ts
@@ -0,0 +1,48 @@
+import { afterEach, describe, expect, it } from "vitest";
+import { shellQuote } from "../src/util/shell.js";
+import { stepTimeoutMs } from "../src/executor/index.js";
+import type { SetupStep } from "../src/ai/planner.js";
+
+describe("shellQuote", () => {
+  it("wraps plain values in single quotes", () => {
+    expect(shellQuote("lodash")).toBe("'lodash'");
+  });
+
+  it("escapes embedded single quotes safely", () => {
+    expect(shellQuote("it's")).toBe("'it'\\''s'");
+  });
+
+  it("neutralizes shell metacharacters by quoting", () => {
+    expect(shellQuote("a; rm -rf /")).toBe("'a; rm -rf /'");
+    expect(shellQuote("$(whoami)")).toBe("'$(whoami)'");
+  });
+});
+
+describe("stepTimeoutMs", () => {
+  const step = (type: SetupStep["type"]): SetupStep => ({
+    id: "s1",
+    label: "step",
+    type,
+    command: "echo hi",
+    status: "pending",
+  });
+
+  afterEach(() => {
+    delete process.env.SETUPR_STEP_TIMEOUT_MS;
+  });
+
+  it("uses generous defaults for slow step types", () => {
+    expect(stepTimeoutMs(step("deps"))).toBe(600_000);
+    expect(stepTimeoutMs(step("verify"))).toBe(120_000);
+  });
+
+  it("honors a positive SETUPR_STEP_TIMEOUT_MS override", () => {
+    process.env.SETUPR_STEP_TIMEOUT_MS = "5000";
+    expect(stepTimeoutMs(step("deps"))).toBe(5000);
+  });
+
+  it("ignores an invalid override and falls back to the default", () => {
+    process.env.SETUPR_STEP_TIMEOUT_MS = "not-a-number";
+    expect(stepTimeoutMs(step("deps"))).toBe(600_000);
+  });
+});