Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions .github/workflows/auto-approve.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@ name: auto-approve
# upgrade-main dependency PRs). pull_request_target is intentional and safe
# here: the job never checks out or executes PR code — it only calls the
# review API. Branch protection still requires the build workflow to pass.
#
# Trigger is `labeled` ONLY: each approval requires an explicit label event
# from someone with triage+ permission. Approving on `synchronize` would
# re-approve arbitrary future commits pushed to an already-labeled PR with no
# human re-review — combined with Mergify's auto-merge that is an
# unreviewed-merge path. The Mergify `dismiss stale approvals` rule is the
# other half of this invariant: a push invalidates the bot's prior approval,
# and re-approval requires removing and re-adding the label (deliberate
# friction: new content needs a fresh, explicit trigger).
on:
pull_request_target:
types:
- labeled
- unlabeled
- opened
- synchronize
- reopened
- ready_for_review
- review_requested
permissions:
actions: none
attestations: none
Expand All @@ -34,7 +37,11 @@ jobs:
runs-on: ubuntu-latest
permissions:
pull-requests: write
if: contains(github.event.pull_request.labels.*.name, 'auto-approve')
# Only act on the auto-approve label itself, and never on fork PRs —
# a fork head can be force-pushed by the fork owner after labeling.
if: |
github.event.label.name == 'auto-approve' &&
github.event.pull_request.head.repo.full_name == github.repository
steps:
- uses: hmarr/auto-approve-action@f0939ea97e9205ef24d872e76833fa908a770363 # v4.0.0
with:
Expand Down
12 changes: 12 additions & 0 deletions .mergify.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ The `@backgroundagent/cli` package provides the `bgagent` executable for submitt
- `src/api-client.ts` — HTTP client wrapping `fetch` with auth header injection
- `src/auth.ts` — Cognito login, token caching (`~/.bgagent/credentials.json`), auto-refresh
- `src/config.ts` — Read/write `~/.bgagent/config.json`
- `src/types.ts` — API request/response types (mirrored from `cdk/src/handlers/shared/types.ts`), including `TaskType` (`new_task` | `pr_iteration` | `pr_review`)
- `src/types.ts` — API request/response types (mirrored from `cdk/src/handlers/shared/types.ts`), including `workflow_ref` / `ResolvedWorkflow` (workflow ids like `coding/new-task-v1`, `coding/pr-iteration-v1`, `coding/pr-review-v1`; replaced the former `TaskType` enum, #248)
- `src/format.ts` — Output formatting (table, detail view, JSON)
- `src/debug.ts` — Verbose/debug logging (`--verbose` flag)
- `src/errors.ts` — `CliError` and `ApiError` classes
Expand Down
8 changes: 7 additions & 1 deletion agent/src/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import re
import time
from collections.abc import Callable
from datetime import UTC
from typing import TYPE_CHECKING, Any

import nudge_reader
Expand Down Expand Up @@ -805,7 +806,12 @@ def _remaining_maxlifetime_s() -> int | None:
else:
from datetime import datetime

started_epoch = int(datetime.strptime(started_at, "%Y-%m-%dT%H:%M:%SZ").timestamp())
# The trailing Z means UTC; strptime returns a naive datetime whose
# .timestamp() would otherwise be interpreted in the container's
# local TZ, skewing remaining-lifetime math by the UTC offset.
started_epoch = int(
datetime.strptime(started_at, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=UTC).timestamp()
)
except (ValueError, AttributeError):
return None
elapsed = int(time.time()) - started_epoch
Expand Down
10 changes: 8 additions & 2 deletions agent/src/output_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,15 @@ class ScanResult:
re.IGNORECASE,
),
),
# GitHub tokens (PAT, OAuth, App, user-to-server, fine-grained)
("GITHUB_TOKEN", re.compile(r"(?:ghp|gho|ghs|ghu)_[a-zA-Z0-9]{36}")),
# GitHub tokens (PAT, OAuth, App, user-to-server, refresh, fine-grained).
# Length is variable across token generations — match 36+ chars rather
# than exactly 36 so newer/longer formats are still caught.
("GITHUB_TOKEN", re.compile(r"(?:ghp|gho|ghs|ghu|ghr)_[a-zA-Z0-9]{36,}")),
("GITHUB_PAT", re.compile(r"github_pat_[a-zA-Z0-9_]{22,}")),
# Credentials embedded in git remote URLs (https://x-access-token:TOKEN@github.com/...).
# The agent authenticates via env-based credential helper, but tool output
# may still echo such URLs from other sources.
("GITHUB_URL_TOKEN", re.compile(r"x-access-token:[^\s@\"']+@")),
# Private keys (PEM blocks)
(
"PRIVATE_KEY",
Expand Down
18 changes: 14 additions & 4 deletions agent/src/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,30 @@ def setup_repo(config: TaskConfig) -> RepoSetup:
label="clone",
)

# Configure remote URL with embedded token so git push works without
# credential helpers or extra auth setup inside the agent.
token = config.github_token
# Pin the remote to the plain https URL (no embedded credentials) and
# authenticate git push via gh's credential helper. Embedding the token
# in the remote URL would persist it in .git/config inside the workspace
# the agent (and any code it runs) fully controls — readable by a
# prompt-injected step, `git remote -v`, or anything that copies the
# tree. The helper resolves credentials at call time from the
# GH_TOKEN/GITHUB_TOKEN env vars the caller exports before setup_repo(),
# so the token never touches disk.
run_cmd(
[
"git",
"remote",
"set-url",
"origin",
f"https://x-access-token:{token}@github.com/{config.repo_url}.git",
f"https://github.com/{config.repo_url}.git",
],
label="set-remote-url",
cwd=repo_dir,
)
run_cmd(
["git", "config", "--local", "credential.helper", "!gh auth git-credential"],
label="configure-git-credential-helper",
cwd=repo_dir,
)

# Branch setup
if config.is_pr_workflow and config.branch_name:
Expand Down
42 changes: 42 additions & 0 deletions agent/tests/test_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ def test_matchers_with_trajectory(self):
import hashlib
import json as _json
from collections import deque
from datetime import UTC
from typing import Any

import hooks
Expand Down Expand Up @@ -1412,3 +1413,44 @@ def test_stop_hook_drains_denials_when_no_cancel(self, engine_with_soft_gate):
# DDB and returns []. Denial should be injected.
assert result.get("decision") == "block"
assert "<user_denial" in result.get("reason", "")


class TestRemainingMaxlifetime:
"""_remaining_maxlifetime_s parses TASK_STARTED_AT correctly."""

def test_iso_timestamp_parsed_as_utc_regardless_of_local_tz(self, monkeypatch):
# The trailing Z means UTC. Before the fix, strptime produced a naive
# datetime whose .timestamp() used the container's local TZ, skewing
# the remaining-lifetime math by the UTC offset (wrong approval
# timeouts / spurious insufficient-lifetime denies on non-UTC hosts).
import time as time_module
from datetime import datetime

started = datetime(2026, 6, 11, 12, 0, 0, tzinfo=UTC)
monkeypatch.setenv("TASK_STARTED_AT", "2026-06-11T12:00:00Z")
monkeypatch.setenv("AGENTCORE_MAX_LIFETIME_S", "28800")
# Freeze "now" at exactly 1000s after the UTC start time.
monkeypatch.setattr(hooks.time, "time", lambda: started.timestamp() + 1000, raising=True)
# Simulate a non-UTC container: if the implementation regresses to
# local-time interpretation, the result shifts by the TZ offset.
monkeypatch.setenv("TZ", "America/New_York")
time_module.tzset()
try:
assert hooks._remaining_maxlifetime_s() == 28800 - 1000
finally:
monkeypatch.delenv("TZ")
time_module.tzset()

def test_epoch_seconds_passthrough(self, monkeypatch):
monkeypatch.setenv("TASK_STARTED_AT", "1000000")
monkeypatch.setenv("AGENTCORE_MAX_LIFETIME_S", "500")
monkeypatch.setattr(hooks.time, "time", lambda: 1000100, raising=True)
assert hooks._remaining_maxlifetime_s() == 400

def test_missing_started_at_returns_none(self, monkeypatch):
monkeypatch.delenv("TASK_STARTED_AT", raising=False)
assert hooks._remaining_maxlifetime_s() is None

def test_unparseable_started_at_returns_none(self, monkeypatch):
monkeypatch.setenv("TASK_STARTED_AT", "not-a-timestamp")
assert hooks._remaining_maxlifetime_s() is None
17 changes: 17 additions & 0 deletions agent/tests/test_output_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,23 @@ def test_detects_github_fine_grained_pat(self):
assert "GITHUB_PAT detected" in result.findings
assert "[REDACTED-GITHUB_PAT]" in result.redacted_content

def test_detects_token_longer_than_36_chars(self):
# Token lengths vary across generations; the pattern must not be
# anchored to exactly 36 chars.
content = "token: ghs_" + "A" * 48
result = scan_tool_output(content)
assert result.has_sensitive_content is True
assert "GITHUB_TOKEN detected" in result.findings
assert "ghs_" not in result.redacted_content

def test_detects_token_in_remote_url(self):
# `git remote -v` style output with x-access-token credentials.
content = "origin https://x-access-token:ghs_secret123@github.com/owner/repo.git (push)"
result = scan_tool_output(content)
assert result.has_sensitive_content is True
assert "GITHUB_URL_TOKEN detected" in result.findings
assert "ghs_secret123" not in result.redacted_content

# ---- Private keys ----

def test_detects_rsa_private_key(self):
Expand Down
80 changes: 43 additions & 37 deletions cdk/src/handlers/shared/agentcore-browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,19 @@ async function runCdpScreenshot(wssUrl: string, url: string, timeoutMs: number):
}
const eventWaiters: EventWaiter[] = [];

// Track the latest Document response status PER FRAME so we can fail fast
// on 4xx/5xx (404 / 503 / auth wall pages) instead of capturing what looks
// like the app but isn't. The main frame's id is only known once
// Page.navigate resolves, but `Network.responseReceived` for the main
// document frequently arrives BEFORE that response — so record every
// Document status keyed by frameId and resolve which one is the main
// document afterwards. (Recording a single "latest" status instead would
// race: an early sub-frame response could be misattributed as the main
// document, or the real main-document status missed entirely.)
// Redirect chains re-fire for the same frameId; last write wins, which is
// the final response. (Auth walls that return 200 are out of scope — #287.)
const documentStatusByFrame = new Map<string, number>();

ws.on('message', (raw: RawData) => {
const data = raw.toString();
let msg: CdpMessage;
Expand All @@ -164,6 +177,19 @@ async function runCdpScreenshot(wssUrl: string, url: string, timeoutMs: number):
} catch {
return;
}
if (msg.method === 'Network.responseReceived') {
const params = msg.params as
| { type?: string; frameId?: string; response?: { status?: number } }
| undefined;
// CDP's `Network.responseReceived` fires for every resource (HTML,
// JS, CSS, images, XHR, …). Only type==='Document' responses are
// candidate main-document responses.
if (params?.type === 'Document' && typeof params.frameId === 'string') {
const status = params.response?.status;
if (typeof status === 'number') documentStatusByFrame.set(params.frameId, status);
}
return;
}
if (typeof msg.id === 'number') {
const slot = pending.get(msg.id);
if (slot) {
Expand Down Expand Up @@ -262,14 +288,6 @@ async function runCdpScreenshot(wssUrl: string, url: string, timeoutMs: number):
});
}

// Track the main-document HTTP response so we can fail fast on 4xx/5xx
// (404 / 503 / auth wall pages) instead of capturing what looks like the
// app but isn't. Captured in a Network.responseReceived listener below;
// checked after Page.loadEventFired but before Page.captureScreenshot.
// (Auth walls that return 200 are out of scope — see issue #287.)
let mainDocumentStatus: number | null = null;
let mainDocumentFrameId: string | null = null;

try {
// 1. List existing targets, find the default about:blank page.
const targetsResp = await cdpSend('Target.getTargets');
Expand All @@ -293,34 +311,11 @@ async function runCdpScreenshot(wssUrl: string, url: string, timeoutMs: number):
// we wait on below AND the main-document response status. Network
// has to be enabled BEFORE Page.navigate, or the response event
// fires before our listener is wired and we miss the status.
// (Document statuses are recorded per-frame by the single message
// listener above; we resolve the main frame's status after load.)
await cdpSend('Page.enable', {}, pageSessionId);
await cdpSend('Network.enable', {}, pageSessionId);

// Tap the raw message stream for Network.responseReceived events —
// we want a multi-fire listener (Document responses can appear for
// redirect chains), not the one-shot waiter pattern that
// eventWaiters / waitForEvent use. Records the latest matching
// status; the post-load check below acts on whatever was captured.
ws.on('message', (raw: RawData) => {
let msg: CdpMessage;
try {
msg = JSON.parse(raw.toString()) as CdpMessage;
} catch {
return;
}
if (msg.method !== 'Network.responseReceived') return;
const params = msg.params as
| { type?: string; frameId?: string; response?: { status?: number } }
| undefined;
// CDP's `Network.responseReceived` fires for every resource (HTML,
// JS, CSS, images, XHR, …). Only the type==='Document' event for
// the navigated frame is the main-document response we care about.
if (!params || params.type !== 'Document') return;
if (mainDocumentFrameId && params.frameId !== mainDocumentFrameId) return;
const status = params.response?.status;
if (typeof status === 'number') mainDocumentStatus = status;
});

// 4. Navigate. The response includes a `frameId`; we wait on the
// `Page.loadEventFired` event below (more reliable than
// `frameStoppedLoading` which can fire before navigation
Expand All @@ -330,7 +325,7 @@ async function runCdpScreenshot(wssUrl: string, url: string, timeoutMs: number):
if (navError) {
throw new Error(`Page.navigate failed: ${navError}`);
}
mainDocumentFrameId = (navResp.result?.frameId as string | undefined) ?? null;
const mainDocumentFrameId = (navResp.result?.frameId as string | undefined) ?? null;

// 5. Wait for the page load event. SPA-style apps may continue
// fetching after this fires, so add a 2s settle wait. For
Expand All @@ -344,10 +339,21 @@ async function runCdpScreenshot(wssUrl: string, url: string, timeoutMs: number):
// perspective; the user sees a confidently-wrong screenshot of an
// error page posted as the deploy preview. Throw → processor's
// catch logs and skips the PR/Linear comment cleanly.
// If we never captured a status (Network.responseReceived was
// queued but predicate didn't match — e.g. a redirect chain that
// doesn't expose the final frame), fall through and capture
// The main frame's id comes from the Page.navigate response; its
// Document responses were recorded per-frame by the message
// listener even if they arrived before navigate resolved. If
// Page.navigate returned no frameId, only an unambiguous single
// recorded status is trusted — with multiple frames we cannot
// tell which is the main document.
// If we never captured a status (e.g. a service variant that
// doesn't emit Network events), fall through and capture
// optimistically; that's the pre-#287 behaviour.
let mainDocumentStatus: number | null = null;
if (mainDocumentFrameId !== null) {
mainDocumentStatus = documentStatusByFrame.get(mainDocumentFrameId) ?? null;
} else if (documentStatusByFrame.size === 1) {
mainDocumentStatus = documentStatusByFrame.values().next().value ?? null;
}
if (mainDocumentStatus !== null && (mainDocumentStatus < 200 || mainDocumentStatus >= 300)) {
throw new Error(`Preview URL returned HTTP ${mainDocumentStatus}; skipping screenshot`);
}
Expand Down
16 changes: 15 additions & 1 deletion cdk/src/handlers/shared/linear-verify.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,13 @@ export async function getLinearSecret(secretId: string, forceRefresh = false): P

try {
const result = await sm.send(new GetSecretValueCommand({ SecretId: secretId }));
if (!result.SecretString) {
// Treat empty / whitespace-only SecretString as null — an empty secret
// must never be used for HMAC, or HMAC('', body) becomes forgeable.
// (Same guard as getGitHubWebhookSecret.)
if (!result.SecretString || result.SecretString.trim() === '') {
logger.error('Linear webhook secret is empty — refusing to use for HMAC', {
secret_id: secretId,
});
secretCache.delete(secretId);
return null;
}
Expand Down Expand Up @@ -101,6 +107,14 @@ export function verifyLinearSignature(
signature: string,
body: string,
): boolean {
// Defense-in-depth: getLinearSecret already filters empty secrets, but
// callers like verifyLinearRequestForWorkspace pass secrets from other
// sources (per-workspace OAuth bundles) — HMAC('') must always be
// rejected or an attacker can forge signatures against a misconfigured
// empty secret. (Mirrors verifyGitHubSignature, PR-241 review B2.)
if (!webhookSecret || webhookSecret.trim() === '') {
return false;
}
const expected = crypto.createHmac('sha256', webhookSecret).update(body).digest('hex');
try {
return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(signature));
Expand Down
Loading
Loading