Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 70 additions & 8 deletions bin/gstack-global-discover.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,30 @@ import { homedir } from "os";

// ── Types ──────────────────────────────────────────────────────────────────

// Normalized bucket for a Codex rollout's `payload.originator` value. Four
// buckets let /retro global distinguish real codex dev from subagent
// invocations and CC-driven calls (see issue #1315):
//   "desktop"     — Codex Desktop
//   "exec"        — codex_exec (subagent invocations)
//   "claude_code" — Claude Code (CC-driven calls)
//   "other"       — any value that matches none of the above
type CodexOriginator = "desktop" | "exec" | "claude_code" | "other";

// A single session found by one of the scanners.
interface Session {
// Tool that produced the session.
tool: "claude_code" | "codex" | "gemini";
// Working directory recorded for the session.
cwd: string;
// Originator bucket, filled in by scanCodex for codex sessions. Aggregation
// code treats a missing value as "other".
codexOriginator?: CodexOriginator;
}

// Tally of codex sessions per originator bucket. The property names match the
// CodexOriginator literals, so a bucket value can be used directly as the key
// when incrementing.
interface CodexOriginatorCounts {
desktop: number;
exec: number;
claude_code: number;
other: number;
}

// One repository entry in the discovery output.
interface Repo {
name: string;
// Remote identifier; main() counts distinct remotes per tool.
remote: string;
paths: string[];
// Number of sessions per tool for this repo.
sessions: { claude_code: number; codex: number; gemini: number };
// Breakdown of this repo's codex sessions by originator bucket; each codex
// session increments exactly one bucket.
codex_originators: CodexOriginatorCounts;
}

interface DiscoveryResult {
Expand All @@ -34,7 +48,7 @@ interface DiscoveryResult {
repos: Repo[];
tools: {
claude_code: { total_sessions: number; repos: number };
codex: { total_sessions: number; repos: number };
codex: { total_sessions: number; repos: number; originators: CodexOriginatorCounts };
gemini: { total_sessions: number; repos: number };
};
total_sessions: number;
Expand Down Expand Up @@ -178,7 +192,9 @@ function getGitRemote(cwd: string): string | null {
// ── Scanners ───────────────────────────────────────────────────────────────

function scanClaudeCode(since: Date): Session[] {
const projectsDir = join(homedir(), ".claude", "projects");
// `CLAUDE_PROJECTS_DIR` is honored for test injection (mirrors the existing
// `CODEX_SESSIONS_DIR` knob in scanCodex). Production paths leave it unset.
const projectsDir = process.env.CLAUDE_PROJECTS_DIR || join(homedir(), ".claude", "projects");
if (!existsSync(projectsDir)) return [];

const sessions: Session[] = [];
Expand Down Expand Up @@ -274,11 +290,17 @@ function resolveClaudeCodeCwd(
}

function extractCwdFromJsonl(filePath: string): string | null {
// Recent Claude Code / CCR JSONL files often start with a large
// `queue-operation` event (~30-50KB) that carries no `cwd`. The pre-fix
// 8KB buffer truncated mid-line, JSON.parse failed, and the project
// directory was silently dropped from the discovery count. Issue #1315
// (Akagilnc's diagnosis): ~450 CC jsonl files in a single repo went
// missing this way. 128KB matches scanCodex's existing buffer choice
// and covers the largest first-line events we've seen in the wild.
try {
// Read only the first 8KB to avoid loading huge JSONL files into memory
const fd = openSync(filePath, "r");
const buf = Buffer.alloc(8192);
const bytesRead = readSync(fd, buf, 0, 8192, 0);
const buf = Buffer.alloc(131072);
const bytesRead = readSync(fd, buf, 0, 131072, 0);
closeSync(fd);
const text = buf.toString("utf-8", 0, bytesRead);
const lines = text.split("\n").slice(0, 15);
Expand All @@ -297,6 +319,23 @@ function extractCwdFromJsonl(filePath: string): string | null {
return null;
}

/**
 * Map a Codex rollout's free-form `payload.originator` string to a bucket.
 *
 * Real values seen in the wild: "Codex Desktop" (interactive dev),
 * "codex_exec" (cron / scripted / subagent), "Claude Code" (CC's MCP /
 * subagent integration). Matching ignores case, surrounding whitespace, and
 * whether the words are joined by spaces, underscores, or hyphens.
 *
 * @param raw - The originator value as parsed from JSON; may be any type.
 * @returns The matching bucket, or "other" for non-strings and unrecognized
 *   values, so such sessions are counted rather than silently dropped.
 */
function normalizeCodexOriginator(raw: unknown): CodexOriginator {
  if (typeof raw !== "string") return "other";
  // Canonicalize to lower-case words joined by single underscores so that
  // "Codex Desktop", "codex_desktop" and " codex-desktop " all compare equal.
  const key = raw.trim().toLowerCase().replace(/[\s_-]+/g, "_");
  switch (key) {
    case "codex_desktop":
      return "desktop";
    case "codex_exec":
      return "exec";
    case "claude_code":
      return "claude_code";
    default:
      return "other";
  }
}

/** Build a fresh tally object with every originator bucket at zero. */
function emptyOriginatorCounts(): CodexOriginatorCounts {
  const zeroed: CodexOriginatorCounts = {
    desktop: 0,
    exec: 0,
    claude_code: 0,
    other: 0,
  };
  return zeroed;
}

function scanCodex(since: Date): Session[] {
const sessionsDir = process.env.CODEX_SESSIONS_DIR || join(homedir(), ".codex", "sessions");
if (!existsSync(sessionsDir)) return [];
Expand Down Expand Up @@ -346,7 +385,11 @@ function scanCodex(since: Date): Session[] {
if (!firstLine) continue;
const meta = JSON.parse(firstLine);
if (meta.type === "session_meta" && meta.payload?.cwd) {
sessions.push({ tool: "codex", cwd: meta.payload.cwd });
sessions.push({
tool: "codex",
cwd: meta.payload.cwd,
codexOriginator: normalizeCodexOriginator(meta.payload.originator),
});
}
} catch {
console.error(`Warning: could not parse Codex session ${filePath}`);
Expand Down Expand Up @@ -508,15 +551,20 @@ async function resolveAndDeduplicate(sessions: Session[]): Promise<Repo[]> {
}

const sessionCounts = { claude_code: 0, codex: 0, gemini: 0 };
const codexOriginators = emptyOriginatorCounts();
for (const s of data.sessions) {
sessionCounts[s.tool]++;
if (s.tool === "codex") {
codexOriginators[s.codexOriginator ?? "other"]++;
}
}

repos.push({
name,
remote,
paths: data.paths,
sessions: sessionCounts,
codex_originators: codexOriginators,
});
}

Expand Down Expand Up @@ -559,13 +607,18 @@ async function main() {
const codexRepos = new Set(repos.filter((r) => r.sessions.codex > 0).map((r) => r.remote)).size;
const geminiRepos = new Set(repos.filter((r) => r.sessions.gemini > 0).map((r) => r.remote)).size;

const codexOriginatorTotals = emptyOriginatorCounts();
for (const s of codexSessions) {
codexOriginatorTotals[s.codexOriginator ?? "other"]++;
}

const result: DiscoveryResult = {
window: since,
start_date: startDate,
repos,
tools: {
claude_code: { total_sessions: ccSessions.length, repos: ccRepos },
codex: { total_sessions: codexSessions.length, repos: codexRepos },
codex: { total_sessions: codexSessions.length, repos: codexRepos, originators: codexOriginatorTotals },
gemini: { total_sessions: geminiSessions.length, repos: geminiRepos },
},
total_sessions: allSessions.length,
Expand All @@ -578,13 +631,22 @@ async function main() {
// Summary format
console.log(`Window: ${since} (since ${startDate})`);
console.log(`Sessions: ${allSessions.length} total (CC: ${ccSessions.length}, Codex: ${codexSessions.length}, Gemini: ${geminiSessions.length})`);
if (codexSessions.length > 0) {
const o = codexOriginatorTotals;
console.log(` Codex originators: desktop=${o.desktop}, exec=${o.exec}, claude_code=${o.claude_code}, other=${o.other}`);
}
console.log(`Repos: ${repos.length} unique`);
console.log("");
for (const repo of repos) {
const total = repo.sessions.claude_code + repo.sessions.codex + repo.sessions.gemini;
const tools = [];
if (repo.sessions.claude_code > 0) tools.push(`CC:${repo.sessions.claude_code}`);
if (repo.sessions.codex > 0) tools.push(`Codex:${repo.sessions.codex}`);
if (repo.sessions.codex > 0) {
const o = repo.codex_originators;
// Show the desktop/exec split inline when codex sessions are present —
// a single number hid real-dev vs subagent activity in /retro global.
tools.push(`Codex:${repo.sessions.codex} (desktop=${o.desktop}, exec=${o.exec}, cc=${o.claude_code}${o.other > 0 ? `, other=${o.other}` : ""})`);
}
if (repo.sessions.gemini > 0) tools.push(`Gemini:${repo.sessions.gemini}`);
console.log(` ${repo.name} (${total} sessions) — ${tools.join(", ")}`);
console.log(` Remote: ${repo.remote}`);
Expand Down
171 changes: 171 additions & 0 deletions test/global-discover.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,157 @@ describe("gstack-global-discover", () => {
});
});

// Checks that each Codex `payload.originator` value is tallied into the
// expected bucket, both in the global totals and per repo (issue #1315).
describe("codex originator bucketing (issue #1315)", () => {
  let tmpDir: string;
  let codexDir: string;
  let repoDir: string;

  // Zero-pad a date component to two digits for the year/month/day directories.
  const twoDigits = (n: number): string => String(n).padStart(2, "0");

  beforeEach(() => {
    tmpDir = mkdtempSync(join(tmpdir(), "gstack-codex-orig-"));

    // Session files go under a directory named after today's local date.
    const today = new Date();
    codexDir = join(
      tmpDir,
      "codex-home",
      "sessions",
      today.getFullYear().toString(),
      twoDigits(today.getMonth() + 1),
      twoDigits(today.getDate())
    );
    mkdirSync(codexDir, { recursive: true });

    // Git repo that every fake session reports as its cwd.
    repoDir = join(tmpDir, "fake-repo");
    mkdirSync(repoDir);
    const gitOpts = { cwd: repoDir, stdio: "pipe" as const };
    spawnSync("git", ["init"], gitOpts);
    spawnSync("git", ["commit", "--allow-empty", "-m", "init"], gitOpts);
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  // Write a one-line rollout file whose session_meta carries `originator`.
  const writeCodex = (originator: string): void => {
    const meta = {
      timestamp: new Date().toISOString(),
      type: "session_meta",
      payload: {
        id: `t-${Math.random()}`,
        timestamp: new Date().toISOString(),
        cwd: repoDir,
        originator,
      },
    };
    const stamp = new Date().toISOString().replace(/[:.]/g, "-");
    const suffix = Math.random().toString(36).slice(2);
    writeFileSync(join(codexDir, `rollout-${stamp}-${suffix}.jsonl`), `${JSON.stringify(meta)}\n`);
  };

  // Run the discover script against the temp sessions dir and return its
  // parsed JSON output.
  const discover = () => {
    const env = { ...process.env, CODEX_SESSIONS_DIR: join(tmpDir, "codex-home", "sessions") };
    const run = spawnSync("bun", ["run", scriptPath, "--since", "1h", "--format", "json"], {
      encoding: "utf-8",
      timeout: 30000,
      env,
    });
    expect(run.status).toBe(0);
    return JSON.parse(run.stdout);
  };

  test("'Codex Desktop' originator → desktop bucket", () => {
    writeCodex("Codex Desktop");
    const { originators } = discover().tools.codex;
    expect(originators.desktop).toBe(1);
    expect(originators.exec).toBe(0);
    expect(originators.claude_code).toBe(0);
  });

  test("'codex_exec' originator → exec bucket", () => {
    writeCodex("codex_exec");
    const { originators } = discover().tools.codex;
    expect(originators.exec).toBe(1);
    expect(originators.desktop).toBe(0);
  });

  test("'Claude Code' originator → claude_code bucket", () => {
    writeCodex("Claude Code");
    const { originators } = discover().tools.codex;
    expect(originators.claude_code).toBe(1);
    expect(originators.desktop).toBe(0);
    expect(originators.exec).toBe(0);
  });

  test("unknown originator → other bucket (not silently dropped)", () => {
    writeCodex("future-agent-name-not-yet-mapped");
    const { codex } = discover().tools;
    expect(codex.originators.other).toBe(1);
    expect(codex.total_sessions).toBe(1);
  });

  test("per-repo codex_originators sums to per-repo codex count", () => {
    for (const originator of ["Codex Desktop", "codex_exec", "codex_exec", "Claude Code"]) {
      writeCodex(originator);
    }
    const { repos } = discover();
    // The fake repo's normalized remote will be local: form; just find it.
    const repo = repos.find((r: any) => r.paths.includes(repoDir));
    expect(repo).toBeDefined();
    const { desktop, exec, claude_code, other } = repo.codex_originators;
    expect(desktop + exec + claude_code + other).toBe(repo.sessions.codex);
    expect(desktop).toBe(1);
    expect(exec).toBe(2);
    expect(claude_code).toBe(1);
  });
});

// Regression coverage for issue #1315 Problem 2: a Claude Code JSONL whose
// first line is larger than 8KB and has no `cwd` must still be discovered via
// the `cwd` on a later line.
describe("CC jsonl with >8KB first line (issue #1315 Problem 2)", () => {
  let tmpDir: string;
  let ccProjectsDir: string;
  let realRepoDir: string;

  beforeEach(() => {
    tmpDir = mkdtempSync(join(tmpdir(), "gstack-cc-bigline-"));

    // Real repo on disk so resolveClaudeCodeCwd can verify it.
    realRepoDir = join(tmpDir, "real-repo");
    mkdirSync(realRepoDir);
    for (const args of [["init"], ["commit", "--allow-empty", "-m", "init"]]) {
      spawnSync("git", args, { cwd: realRepoDir, stdio: "pipe" });
    }

    // The project dir name is a CCR / cron-style decoded path that does NOT
    // exist on disk, so resolveClaudeCodeCwd falls to extractCwdFromJsonl.
    ccProjectsDir = join(
      tmpDir,
      "claude-home",
      "projects",
      "-tmp-does-not-exist-on-disk-blogger-lab"
    );
    mkdirSync(ccProjectsDir, { recursive: true });
  });

  afterEach(() => {
    rmSync(tmpDir, { recursive: true, force: true });
  });

  test("first-line queue-operation >8KB no longer hides cwd on later line", () => {
    // Recreate the Akagilnc scenario: a huge queue-operation event with no
    // `cwd` on line one, and the real cwd on line two.
    const bigLine = JSON.stringify({
      type: "queue-operation",
      payload: { junk: "x".repeat(40000) },
    });
    expect(bigLine.length).toBeGreaterThan(30000);
    const cwdLine = JSON.stringify({ type: "summary", cwd: realRepoDir });
    writeFileSync(join(ccProjectsDir, "session-1.jsonl"), [bigLine, cwdLine, ""].join("\n"));

    const claudeHome = join(tmpDir, "claude-home");
    const run = spawnSync("bun", ["run", scriptPath, "--since", "1h", "--format", "json"], {
      encoding: "utf-8",
      timeout: 30000,
      env: { ...process.env, HOME: claudeHome, CLAUDE_PROJECTS_DIR: join(claudeHome, "projects") },
    });
    expect(run.status).toBe(0);

    // The fake repo should now be discovered as a CC session.
    const { repos } = JSON.parse(run.stdout);
    const found = repos.find((repo: any) => repo.paths.includes(realRepoDir));
    expect(found).toBeDefined();
    expect(found.sessions.claude_code).toBeGreaterThanOrEqual(1);
  });
});

describe("discovery output structure", () => {
test("repos have required fields", () => {
const result = spawnSync(
Expand Down Expand Up @@ -329,6 +480,26 @@ describe("gstack-global-discover", () => {
expect(json.total_sessions).toBe(toolTotal);
});

// Shape check on the script's JSON output: the codex tool summary and every
// repo entry must carry an originator breakdown, and the global breakdown
// must account for every codex session.
test("repos expose codex_originators breakdown", () => {
  const result = spawnSync(
    "bun",
    ["run", scriptPath, "--since", "30d", "--format", "json"],
    { encoding: "utf-8", timeout: 30000 }
  );
  // Fail here on a crashed or timed-out run (status is non-zero or null)
  // instead of with an opaque JSON.parse error on empty or partial stdout.
  expect(result.status).toBe(0);
  const json = JSON.parse(result.stdout);
  expect(json.tools.codex).toHaveProperty("originators");
  const o = json.tools.codex.originators;
  for (const k of ["desktop", "exec", "claude_code", "other"]) {
    expect(o).toHaveProperty(k);
    expect(typeof o[k]).toBe("number");
  }
  // Sum of originators must equal codex total_sessions.
  expect(o.desktop + o.exec + o.claude_code + o.other).toBe(json.tools.codex.total_sessions);
  for (const repo of json.repos) {
    expect(repo).toHaveProperty("codex_originators");
  }
});

test("deduplicates Conductor workspaces by remote", () => {
const result = spawnSync(
"bun",
Expand Down