diff --git a/apps/desktop/native/macos-push-to-talk-helper.swift b/apps/desktop/native/macos-push-to-talk-helper.swift
new file mode 100644
index 00000000000..d5d6e579813
--- /dev/null
+++ b/apps/desktop/native/macos-push-to-talk-helper.swift
@@ -0,0 +1,104 @@
+import ApplicationServices
+import CoreGraphics
+import Foundation
+
+// Watches the global modifier-key state through a listen-only event tap and reports
+// push-to-talk transitions to whoever reads its stdout, one token per line:
+// "permission-required", "permission-granted", "permission-timeout", "tap-unavailable",
+// "ready", "start" and "stop".
+
+/// Last state reported on stdout, so "start"/"stop" are only written on transitions.
+private var isPushToTalkActive = false
+/// The installed tap, kept so the callback can re-enable it after the system disables it.
+private var eventTapPort: CFMachPort?
+
+/// Writes `value` followed by a newline to stdout.
+///
+/// `FileHandle.write` goes straight to the file descriptor, so there is no stdio buffer
+/// to flush afterwards.
+private func writeLine(_ value: String) {
+  FileHandle.standardOutput.write(Data("\(value)\n".utf8))
+}
+
+/// Returns `true` while Control and Option are both held and Command is not.
+private func hasPushToTalkModifiers(_ flags: CGEventFlags) -> Bool {
+  flags.contains(.maskControl)
+    && flags.contains(.maskAlternate)
+    && !flags.contains(.maskCommand)
+}
+
+/// Writes "start" or "stop" when `flags` flips the push-to-talk state; otherwise does nothing.
+private func syncPushToTalkState(_ flags: CGEventFlags) {
+  let nextActive = hasPushToTalkModifiers(flags)
+  guard nextActive != isPushToTalkActive else { return }
+
+  isPushToTalkActive = nextActive
+  writeLine(nextActive ? "start" : "stop")
+}
+
+/// Event-tap callback: follows modifier changes and recovers when the system disables the tap.
+///
+/// The event is always handed back unchanged.
+private let eventCallback: CGEventTapCallBack = { _, type, event, _ in
+  switch type {
+  case .tapDisabledByTimeout, .tapDisabledByUserInput:
+    if let eventTapPort {
+      CGEvent.tapEnable(tap: eventTapPort, enable: true)
+    }
+    // Modifier changes that happened while the tap was off were never delivered. Resync
+    // from the live session state so a missed release cannot leave push-to-talk stuck
+    // after "start".
+    syncPushToTalkState(CGEventSource.flagsState(.combinedSessionState))
+  case .flagsChanged:
+    syncPushToTalkState(event.flags)
+  default:
+    break
+  }
+
+  return Unmanaged.passUnretained(event)
+}
+
+// Ask for Accessibility trust, showing the system prompt if it has not been granted yet.
+let accessibilityOptions = [
+  kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String: true
+] as CFDictionary
+
+if !AXIsProcessTrustedWithOptions(accessibilityOptions) {
+  writeLine("permission-required")
+  // Poll once a second, for up to 120 tries, until trust is granted.
+  for _ in 0..<120 {
+    Thread.sleep(forTimeInterval: 1)
+    if AXIsProcessTrusted() {
+      writeLine("permission-granted")
+      break
+    }
+  }
+}
+
+if !AXIsProcessTrusted() {
+  writeLine("permission-timeout")
+  exit(2)
+}
+
+// Only modifier-key changes are of interest.
+let eventMask = (1 << CGEventType.flagsChanged.rawValue)
+let tap = CGEvent.tapCreate(
+  tap: .cgSessionEventTap,
+  place: .headInsertEventTap,
+  options: .listenOnly,
+  eventsOfInterest: CGEventMask(eventMask),
+  callback: eventCallback,
+  userInfo: nil
+)
+
+guard let eventTap = tap else {
+  writeLine("tap-unavailable")
+  exit(3)
+}
+
+eventTapPort = eventTap
+let runLoopSource = CFMachPortCreateRunLoopSource(kCFAllocatorDefault, eventTap, 0)
+CFRunLoopAddSource(CFRunLoopGetCurrent(), runLoopSource, .commonModes)
+CGEvent.tapEnable(tap: eventTap, enable: true)
+writeLine("ready")
+CFRunLoopRun()
diff --git a/apps/desktop/package.json b/apps/desktop/package.json
index 34a061ffc70..e89603b245e 100644
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -6,9 +6,10 @@
   "main": "dist-electron/main.cjs",
   "scripts": {
     "dev": "bun run --parallel dev:bundle dev:electron",
-    "dev:bundle": "tsdown --watch",
+    "dev:bundle": "bun run build:native && tsdown --watch",
     "dev:electron": "node scripts/dev-electron.mjs",
-    "build": "tsdown",
+    "build": "bun run build:native && tsdown",
+    "build:native": "node scripts/build-native-helpers.mjs",
     "start": "node scripts/start-electron.mjs",
     "typecheck": "tsc --noEmit",
     "test": "vitest run --passWithNoTests",
diff --git a/apps/desktop/resources/native/t3code-push-to-talk-helper b/apps/desktop/resources/native/t3code-push-to-talk-helper
new file mode 100755
index 00000000000..56a11387ab6
Binary files /dev/null and b/apps/desktop/resources/native/t3code-push-to-talk-helper differ
diff --git a/apps/desktop/scripts/build-native-helpers.mjs b/apps/desktop/scripts/build-native-helpers.mjs
new file mode 100644
index 00000000000..9da5c79cb54
--- /dev/null
+++ b/apps/desktop/scripts/build-native-helpers.mjs
@@ -0,0 +1,25 @@
+import { mkdirSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+import { spawnSync } from "node:child_process";
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const desktopDir = resolve(__dirname, "..");
+
+if (process.platform !== "darwin") {
+  process.exit(0);
+}
+
+const sourcePath =
resolve(desktopDir, "native/macos-push-to-talk-helper.swift"); +const outputPath = resolve(desktopDir, "resources/native/t3code-push-to-talk-helper"); + +mkdirSync(dirname(outputPath), { recursive: true }); + +const result = spawnSync("xcrun", ["swiftc", sourcePath, "-O", "-o", outputPath], { + cwd: desktopDir, + stdio: "inherit", +}); + +if (result.status !== 0) { + process.exit(result.status ?? 1); +} diff --git a/apps/desktop/src/main.ts b/apps/desktop/src/main.ts index c5507c6fb03..312b2803baf 100644 --- a/apps/desktop/src/main.ts +++ b/apps/desktop/src/main.ts @@ -10,12 +10,14 @@ import { type BrowserWindowConstructorOptions, clipboard, dialog, + globalShortcut, ipcMain, Menu, nativeImage, nativeTheme, protocol, safeStorage, + screen, shell, } from "electron"; import type { MenuItemConstructorOptions, OpenDialogOptions } from "electron"; @@ -30,6 +32,9 @@ import type { DesktopUpdateActionResult, DesktopUpdateCheckResult, DesktopUpdateState, + DesktopPushToTalkOverlayState, + DesktopPushToTalkTranscriptionInput, + DesktopPushToTalkTranscriptionResult, } from "@t3tools/contracts"; import { autoUpdater } from "electron-updater"; @@ -102,6 +107,9 @@ const SET_SAVED_ENVIRONMENT_SECRET_CHANNEL = "desktop:set-saved-environment-secr const REMOVE_SAVED_ENVIRONMENT_SECRET_CHANNEL = "desktop:remove-saved-environment-secret"; const GET_SERVER_EXPOSURE_STATE_CHANNEL = "desktop:get-server-exposure-state"; const SET_SERVER_EXPOSURE_MODE_CHANNEL = "desktop:set-server-exposure-mode"; +const PUSH_TO_TALK_EVENT_CHANNEL = "desktop:push-to-talk-event"; +const PUSH_TO_TALK_OVERLAY_STATE_CHANNEL = "desktop:push-to-talk-overlay-state"; +const PUSH_TO_TALK_TRANSCRIBE_CHANNEL = "desktop:push-to-talk-transcribe"; const BASE_DIR = process.env.T3CODE_HOME?.trim() || Path.join(OS.homedir(), ".t3"); const STATE_DIR = Path.join(BASE_DIR, "userdata"); const DESKTOP_SETTINGS_PATH = Path.join(STATE_DIR, "desktop-settings.json"); @@ -129,6 +137,14 @@ const APP_RUN_ID = 
Crypto.randomBytes(6).toString("hex"); const SERVER_SETTINGS_PATH = Path.join(STATE_DIR, "settings.json"); const AUTO_UPDATE_STARTUP_DELAY_MS = 15_000; const AUTO_UPDATE_POLL_INTERVAL_MS = 4 * 60 * 60 * 1000; +const PUSH_TO_TALK_GLOBAL_SHORTCUT = "Control+Alt+Space"; +const PUSH_TO_TALK_OVERLAY_WIDTH = 248; +const PUSH_TO_TALK_OVERLAY_HEIGHT = 76; +const PUSH_TO_TALK_OVERLAY_CURSOR_OFFSET = 18; +const PUSH_TO_TALK_NATIVE_HELPER_NAME = "t3code-push-to-talk-helper"; +const LOCAL_STT_MODEL_NAME = "ggml-tiny.en-q5_1.bin"; +const LOCAL_STT_MODEL_URL = + "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin"; function resolvePickFolderDefaultPath(rawOptions: unknown): string | undefined { if (typeof rawOptions !== "object" || rawOptions === null) { @@ -224,6 +240,17 @@ let restoreStdIoCapture: (() => void) | null = null; let backendObservabilitySettings = readPersistedBackendObservabilitySettings(); let desktopSettings = readDesktopSettings(DESKTOP_SETTINGS_PATH, app.getVersion()); let desktopServerExposureMode: DesktopServerExposureMode = desktopSettings.serverExposureMode; +let pushToTalkOverlayWindow: BrowserWindow | null = null; +let pushToTalkOverlayFollowTimer: ReturnType | null = null; +let pushToTalkNativeHelperProcess: ChildProcess.ChildProcess | null = null; +let pushToTalkNativeHelperStdout = ""; +let pushToTalkActive = false; +let pushToTalkOverlayState: DesktopPushToTalkOverlayState = { + visible: true, + status: "idle", + title: "T3 Code", + message: "Hold Ctrl+Option", +}; let destructiveMenuIconCache: Electron.NativeImage | null | undefined; const expectedBackendExitChildren = new WeakSet(); @@ -465,6 +492,7 @@ async function waitForBackendWindowReady(baseUrl: string): Promise<"listening" | listeningPromise: backendListeningDetector?.promise ?? 
null, waitForHttpReady: () => waitForBackendHttpReady(baseUrl, { + path: "/.well-known/t3/environment", timeoutMs: 60_000, }), cancelHttpWait: cancelBackendReadinessWait, @@ -593,6 +621,214 @@ function captureBackendOutput(child: ChildProcess.ChildProcess): void { attachStream(child.stderr); } +function execFilePromise( + file: string, + args: readonly string[], + options?: ChildProcess.ExecFileOptions, +): Promise<{ readonly stdout: string; readonly stderr: string }> { + return new Promise((resolve, reject) => { + ChildProcess.execFile(file, [...args], options, (error, stdout, stderr) => { + if (error) { + reject(error); + return; + } + resolve({ stdout: String(stdout), stderr: String(stderr) }); + }); + }); +} + +function resolveExecutablePath( + envName: string, + executableNames: readonly string[], + resourcePaths: readonly string[] = [], +): string | null { + const configured = process.env[envName]?.trim(); + if (configured && FS.existsSync(configured)) { + return configured; + } + + for (const resourcePath of resourcePaths) { + if (FS.existsSync(resourcePath)) { + return resourcePath; + } + } + + for (const executableName of executableNames) { + try { + const resolved = ChildProcess.execFileSync("which", [executableName], { + encoding: "utf8", + stdio: ["ignore", "pipe", "ignore"], + }).trim(); + if (resolved.length > 0) { + return resolved; + } + } catch { + // Keep searching. 
+ } + } + + return null; +} + +function resolveWhisperCliPath(): string | null { + return resolveExecutablePath( + "T3CODE_STT_BINARY", + ["whisper-cli", "whisper.cpp"], + [ + Path.join(__dirname, "../resources/native/whisper-cli"), + Path.join(__dirname, "../prod-resources/native/whisper-cli"), + Path.join(process.resourcesPath, "resources/native/whisper-cli"), + Path.join(process.resourcesPath, "native/whisper-cli"), + ], + ); +} + +function resolveFfmpegPath(): string | null { + return resolveExecutablePath( + "T3CODE_FFMPEG_BINARY", + ["ffmpeg"], + [ + Path.join(__dirname, "../resources/native/ffmpeg"), + Path.join(__dirname, "../prod-resources/native/ffmpeg"), + Path.join(process.resourcesPath, "resources/native/ffmpeg"), + Path.join(process.resourcesPath, "native/ffmpeg"), + ], + ); +} + +async function downloadLocalSttModel(modelPath: string): Promise { + FS.mkdirSync(Path.dirname(modelPath), { recursive: true }); + const temporaryPath = `${modelPath}.${Crypto.randomUUID()}.tmp`; + const response = await fetch(LOCAL_STT_MODEL_URL); + if (!response.ok) { + throw new Error(`Failed to download local STT model: HTTP ${String(response.status)}.`); + } + const modelBytes = Buffer.from(await response.arrayBuffer()); + FS.writeFileSync(temporaryPath, modelBytes); + FS.renameSync(temporaryPath, modelPath); +} + +async function resolveLocalSttModelPath(): Promise { + const configured = process.env.T3CODE_STT_MODEL?.trim(); + if (configured) { + if (!FS.existsSync(configured)) { + throw new Error(`Configured local STT model does not exist: ${configured}`); + } + return configured; + } + + const resourceCandidates = [ + Path.join(__dirname, "../resources/models/whisper", LOCAL_STT_MODEL_NAME), + Path.join(__dirname, "../prod-resources/models/whisper", LOCAL_STT_MODEL_NAME), + Path.join(process.resourcesPath, "resources/models/whisper", LOCAL_STT_MODEL_NAME), + Path.join(process.resourcesPath, "models/whisper", LOCAL_STT_MODEL_NAME), + ]; + for (const candidate of 
resourceCandidates) { + if (FS.existsSync(candidate)) { + return candidate; + } + } + + const modelPath = Path.join(STATE_DIR, "models", "whisper", LOCAL_STT_MODEL_NAME); + if (!FS.existsSync(modelPath)) { + await downloadLocalSttModel(modelPath); + } + return modelPath; +} + +function extensionForAudioMimeType(mimeType: string): string { + if (mimeType.includes("mp4")) return "mp4"; + if (mimeType.includes("mpeg")) return "mp3"; + if (mimeType.includes("wav")) return "wav"; + if (mimeType.includes("ogg")) return "ogg"; + return "webm"; +} + +function normalizeTranscriptionInput(rawInput: unknown): DesktopPushToTalkTranscriptionInput { + if (typeof rawInput !== "object" || rawInput === null) { + throw new Error("Invalid push-to-talk transcription input."); + } + + const input = rawInput as Partial; + if (typeof input.mimeType !== "string" || input.mimeType.length === 0) { + throw new Error("Invalid push-to-talk transcription input."); + } + + const rawAudio = input.audio as unknown; + const audio = + rawAudio instanceof ArrayBuffer + ? rawAudio + : ArrayBuffer.isView(rawAudio) + ? rawAudio.buffer.slice(rawAudio.byteOffset, rawAudio.byteOffset + rawAudio.byteLength) + : null; + if (!(audio instanceof ArrayBuffer)) { + throw new Error("Invalid push-to-talk transcription input."); + } + + return { + audio, + mimeType: input.mimeType, + }; +} + +async function transcribePushToTalkAudio( + rawInput: unknown, +): Promise { + let tempDir: string | null = null; + try { + const input = normalizeTranscriptionInput(rawInput); + const whisperCliPath = resolveWhisperCliPath(); + if (!whisperCliPath) { + return { + ok: false, + error: "Local STT engine not found. Install whisper.cpp or set T3CODE_STT_BINARY.", + }; + } + + const ffmpegPath = resolveFfmpegPath(); + if (!ffmpegPath) { + return { + ok: false, + error: + "ffmpeg is required for local voice capture. 
Install ffmpeg or set T3CODE_FFMPEG_BINARY.", + }; + } + + const modelPath = await resolveLocalSttModelPath(); + tempDir = FS.mkdtempSync(Path.join(OS.tmpdir(), "t3code-voice-")); + const inputPath = Path.join(tempDir, `input.${extensionForAudioMimeType(input.mimeType)}`); + const wavPath = Path.join(tempDir, "input.wav"); + const outputPrefix = Path.join(tempDir, "transcript"); + FS.writeFileSync(inputPath, Buffer.from(input.audio)); + + await execFilePromise( + ffmpegPath, + ["-y", "-i", inputPath, "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le", wavPath], + { timeout: 30_000 }, + ); + const whisperResult = await execFilePromise( + whisperCliPath, + ["-m", modelPath, "-f", wavPath, "-l", "en", "-otxt", "-of", outputPrefix], + { timeout: 120_000, maxBuffer: 1024 * 1024 * 8 }, + ); + const transcriptPath = `${outputPrefix}.txt`; + const transcript = FS.existsSync(transcriptPath) + ? FS.readFileSync(transcriptPath, "utf8") + : whisperResult.stdout; + + return { ok: true, text: transcript.trim() }; + } catch (error) { + return { + ok: false, + error: error instanceof Error ? 
error.message : "Local transcription failed.", + }; + } finally { + if (tempDir) { + FS.rmSync(tempDir, { force: true, recursive: true }); + } + } +} + initializePackagedLogging(); if (process.platform === "linux") { @@ -1669,6 +1905,38 @@ function registerIpcHandlers(): void { return nextState; }); + ipcMain.removeHandler(PUSH_TO_TALK_OVERLAY_STATE_CHANNEL); + ipcMain.handle(PUSH_TO_TALK_OVERLAY_STATE_CHANNEL, async (_event, rawState: unknown) => { + if (typeof rawState !== "object" || rawState === null) { + throw new Error("Invalid push-to-talk overlay state."); + } + + const state = rawState as Partial; + if ( + typeof state.visible !== "boolean" || + (state.status !== "idle" && + state.status !== "listening" && + state.status !== "processing" && + state.status !== "error") || + typeof state.title !== "string" || + (state.message !== undefined && typeof state.message !== "string") + ) { + throw new Error("Invalid push-to-talk overlay state."); + } + + setPushToTalkOverlayState({ + visible: state.visible, + status: state.status, + title: state.title, + ...(state.message ? { message: state.message } : {}), + }); + }); + + ipcMain.removeHandler(PUSH_TO_TALK_TRANSCRIBE_CHANNEL); + ipcMain.handle(PUSH_TO_TALK_TRANSCRIBE_CHANNEL, async (_event, rawInput: unknown) => { + return await transcribePushToTalkAudio(rawInput); + }); + ipcMain.removeHandler(PICK_FOLDER_CHANNEL); ipcMain.handle(PICK_FOLDER_CHANNEL, async (_event, rawOptions: unknown) => { const owner = BrowserWindow.getFocusedWindow() ?? mainWindow; @@ -1916,6 +2184,496 @@ function syncAllWindowAppearance(): void { } } +function pushToTalkOverlayHtml(): string { + return ` + + + + + + +
+ +
+
T3 Code
+
Hold Ctrl+Option
+
+
+ + +`; +} + +function ensurePushToTalkOverlayWindow(): BrowserWindow { + if (pushToTalkOverlayWindow && !pushToTalkOverlayWindow.isDestroyed()) { + return pushToTalkOverlayWindow; + } + + const window = new BrowserWindow({ + width: PUSH_TO_TALK_OVERLAY_WIDTH, + height: PUSH_TO_TALK_OVERLAY_HEIGHT, + frame: false, + transparent: true, + resizable: false, + movable: false, + minimizable: false, + maximizable: false, + closable: false, + focusable: false, + skipTaskbar: true, + show: false, + hasShadow: false, + alwaysOnTop: true, + webPreferences: { + contextIsolation: true, + nodeIntegration: false, + sandbox: true, + }, + }); + + window.setIgnoreMouseEvents(true, { forward: true }); + window.setVisibleOnAllWorkspaces(true, { visibleOnFullScreen: true }); + window.setAlwaysOnTop(true, "screen-saver"); + window.loadURL(`data:text/html;charset=utf-8,${encodeURIComponent(pushToTalkOverlayHtml())}`); + pushToTalkOverlayWindow = window; + window.on("closed", () => { + if (pushToTalkOverlayWindow === window) { + pushToTalkOverlayWindow = null; + } + }); + return window; +} + +function updatePushToTalkOverlayPosition(): void { + const overlayWindow = pushToTalkOverlayWindow; + if (!overlayWindow || overlayWindow.isDestroyed() || !pushToTalkOverlayState.visible) { + return; + } + + const cursor = screen.getCursorScreenPoint(); + const display = screen.getDisplayNearestPoint(cursor); + const area = display.workArea; + const maxX = area.x + area.width - PUSH_TO_TALK_OVERLAY_WIDTH; + const maxY = area.y + area.height - PUSH_TO_TALK_OVERLAY_HEIGHT; + const preferredX = cursor.x + PUSH_TO_TALK_OVERLAY_CURSOR_OFFSET; + const preferredY = cursor.y + PUSH_TO_TALK_OVERLAY_CURSOR_OFFSET; + const x = Math.min(Math.max(preferredX, area.x), maxX); + const y = Math.min(Math.max(preferredY, area.y), maxY); + overlayWindow.setBounds({ + x: Math.round(x), + y: Math.round(y), + width: PUSH_TO_TALK_OVERLAY_WIDTH, + height: PUSH_TO_TALK_OVERLAY_HEIGHT, + }); +} + +function 
startPushToTalkOverlayFollow(): void { + if (pushToTalkOverlayFollowTimer) { + return; + } + updatePushToTalkOverlayPosition(); + pushToTalkOverlayFollowTimer = setInterval(updatePushToTalkOverlayPosition, 33); + pushToTalkOverlayFollowTimer.unref(); +} + +function stopPushToTalkOverlayFollow(): void { + if (!pushToTalkOverlayFollowTimer) { + return; + } + clearInterval(pushToTalkOverlayFollowTimer); + pushToTalkOverlayFollowTimer = null; +} + +function setPushToTalkOverlayState(state: DesktopPushToTalkOverlayState): void { + pushToTalkOverlayState = state; + if (!state.visible) { + stopPushToTalkOverlayFollow(); + pushToTalkOverlayWindow?.hide(); + return; + } + + const overlayWindow = ensurePushToTalkOverlayWindow(); + const applyState = () => { + void overlayWindow.webContents + .executeJavaScript(`window.__setPushToTalkState(${JSON.stringify(state)})`) + .catch(() => undefined); + }; + if (overlayWindow.webContents.isLoading()) { + overlayWindow.webContents.once("did-finish-load", applyState); + } else { + applyState(); + } + startPushToTalkOverlayFollow(); + overlayWindow.showInactive(); +} + +function showIdlePushToTalkOverlay(): void { + setPushToTalkOverlayState({ + visible: true, + status: "idle", + title: "T3 Code", + message: "Hold Ctrl+Option", + }); +} + +function sendPushToTalkEvent(type: "start" | "stop"): void { + const targetWindow = mainWindow ?? BrowserWindow.getAllWindows()[0] ?? null; + targetWindow?.webContents.send(PUSH_TO_TALK_EVENT_CHANNEL, { type }); +} + +function startPushToTalk(source: "native" | "shortcut"): void { + if (pushToTalkActive) { + return; + } + pushToTalkActive = true; + setPushToTalkOverlayState({ + visible: true, + status: "listening", + title: "Listening", + message: + source === "native" ? 
"Release Ctrl+Option to send" : "Press Ctrl+Option+Space again to send", + }); + sendPushToTalkEvent("start"); +} + +function stopPushToTalk(): void { + if (!pushToTalkActive) { + return; + } + pushToTalkActive = false; + setPushToTalkOverlayState({ + visible: true, + status: "processing", + title: "Sending voice command", + message: "Starting agent session", + }); + sendPushToTalkEvent("stop"); +} + +function startPushToTalkFromShortcut(): void { + startPushToTalk("shortcut"); +} + +function stopPushToTalkFromShortcut(): void { + stopPushToTalk(); +} + +function startPushToTalkFromNativeHelper(): void { + startPushToTalk("native"); +} + +function stopPushToTalkFromNativeHelper(): void { + stopPushToTalk(); +} + +function compileNativePushToTalkHelperForDevelopment(): string | null { + if (process.platform !== "darwin" || app.isPackaged) { + return null; + } + + const sourcePath = Path.join( + ROOT_DIR, + "apps", + "desktop", + "native", + "macos-push-to-talk-helper.swift", + ); + const outputPath = Path.join(STATE_DIR, "native-helpers", PUSH_TO_TALK_NATIVE_HELPER_NAME); + + try { + if (!FS.existsSync(sourcePath)) { + return null; + } + + const sourceStat = FS.statSync(sourcePath); + const outputStat = FS.existsSync(outputPath) ? FS.statSync(outputPath) : null; + if (outputStat && outputStat.mtimeMs >= sourceStat.mtimeMs) { + return outputPath; + } + + FS.mkdirSync(Path.dirname(outputPath), { recursive: true }); + const result = ChildProcess.spawnSync("xcrun", ["swiftc", sourcePath, "-O", "-o", outputPath], { + cwd: ROOT_DIR, + encoding: "utf8", + }); + if (result.status !== 0) { + writeDesktopLogHeader( + `push-to-talk native helper compile failed status=${String(result.status)} stderr=${sanitizeLogValue( + result.stderr ?? "", + )}`, + ); + return null; + } + FS.chmodSync(outputPath, 0o755); + return outputPath; + } catch (error) { + writeDesktopLogHeader( + `push-to-talk native helper compile error=${ + error instanceof Error ? 
sanitizeLogValue(error.message) : "unknown" + }`, + ); + return null; + } +} + +function resolveNativePushToTalkHelperPath(): string | null { + if (process.platform !== "darwin") { + return null; + } + + const developmentHelperPath = compileNativePushToTalkHelperForDevelopment(); + if (developmentHelperPath) { + return developmentHelperPath; + } + + const candidates = [ + Path.join(__dirname, "../resources/native", PUSH_TO_TALK_NATIVE_HELPER_NAME), + Path.join(__dirname, "../prod-resources/native", PUSH_TO_TALK_NATIVE_HELPER_NAME), + Path.join(process.resourcesPath, "resources/native", PUSH_TO_TALK_NATIVE_HELPER_NAME), + Path.join(process.resourcesPath, "native", PUSH_TO_TALK_NATIVE_HELPER_NAME), + ]; + + for (const candidate of candidates) { + if (FS.existsSync(candidate)) { + return candidate; + } + } + + return null; +} + +function handleNativePushToTalkHelperLine(line: string): void { + switch (line.trim()) { + case "ready": + case "permission-granted": + writeDesktopLogHeader(`push-to-talk native helper ${line.trim()}`); + showIdlePushToTalkOverlay(); + break; + case "start": + startPushToTalkFromNativeHelper(); + break; + case "stop": + stopPushToTalkFromNativeHelper(); + break; + case "permission-required": + setPushToTalkOverlayState({ + visible: true, + status: "error", + title: "Enable Accessibility", + message: "Grant T3 Code access in System Settings", + }); + writeDesktopLogHeader("push-to-talk native helper requested accessibility permission"); + break; + case "permission-timeout": + case "tap-unavailable": + setPushToTalkOverlayState({ + visible: true, + status: "error", + title: "Push-to-talk unavailable", + message: "Enable Accessibility, then restart T3 Code", + }); + writeDesktopLogHeader(`push-to-talk native helper ${line.trim()}`); + break; + default: + if (line.trim().length > 0) { + writeDesktopLogHeader(`push-to-talk native helper output=${sanitizeLogValue(line)}`); + } + break; + } +} + +function startNativePushToTalkHelper(): void { + if 
(process.platform !== "darwin" || pushToTalkNativeHelperProcess) { + return; + } + + const helperPath = resolveNativePushToTalkHelperPath(); + if (!helperPath) { + writeDesktopLogHeader("push-to-talk native helper missing; using accelerator fallback only"); + return; + } + + const child = ChildProcess.spawn(helperPath, [], { + cwd: Path.dirname(helperPath), + stdio: ["ignore", "pipe", "pipe"], + }); + pushToTalkNativeHelperProcess = child; + pushToTalkNativeHelperStdout = ""; + + child.stdout?.on("data", (chunk: unknown) => { + pushToTalkNativeHelperStdout += Buffer.isBuffer(chunk) ? chunk.toString("utf8") : String(chunk); + const lines = pushToTalkNativeHelperStdout.split(/\r?\n/); + pushToTalkNativeHelperStdout = lines.pop() ?? ""; + for (const line of lines) { + handleNativePushToTalkHelperLine(line); + } + }); + child.stderr?.on("data", (chunk: unknown) => { + const message = Buffer.isBuffer(chunk) ? chunk.toString("utf8") : String(chunk); + writeDesktopLogHeader(`push-to-talk native helper stderr=${sanitizeLogValue(message)}`); + }); + child.once("error", (error) => { + if (pushToTalkNativeHelperProcess === child) { + pushToTalkNativeHelperProcess = null; + } + writeDesktopLogHeader(`push-to-talk native helper error=${sanitizeLogValue(error.message)}`); + }); + child.once("exit", (code, signal) => { + if (pushToTalkNativeHelperProcess === child) { + pushToTalkNativeHelperProcess = null; + } + if (pushToTalkActive) { + stopPushToTalkFromNativeHelper(); + } + if (!isQuitting) { + writeDesktopLogHeader( + `push-to-talk native helper exited code=${String(code)} signal=${String(signal)}`, + ); + } + }); +} + +function stopNativePushToTalkHelper(): void { + const child = pushToTalkNativeHelperProcess; + pushToTalkNativeHelperProcess = null; + pushToTalkNativeHelperStdout = ""; + if (!child || child.exitCode !== null || child.signalCode !== null) { + return; + } + child.kill("SIGTERM"); + setTimeout(() => { + if (child.exitCode === null && child.signalCode === null) 
{ + child.kill("SIGKILL"); + } + }, 2_000).unref(); +} + +function registerPushToTalkShortcuts(): void { + if (!app.isReady()) { + return; + } + globalShortcut.unregister(PUSH_TO_TALK_GLOBAL_SHORTCUT); + const registered = globalShortcut.register(PUSH_TO_TALK_GLOBAL_SHORTCUT, () => { + if (pushToTalkActive) { + stopPushToTalkFromShortcut(); + } else { + startPushToTalkFromShortcut(); + } + }); + if (!registered) { + writeDesktopLogHeader( + `push-to-talk failed to register shortcut accelerator=${PUSH_TO_TALK_GLOBAL_SHORTCUT}`, + ); + } + showIdlePushToTalkOverlay(); +} + nativeTheme.on("updated", syncAllWindowAppearance); function createWindow(): BrowserWindow { @@ -2082,9 +2840,7 @@ async function bootstrap(): Promise { writeDesktopLogHeader("bootstrap backend start requested"); if (isDevelopment) { - mainWindow = createWindow(); - writeDesktopLogHeader("bootstrap main window created"); - void waitForBackendWindowReady(backendHttpUrl) + await waitForBackendWindowReady(backendHttpUrl) .then((source) => { writeDesktopLogHeader(`bootstrap backend ready source=${source}`); }) @@ -2097,6 +2853,8 @@ async function bootstrap(): Promise { ); console.warn("[desktop] backend readiness check timed out during dev bootstrap", error); }); + mainWindow = createWindow(); + writeDesktopLogHeader("bootstrap main window created"); return; } @@ -2109,6 +2867,10 @@ app.on("before-quit", () => { writeDesktopLogHeader("before-quit received"); clearUpdatePollTimer(); cancelBackendReadinessWait(); + globalShortcut.unregister(PUSH_TO_TALK_GLOBAL_SHORTCUT); + stopNativePushToTalkHelper(); + stopPushToTalkOverlayFollow(); + pushToTalkOverlayWindow?.destroy(); stopBackend(); restoreStdIoCapture?.(); }); @@ -2121,6 +2883,8 @@ app configureApplicationMenu(); registerDesktopProtocol(); configureAutoUpdater(); + registerPushToTalkShortcuts(); + startNativePushToTalkHelper(); void bootstrap().catch((error) => { if (isBackendReadinessAborted(error) && isQuitting) { return; diff --git 
a/apps/desktop/src/preload.ts b/apps/desktop/src/preload.ts
index a6756048725..0a65fab5e41 100644
--- a/apps/desktop/src/preload.ts
+++ b/apps/desktop/src/preload.ts
@@ -24,6 +24,9 @@ const SET_SAVED_ENVIRONMENT_SECRET_CHANNEL = "desktop:set-saved-environment-secr
 const REMOVE_SAVED_ENVIRONMENT_SECRET_CHANNEL = "desktop:remove-saved-environment-secret";
 const GET_SERVER_EXPOSURE_STATE_CHANNEL = "desktop:get-server-exposure-state";
 const SET_SERVER_EXPOSURE_MODE_CHANNEL = "desktop:set-server-exposure-mode";
+const PUSH_TO_TALK_EVENT_CHANNEL = "desktop:push-to-talk-event";
+const PUSH_TO_TALK_OVERLAY_STATE_CHANNEL = "desktop:push-to-talk-overlay-state";
+const PUSH_TO_TALK_TRANSCRIBE_CHANNEL = "desktop:push-to-talk-transcribe";
 
 contextBridge.exposeInMainWorld("desktopBridge", {
   getAppBranding: () => {
@@ -85,4 +88,20 @@ contextBridge.exposeInMainWorld("desktopBridge", {
       ipcRenderer.removeListener(UPDATE_STATE_CHANNEL, wrappedListener);
     };
   },
+  onPushToTalkEvent: (listener) => { // Subscribes `listener` to push-to-talk events; returns an unsubscribe function.
+    const wrappedListener = (_event: Electron.IpcRendererEvent, payload: unknown) => {
+      if (typeof payload !== "object" || payload === null) return; // Drop non-object payloads.
+      const type = (payload as { type?: unknown }).type;
+      if (type !== "start" && type !== "stop") return; // Only "start" and "stop" reach the listener.
+      listener({ type }); // Forward just the validated type, not the raw payload.
+    };
+
+    ipcRenderer.on(PUSH_TO_TALK_EVENT_CHANNEL, wrappedListener);
+    return () => { // Unsubscribe: detaches the same wrapper that was registered above.
+      ipcRenderer.removeListener(PUSH_TO_TALK_EVENT_CHANNEL, wrappedListener);
+    };
+  },
+  setPushToTalkOverlayState: (state) => // Sends the overlay state over IPC and returns the invoke promise.
+    ipcRenderer.invoke(PUSH_TO_TALK_OVERLAY_STATE_CHANNEL, state),
+  transcribePushToTalkAudio: (input) => ipcRenderer.invoke(PUSH_TO_TALK_TRANSCRIBE_CHANNEL, input), // Sends `input` over IPC and returns the invoke promise.
 } satisfies DesktopBridge);
diff --git a/apps/server/src/provider/Layers/CodexAdapter.test.ts b/apps/server/src/provider/Layers/CodexAdapter.test.ts
index 4df4fb5d32f..3443e59d2a7 100644
--- a/apps/server/src/provider/Layers/CodexAdapter.test.ts
+++ b/apps/server/src/provider/Layers/CodexAdapter.test.ts
@@ -268,6 +268,7 @@
validationLayer("CodexAdapterLive validation", (it) => {
       assert.deepStrictEqual(validationRuntimeFactory.factory.mock.calls[0]?.[0], {
         binaryPath: "codex",
         cwd: process.cwd(),
+        mcpServers: {},
         model: "gpt-5.3-codex",
         providerInstanceId: ProviderInstanceId.make("codex"),
         serviceTier: "fast",
diff --git a/apps/server/src/provider/Layers/CodexAdapter.ts b/apps/server/src/provider/Layers/CodexAdapter.ts
index 5186dc29627..b48aa19dc97 100644
--- a/apps/server/src/provider/Layers/CodexAdapter.ts
+++ b/apps/server/src/provider/Layers/CodexAdapter.ts
@@ -1374,6 +1374,7 @@ export const makeCodexAdapter = Effect.fn("makeCodexAdapter")(function* (
         binaryPath: codexConfig.binaryPath,
         ...(options?.environment ? { environment: options.environment } : {}),
         ...(codexConfig.homePath ? { homePath: codexConfig.homePath } : {}),
+        mcpServers: codexConfig.mcpServers,
         ...(Schema.is(CodexResumeCursorSchema)(input.resumeCursor)
           ? { resumeCursor: input.resumeCursor }
           : {}),
diff --git a/apps/server/src/provider/Layers/CodexSessionRuntime.test.ts b/apps/server/src/provider/Layers/CodexSessionRuntime.test.ts
index 780f9731fd2..2f565e205a5 100644
--- a/apps/server/src/provider/Layers/CodexSessionRuntime.test.ts
+++ b/apps/server/src/provider/Layers/CodexSessionRuntime.test.ts
@@ -11,6 +11,7 @@ import {
   CODEX_PLAN_MODE_DEVELOPER_INSTRUCTIONS,
 } from "../CodexDeveloperInstructions.ts";
 import {
+  buildCodexMcpSessionConfig,
   buildTurnStartParams,
   isRecoverableThreadResumeError,
   openCodexThread,
@@ -147,6 +148,109 @@ describe("buildTurnStartParams", () => {
   });
 });
 
+describe("buildCodexMcpSessionConfig", () => {
+  it("renders enabled MCP servers with per-session cwd placeholders", () => {
+    assert.deepStrictEqual(
+      buildCodexMcpSessionConfig({
+        cwd: "/repo/workspace",
+        mcpServers: {
+          axonMemory: {
+            enabled: true,
+            command: "uv",
+            args: [
+              "run",
+              "--project",
+              "/opt/mcp/memory-agent",
+              "--no-sync",
+              "python",
+              "examples/serve_memory_mcp.py",
+            ],
+            cwd: "/opt/mcp/memory-agent",
+            env: {},
+            supportsParallelToolCalls: false,
+          },
+          codeIntel: {
+            enabled: true,
+            command: "uv",
+            args: [
+              "run",
+              "--project",
+              "/opt/mcp/code-intel",
+              "--no-sync",
+              "code-intel",
+              "--root",
+              "${cwd}",
+              "serve",
+            ],
+            cwd: "",
+            env: {
+              CODE_INTEL_ROOT: "{workspaceRoot}",
+            },
+            startupTimeoutMs: 20_000,
+            supportsParallelToolCalls: true,
+            defaultToolsApprovalMode: "prompt",
+          },
+          disabledServer: {
+            enabled: false,
+            command: "ignored",
+            args: [],
+            cwd: "",
+            env: {},
+            supportsParallelToolCalls: false,
+          },
+        },
+      }),
+      {
+        mcp_servers: {
+          axonMemory: {
+            type: "stdio",
+            command: "uv",
+            args: [
+              "run",
+              "--project",
+              "/opt/mcp/memory-agent",
+              "--no-sync",
+              "python",
+              "examples/serve_memory_mcp.py",
+            ],
+            cwd: "/opt/mcp/memory-agent",
+          },
+          codeIntel: {
+            type: "stdio",
+            command: "uv",
+            args: [
+              "run",
+              "--project",
+              "/opt/mcp/code-intel",
+              "--no-sync",
+              "code-intel",
+              "--root",
+              "/repo/workspace",
+              "serve",
+            ],
+            env: {
+              CODE_INTEL_ROOT: "/repo/workspace",
+            },
+            startup_timeout_ms: 20_000,
+            supports_parallel_tool_calls: true,
+            default_tools_approval_mode: "prompt",
+          },
+        },
+      },
+    );
+  });
+
+  it("omits config when no MCP servers are enabled", () => {
+    assert.equal(
+      buildCodexMcpSessionConfig({
+        cwd: "/repo/workspace",
+        mcpServers: {},
+      }),
+      undefined,
+    );
+  });
+});
+
 describe("isRecoverableThreadResumeError", () => {
   it("matches missing thread errors", () => {
     assert.equal(
diff --git a/apps/server/src/provider/Layers/CodexSessionRuntime.ts b/apps/server/src/provider/Layers/CodexSessionRuntime.ts
index 4f9011fba04..b4c44eff873 100644
--- a/apps/server/src/provider/Layers/CodexSessionRuntime.ts
+++ b/apps/server/src/provider/Layers/CodexSessionRuntime.ts
@@
-1,5 +1,6 @@ import { ApprovalRequestId, + type CodexMcpServerSettings, DEFAULT_MODEL, EventId, ProviderDriverKind, @@ -84,6 +85,7 @@ export interface CodexSessionRuntimeOptions { readonly runtimeMode: RuntimeMode; readonly model?: string; readonly serviceTier?: EffectCodexSchema.V2ThreadStartParams__ServiceTier | undefined; + readonly mcpServers?: Readonly>; readonly resumeCursor?: CodexResumeCursor; } @@ -264,17 +266,82 @@ function runtimeModeToThreadConfig(input: RuntimeMode): { } } +function interpolateCodexMcpTemplate(value: string, input: { readonly cwd: string }): string { + return value + .replaceAll("${cwd}", input.cwd) + .replaceAll("{cwd}", input.cwd) + .replaceAll("${workspaceRoot}", input.cwd) + .replaceAll("{workspaceRoot}", input.cwd); +} + +export function buildCodexMcpSessionConfig(input: { + readonly cwd: string; + readonly mcpServers?: Readonly>; +}): Record | undefined { + const enabledServers = Object.entries(input.mcpServers ?? {}).filter( + ([, server]) => server.enabled, + ); + if (enabledServers.length === 0) { + return undefined; + } + + const mcpServers: Record = {}; + for (const [name, server] of enabledServers) { + const serverConfig: Record = { + type: "stdio", + command: interpolateCodexMcpTemplate(server.command, input), + }; + + if (server.args.length > 0) { + serverConfig.args = server.args.map((arg) => interpolateCodexMcpTemplate(arg, input)); + } + + if (server.cwd.length > 0) { + serverConfig.cwd = interpolateCodexMcpTemplate(server.cwd, input); + } + + const envEntries = Object.entries(server.env); + if (envEntries.length > 0) { + serverConfig.env = Object.fromEntries( + envEntries.map(([key, value]) => [key, interpolateCodexMcpTemplate(value, input)]), + ); + } + + if (server.startupTimeoutMs !== undefined) { + serverConfig.startup_timeout_ms = server.startupTimeoutMs; + } + + if (server.supportsParallelToolCalls) { + serverConfig.supports_parallel_tool_calls = true; + } + + if (server.defaultToolsApprovalMode !== undefined) { 
+ serverConfig.default_tools_approval_mode = server.defaultToolsApprovalMode; + } + + mcpServers[name] = serverConfig; + } + + return { mcp_servers: mcpServers }; +} + function buildThreadStartParams(input: { readonly cwd: string; readonly runtimeMode: RuntimeMode; readonly model: string | undefined; readonly serviceTier: EffectCodexSchema.V2ThreadStartParams__ServiceTier | undefined; + readonly mcpServers?: Readonly>; }): EffectCodexSchema.V2ThreadStartParams { const config = runtimeModeToThreadConfig(input.runtimeMode); + const mcpConfig = buildCodexMcpSessionConfig({ + cwd: input.cwd, + ...(input.mcpServers ? { mcpServers: input.mcpServers } : {}), + }); return { cwd: input.cwd, approvalPolicy: config.approvalPolicy, sandbox: config.sandbox, + ...(mcpConfig ? { config: mcpConfig } : {}), ...(input.model ? { model: input.model } : {}), ...(input.serviceTier ? { serviceTier: input.serviceTier } : {}), }; @@ -418,6 +485,7 @@ export const openCodexThread = (input: { readonly cwd: string; readonly requestedModel: string | undefined; readonly serviceTier: EffectCodexSchema.V2ThreadStartParams__ServiceTier | undefined; + readonly mcpServers?: Readonly>; readonly resumeThreadId: string | undefined; }): Effect.Effect => { const resumeThreadId = input.resumeThreadId; @@ -426,6 +494,7 @@ export const openCodexThread = (input: { runtimeMode: input.runtimeMode, model: input.requestedModel, serviceTier: input.serviceTier, + ...(input.mcpServers ? { mcpServers: input.mcpServers } : {}), }); if (resumeThreadId === undefined) { @@ -1166,6 +1235,7 @@ export const makeCodexSessionRuntime = ( cwd: options.cwd, requestedModel, serviceTier: options.serviceTier, + ...(options.mcpServers ? 
{ mcpServers: options.mcpServers } : {}), resumeThreadId: readResumeCursorThreadId(options.resumeCursor), }); diff --git a/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts b/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts index 2246a2ae478..dd32c813a3c 100644 --- a/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts +++ b/apps/server/src/provider/Layers/ProviderInstanceRegistryLive.test.ts @@ -50,6 +50,7 @@ const makeCodexConfig = (overrides: Partial): CodexSettings => ({ homePath: "", shadowHomePath: "", customModels: [], + mcpServers: {}, ...overrides, }); diff --git a/apps/server/src/serverSettings.test.ts b/apps/server/src/serverSettings.test.ts index f11c5bf4519..36406e1b65d 100644 --- a/apps/server/src/serverSettings.test.ts +++ b/apps/server/src/serverSettings.test.ts @@ -115,6 +115,7 @@ it.layer(NodeServices.layer)("server settings", (it) => { homePath: "/Users/julius/.codex", shadowHomePath: "", customModels: [], + mcpServers: {}, }); assert.deepEqual(next.providers.claudeAgent, { enabled: true, @@ -356,6 +357,7 @@ it.layer(NodeServices.layer)("server settings", (it) => { homePath: "", shadowHomePath: "", customModels: [], + mcpServers: {}, }); assert.deepEqual(next.providers.claudeAgent, { enabled: true, @@ -394,6 +396,45 @@ it.layer(NodeServices.layer)("server settings", (it) => { }).pipe(Effect.provide(makeServerSettingsLayer())), ); + it.effect("normalizes Codex MCP server settings when updates are applied", () => + Effect.gen(function* () { + const serverSettings = yield* ServerSettingsService; + + const next = yield* serverSettings.updateSettings({ + providers: { + codex: { + mcpServers: { + codeIntel: { + command: " uv ", + args: [" run ", "--no-sync", "code-intel", "--root", " ${cwd} ", "serve"], + cwd: " /tmp/code-intel ", + env: { + CODE_INTEL_ROOT: " {workspaceRoot} ", + }, + startupTimeoutMs: 20_000, + supportsParallelToolCalls: true, + defaultToolsApprovalMode: "prompt", + }, + }, + }, 
+ }, + }); + + assert.deepEqual(next.providers.codex.mcpServers.codeIntel, { + enabled: true, + command: "uv", + args: ["run", "--no-sync", "code-intel", "--root", "${cwd}", "serve"], + cwd: "/tmp/code-intel", + env: { + CODE_INTEL_ROOT: "{workspaceRoot}", + }, + startupTimeoutMs: 20_000, + supportsParallelToolCalls: true, + defaultToolsApprovalMode: "prompt", + }); + }).pipe(Effect.provide(makeServerSettingsLayer())), + ); + it.effect("defaults blank binary paths to provider executables", () => Effect.gen(function* () { const serverSettings = yield* ServerSettingsService; diff --git a/apps/web/src/components/KeybindingsToast.browser.tsx b/apps/web/src/components/KeybindingsToast.browser.tsx index df1c6ba542f..49600cdadee 100644 --- a/apps/web/src/components/KeybindingsToast.browser.tsx +++ b/apps/web/src/components/KeybindingsToast.browser.tsx @@ -109,6 +109,7 @@ function createBaseServerConfig(): ServerConfig { homePath: "", shadowHomePath: "", customModels: [], + mcpServers: {}, }, claudeAgent: { enabled: true, diff --git a/apps/web/src/components/chat/ChatComposer.tsx b/apps/web/src/components/chat/ChatComposer.tsx index 2b7f62e8af7..3f7860a035b 100644 --- a/apps/web/src/components/chat/ChatComposer.tsx +++ b/apps/web/src/components/chat/ChatComposer.tsx @@ -92,6 +92,8 @@ import { type LucideIcon, LockIcon, LockOpenIcon, + MicIcon, + MicOffIcon, PenLineIcon, XIcon, } from "lucide-react"; @@ -139,6 +141,18 @@ const runtimeModeOptions = Object.keys(runtimeModeConfig) as RuntimeMode[]; const COMPOSER_PATH_QUERY_DEBOUNCE_MS = 120; const EMPTY_PROJECT_ENTRIES: ProjectEntry[] = []; +export function appendVoiceTranscript(basePrompt: string, transcript: string): string { + const normalizedTranscript = transcript.trim(); + if (normalizedTranscript.length === 0) { + return basePrompt; + } + + const normalizedBase = basePrompt.trimEnd(); + return normalizedBase.length > 0 + ? 
`${normalizedBase}\n\n${normalizedTranscript}` + : normalizedTranscript; +} + const extendReplacementRangeForTrailingSpace = ( text: string, rangeEnd: number, @@ -1032,6 +1046,339 @@ export const ChatComposer = memo( [composerDraftTarget, setComposerDraftPrompt], ); + const voiceRecorderRef = useRef<{ + autoSubmit: boolean; + chunks: Blob[]; + recorder: MediaRecorder; + stream: MediaStream; + } | null>(null); + const voiceBasePromptRef = useRef(""); + const [isVoiceListening, setIsVoiceListening] = useState(false); + const [isVoiceSupported, setIsVoiceSupported] = useState(false); + + useEffect(() => { + setIsVoiceSupported( + Boolean( + window.desktopBridge?.transcribePushToTalkAudio && + typeof navigator.mediaDevices?.getUserMedia === "function" && + typeof MediaRecorder !== "undefined", + ), + ); + }, []); + + useEffect( + () => () => { + const activeRecorder = voiceRecorderRef.current; + voiceRecorderRef.current = null; + activeRecorder?.stream.getTracks().forEach((track) => track.stop()); + if (activeRecorder?.recorder.state === "recording") { + activeRecorder.recorder.stop(); + } + }, + [], + ); + + const applyVoiceTranscriptToPrompt = useCallback( + (transcript: string) => { + const nextPrompt = appendVoiceTranscript(voiceBasePromptRef.current, transcript); + promptRef.current = nextPrompt; + setPrompt(nextPrompt); + const nextCursor = collapseExpandedComposerCursor(nextPrompt, nextPrompt.length); + setComposerCursor(nextCursor); + setComposerTrigger(detectComposerTrigger(nextPrompt, nextPrompt.length)); + }, + [promptRef, setPrompt], + ); + + const setDesktopVoiceOverlay = useCallback( + (state: { + visible: boolean; + status: "idle" | "listening" | "processing" | "error"; + title: string; + message?: string; + }) => { + void window.desktopBridge?.setPushToTalkOverlayState(state).catch(() => undefined); + }, + [], + ); + + const showDesktopVoiceIdleOverlay = useCallback(() => { + setDesktopVoiceOverlay({ + visible: true, + status: "idle", + title: "T3 
Code", + message: "Hold Ctrl+Option", + }); + }, [setDesktopVoiceOverlay]); + + const stopVoiceListening = useCallback(() => { + const activeRecorder = voiceRecorderRef.current; + if (!activeRecorder || activeRecorder.recorder.state === "inactive") { + return; + } + activeRecorder.recorder.stop(); + }, []); + + const startVoiceListening = useCallback( + async (options: { autoSubmit: boolean }) => { + if (voiceRecorderRef.current || isVoiceListening) { + if (voiceRecorderRef.current) { + voiceRecorderRef.current.autoSubmit = + voiceRecorderRef.current.autoSubmit || options.autoSubmit; + } + return; + } + + if ( + !window.desktopBridge?.transcribePushToTalkAudio || + typeof navigator.mediaDevices?.getUserMedia !== "function" || + typeof MediaRecorder === "undefined" + ) { + toastManager.add({ + type: "error", + title: "Voice input unavailable", + description: "Local voice capture is unavailable in this runtime.", + }); + setDesktopVoiceOverlay({ + visible: true, + status: "error", + title: "Voice input unavailable", + message: "Local voice capture is unavailable.", + }); + window.setTimeout(() => { + showDesktopVoiceIdleOverlay(); + }, 1600); + return; + } + + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const mimeType = MediaRecorder.isTypeSupported("audio/webm;codecs=opus") + ? "audio/webm;codecs=opus" + : MediaRecorder.isTypeSupported("audio/webm") + ? "audio/webm" + : ""; + const chunks: Blob[] = []; + const recorder = new MediaRecorder(stream, mimeType ? 
{ mimeType } : undefined); + voiceBasePromptRef.current = promptRef.current; + voiceRecorderRef.current = { + autoSubmit: options.autoSubmit, + chunks, + recorder, + stream, + }; + + recorder.addEventListener("dataavailable", (event) => { + if (event.data.size > 0) { + chunks.push(event.data); + } + }); + recorder.addEventListener("start", () => { + setIsVoiceListening(true); + setDesktopVoiceOverlay({ + visible: true, + status: "listening", + title: "Listening", + message: options.autoSubmit ? "Release to send" : "Speak your command", + }); + }); + recorder.addEventListener("error", () => { + setIsVoiceListening(false); + voiceRecorderRef.current = null; + stream.getTracks().forEach((track) => track.stop()); + toastManager.add({ + type: "error", + title: "Voice input stopped", + description: "Local audio recording failed.", + }); + setDesktopVoiceOverlay({ + visible: true, + status: "error", + title: "Voice input stopped", + message: "Local audio recording failed.", + }); + window.setTimeout(() => { + showDesktopVoiceIdleOverlay(); + }, 1600); + }); + recorder.addEventListener("stop", () => { + const stoppedRecorder = voiceRecorderRef.current; + voiceRecorderRef.current = null; + setIsVoiceListening(false); + stream.getTracks().forEach((track) => track.stop()); + + void (async () => { + const audioBlob = new Blob(chunks, { type: recorder.mimeType || "audio/webm" }); + if (!stoppedRecorder || audioBlob.size === 0) { + showDesktopVoiceIdleOverlay(); + return; + } + + setDesktopVoiceOverlay({ + visible: true, + status: "processing", + title: "Transcribing locally", + message: "Using local STT", + }); + + const result = await window.desktopBridge?.transcribePushToTalkAudio({ + audio: await audioBlob.arrayBuffer(), + mimeType: audioBlob.type || "audio/webm", + }); + if (!result?.ok) { + const message = result?.error ?? 
"Local transcription failed."; + toastManager.add({ + type: "error", + title: "Voice input stopped", + description: message, + }); + setDesktopVoiceOverlay({ + visible: true, + status: "error", + title: "Voice input stopped", + message, + }); + window.setTimeout(() => { + showDesktopVoiceIdleOverlay(); + }, 2200); + return; + } + + const transcript = result.text.trim(); + if (transcript.length === 0) { + showDesktopVoiceIdleOverlay(); + return; + } + + applyVoiceTranscriptToPrompt(transcript); + scheduleComposerFocus(); + if (stoppedRecorder.autoSubmit) { + setDesktopVoiceOverlay({ + visible: true, + status: "processing", + title: "Sending voice command", + message: "Starting agent session", + }); + window.setTimeout(() => { + void onSend(); + window.setTimeout(() => { + showDesktopVoiceIdleOverlay(); + }, 900); + }, 0); + } else { + showDesktopVoiceIdleOverlay(); + } + })(); + }); + recorder.start(); + } catch (error) { + voiceRecorderRef.current?.stream.getTracks().forEach((track) => track.stop()); + voiceRecorderRef.current = null; + setIsVoiceListening(false); + toastManager.add({ + type: "error", + title: "Unable to start voice input", + description: error instanceof Error ? error.message : "Local audio capture failed.", + }); + setDesktopVoiceOverlay({ + visible: true, + status: "error", + title: "Unable to start voice input", + message: error instanceof Error ? 
error.message : "Local audio capture failed.", + }); + } + }, + [ + applyVoiceTranscriptToPrompt, + isVoiceListening, + onSend, + promptRef, + scheduleComposerFocus, + setDesktopVoiceOverlay, + showDesktopVoiceIdleOverlay, + ], + ); + + const toggleVoiceListening = useCallback(() => { + if (isVoiceListening) { + stopVoiceListening(); + return; + } + + startVoiceListening({ autoSubmit: true }); + }, [isVoiceListening, startVoiceListening, stopVoiceListening]); + + useEffect(() => { + const unsubscribe = window.desktopBridge?.onPushToTalkEvent((event) => { + if (event.type === "start") { + startVoiceListening({ autoSubmit: true }); + } else { + stopVoiceListening(); + } + }); + return () => { + unsubscribe?.(); + }; + }, [startVoiceListening, stopVoiceListening]); + + useEffect(() => { + if (window.sessionStorage.getItem("t3code:voice-autostart") !== "1") { + return; + } + + window.sessionStorage.removeItem("t3code:voice-autostart"); + window.setTimeout(() => { + startVoiceListening({ autoSubmit: true }); + }, 0); + }, [startVoiceListening]); + + useEffect(() => { + if (!window.desktopBridge) { + return; + } + + const shouldHandleEvent = (event: KeyboardEvent) => + event.ctrlKey && + event.altKey && + !event.metaKey && + !event.shiftKey && + (event.code === "ControlLeft" || + event.code === "ControlRight" || + event.code === "AltLeft" || + event.code === "AltRight"); + + const onKeyDown = (event: KeyboardEvent) => { + if (event.repeat || !shouldHandleEvent(event)) { + return; + } + event.preventDefault(); + startVoiceListening({ autoSubmit: true }); + }; + const onKeyUp = (event: KeyboardEvent) => { + if ( + event.code !== "ControlLeft" && + event.code !== "ControlRight" && + event.code !== "AltLeft" && + event.code !== "AltRight" + ) { + return; + } + stopVoiceListening(); + }; + const onBlur = () => { + stopVoiceListening(); + }; + + window.addEventListener("keydown", onKeyDown); + window.addEventListener("keyup", onKeyUp); + window.addEventListener("blur", 
onBlur); + return () => { + window.removeEventListener("keydown", onKeyDown); + window.removeEventListener("keyup", onKeyUp); + window.removeEventListener("blur", onBlur); + }; + }, [startVoiceListening, stopVoiceListening]); + const addComposerImage = useCallback( (image: ComposerImageAttachment) => { addComposerDraftImage(composerDraftTarget, image); @@ -2069,6 +2416,37 @@ export const ChatComposer = memo( } className="flex shrink-0 flex-nowrap items-center justify-end gap-2" > + + + } + > + {isVoiceListening ? ( + + ) : ( + + )} + + + {isVoiceSupported + ? "Hold Ctrl+Option to speak. Ctrl+Option+Space works globally." + : "Voice input is unavailable in this runtime."} + + () => {}, + onPushToTalkEvent: () => () => {}, + setPushToTalkOverlayState: vi.fn().mockResolvedValue(undefined), + transcribePushToTalkAudio: vi.fn().mockResolvedValue({ ok: true, text: "" }), }; }; diff --git a/apps/web/src/localApi.test.ts b/apps/web/src/localApi.test.ts index fbdd203e99f..7801577b86b 100644 --- a/apps/web/src/localApi.test.ts +++ b/apps/web/src/localApi.test.ts @@ -203,6 +203,9 @@ function makeDesktopBridge(overrides: Partial = {}): DesktopBridg throw new Error("installUpdate not implemented in test"); }, onUpdateState: () => () => undefined, + onPushToTalkEvent: () => () => undefined, + setPushToTalkOverlayState: async () => undefined, + transcribePushToTalkAudio: async () => ({ ok: true, text: "" }), ...overrides, }; } diff --git a/apps/web/src/main.tsx b/apps/web/src/main.tsx index 68a7dfaa931..88b611f53ad 100644 --- a/apps/web/src/main.tsx +++ b/apps/web/src/main.tsx @@ -11,8 +11,11 @@ import { getRouter } from "./router"; import { APP_DISPLAY_NAME } from "./branding"; import { syncDocumentWindowControlsOverlayClass } from "./lib/windowControlsOverlay"; -// Electron loads the app from a file-backed shell, so hash history avoids path resolution issues. -const history = isElectron ? 
createHashHistory() : createBrowserHistory(); +// Packaged Electron loads from the backend/custom shell, where hash history +// avoids path resolution issues. Desktop development still loads Vite over +// HTTP, so it must use browser history like the normal web app. +const usesHttpShell = window.location.protocol === "http:" || window.location.protocol === "https:"; +const history = isElectron && !usesHttpShell ? createHashHistory() : createBrowserHistory(); const router = getRouter(history); diff --git a/apps/web/src/routes/_chat.tsx b/apps/web/src/routes/_chat.tsx index fb8191f4480..e3d1000ed0b 100644 --- a/apps/web/src/routes/_chat.tsx +++ b/apps/web/src/routes/_chat.tsx @@ -29,6 +29,10 @@ function ChatRouteGlobalShortcuts() { const appSettings = useSettings(); useEffect(() => { + const defaultThreadEnvMode = resolveSidebarNewThreadEnvMode({ + defaultEnvMode: appSettings.defaultThreadEnvMode, + }); + const onWindowKeyDown = (event: KeyboardEvent) => { if (event.defaultPrevented) return; const command = resolveShortcutCommand(event, keybindings, { @@ -55,9 +59,7 @@ function ChatRouteGlobalShortcuts() { activeDraftThread, activeThread, defaultProjectRef, - defaultThreadEnvMode: resolveSidebarNewThreadEnvMode({ - defaultEnvMode: appSettings.defaultThreadEnvMode, - }), + defaultThreadEnvMode, handleNewThread, }); return; @@ -70,17 +72,31 @@ function ChatRouteGlobalShortcuts() { activeDraftThread, activeThread, defaultProjectRef, - defaultThreadEnvMode: resolveSidebarNewThreadEnvMode({ - defaultEnvMode: appSettings.defaultThreadEnvMode, - }), + defaultThreadEnvMode, handleNewThread, }); } }; + const unsubscribePushToTalk = window.desktopBridge?.onPushToTalkEvent((event) => { + if ( + event.type !== "start" || + activeDraftThread || + activeThread || + !defaultProjectRef || + useCommandPaletteStore.getState().open + ) { + return; + } + + window.sessionStorage.setItem("t3code:voice-autostart", "1"); + void handleNewThread(defaultProjectRef, { envMode: 
defaultThreadEnvMode }); + }); + window.addEventListener("keydown", onWindowKeyDown); return () => { window.removeEventListener("keydown", onWindowKeyDown); + unsubscribePushToTalk?.(); }; }, [ activeDraftThread, diff --git a/docs/codex-mcp-integrations.md b/docs/codex-mcp-integrations.md new file mode 100644 index 00000000000..3a4ba62c082 --- /dev/null +++ b/docs/codex-mcp-integrations.md @@ -0,0 +1,63 @@ +# Codex MCP Integrations + +T3 Code can pass Codex MCP server definitions into each Codex app-server thread through +`providers.codex.mcpServers` in server settings. + +The settings shape mirrors Codex `config.toml` MCP server entries, but uses camelCase in +T3 settings: + +```json +{ + "providers": { + "codex": { + "mcpServers": { + "axonMemory": { + "enabled": true, + "command": "uv", + "args": [ + "run", + "--project", + "/Users/james/git/mcp/cool-shit/coding-ai/memory/agent-new", + "--no-sync", + "python", + "examples/serve_memory_mcp.py" + ], + "cwd": "/Users/james/git/mcp/cool-shit/coding-ai/memory/agent-new", + "startupTimeoutMs": 20000, + "defaultToolsApprovalMode": "prompt" + }, + "codeIntel": { + "enabled": true, + "command": "uv", + "args": [ + "run", + "--project", + "/Users/james/git/mcp/cool-shit/coding-ai/code-graph-rag/code-intel/code-intel", + "--no-sync", + "code-intel", + "--root", + "${cwd}", + "serve" + ], + "startupTimeoutMs": 20000, + "defaultToolsApprovalMode": "prompt" + } + } + } + } +} +``` + +Supported placeholders in `args`, `cwd`, and `env` values: + +- `${cwd}` / `{cwd}`: the Codex thread working directory +- `${workspaceRoot}` / `{workspaceRoot}`: alias for the Codex thread working directory + +Notes: + +- The Axon memory example runs the stdio MCP server from `examples/serve_memory_mcp.py`. +- The code-intel example runs one MCP server per Codex session, rooted at that session's `cwd`. +- These entries are injected as Codex session config, not written into the user's + `CODEX_HOME/config.toml`. 
+- Local service dependencies still apply. The current code-intel CLI expects FalkorDB unless + configured otherwise, and the Axon memory YAML points at FalkorDB on `127.0.0.1:6380`. diff --git a/packages/contracts/src/ipc.ts b/packages/contracts/src/ipc.ts index a63c6de9626..4dd3ef1cd10 100644 --- a/packages/contracts/src/ipc.ts +++ b/packages/contracts/src/ipc.ts @@ -143,6 +143,24 @@ export interface DesktopServerExposureState { advertisedHost: string | null; } +export type DesktopPushToTalkEvent = { type: "start" } | { type: "stop" }; + +export interface DesktopPushToTalkOverlayState { + visible: boolean; + status: "idle" | "listening" | "processing" | "error"; + title: string; + message?: string; +} + +export interface DesktopPushToTalkTranscriptionInput { + audio: ArrayBuffer; + mimeType: string; +} + +export type DesktopPushToTalkTranscriptionResult = + | { ok: true; text: string } + | { ok: false; error: string }; + export interface PickFolderOptions { initialPath?: string | null; } @@ -176,6 +194,11 @@ export interface DesktopBridge { downloadUpdate: () => Promise; installUpdate: () => Promise; onUpdateState: (listener: (state: DesktopUpdateState) => void) => () => void; + onPushToTalkEvent: (listener: (event: DesktopPushToTalkEvent) => void) => () => void; + setPushToTalkOverlayState: (state: DesktopPushToTalkOverlayState) => Promise; + transcribePushToTalkAudio: ( + input: DesktopPushToTalkTranscriptionInput, + ) => Promise; } /** diff --git a/packages/contracts/src/settings.ts b/packages/contracts/src/settings.ts index 4a10c4cc6cf..a79238218ba 100644 --- a/packages/contracts/src/settings.ts +++ b/packages/contracts/src/settings.ts @@ -1,7 +1,7 @@ import { Effect } from "effect"; import * as Schema from "effect/Schema"; import * as SchemaTransformation from "effect/SchemaTransformation"; -import { TrimmedNonEmptyString, TrimmedString } from "./baseSchemas.ts"; +import { NonNegativeInt, TrimmedNonEmptyString, TrimmedString } from "./baseSchemas.ts"; import 
{ DEFAULT_GIT_TEXT_GENERATION_MODEL, ProviderOptionSelections } from "./model.ts"; import { ModelSelection } from "./orchestration.ts"; import { ProviderInstanceConfig, ProviderInstanceId } from "./providerInstance.ts"; @@ -96,12 +96,32 @@ const makeBinaryPathSetting = (fallback: string) => Schema.withDecodingDefault(Effect.succeed(fallback)), ); +export const CodexMcpToolApprovalMode = Schema.Literals(["auto", "prompt", "approve"]); +export type CodexMcpToolApprovalMode = typeof CodexMcpToolApprovalMode.Type; + +export const CodexMcpServerSettings = Schema.Struct({ + enabled: Schema.Boolean.pipe(Schema.withDecodingDefault(Effect.succeed(true))), + command: TrimmedNonEmptyString, + args: Schema.Array(TrimmedString).pipe(Schema.withDecodingDefault(Effect.succeed([]))), + cwd: TrimmedString.pipe(Schema.withDecodingDefault(Effect.succeed(""))), + env: Schema.Record(TrimmedNonEmptyString, TrimmedString).pipe( + Schema.withDecodingDefault(Effect.succeed({})), + ), + startupTimeoutMs: Schema.optionalKey(NonNegativeInt), + supportsParallelToolCalls: Schema.Boolean.pipe(Schema.withDecodingDefault(Effect.succeed(false))), + defaultToolsApprovalMode: Schema.optionalKey(CodexMcpToolApprovalMode), +}); +export type CodexMcpServerSettings = typeof CodexMcpServerSettings.Type; + export const CodexSettings = Schema.Struct({ enabled: Schema.Boolean.pipe(Schema.withDecodingDefault(Effect.succeed(true))), binaryPath: makeBinaryPathSetting("codex"), homePath: TrimmedString.pipe(Schema.withDecodingDefault(Effect.succeed(""))), shadowHomePath: TrimmedString.pipe(Schema.withDecodingDefault(Effect.succeed(""))), customModels: Schema.Array(Schema.String).pipe(Schema.withDecodingDefault(Effect.succeed([]))), + mcpServers: Schema.Record(TrimmedNonEmptyString, CodexMcpServerSettings).pipe( + Schema.withDecodingDefault(Effect.succeed({})), + ), }); export type CodexSettings = typeof CodexSettings.Type; @@ -212,6 +232,21 @@ const CodexSettingsPatch = Schema.Struct({ homePath: 
Schema.optionalKey(Schema.String), shadowHomePath: Schema.optionalKey(Schema.String), customModels: Schema.optionalKey(Schema.Array(Schema.String)), + mcpServers: Schema.optionalKey( + Schema.Record( + Schema.String, + Schema.Struct({ + enabled: Schema.optionalKey(Schema.Boolean), + command: Schema.optionalKey(Schema.String), + args: Schema.optionalKey(Schema.Array(Schema.String)), + cwd: Schema.optionalKey(Schema.String), + env: Schema.optionalKey(Schema.Record(Schema.String, Schema.String)), + startupTimeoutMs: Schema.optionalKey(Schema.Number), + supportsParallelToolCalls: Schema.optionalKey(Schema.Boolean), + defaultToolsApprovalMode: Schema.optionalKey(CodexMcpToolApprovalMode), + }), + ), + ), }); const ClaudeSettingsPatch = Schema.Struct({