Skip to content

Commit 1314650

Browse files
Merge branch 'pr/amDosion/92'
2 parents 52d8b83 + 7ae9432 commit 1314650

11 files changed

Lines changed: 304 additions & 122 deletions

File tree

DEV-LOG.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,35 @@
11
# DEV-LOG
22

3+
## Enable Voice Mode / VOICE_MODE (2026-04-03)
4+
5+
恢复 `/voice` 语音输入功能。`src/` 下所有 voice 相关源码已与官方一致(0 行差异),问题出在:① `VOICE_MODE` 编译开关未开,命令不显示;② `audio-capture-napi` 是 SoX 子进程 stub(Windows 不支持),缺少官方原生 `.node` 二进制。
6+
7+
**新增文件:**
8+
9+
| 文件 | 说明 |
10+
|------|------|
11+
| `vendor/audio-capture/{platform}/audio-capture.node` | 6 个平台的原生音频二进制(cpal,来自参考项目) |
12+
| `vendor/audio-capture-src/index.ts` | 原生模块加载器(按 `${arch}-${platform}` 动态 require `.node`) |
13+
14+
**修改文件:**
15+
16+
| 文件 | 变更 |
17+
|------|------|
18+
| `packages/audio-capture-napi/src/index.ts` | SoX 子进程 stub → 原生 `.node` 加载器(含 `process.cwd()` workspace 路径 fallback) |
19+
| `scripts/dev.ts` | `DEFAULT_FEATURES` 添加 `"VOICE_MODE"` |
20+
| `build.ts` | `DEFAULT_BUILD_FEATURES` 添加 `"VOICE_MODE"` |
21+
| `docs/features/voice-mode.md` | 追加恢复计划章节(第八节) |
22+
23+
**验证结果:**
24+
25+
- `isNativeAudioAvailable()` → `true`(Windows x64 原生 `.node` 加载成功)
26+
- `feature('VOICE_MODE')` → `ENABLED`
27+
- `bun run build` → voice 代码编入产物
28+
29+
**运行时前置条件:** claude.ai OAuth 登录 + 麦克风权限
30+
31+
---
32+
333
## Enable Claude in Chrome MCP (2026-04-03)
434

535
恢复 Chrome 浏览器控制功能。`src/` 下所有 claudeInChrome 相关源码已与官方一致(0 行差异),问题出在 `@ant/claude-for-chrome-mcp` 包是 6 行 stub(返回空工具列表和 null server)。

build.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ rmSync(outdir, { recursive: true, force: true });
1010

1111
// Default features that match the official CLI build.
1212
// Additional features can be enabled via FEATURE_<NAME>=1 env vars.
13-
const DEFAULT_BUILD_FEATURES = ["AGENT_TRIGGERS_REMOTE"];
13+
const DEFAULT_BUILD_FEATURES = ["AGENT_TRIGGERS_REMOTE", "VOICE_MODE"];
1414

1515
// Collect FEATURE_* env vars → Bun.build features
1616
const envFeatures = Object.keys(process.env)
Lines changed: 121 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1,151 +1,152 @@
1-
// audio-capture-napi: cross-platform audio capture using SoX (rec) on macOS
2-
// and arecord (ALSA) on Linux. Replaces the original cpal-based native module.
31

4-
import { type ChildProcess, spawn, spawnSync } from 'child_process'
5-
6-
// ─── State ───────────────────────────────────────────────────────────
7-
8-
let recordingProcess: ChildProcess | null = null
9-
let availabilityCache: boolean | null = null
10-
11-
// ─── Helpers ─────────────────────────────────────────────────────────
12-
13-
function commandExists(cmd: string): boolean {
14-
const result = spawnSync(cmd, ['--version'], {
15-
stdio: 'ignore',
16-
timeout: 3000,
17-
})
18-
return result.error === undefined
2+
type AudioCaptureNapi = {
3+
startRecording(
4+
onData: (data: Buffer) => void,
5+
onEnd: () => void,
6+
): boolean
7+
stopRecording(): void
8+
isRecording(): boolean
9+
startPlayback(sampleRate: number, channels: number): boolean
10+
writePlaybackData(data: Buffer): void
11+
stopPlayback(): void
12+
isPlaying(): boolean
13+
// TCC microphone authorization status (macOS only):
14+
// 0 = notDetermined, 1 = restricted, 2 = denied, 3 = authorized.
15+
// Linux: always returns 3 (authorized) — no system-level microphone permission API.
16+
// Windows: returns 3 (authorized) if registry key absent or allowed,
17+
// 2 (denied) if microphone access is explicitly denied.
18+
microphoneAuthorizationStatus?(): number
1919
}
2020

21-
// ─── Public API ──────────────────────────────────────────────────────
21+
let cachedModule: AudioCaptureNapi | null = null
22+
let loadAttempted = false
2223

23-
/**
24-
* Check whether a supported audio recording command is available.
25-
* Returns true if `rec` (SoX) is found on macOS, or `arecord` (ALSA) on Linux.
26-
* Windows is not supported and always returns false.
27-
*/
28-
export function isNativeAudioAvailable(): boolean {
29-
if (availabilityCache !== null) {
30-
return availabilityCache
24+
function loadModule(): AudioCaptureNapi | null {
25+
if (loadAttempted) {
26+
return cachedModule
3127
}
28+
loadAttempted = true
3229

33-
if (process.platform === 'win32') {
34-
availabilityCache = false
35-
return false
30+
// Supported platforms: macOS (darwin), Linux, Windows (win32)
31+
const platform = process.platform
32+
if (platform !== 'darwin' && platform !== 'linux' && platform !== 'win32') {
33+
return null
3634
}
3735

38-
if (process.platform === 'darwin') {
39-
// macOS: use SoX rec
40-
availabilityCache = commandExists('rec')
41-
return availabilityCache
36+
// Candidate 1: native-embed path (bun compile). AUDIO_CAPTURE_NODE_PATH is
37+
// defined at build time in build-with-plugins.ts for native builds only — the
38+
// define resolves it to the static literal "../../audio-capture.node" so bun
39+
// compile can rewrite it to /$bunfs/root/audio-capture.node. MUST stay a
40+
// direct require(env var) — bun cannot analyze require(variable) from a loop.
41+
if (process.env.AUDIO_CAPTURE_NODE_PATH) {
42+
try {
43+
// eslint-disable-next-line @typescript-eslint/no-require-imports
44+
cachedModule = require(
45+
process.env.AUDIO_CAPTURE_NODE_PATH,
46+
) as AudioCaptureNapi
47+
return cachedModule
48+
} catch {
49+
// fall through to runtime fallbacks below
50+
}
4251
}
4352

44-
if (process.platform === 'linux') {
45-
// Linux: prefer arecord, fall back to rec
46-
availabilityCache = commandExists('arecord') || commandExists('rec')
47-
return availabilityCache
53+
// Candidates 2-4: npm-install, dev/source, and workspace layouts.
54+
// In bundled output, require() resolves relative to cli.js at the package root.
55+
// In dev, it resolves relative to this file. When loaded from a workspace
56+
// package (packages/audio-capture-napi/src/), we need an absolute path fallback.
57+
const platformDir = `${process.arch}-${platform}`
58+
const fallbacks = [
59+
`./vendor/audio-capture/${platformDir}/audio-capture.node`,
60+
`../audio-capture/${platformDir}/audio-capture.node`,
61+
`${process.cwd()}/vendor/audio-capture/${platformDir}/audio-capture.node`,
62+
]
63+
for (const p of fallbacks) {
64+
try {
65+
// eslint-disable-next-line @typescript-eslint/no-require-imports
66+
cachedModule = require(p) as AudioCaptureNapi
67+
return cachedModule
68+
} catch {
69+
// try next
70+
}
4871
}
49-
50-
availabilityCache = false
51-
return false
72+
return null
5273
}
5374

54-
/**
55-
* Check whether a recording is currently in progress.
56-
*/
57-
export function isNativeRecordingActive(): boolean {
58-
return recordingProcess !== null && !recordingProcess.killed
59-
}
60-
61-
/**
62-
* Stop the active recording process, if any.
63-
*/
64-
export function stopNativeRecording(): void {
65-
if (recordingProcess) {
66-
const proc = recordingProcess
67-
recordingProcess = null
68-
if (!proc.killed) {
69-
proc.kill('SIGTERM')
70-
}
71-
}
75+
export function isNativeAudioAvailable(): boolean {
76+
return loadModule() !== null
7277
}
7378

74-
/**
75-
* Start recording audio. Raw PCM data (16kHz, 16-bit signed, mono) is
76-
* streamed via the onData callback. onEnd is called when recording stops
77-
* (either from silence detection or process termination).
78-
*
79-
* Returns true if recording started successfully, false otherwise.
80-
*/
8179
export function startNativeRecording(
8280
onData: (data: Buffer) => void,
8381
onEnd: () => void,
8482
): boolean {
85-
// Don't start if already recording
86-
if (isNativeRecordingActive()) {
87-
stopNativeRecording()
88-
}
89-
90-
if (!isNativeAudioAvailable()) {
83+
const mod = loadModule()
84+
if (!mod) {
9185
return false
9286
}
87+
return mod.startRecording(onData, onEnd)
88+
}
9389

94-
let child: ChildProcess
90+
export function stopNativeRecording(): void {
91+
const mod = loadModule()
92+
if (!mod) {
93+
return
94+
}
95+
mod.stopRecording()
96+
}
9597

96-
if (process.platform === 'darwin' || (process.platform === 'linux' && commandExists('rec'))) {
97-
// Use SoX rec: output raw PCM 16kHz 16-bit signed mono to stdout
98-
child = spawn(
99-
'rec',
100-
[
101-
'-q', // quiet
102-
'--buffer',
103-
'1024', // small buffer for low latency
104-
'-t', 'raw', // raw PCM output
105-
'-r', '16000', // 16kHz sample rate
106-
'-e', 'signed', // signed integer encoding
107-
'-b', '16', // 16-bit
108-
'-c', '1', // mono
109-
'-', // output to stdout
110-
],
111-
{ stdio: ['pipe', 'pipe', 'pipe'] },
112-
)
113-
} else if (process.platform === 'linux' && commandExists('arecord')) {
114-
// Use arecord: output raw PCM 16kHz 16-bit signed LE mono to stdout
115-
child = spawn(
116-
'arecord',
117-
[
118-
'-f', 'S16_LE', // signed 16-bit little-endian
119-
'-r', '16000', // 16kHz sample rate
120-
'-c', '1', // mono
121-
'-t', 'raw', // raw PCM, no header
122-
'-q', // quiet
123-
'-', // output to stdout
124-
],
125-
{ stdio: ['pipe', 'pipe', 'pipe'] },
126-
)
127-
} else {
98+
export function isNativeRecordingActive(): boolean {
99+
const mod = loadModule()
100+
if (!mod) {
128101
return false
129102
}
103+
return mod.isRecording()
104+
}
130105

131-
recordingProcess = child
132-
133-
child.stdout?.on('data', (chunk: Buffer) => {
134-
onData(chunk)
135-
})
106+
export function startNativePlayback(
107+
sampleRate: number,
108+
channels: number,
109+
): boolean {
110+
const mod = loadModule()
111+
if (!mod) {
112+
return false
113+
}
114+
return mod.startPlayback(sampleRate, channels)
115+
}
136116

137-
// Consume stderr to prevent backpressure
138-
child.stderr?.on('data', () => {})
117+
export function writeNativePlaybackData(data: Buffer): void {
118+
const mod = loadModule()
119+
if (!mod) {
120+
return
121+
}
122+
mod.writePlaybackData(data)
123+
}
139124

140-
child.on('close', () => {
141-
recordingProcess = null
142-
onEnd()
143-
})
125+
export function stopNativePlayback(): void {
126+
const mod = loadModule()
127+
if (!mod) {
128+
return
129+
}
130+
mod.stopPlayback()
131+
}
144132

145-
child.on('error', () => {
146-
recordingProcess = null
147-
onEnd()
148-
})
133+
export function isNativePlaying(): boolean {
134+
const mod = loadModule()
135+
if (!mod) {
136+
return false
137+
}
138+
return mod.isPlaying()
139+
}
149140

150-
return true
141+
// Returns the microphone authorization status.
142+
// On macOS, returns the TCC status: 0=notDetermined, 1=restricted, 2=denied, 3=authorized.
143+
// On Linux, always returns 3 (authorized) — no system-level mic permission API.
144+
// On Windows, returns 3 (authorized) if registry key absent or allowed, 2 (denied) if explicitly denied.
145+
// Returns 0 (notDetermined) if the native module is unavailable.
146+
export function microphoneAuthorizationStatus(): number {
147+
const mod = loadModule()
148+
if (!mod || !mod.microphoneAuthorizationStatus) {
149+
return 0
150+
}
151+
return mod.microphoneAuthorizationStatus()
151152
}

scripts/dev.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const defineArgs = Object.entries(defines).flatMap(([k, v]) => [
1515

1616
// Bun --feature flags: enable feature() gates at runtime.
1717
// Default features enabled in dev mode.
18-
const DEFAULT_FEATURES = ["BUDDY", "TRANSCRIPT_CLASSIFIER", "BRIDGE_MODE", "AGENT_TRIGGERS_REMOTE"];
18+
const DEFAULT_FEATURES = ["BUDDY", "TRANSCRIPT_CLASSIFIER", "BRIDGE_MODE", "AGENT_TRIGGERS_REMOTE", "VOICE_MODE"];
1919

2020
// Any env var matching FEATURE_<NAME>=1 will also enable that feature.
2121
// e.g. FEATURE_PROACTIVE=1 bun run dev

0 commit comments

Comments
 (0)