Skip to content

Commit 9b3c710

Browse files
authored
支持豆包流式语音输入 (#377)
1 parent a017671 commit 9b3c710

29 files changed

Lines changed: 2737 additions & 146 deletions

apps/electron/electron-builder.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,8 @@ mac:
6767
gatekeeperAssess: false
6868
entitlements: resources/entitlements.mac.plist
6969
entitlementsInherit: resources/entitlements.mac.plist
70+
extendInfo:
71+
NSMicrophoneUsageDescription: "Proma 需要访问麦克风,用于将你的语音实时转写为文本。"
7072
# 架构由 CLI 参数控制(CI 矩阵分别传 --arm64 / --x64),
7173
# 每个 runner 只构建其宿主架构对应的产物,与 bun 按 os/cpu 装好的
7274
# @anthropic-ai/claude-agent-sdk-darwin-{arch} native binary 对齐。

apps/electron/package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,7 @@
120120
"use-stick-to-bottom": "^1.1.2",
121121
"vite": "^6.0.3",
122122
"word-extractor": "1.0.4",
123+
"ws": "8.19.0",
123124
"zod": "^4.0.0"
124125
},
125126
"electronmon": {

apps/electron/src/main/index.ts

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,12 @@ import { getDingTalkMultiBotConfig } from './lib/dingtalk-config'
5757
import { wechatBridge } from './lib/wechat-bridge'
5858
import { getWeChatConfig } from './lib/wechat-config'
5959
import { createQuickTaskWindow, toggleQuickTaskWindow, destroyQuickTaskWindow } from './lib/quick-task-window'
60+
import {
61+
createVoiceDictationWindow,
62+
toggleVoiceDictationWindow,
63+
destroyVoiceDictationWindow,
64+
shouldSuppressVoiceDictationActivate,
65+
} from './lib/voice-dictation-window'
6066
import { registerGlobalShortcut, unregisterAllGlobalShortcuts } from './lib/global-shortcut-service'
6167
import { TRAY_IPC_CHANNELS } from '../types'
6268

@@ -341,15 +347,23 @@ app.whenReady().then(async () => {
341347

342348
// 预创建快速任务窗口(隐藏状态,首次唤起秒开)
343349
createQuickTaskWindow()
350+
createVoiceDictationWindow()
344351

345352
// 注册全局快捷键
346353
registerGlobalShortcut('quick-task', toggleQuickTaskWindow)
347354
registerGlobalShortcut('show-main-window', showAndFocusMainWindow)
355+
registerGlobalShortcut('voice-dictation', () => {
356+
toggleVoiceDictationWindow({ targetIsProma: mainWindow?.isFocused() === true })
357+
})
348358

349359
// 启动所有已注册的 Bridge(飞书/钉钉/微信等)
350360
await startAllBridges()
351361

352362
app.on('activate', () => {
363+
if (shouldSuppressVoiceDictationActivate()) {
364+
return
365+
}
366+
353367
// 直接检查 mainWindow 引用,避免 getAllWindows() 包含 DevTools 等其他窗口导致误判
354368
if (!mainWindow || mainWindow.isDestroyed()) {
355369
createWindow()
@@ -390,6 +404,7 @@ app.on('before-quit', () => {
390404
unregisterAllGlobalShortcuts()
391405
// 销毁快速任务窗口
392406
destroyQuickTaskWindow()
407+
destroyVoiceDictationWindow()
393408
// Clean up system tray before quitting
394409
destroyTray()
395410
})

apps/electron/src/main/ipc.ts

Lines changed: 110 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,19 @@ import { ipcMain, nativeTheme, shell, dialog, BrowserWindow, app } from 'electro
88
import { join } from 'node:path'
99
import { existsSync } from 'node:fs'
1010
import { IPC_CHANNELS, CHANNEL_IPC_CHANNELS, CHAT_IPC_CHANNELS, AGENT_IPC_CHANNELS, ENVIRONMENT_IPC_CHANNELS, INSTALLER_IPC_CHANNELS, PROXY_IPC_CHANNELS, GITHUB_RELEASE_IPC_CHANNELS, SYSTEM_PROMPT_IPC_CHANNELS, MEMORY_IPC_CHANNELS, CHAT_TOOL_IPC_CHANNELS, FEISHU_IPC_CHANNELS, DINGTALK_IPC_CHANNELS, WECHAT_IPC_CHANNELS } from '@proma/shared'
11-
import { USER_PROFILE_IPC_CHANNELS, SETTINGS_IPC_CHANNELS, QUICK_TASK_IPC_CHANNELS, APP_ICON_IPC_CHANNELS, DOCK_BADGE_IPC_CHANNELS } from '../types'
12-
import type { QuickTaskSubmitInput } from '../types'
11+
import { USER_PROFILE_IPC_CHANNELS, SETTINGS_IPC_CHANNELS, QUICK_TASK_IPC_CHANNELS, VOICE_DICTATION_IPC_CHANNELS, APP_ICON_IPC_CHANNELS, DOCK_BADGE_IPC_CHANNELS } from '../types'
12+
import type {
13+
QuickTaskSubmitInput,
14+
VoiceDictationAudioChunkInput,
15+
VoiceDictationCommitInput,
16+
VoiceDictationCommitResult,
17+
VoiceDictationResizeInput,
18+
VoiceDictationSettings,
19+
VoiceDictationSettingsUpdate,
20+
VoiceDictationStartInput,
21+
VoiceDictationStopInput,
22+
VoiceDictationTestResult,
23+
} from '../types'
1324
import type {
1425
RuntimeStatus,
1526
GitRepoStatus,
@@ -2607,4 +2618,101 @@ export function registerIpcHandlers(): void {
26072618
return reregisterAllGlobalShortcuts()
26082619
}
26092620
)
2621+
2622+
// ===== 语音输入 =====
2623+
2624+
ipcMain.handle(
2625+
VOICE_DICTATION_IPC_CHANNELS.GET_SETTINGS,
2626+
async (): Promise<VoiceDictationSettings> => {
2627+
const { getVoiceDictationSettings } = await import('./lib/voice-dictation-settings-service')
2628+
return getVoiceDictationSettings()
2629+
}
2630+
)
2631+
2632+
ipcMain.handle(
2633+
VOICE_DICTATION_IPC_CHANNELS.UPDATE_SETTINGS,
2634+
async (_, updates: VoiceDictationSettingsUpdate): Promise<VoiceDictationSettings> => {
2635+
const { updateVoiceDictationSettings } = await import('./lib/voice-dictation-settings-service')
2636+
return updateVoiceDictationSettings(updates)
2637+
}
2638+
)
2639+
2640+
ipcMain.handle(
2641+
VOICE_DICTATION_IPC_CHANNELS.TEST_CONNECTION,
2642+
async (_, updates?: VoiceDictationSettingsUpdate): Promise<VoiceDictationTestResult> => {
2643+
const { getVoiceDictationSettings } = await import('./lib/voice-dictation-settings-service')
2644+
const { testDoubaoAsrConnection } = await import('./lib/doubao-asr-service')
2645+
const settings = { ...getVoiceDictationSettings(), ...(updates ?? {}) }
2646+
return testDoubaoAsrConnection(settings)
2647+
}
2648+
)
2649+
2650+
ipcMain.handle(
2651+
VOICE_DICTATION_IPC_CHANNELS.TOGGLE,
2652+
async (event): Promise<void> => {
2653+
const { toggleVoiceDictationWindow } = await import('./lib/voice-dictation-window')
2654+
const sourceWindow = BrowserWindow.fromWebContents(event.sender)
2655+
toggleVoiceDictationWindow({ targetIsProma: !!sourceWindow })
2656+
}
2657+
)
2658+
2659+
ipcMain.handle(
2660+
VOICE_DICTATION_IPC_CHANNELS.START,
2661+
async (event, input: VoiceDictationStartInput): Promise<void> => {
2662+
const { getVoiceDictationSettings } = await import('./lib/voice-dictation-settings-service')
2663+
const { startDoubaoAsrSession } = await import('./lib/doubao-asr-service')
2664+
const win = BrowserWindow.fromWebContents(event.sender)
2665+
if (!win) throw new Error('语音输入窗口不存在')
2666+
await startDoubaoAsrSession(input.sessionId, getVoiceDictationSettings(), win)
2667+
}
2668+
)
2669+
2670+
ipcMain.handle(
2671+
VOICE_DICTATION_IPC_CHANNELS.SEND_AUDIO,
2672+
async (_, input: VoiceDictationAudioChunkInput): Promise<void> => {
2673+
const { sendDoubaoAsrAudio } = await import('./lib/doubao-asr-service')
2674+
sendDoubaoAsrAudio(input.sessionId, input.data)
2675+
}
2676+
)
2677+
2678+
ipcMain.handle(
2679+
VOICE_DICTATION_IPC_CHANNELS.STOP,
2680+
async (_, input: VoiceDictationStopInput): Promise<void> => {
2681+
const { stopDoubaoAsrSession } = await import('./lib/doubao-asr-service')
2682+
await stopDoubaoAsrSession(input.sessionId)
2683+
}
2684+
)
2685+
2686+
ipcMain.handle(
2687+
VOICE_DICTATION_IPC_CHANNELS.CANCEL,
2688+
async (_, input: VoiceDictationStopInput): Promise<void> => {
2689+
const { cancelDoubaoAsrSession } = await import('./lib/doubao-asr-service')
2690+
cancelDoubaoAsrSession(input.sessionId)
2691+
}
2692+
)
2693+
2694+
ipcMain.handle(
2695+
VOICE_DICTATION_IPC_CHANNELS.COMMIT,
2696+
async (_, input: VoiceDictationCommitInput): Promise<VoiceDictationCommitResult> => {
2697+
const { getVoiceDictationSettings } = await import('./lib/voice-dictation-settings-service')
2698+
const { commitVoiceDictationText } = await import('./lib/text-output-service')
2699+
return commitVoiceDictationText(input.text, getVoiceDictationSettings())
2700+
}
2701+
)
2702+
2703+
ipcMain.handle(
2704+
VOICE_DICTATION_IPC_CHANNELS.HIDE,
2705+
async (): Promise<void> => {
2706+
const { hideVoiceDictationWindow } = await import('./lib/voice-dictation-window')
2707+
hideVoiceDictationWindow()
2708+
}
2709+
)
2710+
2711+
ipcMain.handle(
2712+
VOICE_DICTATION_IPC_CHANNELS.RESIZE,
2713+
async (_, input: VoiceDictationResizeInput): Promise<void> => {
2714+
const { resizeVoiceDictationWindow } = await import('./lib/voice-dictation-window')
2715+
resizeVoiceDictationWindow(input.height)
2716+
}
2717+
)
26102718
}

0 commit comments

Comments
 (0)