From d03072dd493ffe2abb50dce5e5ea38468445c36f Mon Sep 17 00:00:00 2001
From: clairewangjia
Date: Sun, 12 Apr 2026 15:36:32 +0800
Subject: [PATCH] feat: support receiving voice messages via ASR transcription
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extract voice text from incoming WeChat voice messages using the ASR
transcription provided by the iLink API. Voice messages now appear as
"[语音] " followed by the transcribed text, and are processed by Claude
like normal text.

- Handle both `voice_text` and `text` field names in VoiceItem (API returns `text`)
- Add `media` field to VoiceItem type for CDN data compatibility

Co-Authored-By: Claude Opus 4.6
---
 src/wechat/media.ts | 8 +++++++-
 src/wechat/types.ts | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/wechat/media.ts b/src/wechat/media.ts
index fe053b4..807229b 100644
--- a/src/wechat/media.ts
+++ b/src/wechat/media.ts
@@ -73,9 +73,15 @@ export async function downloadImage(item: MessageItem): Promise {
 
 /**
  * Extract text content from a message item.
- * Returns text_item.text or empty string.
+ * Handles text items and voice items (using ASR transcription).
  */
 export function extractText(item: MessageItem): string {
+  if (item.type === MessageItemType.VOICE) {
+    const voiceText = item.voice_item?.voice_text || item.voice_item?.text;
+    if (voiceText) {
+      return `[语音] ${voiceText}`;
+    }
+  }
   return item.text_item?.text ?? '';
 }
 
diff --git a/src/wechat/types.ts b/src/wechat/types.ts
index df135fe..46b255e 100644
--- a/src/wechat/types.ts
+++ b/src/wechat/types.ts
@@ -48,7 +48,9 @@ export interface ImageItem {
 
 export interface VoiceItem {
   cdn_media: CDNMedia;
+  media?: { encrypt_query_param: string; aes_key?: string };
   voice_text?: string;
+  text?: string;
 }
 
 export interface FileItem {