@@ -2,12 +2,12 @@ import http from "node:http";
22import https from "node:https" ;
33import { URL } from "node:url" ;
44import type { Context } from "grammy" ;
5+ import type { FilePartInput } from "@opencode-ai/sdk/v2" ;
56import { HttpsProxyAgent } from "https-proxy-agent" ;
67import { SocksProxyAgent } from "socks-proxy-agent" ;
78import { config } from "../../config.js" ;
89import { isSttConfigured , transcribeAudio , type SttResult } from "../../stt/client.js" ;
9- import type { ProcessPromptDeps } from "./prompt.js" ;
10- import { showSttConfirmation } from "./stt-confirm.js" ;
10+ import { processUserPrompt , type ProcessPromptDeps } from "./prompt.js" ;
1111import { logger } from "../../utils/logger.js" ;
1212import { t } from "../../i18n/index.js" ;
1313import { buildTelegramFileUrl } from "../utils/telegram-file-url.js" ;
@@ -102,6 +102,13 @@ export interface VoiceMessageDeps extends ProcessPromptDeps {
102102 fileId : string ,
103103 ) => Promise < { buffer : Buffer ; filename : string } | null > ;
104104 transcribeAudio ?: ( audioBuffer : Buffer , filename : string ) => Promise < SttResult > ;
105+ processPrompt ?: (
106+ ctx : Context ,
107+ text : string ,
108+ deps : ProcessPromptDeps ,
109+ fileParts ?: FilePartInput [ ] ,
110+ options ?: { responseMode ?: "text_only" | "text_and_tts" } ,
111+ ) => Promise < boolean > ;
105112}
106113
107114/**
@@ -167,6 +174,7 @@ export async function handleVoiceMessage(ctx: Context, deps: VoiceMessageDeps):
167174 const sttConfigured = deps . isSttConfigured ?? isSttConfigured ;
168175 const downloadFile = deps . downloadTelegramFile ?? downloadTelegramFile ;
169176 const transcribe = deps . transcribeAudio ?? transcribeAudio ;
177+ const processPrompt = deps . processPrompt ?? processUserPrompt ;
170178
171179 // Determine file_id from voice or audio message
172180 const voice = ctx . message ?. voice ;
@@ -208,9 +216,32 @@ export async function handleVoiceMessage(ctx: Context, deps: VoiceMessageDeps):
208216 return ;
209217 }
210218
219+ // Show the recognized text by editing the status message.
220+ // IMPORTANT: even if this edit fails (e.g. Telegram message length limits),
221+ // we still send the recognized text to OpenCode as a prompt.
222+ try {
223+ await ctx . api . editMessageText (
224+ ctx . chat ! . id ,
225+ statusMessage . message_id ,
226+ t ( "stt.recognized" , { text : recognizedText } ) ,
227+ ) ;
228+ } catch ( editError ) {
229+ logger . warn ( "[Voice] Failed to edit status message with recognized text:" , editError ) ;
230+ }
231+
211232 logger . info ( `[Voice] Transcribed audio: ${ recognizedText . length } chars` ) ;
212233
213- await showSttConfirmation ( ctx , statusMessage . message_id , recognizedText ) ;
234+ let textForLLM = recognizedText ;
235+ const notePrompt = config . stt . notePrompt . trim ( ) ;
236+
237+ if ( notePrompt && notePrompt . toLowerCase ( ) !== "false" && notePrompt !== "0" ) {
238+ const llmNote = `[Note: ${ notePrompt } ]` ;
239+ logger . debug ( `[Voice] Added STT note to LLM prompt: ${ llmNote } ` ) ;
240+ textForLLM = `${ llmNote } \n${ recognizedText } ` ;
241+ }
242+
243+ // Process the recognized text as a prompt
244+ await processPrompt ( ctx , textForLLM , deps ) ;
214245 } catch ( err ) {
215246 const errorMessage = err instanceof Error ? err . message : "unknown error" ;
216247 logger . error ( "[Voice] Error processing voice message:" , err ) ;
0 commit comments