@@ -131,6 +131,21 @@ export default function ChatPage() {
131131 const remoteAudioRef = useRef ( null )
132132 const voiceModeStartTimeRef = useRef ( null )
133133
134+ const lastSpokenTextRef = useRef ( "" )
/**
 * Enables or disables microphone capture on the active WebRTC client.
 *
 * Every audio track of the client's media stream is toggled, so no track is
 * left live while the UI reports the microphone as muted. `isMuted` is only
 * updated when at least one track actually changed state, which avoids
 * redundant state updates when the handler is called repeatedly with the same
 * value. The call is a no-op when there is no client, no media stream, or no
 * audio track.
 *
 * @param {boolean} enabled - `true` to open the microphone, `false` to mute it.
 */
const setMicrophoneEnabled = useCallback((enabled) => {
	const audioTracks =
		webrtcClientRef.current?.mediaStream?.getAudioTracks() ?? []

	let changed = false
	for (const track of audioTracks) {
		// Only touch tracks whose state differs to avoid unnecessary operations.
		if (track.enabled !== enabled) {
			track.enabled = enabled
			changed = true
		}
	}

	if (changed) {
		setIsMuted(!enabled)
	}
}, [])
148+
134149 const fetchInitialMessages = useCallback ( async ( ) => {
135150 setIsLoading ( true )
136151 try {
@@ -596,110 +611,114 @@ export default function ChatPage() {
596611 return "Good Evening"
597612 }
598613
/**
 * Extracts the user-visible text from a raw assistant message.
 *
 * The message may contain `<think>`/`<thinking>`, `<tool_code>`,
 * `<tool_result>` and `<answer>` blocks. Text outside those blocks is kept,
 * except for text that sits between a `<tool_code>` block and the following
 * `<tool_result>` block. The inner text of each `<answer>` block is kept.
 * Kept pieces are trimmed and joined with a blank line.
 *
 * If nothing was kept, the raw content with every `<...>` tag marker removed
 * is returned instead.
 *
 * @param {unknown} content - Raw message content.
 * @returns {string} The visible text, or "" for empty / non-string input.
 */
const getFinalAnswer = (content) => {
	if (!content || typeof content !== "string") return ""

	const blockPattern =
		/(<think(?:ing)?>[\s\S]*?<\/think(?:ing)?>|<tool_code[^>]*>[\s\S]*?<\/tool_code>|<tool_result[^>]*>[\s\S]*?<\/tool_result>|<answer>[\s\S]*?<\/answer>)/g
	const answerOpen = "<answer>"
	const answerClose = "</answer>"

	const answerParts = []
	let cursor = 0
	let inToolCallPhase = false

	// Keeps free text only when it is non-blank and not part of a tool call.
	const keepIfVisible = (text) => {
		const trimmed = text.trim()
		if (trimmed && !inToolCallPhase) answerParts.push(trimmed)
	}

	for (const match of content.matchAll(blockPattern)) {
		keepIfVisible(content.substring(cursor, match.index))

		const block = match[0]
		if (block.startsWith("<tool_code")) {
			inToolCallPhase = true
		} else if (block.startsWith("<tool_result")) {
			inToolCallPhase = false
		} else if (block.startsWith(answerOpen)) {
			// The pattern guarantees the block is wrapped in <answer>…</answer>.
			const answerText = block
				.slice(answerOpen.length, -answerClose.length)
				.trim()
			// Skip whitespace-only answers so they do not add empty parts
			// (and stray blank separators) to the joined output.
			if (answerText) answerParts.push(answerText)
		}
		cursor = match.index + block.length
	}
	keepIfVisible(content.substring(cursor))

	const plainText = answerParts.join("\n\n")
	if (plainText) return plainText
	return content.replace(/<[^>]+>/g, "").trim()
}
633-
634614 // --- Voice Mode Handlers ---
// Handle of the pending "re-open the microphone" timer, or null when none is
// scheduled. Kept in a ref so later events can cancel it.
const micResumeTimerRef = useRef(null)

/** Cancels the pending microphone-resume timer, if there is one. */
const cancelMicResume = useCallback(() => {
	if (micResumeTimerRef.current !== null) {
		clearTimeout(micResumeTimerRef.current)
		micResumeTimerRef.current = null
	}
}, [])

/**
 * Schedules the switch back to "Listening..." with the microphone open.
 * Any previously scheduled resume is replaced, so at most one timer is
 * pending at a time.
 *
 * @param {number} delayMs - Delay before the microphone is re-opened.
 * @param {() => boolean} [canResume] - Optional check evaluated when the
 *   timer fires; the resume is skipped when it returns false.
 */
const scheduleMicResume = useCallback(
	(delayMs, canResume) => {
		cancelMicResume()
		micResumeTimerRef.current = setTimeout(() => {
			micResumeTimerRef.current = null
			if (canResume && !canResume()) return
			setVoiceStatusText("Listening...")
			setMicrophoneEnabled(true)
		}, delayMs)
	},
	[cancelMicResume, setMicrophoneEnabled]
)

/**
 * Reacts to a connection status change of the voice call: stores the status,
 * stops the ringtone once the call is no longer connecting, plays the
 * "connected" sound, and updates the status text.
 *
 * On "connected" the microphone is muted for a short stabilization window
 * and re-opened afterwards. Every status change cancels a pending resume, so
 * a call that drops inside the window does not get its microphone re-opened
 * (or its status text overwritten) after the fact.
 *
 * @param {string} status - New connection status.
 */
const handleStatusChange = useCallback(
	(status) => {
		setConnectionStatus(status)
		cancelMicResume()

		if (status !== "connecting" && ringtoneAudioRef.current) {
			ringtoneAudioRef.current.pause()
			ringtoneAudioRef.current.currentTime = 0
		}

		if (status === "connected") {
			if (connectedAudioRef.current) {
				connectedAudioRef.current.volume = 0.4
				connectedAudioRef.current
					.play()
					.catch((e) => console.error("Error playing sound:", e))
			}
			// Add a delay to allow ICE connection to stabilize; the mic stays
			// muted during stabilization and is unmuted after the delay.
			setVoiceStatusText("Please wait a moment...")
			setMicrophoneEnabled(false)
			scheduleMicResume(4000)
		} else if (status === "disconnected") {
			setVoiceStatusText("Click to start call")
		} else if (status === "connecting") {
			setVoiceStatusText("Connecting...")
		}
	},
	[cancelMicResume, scheduleMicResume, setMicrophoneEnabled]
)

/**
 * Handles an event from the voice pipeline.
 *
 * - "stt_result" / "llm_result": append the user / assistant message to the
 *   displayed messages; the assistant text is also remembered to estimate how
 *   long playback will take.
 * - "status": update the status text and mute / unmute the microphone.
 * - "error": show a toast and an error status text.
 *
 * @param {{type: string, text?: string, message?: string, messageId?: string}} event
 */
const handleVoiceEvent = useCallback(
	(event) => {
		if (event.type === "stt_result" && event.text) {
			setDisplayedMessages((prev) => [
				...prev,
				{
					id: `user_${Date.now()}`,
					role: "user",
					content: event.text,
					timestamp: new Date().toISOString()
				}
			])
		} else if (event.type === "llm_result" && event.text) {
			// Store the text for the speaking-duration estimate below.
			lastSpokenTextRef.current = event.text
			setDisplayedMessages((prev) => [
				...prev,
				{
					id: event.messageId || `assistant_${Date.now()}`,
					role: "assistant",
					content: event.text,
					timestamp: new Date().toISOString()
				}
			])
		} else if (event.type === "status") {
			if (event.message === "thinking") {
				// A new phase started: a stale resume must not unmute mid-turn.
				cancelMicResume()
				setVoiceStatusText("Thinking...")
				setMicrophoneEnabled(false)
			} else if (event.message === "speaking") {
				cancelMicResume()
				setVoiceStatusText("Speaking...")
				setMicrophoneEnabled(false)
			} else if (event.message === "listening") {
				// The server sends 'listening' when it's done sending audio,
				// but client-side buffering can cause a delay. We estimate
				// the speaking duration based on the text length from the
				// `llm_result` event to avoid unmuting the mic too early.
				// Estimate: ~18 chars/sec -> ~55ms/char, plus a small buffer.
				const estimatedDuration =
					lastSpokenTextRef.current.length * 55 + 250 // ms

				scheduleMicResume(
					estimatedDuration,
					() =>
						webrtcClientRef.current?.peerConnection
							?.connectionState === "connected"
				)

				// Reset for the next turn
				lastSpokenTextRef.current = ""
			} else if (event.message === "transcribing") {
				// Mute as soon as transcription starts
				cancelMicResume()
				setVoiceStatusText("Transcribing...")
				setMicrophoneEnabled(false)
			} else if (event.message === "choosing_tools") {
				setVoiceStatusText("Choosing tools...")
			} else if (
				event.message &&
				event.message.startsWith("using_tool_")
			) {
				const toolName = event.message
					.replace("using_tool_", "")
					.replace("_server", "")
					.replace("_mcp", "")
				setVoiceStatusText(
					`Using ${
						toolName.charAt(0).toUpperCase() + toolName.slice(1)
					}...`
				)
			}
		} else if (event.type === "error") {
			toast.error(`Voice Error: ${event.message}`)
			setVoiceStatusText("Error. Click to retry.")
		}
	},
	[cancelMicResume, scheduleMicResume, setMicrophoneEnabled]
)
703722
704723 const handleAudioLevel = useCallback ( ( level ) => {
705724 setAudioLevel ( ( prev ) => prev * 0.7 + level * 0.3 )
@@ -918,7 +937,7 @@ export default function ChatPage() {
918937 : "the assistant" }
919938 </ p >
920939 < p className = "text-sm text-neutral-200 mt-1 truncate" >
921- { getFinalAnswer ( replyingTo . content ) }
940+ { replyingTo . content . replace ( / < [ ^ > ] + > / g , "" ) . trim ( ) }
922941 </ p >
923942 </ div >
924943 < button
@@ -988,8 +1007,8 @@ export default function ChatPage() {
9881007 style = { { maxHeight : "200px" } }
9891008 />
9901009 { ! input && ! uploadedFilename && (
991- < div className = "absolute top-1/2 left-4 - translate-y-1/2 text-neutral-500 pointer-events-none z-0" >
992- < TextLoop className = "text-base ml-5" >
1010+ < div className = "absolute top-1/2 left-4 right-4 - translate-y-1/2 text-neutral-500 pointer-events-none z-0 overflow-hidden " >
1011+ < TextLoop className = "text-base ml-5 whitespace-normal md:whitespace-nowrap " >
9931012 < span > Ask anything...</ span >
9941013 < span > Summarize my unread emails from today</ span >
9951014 < span >
@@ -1480,9 +1499,7 @@ export default function ChatPage() {
14801499 duration : 0.3
14811500 } }
14821501 >
1483- { getFinalAnswer (
1484- msg . content
1485- ) }
1502+ { msg . content }
14861503 </ motion . div >
14871504 ) ) }
14881505 </ AnimatePresence >
@@ -1533,6 +1550,9 @@ export default function ChatPage() {
15331550 role = { msg . role }
15341551 content = { msg . content }
15351552 tools = { msg . tools || [ ] }
1553+ thoughts = { msg . thoughts || [ ] }
1554+ tool_calls = { msg . tool_calls || [ ] }
1555+ tool_results = { msg . tool_results || [ ] }
15361556 onReply = { handleReply }
15371557 message = { msg }
15381558 allMessages = { displayedMessages }
0 commit comments