Skip to content

Commit ba1fba3

Browse files
committed
fix (chat): voice overflow issue
1 parent 9e75b9a commit ba1fba3

2 files changed

Lines changed: 120 additions & 66 deletions

File tree

src/client/app/chat/page.js

Lines changed: 118 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,21 @@ export default function ChatPage() {
131131
const remoteAudioRef = useRef(null)
132132
const voiceModeStartTimeRef = useRef(null)
133133

134+
// Text of the most recent assistant reply received in voice mode.
const lastSpokenTextRef = useRef("")

/**
 * Enables or disables the audio tracks of the WebRTC client's local media
 * stream and mirrors the result into the `isMuted` UI state.
 *
 * Every audio track on the stream is toggled, not just the first one, so a
 * stream carrying more than one audio track cannot keep transmitting while
 * the UI reports the microphone as muted.
 *
 * Does nothing when there is no client, no media stream, or no audio track.
 * `setIsMuted` is only called when at least one track actually changed state.
 *
 * @param {boolean} enabled - true to unmute the microphone, false to mute it.
 */
const setMicrophoneEnabled = useCallback((enabled) => {
    const stream = webrtcClientRef.current?.mediaStream
    if (!stream) return

    let changed = false
    for (const track of stream.getAudioTracks()) {
        // Only touch tracks whose state differs to avoid unnecessary operations
        if (track.enabled !== enabled) {
            track.enabled = enabled
            changed = true
        }
    }

    if (changed) {
        setIsMuted(!enabled)
    }
}, [])
148+
134149
const fetchInitialMessages = useCallback(async () => {
135150
setIsLoading(true)
136151
try {
@@ -632,74 +647,113 @@ export default function ChatPage() {
632647
}
633648

634649
// --- Voice Mode Handlers ---
635-
// Handle of the single pending "re-enable the microphone" timer, or null
// when no unmute is scheduled.
const pendingUnmuteTimerRef = useRef(null)

// Cancels the pending unmute timer, if any, so a timer armed for an earlier
// state cannot unmute the microphone after the state has moved on.
const cancelPendingUnmute = useCallback(() => {
    if (pendingUnmuteTimerRef.current !== null) {
        clearTimeout(pendingUnmuteTimerRef.current)
        pendingUnmuteTimerRef.current = null
    }
}, [])

// Runs `onElapsed` after `delayMs`, replacing any previously scheduled unmute.
const scheduleUnmute = useCallback(
    (onElapsed, delayMs) => {
        cancelPendingUnmute()
        pendingUnmuteTimerRef.current = setTimeout(() => {
            pendingUnmuteTimerRef.current = null
            onElapsed()
        }, delayMs)
    },
    [cancelPendingUnmute]
)

/**
 * Reacts to a voice connection status change.
 *
 * Stores the status, stops the ringtone for any status other than
 * "connecting", and updates the voice status text. On "connected" it plays
 * the connected sound, mutes the microphone and schedules an unmute 4 seconds
 * later. Any unmute scheduled before this status change is cancelled first,
 * so a call that disconnects during the wait is not switched to
 * "Listening..." afterwards.
 *
 * @param {string} status - handled values: "connecting", "connected",
 *   "disconnected"; other values only update the stored status.
 */
const handleStatusChange = useCallback(
    (status) => {
        setConnectionStatus(status)
        // A timer armed under the previous status must not fire under the new one.
        cancelPendingUnmute()

        if (status !== "connecting" && ringtoneAudioRef.current) {
            ringtoneAudioRef.current.pause()
            ringtoneAudioRef.current.currentTime = 0
        }

        if (status === "connected") {
            if (connectedAudioRef.current) {
                connectedAudioRef.current.volume = 0.4
                connectedAudioRef.current
                    .play()
                    .catch((e) => console.error("Error playing sound:", e))
            }
            // Add a delay to allow ICE connection to stabilize
            setVoiceStatusText("Please wait a moment...")
            setMicrophoneEnabled(false) // Mute mic during stabilization
            scheduleUnmute(() => {
                setVoiceStatusText("Listening...")
                setMicrophoneEnabled(true) // Unmute after delay
            }, 4000)
        } else if (status === "disconnected") {
            setVoiceStatusText("Click to start call")
        } else if (status === "connecting") {
            setVoiceStatusText("Connecting...")
        }
    },
    [setMicrophoneEnabled, cancelPendingUnmute, scheduleUnmute]
)

/**
 * Handles an event received during voice mode.
 *
 * - "stt_result" / "llm_result" with text: appends a user / assistant
 *   message to the displayed messages. The assistant text is also remembered
 *   so its playback duration can be estimated.
 * - "status": updates the voice status text and the microphone state.
 *   "thinking", "speaking" and "transcribing" mute the microphone and cancel
 *   any pending unmute; "listening" schedules the unmute.
 * - "error": shows a toast and an error status text.
 *
 * @param {{type: string, text?: string, message?: string, messageId?: string}} event
 */
const handleVoiceEvent = useCallback(
    (event) => {
        if (event.type === "stt_result" && event.text) {
            setDisplayedMessages((prev) => [
                ...prev,
                {
                    id: `user_${Date.now()}`,
                    role: "user",
                    content: event.text,
                    timestamp: new Date().toISOString()
                }
            ])
        } else if (event.type === "llm_result" && event.text) {
            lastSpokenTextRef.current = event.text // Store the text for duration calculation
            setDisplayedMessages((prev) => [
                ...prev,
                {
                    id: event.messageId || `assistant_${Date.now()}`,
                    role: "assistant",
                    content: event.text,
                    timestamp: new Date().toISOString()
                }
            ])
        } else if (event.type === "status") {
            if (event.message === "thinking") {
                cancelPendingUnmute()
                setVoiceStatusText("Thinking...")
                setMicrophoneEnabled(false)
            } else if (event.message === "speaking") {
                cancelPendingUnmute()
                setVoiceStatusText("Speaking...")
                setMicrophoneEnabled(false)
            } else if (event.message === "listening") {
                // The server sends 'listening' when it's done sending audio,
                // but client-side buffering can cause a delay. We estimate
                // the speaking duration based on the text length from the
                // `llm_result` event to avoid unmuting the mic too early.
                const textToMeasure = lastSpokenTextRef.current
                // Estimate duration: ~18 chars/sec -> ~55ms/char, plus a 250 ms buffer.
                const estimatedDuration = textToMeasure.length * 55 + 250 // ms

                scheduleUnmute(() => {
                    // Only unmute if the call is still up when the timer fires.
                    if (
                        webrtcClientRef.current?.peerConnection
                            ?.connectionState === "connected"
                    ) {
                        setVoiceStatusText("Listening...")
                        setMicrophoneEnabled(true)
                    }
                }, estimatedDuration)

                // Reset for the next turn
                lastSpokenTextRef.current = ""
            } else if (event.message === "transcribing") {
                cancelPendingUnmute()
                setVoiceStatusText("Transcribing...")
                setMicrophoneEnabled(false) // Mute as soon as transcription starts
            } else if (event.message === "choosing_tools") {
                setVoiceStatusText("Choosing tools...")
            } else if (
                event.message &&
                event.message.startsWith("using_tool_")
            ) {
                // Turn e.g. "using_tool_gmail_server" into "Gmail" for display.
                const toolName = event.message
                    .replace("using_tool_", "")
                    .replace("_server", "")
                    .replace("_mcp", "")
                setVoiceStatusText(
                    `Using ${
                        toolName.charAt(0).toUpperCase() + toolName.slice(1)
                    }...`
                )
            }
        } else if (event.type === "error") {
            toast.error(`Voice Error: ${event.message}`)
            setVoiceStatusText("Error. Click to retry.")
        }
    },
    [setMicrophoneEnabled, cancelPendingUnmute, scheduleUnmute]
)
703757

704758
const handleAudioLevel = useCallback((level) => {
705759
setAudioLevel((prev) => prev * 0.7 + level * 0.3)

src/client/components/tasks/CreateTaskInput.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ const CreateTaskInput = ({ onTaskAdded, prompt, setPrompt }) => {
7070
className="w-full rounded-l-full bg-transparent text-white placeholder-transparent border-1 border-brand-orange focus:ring-0 focus:ring-brand-black text-sm z-10 overflow-y-auto self-stretch py-2"
7171
/>
7272
{!prompt && (
73-
<div className="absolute top-1/2 left-4 -translate-y-1/2 text-neutral-500 pointer-events-none z-0">
74-
<TextLoop className="text-sm px-2">
73+
<div className="absolute top-1/2 left-4 right-12 -translate-y-1/2 text-neutral-500 pointer-events-none z-0 overflow-hidden">
74+
<TextLoop className="text-sm px-2 whitespace-normal md:whitespace-nowrap">
7575
<span>Create a task...</span>
7676
<span>
7777
Summarize my unread emails from today

0 commit comments

Comments
 (0)