Commit adf7560

fix: Use audio duration instead of chunk count for playback buffer threshold
- Changed hasSufficientBuffer() to check bytes instead of chunk count
- Fixes an issue where x.ai (and potentially other providers) send larger audio chunks
- Previously waited for 6 chunks, which could mean 3+ seconds with large chunks
- Now waits for 60ms of audio data (2880 bytes at 24kHz) regardless of chunk size
- This ensures consistent playback start timing across all providers
1 parent 0a7e105 commit adf7560

1 file changed

Lines changed: 8 additions & 3 deletions

File tree

app/src/main/java/ch/fhnw/pepper_realtime/manager/AudioPlayer.kt

@@ -376,9 +376,14 @@ class AudioPlayer {
     }

     private fun hasSufficientBuffer(): Boolean {
-        // Start only when we have a small headroom of chunks to avoid initial underflow
-        val minChunks = 6 // ~60ms at 10ms frames
-        return audioBuffer.size >= minChunks
+        // Start only when we have enough audio to avoid initial underflow.
+        // Using bytes instead of chunk count because chunk sizes vary by provider:
+        // - OpenAI: ~10ms chunks (240 bytes)
+        // - x.ai: larger chunks (potentially 200-500ms)
+        // - Google: variable chunk sizes
+        // Target: ~60ms of audio = 60 * 24 * 2 = 2880 bytes at 24kHz mono PCM16
+        val minBufferBytes = 60 * sampleRateHz * bytesPerSample * channels / 1000
+        return queuedBytes.get() >= minBufferBytes
     }

     /**
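The threshold arithmetic in the new code can be sketched as a standalone function. This is a minimal illustration of the byte-based buffer check, not the repository's actual code: the function name `minBufferBytes` and its parameters are hypothetical, generalizing the hard-coded 60ms target from the diff.

```kotlin
// Hypothetical sketch of the byte-based threshold from the diff above.
// duration (ms) * sample rate (Hz) * bytes per sample * channels / 1000
fun minBufferBytes(targetMs: Int, sampleRateHz: Int, bytesPerSample: Int, channels: Int): Int =
    targetMs * sampleRateHz * bytesPerSample * channels / 1000

fun main() {
    // 60ms at 24kHz mono PCM16: 60 * 24000 * 2 * 1 / 1000 = 2880 bytes
    println(minBufferBytes(60, 24000, 2, 1)) // prints 2880
}
```

Because the result is a fixed number of bytes, playback starts after the same audio duration whether a provider delivers 240-byte chunks or 500ms chunks.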
