Skip to content

Commit 131fb2c

Browse files
committed
fix: route TTS TLS and headset audio
1 parent effcc22 commit 131fb2c

11 files changed

Lines changed: 233 additions & 32 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,8 @@ LiteLLM/OpenAI-compatible proxies are supported for REST STT and TTS. Set the pr
4545
| Permission | Why |
4646
|---|---|
4747
| `RECORD_AUDIO` | Capture voice from microphone |
48+
| `BLUETOOTH_CONNECT` | Use a connected Bluetooth headset microphone on Android 12+ |
49+
| `MODIFY_AUDIO_SETTINGS` | Route recording through the active communication device |
4850
| `SYSTEM_ALERT_WINDOW` | Floating bubble overlay |
4951
| `FOREGROUND_SERVICE` | Keep the overlay alive |
5052
| `INTERNET` | Send audio to whisper server |
@@ -128,6 +130,7 @@ The app checks a rolling GitHub Release manifest at `app-latest`. When a newer `
128130
- **HTTP / ws://** works out of the box to any IP (cleartext traffic is allowed via network security config)
129131
- **HTTPS with self-signed certs** works — the client trusts all certificates (this is a private VPN tool, not a public app)
130132
- Works over **Tailscale**, **ZeroTier**, or any VPN — just use the VPN IP as the server URL
133+
- Recording prefers a Bluetooth headset microphone when Android exposes one, then wired and USB headsets, then other USB audio devices, then the built-in mic
131134
- Long-press the overlay to open the panel, then tap **SPEAK** to read the current clipboard with the selected Kokoro voice
132135

133136
## Live endpoint probe

app/src/main/AndroidManifest.xml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44

55
<uses-permission android:name="android.permission.INTERNET" />
66
<uses-permission android:name="android.permission.RECORD_AUDIO" />
7+
<uses-permission android:name="android.permission.MODIFY_AUDIO_SETTINGS" />
8+
<uses-permission android:name="android.permission.BLUETOOTH_CONNECT" />
79
<uses-permission android:name="android.permission.SYSTEM_ALERT_WINDOW" />
810
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
911
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_SPECIAL_USE" />

app/src/main/java/com/whispertranscriber/MainActivity.kt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,9 @@ class MainActivity : ComponentActivity() {
134134

135135
private fun requestPermissions() {
136136
val permissions = mutableListOf(Manifest.permission.RECORD_AUDIO)
137+
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
138+
permissions.add(Manifest.permission.BLUETOOTH_CONNECT)
139+
}
137140
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
138141
permissions.add(Manifest.permission.POST_NOTIFICATIONS)
139142
}
Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
package com.whispertranscriber.audio
2+
3+
import android.media.AudioDeviceInfo
4+
5+
/**
 * Plain-value description of one audio input device.
 *
 * Holds only primitives and a string, so instances can be built and compared
 * without any Android framework object.
 *
 * @property id numeric identifier of the device.
 * @property type device type code; [AudioInputDeviceSelector] compares it
 *   against `AudioDeviceInfo.TYPE_*` constants.
 * @property name human-readable device name; may be empty.
 */
data class AudioInputDevice(
    val id: Int,
    val type: Int,
    val name: String,
)
10+
11+
/**
 * Chooses which audio input a recording should prefer.
 *
 * Operates on plain [AudioInputDevice] values, so the ranking itself needs no
 * live `AudioManager`.
 */
object AudioInputDeviceSelector {
    /**
     * Rank of every supported input type; a lower rank is more preferred.
     *
     * Order: BLE headset, Bluetooth SCO, wired headset, USB headset,
     * generic USB device, built-in microphone.
     */
    private val RANK_BY_TYPE: Map<Int, Int> = listOf(
        AudioDeviceInfo.TYPE_BLE_HEADSET,
        AudioDeviceInfo.TYPE_BLUETOOTH_SCO,
        AudioDeviceInfo.TYPE_WIRED_HEADSET,
        AudioDeviceInfo.TYPE_USB_HEADSET,
        AudioDeviceInfo.TYPE_USB_DEVICE,
        AudioDeviceInfo.TYPE_BUILTIN_MIC,
    ).withIndex().associate { (rank, type) -> type to rank }

    /**
     * Returns the most preferred device in [devices].
     *
     * Devices whose type is not in the priority table are ignored rather than
     * ranked last: handing back an unsupported input as the "preferred" one
     * would steer the recorder to a device this selector knows nothing about.
     * When several devices share the best rank, the first one in [devices]
     * wins.
     *
     * @param devices candidate inputs, in the order the platform reported them.
     * @return the best-ranked supported device, or `null` when [devices] is
     *   empty or contains no supported type.
     */
    fun choosePreferredInput(devices: List<AudioInputDevice>): AudioInputDevice? =
        devices
            .filter { it.type in RANK_BY_TYPE }
            .minByOrNull { RANK_BY_TYPE.getValue(it.type) }
}

app/src/main/java/com/whispertranscriber/audio/AudioRecorder.kt

Lines changed: 137 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,19 @@
11
package com.whispertranscriber.audio
22

3+
import android.annotation.SuppressLint
4+
import android.content.Context
5+
import android.media.AudioDeviceInfo
36
import android.media.AudioFormat
47
import android.media.AudioRecord
8+
import android.media.AudioManager
59
import android.media.MediaRecorder
10+
import android.os.Build
611
import android.util.Log
712
import java.io.ByteArrayOutputStream
813
import java.nio.ByteBuffer
914
import java.nio.ByteOrder
1015

11-
class AudioRecorder {
16+
class AudioRecorder(private val context: Context) {
1217

1318
companion object {
1419
private const val TAG = "AudioRecorder"
@@ -22,9 +27,14 @@ class AudioRecorder {
2227
private var recordingThread: Thread? = null
2328
private val audioBuffer = ByteArrayOutputStream()
2429
private var sampleRate = SAMPLE_RATE_MEDIUM
30+
private var audioManager: AudioManager? = null
31+
private var previousAudioMode: Int? = null
32+
private var communicationDeviceActive = false
33+
private var bluetoothScoStarted = false
2534

2635
fun getSampleRate(): Int = sampleRate
2736

37+
@SuppressLint("MissingPermission")
2838
fun startRecording(quality: String = "medium", onPcmChunk: ((ByteArray) -> Unit)? = null) {
2939
if (isRecording) return
3040

@@ -44,21 +54,32 @@ class AudioRecorder {
4454
}
4555

4656
try {
47-
audioRecord = AudioRecord(
48-
MediaRecorder.AudioSource.MIC,
49-
sampleRate,
50-
channelConfig,
51-
audioFormat,
52-
bufferSize * 2
53-
)
57+
val preferredInput = prepareAudioRouting()
58+
audioRecord = AudioRecord.Builder()
59+
.setAudioSource(MediaRecorder.AudioSource.VOICE_COMMUNICATION)
60+
.setAudioFormat(
61+
AudioFormat.Builder()
62+
.setSampleRate(sampleRate)
63+
.setEncoding(audioFormat)
64+
.setChannelMask(channelConfig)
65+
.build()
66+
)
67+
.setBufferSizeInBytes(bufferSize * 2)
68+
.build()
5469

5570
if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) {
5671
Log.e(TAG, "AudioRecord failed to initialize")
5772
audioRecord?.release()
5873
audioRecord = null
74+
restoreAudioRouting()
5975
return
6076
}
6177

78+
preferredInput?.let { device ->
79+
val routed = audioRecord?.setPreferredDevice(device) == true
80+
Log.d(TAG, "Preferred input ${device.productName} (${device.type}) set: $routed")
81+
}
82+
6283
audioBuffer.reset()
6384
isRecording = true
6485
audioRecord?.startRecording()
@@ -83,6 +104,10 @@ class AudioRecorder {
83104
Log.d(TAG, "Recording started at ${sampleRate}Hz")
84105
} catch (e: SecurityException) {
85106
Log.e(TAG, "Missing RECORD_AUDIO permission", e)
107+
restoreAudioRouting()
108+
} catch (e: Exception) {
109+
Log.e(TAG, "Recording failed to start", e)
110+
restoreAudioRouting()
86111
}
87112
}
88113

@@ -94,6 +119,7 @@ class AudioRecorder {
94119
audioRecord?.stop()
95120
audioRecord?.release()
96121
audioRecord = null
122+
restoreAudioRouting()
97123

98124
val pcmData = synchronized(audioBuffer) {
99125
audioBuffer.toByteArray()
@@ -109,9 +135,112 @@ class AudioRecorder {
109135
recordingThread?.join(1000)
110136
audioRecord?.release()
111137
audioRecord = null
138+
restoreAudioRouting()
112139
audioBuffer.reset()
113140
}
114141

142+
/**
 * Puts the system into communication audio mode and picks the input device
 * the recorder should ask for.
 *
 * Remembers the [AudioManager] and the audio mode that was active before the
 * switch in `audioManager` / `previousAudioMode`, so that
 * `restoreAudioRouting()` can undo the change later.
 *
 * @return the platform device matching the selector's choice, or `null` when
 *   no [AudioManager] is available or the selector picked nothing.
 */
@SuppressLint("MissingPermission")
private fun prepareAudioRouting(): AudioDeviceInfo? {
    val manager = context.getSystemService(AudioManager::class.java) ?: return null
    audioManager = manager
    previousAudioMode = manager.mode
    try {
        manager.mode = AudioManager.MODE_IN_COMMUNICATION
    } catch (e: Exception) {
        // Best effort: recording can still proceed in the current mode.
        Log.w(TAG, "Unable to enter communication audio mode", e)
    }

    val availableInputs = manager.getDevices(AudioManager.GET_DEVICES_INPUTS).toList()

    // The selector works on plain values, so describe each platform device first.
    val candidates = availableInputs.map { info ->
        AudioInputDevice(
            id = info.id,
            type = info.type,
            name = info.productName?.toString().orEmpty()
        )
    }
    val choice = AudioInputDeviceSelector.choosePreferredInput(candidates) ?: return null

    val label = choice.name.ifBlank { choice.id.toString() }
    Log.d(TAG, "Selected input $label (${choice.type})")
    routeCommunicationDevice(manager, choice.type)

    // Map the plain value back to the platform object the recorder needs.
    return availableInputs.firstOrNull { it.id == choice.id }
}
172+
173+
/**
 * Asks the platform to route communication audio to a device that matches
 * [preferredInputType].
 *
 * On Android 12 (API level S) and newer this picks one of
 * `availableCommunicationDevices`: first an exact type match, otherwise — when
 * the preferred input is a Bluetooth type — any Bluetooth communication
 * device. The outcome of `setCommunicationDevice` is stored in
 * `communicationDeviceActive`.
 *
 * On older releases it only acts for `TYPE_BLUETOOTH_SCO`, requesting SCO via
 * the deprecated API and recording that in `bluetoothScoStarted`.
 *
 * Failures are logged and otherwise ignored; the caller records with whatever
 * route the system ends up using.
 *
 * @param manager audio manager used to change the route.
 * @param preferredInputType `AudioDeviceInfo.TYPE_*` code of the chosen input.
 */
@SuppressLint("MissingPermission")
private fun routeCommunicationDevice(manager: AudioManager, preferredInputType: Int) {
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
        // Read the list once so the exact-match lookup and the Bluetooth
        // fallback both see the same snapshot.
        val candidates = manager.availableCommunicationDevices
        val communicationDevice = candidates.firstOrNull { it.type == preferredInputType }
            ?: if (isBluetoothType(preferredInputType)) {
                candidates.firstOrNull { isBluetoothType(it.type) }
            } else {
                null
            }
        if (communicationDevice == null) return

        try {
            communicationDeviceActive = manager.setCommunicationDevice(communicationDevice)
            Log.d(TAG, "Communication device ${communicationDevice.productName} (${communicationDevice.type}) set: $communicationDeviceActive")
        } catch (e: SecurityException) {
            Log.w(TAG, "Bluetooth routing permission denied", e)
        } catch (e: Exception) {
            Log.w(TAG, "Unable to set communication device", e)
        }
    } else if (preferredInputType == AudioDeviceInfo.TYPE_BLUETOOTH_SCO) {
        try {
            @Suppress("DEPRECATION")
            manager.startBluetoothSco()
            @Suppress("DEPRECATION")
            manager.isBluetoothScoOn = true
            bluetoothScoStarted = true
            Log.d(TAG, "Bluetooth SCO routing requested")
        } catch (e: Exception) {
            Log.w(TAG, "Unable to start Bluetooth SCO", e)
        }
    }
}
204+
205+
/**
 * Undoes whatever audio routing was set up for the current recording.
 *
 * Steps run in a fixed order — clear the communication device, stop Bluetooth
 * SCO, restore the saved audio mode — and each step logs and continues on
 * failure so one error does not leave the rest in place. All routing state
 * fields are reset afterwards, so a second call does nothing.
 */
@SuppressLint("MissingPermission")
private fun restoreAudioRouting() {
    // Nothing was prepared (or it was already restored).
    val activeManager = audioManager ?: return

    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S && communicationDeviceActive) {
        try {
            activeManager.clearCommunicationDevice()
        } catch (e: SecurityException) {
            Log.w(TAG, "Bluetooth routing permission denied while clearing route", e)
        } catch (e: Exception) {
            Log.w(TAG, "Unable to clear communication device", e)
        }
    }

    if (bluetoothScoStarted) {
        try {
            @Suppress("DEPRECATION")
            activeManager.isBluetoothScoOn = false
            @Suppress("DEPRECATION")
            activeManager.stopBluetoothSco()
        } catch (e: Exception) {
            Log.w(TAG, "Unable to stop Bluetooth SCO", e)
        }
    }

    val savedMode = previousAudioMode
    if (savedMode != null) {
        try {
            activeManager.mode = savedMode
        } catch (e: Exception) {
            Log.w(TAG, "Unable to restore audio mode", e)
        }
    }

    // Forget everything so the next recording starts from a clean slate.
    audioManager = null
    previousAudioMode = null
    bluetoothScoStarted = false
    communicationDeviceActive = false
}
239+
240+
/**
 * Tells whether [type] is one of the Bluetooth device types this recorder
 * routes through: `TYPE_BLUETOOTH_SCO` or `TYPE_BLE_HEADSET`.
 *
 * @param type an `AudioDeviceInfo.TYPE_*` code.
 * @return `true` for the two Bluetooth types above, `false` for anything else.
 */
private fun isBluetoothType(type: Int): Boolean = when (type) {
    AudioDeviceInfo.TYPE_BLUETOOTH_SCO,
    AudioDeviceInfo.TYPE_BLE_HEADSET -> true
    else -> false
}
243+
115244
private fun createWavFile(pcmData: ByteArray, sampleRate: Int): ByteArray {
116245
val channels = 1
117246
val bitsPerSample = 16

app/src/main/java/com/whispertranscriber/network/KokoroTtsClient.kt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ import kotlinx.coroutines.withContext
88
import okhttp3.Call
99
import okhttp3.Callback
1010
import okhttp3.MediaType.Companion.toMediaType
11-
import okhttp3.OkHttpClient
1211
import okhttp3.Request
1312
import okhttp3.RequestBody.Companion.toRequestBody
1413
import okhttp3.Response
@@ -18,7 +17,7 @@ import kotlin.coroutines.resume
1817
import kotlin.coroutines.resumeWithException
1918

2019
class KokoroTtsClient {
21-
private val client = OkHttpClient.Builder()
20+
private val client = PrivateNetworkOkHttpClientFactory.builder()
2221
.connectTimeout(15, TimeUnit.SECONDS)
2322
.readTimeout(120, TimeUnit.SECONDS)
2423
.build()
Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
package com.whispertranscriber.network
2+
3+
import okhttp3.OkHttpClient
4+
import java.security.SecureRandom
5+
import java.security.cert.X509Certificate
6+
import javax.net.ssl.SSLContext
7+
import javax.net.ssl.TrustManager
8+
import javax.net.ssl.X509TrustManager
9+
10+
/**
 * Source of [OkHttpClient.Builder] instances that skip TLS verification.
 *
 * Every builder handed out accepts any certificate chain and any hostname, so
 * HTTPS servers with self-signed certificates work without extra setup.
 *
 * NOTE(review): a client built from this factory gives no protection against
 * a man-in-the-middle. Use it only for user-configured private servers, and
 * confirm no request to a public endpoint goes through it.
 */
object PrivateNetworkOkHttpClientFactory {
    /** Trust manager whose checks never throw, so every chain is accepted. */
    private val trustAllManager: X509TrustManager = object : X509TrustManager {
        override fun checkClientTrusted(chain: Array<X509Certificate>, authType: String) = Unit

        override fun checkServerTrusted(chain: Array<X509Certificate>, authType: String) = Unit

        override fun getAcceptedIssuers(): Array<X509Certificate> = arrayOf()
    }

    /**
     * Creates a fresh builder with the trust-all socket factory and an
     * accept-everything hostname verifier already applied.
     *
     * Callers add their own timeouts and call `build()`.
     *
     * @return a new, pre-configured [OkHttpClient.Builder].
     */
    fun builder(): OkHttpClient.Builder {
        val permissiveContext = SSLContext.getInstance("TLS")
        permissiveContext.init(null, arrayOf<TrustManager>(trustAllManager), SecureRandom())
        val socketFactory = permissiveContext.socketFactory

        return OkHttpClient.Builder()
            .sslSocketFactory(socketFactory, trustAllManager)
            .hostnameVerifier { _, _ -> true }
    }
}

app/src/main/java/com/whispertranscriber/network/WhisperApiClient.kt

Lines changed: 2 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -13,35 +13,18 @@ import okhttp3.RequestBody.Companion.toRequestBody
1313
import okhttp3.Response
1414
import org.json.JSONObject
1515
import java.io.IOException
16-
import java.security.SecureRandom
17-
import java.security.cert.X509Certificate
1816
import java.util.concurrent.TimeUnit
19-
import javax.net.ssl.SSLContext
20-
import javax.net.ssl.TrustManager
21-
import javax.net.ssl.X509TrustManager
2217
import kotlin.coroutines.resume
2318
import kotlin.coroutines.resumeWithException
2419

2520
class WhisperApiClient {
2621

27-
private val trustAllManager = object : X509TrustManager {
28-
override fun checkClientTrusted(chain: Array<X509Certificate>, authType: String) {}
29-
override fun checkServerTrusted(chain: Array<X509Certificate>, authType: String) {}
30-
override fun getAcceptedIssuers(): Array<X509Certificate> = emptyArray()
31-
}
32-
33-
private val client: OkHttpClient = run {
34-
val sslContext = SSLContext.getInstance("TLS").apply {
35-
init(null, arrayOf<TrustManager>(trustAllManager), SecureRandom())
36-
}
37-
OkHttpClient.Builder()
38-
.sslSocketFactory(sslContext.socketFactory, trustAllManager)
39-
.hostnameVerifier { _, _ -> true }
22+
private val client: OkHttpClient =
23+
PrivateNetworkOkHttpClientFactory.builder()
4024
.connectTimeout(30, TimeUnit.SECONDS)
4125
.readTimeout(120, TimeUnit.SECONDS)
4226
.writeTimeout(60, TimeUnit.SECONDS)
4327
.build()
44-
}
4528

4629
suspend fun transcribe(
4730
serverUrl: String,

app/src/main/java/com/whispertranscriber/network/WhisperLiveKitClient.kt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ import kotlinx.coroutines.CompletableDeferred
44
import kotlinx.coroutines.Dispatchers
55
import kotlinx.coroutines.withContext
66
import kotlinx.coroutines.withTimeout
7-
import okhttp3.OkHttpClient
87
import okhttp3.Request
98
import okhttp3.Response
109
import okhttp3.WebSocket
@@ -17,7 +16,7 @@ import java.util.concurrent.TimeUnit
1716

1817
class WhisperLiveKitClient {
1918

20-
private val client = OkHttpClient.Builder()
19+
private val client = PrivateNetworkOkHttpClientFactory.builder()
2120
.connectTimeout(10, TimeUnit.SECONDS)
2221
.readTimeout(0, TimeUnit.SECONDS)
2322
.build()

app/src/main/java/com/whispertranscriber/service/FloatingOverlayService.kt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ class FloatingOverlayService : Service() {
5959
private lateinit var windowManager: WindowManager
6060
private lateinit var settingsStore: SettingsStore
6161
private lateinit var transcriptionLog: TranscriptionLog
62-
private val audioRecorder = AudioRecorder()
62+
private lateinit var audioRecorder: AudioRecorder
6363
private val whisperClient = WhisperApiClient()
6464
private val liveKitClient = WhisperLiveKitClient()
6565
private val ttsClient = KokoroTtsClient()
@@ -86,6 +86,7 @@ class FloatingOverlayService : Service() {
8686
windowManager = getSystemService(WINDOW_SERVICE) as WindowManager
8787
settingsStore = SettingsStore(this)
8888
transcriptionLog = TranscriptionLog(this)
89+
audioRecorder = AudioRecorder(this)
8990
ttsAudioPlayer = TtsAudioPlayer()
9091
createNotificationChannel()
9192
startForeground(NOTIFICATION_ID, buildNotification())

0 commit comments

Comments
 (0)