Skip to content

Commit 61b9431

Browse files
authored
Whisper demo app update (#196)
1 parent feed004 commit 61b9431

3 files changed

Lines changed: 32 additions & 14 deletions

File tree

whisper/android/WhisperApp/app/src/main/java/com/example/whisperapp/MainActivity.kt

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,14 @@ class MainActivity : ComponentActivity(), AsrCallback {
5555

5656
companion object {
5757
private const val TAG = "MainActivity"
58-
private const val RECORDING_DURATION_MS = 5000L // 5 seconds
58+
private const val RECORDING_DURATION_MS = 30000L // 30 seconds
5959
// Token lengths to remove from transcription output
6060
private const val START_TOKEN_LENGTH = 37
6161
private const val END_TOKEN_LENGTH = 13
6262
}
6363

6464
private var transcriptionOutput by mutableStateOf("")
65+
private var rawTranscriptionOutput = ""
6566
private var buttonText by mutableStateOf("Record")
6667
private var buttonEnabled by mutableStateOf(true)
6768
private var statusText by mutableStateOf("")
@@ -202,9 +203,12 @@ class MainActivity : ComponentActivity(), AsrCallback {
202203
return
203204
}
204205

206+
rawTranscriptionOutput = ""
205207
runOnUiThread {
206208
transcriptionOutput = ""
207209
statusText = "Loading model..."
210+
buttonText = "Transcribing..."
211+
buttonEnabled = false
208212
}
209213

210214
val whisperModule = AsrModule(
@@ -218,30 +222,38 @@ class MainActivity : ComponentActivity(), AsrCallback {
218222
runOnUiThread {
219223
statusText = "Transcribing..."
220224
}
221-
whisperModule.transcribe(wavFilePath, this@MainActivity)
222-
Log.v(TAG, "Finished transcribe")
225+
val startTime = System.currentTimeMillis()
226+
whisperModule.transcribe(wavFilePath, callback = this@MainActivity)
227+
val elapsedTime = System.currentTimeMillis() - startTime
228+
val elapsedSeconds = elapsedTime / 1000.0
229+
Log.v(TAG, "Finished transcribe in ${elapsedSeconds}s")
223230

224231
// Display result in Text view instead of Toast
225232
// hack to remove start and end tokens; ideally the runner should not do callback on these tokens
226233
runOnUiThread {
227234
val minLength = START_TOKEN_LENGTH + END_TOKEN_LENGTH
228-
if (transcriptionOutput.length > minLength) {
229-
val endIndex = transcriptionOutput.length - END_TOKEN_LENGTH
235+
if (rawTranscriptionOutput.length > minLength) {
236+
val endIndex = rawTranscriptionOutput.length - END_TOKEN_LENGTH
230237
if (endIndex > START_TOKEN_LENGTH) {
231-
transcriptionOutput = transcriptionOutput.substring(START_TOKEN_LENGTH, endIndex)
238+
transcriptionOutput = rawTranscriptionOutput.substring(START_TOKEN_LENGTH, endIndex)
232239
}
233240
}
234-
statusText = "Transcription complete"
241+
statusText = "Transcription complete (%.2fs)".format(elapsedSeconds)
242+
buttonText = "Record"
235243
buttonEnabled = true
236244
}
237245
}
238246

239247
override fun onToken(result: String) {
240248
Log.v(TAG, "Called callback: here's the current output")
249+
rawTranscriptionOutput += result
241250
runOnUiThread {
242-
transcriptionOutput += result
251+
// Strip start token prefix for display while transcribing
252+
if (rawTranscriptionOutput.length > START_TOKEN_LENGTH) {
253+
transcriptionOutput = rawTranscriptionOutput.substring(START_TOKEN_LENGTH)
254+
}
243255
}
244-
Log.v(TAG, transcriptionOutput)
256+
Log.v(TAG, rawTranscriptionOutput)
245257
}
246258

247259
private fun startRecording() {
@@ -269,10 +281,10 @@ class MainActivity : ComponentActivity(), AsrCallback {
269281
audioRecord?.startRecording()
270282
isRecording = true
271283

272-
buttonText = "Recording... (5s)"
284+
buttonText = "Recording... (30s)"
273285
buttonEnabled = false
274286

275-
// Schedule automatic stop after 5 seconds
287+
// Schedule automatic stop after 30 seconds
276288
stopRecordingRunnable = Runnable {
277289
stopRecording()
278290
}

whisper/android/WhisperApp/app/src/main/java/com/example/whisperapp/ModelSettingsScreen.kt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.example.whisperapp
22

3+
import androidx.compose.foundation.clickable
34
import androidx.compose.foundation.layout.*
45
import androidx.compose.foundation.rememberScrollState
56
import androidx.compose.foundation.verticalScroll
@@ -286,7 +287,9 @@ fun FileSelectionDialog(
286287
Column(modifier = Modifier.verticalScroll(rememberScrollState())) {
287288
if (allowNone) {
288289
Row(
289-
modifier = Modifier.fillMaxWidth(),
290+
modifier = Modifier
291+
.fillMaxWidth()
292+
.clickable { onSelect("") },
290293
verticalAlignment = Alignment.CenterVertically
291294
) {
292295
RadioButton(
@@ -301,7 +304,9 @@ fun FileSelectionDialog(
301304

302305
files.forEach { filePath ->
303306
Row(
304-
modifier = Modifier.fillMaxWidth(),
307+
modifier = Modifier
308+
.fillMaxWidth()
309+
.clickable { onSelect(filePath) },
305310
verticalAlignment = Alignment.CenterVertically
306311
) {
307312
RadioButton(

whisper/android/WhisperApp/app/src/test/java/com/example/whisperapp/ModelSettingsViewModelTest.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.example.whisperapp
22

3-
import org.junit.Assert.*
3+
import org.junit.Assert.assertEquals
4+
import org.junit.Assert.assertTrue
45
import org.junit.Before
56
import org.junit.Rule
67
import org.junit.Test

0 commit comments

Comments
 (0)