Skip to content

Commit 42c2923

Browse files
committed
v2: Audio transcription feeding strategy
1 parent fd6f241 commit 42c2923

3 files changed

Lines changed: 429 additions & 147 deletions

File tree

shared/src/androidMain/kotlin/com/module/notelycompose/platform/Transcriper.android.kt

Lines changed: 110 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ import android.os.Environment
77
import androidx.core.content.ContextCompat
88
import audio.utils.LauncherHolder
99
import com.module.notelycompose.core.debugPrintln
10-
import com.module.notelycompose.utils.decodeWaveFile
10+
import com.module.notelycompose.utils.StreamingAudioChunker
11+
import com.module.notelycompose.utils.StreamingAudioChunk
12+
import com.module.notelycompose.utils.ChunkTranscriptionResult
1113
import com.whispercpp.whisper.WhisperCallback
1214
import com.whispercpp.whisper.WhisperContext
1315
import kotlinx.coroutines.suspendCancellableCoroutine
@@ -23,6 +25,7 @@ actual class Transcriber(
2325
private val modelsPath = context.getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS)
2426
private var whisperContext: WhisperContext? = null
2527
private var permissionContinuation: ((Boolean) -> Unit)? = null
28+
private val streamingChunker = StreamingAudioChunker()
2629

2730

2831
actual fun hasRecordingPermission(): Boolean {
@@ -103,40 +106,130 @@ actual class Transcriber(
103106
}
104107

105108
canTranscribe = false
109+
isTranscribing = true
106110

107111
try {
108-
debugPrintln{"Reading wave samples... "}
109-
val file = File(filePath)
110-
val data = decodeWaveFile(file)
111-
debugPrintln{"${data.size / (16000 / 1000)} ms\n"}
112-
debugPrintln{"Transcribing data...\n"}
112+
debugPrintln{"Reading WAV file chunks directly from disk...\n"}
113+
114+
// Split WAV file into streaming chunks without loading entire file into memory
115+
val streamingChunks = streamingChunker.splitWavFileIntoChunks(filePath)
116+
debugPrintln{"Processing ${streamingChunks.size} streaming chunks...\n"}
117+
113118
val start = System.currentTimeMillis()
114-
val text = whisperContext?.transcribeData(data, language, callback = object : WhisperCallback{
115-
override fun onNewSegment(startMs: Long, endMs: Long, text: String) {
116-
onNewSegment(startMs, endMs, text)
117-
}
119+
val chunkResults = mutableListOf<ChunkTranscriptionResult>()
120+
var completedChunks = 0
118121

119-
override fun onProgress(progress: Int) {
120-
onProgress(progress)
122+
streamingChunks.forEachIndexed { chunkIndex, streamingChunk ->
123+
if (!isTranscribing) {
124+
debugPrintln{"Transcription stopped by user"}
125+
return@forEachIndexed
121126
}
122127

123-
override fun onComplete() {
124-
onComplete()
128+
debugPrintln{"Processing streaming chunk ${chunkIndex + 1}/${streamingChunks.size} (${streamingChunk.durationSeconds}s)"}
129+
130+
val chunkSegments = mutableListOf<com.module.notelycompose.utils.TranscriptionSegment>()
131+
var chunkText = ""
132+
133+
try {
134+
// Read chunk data directly from file (using reusable arrays)
135+
val chunkData = streamingChunker.readChunkData(streamingChunk)
136+
debugPrintln{"Transcription: Read ${chunkData.size} samples from chunk $chunkIndex (reusable array)"}
137+
138+
// Update progress to show chunk is starting
139+
val chunkProgress = 100.0 / streamingChunks.size
140+
val startProgress = (completedChunks * chunkProgress).toInt().coerceIn(0, 100)
141+
onProgress(startProgress)
142+
143+
val result = whisperContext?.transcribeData(chunkData, language, callback = object : WhisperCallback {
144+
override fun onNewSegment(startMs: Long, endMs: Long, text: String) {
145+
// Adjust timing to account for chunk position in original audio
146+
val chunkStartTimeMs = (streamingChunk.startOffset - 44) / (streamingChunk.header.sampleRate * streamingChunk.header.channels * (streamingChunk.header.bitsPerSample / 8.0) / 1000.0)
147+
val adjustedStartMs = startMs + chunkStartTimeMs.toLong()
148+
val adjustedEndMs = endMs + chunkStartTimeMs.toLong()
149+
150+
chunkSegments.add(com.module.notelycompose.utils.TranscriptionSegment(
151+
adjustedStartMs, adjustedEndMs, text
152+
))
153+
154+
// Call the original callback with adjusted timing
155+
onNewSegment(adjustedStartMs, adjustedEndMs, text)
156+
}
157+
158+
override fun onProgress(progress: Int) {
159+
// Simple chunk-based progress: each chunk represents equal progress
160+
val totalChunks = streamingChunks.size
161+
val chunkProgress = 100.0 / totalChunks
162+
163+
// Progress = completed chunks + current chunk progress
164+
val overallProgress = ((completedChunks * chunkProgress) + (progress * chunkProgress / 100.0)).toInt().coerceIn(0, 100)
165+
166+
debugPrintln{"Transcription: Chunk $chunkIndex progress: $progress%, Overall: $overallProgress%"}
167+
onProgress(overallProgress)
168+
}
169+
170+
override fun onComplete() {
171+
// This will be called for each chunk
172+
completedChunks++
173+
debugPrintln{"Transcription: Transcription completed for chunk $chunkIndex (${completedChunks}/${streamingChunks.size} completed)"}
174+
}
175+
})
176+
177+
chunkText = result ?: ""
178+
179+
// Create a temporary AudioChunk for compatibility with existing merge logic
180+
val tempAudioChunk = com.module.notelycompose.utils.AudioChunk(
181+
startSample = ((streamingChunk.startOffset - 44) / (streamingChunk.header.channels * (streamingChunk.header.bitsPerSample / 8))).toInt(),
182+
endSample = ((streamingChunk.endOffset - 44) / (streamingChunk.header.channels * (streamingChunk.header.bitsPerSample / 8))).toInt(),
183+
data = chunkData
184+
)
185+
186+
chunkResults.add(ChunkTranscriptionResult(tempAudioChunk, chunkText, chunkSegments))
187+
188+
// Clear chunk data from memory after processing (reusable array)
189+
chunkData.fill(0.0f)
190+
debugPrintln{"Transcription: Cleared chunk $chunkIndex data from memory (${chunkData.size} samples, reusable array)"}
191+
192+
} catch (e: Exception) {
193+
debugPrintln{"Error processing streaming chunk $chunkIndex: ${e.localizedMessage}"}
194+
e.printStackTrace()
125195
}
196+
}
197+
198+
// Merge results from all chunks
199+
if (isTranscribing && chunkResults.isNotEmpty()) {
200+
201+
// Clear chunk results from memory after merging
202+
chunkResults.clear()
203+
debugPrintln{"Transcription: Cleared all chunk results from memory"}
204+
}
126205

127-
})
128206
val elapsed = System.currentTimeMillis() - start
129-
debugPrintln{"Done ($elapsed ms): \n$text\n"}
207+
debugPrintln{"Done ($elapsed ms)\n"}
208+
209+
// Clear streaming chunks from memory
210+
streamingChunks.clear()
211+
debugPrintln{"Transcription: Cleared streaming chunks list from memory"}
212+
213+
// Clear reusable arrays from memory
214+
streamingChunker.clearReusableArrays()
215+
val arraySizes = streamingChunker.getReusableArraySizes()
216+
debugPrintln{"Transcription: Cleared reusable arrays from memory (FloatArray: ${arraySizes.first}, ByteArray: ${arraySizes.second})"}
217+
218+
if (isTranscribing) {
219+
onComplete()
220+
}
130221

131222
} catch (e: OutOfMemoryError) {
132223
onError()
224+
e.printStackTrace()
133225
debugPrintln{"OutOfMemoryError: File too large to process - ${e.message}\n"}
134226
} catch (e: Exception) {
227+
onError()
135228
e.printStackTrace()
136229
debugPrintln{"${e.localizedMessage}\n"}
137230
}
138231

139232
canTranscribe = true
140-
233+
isTranscribing = false
141234
}
142235
}

shared/src/androidMain/kotlin/com/module/notelycompose/utils/RiffWaveHelper.kt

Lines changed: 0 additions & 130 deletions
This file was deleted.

0 commit comments

Comments
 (0)