Skip to content

Commit 9d6c063

Browse files
committed
improve fast dictation finalization
1 parent ff85d99 commit 9d6c063

2 files changed

Lines changed: 88 additions & 36 deletions

File tree

Sources/Fluid/Services/ASRService.swift

Lines changed: 84 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -519,6 +519,12 @@ final class ASRService: ObservableObject {
519519
private var audioRouteRecoveryTask: Task<Void, Never>?
520520
private let audioRouteRecoveryDelayNanoseconds: UInt64 = 1_000_000_000
521521
private var isRecoveringAudioRoute = false
522+
private let fastPreviewStopGraceNanoseconds: UInt64 = 200_000_000
523+
private let fastPreviewSampleRate = 16_000
524+
private let fastPreviewMinimumSamples = 32_000
525+
private let fastPreviewTailAudioToleranceMs = 300
526+
private let fastPreviewStopGraceMinimumCoverage = 0.72
527+
private let fastPreviewStopGraceTargetCoverage = 0.88
522528

523529
/// Tracks whether we paused system media for this recording session.
524530
/// Used to resume playback only if we were the ones who paused it.
@@ -529,7 +535,10 @@ final class ASRService: ObservableObject {
529535
private var lastAudioLevelSentAt: TimeInterval = 0
530536

531537
private var streamingChunkDurationSeconds: Double {
532-
SettingsStore.shared.selectedSpeechModel.streamingPreviewIntervalSeconds
538+
if SettingsStore.shared.parakeetFinalizationMode == .tokenTimedChunkMerge {
539+
return 0.4
540+
}
541+
return SettingsStore.shared.selectedSpeechModel.streamingPreviewIntervalSeconds
533542
}
534543

535544
private var minimumStreamingPreviewSamples: Int {
@@ -925,12 +934,16 @@ final class ASRService: ObservableObject {
925934

926935
DebugLogger.shared.debug("📍 Preparing final transcription", source: "ASRService")
927936

928-
// CRITICAL: Set isRunning to false FIRST to signal any in-flight chunks to abort early
929-
DebugLogger.shared.debug("🚫 Setting isRunning = false...", source: "ASRService")
930-
self.isRunning = false
931937
DebugLogger.shared.debug("🚫 Setting audioCapturePipeline recording = false...", source: "ASRService")
932938
self.audioCapturePipeline.setRecordingEnabled(false)
933-
DebugLogger.shared.debug("✅ isRunning and capture pipeline disabled", source: "ASRService")
939+
DebugLogger.shared.debug("✅ Capture pipeline disabled", source: "ASRService")
940+
941+
await self.runFastPreviewStopGraceIfNeeded()
942+
943+
// CRITICAL: Set isRunning to false before teardown so in-flight chunks stop safely.
944+
DebugLogger.shared.debug("🚫 Setting isRunning = false...", source: "ASRService")
945+
self.isRunning = false
946+
DebugLogger.shared.debug("✅ isRunning disabled", source: "ASRService")
934947

935948
// Stop monitoring device to prevent callbacks after stop
936949
DebugLogger.shared.debug("👁️ Stopping device monitoring...", source: "ASRService")
@@ -2448,31 +2461,6 @@ final class ASRService: ObservableObject {
24482461
}
24492462
}
24502463

2451-
/// Stops the streaming timer and waits for the task to complete.
2452-
/// This prevents race conditions where the buffer is cleared while
2453-
/// a transcription task is still running.
2454-
private func stopStreamingTimerAndAwait() async {
2455-
guard let task = self.streamingTask else {
2456-
self.benchmarkLog("streaming_timer_stop no_task=true")
2457-
return
2458-
}
2459-
let startedAt = Date().timeIntervalSince1970
2460-
self.benchmarkLog("streaming_timer_stop begin")
2461-
task.cancel()
2462-
// Wait for the task to actually finish - this is critical!
2463-
// The task may be in the middle of processStreamingChunk()
2464-
_ = await task.result
2465-
self.streamingTask = nil
2466-
self.benchmarkLog("streaming_timer_stop end elapsedMs=\(self.elapsedMilliseconds(since: startedAt)) completedChunks=\(self.benchmarkCompletedStreamingChunks)")
2467-
}
2468-
2469-
/// Legacy sync version for cases where we can't await (e.g., stopWithoutTranscription)
2470-
/// WARNING: This can cause crashes if buffer is cleared immediately after!
2471-
private func stopStreamingTimer() {
2472-
self.streamingTask?.cancel()
2473-
self.streamingTask = nil
2474-
}
2475-
24762464
@MainActor
24772465
private func runStreamingLoop() async {
24782466
DebugLogger.shared.debug("🔄 runStreamingLoop() - ENTERED", source: "ASRService")
@@ -2796,6 +2784,72 @@ final class ASRService: ObservableObject {
27962784
}
27972785
}
27982786

2787+
private extension ASRService {
    /// Stops the streaming timer and waits for the task to complete.
    ///
    /// This prevents race conditions where the buffer is cleared while
    /// a transcription task is still running. Writes benchmark log entries
    /// when the wait begins and ends, or a single `no_task` entry when there
    /// is no streaming task to stop.
    func stopStreamingTimerAndAwait() async {
        guard let task = self.streamingTask else {
            self.benchmarkLog("streaming_timer_stop no_task=true")
            return
        }
        let startedAt = Date().timeIntervalSince1970
        self.benchmarkLog("streaming_timer_stop begin")
        task.cancel()
        // Wait for the task to actually finish - this is critical!
        // The task may be in the middle of processStreamingChunk().
        _ = await task.result
        self.streamingTask = nil
        self.benchmarkLog("streaming_timer_stop end elapsedMs=\(self.elapsedMilliseconds(since: startedAt)) completedChunks=\(self.benchmarkCompletedStreamingChunks)")
    }

    /// Legacy sync version for cases where we can't await (e.g., stopWithoutTranscription).
    ///
    /// Cancels the streaming task and drops the reference without waiting for it to finish.
    /// WARNING: This can cause crashes if buffer is cleared immediately after!
    func stopStreamingTimer() {
        self.streamingTask?.cancel()
        self.streamingTask = nil
    }

    /// Gives the streaming preview a short, bounded chance to catch up with the audio
    /// buffer before the caller continues stopping the recording.
    ///
    /// Does nothing unless the finalization mode is `.tokenTimedChunkMerge`, the selected
    /// speech model supports streaming, and the transcription provider is a
    /// `FluidAudioProvider`. It is also skipped when the buffer holds fewer than
    /// `fastPreviewMinimumSamples` samples, or when the preview already covers the buffer
    /// (coverage at or above `fastPreviewStopGraceTargetCoverage` and an unprocessed tail
    /// no longer than `fastPreviewTailAudioToleranceMs`).
    ///
    /// Otherwise:
    /// - If a chunk is currently being processed, waits for it to finish, for at most
    ///   `fastPreviewStopGraceNanoseconds`, and returns.
    /// - If no chunk is in flight and the preview has processed some audio with coverage
    ///   at or above `fastPreviewStopGraceMinimumCoverage`, runs one extra
    ///   `processStreamingChunk()`.
    ///
    /// Every outcome is recorded through `benchmarkLog`.
    func runFastPreviewStopGraceIfNeeded() async {
        guard SettingsStore.shared.parakeetFinalizationMode == .tokenTimedChunkMerge else { return }
        guard SettingsStore.shared.selectedSpeechModel.supportsStreaming else { return }
        guard self.transcriptionProvider is FluidAudioProvider else { return }

        let currentSampleCount = self.audioBuffer.count
        guard currentSampleCount >= self.fastPreviewMinimumSamples else {
            self.benchmarkLog("fast_preview_stop_grace skipped=true reason=duration samples=\(currentSampleCount)")
            return
        }

        // Clamp so a stale processed count larger than the buffer cannot yield coverage > 1.
        let processedSampleCount = min(self.lastProcessedSampleCount, currentSampleCount)
        let coverage = currentSampleCount > 0 ? Double(processedSampleCount) / Double(currentSampleCount) : 0
        let tailSamples = max(0, currentSampleCount - processedSampleCount)
        let tailMs = Int((Double(tailSamples) / Double(self.fastPreviewSampleRate) * 1000).rounded())
        guard coverage < self.fastPreviewStopGraceTargetCoverage || tailMs > self.fastPreviewTailAudioToleranceMs else {
            self.benchmarkLog(
                "fast_preview_stop_grace skipped=true reason=already_covered coverage=\(String(format: "%.3f", coverage)) tailMs=\(tailMs)"
            )
            return
        }

        if self.isProcessingChunk {
            self.benchmarkLog("fast_preview_stop_grace wait=in_flight coverage=\(String(format: "%.3f", coverage)) tailMs=\(tailMs)")
            // Poll in short slices instead of sleeping the whole grace window, so the stop
            // path resumes as soon as the in-flight chunk completes. The total wait is
            // still capped by fastPreviewStopGraceNanoseconds.
            let pollStepNanoseconds: UInt64 = 10_000_000
            var remainingNanoseconds = self.fastPreviewStopGraceNanoseconds
            while self.isProcessingChunk, remainingNanoseconds > 0 {
                let sliceNanoseconds = min(pollStepNanoseconds, remainingNanoseconds)
                do {
                    try await Task.sleep(nanoseconds: sliceNanoseconds)
                } catch {
                    // Task.sleep throws once the task is cancelled; stop waiting instead
                    // of spinning through the remaining budget.
                    break
                }
                remainingNanoseconds -= sliceNanoseconds
            }
            return
        }

        guard processedSampleCount > 0, coverage >= self.fastPreviewStopGraceMinimumCoverage else {
            self.benchmarkLog("fast_preview_stop_grace skipped=true reason=not_close coverage=\(String(format: "%.3f", coverage)) tailMs=\(tailMs)")
            return
        }

        let startedAt = Date().timeIntervalSince1970
        self.benchmarkLog("fast_preview_stop_grace forced_chunk=true coverage=\(String(format: "%.3f", coverage)) tailMs=\(tailMs) samples=\(currentSampleCount)")
        await self.processStreamingChunk()
        self.benchmarkLog("fast_preview_stop_grace done elapsedMs=\(self.elapsedMilliseconds(since: startedAt)) samples=\(self.audioBuffer.count)")
    }
}
2852+
27992853
// MARK: - Audio capture pipeline
28002854

28012855
//

Sources/Fluid/Services/FluidAudioProvider.swift

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,9 @@ final class FluidAudioProvider: TranscriptionProvider {
3030
private var latestStreamingPreviewText: String = ""
3131
private var latestStreamingPreviewSampleCount: Int = 0
3232
private var latestStreamingPreviewFinishedAt: TimeInterval?
33+
private let fastPreviewMinimumSamples = 32_000
3334
private let fastPreviewTailSilenceRMS: Float = 0.002
35+
private let fastPreviewTailAudioToleranceMs = 300
3436
private(set) var isReady: Bool = false
3537
private(set) var isWordBoostingActive: Bool = false
3638
private(set) var boostedVocabularyTermsCount: Int = 0
@@ -281,14 +283,10 @@ final class FluidAudioProvider: TranscriptionProvider {
281283
self.logFastPreviewMiss(reason: "stale", tailMs: tailMs, coverage: coverage, ageMs: ageMs, tailRMS: tailRMS)
282284
return nil
283285
}
284-
guard finalSampleCount >= 80_000 else {
286+
guard finalSampleCount >= self.fastPreviewMinimumSamples else {
285287
self.logFastPreviewMiss(reason: "short_recording", tailMs: tailMs, coverage: coverage, ageMs: ageMs, tailRMS: tailRMS)
286288
return nil
287289
}
288-
guard finalSampleCount <= 480_000 else {
289-
self.logFastPreviewMiss(reason: "long_recording", tailMs: tailMs, coverage: coverage, ageMs: ageMs, tailRMS: tailRMS)
290-
return nil
291-
}
292290
guard coverage >= 0.88 else {
293291
self.logFastPreviewMiss(reason: "low_coverage", tailMs: tailMs, coverage: coverage, ageMs: ageMs, tailRMS: tailRMS)
294292
return nil
@@ -297,7 +295,7 @@ final class FluidAudioProvider: TranscriptionProvider {
297295
self.logFastPreviewMiss(reason: "large_tail", tailMs: tailMs, coverage: coverage, ageMs: ageMs, tailRMS: tailRMS)
298296
return nil
299297
}
300-
guard tailSamples == 0 || tailRMS <= self.fastPreviewTailSilenceRMS else {
298+
guard tailSamples == 0 || tailMs <= self.fastPreviewTailAudioToleranceMs || tailRMS <= self.fastPreviewTailSilenceRMS else {
301299
self.logFastPreviewMiss(reason: "tail_has_audio", tailMs: tailMs, coverage: coverage, ageMs: ageMs, tailRMS: tailRMS)
302300
return nil
303301
}

0 commit comments

Comments
 (0)