Skip to content

Commit 1e87ec4

Browse files
fix(desktop): persist transcript-only batch output (#5276)
Synthesize word timings for transcript-only batch responses and surface empty batch output as a no-speech error.
1 parent f356c52 commit 1e87ec4

4 files changed

Lines changed: 291 additions & 9 deletions

File tree

apps/desktop/src/store/zustand/listener/batch.ts

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import type {
77
} from "@hypr/plugin-transcription";
88

99
import type { BatchPersistCallback } from "./transcript";
10-
import { transformWordEntries } from "./utils";
10+
import { transformWordEntries, type WordEntry } from "./utils";
1111

1212
import { type RuntimeSpeakerHint, type WordLike } from "~/stt/segment";
1313

@@ -39,7 +39,7 @@ export type BatchState = {
3939
export type BatchActions = {
4040
handleBatchStarted: (sessionId: string, phase?: BatchPhase) => void;
4141
handleBatchCompleted: (sessionId: string) => void;
42-
handleBatchResponse: (sessionId: string, response: BatchResponse) => void;
42+
handleBatchResponse: (sessionId: string, response: BatchResponse) => boolean;
4343
handleBatchResponseStreamed: (
4444
sessionId: string,
4545
event: BatchStreamEvent,
@@ -57,6 +57,9 @@ export type BatchActions = {
5757
clearBatchPersist: (sessionId: string) => void;
5858
};
5959

60+
/**
 * Error message used when a completed batch response yields no words.
 *
 * `runBatchSession` throws an `Error` with this message when
 * `handleBatchResponse` returns `false`, and forwards the same text to
 * `handleBatchFailed`.
 */
export const EMPTY_BATCH_TRANSCRIPT_ERROR =
61+
"No speech was detected in the audio.";
62+
6063
export const createBatchSlice = <T extends BatchState>(
6164
set: StoreApi<T>["setState"],
6265
get: StoreApi<T>["getState"],
@@ -112,7 +115,7 @@ export const createBatchSlice = <T extends BatchState>(
112115

113116
const [words, hints] = transformBatch(response);
114117
if (!words.length) {
115-
return;
118+
return false;
116119
}
117120

118121
persist?.(words, hints, { mode: "replace" });
@@ -130,6 +133,8 @@ export const createBatchSlice = <T extends BatchState>(
130133
batchPreview: restPreview,
131134
};
132135
});
136+
137+
return true;
133138
},
134139

135140
handleBatchResponseStreamed: (sessionId, event) => {
@@ -284,13 +289,22 @@ function transformBatch(
284289

285290
response.results.channels.forEach((channel, channelIndex) => {
286291
const alternative = channel.alternatives[0];
287-
if (!alternative || !alternative.words || !alternative.words.length) {
292+
if (!alternative) {
288293
return;
289294
}
290295

291-
const [words, hints] = transformWordEntries(
296+
const wordEntries = wordEntriesFromTranscript(
292297
alternative.words,
293298
alternative.transcript,
299+
{
300+
channel: channelIndex,
301+
durationSeconds: getBatchDurationSeconds(response),
302+
},
303+
);
304+
305+
const [words, hints] = transformWordEntries(
306+
wordEntries,
307+
alternative.transcript,
294308
channelIndex,
295309
);
296310

@@ -357,9 +371,19 @@ function mergeBatchPreview(
357371
return preview;
358372
}
359373

360-
const [incomingWords, incomingHints] = transformWordEntries(
374+
const wordEntries = wordEntriesFromTranscript(
361375
alternative.words,
362376
alternative.transcript,
377+
{
378+
channel: channelIndex,
379+
startSeconds: response.start,
380+
durationSeconds: response.duration,
381+
},
382+
);
383+
384+
const [incomingWords, incomingHints] = transformWordEntries(
385+
wordEntries,
386+
alternative.transcript,
363387
channelIndex,
364388
);
365389
if (incomingWords.length === 0) {
@@ -440,3 +464,56 @@ function getBatchStreamPercentage(event: BatchStreamEvent): number {
440464
return 0;
441465
}
442466
}
467+
468+
/**
 * Returns the word entries for one batch alternative, synthesizing them from
 * the transcript text when the response carries a transcript but no words.
 *
 * When `entries` is non-empty it is returned untouched. When the transcript is
 * blank there is nothing to synthesize, so `entries` (or a fresh empty array)
 * is returned. Otherwise the transcript is split on whitespace and every token
 * receives an equal slice of the time window, so that token `i` ends exactly
 * where token `i + 1` starts.
 *
 * @param entries - Word entries supplied by the provider, if any.
 * @param transcript - Full transcript text of the alternative.
 * @param options.channel - Channel index stamped on every synthesized entry.
 * @param options.startSeconds - Offset added to every synthesized timestamp.
 *   Defaults to 0; non-finite values are treated as 0 so they cannot turn
 *   every timestamp into `NaN`.
 * @param options.durationSeconds - Length of the window to spread the tokens
 *   over. Missing, non-finite, or non-positive values fall back to a fixed
 *   per-word estimate.
 * @returns The provider entries, or one synthesized entry per token with
 *   `speaker` set to `null`.
 */
function wordEntriesFromTranscript(
  entries: WordEntry[] | null | undefined,
  transcript: string,
  {
    channel,
    startSeconds = 0,
    durationSeconds,
  }: {
    channel: number;
    startSeconds?: number;
    durationSeconds?: number;
  },
): WordEntry[] {
  // Per-word estimate used when no usable duration is available.
  const fallbackSecondsPerWord = 0.4;
  // Lower bound per word so a tiny reported duration never yields
  // near-zero-length words.
  const minSecondsPerWord = 0.05;

  if (entries?.length) {
    return entries;
  }

  const text = transcript.trim();
  if (!text) {
    return entries ?? [];
  }

  // `text` is non-empty and trimmed, so splitting on whitespace runs can never
  // produce an empty token.
  const tokens = text.split(/\s+/);
  const count = tokens.length;

  const usableDuration =
    typeof durationSeconds === "number" &&
    Number.isFinite(durationSeconds) &&
    durationSeconds > 0
      ? durationSeconds
      : count * fallbackSecondsPerWord;
  const duration = Math.max(usableDuration, count * minSecondsPerWord);

  const origin = Number.isFinite(startSeconds) ? startSeconds : 0;

  return tokens.map((token, index) => ({
    word: token,
    punctuated_word: token,
    start: origin + (index / count) * duration,
    end: origin + ((index + 1) / count) * duration,
    channel,
    speaker: null,
  }));
}
506+
507+
/**
 * Extracts a usable duration from a batch response's metadata.
 *
 * The metadata is only inspected when it is a non-null, non-array object, and
 * its `duration` field is only accepted when it is a finite number greater
 * than zero (presumably seconds, per this function's name — confirm against
 * the provider's response format).
 *
 * @param response - Batch response whose `metadata` may carry a `duration`.
 * @returns The duration, or `undefined` when it is absent or unusable.
 */
function getBatchDurationSeconds(response: BatchResponse): number | undefined {
  const { metadata } = response;

  const isPlainRecord =
    Boolean(metadata) && typeof metadata === "object" && !Array.isArray(metadata);
  if (!isPlainRecord) {
    return undefined;
  }

  const candidate = (metadata as Record<string, unknown>).duration;
  if (typeof candidate !== "number") {
    return undefined;
  }

  if (!Number.isFinite(candidate) || candidate <= 0) {
    return undefined;
  }

  return candidate;
}

apps/desktop/src/store/zustand/listener/general-batch.test.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { beforeEach, describe, expect, test, vi } from "vitest";
22

3+
import { EMPTY_BATCH_TRANSCRIPT_ERROR } from "./batch";
34
import { runBatchSession } from "./general-batch";
45

56
const { listenMock, startTranscriptionMock } = vi.hoisted(() => ({
@@ -213,6 +214,91 @@ describe("runBatchSession", () => {
213214
expect(handleBatchFailed).not.toHaveBeenCalled();
214215
});
215216

217+
// A "completed" event whose response produces no words must reject the session
// with the no-speech error, mark the batch as failed, and clear the persist
// callback while leaving the batch session state in place.
test("rejects completed responses that have no transcribed words", async () => {
  type BatchEvent = {
    payload: {
      type: string;
      session_id: string;
      response?: unknown;
      mode?: "direct" | "streamed";
    };
  };

  const sessionId = "session-1";

  // Store actions; `handleBatchResponse` reports "nothing persisted".
  const handleBatchResponse = vi.fn(() => false);
  const handleBatchFailed = vi.fn();
  const clearBatchPersist = vi.fn();
  const clearBatchSession = vi.fn();
  const handleBatchStarted = vi.fn();
  const handleBatchCompleted = vi.fn();
  const handleBatchResponseStreamed = vi.fn();
  const handleBatchStopped = vi.fn();
  const updateBatchProgress = vi.fn();
  const setBatchPersist = vi.fn();

  // Captures the listener registered by the session so the fake transcription
  // start can deliver an event to it.
  let handler: ((event: BatchEvent) => void) | undefined;
  listenMock.mockImplementation(async (cb) => {
    handler = cb;
    return vi.fn();
  });

  const completedEvent: BatchEvent = {
    payload: {
      type: "completed",
      session_id: sessionId,
      mode: "direct",
      response: {
        metadata: null,
        results: { channels: [] },
      },
    },
  };

  startTranscriptionMock.mockImplementation(async () => {
    // Deliver the event from a microtask rather than synchronously.
    queueMicrotask(() => {
      handler?.(completedEvent);
    });

    return {
      status: "ok",
      data: null,
    };
  });

  const run = runBatchSession(
    () => ({
      batch: {},
      batchPreview: {},
      batchPersist: {},
      handleBatchStarted,
      handleBatchResponse,
      handleBatchCompleted,
      clearBatchPersist,
      clearBatchSession,
      handleBatchResponseStreamed,
      handleBatchFailed,
      handleBatchStopped,
      updateBatchProgress,
      setBatchPersist,
    }),
    sessionId,
    {
      session_id: "session-1",
      provider: "hyprnote",
      file_path: "/tmp/session.wav",
      base_url: "",
      api_key: "",
    },
  );

  await expect(run).rejects.toThrow(EMPTY_BATCH_TRANSCRIPT_ERROR);

  expect(handleBatchFailed).toHaveBeenCalledWith(
    sessionId,
    EMPTY_BATCH_TRANSCRIPT_ERROR,
  );
  expect(clearBatchPersist).toHaveBeenCalledWith(sessionId);
  expect(clearBatchSession).not.toHaveBeenCalled();
});
301+
216302
test("rejects when the transcription is stopped", async () => {
217303
const handleBatchStarted = vi.fn();
218304
const handleBatchResponse = vi.fn();

apps/desktop/src/store/zustand/listener/general-batch.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@ import {
77
events as transcriptionEvents,
88
} from "@hypr/plugin-transcription";
99

10-
import type { BatchActions, BatchState } from "./batch";
10+
import {
11+
EMPTY_BATCH_TRANSCRIPT_ERROR,
12+
type BatchActions,
13+
type BatchState,
14+
} from "./batch";
1115

1216
type BatchStore = BatchActions & BatchState;
1317

@@ -39,6 +43,7 @@ export const runBatchSession = async <T extends BatchStore>(
3943
response: Parameters<BatchStore["handleBatchResponse"]>[1];
4044
},
4145
resolve: () => void,
46+
reject: (reason?: unknown) => void,
4247
) => {
4348
if (settled) {
4449
return;
@@ -47,15 +52,19 @@ export const runBatchSession = async <T extends BatchStore>(
4752
settled = true;
4853

4954
try {
50-
get().handleBatchResponse(sessionId, output.response);
55+
const handled = get().handleBatchResponse(sessionId, output.response);
56+
if (handled === false) {
57+
throw new Error(EMPTY_BATCH_TRANSCRIPT_ERROR);
58+
}
5159
cleanup();
5260
} catch (error) {
5361
console.error("[runBatch] error handling batch response", error);
5462
const errorMessage =
5563
error instanceof Error ? error.message : String(error);
5664
get().handleBatchFailed(sessionId, errorMessage);
5765
cleanup(false);
58-
throw error;
66+
reject(error);
67+
return;
5968
}
6069

6170
resolve();
@@ -121,6 +130,7 @@ export const runBatchSession = async <T extends BatchStore>(
121130
response: payload.response,
122131
},
123132
resolve,
133+
reject,
124134
);
125135
return;
126136
}

0 commit comments

Comments
 (0)