-
Notifications
You must be signed in to change notification settings - Fork 69
Expand file tree
/
Copy pathuseSpeechToText.ts
More file actions
108 lines (102 loc) · 3.07 KB
/
useSpeechToText.ts
File metadata and controls
108 lines (102 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import { useEffect, useMemo, useState } from 'react';
import { SpeechToTextController } from '../../controllers/SpeechToTextController';
import { ResourceSource } from '../../types/common';
import { STREAMING_ACTION } from '../../constants/sttDefaults';
import { AvailableModels, SpeechToTextLanguage } from '../../types/stt';
/**
 * Shape of the object returned by `useSpeechToText`.
 *
 * The state fields mirror values that the underlying
 * `SpeechToTextController` reports through its callbacks; the function
 * fields forward to the controller's methods of the same name.
 */
interface SpeechToTextModule {
  // --- State mirrored from controller callbacks -------------------------

  /** Latest value reported through the controller's `isReadyCallback`; starts as `false`. */
  isReady: boolean;

  /** Latest value reported through the controller's `isGeneratingCallback`; starts as `false`. */
  isGenerating: boolean;

  /** Latest value reported through `modelDownloadProgressCallback`; starts as `0`. */
  downloadProgress: number;

  /** Latest text reported through the controller's `transcribeCallback`; starts as `''`. */
  sequence: string;

  /** Latest value reported through `onErrorCallback`; `undefined` until one is reported. */
  error: Error | undefined;

  // --- Actions forwarded to the controller ------------------------------

  /** Same signature as `SpeechToTextController.configureStreaming`. */
  configureStreaming: SpeechToTextController['configureStreaming'];

  /**
   * Forwards to `SpeechToTextController.transcribe`.
   *
   * @param input - Audio samples handed to the controller.
   * @param audioLanguage - Optional language passed through unchanged.
   */
  transcribe: (
    input: number[],
    audioLanguage?: SpeechToTextLanguage
  ) => ReturnType<SpeechToTextController['transcribe']>;

  /**
   * Forwards to `SpeechToTextController.streamingTranscribe`.
   *
   * @param streamAction - Streaming action passed through unchanged.
   * @param input - Optional audio samples handed to the controller.
   * @param audioLanguage - Optional language passed through unchanged.
   */
  streamingTranscribe: (
    streamAction: STREAMING_ACTION,
    input?: number[],
    audioLanguage?: SpeechToTextLanguage
  ) => ReturnType<SpeechToTextController['streamingTranscribe']>;
}
/**
 * React hook that owns a single `SpeechToTextController` and exposes its
 * state and actions to a component.
 *
 * The controller is created once per hook instance and wired so that its
 * callbacks update React state (`sequence`, `isReady`, `isGenerating`,
 * `error`, `downloadProgress`).
 *
 * @param modelName - Model identifier passed to `controller.loadModel`.
 * @param encoderSource - Optional encoder resource passed to `loadModel`.
 * @param decoderSource - Optional decoder resource passed to `loadModel`.
 * @param tokenizerSource - Optional tokenizer resource passed to `loadModel`.
 * @param overlapSeconds - Forwarded to `controller.configureStreaming`.
 * @param windowSize - Forwarded to `controller.configureStreaming`.
 * @param streamingConfig - Forwarded to `controller.configureStreaming`.
 * @param preventLoad - When `true`, `loadModel` is not called. Defaults to `false`.
 * @returns Current controller-reported state plus functions that forward to
 *   the controller.
 */
export const useSpeechToText = ({
  modelName,
  encoderSource,
  decoderSource,
  tokenizerSource,
  overlapSeconds,
  windowSize,
  streamingConfig,
  preventLoad = false,
}: {
  modelName: AvailableModels;
  encoderSource?: ResourceSource;
  decoderSource?: ResourceSource;
  tokenizerSource?: ResourceSource;
  overlapSeconds?: ConstructorParameters<
    typeof SpeechToTextController
  >['0']['overlapSeconds'];
  windowSize?: ConstructorParameters<
    typeof SpeechToTextController
  >['0']['windowSize'];
  streamingConfig?: ConstructorParameters<
    typeof SpeechToTextController
  >['0']['streamingConfig'];
  preventLoad?: boolean;
}): SpeechToTextModule => {
  const [sequence, setSequence] = useState<string>('');
  const [isReady, setIsReady] = useState(false);
  const [downloadProgress, setDownloadProgress] = useState(0);
  const [isGenerating, setIsGenerating] = useState(false);
  const [error, setError] = useState<Error | undefined>();

  // Empty dependency list: the state setters are stable, so one controller
  // instance is reused across renders.
  const model = useMemo(
    () =>
      new SpeechToTextController({
        transcribeCallback: setSequence,
        isReadyCallback: setIsReady,
        isGeneratingCallback: setIsGenerating,
        onErrorCallback: setError,
        modelDownloadProgressCallback: setDownloadProgress,
      }),
    []
  );

  // Re-apply the streaming options whenever any of them changes.
  useEffect(() => {
    model.configureStreaming(overlapSeconds, windowSize, streamingConfig);
  }, [model, overlapSeconds, windowSize, streamingConfig]);

  // (Re)load the model whenever its identity or any of its sources changes,
  // unless the caller opted out via `preventLoad`.
  useEffect(() => {
    if (preventLoad) {
      return;
    }

    const loadModel = async () => {
      await model.loadModel(
        modelName,
        encoderSource,
        decoderSource,
        tokenizerSource
      );
    };

    // The effect cannot await the promise, so a rejection would otherwise be
    // left unhandled. Surface it through the hook's `error` state instead.
    loadModel().catch((reason: unknown) => {
      setError(reason instanceof Error ? reason : new Error(String(reason)));
    });
  }, [
    model,
    modelName,
    encoderSource,
    decoderSource,
    tokenizerSource,
    preventLoad,
  ]);

  return {
    isReady,
    isGenerating,
    downloadProgress,
    // Call through the instance rather than returning the detached method,
    // so `this` inside `configureStreaming` is always the controller. This
    // matches how `transcribe` and `streamingTranscribe` are exposed below.
    configureStreaming: (
      ...args: Parameters<SpeechToTextController['configureStreaming']>
    ) => model.configureStreaming(...args),
    sequence,
    error,
    transcribe: (waveform: number[], audioLanguage?: SpeechToTextLanguage) =>
      model.transcribe(waveform, audioLanguage),
    streamingTranscribe: (
      streamAction: STREAMING_ACTION,
      waveform?: number[],
      audioLanguage?: SpeechToTextLanguage
    ) => model.streamingTranscribe(streamAction, waveform, audioLanguage),
  };
};