Skip to content

Commit fafc5b9

Browse files
committed
Merge branch 'v0.4.0-rc1' into @md/s2t_streaming
2 parents 33048ee + 6c8a629 commit fafc5b9

File tree

9 files changed

+368
-150
lines changed

9 files changed

+368
-150
lines changed

src/constants/modelUrls.ts

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,9 +59,14 @@ export const WHISPER_TINY_DECODER =
5959
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny.en/resolve/v0.3.0/xnnpack/whisper_tiny_en_xnnpack_decoder.pte';
6060
export const WHISPER_TINY_ENCODER =
6161
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny.en/resolve/v0.3.0/xnnpack/whisper_tiny_en_xnnpack_encoder.pte';
62+
export const WHISPER_TINY_MULTILINGUAL_ENCODER =
63+
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny/resolve/v0.4.0/xnnpack/xnnpack_whisper_encoder.pte';
64+
export const WHISPER_TINY_MULTILINGUAL_DECODER =
65+
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny/resolve/v0.4.0/xnnpack/xnnpack_whisper_decoder.pte';
66+
export const WHISPER_TINY_MULTILINGUAL_TOKENIZER =
67+
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny/resolve/v0.4.0/tokenizer.json';
6268

6369
// OCR
64-
6570
export const DETECTOR_CRAFT_1280 =
6671
'https://huggingface.co/software-mansion/react-native-executorch-detector-craft/resolve/v0.3.0/xnnpack/xnnpack_craft_1280.pte';
6772
export const DETECTOR_CRAFT_800 =

src/constants/sttDefaults.ts

Lines changed: 49 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -5,52 +5,62 @@ import {
55
WHISPER_TINY_ENCODER,
66
WHISPER_TINY_DECODER,
77
WHISPER_TOKENIZER,
8+
WHISPER_TINY_MULTILINGUAL_ENCODER,
9+
WHISPER_TINY_MULTILINGUAL_DECODER,
10+
WHISPER_TINY_MULTILINGUAL_TOKENIZER,
811
} from './modelUrls';
12+
import { AvailableModels, ModelConfig } from '../types/stt';
913

1014
export const SAMPLE_RATE = 16_000;
1115
export const SECOND = SAMPLE_RATE;
1216
export const HAMMING_DIST_THRESHOLD = 1;
1317

14-
export interface ModelConfig {
18+
const whisperTinyModelConfig = {
1519
sources: {
16-
encoder: string;
17-
decoder: string;
18-
};
20+
encoder: WHISPER_TINY_ENCODER,
21+
decoder: WHISPER_TINY_DECODER,
22+
},
1923
tokenizer: {
20-
source: string;
21-
sos: number;
22-
eos: number;
23-
specialChar: string;
24-
};
25-
}
24+
source: WHISPER_TOKENIZER,
25+
bos: 50257, // FIXME: this is a placeholder and needs to be changed
26+
eos: 50256, // FIXME: this is a placeholder and needs to be changed
27+
},
28+
isMultilingual: false,
29+
};
30+
31+
const moonshineTinyModelConfig = {
32+
sources: {
33+
encoder: MOONSHINE_TINY_ENCODER,
34+
decoder: MOONSHINE_TINY_DECODER,
35+
},
36+
tokenizer: {
37+
source: MOONSHINE_TOKENIZER,
38+
bos: 1, // FIXME: this is a placeholder and needs to be changed
39+
eos: 2, // FIXME: this is a placeholder and needs to be changed
40+
},
41+
isMultilingual: false,
42+
};
2643

27-
export const MODEL_CONFIGS: { [key in 'moonshine' | 'whisper']: ModelConfig } =
28-
{
29-
moonshine: {
30-
sources: {
31-
encoder: MOONSHINE_TINY_ENCODER,
32-
decoder: MOONSHINE_TINY_DECODER,
33-
},
34-
tokenizer: {
35-
source: MOONSHINE_TOKENIZER,
36-
sos: 1,
37-
eos: 2,
38-
specialChar: '\u2581',
39-
},
40-
},
41-
whisper: {
42-
sources: {
43-
encoder: WHISPER_TINY_ENCODER,
44-
decoder: WHISPER_TINY_DECODER,
45-
},
46-
tokenizer: {
47-
source: WHISPER_TOKENIZER,
48-
sos: 50257,
49-
eos: 50256,
50-
specialChar: 'Ġ',
51-
},
52-
},
53-
};
44+
const whisperTinyMultilingualModelConfig = {
45+
sources: {
46+
encoder: WHISPER_TINY_MULTILINGUAL_ENCODER,
47+
decoder: WHISPER_TINY_MULTILINGUAL_DECODER,
48+
},
49+
tokenizer: {
50+
source: WHISPER_TINY_MULTILINGUAL_TOKENIZER,
51+
bos: 50258, // FIXME: this is a placeholder and needs to be changed
52+
eos: 50257, // FIXME: this is a placeholder and needs to be changed
53+
},
54+
isMultilingual: true,
55+
};
56+
57+
export const MODEL_CONFIGS: {
58+
[key in AvailableModels]: ModelConfig;
59+
} = {
60+
moonshine: moonshineTinyModelConfig,
61+
whisper: whisperTinyModelConfig,
62+
whisperMultilingual: whisperTinyMultilingualModelConfig,
63+
};
5464

5565
export const MODES = {
5666
fast: {
@@ -67,6 +77,8 @@ export const MODES = {
6777
},
6878
};
6979

80+
export const NUM_TOKENS_TO_SLICE = 3;
81+
7082
export enum STREAMING_ACTION {
7183
START,
7284
DATA,

0 commit comments

Comments
 (0)