Skip to content

Commit a8e7f96

Browse files
chmjkb and mkopcins authored
feat: Add multilingual Whisper (#166)
## Description

<!-- Provide a concise and descriptive summary of the changes implemented in this PR. -->

### Type of change

- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] Documentation update (improves or adds clarity to existing documentation)

### Tested on

- [ ] iOS
- [ ] Android

### Testing instructions

<!-- Provide step-by-step instructions on how to test your changes. Include setup details if necessary. -->

### Screenshots

<!-- Add screenshots here, if applicable -->

### Related issues

<!-- Link related issues here using #issue-number -->

### Checklist

- [ ] I have performed a self-review of my code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have updated the documentation accordingly
- [ ] My changes generate no new warnings

### Additional notes

<!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. -->

---------

Co-authored-by: Mateusz Kopcinski <120639731+mkopcins@users.noreply.github.com>
1 parent 2145796 commit a8e7f96

File tree

9 files changed

+350
-153
lines changed

9 files changed

+350
-153
lines changed

src/constants/modelUrls.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,14 @@ export const WHISPER_TINY_DECODER =
5959
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny.en/resolve/v0.3.0/xnnpack/whisper_tiny_en_xnnpack_decoder.pte';
6060
export const WHISPER_TINY_ENCODER =
6161
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny.en/resolve/v0.3.0/xnnpack/whisper_tiny_en_xnnpack_encoder.pte';
62+
export const WHISPER_TINY_MULTILINGUAL_ENCODER =
63+
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny/resolve/v0.4.0/xnnpack/xnnpack_whisper_encoder.pte';
64+
export const WHISPER_TINY_MULTILINGUAL_DECODER =
65+
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny/resolve/v0.4.0/xnnpack/xnnpack_whisper_decoder.pte';
66+
export const WHISPER_TINY_MULTILINGUAL_TOKENIZER =
67+
'https://huggingface.co/software-mansion/react-native-executorch-whisper-tiny/resolve/v0.4.0/tokenizer.json';
6268

6369
// OCR
64-
6570
export const DETECTOR_CRAFT_1280 =
6671
'https://huggingface.co/software-mansion/react-native-executorch-detector-craft/resolve/v0.3.0/xnnpack/xnnpack_craft_1280.pte';
6772
export const DETECTOR_CRAFT_800 =

src/constants/sttDefaults.ts

Lines changed: 49 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,52 +5,62 @@ import {
55
WHISPER_TINY_ENCODER,
66
WHISPER_TINY_DECODER,
77
WHISPER_TOKENIZER,
8+
WHISPER_TINY_MULTILINGUAL_ENCODER,
9+
WHISPER_TINY_MULTILINGUAL_DECODER,
10+
WHISPER_TINY_MULTILINGUAL_TOKENIZER,
811
} from './modelUrls';
12+
import { AvailableModels, ModelConfig } from '../types/stt';
913

1014
export const SAMPLE_RATE = 16_000;
1115
export const SECOND = SAMPLE_RATE;
1216
export const HAMMING_DIST_THRESHOLD = 1;
1317

14-
export interface ModelConfig {
18+
const whisperTinyModelConfig = {
1519
sources: {
16-
encoder: string;
17-
decoder: string;
18-
};
20+
encoder: WHISPER_TINY_ENCODER,
21+
decoder: WHISPER_TINY_DECODER,
22+
},
1923
tokenizer: {
20-
source: string;
21-
sos: number;
22-
eos: number;
23-
specialChar: string;
24-
};
25-
}
24+
source: WHISPER_TOKENIZER,
25+
bos: 50258, // FIXME: this is a placeholder and needs to be changed
26+
eos: 50257, // FIXME: this is a placeholder and needs to be changed
27+
},
28+
isMultilingual: false,
29+
};
2630

27-
export const MODEL_CONFIGS: { [key in 'moonshine' | 'whisper']: ModelConfig } =
28-
{
29-
moonshine: {
30-
sources: {
31-
encoder: MOONSHINE_TINY_ENCODER,
32-
decoder: MOONSHINE_TINY_DECODER,
33-
},
34-
tokenizer: {
35-
source: MOONSHINE_TOKENIZER,
36-
sos: 1,
37-
eos: 2,
38-
specialChar: '\u2581',
39-
},
40-
},
41-
whisper: {
42-
sources: {
43-
encoder: WHISPER_TINY_ENCODER,
44-
decoder: WHISPER_TINY_DECODER,
45-
},
46-
tokenizer: {
47-
source: WHISPER_TOKENIZER,
48-
sos: 50257,
49-
eos: 50256,
50-
specialChar: 'Ġ',
51-
},
52-
},
53-
};
31+
const moonshineTinyModelConfig = {
32+
sources: {
33+
encoder: MOONSHINE_TINY_ENCODER,
34+
decoder: MOONSHINE_TINY_DECODER,
35+
},
36+
tokenizer: {
37+
source: MOONSHINE_TOKENIZER,
38+
bos: 1, // FIXME: this is a placeholder and needs to be changed
39+
eos: 2, // FIXME: this is a placeholder and needs to be changed
40+
},
41+
isMultilingual: false,
42+
};
43+
44+
const whisperTinyMultilingualModelConfig = {
45+
sources: {
46+
encoder: WHISPER_TINY_MULTILINGUAL_ENCODER,
47+
decoder: WHISPER_TINY_MULTILINGUAL_DECODER,
48+
},
49+
tokenizer: {
50+
source: WHISPER_TINY_MULTILINGUAL_TOKENIZER,
51+
bos: 50258, // FIXME: this is a placeholder and needs to be changed
52+
eos: 50257, // FIXME: this is a placeholder and needs to be changed
53+
},
54+
isMultilingual: true,
55+
};
56+
57+
export const MODEL_CONFIGS: {
58+
[key in AvailableModels]: ModelConfig;
59+
} = {
60+
moonshine: moonshineTinyModelConfig,
61+
whisper: whisperTinyModelConfig,
62+
whisperMultilingual: whisperTinyMultilingualModelConfig,
63+
};
5464

5565
export const MODES = {
5666
fast: {
@@ -66,3 +76,5 @@ export const MODES = {
6676
overlapSeconds: 3,
6777
},
6878
};
79+
80+
export const NUM_TOKENS_TO_SLICE = 3;

0 commit comments

Comments
 (0)