Skip to content

Commit 0f12695

Browse files
Merge branch 'main' into @ml/llm-functional-api-context-management
2 parents 3ebcb6e + 19cc916 commit 0f12695

379 files changed

Lines changed: 62191 additions & 26057 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.cspell-wordlist.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ torch
66
huggingface
77
bbox
88
bboxes
9+
chatbots
10+
llms
911
deeplab
1012
unsqueeze
1113
qlora
@@ -105,3 +107,5 @@ POTTEDPLANT
105107
TVMONITOR
106108
sublist
107109
TTFT
110+
timestamping
111+
logprob

apps/computer-vision/app/image_segmentation/index.tsx

Lines changed: 59 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,13 @@ import {
1212
Skia,
1313
AlphaType,
1414
ColorType,
15+
SkImage,
1516
} from '@shopify/react-native-skia';
1617
import { View, StyleSheet, Image } from 'react-native';
1718
import React, { useContext, useEffect, useState } from 'react';
1819
import { GeneratingContext } from '../../context';
1920
import ScreenWrapper from '../../ScreenWrapper';
2021

21-
const width = 224;
22-
const height = 224;
23-
24-
let pixels = new Uint8Array(width * height * 4);
25-
pixels.fill(255);
26-
27-
let data = Skia.Data.fromBytes(pixels);
28-
let img = Skia.Image.MakeImage(
29-
{
30-
width: width,
31-
height: height,
32-
alphaType: AlphaType.Opaque,
33-
colorType: ColorType.RGBA_8888,
34-
},
35-
data,
36-
width * 4
37-
);
38-
3922
const numberToColor: number[][] = [
4023
[255, 87, 51], // 0 Red
4124
[51, 255, 87], // 1 Green
@@ -67,48 +50,58 @@ export default function ImageSegmentationScreen() {
6750
setGlobalGenerating(model.isGenerating);
6851
}, [model.isGenerating, setGlobalGenerating]);
6952
const [imageUri, setImageUri] = useState('');
53+
const [imageSize, setImageSize] = useState({ width: 0, height: 0 });
54+
const [segImage, setSegImage] = useState<SkImage | null>(null);
55+
const [canvasSize, setCanvasSize] = useState({ width: 0, height: 0 });
7056

7157
const handleCameraPress = async (isCamera: boolean) => {
7258
const image = await getImage(isCamera);
73-
const uri = image?.uri;
74-
setImageUri(uri as string);
59+
if (!image?.uri) return;
60+
setImageUri(image.uri);
61+
setImageSize({
62+
width: image.width ?? 0,
63+
height: image.height ?? 0,
64+
});
65+
setSegImage(null);
7566
};
7667

77-
const [resultPresent, setResultPresent] = useState(false);
78-
7968
const runForward = async () => {
80-
if (imageUri) {
81-
try {
82-
const output = await model.forward(imageUri);
83-
pixels = new Uint8Array(width * height * 4);
69+
if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return;
70+
try {
71+
const { width, height } = imageSize;
72+
const output = await model.forward(imageUri, [DeeplabLabel.ARGMAX]);
73+
const argmax = output[DeeplabLabel.ARGMAX] || [];
74+
const uniqueValues = new Set<number>();
75+
for (let i = 0; i < argmax.length; i++) {
76+
uniqueValues.add(argmax[i]);
77+
}
78+
const pixels = new Uint8Array(width * height * 4);
8479

85-
for (let x = 0; x < width; x++) {
86-
for (let y = 0; y < height; y++) {
87-
for (let i = 0; i < 3; i++) {
88-
pixels[(x * height + y) * 4 + i] =
89-
numberToColor[
90-
(output[DeeplabLabel.ARGMAX] || [])[x * height + y]
91-
][i];
92-
}
93-
pixels[(x * height + y) * 4 + 3] = 255;
94-
}
80+
for (let row = 0; row < height; row++) {
81+
for (let col = 0; col < width; col++) {
82+
const idx = row * width + col;
83+
const color = numberToColor[argmax[idx]] || [0, 0, 0];
84+
pixels[idx * 4] = color[0];
85+
pixels[idx * 4 + 1] = color[1];
86+
pixels[idx * 4 + 2] = color[2];
87+
pixels[idx * 4 + 3] = 255;
9588
}
96-
97-
data = Skia.Data.fromBytes(pixels);
98-
img = Skia.Image.MakeImage(
99-
{
100-
width: width,
101-
height: height,
102-
alphaType: AlphaType.Opaque,
103-
colorType: ColorType.RGBA_8888,
104-
},
105-
data,
106-
width * 4
107-
);
108-
setResultPresent(true);
109-
} catch (e) {
110-
console.error(e);
11189
}
90+
91+
const data = Skia.Data.fromBytes(pixels);
92+
const img = Skia.Image.MakeImage(
93+
{
94+
width,
95+
height,
96+
alphaType: AlphaType.Opaque,
97+
colorType: ColorType.RGBA_8888,
98+
},
99+
data,
100+
width * 4
101+
);
102+
setSegImage(img);
103+
} catch (e) {
104+
console.error(e);
112105
}
113106
};
114107

@@ -135,16 +128,24 @@ export default function ImageSegmentationScreen() {
135128
}
136129
/>
137130
</View>
138-
{resultPresent && (
139-
<View style={styles.canvasContainer}>
131+
{segImage && (
132+
<View
133+
style={styles.canvasContainer}
134+
onLayout={(e) =>
135+
setCanvasSize({
136+
width: e.nativeEvent.layout.width,
137+
height: e.nativeEvent.layout.height,
138+
})
139+
}
140+
>
140141
<Canvas style={styles.canvas}>
141142
<SkiaImage
142-
image={img}
143+
image={segImage}
143144
fit="contain"
144145
x={0}
145146
y={0}
146-
width={width}
147-
height={height}
147+
width={canvasSize.width}
148+
height={canvasSize.height}
148149
/>
149150
</Canvas>
150151
</View>
@@ -181,7 +182,7 @@ const styles = StyleSheet.create({
181182
padding: 4,
182183
},
183184
canvas: {
184-
width: width,
185-
height: height,
185+
width: '100%',
186+
height: '100%',
186187
},
187188
});

apps/llm/app/voice_chat/index.tsx

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { useContext, useEffect, useRef, useState } from 'react';
1+
import { useContext, useEffect, useState } from 'react';
22
import {
33
Keyboard,
44
KeyboardAvoidingView,
@@ -35,14 +35,16 @@ export default function VoiceChatScreenWrapper() {
3535

3636
function VoiceChatScreen() {
3737
const [isRecording, setIsRecording] = useState(false);
38+
const [liveTranscription, setLiveTranscription] = useState('');
39+
3840
const [recorder] = useState(
3941
() =>
4042
new AudioRecorder({
4143
sampleRate: 16000,
4244
bufferLengthInSamples: 1600,
4345
})
4446
);
45-
const messageRecorded = useRef<boolean>(false);
47+
4648
const { setGlobalGenerating } = useContext(GeneratingContext);
4749

4850
const llm = useLLM({ model: QWEN3_0_6B_QUANTIZED });
@@ -67,23 +69,39 @@ function VoiceChatScreen() {
6769
if (isRecording) {
6870
setIsRecording(false);
6971
recorder.stop();
70-
messageRecorded.current = true;
7172
speechToText.streamStop();
7273
} else {
7374
setIsRecording(true);
75+
setLiveTranscription('');
76+
7477
recorder.onAudioReady(({ buffer }) => {
7578
speechToText.streamInsert(buffer.getChannelData(0));
7679
});
7780
recorder.start();
78-
const transcription = await speechToText.stream();
79-
await llm.sendMessage(transcription);
81+
82+
let finalResult = '';
83+
84+
try {
85+
for await (const result of speechToText.stream()) {
86+
const text = result.committed.text + result.nonCommitted.text;
87+
setLiveTranscription(text);
88+
finalResult = text;
89+
}
90+
} catch (e) {
91+
console.error('Streaming error:', e);
92+
} finally {
93+
if (finalResult.trim().length > 0) {
94+
await llm.sendMessage(finalResult);
95+
setLiveTranscription('');
96+
}
97+
}
8098
}
8199
};
82100

83101
return !llm.isReady || !speechToText.isReady ? (
84102
<Spinner
85103
visible={!llm.isReady || !speechToText.isReady}
86-
textContent={`Loading the model ${(llm.downloadProgress * 100).toFixed(0)} %\nLoading the speech model ${(speechToText.downloadProgress * 100).toFixed(0)} %`}
104+
textContent={`Loading the LLM model ${(llm.downloadProgress * 100).toFixed(0)} %\nLoading the speech model ${(speechToText.downloadProgress * 100).toFixed(0)} %`}
87105
/>
88106
) : (
89107
<TouchableWithoutFeedback onPress={Keyboard.dismiss}>
@@ -96,16 +114,17 @@ function VoiceChatScreen() {
96114
<SWMIcon width={45} height={45} />
97115
<Text style={styles.textModelName}>Qwen 3 x Whisper</Text>
98116
</View>
99-
{llm.messageHistory.length || speechToText.committedTranscription ? (
117+
118+
{llm.messageHistory.length > 0 || liveTranscription.length > 0 ? (
100119
<View style={styles.chatContainer}>
101120
<Messages
102121
chatHistory={
103-
speechToText.isGenerating
122+
isRecording && liveTranscription.length > 0
104123
? [
105124
...llm.messageHistory,
106125
{
107126
role: 'user',
108-
content: speechToText.committedTranscription,
127+
content: liveTranscription,
109128
},
110129
]
111130
: llm.messageHistory
@@ -123,6 +142,7 @@ function VoiceChatScreen() {
123142
</Text>
124143
</View>
125144
)}
145+
126146
<View style={styles.bottomContainer}>
127147
{DeviceInfo.isEmulatorSync() ? (
128148
<View style={styles.emulatorBox}>

apps/speech/app.json

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,44 @@
1717
"bundleIdentifier": "com.anonymous.speech",
1818
"infoPlist": {
1919
"NSMicrophoneUsageDescription": "This app needs access to your microphone to record audio."
20+
},
21+
"entitlements": {
22+
"com.apple.developer.kernel.increased-memory-limit": true
2023
}
2124
},
2225
"android": {
2326
"adaptiveIcon": {
2427
"foregroundImage": "./assets/adaptive-icon.png",
2528
"backgroundColor": "#ffffff"
2629
},
27-
"package": "com.anonymous.speech"
30+
"package": "com.anonymous.speech",
31+
"permissions": [
32+
"android.permission.RECORD_AUDIO",
33+
"android.permission.MODIFY_AUDIO_SETTINGS",
34+
"android.permission.FOREGROUND_SERVICE",
35+
"android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK"
36+
]
2837
},
2938
"web": {
3039
"favicon": "./assets/favicon.png"
3140
},
32-
"plugins": ["expo-font"]
41+
"plugins": [
42+
"expo-font",
43+
[
44+
"react-native-audio-api",
45+
{
46+
"iosBackgroundMode": true,
47+
"iosMicrophonePermission": "This app requires access to the microphone to record audio.",
48+
"androidPermissions": [
49+
"android.permission.MODIFY_AUDIO_SETTINGS",
50+
"android.permission.FOREGROUND_SERVICE",
51+
"android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK",
52+
"android.permission.RECORD_AUDIO"
53+
],
54+
"androidForegroundService": true,
55+
"androidFSTypes": ["mediaPlayback", "microphone"]
56+
}
57+
]
58+
]
3359
}
3460
}

0 commit comments

Comments
 (0)