Skip to content

Commit 5881529

Browse files
a-szymanskamsluszniakmkopcins
committed
feat: Add text to image pipeline (#586)
## Description Introduces support for text-to-image tasks, following the [Diffusion Pipeline](https://huggingface.co/docs/diffusers/en/using-diffusers/write_own_pipeline#deconstruct-the-stable-diffusion-pipeline). Adds the TextToImageModule and the useTextToImage hook for accessing the models. ### Introduces a breaking change? - [ ] Yes - [x] No ### Type of change - [ ] Bug fix (change which fixes an issue) - [x] New feature (change which adds functionality) - [ ] Documentation update (improves or adds clarity to existing documentation) - [ ] Other (chores, tests, code style improvements etc.) ### Tested on - [ ] iOS - [x] Android ### Testing instructions Run the computer vision app to test image generation with the [BK-SDM-Tiny](https://huggingface.co/aszymanska/bk-sdm-tiny-vpred) model for 256×256 or 512×512 outputs. ⚠️ Testing the model requires a phone with a reasonably large amount of RAM (preferably at least 8 GB for the 256×256 model). ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues Closes #585 ### Checklist - [x] I have performed a self-review of my code - [x] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [x] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. --> --------- Co-authored-by: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com> Co-authored-by: Mateusz Kopcinski <120639731+mkopcins@users.noreply.github.com>
1 parent 342e5b3 commit 5881529

File tree

34 files changed

+1528
-18
lines changed

34 files changed

+1528
-18
lines changed

.cspell-wordlist.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,11 @@ softmax
6666
logit
6767
logits
6868
probs
69+
unet
70+
Unet
71+
VPRED
72+
timesteps
73+
Timesteps
74+
denoises
75+
denoise
76+
denoising

apps/computer-vision/app/_layout.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ export default function _layout() {
100100
headerTitleStyle: { color: ColorPalette.primary },
101101
}}
102102
/>
103+
<Drawer.Screen
104+
name="text_to_image/index"
105+
options={{
106+
drawerLabel: 'Image Generation',
107+
title: 'Image Generation',
108+
headerTitleStyle: { color: ColorPalette.primary },
109+
}}
110+
/>
103111
<Drawer.Screen
104112
name="index"
105113
options={{

apps/computer-vision/app/index.tsx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ export default function Home() {
4747
>
4848
<Text style={styles.buttonText}>Style Transfer</Text>
4949
</TouchableOpacity>
50+
<TouchableOpacity
51+
style={styles.button}
52+
onPress={() => router.navigate('text_to_image/')}
53+
>
54+
<Text style={styles.buttonText}>Image Generation</Text>
55+
</TouchableOpacity>
5056
</View>
5157
</View>
5258
);
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import {
2+
View,
3+
StyleSheet,
4+
Text,
5+
Image,
6+
Keyboard,
7+
TouchableWithoutFeedback,
8+
} from 'react-native';
9+
import React, { useContext, useEffect, useState } from 'react';
10+
import Spinner from 'react-native-loading-spinner-overlay';
11+
import { useTextToImage, BK_SDM_TINY_VPRED_256 } from 'react-native-executorch';
12+
import { GeneratingContext } from '../../context';
13+
import ColorPalette from '../../colors';
14+
import ProgressBar from '../../components/ProgressBar';
15+
import { BottomBarWithTextInput } from '../../components/BottomBarWithTextInput';
16+
17+
/**
 * Demo screen for text-to-image generation.
 *
 * Loads `BK_SDM_TINY_VPRED_256` through `useTextToImage`. Until the model
 * reports ready, only a spinner with the download progress is rendered.
 * Afterwards the screen shows the current prompt as a title, a progress bar
 * while a generation is running (otherwise the last generated image, or the
 * ExecuTorch logo when there is none), and the bottom bar used to enter a
 * prompt and pick the number of steps.
 */
export default function TextToImageScreen() {
  const [inferenceStepIdx, setInferenceStepIdx] = useState<number>(0);
  const [imageTitle, setImageTitle] = useState<string | null>(null);
  const [image, setImage] = useState<string | null>(null);
  // Step count of the most recent request; only used to scale the progress bar.
  const [steps, setSteps] = useState<number>(10);
  const [showTextInput, setShowTextInput] = useState(false);
  const [keyboardVisible, setKeyboardVisible] = useState(false);

  // NOTE(review): the model constant is the 256 variant and the preview style
  // is 256x256, yet 224 is requested here — confirm this size is intended.
  const imageSize = 224;
  const model = useTextToImage({
    model: BK_SDM_TINY_VPRED_256,
    inferenceCallback: (x) => setInferenceStepIdx(x),
  });

  const { setGlobalGenerating } = useContext(GeneratingContext);

  // Mirror the model's generating flag into the app-wide context.
  useEffect(() => {
    setGlobalGenerating(model.isGenerating);
  }, [model.isGenerating, setGlobalGenerating]);

  // Track keyboard visibility; it drives the dimming overlay and is forwarded
  // to the bottom bar.
  useEffect(() => {
    const showSub = Keyboard.addListener('keyboardDidShow', () => {
      setKeyboardVisible(true);
    });
    const hideSub = Keyboard.addListener('keyboardDidHide', () => {
      setKeyboardVisible(false);
    });
    return () => {
      showSub.remove();
      hideSub.remove();
    };
  }, []);

  /**
   * Generates an image for `input` using `numSteps` steps.
   *
   * Blank prompts are ignored. The prompt becomes the title right away; it is
   * rolled back to the previous title when the model returns an empty result
   * and cleared when generation throws. The step index is reset afterwards
   * in every case.
   *
   * @param input Prompt text entered by the user.
   * @param numSteps Number of steps selected in the bottom bar.
   */
  const runForward = async (input: string, numSteps: number) => {
    if (!input || !input.trim()) return;
    const prevImageTitle = imageTitle;
    setImageTitle(input);
    setSteps(numSteps);
    try {
      // Pass `numSteps` itself: `setSteps` does not change the `steps` value
      // captured by this closure, so reading `steps` here would run the model
      // with the step count of the previous request.
      const output = await model.generate(input, imageSize, numSteps);
      if (!output.length) {
        // Nothing was produced; keep the previous image and its title.
        setImageTitle(prevImageTitle);
        return;
      }
      setImage(output);
    } catch (e) {
      console.error(e);
      setImageTitle(null);
    } finally {
      setInferenceStepIdx(0);
    }
  };

  if (!model.isReady) {
    // TODO: Update when #614 merged
    return (
      <Spinner
        visible={!model.isReady}
        textContent={`Loading the model ${(model.downloadProgress * 100).toFixed(0)} %`}
      />
    );
  }

  return (
    <TouchableWithoutFeedback
      onPress={() => {
        // Tapping outside the bottom bar closes the keyboard and the prompt input.
        Keyboard.dismiss();
        setShowTextInput(false);
      }}
    >
      <View style={styles.container}>
        {keyboardVisible && <View style={styles.overlay} />}

        <View style={styles.titleContainer}>
          {imageTitle && <Text style={styles.titleText}>{imageTitle}</Text>}
        </View>

        {model.isGenerating ? (
          <View style={styles.progressContainer}>
            <Text style={styles.text}>Generating...</Text>
            <ProgressBar numSteps={steps} currentStep={inferenceStepIdx} />
          </View>
        ) : (
          <View style={styles.imageContainer}>
            {image?.length ? (
              <Image
                style={styles.image}
                source={{ uri: `data:image/png;base64,${image}` }}
              />
            ) : (
              <Image
                style={styles.image}
                source={require('../../assets/icons/executorch_logo.png')}
              />
            )}
          </View>
        )}

        <View style={styles.bottomContainer}>
          <BottomBarWithTextInput
            runModel={runForward}
            stopModel={model.interrupt}
            isGenerating={model.isGenerating}
            isReady={model.isReady}
            showTextInput={showTextInput}
            setShowTextInput={setShowTextInput}
            keyboardVisible={keyboardVisible}
          />
        </View>
      </View>
    </TouchableWithoutFeedback>
  );
}
130+
131+
// Edge length (in layout units) of the square image preview.
const PREVIEW_EDGE = 256;

// Styles for the text-to-image screen.
const styles = StyleSheet.create({
  // Root view: fills the screen and centers its children horizontally.
  container: {
    width: '100%',
    flex: 1,
    alignItems: 'center',
  },
  // Full-screen dimming layer; its zIndex (5) is lower than the bottom bar's (10).
  overlay: {
    ...StyleSheet.absoluteFillObject,
    zIndex: 5,
    backgroundColor: 'rgba(0,0,0,0.65)',
  },
  titleContainer: {
    marginTop: 20,
    alignItems: 'center',
  },
  titleText: {
    fontSize: 20,
    fontWeight: 'bold',
    textAlign: 'center',
    color: ColorPalette.primary,
    marginBottom: 12,
  },
  // Label displayed above the progress bar.
  text: {
    color: '#000',
    fontSize: 16,
  },
  // Wrapper around the generated image / placeholder logo.
  imageContainer: {
    position: 'absolute',
    top: 100,
    flex: 1,
    justifyContent: 'center',
    alignItems: 'center',
  },
  image: {
    height: PREVIEW_EDGE,
    width: PREVIEW_EDGE,
    resizeMode: 'contain',
    marginVertical: 30,
  },
  progressContainer: {
    flex: 1,
    alignItems: 'center',
    justifyContent: 'center',
  },
  // Bottom bar host, pinned to the bottom edge.
  bottomContainer: {
    position: 'absolute',
    bottom: 0,
    marginBottom: 25,
    zIndex: 10,
    flex: 1,
    width: '90%',
  },
});
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
import React, { useState } from 'react';
2+
import {
3+
View,
4+
Text,
5+
TextInput,
6+
TouchableOpacity,
7+
StyleSheet,
8+
KeyboardAvoidingView,
9+
Platform,
10+
} from 'react-native';
11+
import { Ionicons } from '@expo/vector-icons';
12+
import ColorPalette from '../colors';
13+
14+
/** Props accepted by `BottomBarWithTextInput`. */
interface BottomBarProps {
  /** Called with the typed prompt and the selected step count when the send button is pressed. */
  runModel: (input: string, numSteps: number) => void;
  /** Press handler of the "Stop model" button. */
  stopModel: () => void;

  /** When truthy, the collapsed bar shows "Stop model" and the send button is disabled. */
  isGenerating?: boolean;
  /** When falsy, the run/stop and send buttons are disabled. */
  isReady?: boolean;

  /** Whether the prompt input (expanded bar) is rendered instead of the single button. */
  showTextInput: boolean;
  /** Setter for `showTextInput`; called with `true` on "Run model" and `false` on send. */
  setShowTextInput: React.Dispatch<React.SetStateAction<boolean>>;

  /** When true, the "Steps" label is rendered in white. */
  keyboardVisible: boolean;
}
23+
24+
/**
 * Bottom bar of the text-to-image screen.
 *
 * Collapsed (`showTextInput` is false) it renders a single button: "Stop model"
 * while a generation is running, otherwise "Run model", which expands the bar.
 * Expanded it renders the prompt input with a send button and a steps selector
 * that moves in increments of 5 within the range 5..50.
 */
export const BottomBarWithTextInput = (props: BottomBarProps) => {
  const {
    runModel,
    stopModel,
    isGenerating,
    isReady,
    showTextInput,
    setShowTextInput,
    keyboardVisible,
  } = props;

  const [input, setInput] = useState('');
  const [numSteps, setNumSteps] = useState(10);

  const decreaseSteps = () => {
    setNumSteps((current) => Math.max(5, current - 5));
  };
  const increaseSteps = () => {
    setNumSteps((current) => Math.min(50, current + 5));
  };

  if (!showTextInput) {
    // Same button in both states; only the handler and the label differ.
    const handlePress = isGenerating ? stopModel : () => setShowTextInput(true);
    const label = isGenerating ? 'Stop model' : 'Run model';

    return (
      <TouchableOpacity
        style={styles.button}
        onPress={handlePress}
        disabled={!isReady}
      >
        <Text style={styles.buttonText}>{label}</Text>
      </TouchableOpacity>
    );
  }

  // Collapse the bar, clear the field, then hand the prompt captured by this
  // render to the caller.
  const submitPrompt = () => {
    setShowTextInput(false);
    setInput('');
    runModel(input, numSteps);
  };

  const onIos = Platform.OS === 'ios';
  const stepsLabelStyle = [styles.text, keyboardVisible && styles.textWhite];
  const smallButtonStyle = [styles.button, styles.iconButton];

  return (
    <KeyboardAvoidingView
      style={styles.container}
      collapsable={false}
      behavior={onIos ? 'padding' : undefined}
      keyboardVerticalOffset={onIos ? 120 : 40}
    >
      <View style={styles.inputContainer}>
        <TextInput
          style={styles.input}
          placeholder="Enter prompt..."
          value={input}
          onChangeText={setInput}
        />
        <TouchableOpacity
          style={smallButtonStyle}
          onPress={submitPrompt}
          disabled={!isReady || isGenerating}
        >
          <Ionicons name="send" size={20} color="#fff" />
        </TouchableOpacity>
      </View>

      <View style={styles.stepsContainer}>
        <Text style={stepsLabelStyle}>Steps: {numSteps}</Text>
        <View style={styles.stepsButtons}>
          <TouchableOpacity style={smallButtonStyle} onPress={decreaseSteps}>
            <Text style={styles.buttonText}>-</Text>
          </TouchableOpacity>
          <TouchableOpacity style={smallButtonStyle} onPress={increaseSteps}>
            <Text style={styles.buttonText}>+</Text>
          </TouchableOpacity>
        </View>
      </View>
    </KeyboardAvoidingView>
  );
};
112+
113+
// Styles for the bottom bar (collapsed button and expanded prompt input).
const styles = StyleSheet.create({
  container: {
    alignItems: 'center',
  },
  // Row holding the prompt field and the send button.
  inputContainer: {
    alignItems: 'center',
    justifyContent: 'center',
    flexDirection: 'row',
  },
  input: {
    flex: 1,
    marginRight: 8,
    padding: 8,
    borderRadius: 6,
    color: '#000',
    backgroundColor: '#fff',
  },
  // Row holding the "Steps" label on one side and the -/+ buttons on the other.
  stepsContainer: {
    flexDirection: 'row',
    justifyContent: 'space-between',
    alignItems: 'center',
    marginTop: 10,
    width: '100%',
  },
  stepsButtons: {
    flexDirection: 'row',
  },
  // Base button; full width unless combined with `iconButton`.
  button: {
    height: 40,
    width: '100%',
    borderRadius: 8,
    backgroundColor: ColorPalette.primary,
    alignItems: 'center',
    justifyContent: 'center',
  },
  buttonText: {
    fontSize: 16,
    textAlign: 'center',
    color: '#fff',
  },
  // Applied after `button` to shrink it to a 40-wide button.
  iconButton: {
    width: 40,
    marginHorizontal: 5,
  },
  text: {
    flex: 1,
    color: '#000',
    fontSize: 16,
  },
  // Overrides the label color of `text`.
  textWhite: {
    color: '#fff',
  },
});

0 commit comments

Comments
 (0)