@@ -12,6 +12,11 @@ import {
1212 View ,
1313} from 'react-native' ;
1414import { launchImageLibrary } from 'react-native-image-picker' ;
15+ import {
16+ AudioManager ,
17+ AudioRecorder ,
18+ AudioContext ,
19+ } from 'react-native-audio-api' ;
1520import { useIsFocused } from '@react-navigation/native' ;
1621import { useSafeAreaInsets } from 'react-native-safe-area-context' ;
1722import { useLLM , LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch' ;
@@ -46,7 +51,15 @@ function MultimodalLLMScreen() {
4651 const textInputRef = useRef < TextInput > ( null ) ;
4752 const { setGlobalGenerating } = useContext ( GeneratingContext ) ;
4853
49- // Added error state
// PCM samples currently attached to the next message, plus the label shown
// for them in the attachment strip. Both are null when nothing is attached.
const [audioBuffer, setAudioBuffer] = useState<Float32Array | null>(null);
const [audioLabel, setAudioLabel] = useState<string | null>(null);
// Contents of the audio URL text field and whether a load is in progress.
const [audioUrl, setAudioUrl] = useState('');
const [isFetchingAudio, setIsFetchingAudio] = useState(false);
// Microphone recording status and the result of the permission request.
const [isRecording, setIsRecording] = useState(false);
const [hasMicPermission, setHasMicPermission] = useState(false);
// `useRef(new AudioRecorder())` would evaluate `new AudioRecorder()` on every
// render and throw the extra instances away. A lazy `useState` initializer
// runs once, so exactly one recorder is created for the component's lifetime;
// it is still exposed through `recorder.current` for the handlers below.
const [recorderInstance] = useState(() => new AudioRecorder());
const recorder = useRef(recorderInstance);
// Chunks delivered by the recorder callback while a recording is in progress.
const recordChunks = useRef<Float32Array[]>([]);
62+
5063 const [ error , setError ] = useState < string | null > ( null ) ;
5164
5265 const vlm = useLLM ( {
@@ -68,6 +81,87 @@ function MultimodalLLMScreen() {
6881 if ( vlm . error ) setError ( String ( vlm . error ) ) ;
6982 } , [ vlm . error ] ) ;
7083
// On mount: configure the audio session for play-and-record and ask for
// microphone permission, storing whether it was granted.
useEffect(() => {
  // Set by the cleanup function so a late permission result does not update
  // state after the screen has unmounted.
  let cancelled = false;

  AudioManager.setAudioSessionOptions({
    iosCategory: 'playAndRecord',
    iosMode: 'spokenAudio',
    iosOptions: ['allowBluetoothHFP', 'defaultToSpeaker'],
  });

  const requestPermission = async () => {
    try {
      const status = await AudioManager.requestRecordingPermissions();
      if (!cancelled) setHasMicPermission(status === 'Granted');
    } catch (e) {
      // Surface the failure in the UI instead of leaving an unhandled
      // promise rejection; permission stays at its default (false).
      if (!cancelled) setError(e instanceof Error ? e.message : String(e));
    }
  };
  void requestPermission();

  return () => {
    cancelled = true;
  };
}, []);
95+
/**
 * Decodes the audio at the URL typed into the audio URL field and attaches
 * channel 0 of the result as the pending audio buffer.
 *
 * Does nothing when the trimmed URL is empty. Any decoding failure is
 * reported through `setError`; the fetching flag is always reset.
 */
const loadAudioFromUrl = async () => {
  const url = audioUrl.trim();
  if (!url) return;
  setIsFetchingAudio(true);
  let ctx: AudioContext | undefined;
  try {
    ctx = new AudioContext({ sampleRate: 16000 });
    const decoded = await ctx.decodeAudioData(url);
    // Copy the samples (as the recording path does) so the stored buffer
    // does not depend on the decoded AudioBuffer or the context closed below.
    const pcm = new Float32Array(decoded.getChannelData(0));
    // Drop any query string / fragment before taking the last path segment,
    // so the label shows "clip.mp3" rather than "clip.mp3?token=...".
    const path = url.split(/[?#]/, 1)[0] ?? '';
    const name = path.split('/').pop() || 'audio';
    setAudioBuffer(pcm);
    setAudioLabel(`${name} · ${(pcm.length / 16000).toFixed(1)}s`);
  } catch (e) {
    setError(e instanceof Error ? e.message : String(e));
  } finally {
    setIsFetchingAudio(false);
    // A context is created per load; close it so repeated loads do not
    // accumulate open audio contexts. Closing is best-effort.
    try {
      await ctx?.close();
    } catch {
      // Ignore: the decode result (or its error) has already been handled.
    }
  }
};
113+
/**
 * Begins a microphone recording.
 *
 * Requires a previously granted microphone permission; otherwise an error is
 * shown and nothing starts. Registers a chunk callback on the recorder,
 * activates the audio session, starts the recorder, and marks the screen as
 * recording. Every failure is reported through `setError`.
 */
const startRecording = async () => {
  if (!hasMicPermission) {
    setError('Microphone permission denied. Please enable it in Settings.');
    return;
  }

  const SAMPLE_RATE = 16000;
  const mic = recorder.current;

  // Start from an empty chunk list so an earlier take is not mixed in.
  recordChunks.current = [];
  mic.onAudioReady(
    {
      sampleRate: SAMPLE_RATE,
      bufferLength: 0.1 * SAMPLE_RATE,
      channelCount: 1,
    },
    (event) => {
      // Store a copy of channel 0 for each delivered buffer.
      const samples = event.buffer.getChannelData(0);
      recordChunks.current.push(new Float32Array(samples));
    }
  );

  try {
    const sessionActive = await AudioManager.setAudioSessionActivity(true);
    if (!sessionActive) {
      setError('Cannot start audio session');
      return;
    }

    const outcome = mic.start();
    if (outcome.status === 'error') {
      setError(`Recording problems: ${outcome.message}`);
      return;
    }

    setIsRecording(true);
  } catch (err) {
    setError(err instanceof Error ? err.message : String(err));
  }
};
143+
/**
 * Stops the recorder and, if any samples were captured, joins the collected
 * chunks into one Float32Array and attaches it as the pending audio buffer
 * with a "Recording" label. With zero captured samples nothing is attached.
 */
const stopRecording = () => {
  recorder.current.stop();
  setIsRecording(false);

  const chunks = recordChunks.current;
  let sampleCount = 0;
  for (const chunk of chunks) {
    sampleCount += chunk.length;
  }
  if (sampleCount === 0) return;

  // Lay the chunks end to end, threading the write offset through reduce.
  const merged = new Float32Array(sampleCount);
  chunks.reduce((offset, chunk) => {
    merged.set(chunk, offset);
    return offset + chunk.length;
  }, 0);

  recordChunks.current = [];
  setAudioBuffer(merged);
  setAudioLabel(`Recording · ${(merged.length / 16000).toFixed(1)}s`);
};
159+
/** Removes the pending audio attachment: clears both its label and its samples. */
const clearAudio = () => {
  setAudioLabel(null);
  setAudioBuffer(null);
};
164+
71165 const pickImage = async ( ) => {
72166 try {
73167 const result = await launchImageLibrary ( { mediaType : 'photo' } ) ;
@@ -88,12 +182,19 @@ function MultimodalLLMScreen() {
88182 textInputRef . current ?. clear ( ) ;
89183 Keyboard . dismiss ( ) ;
90184 const currentImageUri = imageUri ;
185+ const currentAudio = audioBuffer ;
91186 setImageUri ( null ) ;
187+ setAudioBuffer ( null ) ;
188+ setAudioLabel ( null ) ;
92189 try {
93- await vlm . sendMessage (
94- text ,
95- currentImageUri ? { imagePath : currentImageUri } : undefined
96- ) ;
190+ const media =
191+ currentImageUri || currentAudio
192+ ? {
193+ ...( currentImageUri ? { imagePath : currentImageUri } : { } ) ,
194+ ...( currentAudio ? { audioBuffer : currentAudio } : { } ) ,
195+ }
196+ : undefined ;
197+ await vlm . sendMessage ( text , media ) ;
97198 } catch ( e ) {
98199 // Updated to set UI error instead of just console.error
99200 setError ( e instanceof Error ? e . message : String ( e ) ) ;
@@ -159,6 +260,42 @@ function MultimodalLLMScreen() {
159260 </ TouchableOpacity >
160261 ) }
161262
263+ { /* Audio URL input */ }
264+ < View style = { styles . audioUrlRow } >
265+ < TextInput
266+ placeholder = "Audio URL (mp3/wav/…)"
267+ placeholderTextColor = "#C1C6E5"
268+ style = { styles . audioUrlInput }
269+ value = { audioUrl }
270+ onChangeText = { setAudioUrl }
271+ autoCapitalize = "none"
272+ autoCorrect = { false }
273+ />
274+ < TouchableOpacity
275+ style = { [
276+ styles . audioUrlButton ,
277+ ( ! audioUrl . trim ( ) || isFetchingAudio || vlm . isGenerating ) &&
278+ styles . disabled ,
279+ ] }
280+ onPress = { loadAudioFromUrl }
281+ disabled = { ! audioUrl . trim ( ) || isFetchingAudio || vlm . isGenerating }
282+ >
283+ < Text style = { styles . audioUrlButtonText } >
284+ { isFetchingAudio ? '…' : 'Load' }
285+ </ Text >
286+ </ TouchableOpacity >
287+ </ View >
288+
289+ { /* Audio attachment strip */ }
290+ { audioLabel && (
291+ < View style = { styles . audioAttachmentContainer } >
292+ < Text style = { styles . audioAttachmentText } > 🎵 { audioLabel } </ Text >
293+ < TouchableOpacity onPress = { clearAudio } >
294+ < Text style = { styles . audioAttachmentClear } > ✕</ Text >
295+ </ TouchableOpacity >
296+ </ View >
297+ ) }
298+
162299 < StatsBar stats = { stats } />
163300 < View
164301 style = { [
@@ -178,6 +315,17 @@ function MultimodalLLMScreen() {
178315 < Text style = { styles . imageButtonText } > 📷</ Text >
179316 </ TouchableOpacity >
180317
318+ { /* Mic record / stop button */ }
319+ < TouchableOpacity
320+ style = { styles . imageButton }
321+ onPress = { isRecording ? stopRecording : startRecording }
322+ disabled = { vlm . isGenerating }
323+ >
324+ < Text style = { styles . imageButtonText } >
325+ { isRecording ? '⏹️' : '🎤' }
326+ </ Text >
327+ </ TouchableOpacity >
328+
181329 < TextInput
182330 autoCorrect = { false }
183331 ref = { textInputRef }
@@ -319,6 +467,64 @@ const styles = StyleSheet.create({
319467 fontFamily : 'regular' ,
320468 color : ColorPalette . blueDark ,
321469 } ,
470+ audioAttachmentContainer : {
471+ flexDirection : 'row' ,
472+ alignItems : 'center' ,
473+ justifyContent : 'space-between' ,
474+ paddingHorizontal : 16 ,
475+ paddingVertical : 8 ,
476+ marginHorizontal : 16 ,
477+ marginBottom : 4 ,
478+ borderRadius : 8 ,
479+ borderWidth : 1 ,
480+ borderColor : ColorPalette . blueLight ,
481+ backgroundColor : '#fafbff' ,
482+ } ,
483+ audioAttachmentText : {
484+ fontSize : 13 ,
485+ fontFamily : 'regular' ,
486+ color : ColorPalette . blueDark ,
487+ } ,
488+ audioAttachmentClear : {
489+ fontSize : 16 ,
490+ color : ColorPalette . blueDark ,
491+ paddingHorizontal : 8 ,
492+ } ,
493+ audioUrlRow : {
494+ flexDirection : 'row' ,
495+ alignItems : 'center' ,
496+ marginHorizontal : 16 ,
497+ marginBottom : 4 ,
498+ } ,
499+ audioUrlInput : {
500+ flex : 1 ,
501+ padding : 10 ,
502+ borderTopLeftRadius : 8 ,
503+ borderBottomLeftRadius : 8 ,
504+ borderWidth : 1 ,
505+ borderColor : ColorPalette . blueLight ,
506+ borderRightWidth : 0 ,
507+ fontFamily : 'regular' ,
508+ fontSize : 13 ,
509+ color : ColorPalette . primary ,
510+ } ,
511+ audioUrlButton : {
512+ paddingVertical : 10 ,
513+ paddingHorizontal : 16 ,
514+ backgroundColor : ColorPalette . strongPrimary ,
515+ borderTopRightRadius : 8 ,
516+ borderBottomRightRadius : 8 ,
517+ justifyContent : 'center' ,
518+ alignItems : 'center' ,
519+ } ,
520+ audioUrlButtonText : {
521+ color : '#fff' ,
522+ fontFamily : 'medium' ,
523+ fontSize : 13 ,
524+ } ,
525+ disabled : {
526+ opacity : 0.5 ,
527+ } ,
322528 bottomContainer : {
323529 height : 100 ,
324530 width : '100%' ,
0 commit comments