diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index 2e50928012..7428cd147d 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -6,6 +6,8 @@ torch huggingface bbox bboxes +chatbots +llms deeplab unsqueeze qlora @@ -104,3 +106,9 @@ POTTEDPLANT TVMONITOR sublist TTFT +timestamping +logprob +RNFS +pogodin +kesha +antonov \ No newline at end of file diff --git a/.cspell.json b/.cspell.json index c1332b1183..372de01189 100644 --- a/.cspell.json +++ b/.cspell.json @@ -1,7 +1,7 @@ { "version": "0.2", "language": "en", - "ignorePaths": ["**/node_modules", "**/Pods"], + "ignorePaths": ["**/node_modules", "**/Pods", "**/readmes/**"], "dictionaryDefinitions": [ { "name": "project-words", diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7989ebb5d0..dd9585bc2d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: run: yarn lint - name: Typecheck files - run: yarn typecheck + run: yarn workspaces foreach --all --topological-dev run prepare && yarn typecheck build-library: runs-on: ubuntu-latest @@ -35,7 +35,5 @@ jobs: - name: Setup uses: ./.github/actions/setup - - name: Build package - run: | - cd packages/react-native-executorch - yarn prepare + - name: Build all packages + run: yarn workspaces foreach --all --topological-dev run prepare diff --git a/.nvmrc b/.nvmrc index 9a2a0e219c..53d1c14db3 100644 --- a/.nvmrc +++ b/.nvmrc @@ -1 +1 @@ -v20 +v22 diff --git a/README.md b/README.md index 43ffe6d7fc..8497adc5de 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,15 @@ React Native ExecuTorch is powering [Private Mind](https://privatemind.swmansion ```bash # Install the package yarn add react-native-executorch + +# If you use Expo, please add these packages for resource fetching: +yarn add @react-native-executorch/expo-resource-fetcher +yarn add expo-file-system expo-asset + +# If you use a bare React Native project, use these packages: +yarn add @react-native-executorch/bare-adapter +yarn add 
@dr.pogodin/react-native-fs @kesha-antonov/react-native-background-downloader + # Depending on the platform, choose either iOS or Android yarn expo run:< ios | android > ``` @@ -88,8 +97,14 @@ Add this to your component file: import { useLLM, LLAMA3_2_1B, - Message + Message, + initExecutorch, } from 'react-native-executorch'; +import { ExpoResourceFetcher } from '@react-native-executorch/expo-resource-fetcher'; + +initExecutorch({ + resourceFetcher: ExpoResourceFetcher, +}); function MyComponent() { // Initialize the model 🚀 diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index 5914d2fe8a..35fba7fb1d 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -1,4 +1,7 @@ import { Drawer } from 'expo-router/drawer'; +import { initExecutorch } from 'react-native-executorch'; +import { ExpoResourceFetcher } from '@react-native-executorch/expo-resource-fetcher'; + import ColorPalette from '../colors'; import React, { useState } from 'react'; import { Text, StyleSheet, View } from 'react-native'; @@ -10,6 +13,10 @@ import { } from '@react-navigation/drawer'; import { GeneratingContext } from '../context'; +initExecutorch({ + resourceFetcher: ExpoResourceFetcher, +}); + interface CustomDrawerProps extends DrawerContentComponentProps { isGenerating: boolean; } diff --git a/apps/computer-vision/app/image_segmentation/index.tsx b/apps/computer-vision/app/image_segmentation/index.tsx index 87293b01b9..61a98ddead 100644 --- a/apps/computer-vision/app/image_segmentation/index.tsx +++ b/apps/computer-vision/app/image_segmentation/index.tsx @@ -12,30 +12,13 @@ import { Skia, AlphaType, ColorType, + SkImage, } from '@shopify/react-native-skia'; import { View, StyleSheet, Image } from 'react-native'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; -const width = 224; -const height = 224; - 
-let pixels = new Uint8Array(width * height * 4); -pixels.fill(255); - -let data = Skia.Data.fromBytes(pixels); -let img = Skia.Image.MakeImage( - { - width: width, - height: height, - alphaType: AlphaType.Opaque, - colorType: ColorType.RGBA_8888, - }, - data, - width * 4 -); - const numberToColor: number[][] = [ [255, 87, 51], // 0 Red [51, 255, 87], // 1 Green @@ -67,48 +50,58 @@ export default function ImageSegmentationScreen() { setGlobalGenerating(model.isGenerating); }, [model.isGenerating, setGlobalGenerating]); const [imageUri, setImageUri] = useState(''); + const [imageSize, setImageSize] = useState({ width: 0, height: 0 }); + const [segImage, setSegImage] = useState(null); + const [canvasSize, setCanvasSize] = useState({ width: 0, height: 0 }); const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); - const uri = image?.uri; - setImageUri(uri as string); + if (!image?.uri) return; + setImageUri(image.uri); + setImageSize({ + width: image.width ?? 0, + height: image.height ?? 
0, + }); + setSegImage(null); }; - const [resultPresent, setResultPresent] = useState(false); - const runForward = async () => { - if (imageUri) { - try { - const output = await model.forward(imageUri); - pixels = new Uint8Array(width * height * 4); + if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return; + try { + const { width, height } = imageSize; + const output = await model.forward(imageUri, [DeeplabLabel.ARGMAX]); + const argmax = output[DeeplabLabel.ARGMAX] || []; + const uniqueValues = new Set(); + for (let i = 0; i < argmax.length; i++) { + uniqueValues.add(argmax[i]); + } + const pixels = new Uint8Array(width * height * 4); - for (let x = 0; x < width; x++) { - for (let y = 0; y < height; y++) { - for (let i = 0; i < 3; i++) { - pixels[(x * height + y) * 4 + i] = - numberToColor[ - (output[DeeplabLabel.ARGMAX] || [])[x * height + y] - ][i]; - } - pixels[(x * height + y) * 4 + 3] = 255; - } + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const idx = row * width + col; + const color = numberToColor[argmax[idx]] || [0, 0, 0]; + pixels[idx * 4] = color[0]; + pixels[idx * 4 + 1] = color[1]; + pixels[idx * 4 + 2] = color[2]; + pixels[idx * 4 + 3] = 255; } - - data = Skia.Data.fromBytes(pixels); - img = Skia.Image.MakeImage( - { - width: width, - height: height, - alphaType: AlphaType.Opaque, - colorType: ColorType.RGBA_8888, - }, - data, - width * 4 - ); - setResultPresent(true); - } catch (e) { - console.error(e); } + + const data = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + data, + width * 4 + ); + setSegImage(img); + } catch (e) { + console.error(e); } }; @@ -135,16 +128,24 @@ export default function ImageSegmentationScreen() { } /> - {resultPresent && ( - + {segImage && ( + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > @@ 
-181,7 +182,7 @@ const styles = StyleSheet.create({ padding: 4, }, canvas: { - width: width, - height: height, + width: '100%', + height: '100%', }, }); diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json index 63885109a0..cce918197e 100644 --- a/apps/computer-vision/package.json +++ b/apps/computer-vision/package.json @@ -11,7 +11,8 @@ "lint": "eslint . --ext .ts,.tsx --fix" }, "dependencies": { - "@react-native/metro-config": "^0.76.3", + "@react-native-executorch/expo-resource-fetcher": "workspace:*", + "@react-native/metro-config": "^0.81.5", "@react-navigation/drawer": "^7.3.9", "@react-navigation/native": "^7.1.6", "@shopify/react-native-skia": "2.2.12", @@ -21,7 +22,7 @@ "expo-linking": "~8.0.10", "expo-router": "~6.0.17", "expo-status-bar": "~3.0.9", - "metro-config": "^0.81.0", + "metro-config": "^0.81.5", "react": "19.1.0", "react-native": "0.81.5", "react-native-device-info": "^14.0.4", diff --git a/apps/computer-vision/tsconfig.json b/apps/computer-vision/tsconfig.json index 47026ce434..a08f2140a8 100644 --- a/apps/computer-vision/tsconfig.json +++ b/apps/computer-vision/tsconfig.json @@ -9,7 +9,10 @@ "customConditions": ["react-native"], "noEmit": true, "paths": { - "react-native-executorch": ["../../packages/react-native-executorch/src"] + "react-native-executorch": ["../../packages/react-native-executorch/src"], + "@react-native-executorch/expo-resource-fetcher": [ + "../../packages/expo-resource-fetcher/src" + ] } } } diff --git a/apps/llm/app/_layout.tsx b/apps/llm/app/_layout.tsx index 68c715a80c..5ece80f1fb 100644 --- a/apps/llm/app/_layout.tsx +++ b/apps/llm/app/_layout.tsx @@ -1,8 +1,9 @@ import { Drawer } from 'expo-router/drawer'; +import { initExecutorch } from 'react-native-executorch'; +import { ExpoResourceFetcher } from '@react-native-executorch/expo-resource-fetcher'; import ColorPalette from '../colors'; import React, { useState } from 'react'; import { Text, StyleSheet, View } from 'react-native'; - 
import { DrawerContentComponentProps, DrawerContentScrollView, @@ -10,6 +11,10 @@ import { } from '@react-navigation/drawer'; import { GeneratingContext } from '../context'; +initExecutorch({ + resourceFetcher: ExpoResourceFetcher, +}); + interface CustomDrawerProps extends DrawerContentComponentProps { isGenerating: boolean; } diff --git a/apps/llm/app/llm/index.tsx b/apps/llm/app/llm/index.tsx index b46e43c135..c159e745ce 100644 --- a/apps/llm/app/llm/index.tsx +++ b/apps/llm/app/llm/index.tsx @@ -34,7 +34,7 @@ function LLMScreen() { useEffect(() => { if (llm.error) { - console.log('LLM error:', llm.error); + console.error('LLM error:', llm.error); } }, [llm.error]); diff --git a/apps/llm/app/llm_structured_output/index.tsx b/apps/llm/app/llm_structured_output/index.tsx index 3fa230600c..e77900ab27 100644 --- a/apps/llm/app/llm_structured_output/index.tsx +++ b/apps/llm/app/llm_structured_output/index.tsx @@ -119,7 +119,7 @@ function LLMScreen() { useEffect(() => { if (llm.error) { - console.log('LLM error:', llm.error); + console.error('LLM error:', llm.error); } }, [llm.error]); diff --git a/apps/llm/app/llm_tool_calling/index.tsx b/apps/llm/app/llm_tool_calling/index.tsx index 6fbf49f194..1914105484 100644 --- a/apps/llm/app/llm_tool_calling/index.tsx +++ b/apps/llm/app/llm_tool_calling/index.tsx @@ -61,7 +61,7 @@ function LLMToolCallingScreen() { useEffect(() => { if (llm.error) { - console.log('LLM error:', llm.error); + console.error('LLM error:', llm.error); } }, [llm.error]); diff --git a/apps/llm/app/voice_chat/index.tsx b/apps/llm/app/voice_chat/index.tsx index 79a713c93d..abf101b1fe 100644 --- a/apps/llm/app/voice_chat/index.tsx +++ b/apps/llm/app/voice_chat/index.tsx @@ -1,4 +1,4 @@ -import { useContext, useEffect, useRef, useState } from 'react'; +import { useContext, useEffect, useState } from 'react'; import { Keyboard, KeyboardAvoidingView, @@ -35,6 +35,8 @@ export default function VoiceChatScreenWrapper() { function VoiceChatScreen() { const 
[isRecording, setIsRecording] = useState(false); + const [liveTranscription, setLiveTranscription] = useState(''); + const [recorder] = useState( () => new AudioRecorder({ @@ -42,7 +44,7 @@ function VoiceChatScreen() { bufferLengthInSamples: 1600, }) ); - const messageRecorded = useRef(false); + const { setGlobalGenerating } = useContext(GeneratingContext); const llm = useLLM({ model: QWEN3_0_6B_QUANTIZED }); @@ -67,23 +69,51 @@ function VoiceChatScreen() { if (isRecording) { setIsRecording(false); recorder.stop(); - messageRecorded.current = true; speechToText.streamStop(); } else { setIsRecording(true); + setLiveTranscription(''); + recorder.onAudioReady(({ buffer }) => { speechToText.streamInsert(buffer.getChannelData(0)); }); recorder.start(); - const transcription = await speechToText.stream(); - await llm.sendMessage(transcription); + + let finalResult = ''; + + try { + for await (const result of speechToText.stream()) { + const text = result.committed.text + result.nonCommitted.text; + setLiveTranscription(text); + finalResult = text; + } + } catch (e) { + console.error('Streaming error:', e); + } finally { + if (finalResult.trim().length > 0) { + await llm.sendMessage(finalResult); + setLiveTranscription(''); + } + } } }; + useEffect(() => { + if (llm.error) { + console.error('LLM error:', llm.error); + } + }, [llm.error]); + + useEffect(() => { + if (speechToText.error) { + console.error('speechToText error:', speechToText.error); + } + }, [speechToText.error]); + return !llm.isReady || !speechToText.isReady ? ( ) : ( @@ -96,16 +126,17 @@ function VoiceChatScreen() { Qwen 3 x Whisper - {llm.messageHistory.length || speechToText.committedTranscription ? ( + + {llm.messageHistory.length > 0 || liveTranscription.length > 0 ? ( 0 ? [ ...llm.messageHistory, { role: 'user', - content: speechToText.committedTranscription, + content: liveTranscription, }, ] : llm.messageHistory @@ -123,6 +154,7 @@ function VoiceChatScreen() { )} + {DeviceInfo.isEmulatorSync() ? 
( diff --git a/apps/llm/package.json b/apps/llm/package.json index de046a2994..04597d963e 100644 --- a/apps/llm/package.json +++ b/apps/llm/package.json @@ -11,7 +11,8 @@ "lint": "eslint . --ext .ts,.tsx --fix" }, "dependencies": { - "@react-native/metro-config": "^0.76.3", + "@react-native-executorch/expo-resource-fetcher": "workspace:*", + "@react-native/metro-config": "^0.81.5", "@react-navigation/drawer": "^7.3.9", "@react-navigation/native": "^7.1.6", "expo": "^54.0.27", @@ -22,7 +23,7 @@ "expo-linking": "~8.0.10", "expo-router": "~6.0.17", "expo-status-bar": "~3.0.9", - "metro-config": "^0.81.0", + "metro-config": "^0.81.5", "react": "19.1.0", "react-native": "0.81.5", "react-native-audio-api": "^0.8.2", diff --git a/apps/llm/tsconfig.json b/apps/llm/tsconfig.json index 47026ce434..a08f2140a8 100644 --- a/apps/llm/tsconfig.json +++ b/apps/llm/tsconfig.json @@ -9,7 +9,10 @@ "customConditions": ["react-native"], "noEmit": true, "paths": { - "react-native-executorch": ["../../packages/react-native-executorch/src"] + "react-native-executorch": ["../../packages/react-native-executorch/src"], + "@react-native-executorch/expo-resource-fetcher": [ + "../../packages/expo-resource-fetcher/src" + ] } } } diff --git a/apps/speech/App.tsx b/apps/speech/App.tsx index af0598b59c..ab036678e8 100644 --- a/apps/speech/App.tsx +++ b/apps/speech/App.tsx @@ -5,6 +5,12 @@ import { SpeechToTextScreen } from './screens/SpeechToTextScreen'; import ColorPalette from './colors'; import ExecutorchLogo from './assets/executorch.svg'; import { Quiz } from './screens/Quiz'; +import { initExecutorch } from 'react-native-executorch'; +import { ExpoResourceFetcher } from '@react-native-executorch/expo-resource-fetcher'; + +initExecutorch({ + resourceFetcher: ExpoResourceFetcher, +}); export default function App() { const [currentScreen, setCurrentScreen] = useState< diff --git a/apps/speech/app.json b/apps/speech/app.json index 693c815cb1..1e6e364647 100644 --- a/apps/speech/app.json +++ 
b/apps/speech/app.json @@ -17,6 +17,9 @@ "bundleIdentifier": "com.anonymous.speech", "infoPlist": { "NSMicrophoneUsageDescription": "This app needs access to your microphone to record audio." + }, + "entitlements": { + "com.apple.developer.kernel.increased-memory-limit": true } }, "android": { @@ -24,11 +27,34 @@ "foregroundImage": "./assets/adaptive-icon.png", "backgroundColor": "#ffffff" }, - "package": "com.anonymous.speech" + "package": "com.anonymous.speech", + "permissions": [ + "android.permission.RECORD_AUDIO", + "android.permission.MODIFY_AUDIO_SETTINGS", + "android.permission.FOREGROUND_SERVICE", + "android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK" + ] }, "web": { "favicon": "./assets/favicon.png" }, - "plugins": ["expo-font"] + "plugins": [ + "expo-font", + [ + "react-native-audio-api", + { + "iosBackgroundMode": true, + "iosMicrophonePermission": "This app requires access to the microphone to record audio.", + "androidPermissions": [ + "android.permission.MODIFY_AUDIO_SETTINGS", + "android.permission.FOREGROUND_SERVICE", + "android.permission.FOREGROUND_SERVICE_MEDIA_PLAYBACK", + "android.permission.RECORD_AUDIO" + ], + "androidForegroundService": true, + "androidFSTypes": ["mediaPlayback", "microphone"] + } + ] + ] } } diff --git a/apps/speech/components/VerboseTranscription.tsx b/apps/speech/components/VerboseTranscription.tsx new file mode 100644 index 0000000000..1093b2bd1c --- /dev/null +++ b/apps/speech/components/VerboseTranscription.tsx @@ -0,0 +1,241 @@ +import React from 'react'; +import { View, Text, StyleSheet } from 'react-native'; +import { TranscriptionResult } from 'react-native-executorch'; + +export const VerboseTranscription = ({ + data, +}: { + data: TranscriptionResult; +}) => { + if (!data) return null; + + const hasSegments = Array.isArray(data.segments) && data.segments.length > 0; + + const hasLanguage = + !!data.language && data.language !== 'N/A' && data.language.trim() !== ''; + + const hasDuration = typeof data.duration 
=== 'number' && data.duration > 0; + + const hasMetadata = hasLanguage || hasDuration; + + return ( + + + Full Text: + {data.text || ''} + + {hasMetadata && ( + + {hasLanguage && ( + Language: {data.language} + )} + {hasDuration && ( + + Duration: {data.duration?.toFixed(2)}s + + )} + + )} + + + {hasSegments && ( + <> + + Segments ({data.segments?.length}) + + + {data.segments?.map((seg, index) => ( + + + + {seg.start.toFixed(2)}s - {seg.end.toFixed(2)}s + + ID: {index} + + + "{seg.text}" + + {seg.words && seg.words.length > 0 && ( + + Word Timestamps: + + {seg.words.map((w, wIdx) => ( + + {w.word.trim()} + + {w.start.toFixed(2)}s + + + ))} + + + )} + + + + Avg LogProb + + {data.task === 'transcribe' + ? seg.avgLogprob?.toFixed(4) + : 'N/A'} + + + + Temp + + {data.task === 'transcribe' + ? seg.temperature?.toFixed(2) + : 'N/A'} + + + + {/*eslint-disable-next-line @cspell/spellchecker*/} + Compr. + + {data.task === 'transcribe' + ? seg.compressionRatio?.toFixed(2) + : 'N/A'} + + + + + ))} + + )} + + ); +}; + +const styles = StyleSheet.create({ + container: { + padding: 4, + }, + metaContainer: { + marginBottom: 16, + padding: 12, + backgroundColor: '#f0f2f5', + borderRadius: 8, + }, + label: { + fontWeight: 'bold', + color: '#0f186e', + marginBottom: 4, + }, + text: { + fontSize: 16, + color: '#333', + marginBottom: 8, + }, + row: { + flexDirection: 'row', + gap: 10, + marginTop: 8, + }, + metaItem: { + fontSize: 12, + color: '#666', + backgroundColor: '#e1e4e8', + paddingHorizontal: 8, + paddingVertical: 2, + borderRadius: 4, + overflow: 'hidden', + }, + sectionHeader: { + fontSize: 18, + fontWeight: 'bold', + color: '#0f186e', + marginBottom: 8, + marginTop: 8, + }, + segmentCard: { + backgroundColor: '#fff', + borderRadius: 8, + borderWidth: 1, + borderColor: '#e1e4e8', + marginBottom: 12, + padding: 12, + shadowColor: '#000', + shadowOffset: { width: 0, height: 1 }, + shadowOpacity: 0.1, + shadowRadius: 2, + elevation: 2, + }, + segmentHeader: { + flexDirection: 
'row', + justifyContent: 'space-between', + marginBottom: 8, + }, + timeBadge: { + fontSize: 12, + fontWeight: 'bold', + color: '#fff', + backgroundColor: '#0f186e', + paddingHorizontal: 8, + paddingVertical: 2, + borderRadius: 12, + overflow: 'hidden', + }, + segmentId: { + fontSize: 12, + color: '#888', + }, + segmentText: { + fontSize: 15, + fontStyle: 'italic', + color: '#333', + marginBottom: 12, + }, + statsGrid: { + flexDirection: 'row', + flexWrap: 'wrap', + gap: 8, + borderTopWidth: 1, + borderTopColor: '#f0f0f0', + paddingTop: 8, + }, + statItem: { + flex: 1, + minWidth: '45%', + flexDirection: 'row', + justifyContent: 'space-between', + }, + statLabel: { + fontSize: 11, + color: '#888', + }, + statValue: { + fontSize: 11, + fontWeight: '600', + color: '#444', + }, + wordsContainer: { + marginVertical: 8, + backgroundColor: '#f8f9fa', + padding: 8, + borderRadius: 6, + }, + wordsGrid: { + flexDirection: 'row', + flexWrap: 'wrap', + gap: 6, + marginTop: 4, + }, + wordChip: { + backgroundColor: '#ffffff', + borderWidth: 1, + borderColor: '#e1e4e8', + borderRadius: 4, + paddingHorizontal: 6, + paddingVertical: 2, + alignItems: 'center', + }, + wordText: { + fontSize: 12, + color: '#333', + }, + wordTime: { + fontSize: 9, + color: '#888', + marginTop: 1, + }, +}); diff --git a/apps/speech/package.json b/apps/speech/package.json index 094fa2b784..9c9fdbee12 100644 --- a/apps/speech/package.json +++ b/apps/speech/package.json @@ -11,15 +11,16 @@ "lint": "eslint . 
--ext .ts,.tsx --fix" }, "dependencies": { - "@react-native/metro-config": "^0.76.3", + "@react-native-executorch/expo-resource-fetcher": "workspace:*", + "@react-native/metro-config": "^0.81.5", "buffer": "^6.0.3", "expo": "^54.0.27", "expo-font": "~14.0.10", "expo-status-bar": "~3.0.9", - "metro-config": "^0.81.0", + "metro-config": "^0.81.5", "react": "19.1.0", "react-native": "0.81.5", - "react-native-audio-api": "0.6.5", + "react-native-audio-api": "0.11.3", "react-native-device-info": "^14.0.4", "react-native-executorch": "workspace:*", "react-native-reanimated": "~4.1.1", diff --git a/apps/speech/screens/SpeechToTextScreen.tsx b/apps/speech/screens/SpeechToTextScreen.tsx index da7ed0f7e1..06813dfcd6 100644 --- a/apps/speech/screens/SpeechToTextScreen.tsx +++ b/apps/speech/screens/SpeechToTextScreen.tsx @@ -8,9 +8,14 @@ import { TextInput, KeyboardAvoidingView, Platform, + Switch, } from 'react-native'; import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context'; -import { useSpeechToText, WHISPER_TINY_EN } from 'react-native-executorch'; +import { + useSpeechToText, + WHISPER_TINY_EN, + TranscriptionResult, +} from 'react-native-executorch'; import FontAwesome from '@expo/vector-icons/FontAwesome'; import { AudioManager, @@ -21,6 +26,8 @@ import * as FileSystem from 'expo-file-system/legacy'; import SWMIcon from '../assets/swm_icon.svg'; import DeviceInfo from 'react-native-device-info'; +import { VerboseTranscription } from '../components/VerboseTranscription'; + const isSimulator = DeviceInfo.isEmulatorSync(); export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { @@ -28,26 +35,34 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { model: WHISPER_TINY_EN, }); - const [transcription, setTranscription] = useState(''); + const [transcription, setTranscription] = + useState(null); + + const [liveResult, setLiveResult] = useState<{ + fullText: string; + segments: any[]; + } | null>(null); + + 
const [enableTimestamps, setEnableTimestamps] = useState(false); const [audioURL, setAudioURL] = useState(''); + + const isRecordingRef = useRef(false); const [liveTranscribing, setLiveTranscribing] = useState(false); const scrollViewRef = useRef(null); - const [recorder] = useState( - () => - new AudioRecorder({ - sampleRate: 16000, - bufferLengthInSamples: 1600, - }) - ); + const recorder = new AudioRecorder(); useEffect(() => { AudioManager.setAudioSessionOptions({ iosCategory: 'playAndRecord', iosMode: 'spokenAudio', - iosOptions: ['allowBluetooth', 'defaultToSpeaker'], + iosOptions: ['allowBluetoothHFP', 'defaultToSpeaker'], }); - AudioManager.requestRecordingPermissions(); + const checkPerms = async () => { + const granted = await AudioManager.requestRecordingPermissions(); + if (!granted) console.warn('Microphone permission denied!'); + }; + checkPerms(); }, []); async function getAudioFile(sourceUri: string) { @@ -72,40 +87,110 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { } const uri = await getAudioFile(audioURL); + // Reset previous states + setTranscription(null); + setLiveResult(null); const audioContext = new AudioContext({ sampleRate: 16000 }); try { - const decodedAudioData = await audioContext.decodeAudioDataSource(uri); + const decodedAudioData = await audioContext.decodeAudioData(uri); const audioBuffer = decodedAudioData.getChannelData(0); - setTranscription(await model.transcribe(audioBuffer)); + const result = await model.transcribe(audioBuffer, { + verbose: enableTimestamps, + }); + setTranscription(result); } catch (error) { console.error('Error decoding audio data', error); - console.warn('Note: Supported file formats: mp3, wav, flac'); return; } }; const handleStartTranscribeFromMicrophone = async () => { + isRecordingRef.current = true; setLiveTranscribing(true); - setTranscription(''); - recorder.onAudioReady(({ buffer }) => { - model.streamInsert(buffer.getChannelData(0)); - }); - recorder.start(); + + 
setTranscription(null); + setLiveResult({ fullText: '', segments: [] }); + + const sampleRate = 16000; + + recorder.onAudioReady( + { + sampleRate, + bufferLength: 0.1 * sampleRate, + channelCount: 1, + }, + ({ buffer }) => { + model.streamInsert(buffer.getChannelData(0)); + } + ); + + try { + const success = await AudioManager.setAudioSessionActivity(true); + if (!success) { + console.warn('Cannot start audio session correctly'); + } + const result = recorder.start(); + if (result.status === 'error') { + console.warn('Recording problems: ', result.message); + } + } catch (e) { + console.error('Failed to start recorder', e); + isRecordingRef.current = false; + setLiveTranscribing(false); + return; + } + + let accumulatedText = ''; + let accumulatedSegments: any[] = []; try { - await model.stream(); + const streamIter = model.stream({ + verbose: enableTimestamps, + }); + + for await (const { committed, nonCommitted } of streamIter) { + if (!isRecordingRef.current) break; + + if (committed.text) { + accumulatedText += committed.text; + } + if (committed.segments) { + accumulatedSegments = [...accumulatedSegments, ...committed.segments]; + } + + const currentDisplay = { + fullText: accumulatedText + nonCommitted.text, + segments: [...accumulatedSegments, ...(nonCommitted.segments || [])], + }; + + setLiveResult(currentDisplay); + } } catch (error) { console.error('Error during live transcription:', error); + } finally { + setLiveTranscribing(false); } }; const handleStopTranscribeFromMicrophone = () => { + isRecordingRef.current = false; + recorder.stop(); model.streamStop(); console.log('Live transcription stopped'); setLiveTranscribing(false); + + if (liveResult) { + setTranscription({ + text: liveResult.fullText, + segments: liveResult.segments, + language: 'en', + duration: 0, + }); + setLiveResult(null); + } }; const getModelStatus = () => { @@ -118,6 +203,20 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { const readyToTranscribe = 
!model.isGenerating && model.isReady; const recordingButtonDisabled = isSimulator || !readyToTranscribe; + const getDisplayData = (): TranscriptionResult | null => { + if (liveTranscribing && liveResult) { + return { + text: liveResult.fullText, + segments: liveResult.segments, + language: 'en', + duration: 0, + }; + } + return transcription; + }; + + const displayData = getDisplayData(); + return ( @@ -138,6 +237,21 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => { Status: {getModelStatus()} + + Enable Timestamps (Verbose) + { + setEnableTimestamps(val); + setTranscription(null); + setLiveResult(null); + }} + trackColor={{ false: '#767577', true: '#0f186e' }} + thumbColor={enableTimestamps ? '#fff' : '#f4f3f4'} + disabled={model.isGenerating} + /> + + Transcription void }) => { scrollViewRef.current?.scrollToEnd({ animated: true }) } > - - {transcription !== '' - ? transcription - : model.committedTranscription + - model.nonCommittedTranscription} - + {displayData ? ( + + ) : ( + + {liveTranscribing + ? 'Listening...' 
+ : 'No transcription yet...'} + + )} @@ -241,6 +358,17 @@ const styles = StyleSheet.create({ marginTop: 12, alignItems: 'center', }, + toggleContainer: { + flexDirection: 'row', + alignItems: 'center', + marginTop: 10, + marginBottom: 5, + }, + toggleLabel: { + fontSize: 16, + marginRight: 10, + color: '#0f186e', + }, transcriptionContainer: { flex: 1, width: '100%', @@ -256,6 +384,11 @@ const styles = StyleSheet.create({ borderWidth: 1, borderColor: '#0f186e', padding: 12, + maxHeight: 400, + }, + placeholderText: { + color: '#aaa', + fontStyle: 'italic', }, inputContainer: { marginBottom: 12, diff --git a/apps/speech/tsconfig.json b/apps/speech/tsconfig.json index 47026ce434..a08f2140a8 100644 --- a/apps/speech/tsconfig.json +++ b/apps/speech/tsconfig.json @@ -9,7 +9,10 @@ "customConditions": ["react-native"], "noEmit": true, "paths": { - "react-native-executorch": ["../../packages/react-native-executorch/src"] + "react-native-executorch": ["../../packages/react-native-executorch/src"], + "@react-native-executorch/expo-resource-fetcher": [ + "../../packages/expo-resource-fetcher/src" + ] } } } diff --git a/apps/text-embeddings/app/_layout.tsx b/apps/text-embeddings/app/_layout.tsx index 16bf0e87a3..c0633a9937 100644 --- a/apps/text-embeddings/app/_layout.tsx +++ b/apps/text-embeddings/app/_layout.tsx @@ -1,4 +1,6 @@ import { Drawer } from 'expo-router/drawer'; +import { initExecutorch } from 'react-native-executorch'; +import { ExpoResourceFetcher } from '@react-native-executorch/expo-resource-fetcher'; import ColorPalette from '../colors'; import React, { useState } from 'react'; import { Text, StyleSheet, View } from 'react-native'; @@ -10,6 +12,10 @@ import { } from '@react-navigation/drawer'; import { GeneratingContext } from '../context'; +initExecutorch({ + resourceFetcher: ExpoResourceFetcher, +}); + interface CustomDrawerProps extends DrawerContentComponentProps { isGenerating: boolean; } diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx 
b/apps/text-embeddings/app/clip-embeddings/index.tsx index 7a53a77dfb..66ca348757 100644 --- a/apps/text-embeddings/app/clip-embeddings/index.tsx +++ b/apps/text-embeddings/app/clip-embeddings/index.tsx @@ -141,7 +141,7 @@ function ClipEmbeddingsScreen() { const getModelStatusText = (model: typeof textModel | typeof imageModel) => { if (model.error) { - return `Oops! Error: ${model.error}`; + return `Oops! ${model.error}`; } if (!model.isReady) { return `Loading model ${(model.downloadProgress * 100).toFixed(2)}%`; diff --git a/apps/text-embeddings/package.json b/apps/text-embeddings/package.json index cbf0da96c3..64093b9191 100644 --- a/apps/text-embeddings/package.json +++ b/apps/text-embeddings/package.json @@ -14,6 +14,7 @@ "@react-navigation/native": "*" }, "dependencies": { + "@react-native-executorch/expo-resource-fetcher": "workspace:*", "@react-navigation/drawer": "^7.3.9", "expo": "^54.0.27", "expo-constants": "~18.0.11", diff --git a/apps/text-embeddings/tsconfig.json b/apps/text-embeddings/tsconfig.json index 47026ce434..a08f2140a8 100644 --- a/apps/text-embeddings/tsconfig.json +++ b/apps/text-embeddings/tsconfig.json @@ -9,7 +9,10 @@ "customConditions": ["react-native"], "noEmit": true, "paths": { - "react-native-executorch": ["../../packages/react-native-executorch/src"] + "react-native-executorch": ["../../packages/react-native-executorch/src"], + "@react-native-executorch/expo-resource-fetcher": [ + "../../packages/expo-resource-fetcher/src" + ] } } } diff --git a/docs/docs/01-fundamentals/01-getting-started.md b/docs/docs/01-fundamentals/01-getting-started.md index 109f6e4790..ea1673ca32 100644 --- a/docs/docs/01-fundamentals/01-getting-started.md +++ b/docs/docs/01-fundamentals/01-getting-started.md @@ -62,7 +62,7 @@ Installation is pretty straightforward, just use your favorite package manager. 
-If you're using bare React Native (instead of a managed Expo project), you also need to install Expo Modules because the underlying implementation relies on expo-file-system. Since expo-file-system is an Expo package, bare React Native projects need **Expo Modules** to properly integrate and use it. The link provided (https://docs.expo.dev/bare/installing-expo-modules/) offers guidance on setting up Expo Modules in a bare React Native environment. +Our library offers support for both bare React Native and Expo projects. Please follow the instructions from the [Loading models section](./02-loading-models.md) to make sure you set up your project correctly. We encourage you to use an Expo project if possible. If you are planning to migrate from bare React Native to an Expo project, the link (https://docs.expo.dev/bare/installing-expo-modules/) offers guidance on setting up Expo Modules in a bare React Native environment. If you plan on using your models via require() instead of fetching them from a url, you also need to add following lines to your `metro.config.js`: diff --git a/docs/docs/01-fundamentals/02-loading-models.md b/docs/docs/01-fundamentals/02-loading-models.md index 96be9784fe..6e84f96668 100644 --- a/docs/docs/01-fundamentals/02-loading-models.md +++ b/docs/docs/01-fundamentals/02-loading-models.md @@ -4,6 +4,46 @@ title: Loading Models There are three different methods available for loading model files, depending on their size and location. +## Prerequisites + +In our library, you can use two different resource fetching mechanisms. One is implemented using Expo FileSystem; the other one uses external libraries. We encourage you to use the Expo-based implementation if possible. 
+ +To use the Expo adapter, please add these libraries: + +```bash +yarn add @react-native-executorch/expo-resource-fetcher +yarn add expo-file-system expo-asset +``` + +and then add the following code in your React Native app: + +```typescript +import { initExecutorch } from 'react-native-executorch'; +import { ExpoResourceFetcher } from '@react-native-executorch/expo-resource-fetcher'; + +initExecutorch({ + resourceFetcher: ExpoResourceFetcher, +}); +``` + +If you cannot use Expo in your project, proceed with the following steps: + +```bash +yarn add @react-native-executorch/bare-adapter +yarn add @dr.pogodin/react-native-fs @kesha-antonov/react-native-background-downloader +``` + +and + +```typescript +import { initExecutorch } from 'react-native-executorch'; +import { BareResourceFetcher } from '@react-native-executorch/bare-adapter'; + +initExecutorch({ + resourceFetcher: BareResourceFetcher, +}); +``` + **1. Load from React Native assets folder (For Files < 512MB)** ```typescript diff --git a/docs/docs/01-fundamentals/03-frequently-asked-questions.md b/docs/docs/01-fundamentals/03-frequently-asked-questions.md index 9216c615f9..69e3792d41 100644 --- a/docs/docs/01-fundamentals/03-frequently-asked-questions.md +++ b/docs/docs/01-fundamentals/03-frequently-asked-questions.md @@ -31,7 +31,7 @@ If your model doesn't support it, you can still work around it using context. Fo ### Can I use React Native ExecuTorch in bare React Native apps? -To use the library, you need to install Expo Modules first. For a setup guide, refer to [this tutorial](https://docs.expo.dev/bare/installing-expo-modules/). This is because we use Expo File System under the hood to download and manage the model binaries. +Yes, starting from version `0.8.x`, you can use React Native ExecuTorch in bare React Native apps. You just need to use the bare React Native resource fetcher instead of the Expo one; see the [Loading models section](./02-loading-models.md) for more details. ### Do you support the old architecture? 
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md b/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md index 5b0545cf24..ce1aa3f063 100644 --- a/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md +++ b/docs/docs/03-hooks/01-natural-language-processing/useSpeechToText.md @@ -47,12 +47,12 @@ const { uri } = await FileSystem.downloadAsync( ); const audioContext = new AudioContext({ sampleRate: 16000 }); -const decodedAudioData = await audioContext.decodeAudioDataSource(uri); +const decodedAudioData = await audioContext.decodeAudioData(uri); const audioBuffer = decodedAudioData.getChannelData(0); try { const transcription = await model.transcribe(audioBuffer); - console.log(transcription); + console.log(transcription.text); } catch (error) { console.error('Error during audio transcription', error); } @@ -101,12 +101,52 @@ const model = useSpeechToText({ const transcription = await model.transcribe(spanishAudio, { language: 'es' }); ``` +### Timestamps & Transcription Stat Data + +You can obtain word-level timestamps and other useful parameters from transcription ([`transcribe`](../../06-api-reference/interfaces/SpeechToTextType.md#transcribe) and [`stream`](../../06-api-reference/interfaces/SpeechToTextType.md#stream) methods) by setting `verbose: true` in the options. The result mimics the _verbose_json_ format from the OpenAI Whisper API. For more information, please read the [`transcribe`](../../06-api-reference/interfaces/SpeechToTextType.md#transcribe), [`stream`](../../06-api-reference/interfaces/SpeechToTextType.md#stream), and [`TranscriptionResult`](../../06-api-reference/interfaces/TranscriptionResult.md) API references. 
+ +```typescript +const transcription = await model.transcribe(audioBuffer, { verbose: true }); +// Example result +// +// transcription: { +// task: "transcription", +// text: "Example text for a ...", +// duration: 9.05, +// language: "en", +// segments: [ +// { +// start: 0, +// end: 5.4, +// text: "Example text for", +// words: [ +// { +// word: "Example", +// start: 0, +// end: 1.4 +// }, +// ... +// ], +// tokens: [1, 32, 45, ...], +// temperature: 0.0, +// avgLogprob: -1.235, +// compressionRatio: 1.632 +// }, +// ... +// ] +// } +``` + ## Example ```tsx import React, { useState } from 'react'; -import { Button, Text } from 'react-native'; -import { useSpeechToText, WHISPER_TINY_EN } from 'react-native-executorch'; +import { Button, Text, View } from 'react-native'; +import { + useSpeechToText, + WHISPER_TINY_EN, + TranscriptionResult, +} from 'react-native-executorch'; import { AudioContext } from 'react-native-audio-api'; import * as FileSystem from 'expo-file-system'; @@ -115,7 +155,7 @@ function App() { model: WHISPER_TINY_EN, }); - const [transcription, setTranscription] = useState(''); + const [transcription, setTranscription] = useState<TranscriptionResult | null>(null); const loadAudio = async () => { const { uri } = await FileSystem.downloadAsync( @@ -132,14 +172,45 @@ function App() { const handleTranscribe = async () => { const audio = await loadAudio(); - await model.transcribe(audio); + // Default text transcription + const result = await model.transcribe(audio); + setTranscription(result); + }; + + const handleTranscribeWithTimestamps = async () => { + const audio = await loadAudio(); + // Transcription with timestamps + const result = await model.transcribe(audio, { verbose: true }); + setTranscription(result); + }; + + // Custom logic for printing transcription + // e.g. 
+ + const renderContent = () => { + if (!transcription) return <Text>Press a button to transcribe</Text>; + + if (transcription.segments && transcription.segments.length > 0) { + return ( + <Text> + {transcription.text + + '\n\nNum segments: ' + + transcription.segments.length.toString()} + </Text> + ); + } + return <Text>{transcription.text}</Text>; }; return ( - <> - {transcription} -