Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .cspell-wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,7 @@ logprob
RNFS
pogodin
kesha
antonov
antonov
worklet
worklets
BGRA
133 changes: 125 additions & 8 deletions apps/computer-vision/app/object_detection/index.tsx
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import Spinner from '../../components/Spinner';
import { BottomBar } from '../../components/BottomBar';
import { getImage } from '../../utils';
import {
Detection,
useObjectDetection,
SSDLITE_320_MOBILENET_V3_LARGE,
ScalarType,
PixelData,
} from 'react-native-executorch';
import { View, StyleSheet, Image } from 'react-native';
import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native';
import ImageWithBboxes from '../../components/ImageWithBboxes';
import React, { useContext, useEffect, useState } from 'react';
import { GeneratingContext } from '../../context';
import ScreenWrapper from '../../ScreenWrapper';
import ColorPalette from '../../colors';

export default function ObjectDetectionScreen() {
const [imageUri, setImageUri] = useState('');
Expand Down Expand Up @@ -42,14 +44,59 @@ export default function ObjectDetectionScreen() {
// Runs detection on the currently selected image URI (string input path).
// No-op when the user has not picked an image yet.
const runForward = async () => {
  if (!imageUri) {
    return;
  }
  try {
    console.log('Running forward with string URI...');
    const output = await ssdLite.forward(imageUri, 0.5);
    console.log('String URI result:', output.length, 'detections');
    setResults(output);
  } catch (e) {
    console.error('Error in runForward:', e);
  }
};

// Runs detection on a synthetic in-memory RGB frame instead of a file URI,
// exercising the PixelData code path of the unified forward() API.
const runForwardPixels = async () => {
  try {
    console.log('Testing with hardcoded pixel data...');

    // Synthetic 320x320 RGB frame: black background with a white
    // 120x120 square in the middle, standing in for real image data.
    const width = 320;
    const height = 320;
    const channels = 3; // RGB

    const rgbData = new Uint8Array(width * height * channels);

    // Paint the centered white square (rows/cols 100..219 inclusive).
    for (let row = 100; row < 220; row++) {
      const rowOffset = row * width;
      for (let col = 100; col < 220; col++) {
        const base = (rowOffset + col) * 3;
        rgbData[base] = 255; // R
        rgbData[base + 1] = 255; // G
        rgbData[base + 2] = 255; // B
      }
    }

    const pixelData: PixelData = {
      dataPtr: rgbData,
      sizes: [height, width, channels],
      scalarType: ScalarType.BYTE,
    };

    console.log('Running forward with hardcoded pixel data...', {
      sizes: pixelData.sizes,
      dataSize: pixelData.dataPtr.byteLength,
    });

    // Run inference using unified forward() API
    const output = await ssdLite.forward(pixelData, 0.3);
    console.log('Pixel data result:', output.length, 'detections');
    setResults(output);
  } catch (e) {
    console.error('Error in runForwardPixels:', e);
  }
};

if (!ssdLite.isReady) {
return (
<Spinner
Expand Down Expand Up @@ -81,10 +128,41 @@ export default function ObjectDetectionScreen() {
)}
</View>
</View>
<BottomBar
handleCameraPress={handleCameraPress}
runForward={runForward}
/>

{/* Custom bottom bar with two buttons */}
<View style={styles.bottomContainer}>
<View style={styles.bottomIconsContainer}>
<TouchableOpacity onPress={() => handleCameraPress(false)}>
<Text style={styles.iconText}>📷 Gallery</Text>
</TouchableOpacity>
</View>

<View style={styles.buttonsRow}>
<TouchableOpacity
style={[
styles.button,
styles.halfButton,
!imageUri && styles.buttonDisabled,
]}
onPress={runForward}
disabled={!imageUri}
>
<Text style={styles.buttonText}>Run (String)</Text>
</TouchableOpacity>

<TouchableOpacity
style={[
styles.button,
styles.halfButton,
!imageUri && styles.buttonDisabled,
]}
onPress={runForwardPixels}
disabled={!imageUri}
>
<Text style={styles.buttonText}>Run (Pixels)</Text>
</TouchableOpacity>
</View>
</View>
</ScreenWrapper>
);
}
Expand Down Expand Up @@ -129,4 +207,43 @@ const styles = StyleSheet.create({
width: '100%',
height: '100%',
},
bottomContainer: {
width: '100%',
gap: 15,
alignItems: 'center',
padding: 16,
flex: 1,
},
bottomIconsContainer: {
flexDirection: 'row',
justifyContent: 'center',
width: '100%',
},
iconText: {
fontSize: 16,
color: ColorPalette.primary,
},
buttonsRow: {
flexDirection: 'row',
width: '100%',
gap: 10,
},
button: {
height: 50,
justifyContent: 'center',
alignItems: 'center',
backgroundColor: ColorPalette.primary,
color: '#fff',
borderRadius: 8,
},
halfButton: {
flex: 1,
},
buttonDisabled: {
opacity: 0.5,
},
buttonText: {
color: '#fff',
fontSize: 16,
},
});
9 changes: 6 additions & 3 deletions apps/computer-vision/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"@react-navigation/native": "^7.1.6",
"@shopify/react-native-skia": "2.2.12",
"expo": "^54.0.27",
"expo-build-properties": "~1.0.10",
"expo-constants": "~18.0.11",
"expo-font": "~14.0.10",
"expo-linking": "~8.0.10",
Expand All @@ -30,17 +31,19 @@
"react-native-gesture-handler": "~2.28.0",
"react-native-image-picker": "^7.2.2",
"react-native-loading-spinner-overlay": "^3.0.1",
"react-native-reanimated": "~4.1.1",
"react-native-nitro-image": "0.10.2",
"react-native-nitro-modules": "0.33.4",
"react-native-reanimated": "~4.2.1",
"react-native-safe-area-context": "~5.6.0",
"react-native-screens": "~4.16.0",
"react-native-svg": "15.12.1",
"react-native-svg-transformer": "^1.5.0",
"react-native-worklets": "0.5.1"
"react-native-worklets": "^0.7.2"
},
"devDependencies": {
"@babel/core": "^7.25.2",
"@types/pngjs": "^6.0.5",
"@types/react": "~19.1.10"
"@types/react": "~19.2.0"
},
"private": true
}
4 changes: 2 additions & 2 deletions packages/react-native-executorch/android/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
RnExecutorch_kotlinVersion=1.7.0
RnExecutorch_minSdkVersion=21
RnExecutorch_minSdkVersion=26
RnExecutorch_targetSdkVersion=31
RnExecutorch_compileSdkVersion=31
RnExecutorch_ndkversion=21.4.7075529
RnExecutorch_ndkversion=21.4.7075529
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,13 @@ class RnExecutorchInstaller {
meta::createConstructorArgsWithCallInvoker<ModelT>(
args, runtime, jsCallInvoker);

auto modelImplementationPtr = std::make_shared<ModelT>(
std::make_from_tuple<ModelT>(constructorArgs));
auto modelImplementationPtr = std::apply(
[](auto &&...unpackedArgs) {
return std::make_shared<ModelT>(
std::forward<decltype(unpackedArgs)>(unpackedArgs)...);
},
std::move(constructorArgs));

auto modelHostObject = std::make_shared<ModelHostObject<ModelT>>(
modelImplementationPtr, jsCallInvoker);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,15 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) {
return {runtime, bigInt};
}

// Marshals a vector of 64-bit integers into a JS array of numbers.
// NOTE(review): each element is widened to double (JS number), so values
// above 2^53 would lose precision — confirm callers only pass small
// magnitudes (e.g. tensor shapes).
inline jsi::Value getJsiValue(const std::vector<int64_t> &vec,
                              jsi::Runtime &runtime) {
  const size_t length = vec.size();
  jsi::Array array(runtime, length);
  for (size_t idx = 0; idx < length; ++idx) {
    const auto element = static_cast<double>(vec[idx]);
    array.setValueAtIndex(runtime, idx, jsi::Value(element));
  }
  return {runtime, array};
}

inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) {
return {runtime, val};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <rnexecutorch/metaprogramming/FunctionHelpers.h>
#include <rnexecutorch/metaprogramming/TypeConcepts.h>
#include <rnexecutorch/models/BaseModel.h>
#include <rnexecutorch/models/VisionModel.h>
#include <rnexecutorch/models/llm/LLM.h>
#include <rnexecutorch/models/ocr/OCR.h>
#include <rnexecutorch/models/speech_to_text/SpeechToText.h>
Expand Down Expand Up @@ -45,12 +46,6 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
"getInputShape"));
}

if constexpr (meta::HasGenerate<Model>) {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::generate>,
"generate"));
}

if constexpr (meta::HasEncode<Model>) {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::encode>,
Expand Down Expand Up @@ -155,9 +150,26 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::stream>,
"stream"));
}

if constexpr (meta::HasGenerateFromString<Model>) {
addFunctions(
JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::generateFromString>,
"generateFromString"));
}

if constexpr (meta::HasGenerateFromFrame<Model>) {
addFunctions(JSI_EXPORT_FUNCTION(
ModelHostObject<Model>, synchronousHostFunction<&Model::streamStop>,
"streamStop"));
ModelHostObject<Model>, visionHostFunction<&Model::generateFromFrame>,
"generateFromFrame"));
}

if constexpr (meta::HasGenerateFromPixels<Model>) {
addFunctions(
JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::generateFromPixels>,
"generateFromPixels"));
}
}

Expand Down Expand Up @@ -208,6 +220,68 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
}
}

/**
 * Unlike promiseHostFunction, this runs synchronously on the JS thread,
 * which is required for VisionCamera worklet frame processors.
 *
 * The key challenge is argument mapping: the C++ function takes
 * (Runtime, frameData, Rest...) but from the JS side, Runtime is injected
 * automatically and frameData is JS args[0]. The remaining args (Rest...)
 * map to JS args[1..N].
 *
 * This is achieved via TailSignature: it extracts the Rest... parameter pack
 * from the function pointer type, creates a dummy free function with only
 * those types, then uses createArgsTupleFromJsi on that dummy to convert
 * args[1..N] — bypassing the manually-handled frameData at args[0].
 *
 * Argument mapping:
 *   C++ params: (Runtime&, frameData, Rest[0], Rest[1], ...)
 *   JS args:    (          args[0],   args[1], args[2], ...)
 *   JS arg count = C++ arity - 1 (Runtime is injected, not counted)
 *
 */
template <auto FnPtr> JSI_HOST_FUNCTION(visionHostFunction) {
  // Total C++ parameter count of the bound member function, including the
  // injected Runtime& — hence JS callers supply one argument fewer.
  constexpr std::size_t cppArgCount =
      meta::FunctionTraits<decltype(FnPtr)>::arity;
  constexpr std::size_t expectedJsArgs = cppArgCount - 1;

  if (count != expectedJsArgs) {
    throw jsi::JSError(runtime, "Argument count mismatch in vision function");
  }

  try {
    // Dummy free function carrying only the Rest... tail types; lets
    // createArgsTupleFromJsi convert args[1..N] while skipping args[0].
    auto dummyFuncPtr = &meta::TailSignature<decltype(FnPtr)>::dummy;
    auto tailArgsTuple =
        meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime);

    using ReturnType =
        typename meta::FunctionTraits<decltype(FnPtr)>::return_type;

    if constexpr (std::is_void_v<ReturnType>) {
      // Void-returning model call: invoke and hand back `undefined`.
      std::apply(
          [&](auto &&...tailArgs) {
            (model.get()->*FnPtr)(
                runtime, args[0],
                std::forward<decltype(tailArgs)>(tailArgs)...);
          },
          std::move(tailArgsTuple));
      return jsi::Value::undefined();
    } else {
      // Value-returning model call: convert the C++ result back to JSI
      // synchronously (no promise), as required by worklet frame processors.
      auto result = std::apply(
          [&](auto &&...tailArgs) {
            return (model.get()->*FnPtr)(
                runtime, args[0],
                std::forward<decltype(tailArgs)>(tailArgs)...);
          },
          std::move(tailArgsTuple));

      return jsi_conversion::getJsiValue(std::move(result), runtime);
    }
  } catch (const std::exception &e) {
    // Surface native failures to JS as a catchable JSError.
    throw jsi::JSError(runtime, e.what());
  }
}

// A generic host function that resolves a promise with a result of a
// function. JSI arguments are converted to the types provided in the function
// signature, and the return value is converted back to JSI before resolving.
Expand Down
Loading