Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .cspell-wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,7 @@ logprob
RNFS
pogodin
kesha
antonov
antonov
worklet
worklets
BGRA
133 changes: 125 additions & 8 deletions apps/computer-vision/app/object_detection/index.tsx
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import Spinner from '../../components/Spinner';
import { BottomBar } from '../../components/BottomBar';
import { getImage } from '../../utils';
import {
Detection,
useObjectDetection,
SSDLITE_320_MOBILENET_V3_LARGE,
ScalarType,
PixelData,
} from 'react-native-executorch';
import { View, StyleSheet, Image } from 'react-native';
import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native';
import ImageWithBboxes from '../../components/ImageWithBboxes';
import React, { useContext, useEffect, useState } from 'react';
import { GeneratingContext } from '../../context';
import ScreenWrapper from '../../ScreenWrapper';
import ColorPalette from '../../colors';

export default function ObjectDetectionScreen() {
const [imageUri, setImageUri] = useState('');
Expand Down Expand Up @@ -42,14 +44,59 @@ export default function ObjectDetectionScreen() {
// Runs detection on the currently selected image URI (string input path).
// No-op when the user has not picked an image yet.
const runForward = async () => {
  if (!imageUri) {
    return;
  }
  try {
    console.log('Running forward with string URI...');
    const output = await ssdLite.forward(imageUri, 0.5);
    console.log('String URI result:', output.length, 'detections');
    setResults(output);
  } catch (e) {
    console.error('Error in runForward:', e);
  }
};

// Runs detection on a synthetic in-memory RGB frame instead of a file URI,
// exercising the PixelData code path of the unified forward() API.
const runForwardPixels = async () => {
  try {
    console.log('Testing with hardcoded pixel data...');

    // Synthetic 320x320 RGB frame: black background with a white
    // 120x120 square in the middle, standing in for real image data.
    const width = 320;
    const height = 320;
    const channels = 3; // RGB

    const rgbData = new Uint8Array(width * height * channels);

    // Paint the centered white square (rows/cols 100..219 inclusive).
    for (let row = 100; row < 220; row++) {
      const rowOffset = row * width;
      for (let col = 100; col < 220; col++) {
        const base = (rowOffset + col) * 3;
        rgbData[base] = 255; // R
        rgbData[base + 1] = 255; // G
        rgbData[base + 2] = 255; // B
      }
    }

    const pixelData: PixelData = {
      dataPtr: rgbData,
      sizes: [height, width, channels],
      scalarType: ScalarType.BYTE,
    };

    console.log('Running forward with hardcoded pixel data...', {
      sizes: pixelData.sizes,
      dataSize: pixelData.dataPtr.byteLength,
    });

    // Run inference using unified forward() API
    const output = await ssdLite.forward(pixelData, 0.3);
    console.log('Pixel data result:', output.length, 'detections');
    setResults(output);
  } catch (e) {
    console.error('Error in runForwardPixels:', e);
  }
};

if (!ssdLite.isReady) {
return (
<Spinner
Expand Down Expand Up @@ -81,10 +128,41 @@ export default function ObjectDetectionScreen() {
)}
</View>
</View>
<BottomBar
handleCameraPress={handleCameraPress}
runForward={runForward}
/>

{/* Custom bottom bar with two buttons */}
<View style={styles.bottomContainer}>
<View style={styles.bottomIconsContainer}>
<TouchableOpacity onPress={() => handleCameraPress(false)}>
<Text style={styles.iconText}>📷 Gallery</Text>
</TouchableOpacity>
</View>

<View style={styles.buttonsRow}>
<TouchableOpacity
style={[
styles.button,
styles.halfButton,
!imageUri && styles.buttonDisabled,
]}
onPress={runForward}
disabled={!imageUri}
>
<Text style={styles.buttonText}>Run (String)</Text>
</TouchableOpacity>

<TouchableOpacity
style={[
styles.button,
styles.halfButton,
!imageUri && styles.buttonDisabled,
]}
onPress={runForwardPixels}
disabled={!imageUri}
>
<Text style={styles.buttonText}>Run (Pixels)</Text>
</TouchableOpacity>
</View>
</View>
</ScreenWrapper>
);
}
Expand Down Expand Up @@ -129,4 +207,43 @@ const styles = StyleSheet.create({
width: '100%',
height: '100%',
},
bottomContainer: {
width: '100%',
gap: 15,
alignItems: 'center',
padding: 16,
flex: 1,
},
bottomIconsContainer: {
flexDirection: 'row',
justifyContent: 'center',
width: '100%',
},
iconText: {
fontSize: 16,
color: ColorPalette.primary,
},
buttonsRow: {
flexDirection: 'row',
width: '100%',
gap: 10,
},
button: {
height: 50,
justifyContent: 'center',
alignItems: 'center',
backgroundColor: ColorPalette.primary,
color: '#fff',
borderRadius: 8,
},
halfButton: {
flex: 1,
},
buttonDisabled: {
opacity: 0.5,
},
buttonText: {
color: '#fff',
fontSize: 16,
},
});
9 changes: 6 additions & 3 deletions apps/computer-vision/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"@react-navigation/native": "^7.1.6",
"@shopify/react-native-skia": "2.2.12",
"expo": "^54.0.27",
"expo-build-properties": "~1.0.10",
"expo-constants": "~18.0.11",
"expo-font": "~14.0.10",
"expo-linking": "~8.0.10",
Expand All @@ -30,17 +31,19 @@
"react-native-gesture-handler": "~2.28.0",
"react-native-image-picker": "^7.2.2",
"react-native-loading-spinner-overlay": "^3.0.1",
"react-native-reanimated": "~4.1.1",
"react-native-nitro-image": "0.10.2",
"react-native-nitro-modules": "0.33.4",
"react-native-reanimated": "~4.2.1",
"react-native-safe-area-context": "~5.6.0",
"react-native-screens": "~4.16.0",
"react-native-svg": "15.12.1",
"react-native-svg-transformer": "^1.5.0",
"react-native-worklets": "0.5.1"
"react-native-worklets": "^0.7.2"
},
"devDependencies": {
"@babel/core": "^7.25.2",
"@types/pngjs": "^6.0.5",
"@types/react": "~19.1.10"
"@types/react": "~19.2.0"
},
"private": true
}
4 changes: 2 additions & 2 deletions packages/react-native-executorch/android/gradle.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
RnExecutorch_kotlinVersion=1.7.0
RnExecutorch_minSdkVersion=21
RnExecutorch_minSdkVersion=26
RnExecutorch_targetSdkVersion=31
RnExecutorch_compileSdkVersion=31
RnExecutorch_ndkversion=21.4.7075529
RnExecutorch_ndkversion=21.4.7075529
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,13 @@ class RnExecutorchInstaller {
meta::createConstructorArgsWithCallInvoker<ModelT>(
args, runtime, jsCallInvoker);

auto modelImplementationPtr = std::make_shared<ModelT>(
std::make_from_tuple<ModelT>(constructorArgs));
auto modelImplementationPtr = std::apply(
[](auto &&...unpackedArgs) {
return std::make_shared<ModelT>(
std::forward<decltype(unpackedArgs)>(unpackedArgs)...);
},
std::move(constructorArgs));

auto modelHostObject = std::make_shared<ModelHostObject<ModelT>>(
modelImplementationPtr, jsCallInvoker);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,15 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) {
return {runtime, bigInt};
}

// Marshals a vector of 64-bit integers into a JS array of numbers.
// NOTE(review): each element is widened to double (JS number), so values
// above 2^53 would lose precision — confirm callers only pass small
// magnitudes (e.g. tensor shapes).
inline jsi::Value getJsiValue(const std::vector<int64_t> &vec,
                              jsi::Runtime &runtime) {
  const size_t length = vec.size();
  jsi::Array array(runtime, length);
  for (size_t idx = 0; idx < length; ++idx) {
    const auto element = static_cast<double>(vec[idx]);
    array.setValueAtIndex(runtime, idx, jsi::Value(element));
  }
  return {runtime, array};
}

inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) {
return {runtime, val};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <rnexecutorch/metaprogramming/FunctionHelpers.h>
#include <rnexecutorch/metaprogramming/TypeConcepts.h>
#include <rnexecutorch/models/BaseModel.h>
#include <rnexecutorch/models/VisionModel.h>
#include <rnexecutorch/models/llm/LLM.h>
#include <rnexecutorch/models/ocr/OCR.h>
#include <rnexecutorch/models/speech_to_text/SpeechToText.h>
Expand Down Expand Up @@ -45,12 +46,6 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
"getInputShape"));
}

if constexpr (meta::HasGenerate<Model>) {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::generate>,
"generate"));
}

if constexpr (meta::HasEncode<Model>) {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::encode>,
Expand Down Expand Up @@ -155,9 +150,26 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::stream>,
"stream"));
}

if constexpr (meta::HasGenerateFromString<Model>) {
addFunctions(
JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::generateFromString>,
"generateFromString"));
}

if constexpr (meta::HasGenerateFromFrame<Model>) {
addFunctions(JSI_EXPORT_FUNCTION(
ModelHostObject<Model>, synchronousHostFunction<&Model::streamStop>,
"streamStop"));
ModelHostObject<Model>, visionHostFunction<&Model::generateFromFrame>,
"generateFromFrame"));
}

if constexpr (meta::HasGenerateFromPixels<Model>) {
addFunctions(
JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
promiseHostFunction<&Model::generateFromPixels>,
"generateFromPixels"));
}
}

Expand Down Expand Up @@ -208,6 +220,68 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
}
}

/**
 * Unlike promiseHostFunction, this runs synchronously on the JS thread,
 * which is required for VisionCamera worklet frame processors.
 *
 * The key challenge is argument mapping: the C++ function takes
 * (Runtime, frameData, Rest...) but from the JS side, Runtime is injected
 * automatically and frameData is JS args[0]. The remaining args (Rest...)
 * map to JS args[1..N].
 *
 * This is achieved via TailSignature: it extracts the Rest... parameter pack
 * from the function pointer type, creates a dummy free function with only
 * those types, then uses createArgsTupleFromJsi on that dummy to convert
 * args[1..N] — bypassing the manually-handled frameData at args[0].
 *
 * Argument mapping:
 *   C++ params: (Runtime&, frameData, Rest[0], Rest[1], ...)
 *   JS args:    (          args[0],   args[1], args[2], ...)
 *   JS arg count = C++ arity - 1 (Runtime is injected, not counted)
 *
 */
template <auto FnPtr> JSI_HOST_FUNCTION(visionHostFunction) {
  // Total C++ parameter count of the bound member function, including the
  // injected Runtime& — hence JS callers supply one argument fewer.
  constexpr std::size_t cppArgCount =
      meta::FunctionTraits<decltype(FnPtr)>::arity;
  constexpr std::size_t expectedJsArgs = cppArgCount - 1;

  if (count != expectedJsArgs) {
    throw jsi::JSError(runtime, "Argument count mismatch in vision function");
  }

  try {
    // Dummy free function carrying only the Rest... tail types; lets
    // createArgsTupleFromJsi convert args[1..N] while skipping args[0].
    auto dummyFuncPtr = &meta::TailSignature<decltype(FnPtr)>::dummy;
    auto tailArgsTuple =
        meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime);

    using ReturnType =
        typename meta::FunctionTraits<decltype(FnPtr)>::return_type;

    if constexpr (std::is_void_v<ReturnType>) {
      // Void-returning model call: invoke and hand back `undefined`.
      std::apply(
          [&](auto &&...tailArgs) {
            (model.get()->*FnPtr)(
                runtime, args[0],
                std::forward<decltype(tailArgs)>(tailArgs)...);
          },
          std::move(tailArgsTuple));
      return jsi::Value::undefined();
    } else {
      // Value-returning model call: convert the C++ result back to JSI
      // synchronously (no promise), as required by worklet frame processors.
      auto result = std::apply(
          [&](auto &&...tailArgs) {
            return (model.get()->*FnPtr)(
                runtime, args[0],
                std::forward<decltype(tailArgs)>(tailArgs)...);
          },
          std::move(tailArgsTuple));

      return jsi_conversion::getJsiValue(std::move(result), runtime);
    }
  } catch (const std::exception &e) {
    // Surface native failures to JS as a catchable JSError.
    throw jsi::JSError(runtime, e.what());
  }
}

// A generic host function that resolves a promise with a result of a
// function. JSI arguments are converted to the types provided in the function
// signature, and the return value is converted back to JSI before resolving.
Expand Down
Loading