feat: add integration with vision camera v5#810
feat: add integration with vision camera v5 #810 — NorbertKlockiewicz wants to merge 12 commits into main from
Conversation
ee215f9 to
96f2c14
Compare
96f2c14 to
f3e17e2
Compare
| // Create a simple 320x320 test image (all zeros - black image) | ||
| // In a real scenario, you would load actual image pixel data here | ||
| const width = 320; | ||
| const height = 320; | ||
| const channels = 3; // RGB | ||
|
|
||
| // Create a black image (you can replace this with actual pixel data) | ||
| const rgbData = new Uint8Array(width * height * channels); | ||
|
|
||
| // Optionally, add some test pattern (e.g., white square in center) | ||
| for (let y = 100; y < 220; y++) { | ||
| for (let x = 100; x < 220; x++) { | ||
| const idx = (y * width + x) * 3; | ||
| rgbData[idx + 0] = 255; // R | ||
| rgbData[idx + 1] = 255; // G | ||
| rgbData[idx + 2] = 255; // B | ||
| } | ||
| } | ||
|
|
||
| const pixelData: PixelData = { | ||
| dataPtr: rgbData, | ||
| sizes: [height, width, channels], | ||
| scalarType: ScalarType.BYTE, | ||
| }; | ||
|
|
||
| console.log('Running forward with hardcoded pixel data...', { | ||
| sizes: pixelData.sizes, | ||
| dataSize: pixelData.dataPtr.byteLength, | ||
| }); | ||
|
|
||
| // Run inference using unified forward() API | ||
| const output = await ssdLite.forward(pixelData, 0.3); | ||
| console.log('Pixel data result:', output.length, 'detections'); | ||
| setResults(output); | ||
| } catch (e) { | ||
| console.error('Error in runForwardPixels:', e); |
There was a problem hiding this comment.
I think all the comments from here can be removed, as the code is self-describing.
| // Get target size from model input shape | ||
| const std::vector<int32_t> tensorDims = getAllInputShapes()[0]; | ||
| cv::Size tensorSize = cv::Size(tensorDims[tensorDims.size() - 1], | ||
| tensorDims[tensorDims.size() - 2]); | ||
|
|
||
| cv::Mat rgb; | ||
|
|
||
| // Convert RGBA/BGRA to RGB if needed (for VisionCamera frames) | ||
| if (frame.channels() == 4) { | ||
| // Platform-specific color conversion: | ||
| // iOS uses BGRA format, Android uses RGBA format | ||
| #ifdef __APPLE__ | ||
| // iOS: BGRA → RGB | ||
| cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); | ||
| #else | ||
| // Android: RGBA → RGB | ||
| cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); | ||
| #endif | ||
| } else if (frame.channels() == 3) { | ||
| // Already RGB |
There was a problem hiding this comment.
Again, these comments are not needed; only the comment "Only resize if dimensions don't match" seems to be a valid one.
| auto [inputTensor, originalSize] = | ||
| image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); | ||
| ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { | ||
| std::lock_guard<std::mutex> lock(inference_mutex_); |
There was a problem hiding this comment.
`std::scoped_lock` is superior to `std::lock_guard`, and since we use C++ >= 17, use only `std::scoped_lock` in such situations.
| std::lock_guard<std::mutex> lock(inference_mutex_); | |
| std::scoped_lock<std::mutex> lock(inference_mutex_); |
| // Store original size for postprocessing | ||
| cv::Size originalSize = image.size(); | ||
|
|
||
| // Preprocess the image using model-specific preprocessing | ||
| cv::Mat preprocessed = preprocessFrame(image); | ||
|
|
||
| // Create tensor and run inference |
There was a problem hiding this comment.
These comments are redundant.
| } // namespace rnexecutorch::models::object_detection | ||
|
|
||
| std::vector<types::Detection> | ||
| ObjectDetection::generateFromString(std::string imageSource, |
There was a problem hiding this comment.
Why do you pass the string by copy rather than by const reference? If it's because this function is called via JSI and a const reference fails there, please resolve this comment.
| await moduleInstance.load(model, setDownloadProgress); | ||
| setIsReady(true); | ||
|
|
||
| // Extract runOnFrame worklet from VisionModule if available |
There was a problem hiding this comment.
| // Extract runOnFrame worklet from VisionModule if available |
| // Extract pure JSI function reference (runs on JS thread) | ||
| const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; | ||
|
|
||
| // Return worklet that captures ONLY the JSI function |
There was a problem hiding this comment.
| // Extract pure JSI function reference (runs on JS thread) | |
| const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; | |
| // Return worklet that captures ONLY the JSI function | |
| const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; | |
|
|
||
| // Type detection and routing | ||
| if (typeof input === 'string') { | ||
| // String path → generateFromString() |
There was a problem hiding this comment.
| // String path → generateFromString() |
| 'scalarType' in input && | ||
| input.scalarType === ScalarType.BYTE | ||
| ) { | ||
| // Pixel data → generateFromPixels() |
There was a problem hiding this comment.
| // Pixel data → generateFromPixels() |
| typeof input === 'object' && | ||
| 'dataPtr' in input && | ||
| input.dataPtr instanceof Uint8Array && | ||
| 'sizes' in input && | ||
| Array.isArray(input.sizes) && | ||
| input.sizes.length === 3 && | ||
| 'scalarType' in input && | ||
| input.scalarType === ScalarType.BYTE |
There was a problem hiding this comment.
Huuuh, abstract this into a smaller helper function ;p

Description
Example of how to use the API with vision camera v5: https://gist.github.com/NorbertKlockiewicz/5d62915d16955979c029303591912d6a
For now this PR is in experimental phase so when reviewing please focus on the user facing API + implementation of ObjectDetection both on TypeScript and Native Side. The JSI part of the code isn't production ready yet and requires refactor + comprehensive comments
Introduces a breaking change?
Type of change
Tested on
Testing instructions
Screenshots
Related issues
Checklist
Additional notes