I am using the SSD MobileNetV2 FPNLite 320x320 pretrained model, and I trained it on my custom sign language dataset.
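Since converted SSD models do not all order their four output tensors the same way, it is worth dumping the full tensor layout once before wiring up any buffers. Below is a minimal sketch using tflite_flutter's Interpreter API (the asset path is a placeholder for the actual model):

```dart
import 'package:tflite_flutter/tflite_flutter.dart';

Future<void> inspectModel() async {
  // Placeholder path; substitute the actual model asset.
  final interpreter = await Interpreter.fromAsset('assets/model.tflite');

  // Name, shape, and element type of each input tensor.
  for (final t in interpreter.getInputTensors()) {
    print('input:  ${t.name} ${t.shape} ${t.type}');
  }
  // The output order printed here decides which index holds boxes,
  // classes, scores, and the detection count.
  for (final t in interpreter.getOutputTensors()) {
    print('output: ${t.name} ${t.shape} ${t.type}');
  }
  interpreter.close();
}
```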
Here is my object_detection_service.dart:
```dart
import 'package:camera/camera.dart';
import 'package:tflite_flutter/tflite_flutter.dart';
import 'package:image/image.dart' as img;
import 'dart:developer' as dev;

/// Data passed to the inference isolate. An Interpreter should not be sent
/// across isolates directly; only its native address is transferred, and
/// the isolate rebuilds a handle with Interpreter.fromAddress.
class InferenceData {
  final int interpreterAddress;
  final List<List<List<List<double>>>> inputData;

  InferenceData(this.interpreterAddress, this.inputData);
}

class InferenceResult {
  final List<double> scores;
  final List<List<double>> boxes;
  final List<double> classes;

  InferenceResult(this.scores, this.boxes, this.classes);
}
class ObjectDetectionService {
  static const int inputSize = 320;
  static const double confidenceThreshold = 0.1;

  static void _log(String message) {
    dev.log('[ObjectDetection] $message');
  }

  static Future<List<List<List<List<double>>>>> preprocessImageIsolate(
      CameraImage image) async {
    try {
      final img.Image rgbImage;
      if (image.format.group == ImageFormatGroup.yuv420) {
        rgbImage = _convertYUV420(image);
      } else if (image.format.group == ImageFormatGroup.bgra8888) {
        rgbImage = _convertBGRA8888(image);
      } else {
        throw Exception('Unsupported image format: ${image.format.group}');
      }

      final resized = img.copyResize(
        rgbImage,
        width: inputSize,
        height: inputSize,
        interpolation: img.Interpolation.linear,
      );

      // Build a [1, 320, 320, 3] float input normalized to [0, 1].
      // Note: float exports of TF2 SSD models often expect [-1, 1] instead;
      // verify against the model's expected preprocessing.
      final input = List.generate(
        1,
        (index) => List.generate(
          inputSize,
          (y) => List.generate(
            inputSize,
            (x) => List.generate(
              3,
              (c) {
                final pixel = resized.getPixel(x, y);
                return c == 0
                    ? pixel.r / 255.0
                    : c == 1
                        ? pixel.g / 255.0
                        : pixel.b / 255.0;
              },
            ),
          ),
        ),
      );

      _log('Image preprocessed');
      return input;
    } catch (e, stack) {
      _log('Preprocessing error: $e\n$stack');
      throw Exception('Preprocessing failed: $e');
    }
  }
  static img.Image _convertYUV420(CameraImage image) {
    final width = image.width;
    final height = image.height;
    final yPlane = image.planes[0].bytes;
    final uPlane = image.planes[1].bytes;
    final vPlane = image.planes[2].bytes;
    final yRowStride = image.planes[0].bytesPerRow;
    final uvRowStride = image.planes[1].bytesPerRow;
    final uvPixelStride = image.planes[1].bytesPerPixel!;
    final output = img.Image(width: width, height: height);

    for (int y = 0; y < height; y++) {
      for (int x = 0; x < width; x++) {
        final int yIndex = y * yRowStride + x;
        final int uvIndex = (y ~/ 2) * uvRowStride + (x ~/ 2) * uvPixelStride;
        final yValue = yPlane[yIndex];
        final uValue = uPlane[uvIndex];
        final vValue = vPlane[uvIndex];
        // Standard YUV -> RGB conversion.
        final int r =
            (yValue + (1.370705 * (vValue - 128))).toInt().clamp(0, 255);
        final int g = (yValue -
                (0.698001 * (vValue - 128)) -
                (0.337633 * (uValue - 128)))
            .toInt()
            .clamp(0, 255);
        final int b =
            (yValue + (1.732446 * (uValue - 128))).toInt().clamp(0, 255);
        output.setPixelRgb(x, y, r, g, b);
      }
    }
    return output;
  }

  static img.Image _convertBGRA8888(CameraImage image) {
    return img.Image.fromBytes(
      width: image.width,
      height: image.height,
      bytes: image.planes[0].bytes.buffer,
      order: img.ChannelOrder.bgra,
    );
  }
  static Future<InferenceResult?> runInferenceIsolate(
      InferenceData data) async {
    try {
      final interpreter = Interpreter.fromAddress(data.interpreterAddress);

      // Output buffers must match the tensor shapes including the batch
      // dimension: boxes [1, 100, 4], classes [1, 100], scores [1, 100],
      // count [1]. All four outputs are float32, so fill with doubles
      // (adjust 100 if the model reports a different max detection count).
      final outputBoxes = [
        List.generate(100, (_) => List.filled(4, 0.0)),
      ];
      final outputClasses = [List.filled(100, 0.0)];
      final outputScores = [List.filled(100, 0.0)];
      final outputCount = [0.0];

      // The index -> tensor mapping varies between exports (TF1 vs TF2);
      // verify it against interpreter.getOutputTensors() before trusting it.
      final outputs = {
        0: outputScores,
        1: outputBoxes,
        2: outputCount,
        3: outputClasses,
      };

      interpreter.runForMultipleInputs([data.inputData], outputs);

      // Debug logging
      _log('Scores: ${outputScores[0].take(5).toList()}');
      _log('Classes: ${outputClasses[0].take(5).toList()}');
      _log('First box: ${outputBoxes[0][0]}');

      return InferenceResult(
        outputScores[0],
        outputBoxes[0],
        outputClasses[0],
      );
    } catch (e, stack) {
      _log('Inference error: $e\n$stack');
      return null;
    }
  }
}
```
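One thing worth double-checking in the preprocessing above: pixels are scaled to [0, 1], but float exports of the TF2 SSD models usually expect [-1, 1], while quantized exports take raw uint8 values. A minimal sketch of picking the transform from the input tensor's reported type (a hypothetical helper, not part of the code above):

```dart
import 'package:tflite_flutter/tflite_flutter.dart';

/// Hypothetical helper: choose a per-channel pixel transform based on the
/// model's input tensor type. Assumes float models want [-1, 1] and
/// quantized models want raw 0-255 values.
double Function(num channel) pickNormalizer(Interpreter interpreter) {
  // A string check avoids depending on the exact enum name, which has
  // changed across tflite_flutter versions.
  final typeName = interpreter.getInputTensor(0).type.toString();
  if (typeName.contains('uint8')) {
    return (c) => c.toDouble(); // quantized: feed raw bytes
  }
  return (c) => (c / 127.5) - 1.0; // float: (2 * x / 255) - 1
}
```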
And here is my scan_controller.dart:
```dart
import 'dart:developer' as dev;
import 'package:camera/camera.dart';
import 'package:get/get.dart';
import 'package:tflite_flutter/tflite_flutter.dart';
import 'package:flutter/services.dart';
import 'package:flutter/foundation.dart';
import 'camera_service.dart';
import 'object_detection_service.dart';

class ScanController extends GetxController {
  final List<CameraDescription> cameras;
  late CameraService cameraService;
  late ObjectDetectionService detectionService;
  Interpreter? interpreter;
  List<String> labels = [];

  RxBool isCameraInitialized = false.obs;
  RxString errorMessage = ''.obs;
  RxString label = ''.obs;
  RxDouble x = 0.0.obs;
  RxDouble y = 0.0.obs;
  RxDouble w = 0.0.obs;
  RxDouble h = 0.0.obs;
  RxBool isProcessing = false.obs;
  RxBool isTabActive = true.obs;

  static const String modelPath = 'assets/model.tflite';
  static const String labelsPath = 'assets/labels.txt';
  static const Duration _processingInterval = Duration(milliseconds: 100);
  DateTime _lastProcessingTime = DateTime.now();

  ScanController({required this.cameras}) {
    cameraService = CameraService(cameras: cameras);
    detectionService = ObjectDetectionService();
  }

  void _log(String message) {
    dev.log('[ScanController] $message');
  }

  @override
  void onInit() {
    super.onInit();
    _initialize();
  }

  @override
  void onClose() {
    disposeResources();
    super.onClose();
  }
  Future<void> _initialize() async {
    try {
      _log('Starting initialization');
      final options = InterpreterOptions()..threads = 4;
      interpreter = await Interpreter.fromAsset(
        modelPath,
        options: options,
      );

      // Log interpreter details
      final inputTensor = interpreter!.getInputTensor(0);
      final outputTensor = interpreter!.getOutputTensor(0);
      _log('Input tensor shape: ${inputTensor.shape}');
      _log('Output tensor shape: ${outputTensor.shape}');

      await loadLabels();
      await initializeCamera();
      _log('Initialization complete');
    } catch (e, stack) {
      errorMessage.value = 'Initialization error: $e';
      _log('Initialization error: $e\n$stack');
    }
  }

  Future<void> loadLabels() async {
    try {
      final labelData = await rootBundle.loadString(labelsPath);
      labels = labelData
          .split('\n')
          .where((label) => label.trim().isNotEmpty)
          .toList();
      _log('Labels loaded: ${labels.length}');
      _log('First 5 labels: ${labels.take(5).toList()}');
    } catch (e) {
      _log('Error loading labels: $e');
      rethrow;
    }
  }

  Future<void> initializeCamera() async {
    try {
      await cameraService.initialize();
      if (isTabActive.value) {
        await startCamera();
      }
      isCameraInitialized.value = true;
    } catch (e) {
      errorMessage.value = e.toString();
      _log('Camera initialization error: $e');
      rethrow;
    }
  }

  Future<void> startCamera() async {
    if (!isCameraInitialized.value) return;
    await cameraService.startImageStream(_processCameraImage);
  }

  Future<void> stopCamera() async {
    await cameraService.stopImageStream();
  }

  Future<void> disposeResources() async {
    try {
      await cameraService.dispose();
      interpreter?.close();
      isProcessing.value = false;
      isCameraInitialized.value = false;
    } catch (e) {
      _log('Error during resource disposal: $e');
    }
  }
  Future<void> _processCameraImage(CameraImage image) async {
    if (isProcessing.value) return;
    final now = DateTime.now();
    if (now.difference(_lastProcessingTime) < _processingInterval) return;
    _lastProcessingTime = now;
    isProcessing.value = true;
    try {
      if (interpreter == null) {
        _log('Interpreter not ready');
        return;
      }
      final inputData = await compute(
        ObjectDetectionService.preprocessImageIsolate,
        image,
      );
      _log('Image preprocessed');
      // Pass the interpreter's native address rather than the object
      // itself so the isolate can rebuild a handle to the same model.
      final outputs = await compute(
        ObjectDetectionService.runInferenceIsolate,
        InferenceData(interpreter!.address, inputData),
      );
      _log('Inference run completed');
      if (outputs != null) {
        _processDetections(
          outputs.scores,
          outputs.boxes,
          outputs.classes,
        );
      }
    } catch (e, stack) {
      _log('Processing error: $e\n$stack');
    } finally {
      isProcessing.value = false;
    }
  }
  void _processDetections(
    List<double> scores,
    List<List<double>> boxes,
    List<double> classes,
  ) {
    try {
      double maxScore = 0;
      int maxIndex = -1;
      _log('Processing detections:');
      _log('Scores: ${scores.take(5)}');
      _log('Classes: ${classes.take(5)}');
      for (var i = 0; i < scores.length; i++) {
        if (scores[i] > maxScore &&
            scores[i] > ObjectDetectionService.confidenceThreshold) {
          maxScore = scores[i];
          maxIndex = i;
        }
      }
      if (maxIndex != -1) {
        // Boxes are normalized [ymin, xmin, ymax, xmax].
        final box = boxes[maxIndex];
        // Depending on the export, class indices may be off by one
        // relative to labels.txt; check against the logged values.
        final classIndex = classes[maxIndex].toInt();
        if (classIndex < labels.length) {
          label.value =
              '${labels[classIndex]} ${(maxScore * 100).toStringAsFixed(0)}%';
          y.value = box[0];
          x.value = box[1];
          h.value = box[2] - box[0];
          w.value = box[3] - box[1];
          _log('Detection: ${label.value} at ($x, $y) with size ($w, $h)');
        }
      } else {
        label.value = '';
      }
    } catch (e, stack) {
      _log('Detection processing error: $e\n$stack');
    }
  }
}
```
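To draw the detection on screen, the normalized values stored in x, y, w, h still have to be scaled to the preview widget. A minimal sketch, assuming a preview of previewWidth by previewHeight logical pixels (both names are placeholders):

```dart
import 'package:flutter/widgets.dart';

/// Hypothetical helper: converts the controller's normalized box
/// (x, y, w, h in [0, 1]) into a Rect in preview coordinates.
Rect boxToPreviewRect({
  required double x,
  required double y,
  required double w,
  required double h,
  required double previewWidth,
  required double previewHeight,
}) {
  return Rect.fromLTWH(
    x * previewWidth,
    y * previewHeight,
    w * previewWidth,
    h * previewHeight,
  );
}
```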
I have been working on this for weeks now and I don't know what else to do. I am also new to programming.