diff --git a/.gitignore b/.gitignore
index 1b01344..063e1ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,3 @@
-# ignore all mp4/mov in the YOLO Demo subfolder
-src/_archive_legacy/models/deepSORT/CNN/YOLO Demo/*.mp4
-src/_archive_legacy/models/deepSORT/CNN/YOLO Demo/*.mov
-
-# but keep this one file
-!src/_archive_legacy/models/deepSORT/CNN/YOLO Demo/Kestrel_Tracking_demo_Clip.mp4
-
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/docs/Model Team/README.md b/docs/Model Team/README.md
new file mode 100644
index 0000000..f5388b4
--- /dev/null
+++ b/docs/Model Team/README.md
@@ -0,0 +1,15 @@
+# Model Team Documentation
+## Pipeline Overview
+The model team is responsible for tracking people (PEV riders) in front of the drone.
+- The pipeline starts by feeding the drone's camera stream to the `kestrel_vision` ROS2 package.
+- `kestrel_vision` contains `vision_node.py`, which creates the ROS2 node that subscribes to the camera, runs YOLO and our CNN, and publishes the resulting detections (bounding box + feature embedding); a minimal sketch of this flow is shown below.
+- Next, `kestrel_tracking` contains `tracking_node.py`, which similarly creates the node that subscribes to the vision node's detections and begins tracking them. This involves initializing a TrackManager and publishing the associated Tracks.
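+
+A hypothetical minimal sketch of the vision side of this flow is shown below. The topic name, node name, and model path are placeholders, and the step that publishes the detections (bounding boxes + feature embeddings) is omitted; see `kestrel_vision/vision_node.py` for the actual implementation:
+
+```python
+# Hypothetical sketch: topic name, node name, and model path are placeholders,
+# not the actual kestrel_vision interfaces.
+import rclpy
+from rclpy.node import Node
+from sensor_msgs.msg import Image
+from cv_bridge import CvBridge
+from ultralytics import YOLO
+
+class MinimalVisionNode(Node):
+    def __init__(self):
+        super().__init__('minimal_vision_node')
+        self.bridge = CvBridge()
+        self.model = YOLO('yolov8n.pt')
+        # Subscribe to the drone's camera (placeholder topic name)
+        self.sub = self.create_subscription(Image, '/camera/image_raw', self.on_image, 10)
+
+    def on_image(self, msg):
+        # Convert the ROS Image to an OpenCV frame and run YOLO on it
+        frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
+        boxes = self.model(frame)[0].boxes
+        # The real node would publish these detections for kestrel_tracking to consume
+        self.get_logger().info(f'detected {len(boxes)} objects')
+
+def main():
+    rclpy.init()
+    node = MinimalVisionNode()
+    rclpy.spin(node)
+    node.destroy_node()
+    rclpy.shutdown()
+
+if __name__ == '__main__':
+    main()
+```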
+
+## Pipeline Specifics
+### kestrel_vision
+
+## ROS Topics/Services
+
+## Key Parameters
+
+## Debug Checklist
diff --git a/models/best_model_checkpoint.pth b/models/best_model_checkpoint.pth
new file mode 100644
index 0000000..5f704df
Binary files /dev/null and b/models/best_model_checkpoint.pth differ
diff --git a/models/requirements.txt b/models/requirements.txt
index beb9057..bf87520 100644
--- a/models/requirements.txt
+++ b/models/requirements.txt
@@ -1,210 +1,227 @@
# To ensure app dependencies are ported from your virtual environment/host machine into your container, run 'pip freeze > requirements.txt' in the terminal to overwrite this file
-absl-py==2.1.0
-annotated-types==0.7.0
-asttokens==2.4.1
-astunparse==1.6.3
-attrs==24.2.0
-babel==2.16.0
-backcall==0.2.0
-beautifulsoup4==4.12.3
-bleach==6.1.0
-certifi==2024.8.30
-charset-normalizer==3.3.2
-click==8.1.7
-colorama==0.4.6
-coloredlogs==15.0.1
-comet-ml==3.47.0
-comm==0.2.2
-configobj==5.0.9
-contourpy==1.3.0
-coverage==7.6.1
+action-msgs==2.0.3
+action-tutorials-interfaces==0.33.6
+action-tutorials-py==0.33.6
+actionlib-msgs==5.3.6
+ament-cmake-test==2.5.4
+ament-copyright==0.17.3
+ament-cppcheck==0.17.3
+ament-cpplint==0.17.3
+ament-flake8==0.17.3
+ament-index-python==1.8.1
+ament-lint==0.17.3
+ament-lint-cmake==0.17.3
+ament-package==0.16.4
+ament-pep257==0.17.3
+ament-uncrustify==0.17.3
+ament-xmllint==0.17.3
+angles==1.16.0
+argcomplete==3.6.3
+builtin-interfaces==2.0.3
+catkin-pkg==1.1.0
+certifi==2025.11.12
+charset-normalizer==3.4.4
+colcon-argcomplete==0.3.3
+colcon-bash==0.5.0
+colcon-cd==0.1.1
+colcon-cmake==0.2.29
+colcon-common-extensions==0.3.0
+colcon-core==0.20.1
+colcon-defaults==0.2.9
+colcon-devtools==0.3.0
+colcon-library-path==0.2.1
+colcon-metadata==0.2.5
+colcon-notification==0.3.0
+colcon-output==0.2.13
+colcon-package-information==0.4.0
+colcon-package-selection==0.2.10
+colcon-parallel-executor==0.4.0
+colcon-pkg-config==0.1.0
+colcon-powershell==0.4.0
+colcon-python-setup-py==0.2.9
+colcon-recursive-crawl==0.2.3
+colcon-ros==0.5.0
+colcon-test-result==0.3.8
+colcon-zsh==0.5.0
+composition-interfaces==2.0.3
+contourpy==1.3.3
+coverage==7.13.0
+cv-bridge==4.1.0
cycler==0.12.1
-Cython==3.0.11
-debugpy==1.8.6
-decorator==5.1.1
-deepsparse==1.8.0
-defusedxml==0.7.1
-dulwich==0.22.1
-easydict==1.13
-everett==3.1.0
-exceptiongroup==1.2.2
-executing==2.1.0
-fastjsonschema==2.20.0
-filelock==3.16.1
-filterpy==1.4.5
-flatbuffers==24.3.25
-fonttools==4.54.1
-fsspec==2024.9.0
-future==1.0.0
-gast==0.6.0
-geocoder==1.38.1
-ghp-import==2.1.0
-gitdb==4.0.11
-GitPython==3.1.43
-google-pasta==0.2.0
-GPUtil==1.4.0
-gputils==1.0.6
-griffe==1.3.2
-grpcio==1.66.2
-h5py==3.12.1
-humanfriendly==10.0
-idna==3.10
-imageio==2.36.0
-img2unicode==0.1a11
-iniconfig==2.0.0
-ipykernel==6.29.5
-ipython==8.12.0
-jedi==0.19.1
-Jinja2==3.1.4
-joblib==1.4.2
-jsonschema==4.23.0
-jsonschema-specifications==2023.12.1
-jupyter_client==8.6.3
-jupyter_core==5.7.2
-jupyterlab_pygments==0.3.0
-jupytext==1.16.4
-keras==3.5.0
-kiwisolver==1.4.7
-lapx==0.5.10.post1
-lazy_loader==0.4
-libclang==18.1.1
-Markdown==3.7
-markdown-it-py==3.0.0
-MarkupSafe==2.1.5
-matplotlib==3.9.2
-matplotlib-inline==0.1.7
-matplotlib-terminal==0.1a4
-mdit-py-plugins==0.4.2
-mdurl==0.1.2
-merge-args==0.1.5
-mergedeep==1.3.4
-mistune==3.0.2
-mkdocs==1.6.1
-mkdocs-autorefs==1.2.0
-mkdocs-get-deps==0.2.0
-mkdocs-jupyter==0.25.0
-mkdocs-macros-plugin==1.2.0
-mkdocs-material==9.5.39
-mkdocs-material-extensions==1.3.1
-mkdocs-redirects==1.2.1
-mkdocs-ultralytics-plugin==0.1.9
-mkdocstrings==0.26.1
-mkdocstrings-python==1.11.1
-ml-dtypes==0.4.1
+demo-nodes-py==0.33.6
+diagnostic-msgs==5.3.6
+distlib==0.4.0
+docutils==0.22.4
+domain-coordinator==0.12.0
+empy==4.2
+example-interfaces==0.12.0
+examples-rclpy-executors==0.19.6
+examples-rclpy-minimal-action-client==0.19.6
+examples-rclpy-minimal-action-server==0.19.6
+examples-rclpy-minimal-client==0.19.6
+examples-rclpy-minimal-publisher==0.19.6
+examples-rclpy-minimal-service==0.19.6
+examples-rclpy-minimal-subscriber==0.19.6
+filelock==3.20.1
+fonttools==4.61.1
+fsspec==2025.12.0
+geometry-msgs==5.3.6
+idna==3.11
+image-geometry==4.1.0
+iniconfig==2.3.0
+interactive-markers==2.5.4
+Jinja2==3.1.6
+kiwisolver==1.4.9
+lark-parser==0.12.0
+laser-geometry==2.7.1
+launch==3.4.6
+launch-ros==0.26.8
+launch-testing==3.4.6
+launch-testing-ros==0.26.8
+launch-xml==3.4.6
+launch-yaml==3.4.6
+lifecycle-msgs==2.0.3
+logging-demo==0.33.6
+map-msgs==2.4.1
+MarkupSafe==3.0.3
+matplotlib==3.10.8
+message-filters==4.11.7
mpmath==1.3.0
-n2==0.1.7
-namex==0.0.8
-nbclient==0.10.0
-nbconvert==7.16.4
-nbformat==5.10.4
-nest-asyncio==1.6.0
-networkx==3.3
-nm-yolov5==1.7.0.60200
-numpy==1.23.5
-nvidia-cublas-cu12==12.4.5.8
-nvidia-cuda-cupti-cu12==12.4.127
-nvidia-cuda-nvrtc-cu12==12.4.127
-nvidia-cuda-runtime-cu12==12.4.127
-nvidia-cudnn-cu12==9.1.0.70
-nvidia-cufft-cu12==11.2.1.3
-nvidia-curand-cu12==10.3.5.147
-nvidia-cusolver-cu12==11.6.1.9
-nvidia-cusparse-cu12==12.3.1.170
-nvidia-nccl-cu12==2.21.5
-nvidia-nvjitlink-cu12==12.4.127
-nvidia-nvtx-cu12==12.4.127
-onnx==1.14.1
-onnxruntime==1.19.2
-onnxslim==0.1.34
-opencv-python==4.6.0.66
-opt_einsum==3.4.0
-optree==0.12.1
-packaging==24.1
-paginate==0.5.7
-pandas==2.2.3
-pandocfilters==1.5.1
-parso==0.8.4
-pathspec==0.12.1
-pexpect==4.9.0
-pickleshare==0.7.5
-Pillow==9.5.0
-platformdirs==4.3.6
-pluggy==1.5.0
-prompt_toolkit==3.0.48
-protobuf==3.20.3
-psutil==6.0.0
-ptyprocess==0.7.0
-pure_eval==0.2.3
-py-cpuinfo==9.0.0
-py-machineid==0.6.0
-pydantic==2.7.4
-pydantic_core==2.18.4
-Pygments==2.18.0
-pymdown-extensions==10.11.2
-pyparsing==3.1.4
-PyQt5==5.15.9
-pyqt5-plugins==5.15.9.2.3
-PyQt5-Qt5==5.15.2
-pyqt5-tools==5.15.9.3.3
-PyQt5_sip==12.15.0
-pytest==8.3.3
-pytest-cov==5.0.0
-python-box==6.1.0
+nav-msgs==5.3.6
+networkx==3.6.1
+notify2==0.3.1
+numpy==1.26.4 # Required for compatibility ("numpy < 2")
+nvidia-cublas-cu12==12.8.4.1
+nvidia-cuda-cupti-cu12==12.8.90
+nvidia-cuda-nvrtc-cu12==12.8.93
+nvidia-cuda-runtime-cu12==12.8.90
+nvidia-cudnn-cu12==9.10.2.21
+nvidia-cufft-cu12==11.3.3.83
+nvidia-cufile-cu12==1.13.1.3
+nvidia-curand-cu12==10.3.9.90
+nvidia-cusolver-cu12==11.7.3.90
+nvidia-cusparse-cu12==12.5.8.93
+nvidia-cusparselt-cu12==0.7.1
+nvidia-nccl-cu12==2.27.5
+nvidia-nvjitlink-cu12==12.8.93
+nvidia-nvshmem-cu12==3.3.20
+nvidia-nvtx-cu12==12.8.90
+opencv-python==4.12.0.88
+osrf-pycommon==2.1.7
+packaging==25.0
+pcl-msgs==1.0.0
+pendulum-msgs==0.33.6
+pillow==12.0.0
+pluggy==1.6.0
+polars==1.36.1
+polars-runtime-32==1.36.1
+psutil==7.2.0
+Pygments==2.19.2
+pyparsing==3.3.1
+pytest==9.0.2
+pytest-cov==7.0.0
+pytest-repeat==0.9.4
+pytest-rerunfailures==16.1
python-dateutil==2.9.0.post0
-python-dotenv==1.0.1
-pytz==2024.2
-PyYAML==6.0.2
-pyyaml_env_tag==0.1
-pyzmq==26.2.0
-qt5-applications==5.15.2.2.3
-qt5-tools==5.15.2.1.3
-ratelim==0.1.6
-referencing==0.35.1
-regex==2024.9.11
-requests==2.32.3
-requests-toolbelt==1.0.0
-rich==13.8.1
-rpds-py==0.20.0
-scikit-image==0.24.0
-scikit-learn==1.5.2
-scipy==1.14.1
-seaborn==0.13.2
-semantic-version==2.10.0
-sentry-sdk==2.15.0
-simplejson==3.19.3
-six==1.16.0
-smmap==5.0.1
-soupsieve==2.6
-sparseml==1.8.0
-sparsezoo==1.8.1
-stack-data==0.6.3
-sympy==1.13.1
-tensorboard==2.17.1
-tensorboard-data-server==0.7.2
-tensorflow==2.17.0
-tensorflow-io-gcs-filesystem==0.37.1
-termcolor==2.4.0
-thop==0.1.1.post2209072238
-threadpoolctl==3.5.0
-tifffile==2024.9.20
-tinycss2==1.3.0
-tomli==2.0.2
-toposort==1.10
-torch==2.5.1
-torchvision==0.20.1
-tornado==6.4.1
-tqdm==4.66.5
-traitlets==5.14.3
-triton==3.1.0
-typing_extensions==4.12.2
-tzdata==2024.2
-ultralytics @ git+https://github.com/ultralytics/ultralytics.git@cece2ee2cf661502d0fe15656574f486625d8490
-ultralytics-thop==2.0.8
-urllib3==2.2.3
-watchdog==5.0.3
-wcwidth==0.2.13
-webencodings==0.5.1
-Werkzeug==3.0.4
-wrapt==1.16.0
-wurlitzer==3.1.1
\ No newline at end of file
+python-qt-binding==2.2.1
+PyYAML==6.0.3
+qt-dotgraph==2.7.5
+qt-gui==2.7.5
+qt-gui-cpp==2.7.5
+qt-gui-py-common==2.7.5
+quality-of-service-demo-py==0.33.6
+rcl-interfaces==2.0.3
+rclpy==7.1.5
+rcutils==6.7.3
+requests==2.32.5
+resource-retriever==3.4.4
+rmw-dds-common==3.1.0
+ros2action==0.32.5
+ros2bag==0.26.9
+ros2bag-mcap-cli==0.26.9
+ros2bag-sqlite3-cli==0.26.9
+ros2cli==0.32.5
+ros2component==0.32.5
+ros2doctor==0.32.5
+ros2interface==0.32.5
+ros2launch==0.26.8
+ros2lifecycle==0.32.5
+ros2multicast==0.32.5
+ros2node==0.32.5
+ros2param==0.32.5
+ros2pkg==0.32.5
+ros2run==0.32.5
+ros2service==0.32.5
+ros2topic==0.32.5
+rosbag2-interfaces==0.26.9
+rosbag2-py==0.26.9
+rosgraph-msgs==2.0.3
+rosidl-adapter==4.6.6
+rosidl-cli==4.6.6
+rosidl-cmake==4.6.6
+rosidl-generator-c==4.6.6
+rosidl-generator-cpp==4.6.6
+rosidl-generator-py==0.22.1
+rosidl-generator-type-description==4.6.6
+rosidl-parser==4.6.6
+rosidl-pycommon==4.6.6
+rosidl-runtime-py==0.13.1
+rosidl-typesupport-c==3.2.2
+rosidl-typesupport-cpp==3.2.2
+rosidl-typesupport-fastrtps-c==3.6.2
+rosidl-typesupport-fastrtps-cpp==3.6.2
+rosidl-typesupport-introspection-c==4.6.6
+rosidl-typesupport-introspection-cpp==4.6.6
+rpyutils==0.4.1
+rqt-action==2.2.0
+rqt-bag==1.5.5
+rqt-bag-plugins==1.5.5
+rqt-console==2.2.1
+rqt-graph==1.5.5
+rqt-gui==1.6.0
+rqt-gui-py==1.6.0
+rqt-msg==1.5.1
+rqt-plot==1.4.4
+rqt-publisher==1.7.2
+rqt-py-common==1.6.0
+rqt-py-console==1.2.2
+rqt-reconfigure==1.6.2
+rqt-service-caller==1.2.1
+rqt-shell==1.2.2
+rqt-srv==1.2.2
+rqt-topic==1.7.3
+scipy==1.16.3
+sensor-msgs==5.3.6
+sensor-msgs-py==5.3.6
+service-msgs==2.0.3
+setuptools==79.0.1
+shape-msgs==5.3.6
+six==1.17.0
+sros2==0.13.4
+statistics-msgs==2.0.3
+std-msgs==5.3.6
+std-srvs==5.3.6
+stereo-msgs==5.3.6
+sympy==1.14.0
+teleop-twist-keyboard==2.4.0
+tf2-geometry-msgs==0.36.14
+tf2-kdl==0.36.14
+tf2-msgs==0.36.14
+tf2-py==0.36.14
+tf2-ros-py==0.36.14
+tf2-sensor-msgs==0.36.14
+tf2-tools==0.36.14
+topic-monitor==0.33.6
+torch==2.9.1
+torchvision==0.24.1
+trajectory-msgs==5.3.6
+triton==3.5.1
+turtlesim==1.8.3
+type-description-interfaces==2.0.3
+typing_extensions==4.15.0
+ultralytics==8.3.241
+ultralytics-thop==2.0.18
+unique-identifier-msgs==2.5.0
+urllib3==2.6.2
+visualization-msgs==5.3.6
\ No newline at end of file
diff --git a/models/yolov8n.pt b/models/yolov8n.pt
index e69de29..0db4ca4 100644
Binary files a/models/yolov8n.pt and b/models/yolov8n.pt differ
diff --git a/src/_archive_legacy/models/SORT/kalmanTest.py b/src/_archive_legacy/models/SORT/kalmanTest.py
index 00c279a..315bf7c 100644
--- a/src/_archive_legacy/models/SORT/kalmanTest.py
+++ b/src/_archive_legacy/models/SORT/kalmanTest.py
@@ -1,111 +1,111 @@
-import cv2
-from ultralytics import YOLO
-# https://github.com/rlabbe/filterpy/blob/master/filterpy/kalman/kalman_filter.py
-from yoloKalman import KalmanTracker, convert_bbox_to_z
-# import numpy as np
-import sys
-from time import perf_counter
-
-trt_model = YOLO('yolo11m.pt')
-
-# Open the video file
-video_path = 'noResult.mp4'
-# video_path = './cars-stock-footage.mp4'
-cap = cv2.VideoCapture(video_path)
-
-
-# We need to set resolutions.
-# so, convert them from float to integer.
-frame_width = int(cap.get(3))
-frame_height = int(cap.get(4))
-fps = cap.get(cv2.CAP_PROP_FPS)
-
-size = (frame_width, frame_height)
-
-# Define the codec and create a VideoWriter object
-fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-out = cv2.VideoWriter('stream.avi', fourcc, fps, size)
-
-fc = 0
-
-# Frame delay so that the video is displayed with original fps
-# Use "python kalmanTest.py --show" to use
-if len(sys.argv) > 1:
- if sys.argv[1] == "--show":
- delay = int(1000 / fps)
- show_video = True
- else:
- print(f"Invalid argument: \"{sys.argv[1]}\"")
- exit()
-else:
- delay = 1
- show_video = False
-
-while True:
- start_time = perf_counter()
-
- ret, frame = cap.read()
- if not ret:
- break
- fc += 1
-
- # Run inference
- results = trt_model.predict(frame)
-
- # Process results (similar to your existing code)
- for result in results:
- bboxes = result.boxes.xywh.cpu().numpy().astype(int)
- if fc == 1: predBBoxes = KalmanTracker()
- tracks = predBBoxes.bboxes_to_tracks(bboxes)
-
- # TEST YOLO's DETECTIONS
- # for nbbox in bboxes:
- # x1 = int(nbbox[0] - nbbox[2]/2)
- # y1 = int(nbbox[1] - nbbox[3]/2)
- # x2 = int(nbbox[0] + nbbox[2]/2)
- # y2 = int(nbbox[1] + nbbox[3]/2)
- # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-
- # TEST TRACK CREATION AND DELETION ALGORITHM
- # for track in tracks:
- # nbbox = track["history"][-1]
- # x1 = int(nbbox[0] - nbbox[2]/2)
- # y1 = int(nbbox[1] - nbbox[3]/2)
- # x2 = int(nbbox[0] + nbbox[2]/2)
- # y2 = int(nbbox[1] + nbbox[3]/2)
- # cv2.putText(
- # frame,
- # str(track["id"]),
- # (int(x1), int(y1) - 10),
- # fontFace = cv2.FONT_HERSHEY_SIMPLEX,
- # fontScale = 0.6,
- # color = (0, 255, 0),
- # thickness=2
- # )
- # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-
- # TEST KALMAN FILTER PREDICTIONS
- for nbbox in predBBoxes.pred_tracks():
- x1 = int(nbbox[0] - nbbox[2]/2)
- y1 = int(nbbox[1] - nbbox[3]/2)
- x2 = int(nbbox[0] + nbbox[2]/2)
- y2 = int(nbbox[1] + nbbox[3]/2)
- cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-
- # Write the frame to the VideoWriter object
- out.write(frame)
-
- if show_video: cv2.imshow("Video with BBoxes (press q to quit)", frame)
-
- end_time = perf_counter()
- op_time_delay = int((end_time - start_time) * 1000)
- adjusted_delay = max(1, int(delay - op_time_delay))
- # If the video is shown, then the delay on the output of each frame is adjusted
- # to keep the video at the original fps.
- if cv2.waitKey(adjusted_delay) & 0xFF == ord('q'):
- break
-
-
-out.release()
-cap.release()
-cv2.destroyAllWindows()
+import cv2
+from ultralytics import YOLO
+# https://github.com/rlabbe/filterpy/blob/master/filterpy/kalman/kalman_filter.py
+from yoloKalman import KalmanTracker, convert_bbox_to_z
+# import numpy as np
+import sys
+from time import perf_counter
+
+trt_model = YOLO('yolo11m.pt')
+
+# Open the video file
+video_path = 'noResult.mp4'
+# video_path = './cars-stock-footage.mp4'
+cap = cv2.VideoCapture(video_path)
+
+
+# Get the video's resolution;
+# cap.get() returns floats, so convert the dimensions to integers.
+frame_width = int(cap.get(3))
+frame_height = int(cap.get(4))
+fps = cap.get(cv2.CAP_PROP_FPS)
+
+size = (frame_width, frame_height)
+
+# Define the codec and create a VideoWriter object
+fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+out = cv2.VideoWriter('stream.avi', fourcc, fps, size)
+
+fc = 0
+
+# Frame delay so that the video is displayed at its original fps.
+# Run "python kalmanTest.py --show" to display the video while processing.
+if len(sys.argv) > 1:
+ if sys.argv[1] == "--show":
+ delay = int(1000 / fps)
+ show_video = True
+ else:
+ print(f"Invalid argument: \"{sys.argv[1]}\"")
+ exit()
+else:
+ delay = 1
+ show_video = False
+
+while True:
+ start_time = perf_counter()
+
+ ret, frame = cap.read()
+ if not ret:
+ break
+ fc += 1
+
+ # Run inference
+ results = trt_model.predict(frame)
+
+    # Process detection results
+ for result in results:
+ bboxes = result.boxes.xywh.cpu().numpy().astype(int)
+ if fc == 1: predBBoxes = KalmanTracker()
+ tracks = predBBoxes.bboxes_to_tracks(bboxes)
+
+ # TEST YOLO's DETECTIONS
+ # for nbbox in bboxes:
+ # x1 = int(nbbox[0] - nbbox[2]/2)
+ # y1 = int(nbbox[1] - nbbox[3]/2)
+ # x2 = int(nbbox[0] + nbbox[2]/2)
+ # y2 = int(nbbox[1] + nbbox[3]/2)
+ # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+
+ # TEST TRACK CREATION AND DELETION ALGORITHM
+ # for track in tracks:
+ # nbbox = track["history"][-1]
+ # x1 = int(nbbox[0] - nbbox[2]/2)
+ # y1 = int(nbbox[1] - nbbox[3]/2)
+ # x2 = int(nbbox[0] + nbbox[2]/2)
+ # y2 = int(nbbox[1] + nbbox[3]/2)
+ # cv2.putText(
+ # frame,
+ # str(track["id"]),
+ # (int(x1), int(y1) - 10),
+ # fontFace = cv2.FONT_HERSHEY_SIMPLEX,
+ # fontScale = 0.6,
+ # color = (0, 255, 0),
+ # thickness=2
+ # )
+ # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+
+ # TEST KALMAN FILTER PREDICTIONS
+ for nbbox in predBBoxes.pred_tracks():
+ x1 = int(nbbox[0] - nbbox[2]/2)
+ y1 = int(nbbox[1] - nbbox[3]/2)
+ x2 = int(nbbox[0] + nbbox[2]/2)
+ y2 = int(nbbox[1] + nbbox[3]/2)
+ cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+
+ # Write the frame to the VideoWriter object
+ out.write(frame)
+
+ if show_video: cv2.imshow("Video with BBoxes (press q to quit)", frame)
+
+ end_time = perf_counter()
+ op_time_delay = int((end_time - start_time) * 1000)
+ adjusted_delay = max(1, int(delay - op_time_delay))
+ # If the video is shown, then the delay on the output of each frame is adjusted
+ # to keep the video at the original fps.
+ if cv2.waitKey(adjusted_delay) & 0xFF == ord('q'):
+ break
+
+
+out.release()
+cap.release()
+cv2.destroyAllWindows()
diff --git a/src/_archive_legacy/models/SORT/new_test_sort.py b/src/_archive_legacy/models/SORT/new_test_sort.py
index 1ada4e9..7678ec6 100644
--- a/src/_archive_legacy/models/SORT/new_test_sort.py
+++ b/src/_archive_legacy/models/SORT/new_test_sort.py
@@ -1,109 +1,109 @@
-import cv2
-from ultralytics import YOLO
-from sort import Sort
-import numpy as np
-from sklearn.cluster import KMeans
-
-trt_model = YOLO('yolo11s.pt')
-
-# Open the video file
-video_path = 'people.avi'
-cap = cv2.VideoCapture(video_path)
-
-
-# get the video's dimmension
-frame_width = int(cap.get(3))
-frame_height = int(cap.get(4))
-fps = cap.get(cv2.CAP_PROP_FPS)
-
-size = (frame_width, frame_height)
-
-# Define the codec and create a VideoWriter object
-fourcc = cv2.VideoWriter_fourcc(*'MJPG')
-out = cv2.VideoWriter('stream.avi', fourcc, 20, size)
-
-
-fc = 0
-sort = Sort()
-while True:
- ret, frame = cap.read()
- if not ret:
- break
- fc += 1
-
-
- #initialization sequence
- if (fc == 1):
- blurred_frame = cv2.GaussianBlur(frame, (45, 45), 0)
- point_of_ref = (frame_width//2, frame_height//2)
- mask = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
- mask = cv2.circle(mask, point_of_ref, 150, color=(255, 255, 255), thickness=-1)
- frame = np.where(mask==np.array([255, 255, 255]), frame, blurred_frame)
- # for each subsequent frame
- else:
- blurred_frame = cv2.GaussianBlur(frame, (45, 45), 0)
- mask = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
- mask = cv2.circle(mask, point_of_ref, 150, color=(255, 255, 255), thickness=-1)
- frame = np.where(mask==np.array([255, 255, 255]), frame, blurred_frame)
-
-
- # Run inference
- results = trt_model.predict(frame)
- # Process results
- for result in results:
- # process results as a dataframe (convinience)
- df = result.to_df()
- # filter out objects based on class and confidence score
- df.drop(df[(df['class'] != 0) | (df['confidence'] < .6)].index , inplace=True)
- # get a new array of bounding boxes
- bboxes = np.array([result.boxes.xywh.cpu().numpy().astype(int)[i] for i in df.index.values])
- # get the coordinates of each bbox
- positions = np.array([(x, y) for x, y, _, _, in bboxes])
-
- try:
- # do a kmeans clustering with only 1 cluster
- kmeans = KMeans(n_clusters=1, random_state=0, n_init='auto')
- labels = kmeans.fit_predict(positions).tolist()
- # get the center of that cluster
- centroid = kmeans.cluster_centers_[labels[0]]
- # update our point_of_reference (center of the circle)
- point_of_ref = tuple(centroid.astype(int).tolist())
- # if there are no objects to cluster
- except ValueError:
- # set the point of reference to the center of the screen
- point_of_ref = (frame_width//2, frame_height//2)
-
- # pass the bboxes to SORT
- try:
- tracks = sort.manage_tracks(bboxes, 0.5)
- # go to the next frame if there aren't any
- except:
- break
-
- # draw a bounding box for each track
- for track in tracks:
- nbbox = track["history"][-1]
- x1 = int(nbbox[0] - nbbox[2]/2)
- y1 = int(nbbox[1] - nbbox[3]/2)
- x2 = int(nbbox[0] + nbbox[2]/2)
- y2 = int(nbbox[1] + nbbox[3]/2)
- cv2.putText(
- frame,
- str(track["id"]),
- (int(x1), int(y1) - 10),
- fontFace = cv2.FONT_HERSHEY_SIMPLEX,
- fontScale = 0.6,
- color = (0, 255, 0),
- thickness=2
- )
- cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-
- # Write the frame to the VideoWriter object
- out.write(frame)
- if cv2.waitKey(1) & 0xFF == ord('q'):
- break
-
-
-out.release()
-cap.release()
+import cv2
+from ultralytics import YOLO
+from sort import Sort
+import numpy as np
+from sklearn.cluster import KMeans
+
+trt_model = YOLO('yolo11s.pt')
+
+# Open the video file
+video_path = 'people.avi'
+cap = cv2.VideoCapture(video_path)
+
+
+# get the video's dimensions and frame rate
+frame_width = int(cap.get(3))
+frame_height = int(cap.get(4))
+fps = cap.get(cv2.CAP_PROP_FPS)
+
+size = (frame_width, frame_height)
+
+# Define the codec and create a VideoWriter object
+fourcc = cv2.VideoWriter_fourcc(*'MJPG')
+out = cv2.VideoWriter('stream.avi', fourcc, 20, size)
+
+
+fc = 0
+sort = Sort()
+while True:
+ ret, frame = cap.read()
+ if not ret:
+ break
+ fc += 1
+
+
+    # Blur everything outside a 150 px circle around the point of reference
+    # (the frame centre on the first frame)
+ if (fc == 1):
+ blurred_frame = cv2.GaussianBlur(frame, (45, 45), 0)
+ point_of_ref = (frame_width//2, frame_height//2)
+ mask = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
+ mask = cv2.circle(mask, point_of_ref, 150, color=(255, 255, 255), thickness=-1)
+ frame = np.where(mask==np.array([255, 255, 255]), frame, blurred_frame)
+ # for each subsequent frame
+ else:
+ blurred_frame = cv2.GaussianBlur(frame, (45, 45), 0)
+ mask = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
+ mask = cv2.circle(mask, point_of_ref, 150, color=(255, 255, 255), thickness=-1)
+ frame = np.where(mask==np.array([255, 255, 255]), frame, blurred_frame)
+
+
+ # Run inference
+ results = trt_model.predict(frame)
+ # Process results
+ for result in results:
+        # process results as a dataframe (for convenience)
+        df = result.to_df()
+        # keep only person detections (class 0) with confidence >= 0.6
+        df.drop(df[(df['class'] != 0) | (df['confidence'] < .6)].index, inplace=True)
+ # get a new array of bounding boxes
+ bboxes = np.array([result.boxes.xywh.cpu().numpy().astype(int)[i] for i in df.index.values])
+        # get the center coordinates of each bbox
+        positions = np.array([(x, y) for x, y, _, _ in bboxes])
+
+ try:
+ # do a kmeans clustering with only 1 cluster
+ kmeans = KMeans(n_clusters=1, random_state=0, n_init='auto')
+ labels = kmeans.fit_predict(positions).tolist()
+ # get the center of that cluster
+ centroid = kmeans.cluster_centers_[labels[0]]
+ # update our point_of_reference (center of the circle)
+ point_of_ref = tuple(centroid.astype(int).tolist())
+ # if there are no objects to cluster
+ except ValueError:
+ # set the point of reference to the center of the screen
+ point_of_ref = (frame_width//2, frame_height//2)
+
+ # pass the bboxes to SORT
+ try:
+ tracks = sort.manage_tracks(bboxes, 0.5)
+        # stop processing if track management fails (e.g., nothing to track)
+        except Exception:
+ break
+
+ # draw a bounding box for each track
+ for track in tracks:
+ nbbox = track["history"][-1]
+ x1 = int(nbbox[0] - nbbox[2]/2)
+ y1 = int(nbbox[1] - nbbox[3]/2)
+ x2 = int(nbbox[0] + nbbox[2]/2)
+ y2 = int(nbbox[1] + nbbox[3]/2)
+ cv2.putText(
+ frame,
+ str(track["id"]),
+ (int(x1), int(y1) - 10),
+ fontFace = cv2.FONT_HERSHEY_SIMPLEX,
+ fontScale = 0.6,
+ color = (0, 255, 0),
+ thickness=2
+ )
+ cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+
+ # Write the frame to the VideoWriter object
+ out.write(frame)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+
+out.release()
+cap.release()
cv2.destroyAllWindows()
\ No newline at end of file
diff --git a/src/_archive_legacy/models/SORT/sort.py b/src/_archive_legacy/models/SORT/sort.py
index d33910d..28dae50 100644
--- a/src/_archive_legacy/models/SORT/sort.py
+++ b/src/_archive_legacy/models/SORT/sort.py
@@ -1,263 +1,263 @@
-from filterpy.kalman import KalmanFilter
-import numpy as np
-from math import sqrt, pow
-import scipy
-import scipy.optimize
-
-def cost_matrix(dect: np.ndarray, pred: np.ndarray):
- """
- This functions takes in a numpy array of detections from the YOLO models
- and a list of predictions from the kalman filter tracker, and returns a iou cost
- matrix.
- """
-
- # avoid broadcasting issues
- dect = np.expand_dims(dect, axis=1)
- pred = np.expand_dims(pred, axis=0)
-
- # get two points from each detected bbox
- x1D = dect[..., 0] - dect[..., 2]/2
- y1D = dect[..., 1] - dect[..., 3]/2
- x2D = dect[..., 0] + dect[..., 2]/2
- y2D = dect[..., 1] + dect[..., 3]/2
-
- # get two points from each predicted bbox
- x1P = pred[..., 0] - pred[..., 2]/2
- y1P = pred[..., 1] - pred[..., 3]/2
- x2P = pred[..., 0] + pred[..., 2]/2
- y2P = pred[..., 1] + pred[..., 3]/2
-
- # choose the maximum bewteen the two points
- x1 = np.maximum(x1D, x1P)
- y1 = np.maximum(y1D, y1P)
- x2 = np.minimum(x2D, x2P)
- y2 = np.minimum(y2D, y2P)
- intersection_width = np.maximum(0, x2-x1)
- intersection_height = np.maximum(0, y2-y1)
- area_of_intersection = intersection_height * intersection_width
-
- dect_area = dect[..., 3] * dect[..., 2]
- pred_area = pred[..., 3] * pred[..., 2]
- area_of_union = dect_area + pred_area - area_of_intersection
-
- iou_matrix = np.where(area_of_union > 0, area_of_intersection / area_of_union, 0.0)
-
- return iou_matrix
-
-def linear_assignment(costMatrix: np.float64):
- """
- This function will take in a cost matrix and return an array of matched
- indeces for the rows and columns of the matrix throught linear assignment
- """
- # do a linear assignment based on the cost matrix
- # the return is a list of row and column indexes with the optimal assignment
- dect, pred = scipy.optimize.linear_sum_assignment(costMatrix, True)
- # make an array of indexes of detections matched with indexes of predictions
- match_indxs = np.array(list(zip(dect,pred)))
-
- return match_indxs
-
-
-def convert_xywh_to_xyxy(bbox: np.int32):
- x1 = bbox[0] - bbox[2]/2
- x2 = bbox[0] + bbox[2]/2
- y1 = bbox[1] - bbox[3]/2
- y2 = bbox[1] + bbox[3]/2
- z = np.array([x1,y1,x2,y2]).astype(int)
- return z
-
-def convert_xyxy_to_xywh(bbox: np.int32):
- w = bbox[2]-bbox[0]
- h = bbox[3]-bbox[1]
- x = bbox[2] - w/2
- y = bbox[3] - h/2
- z = np.array([x,y,w,h]).astype(int)
- return z
-
-def convert_bbox_to_z(bbox: np.int32):
- w=bbox[2]
- h=bbox[3]
- area = w*h
- aspect_ratio = w/h
- z = np.array([bbox[0], bbox[1], area, aspect_ratio])
- return z
-
-def convert_z_to_bbox(bbox: np.int32):
- w = int(sqrt(abs(bbox[2]*bbox[3])))
- h = int(sqrt(abs( bbox[2]*pow(bbox[3], -1) )))
- new_bbox = np.array([bbox[0], bbox[1], w, h])
- return new_bbox
-
-class Kfilter():
- def __init__(self):
-
-
- # create a kalman filter instance
- self.dim_x = 7
- self.kalman = KalmanFilter(dim_x=self.dim_x, dim_z=4)
-
- #state transition matrix
- self.kalman.F = np.array([[1, 0, 0, 0, 1, 0, 0],
- [0, 1, 0, 0, 0, 1, 0],
- [0, 0, 1, 0, 0, 0, 1],
- [0, 0, 0, 1, 0, 0, 0],
- [0, 0, 0, 0, 1, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [0, 0, 0, 0, 0, 0, 1]])
-
- # measurement function
- self.kalman.H = np.array([[1, 0, 0, 0, 0, 0, 0],
- [0, 1, 0, 0, 0, 0, 0],
- [0, 0, 1, 0, 0, 0, 0],
- [0, 0, 0, 1, 0, 0, 0]])
-
- #covariance matrix, P already contains np.eye(dim_x)
- self.kalman.P *= 1000.
-
- # measurement noise
- self.kalman.R = np.eye(4)
- self.kalman.R *= 10.
-
- # assign the process noise
- self.kalman.Q = np.eye(self.dim_x)
- self.kalman.Q *= 0.01
-
- #set arbitrary initial value
- self.kalman.x = np.array([0, 0, 0, 0, 0, 0, 0])
-
- # make a prediction
- def predict(self):
- self.kalman.predict()
- bbox = convert_z_to_bbox(self.kalman.x)
-
- return bbox
-
- def update(self, bbox):
- bbox = convert_bbox_to_z(bbox)
- self.kalman.update(bbox)
-
-class KalmanTracker():
- def __init__(self):
- self.tracks = list()
- self.id = 0
-
- def create_track(self, bbox):
- track = {
- "id": self.id,
- "filter": Kfilter(),
- "history": [bbox],
- }
- self.id += 1
- return track
-
- def associate_detections(self, preds: list, dects: np.int32, iou_threshold = 0.3):
- """
- This function is in charged of associating the detections matched by the linear
- sum assignment to their corresponding track, and return a list of matched bboxes,
- unmatched bboxes, and unmatched tracks to be deleted
- """
-
-
- iou_matrix = cost_matrix(dects, preds)
- matched_indexes = linear_assignment(iou_matrix)
-
- # get unmatched bounding boxes list from the detections by comparing
- # if any of the indexes in the detections array are not in the
- # matched indeces array
- unmatched_bboxes = list()
- for i, dect in enumerate(dects):
- if i not in matched_indexes[:, 0]:
- unmatched_bboxes.append(i)
-
- # similarly, get the unmatched tracks
- unmatched_tracks = list()
- for j, pred in enumerate(preds):
- if j not in matched_indexes[:, 1]:
- unmatched_tracks.append(j)
-
- # get the matched detections and filter out the low ious
- matched_detections = list()
- for matched in matched_indexes:
- # if the detection doesn't pass the threshold
- if (iou_matrix[matched[0], matched[1]] < iou_threshold):
- # append the index of the detection to the unmatched detection list
- unmatched_bboxes.append(matched[0])
- # append the index of the prediction to the unmatched track list
- unmatched_tracks.append(matched[1])
-
- else:
- # append it to the matched detections list
- matched_detections.append(matched)
-
- # sort the unmatched track list to avoid indexing errors when deleting tracks
- unmatched_tracks.sort(reverse=True)
-
- return matched_detections, unmatched_bboxes, unmatched_tracks
-
- def pred_tracks(self):
- lstOfPred = list()
- if len(self.tracks) == 0:
- return
-
- for track in self.tracks:
- # if the length of the track is just 1, then initialize the kalman filter with that value
- if len(track["history"]) == 1:
- track["filter"].kalman.x = np.append(convert_bbox_to_z(track["history"][-1]), [0,0,0])
- # predict the kalman filter, get the new bounding box
- nbbox = track["filter"].predict()
- else:
- nbbox = track["filter"].predict()
-
- lstOfPred.append(nbbox)
-
- return lstOfPred
-
- def update_tracks(self, matched_indexes: list, bboxes: np.ndarray):
- for m in matched_indexes:
- # add on the corresponding dectections to each track's history
- self.tracks[m[1]]["history"].append(bboxes[m[0]])
- # update each matched track filter
- self.tracks[m[1]]["filter"].update(self.tracks[m[1]]["history"][-1])
-
-class Sort(KalmanTracker):
- def __init__(self):
- super().__init__()
- self.bboxes = np.empty((2,3))
-
- def manage_tracks(self, bboxes: np.ndarray, iou_threshold=0.3):
- """
- This function is in charged of managing all the tracks correctly and returns
- a list of tracks
- """
- # get new bounding boxes
- self.bboxes = bboxes
-
- # check to see if there are no tracks
- if (len(self.tracks) == 0):
- for bbox in self.bboxes:
- self.tracks.append(self.create_track(bbox))
- else:
-
- # do a prediction using the kalman filter
- list_predictions = self.pred_tracks()
-
- # get list of matched detections, unmatched detections, and unmatched tracks
- matches, unmatched_detections, unmatched_tracks = self.associate_detections(list_predictions, bboxes, iou_threshold)
-
- # update kalman filter for each track
- self.update_tracks(matches, bboxes)
-
- # if a detection was not associated to any track
- # create a new track for it
- if len(unmatched_detections) > 0:
- for m in unmatched_detections:
- self.tracks.append(self.create_track(bboxes[m]))
-
- # if there is a track that was not matched to any
- # detection, delete it
- if len(unmatched_tracks) > 0:
- for t in unmatched_tracks:
- del self.tracks[t]
-
-
- return self.tracks
+from filterpy.kalman import KalmanFilter
+import numpy as np
+from math import sqrt, pow
+import scipy
+import scipy.optimize
+
+def cost_matrix(dect: np.ndarray, pred: np.ndarray):
+ """
+    Takes a numpy array of detections from the YOLO model and an array of
+    predictions from the Kalman filter tracker, and returns an IoU cost matrix.
+ """
+
+    # add singleton axes so detections and predictions broadcast against each other
+ dect = np.expand_dims(dect, axis=1)
+ pred = np.expand_dims(pred, axis=0)
+
+ # get two points from each detected bbox
+ x1D = dect[..., 0] - dect[..., 2]/2
+ y1D = dect[..., 1] - dect[..., 3]/2
+ x2D = dect[..., 0] + dect[..., 2]/2
+ y2D = dect[..., 1] + dect[..., 3]/2
+
+ # get two points from each predicted bbox
+ x1P = pred[..., 0] - pred[..., 2]/2
+ y1P = pred[..., 1] - pred[..., 3]/2
+ x2P = pred[..., 0] + pred[..., 2]/2
+ y2P = pred[..., 1] + pred[..., 3]/2
+
+    # intersection corners: max of the top-left points, min of the bottom-right points
+ x1 = np.maximum(x1D, x1P)
+ y1 = np.maximum(y1D, y1P)
+ x2 = np.minimum(x2D, x2P)
+ y2 = np.minimum(y2D, y2P)
+ intersection_width = np.maximum(0, x2-x1)
+ intersection_height = np.maximum(0, y2-y1)
+ area_of_intersection = intersection_height * intersection_width
+
+ dect_area = dect[..., 3] * dect[..., 2]
+ pred_area = pred[..., 3] * pred[..., 2]
+ area_of_union = dect_area + pred_area - area_of_intersection
+
+ iou_matrix = np.where(area_of_union > 0, area_of_intersection / area_of_union, 0.0)
+
+ return iou_matrix
+
+def linear_assignment(costMatrix: np.float64):
+ """
+    Takes a cost matrix and returns an array of matched (row, column) index
+    pairs found through linear assignment (maximizing total IoU).
+ """
+ # do a linear assignment based on the cost matrix
+ # the return is a list of row and column indexes with the optimal assignment
+ dect, pred = scipy.optimize.linear_sum_assignment(costMatrix, True)
+ # make an array of indexes of detections matched with indexes of predictions
+ match_indxs = np.array(list(zip(dect,pred)))
+
+ return match_indxs
+
+
+def convert_xywh_to_xyxy(bbox: np.int32):
+ x1 = bbox[0] - bbox[2]/2
+ x2 = bbox[0] + bbox[2]/2
+ y1 = bbox[1] - bbox[3]/2
+ y2 = bbox[1] + bbox[3]/2
+ z = np.array([x1,y1,x2,y2]).astype(int)
+ return z
+
+def convert_xyxy_to_xywh(bbox: np.int32):
+ w = bbox[2]-bbox[0]
+ h = bbox[3]-bbox[1]
+ x = bbox[2] - w/2
+ y = bbox[3] - h/2
+ z = np.array([x,y,w,h]).astype(int)
+ return z
+
+def convert_bbox_to_z(bbox: np.int32):
+ w=bbox[2]
+ h=bbox[3]
+ area = w*h
+ aspect_ratio = w/h
+ z = np.array([bbox[0], bbox[1], area, aspect_ratio])
+ return z
+
+def convert_z_to_bbox(bbox: np.int32):
+ w = int(sqrt(abs(bbox[2]*bbox[3])))
+ h = int(sqrt(abs( bbox[2]*pow(bbox[3], -1) )))
+ new_bbox = np.array([bbox[0], bbox[1], w, h])
+ return new_bbox
+
+class Kfilter():
+ def __init__(self):
+
+
+ # create a kalman filter instance
+ self.dim_x = 7
+ self.kalman = KalmanFilter(dim_x=self.dim_x, dim_z=4)
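+        # State x = [cx, cy, area, aspect_ratio, v_cx, v_cy, v_area];
+        # the measurement z = [cx, cy, area, aspect_ratio] (see convert_bbox_to_z)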
+
+ #state transition matrix
+ self.kalman.F = np.array([[1, 0, 0, 0, 1, 0, 0],
+ [0, 1, 0, 0, 0, 1, 0],
+ [0, 0, 1, 0, 0, 0, 1],
+ [0, 0, 0, 1, 0, 0, 0],
+ [0, 0, 0, 0, 1, 0, 0],
+ [0, 0, 0, 0, 0, 1, 0],
+ [0, 0, 0, 0, 0, 0, 1]])
+
+ # measurement function
+ self.kalman.H = np.array([[1, 0, 0, 0, 0, 0, 0],
+ [0, 1, 0, 0, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0],
+ [0, 0, 0, 1, 0, 0, 0]])
+
+ #covariance matrix, P already contains np.eye(dim_x)
+ self.kalman.P *= 1000.
+
+ # measurement noise
+ self.kalman.R = np.eye(4)
+ self.kalman.R *= 10.
+
+ # assign the process noise
+ self.kalman.Q = np.eye(self.dim_x)
+ self.kalman.Q *= 0.01
+
+ #set arbitrary initial value
+ self.kalman.x = np.array([0, 0, 0, 0, 0, 0, 0])
+
+ # make a prediction
+ def predict(self):
+ self.kalman.predict()
+ bbox = convert_z_to_bbox(self.kalman.x)
+
+ return bbox
+
+ def update(self, bbox):
+ bbox = convert_bbox_to_z(bbox)
+ self.kalman.update(bbox)
+
+class KalmanTracker():
+ def __init__(self):
+ self.tracks = list()
+ self.id = 0
+
+ def create_track(self, bbox):
+ track = {
+ "id": self.id,
+ "filter": Kfilter(),
+ "history": [bbox],
+ }
+ self.id += 1
+ return track
+
+ def associate_detections(self, preds: list, dects: np.int32, iou_threshold = 0.3):
+ """
+        Associates the detections matched by the linear sum assignment with their
+        corresponding tracks, and returns the matched detections, the unmatched
+        detections, and the unmatched tracks to be deleted.
+ """
+
+
+ iou_matrix = cost_matrix(dects, preds)
+ matched_indexes = linear_assignment(iou_matrix)
+
+        # collect the unmatched detections: any detection index that does not
+        # appear in the matched indices array
+ unmatched_bboxes = list()
+ for i, dect in enumerate(dects):
+ if i not in matched_indexes[:, 0]:
+ unmatched_bboxes.append(i)
+
+ # similarly, get the unmatched tracks
+ unmatched_tracks = list()
+ for j, pred in enumerate(preds):
+ if j not in matched_indexes[:, 1]:
+ unmatched_tracks.append(j)
+
+ # get the matched detections and filter out the low ious
+ matched_detections = list()
+ for matched in matched_indexes:
+ # if the detection doesn't pass the threshold
+ if (iou_matrix[matched[0], matched[1]] < iou_threshold):
+ # append the index of the detection to the unmatched detection list
+ unmatched_bboxes.append(matched[0])
+ # append the index of the prediction to the unmatched track list
+ unmatched_tracks.append(matched[1])
+
+ else:
+ # append it to the matched detections list
+ matched_detections.append(matched)
+
+        # sort the unmatched track indices in descending order so deletions don't shift later indices
+ unmatched_tracks.sort(reverse=True)
+
+ return matched_detections, unmatched_bboxes, unmatched_tracks
+
+ def pred_tracks(self):
+ lstOfPred = list()
+ if len(self.tracks) == 0:
+ return
+
+ for track in self.tracks:
+ # if the length of the track is just 1, then initialize the kalman filter with that value
+ if len(track["history"]) == 1:
+ track["filter"].kalman.x = np.append(convert_bbox_to_z(track["history"][-1]), [0,0,0])
+ # predict the kalman filter, get the new bounding box
+ nbbox = track["filter"].predict()
+ else:
+ nbbox = track["filter"].predict()
+
+ lstOfPred.append(nbbox)
+
+ return lstOfPred
+
+ def update_tracks(self, matched_indexes: list, bboxes: np.ndarray):
+ for m in matched_indexes:
+            # append the corresponding detection to each matched track's history
+ self.tracks[m[1]]["history"].append(bboxes[m[0]])
+ # update each matched track filter
+ self.tracks[m[1]]["filter"].update(self.tracks[m[1]]["history"][-1])
+
+class Sort(KalmanTracker):
+ def __init__(self):
+ super().__init__()
+ self.bboxes = np.empty((2,3))
+
+ def manage_tracks(self, bboxes: np.ndarray, iou_threshold=0.3):
+ """
+        Manages the full track lifecycle (create, update, delete) for a new set of
+        detections and returns the current list of tracks.
+ """
+ # get new bounding boxes
+ self.bboxes = bboxes
+
+ # check to see if there are no tracks
+ if (len(self.tracks) == 0):
+ for bbox in self.bboxes:
+ self.tracks.append(self.create_track(bbox))
+ else:
+
+ # do a prediction using the kalman filter
+ list_predictions = self.pred_tracks()
+
+ # get list of matched detections, unmatched detections, and unmatched tracks
+ matches, unmatched_detections, unmatched_tracks = self.associate_detections(list_predictions, bboxes, iou_threshold)
+
+ # update kalman filter for each track
+ self.update_tracks(matches, bboxes)
+
+ # if a detection was not associated to any track
+ # create a new track for it
+ if len(unmatched_detections) > 0:
+ for m in unmatched_detections:
+ self.tracks.append(self.create_track(bboxes[m]))
+
+ # if there is a track that was not matched to any
+ # detection, delete it
+ if len(unmatched_tracks) > 0:
+ for t in unmatched_tracks:
+ del self.tracks[t]
+
+
+ return self.tracks
diff --git a/src/_archive_legacy/models/SORT/tempCodeRunnerFile.py b/src/_archive_legacy/models/SORT/tempCodeRunnerFile.py
index 8b13789..d3f5a12 100644
--- a/src/_archive_legacy/models/SORT/tempCodeRunnerFile.py
+++ b/src/_archive_legacy/models/SORT/tempCodeRunnerFile.py
@@ -1 +1 @@
-
+
diff --git a/src/_archive_legacy/models/SORT/test_sort.py b/src/_archive_legacy/models/SORT/test_sort.py
index 828ce0d..c905ef6 100644
--- a/src/_archive_legacy/models/SORT/test_sort.py
+++ b/src/_archive_legacy/models/SORT/test_sort.py
@@ -1,76 +1,76 @@
-import cv2
-from ultralytics import YOLO
-from sort import Sort
-
-import os
-print("Current working directory:", os.getcwd())
-
-
-trt_model = YOLO('yolo11m.pt')
-
-# Open the video file
-video_path = "C:/Users/dtj10/02-CLUBS_AND_PROJECTS/ACM/DroneProject/Repo/Main/SORT/bestResult.mp4"
-cap = cv2.VideoCapture(video_path)
-
-#Check if cap actually opened
-if not cap.isOpened():
- print('CAP NOT OPENED')
- exit()
-
-# We need to set resolutions.
-# so, convert them from float to integer.
-frame_width = int(cap.get(3))
-frame_height = int(cap.get(4))
-fps = cap.get(cv2.CAP_PROP_FPS)
-
-size = (frame_width, frame_height)
-
-# Define the codec and create a VideoWriter object
-fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-out = cv2.VideoWriter('./stream.mp4', fourcc, 24, size)
-
-fc = 0
-sort = Sort()
-while True:
- ret, frame = cap.read()
- if not ret:
- break
- fc += 1
-
- # Run inference
- # results = yolo_pipeline(images=frame)
- results = trt_model.predict(frame)
-
- # Process results (similar to your existing code)
- for result in results:
- bboxes = result.boxes.xywh.cpu().numpy().astype(int)
- #if fc == 1: predBBoxes = KalmanTracker()
- #tracks = predBBoxes.bboxes_to_tracks(bboxes)
- tracks = sort.manage_tracks(bboxes)
-
- for track in tracks:
- nbbox = track["history"][-1]
- x1 = int(nbbox[0] - nbbox[2]/2)
- y1 = int(nbbox[1] - nbbox[3]/2)
- x2 = int(nbbox[0] + nbbox[2]/2)
- y2 = int(nbbox[1] + nbbox[3]/2)
- cv2.putText(
- frame,
- str(track["id"]),
- (int(x1), int(y1) - 10),
- fontFace = cv2.FONT_HERSHEY_SIMPLEX,
- fontScale = 0.6,
- color = (0, 255, 0),
- thickness=2
- )
- cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-
- # Write the frame to the VideoWriter object
- out.write(frame)
- if cv2.waitKey(1) & 0xFF == ord('q'):
- break
-
-
-out.release()
-cap.release()
+import cv2
+from ultralytics import YOLO
+from sort import Sort
+
+import os
+print("Current working directory:", os.getcwd())
+
+
+trt_model = YOLO('yolo11m.pt')
+
+# Open the video file
+video_path = "C:/Users/dtj10/02-CLUBS_AND_PROJECTS/ACM/DroneProject/Repo/Main/SORT/bestResult.mp4"
+cap = cv2.VideoCapture(video_path)
+
+#Check if cap actually opened
+if not cap.isOpened():
+ print('CAP NOT OPENED')
+ exit()
+
+# Get the video's resolution;
+# cap.get() returns floats, so convert the dimensions to integers.
+frame_width = int(cap.get(3))
+frame_height = int(cap.get(4))
+fps = cap.get(cv2.CAP_PROP_FPS)
+
+size = (frame_width, frame_height)
+
+# Define the codec and create a VideoWriter object
+fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+out = cv2.VideoWriter('./stream.mp4', fourcc, 24, size)
+
+fc = 0
+sort = Sort()
+while True:
+ ret, frame = cap.read()
+ if not ret:
+ break
+ fc += 1
+
+ # Run inference
+ # results = yolo_pipeline(images=frame)
+ results = trt_model.predict(frame)
+
+    # Process detection results
+ for result in results:
+ bboxes = result.boxes.xywh.cpu().numpy().astype(int)
+ #if fc == 1: predBBoxes = KalmanTracker()
+ #tracks = predBBoxes.bboxes_to_tracks(bboxes)
+ tracks = sort.manage_tracks(bboxes)
+
+ for track in tracks:
+ nbbox = track["history"][-1]
+ x1 = int(nbbox[0] - nbbox[2]/2)
+ y1 = int(nbbox[1] - nbbox[3]/2)
+ x2 = int(nbbox[0] + nbbox[2]/2)
+ y2 = int(nbbox[1] + nbbox[3]/2)
+ cv2.putText(
+ frame,
+ str(track["id"]),
+ (int(x1), int(y1) - 10),
+ fontFace = cv2.FONT_HERSHEY_SIMPLEX,
+ fontScale = 0.6,
+ color = (0, 255, 0),
+ thickness=2
+ )
+ cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+
+ # Write the frame to the VideoWriter object
+ out.write(frame)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+
+out.release()
+cap.release()
cv2.destroyAllWindows()
\ No newline at end of file
diff --git a/src/_archive_legacy/models/SORT/yoloKalman.py b/src/_archive_legacy/models/SORT/yoloKalman.py
index b0a8a85..0267420 100644
--- a/src/_archive_legacy/models/SORT/yoloKalman.py
+++ b/src/_archive_legacy/models/SORT/yoloKalman.py
@@ -1,251 +1,251 @@
-import cv2
-import time
-from ultralytics import YOLO
-from filterpy.kalman import KalmanFilter
-from filterpy.common import Q_discrete_white_noise
-import numpy as np
-from math import sqrt, pow
-
-def iou_calculator(bb_test: np.int32, bb_gt: np.int32):
- """
- From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]
- """
- # get the length and width of the box of the intersection
- x1 = max(bb_test[0], bb_gt[0])
- y1 = max(bb_test[1], bb_gt[1])
- x2 = min(bb_test[2], bb_gt[2])
- y2 = min(bb_test[3], bb_gt[3])
- intersection_width = max(0, x2-x1)
- intersection_height = max(0, y2-y1)
- area_of_intersection = intersection_height * intersection_width
- #get the area of the ground truth
- gt_bb_len = abs(bb_gt[2] - bb_gt[0])
- gt_bb_width = abs(bb_gt[3] - bb_gt[1])
- gt_bb_area = gt_bb_len * gt_bb_width
- #get the area of the prediction
- det_bb_len = abs(bb_test[2] - bb_test[0])
- det_bb_width = abs(bb_test[3] - bb_test[1])
- det_bb_area = det_bb_len * det_bb_width
- #get area of the union
- area_of_union = gt_bb_area + det_bb_area - area_of_intersection
-
- iou = area_of_intersection / area_of_union
-
- return iou
-
-def convert_xywh_to_xyxy(bbox: np.int32):
- x1 = bbox[0] - bbox[2]/2
- x2 = bbox[0] + bbox[2]/2
- y1 = bbox[1] - bbox[3]/2
- y2 = bbox[1] + bbox[3]/2
- z = np.array([x1,y1,x2,y2]).astype(int)
- return z
-
-def convert_xyxy_to_xywh(bbox: np.int32):
- w = bbox[2]-bbox[0]
- h = bbox[3]-bbox[1]
- x = bbox[2] - w/2
- y = bbox[3] - h/2
- z = np.array([x,y,w,h]).astype(int)
- return z
-
-def convert_bbox_to_z(bbox: np.int32):
- w=bbox[2]
- h=bbox[3]
- area = w*h
- aspect_ratio = w/h
- z = np.array([bbox[0], bbox[1], area, aspect_ratio])
- return z
-
-def convert_z_to_bbox(bbox: np.int32):
- w = int(sqrt(abs(bbox[2]*bbox[3])))
- h = int(sqrt(abs( bbox[2]*pow(bbox[3], -1) )))
- new_bbox = np.array([bbox[0], bbox[1], w, h])
- return new_bbox
-
-class Kfilter():
- def __init__(self):
-
-
- # create a kalman filter instance
- self.dim_x = 7
- self.kalman = KalmanFilter(dim_x=self.dim_x, dim_z=4)
-
- #state transition matrix
- self.kalman.F = np.array([[1, 0, 0, 0, 1, 0, 0],
- [0, 1, 0, 0, 0, 1, 0],
- [0, 0, 1, 0, 0, 0, 1],
- [0, 0, 0, 1, 0, 0, 0],
- [0, 0, 0, 0, 1, 0, 0],
- [0, 0, 0, 0, 0, 1, 0],
- [0, 0, 0, 0, 0, 0, 1]])
-
- # measurement function
- self.kalman.H = np.array([[1, 0, 0, 0, 0, 0, 0],
- [0, 1, 0, 0, 0, 0, 0],
- [0, 0, 1, 0, 0, 0, 0],
- [0, 0, 0, 1, 0, 0, 0]])
-
- #covariance matrix, P already contains np.eye(dim_x)
- self.kalman.P *= 1000.
-
- # measurement noise
- self.kalman.R = np.eye(4)
- self.kalman.R *= 10.
-
- # assign the process noise
- self.kalman.Q = np.eye(self.dim_x)
- self.kalman.Q *= 0.01
-
- #set arbitrary initial value
- # center of bounding box
- # self.kalman.x = np.array([self.bbox[0], self.bbox[1],
- # # area and aspect ratio
- # self.bbox[2]*self.bbox[3], self.bbox[2]/self.bbox[3],
- # # change in position over time of center and area
- # 0, 0, 0])
- self.kalman.x = np.array([0, 0, 0, 0, 0, 0, 0])
-
- # make a prediction
- def predict(self):
- self.kalman.predict()
- bbox = convert_z_to_bbox(self.kalman.x)
-
- return bbox
-
- def update(self, bbox):
- bbox = convert_bbox_to_z(bbox)
- self.kalman.update(bbox)
-
-class KalmanTracker():
- def __init__(self):
- self.tracks = list()
- self.id = 0
-
- def create_track(self, bbox):
- track = {
- "id": self.id,
- "filter": Kfilter(),
- "history": [bbox]
- }
- self.id += 1
- return track
-
- def bboxes_to_tracks(self, bboxes: np.int32):
- """
- This function must handle:
- - creating tracks
- - extending tracks
- - ensure no two tracks have the same bounding box
- - iou calculation
- - eliminating uncessary tracks
- - controling tracks' size"""
-
- # if tracks is 0, then we must be at the first frame
- if len(self.tracks) == 0:
- for bbox in bboxes:
- track = self.create_track(bbox)
- self.tracks.append(track)
-
- return self.tracks
-
- convBBoxes = [convert_xywh_to_xyxy(bbox) for bbox in bboxes]
- convTrkedObj = [convert_xywh_to_xyxy(track["history"][-1]) for track in self.tracks]
- #print(f"Track Obj: {convTrkedObj}")
-
- # if there are more tracks in the list than bounding boxes, we have to delete some tracks
- if len(self.tracks) > len(bboxes):
- # find how many tracks are extra
- exTrack = len(self.tracks) - len(bboxes)
- iouLst = list()
- # Calculate each track's iou with each bounding box, find the maximum
- for track in convTrkedObj:
- maxIoU = 0
- for bbox in convBBoxes:
- iou = iou_calculator(track, bbox)
- if iou > maxIoU:
- maxIoU = iou
- # store max in a list
- iouLst.append(maxIoU)
- # NOTE: the id of the max value will correspond with the id of the track
- # delete the tracks with the lowests IoUs
- trkEnumSorted = sorted(enumerate(iouLst), key= lambda x: x[1])
- self.tracks = [self.tracks[i] for i, _ in trkEnumSorted[exTrack:]]
- return self.tracks
- # elif there are less tracks in the list than bounding boxes, we have to create some tracks
- elif len(self.tracks) < len(bboxes):
- # find how many tracks are needed
- nTracks = len(bboxes) - len(self.tracks)
- iouLst = list()
- # Calculate each bounding box's iou with each track, find the maximum
- for bbox in convBBoxes:
- maxIoU = 0
- for track in convTrkedObj:
- iou = iou_calculator(bbox, track)
- if iou > maxIoU:
- maxIoU = iou
- # store max in a list
- iouLst.append(maxIoU)
- # NOTE: the id of the max value will correspond with the id of the bounding box
- # append to the tracks list new tracks with the bounding boxes with the lowest IoUs
- iouLow = sorted(enumerate(iouLst), key= lambda x: x[1])[:nTracks]
- #print(f"iouLow: {iouLow}")
- self.tracks.extend([self.create_track(bboxes[i]) for i, _ in iouLow])
- return self.tracks
-
- # if the number of tracks equals the number of bounding boxes, then we don't need more tracks
- # Calculate the track's iou with each bounding box, store it in a list
- used = set()
- for _, track in enumerate(self.tracks):
- lasttrkobj = convert_xywh_to_xyxy(track["history"][-1])
- maxIoU = 0
- idx = -1
- for i, bbox in enumerate(convBBoxes):
- if i in used:
- continue
- iou = iou_calculator(lasttrkobj, bbox)
- if iou > maxIoU:
- # get the max value
- maxIoU = iou
- # find the index
- idx = i
-
- # append the bounding box to the corresponding track
- if idx != -1:
- track["history"].append(bboxes[idx])
- used.add(idx)
- # delete the track's previous bounding box
- #print(len(track))
- max_track_size = 3
- if len(track["history"]) > max_track_size:
- # keep the last three delete the rest
- track["history"] = track["history"][-max_track_size:]
- # return the tracks
- return self.tracks
-
- def pred_tracks(self):
- lstOfPred = list()
- if len(self.tracks) == 0:
- return []
-
- for track in self.tracks:
- # if the length of the track is just 1, then initialize the kalman filter with that value
- if len(track["history"]) == 1:
- track["filter"].kalman.x = np.append(convert_bbox_to_z(track["history"][-1]), [0,0,0])
- # predict the kalman filter, get the new bounding box
- nbbox = track["filter"].predict()
- #update the kalman filter
- track["filter"].update(track["history"][-1])
- else:
- # do a prediction and update the filter based on the most current bbox in the track's history
- nbbox = track["filter"].predict()
- track["filter"].update(track["history"][-1])
-
- lstOfPred.append(nbbox)
-
- return lstOfPred
-
-
-
-
-
+import cv2
+import time
+from ultralytics import YOLO
+from filterpy.kalman import KalmanFilter
+from filterpy.common import Q_discrete_white_noise
+import numpy as np
+from math import sqrt, pow
+
+def iou_calculator(bb_test: np.int32, bb_gt: np.int32):
+ """
+ From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]
+ """
+ # get the length and width of the box of the intersection
+ x1 = max(bb_test[0], bb_gt[0])
+ y1 = max(bb_test[1], bb_gt[1])
+ x2 = min(bb_test[2], bb_gt[2])
+ y2 = min(bb_test[3], bb_gt[3])
+ intersection_width = max(0, x2-x1)
+ intersection_height = max(0, y2-y1)
+ area_of_intersection = intersection_height * intersection_width
+ #get the area of the ground truth
+ gt_bb_len = abs(bb_gt[2] - bb_gt[0])
+ gt_bb_width = abs(bb_gt[3] - bb_gt[1])
+ gt_bb_area = gt_bb_len * gt_bb_width
+ #get the area of the prediction
+ det_bb_len = abs(bb_test[2] - bb_test[0])
+ det_bb_width = abs(bb_test[3] - bb_test[1])
+ det_bb_area = det_bb_len * det_bb_width
+ #get area of the union
+ area_of_union = gt_bb_area + det_bb_area - area_of_intersection
+
+ iou = area_of_intersection / area_of_union
+
+ return iou
+
+def convert_xywh_to_xyxy(bbox: np.int32):
+ x1 = bbox[0] - bbox[2]/2
+ x2 = bbox[0] + bbox[2]/2
+ y1 = bbox[1] - bbox[3]/2
+ y2 = bbox[1] + bbox[3]/2
+ z = np.array([x1,y1,x2,y2]).astype(int)
+ return z
+
+def convert_xyxy_to_xywh(bbox: np.int32):
+ w = bbox[2]-bbox[0]
+ h = bbox[3]-bbox[1]
+ x = bbox[2] - w/2
+ y = bbox[3] - h/2
+ z = np.array([x,y,w,h]).astype(int)
+ return z
+
+def convert_bbox_to_z(bbox: np.int32):
+ w=bbox[2]
+ h=bbox[3]
+ area = w*h
+ aspect_ratio = w/h
+ z = np.array([bbox[0], bbox[1], area, aspect_ratio])
+ return z
+
+def convert_z_to_bbox(bbox: np.int32):
+ w = int(sqrt(abs(bbox[2]*bbox[3])))
+ h = int(sqrt(abs( bbox[2]*pow(bbox[3], -1) )))
+ new_bbox = np.array([bbox[0], bbox[1], w, h])
+ return new_bbox
+
+class Kfilter():
+ def __init__(self):
+
+
+ # create a kalman filter instance
+ self.dim_x = 7
+ self.kalman = KalmanFilter(dim_x=self.dim_x, dim_z=4)
+
+ #state transition matrix
+ self.kalman.F = np.array([[1, 0, 0, 0, 1, 0, 0],
+ [0, 1, 0, 0, 0, 1, 0],
+ [0, 0, 1, 0, 0, 0, 1],
+ [0, 0, 0, 1, 0, 0, 0],
+ [0, 0, 0, 0, 1, 0, 0],
+ [0, 0, 0, 0, 0, 1, 0],
+ [0, 0, 0, 0, 0, 0, 1]])
+
+ # measurement function
+ self.kalman.H = np.array([[1, 0, 0, 0, 0, 0, 0],
+ [0, 1, 0, 0, 0, 0, 0],
+ [0, 0, 1, 0, 0, 0, 0],
+ [0, 0, 0, 1, 0, 0, 0]])
+
+ #covariance matrix, P already contains np.eye(dim_x)
+ self.kalman.P *= 1000.
+
+ # measurement noise
+ self.kalman.R = np.eye(4)
+ self.kalman.R *= 10.
+
+ # assign the process noise
+ self.kalman.Q = np.eye(self.dim_x)
+ self.kalman.Q *= 0.01
+
+ #set arbitrary initial value
+ # center of bounding box
+ # self.kalman.x = np.array([self.bbox[0], self.bbox[1],
+ # # area and aspect ratio
+ # self.bbox[2]*self.bbox[3], self.bbox[2]/self.bbox[3],
+ # # change in position over time of center and area
+ # 0, 0, 0])
+ self.kalman.x = np.array([0, 0, 0, 0, 0, 0, 0])
+
+ # make a prediction
+ def predict(self):
+ self.kalman.predict()
+ bbox = convert_z_to_bbox(self.kalman.x)
+
+ return bbox
+
+ def update(self, bbox):
+ bbox = convert_bbox_to_z(bbox)
+ self.kalman.update(bbox)
+
+class KalmanTracker():
+ def __init__(self):
+ self.tracks = list()
+ self.id = 0
+
+ def create_track(self, bbox):
+ track = {
+ "id": self.id,
+ "filter": Kfilter(),
+ "history": [bbox]
+ }
+ self.id += 1
+ return track
+
+ def bboxes_to_tracks(self, bboxes: np.int32):
+ """
+ This function must handle:
+ - creating tracks
+ - extending tracks
+        - ensuring no two tracks share the same bounding box
+        - IoU calculation
+        - eliminating unnecessary tracks
+        - controlling each track's history size
+        """
+
+ # if tracks is 0, then we must be at the first frame
+ if len(self.tracks) == 0:
+ for bbox in bboxes:
+ track = self.create_track(bbox)
+ self.tracks.append(track)
+
+ return self.tracks
+
+ convBBoxes = [convert_xywh_to_xyxy(bbox) for bbox in bboxes]
+ convTrkedObj = [convert_xywh_to_xyxy(track["history"][-1]) for track in self.tracks]
+ #print(f"Track Obj: {convTrkedObj}")
+
+ # if there are more tracks in the list than bounding boxes, we have to delete some tracks
+ if len(self.tracks) > len(bboxes):
+ # find how many tracks are extra
+ exTrack = len(self.tracks) - len(bboxes)
+ iouLst = list()
+ # Calculate each track's iou with each bounding box, find the maximum
+ for track in convTrkedObj:
+ maxIoU = 0
+ for bbox in convBBoxes:
+ iou = iou_calculator(track, bbox)
+ if iou > maxIoU:
+ maxIoU = iou
+ # store max in a list
+ iouLst.append(maxIoU)
+            # NOTE: the index of each max value corresponds to the index of the track
+            # delete the tracks with the lowest IoUs
+ trkEnumSorted = sorted(enumerate(iouLst), key= lambda x: x[1])
+ self.tracks = [self.tracks[i] for i, _ in trkEnumSorted[exTrack:]]
+ return self.tracks
+        # elif there are fewer tracks in the list than bounding boxes, we have to create some tracks
+ elif len(self.tracks) < len(bboxes):
+ # find how many tracks are needed
+ nTracks = len(bboxes) - len(self.tracks)
+ iouLst = list()
+ # Calculate each bounding box's iou with each track, find the maximum
+ for bbox in convBBoxes:
+ maxIoU = 0
+ for track in convTrkedObj:
+ iou = iou_calculator(bbox, track)
+ if iou > maxIoU:
+ maxIoU = iou
+ # store max in a list
+ iouLst.append(maxIoU)
+            # NOTE: the index of each max value corresponds to the index of its bounding box
+            # append new tracks for the bounding boxes with the lowest IoUs
+            iouLow = sorted(enumerate(iouLst), key=lambda x: x[1])[:nTracks]
+ #print(f"iouLow: {iouLow}")
+ self.tracks.extend([self.create_track(bboxes[i]) for i, _ in iouLow])
+ return self.tracks
+
+ # if the number of tracks equals the number of bounding boxes, then we don't need more tracks
+ # Calculate the track's iou with each bounding box, store it in a list
+ used = set()
+        for track in self.tracks:
+ lasttrkobj = convert_xywh_to_xyxy(track["history"][-1])
+ maxIoU = 0
+ idx = -1
+ for i, bbox in enumerate(convBBoxes):
+ if i in used:
+ continue
+ iou = iou_calculator(lasttrkobj, bbox)
+ if iou > maxIoU:
+ # get the max value
+ maxIoU = iou
+ # find the index
+ idx = i
+
+ # append the bounding box to the corresponding track
+ if idx != -1:
+ track["history"].append(bboxes[idx])
+ used.add(idx)
+ # delete the track's previous bounding box
+ #print(len(track))
+ max_track_size = 3
+ if len(track["history"]) > max_track_size:
+ # keep the last three delete the rest
+ track["history"] = track["history"][-max_track_size:]
+ # return the tracks
+ return self.tracks
+
+ def pred_tracks(self):
+ lstOfPred = list()
+ if len(self.tracks) == 0:
+ return []
+
+ for track in self.tracks:
+ # if the length of the track is just 1, then initialize the kalman filter with that value
+ if len(track["history"]) == 1:
+ track["filter"].kalman.x = np.append(convert_bbox_to_z(track["history"][-1]), [0,0,0])
+ # predict the kalman filter, get the new bounding box
+ nbbox = track["filter"].predict()
+ #update the kalman filter
+ track["filter"].update(track["history"][-1])
+ else:
+ # do a prediction and update the filter based on the most current bbox in the track's history
+ nbbox = track["filter"].predict()
+ track["filter"].update(track["history"][-1])
+
+ lstOfPred.append(nbbox)
+
+ return lstOfPred
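+
+# Minimal per-frame usage sketch (illustrative only; `frames_of_bboxes` is an assumed stand-in
+# for the per-frame [cx, cy, w, h] detections produced upstream):
+#
+#   tracker = KalmanTracker()
+#   for frame_bboxes in frames_of_bboxes:
+#       tracks = tracker.bboxes_to_tracks(frame_bboxes)   # create/extend/prune tracks for this frame
+#       predictions = tracker.pred_tracks()               # Kalman-predicted box for each track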
+
+
+
+
+
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Data_Processing.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Data_Processing.py
index c5659d8..24c9479 100644
--- a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Data_Processing.py
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Data_Processing.py
@@ -1,138 +1,138 @@
-import os
-import cv2
-import torch
-from torch.utils.data import Dataset, DataLoader
-import torchvision.transforms as transforms
-
-class TrainingData(Dataset):
- def __init__(self, img_dirs, det_files, transform=None):
- """
- Args:
- img_dirs (list): List of image directories (one per sequence)
- det_files (list): List of detection files corresponding to img_dirs
- transform (callable, optional): Optional transform to apply to images
- """
- self.img_dirs = img_dirs
- self.det_files = det_files
- self.transform = transform
- self.detections = self.read_detections() # Read detections for all files
- self.image_paths, self.labels = self.get_all_image_paths_and_labels() # Gather images and labels
-
- def read_detections(self):
- """
- Reads detection data from multiple MOT16 detection files.
- Returns:
- Dictionary mapping (frame_id, img_dir) -> list of (object_id, bbox)
- """
- detections = {}
-
- for img_dir, det_file in zip(self.img_dirs, self.det_files):
- if not os.path.exists(det_file):
- print(f"Warning: Detection file {det_file} not found, skipping.")
- continue # Skip missing files
-
- with open(det_file, 'r') as f:
- for line in f:
- parts = line.strip().split(',')
- frame_id = int(parts[0]) # Frame ID
- object_id = int(parts[1]) # Object ID (LABEL)
-
- if object_id == -1: # Ignore unknown object IDs
- continue
-
- bbox = list(map(float, parts[2:6])) # Bounding Box (x, y, w, h)
- key = (frame_id, img_dir)
-
- if key not in detections:
- detections[key] = []
- detections[key].append((object_id, bbox))
-
- print(f"Total valid detections loaded: {sum(len(v) for v in detections.values())}")
- return detections
-
- def get_all_image_paths_and_labels(self):
- """
- Collects all image paths and their corresponding object labels.
- Returns:
- List of image paths and their associated object IDs (labels).
- """
- image_paths = []
- labels = []
-
- for img_dir in self.img_dirs:
- if not os.path.exists(img_dir):
- print(f"Warning: Image directory {img_dir} not found, skipping.")
- continue
-
- image_files = sorted(
- [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith('.jpg') or f.endswith('.png')]
- )
-
- print(f"Found {len(image_files)} images in {img_dir}")
-
- for image_file in image_files:
- frame_id = int(os.path.basename(image_file).split('.')[0]) # Extract frame ID from filename
- key = (frame_id, img_dir)
-
- if key in self.detections:
- for obj_id, bbox in self.detections[key]:
- image_paths.append(image_file)
- labels.append(obj_id) # Store the object ID as the label
-
- print(f"Total valid images found: {len(image_paths)}")
- return image_paths, labels
-
- def __len__(self):
- return len(self.image_paths)
-
- def __getitem__(self, idx):
- image_path = self.image_paths[idx]
- label = self.labels[idx] # Retrieve the corresponding object ID
-
- if not os.path.exists(image_path):
- print(f"Warning: Image {image_path} not found, skipping.")
- return None, None
-
- image = cv2.imread(image_path)
- if image is None:
- print(f"Warning: Failed to load {image_path}, skipping.")
- return None, None
-
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-
- if self.transform:
- image = self.transform(image)
-
- return image, label # Return both image and object ID
-
-# Custom collate function to remove None values
-def custom_collate_fn(batch):
- batch = [b for b in batch if b[0] is not None] # Remove None values
- if len(batch) == 0:
- return torch.empty(0), torch.empty(0) # Return empty tensors if no valid samples
- return torch.utils.data.dataloader.default_collate(batch)
-
-# MOT16 Dataset File Paths
-img_dirs = [
- r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\img1',
- r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\img1',
- r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\img1',
-]
-
-det_files = [
- r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\det\det.txt',
- r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\det\det.txt',
- r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\det\det.txt',
-]
-
-# Define Image Transformations
-transform = transforms.Compose([
- transforms.ToPILImage(),
- transforms.Resize((64, 64)),
- transforms.ToTensor(),
- transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-])
-
-# Create Dataset and DataLoader
-train_dataset = TrainingData(img_dirs, det_files, transform)
-train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
+import os
+import cv2
+import torch
+from torch.utils.data import Dataset, DataLoader
+import torchvision.transforms as transforms
+
+class TrainingData(Dataset):
+ def __init__(self, img_dirs, det_files, transform=None):
+ """
+ Args:
+ img_dirs (list): List of image directories (one per sequence)
+ det_files (list): List of detection files corresponding to img_dirs
+ transform (callable, optional): Optional transform to apply to images
+ """
+ self.img_dirs = img_dirs
+ self.det_files = det_files
+ self.transform = transform
+ self.detections = self.read_detections() # Read detections for all files
+ self.image_paths, self.labels = self.get_all_image_paths_and_labels() # Gather images and labels
+
+ def read_detections(self):
+ """
+ Reads detection data from multiple MOT16 detection files.
+ Returns:
+ Dictionary mapping (frame_id, img_dir) -> list of (object_id, bbox)
+ """
+ detections = {}
+
+ for img_dir, det_file in zip(self.img_dirs, self.det_files):
+ if not os.path.exists(det_file):
+ print(f"Warning: Detection file {det_file} not found, skipping.")
+ continue # Skip missing files
+
+ with open(det_file, 'r') as f:
+ for line in f:
+ parts = line.strip().split(',')
+ frame_id = int(parts[0]) # Frame ID
+ object_id = int(parts[1]) # Object ID (LABEL)
+
+ if object_id == -1: # Ignore unknown object IDs
+ continue
+
+ bbox = list(map(float, parts[2:6])) # Bounding Box (x, y, w, h)
+ key = (frame_id, img_dir)
+
+ if key not in detections:
+ detections[key] = []
+ detections[key].append((object_id, bbox))
+
+ print(f"Total valid detections loaded: {sum(len(v) for v in detections.values())}")
+ return detections
+
+ def get_all_image_paths_and_labels(self):
+ """
+ Collects all image paths and their corresponding object labels.
+ Returns:
+ List of image paths and their associated object IDs (labels).
+ """
+ image_paths = []
+ labels = []
+
+ for img_dir in self.img_dirs:
+ if not os.path.exists(img_dir):
+ print(f"Warning: Image directory {img_dir} not found, skipping.")
+ continue
+
+ image_files = sorted(
+ [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith('.jpg') or f.endswith('.png')]
+ )
+
+ print(f"Found {len(image_files)} images in {img_dir}")
+
+ for image_file in image_files:
+ frame_id = int(os.path.basename(image_file).split('.')[0]) # Extract frame ID from filename
+ key = (frame_id, img_dir)
+
+ if key in self.detections:
+ for obj_id, bbox in self.detections[key]:
+ image_paths.append(image_file)
+ labels.append(obj_id) # Store the object ID as the label
+
+ print(f"Total valid images found: {len(image_paths)}")
+ return image_paths, labels
+
+ def __len__(self):
+ return len(self.image_paths)
+
+ def __getitem__(self, idx):
+ image_path = self.image_paths[idx]
+ label = self.labels[idx] # Retrieve the corresponding object ID
+
+ if not os.path.exists(image_path):
+ print(f"Warning: Image {image_path} not found, skipping.")
+ return None, None
+
+ image = cv2.imread(image_path)
+ if image is None:
+ print(f"Warning: Failed to load {image_path}, skipping.")
+ return None, None
+
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+ if self.transform:
+ image = self.transform(image)
+
+ return image, label # Return both image and object ID
+
+# Custom collate function to remove None values
+def custom_collate_fn(batch):
+ batch = [b for b in batch if b[0] is not None] # Remove None values
+ if len(batch) == 0:
+ return torch.empty(0), torch.empty(0) # Return empty tensors if no valid samples
+ return torch.utils.data.dataloader.default_collate(batch)
+
+# MOT16 Dataset File Paths
+img_dirs = [
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\img1',
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\img1',
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\img1',
+]
+
+det_files = [
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\det\det.txt',
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\det\det.txt',
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\det\det.txt',
+]
+
+# Define Image Transformations
+transform = transforms.Compose([
+ transforms.ToPILImage(),
+ transforms.Resize((64, 64)),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])
+
+# Create Dataset and DataLoader
+train_dataset = TrainingData(img_dirs, det_files, transform)
+train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Training_Script.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Training_Script.py
index f79be9a..991d3bd 100644
--- a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Training_Script.py
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Training_Script.py
@@ -1,46 +1,46 @@
-import torch
-import torch.optim as optim
-import torch.nn as nn
-from CNN_Data_Processing import train_loader
-from CNN_model import CNNDeepSORT # Assuming this is your CNN model
-import cv2
-
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-# Define Triplet Loss
-criterion = nn.TripletMarginLoss(margin=1.0, p=2)
-
-# Initialize Model
-model = CNNDeepSORT().to(DEVICE)
-optimizer = optim.Adam(model.parameters(), lr=0.0001)
-
-def train(model, train_loader, criterion, optimizer, num_epochs):
- for epoch in range(num_epochs):
- model.train()
- running_loss = 0.0
-
- for i, (anchor, positive, negative) in enumerate(train_loader):
- if anchor.shape[0] == 0:
- continue # Skip empty batches
-
- anchor, positive, negative = anchor.to(DEVICE), positive.to(DEVICE), negative.to(DEVICE)
- optimizer.zero_grad()
-
- anchor_embedding = model(anchor)
- positive_embedding = model(positive)
- negative_embedding = model(negative)
-
- loss = criterion(anchor_embedding, positive_embedding, negative_embedding)
- loss.backward()
- optimizer.step()
-
- running_loss += loss.item()
- if i % 10 == 0:
- print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")
-
- print(f"Epoch [{epoch+1}/{num_epochs}] - Average Loss: {running_loss / len(train_loader):.4f}")
-
- torch.save(model.state_dict(), "CNNDeepSORT_final.pth")
- print("Training complete! Model saved to CNNDeepSORT_final.pth")
-
-train(model, train_loader, criterion, optimizer, num_epochs=10)
+import torch
+import torch.optim as optim
+import torch.nn as nn
+from CNN_Data_Processing import train_loader
+from CNN_model import CNNDeepSORT # Assuming this is your CNN model
+import cv2
+
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Define Triplet Loss
+criterion = nn.TripletMarginLoss(margin=1.0, p=2)
+
+# Initialize Model
+model = CNNDeepSORT().to(DEVICE)
+optimizer = optim.Adam(model.parameters(), lr=0.0001)
+
+def train(model, train_loader, criterion, optimizer, num_epochs):
+ for epoch in range(num_epochs):
+ model.train()
+ running_loss = 0.0
+
+ for i, (anchor, positive, negative) in enumerate(train_loader):
+ if anchor.shape[0] == 0:
+ continue # Skip empty batches
+
+ anchor, positive, negative = anchor.to(DEVICE), positive.to(DEVICE), negative.to(DEVICE)
+ optimizer.zero_grad()
+
+ anchor_embedding = model(anchor)
+ positive_embedding = model(positive)
+ negative_embedding = model(negative)
+
+ loss = criterion(anchor_embedding, positive_embedding, negative_embedding)
+ loss.backward()
+ optimizer.step()
+
+ running_loss += loss.item()
+ if i % 10 == 0:
+ print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")
+
+ print(f"Epoch [{epoch+1}/{num_epochs}] - Average Loss: {running_loss / len(train_loader):.4f}")
+
+ torch.save(model.state_dict(), "CNNDeepSORT_final.pth")
+ print("Training complete! Model saved to CNNDeepSORT_final.pth")
+
+train(model, train_loader, criterion, optimizer, num_epochs=10)
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_model.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_model.py
index 5541779..6f9ec1f 100644
--- a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_model.py
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_model.py
@@ -1,77 +1,77 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-#deepSORT uses a CNN to create appearance based I.D.'s, this augments SORT by allowing the model to re-identify occluded objects.
-#This CNN focuses on learning spatial features.
-class CNNDeepSORT(nn.Module):
-
- #Embedding_dim is the number of features or dimensions (independent variables) in the representation space.
- #We chose 128 because it is a sweet spot, small enough for efficient training but large enough to capture unique features.
- #128 dimensional embeddings is a standard in object tracking/re-identification models.
-
- def __init__(self, embedding_dim = 128):
- super(CNNDeepSORT, self).__init__()
- '''Convolution Parameters:
-
- in_channels: 3 Channels, RGB
- out_channels: Number of feature maps the layer will produce, doubles per feature.
- kernel_size: A filter that will essentially slide along each channel and capture features using dot product.
- stride: The distance in which the kernel travels across the input channels.
- padding: Extra pixels that are added around the input image before applying convolution, this is so image details aren't lost.
- '''
- # Batch Normalization normalizes activations between the layers during training. This reduces the covariate shift
- # aka the change of the distribution of inputs from one layer to the next. Which ensures faster training.
- #x(hat) = (x - mean)/std dev
- #then y = w(x hat) + b
- #We do this to normalize the embedding values before the activation function is applied, this also reduces overfitting (by regularizing the data)
-
- #The first convolutional layer where the convolution math operation is performed. We are performing this in a 2D space.
- self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
- self.bn1 = nn.BatchNorm2d(32)
-
- self.conv2 = nn.Conv2d(in_channels=32,out_channels=64, kernel_size=3, stride=1, padding=1)
- self.bn2 = nn.BatchNorm2d(64)
-
- self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
- self.bn3 = nn.BatchNorm2d(128)
-
- '''Global Average Pooling:
- Used instead of multiple pooling layers because it reduces the spatial dimensions of
- the feature map to 1x1, summarizing the entire feature map into 1 vector per channel. This reduces the loss of spatial detail, which is very important
- for appearance embedding.
- '''
- #Since DeepSORT's goal is feature extraction and not classification, we only need a single fully connected layer with linear connections.
- #The linear transformation of the neurons in the last conv layer to the dense layer is y=Wx+b, which extracts the features into an
- #embedding space which is perfect for cosine similarity matching.
- self.pool = nn.AdaptiveAvgPool2d((1,1))
- self.fc = nn.Linear(in_features=128, out_features=embedding_dim)
-
- #Forward propagation method for training the CNN.
- def forward(self,x):
- '''
- Convolutional Layer:
- input: Image
- output: Feature maps that highlight different patterns in the image/frame
-
- Batch Normalization:
- input: Feature map and number of features
- output: A feature map that will converge faster and reduce covariate shift.
-
- Rectified Linear Unit:
- input: The summation of all neurons in one layers' activations multplied by their weights and added their biases
- output: If input < 0, output is 0. If input > 0, output is input. This removes any negative values that do not meet a threshold.
- It also introduces non-linearity to the network, allowing for complex learning.
- '''
- x = F.relu(self.bn1(self.conv1(x)))
- x = F.relu(self.bn2(self.conv2(x)))
- x = F.relu(self.bn3(self.conv3(x)))
-
- x = self.pool(x) #Downsizes the spatial dimensions of the feature map to 1x1 using Global Average Pooling
- x = torch.flatten(x,1) #Converts the multi-dimensional tensors into a 2D tensor, (batch size * features)
- x = self.fc(x) #A linear layer that matches the feature map to the desired embedding dimension (128)
- return F.normalize(x,p=2, dim=1) #Uses euclidean norm to normalize a tensor along a specific dimension
-
-#instantiate the model
-model = CNNDeepSORT()
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# DeepSORT uses a CNN to create appearance-based IDs; this augments SORT by allowing the model to re-identify occluded objects.
+#This CNN focuses on learning spatial features.
+class CNNDeepSORT(nn.Module):
+
+ #Embedding_dim is the number of features or dimensions (independent variables) in the representation space.
+ #We chose 128 because it is a sweet spot, small enough for efficient training but large enough to capture unique features.
+ #128 dimensional embeddings is a standard in object tracking/re-identification models.
+
+ def __init__(self, embedding_dim = 128):
+ super(CNNDeepSORT, self).__init__()
+ '''Convolution Parameters:
+
+ in_channels: 3 Channels, RGB
+ out_channels: Number of feature maps the layer will produce, doubles per feature.
+ kernel_size: A filter that will essentially slide along each channel and capture features using dot product.
+ stride: The distance in which the kernel travels across the input channels.
+ padding: Extra pixels that are added around the input image before applying convolution, this is so image details aren't lost.
+ '''
+ # Batch Normalization normalizes activations between the layers during training. This reduces the covariate shift
+ # aka the change of the distribution of inputs from one layer to the next. Which ensures faster training.
+ #x(hat) = (x - mean)/std dev
+ #then y = w(x hat) + b
+ #We do this to normalize the embedding values before the activation function is applied, this also reduces overfitting (by regularizing the data)
+
+ #The first convolutional layer where the convolution math operation is performed. We are performing this in a 2D space.
+ self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
+ self.bn1 = nn.BatchNorm2d(32)
+
+ self.conv2 = nn.Conv2d(in_channels=32,out_channels=64, kernel_size=3, stride=1, padding=1)
+ self.bn2 = nn.BatchNorm2d(64)
+
+ self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
+ self.bn3 = nn.BatchNorm2d(128)
+
+ '''Global Average Pooling:
+ Used instead of multiple pooling layers because it reduces the spatial dimensions of
+ the feature map to 1x1, summarizing the entire feature map into 1 vector per channel. This reduces the loss of spatial detail, which is very important
+ for appearance embedding.
+ '''
+ #Since DeepSORT's goal is feature extraction and not classification, we only need a single fully connected layer with linear connections.
+ #The linear transformation of the neurons in the last conv layer to the dense layer is y=Wx+b, which extracts the features into an
+ #embedding space which is perfect for cosine similarity matching.
+ self.pool = nn.AdaptiveAvgPool2d((1,1))
+ self.fc = nn.Linear(in_features=128, out_features=embedding_dim)
+
+ #Forward propagation method for training the CNN.
+ def forward(self,x):
+ '''
+ Convolutional Layer:
+ input: Image
+ output: Feature maps that highlight different patterns in the image/frame
+
+ Batch Normalization:
+ input: Feature map and number of features
+ output: A feature map that will converge faster and reduce covariate shift.
+
+ Rectified Linear Unit:
+        input: The summation of all neurons in one layer's activations, multiplied by their weights and added to their biases
+ output: If input < 0, output is 0. If input > 0, output is input. This removes any negative values that do not meet a threshold.
+ It also introduces non-linearity to the network, allowing for complex learning.
+ '''
+ x = F.relu(self.bn1(self.conv1(x)))
+ x = F.relu(self.bn2(self.conv2(x)))
+ x = F.relu(self.bn3(self.conv3(x)))
+
+ x = self.pool(x) #Downsizes the spatial dimensions of the feature map to 1x1 using Global Average Pooling
+ x = torch.flatten(x,1) #Converts the multi-dimensional tensors into a 2D tensor, (batch size * features)
+ x = self.fc(x) #A linear layer that matches the feature map to the desired embedding dimension (128)
+ return F.normalize(x,p=2, dim=1) #Uses euclidean norm to normalize a tensor along a specific dimension
+
+#instantiate the model
+model = CNNDeepSORT()
#print(model)
\ No newline at end of file
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Data_Processing.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Data_Processing.py
new file mode 100644
index 0000000..24c9479
--- /dev/null
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Data_Processing.py
@@ -0,0 +1,138 @@
+import os
+import cv2
+import torch
+from torch.utils.data import Dataset, DataLoader
+import torchvision.transforms as transforms
+
+class TrainingData(Dataset):
+ def __init__(self, img_dirs, det_files, transform=None):
+ """
+ Args:
+ img_dirs (list): List of image directories (one per sequence)
+ det_files (list): List of detection files corresponding to img_dirs
+ transform (callable, optional): Optional transform to apply to images
+ """
+ self.img_dirs = img_dirs
+ self.det_files = det_files
+ self.transform = transform
+ self.detections = self.read_detections() # Read detections for all files
+ self.image_paths, self.labels = self.get_all_image_paths_and_labels() # Gather images and labels
+
+ def read_detections(self):
+ """
+ Reads detection data from multiple MOT16 detection files.
+ Returns:
+ Dictionary mapping (frame_id, img_dir) -> list of (object_id, bbox)
+ """
+ detections = {}
+
+ for img_dir, det_file in zip(self.img_dirs, self.det_files):
+ if not os.path.exists(det_file):
+ print(f"Warning: Detection file {det_file} not found, skipping.")
+ continue # Skip missing files
+
+ with open(det_file, 'r') as f:
+ for line in f:
+ parts = line.strip().split(',')
+ frame_id = int(parts[0]) # Frame ID
+ object_id = int(parts[1]) # Object ID (LABEL)
+
+ if object_id == -1: # Ignore unknown object IDs
+ continue
+
+ bbox = list(map(float, parts[2:6])) # Bounding Box (x, y, w, h)
+ key = (frame_id, img_dir)
+
+ if key not in detections:
+ detections[key] = []
+ detections[key].append((object_id, bbox))
+
+ print(f"Total valid detections loaded: {sum(len(v) for v in detections.values())}")
+ return detections
+
+ def get_all_image_paths_and_labels(self):
+ """
+ Collects all image paths and their corresponding object labels.
+ Returns:
+ List of image paths and their associated object IDs (labels).
+ """
+ image_paths = []
+ labels = []
+
+ for img_dir in self.img_dirs:
+ if not os.path.exists(img_dir):
+ print(f"Warning: Image directory {img_dir} not found, skipping.")
+ continue
+
+ image_files = sorted(
+ [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith('.jpg') or f.endswith('.png')]
+ )
+
+ print(f"Found {len(image_files)} images in {img_dir}")
+
+ for image_file in image_files:
+ frame_id = int(os.path.basename(image_file).split('.')[0]) # Extract frame ID from filename
+ key = (frame_id, img_dir)
+
+ if key in self.detections:
+ for obj_id, bbox in self.detections[key]:
+ image_paths.append(image_file)
+ labels.append(obj_id) # Store the object ID as the label
+
+ print(f"Total valid images found: {len(image_paths)}")
+ return image_paths, labels
+
+ def __len__(self):
+ return len(self.image_paths)
+
+ def __getitem__(self, idx):
+ image_path = self.image_paths[idx]
+ label = self.labels[idx] # Retrieve the corresponding object ID
+
+ if not os.path.exists(image_path):
+ print(f"Warning: Image {image_path} not found, skipping.")
+ return None, None
+
+ image = cv2.imread(image_path)
+ if image is None:
+ print(f"Warning: Failed to load {image_path}, skipping.")
+ return None, None
+
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+ if self.transform:
+ image = self.transform(image)
+
+ return image, label # Return both image and object ID
+
+# Custom collate function to remove None values
+def custom_collate_fn(batch):
+ batch = [b for b in batch if b[0] is not None] # Remove None values
+ if len(batch) == 0:
+ return torch.empty(0), torch.empty(0) # Return empty tensors if no valid samples
+ return torch.utils.data.dataloader.default_collate(batch)
+
+# MOT16 Dataset File Paths
+img_dirs = [
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\img1',
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\img1',
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\img1',
+]
+
+det_files = [
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\det\det.txt',
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\det\det.txt',
+ r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\det\det.txt',
+]
+
+# Define Image Transformations
+transform = transforms.Compose([
+ transforms.ToPILImage(),
+ transforms.Resize((64, 64)),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])
+
+# Create Dataset and DataLoader
+train_dataset = TrainingData(img_dirs, det_files, transform)
+train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Training_Script.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Training_Script.py
new file mode 100644
index 0000000..991d3bd
--- /dev/null
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Training_Script.py
@@ -0,0 +1,46 @@
+import torch
+import torch.optim as optim
+import torch.nn as nn
+from CNN_Data_Processing import train_loader
+from CNN_model import CNNDeepSORT # Assuming this is your CNN model
+import cv2
+
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Define Triplet Loss
+criterion = nn.TripletMarginLoss(margin=1.0, p=2)
+
+# Initialize Model
+model = CNNDeepSORT().to(DEVICE)
+optimizer = optim.Adam(model.parameters(), lr=0.0001)
+
+def train(model, train_loader, criterion, optimizer, num_epochs):
+ for epoch in range(num_epochs):
+ model.train()
+ running_loss = 0.0
+
+ for i, (anchor, positive, negative) in enumerate(train_loader):
+ if anchor.shape[0] == 0:
+ continue # Skip empty batches
+
+ anchor, positive, negative = anchor.to(DEVICE), positive.to(DEVICE), negative.to(DEVICE)
+ optimizer.zero_grad()
+
+ anchor_embedding = model(anchor)
+ positive_embedding = model(positive)
+ negative_embedding = model(negative)
+
+ loss = criterion(anchor_embedding, positive_embedding, negative_embedding)
+ loss.backward()
+ optimizer.step()
+
+ running_loss += loss.item()
+ if i % 10 == 0:
+ print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}")
+
+ print(f"Epoch [{epoch+1}/{num_epochs}] - Average Loss: {running_loss / len(train_loader):.4f}")
+
+ torch.save(model.state_dict(), "CNNDeepSORT_final.pth")
+ print("Training complete! Model saved to CNNDeepSORT_final.pth")
+
+train(model, train_loader, criterion, optimizer, num_epochs=10)
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_model.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_model.py
new file mode 100644
index 0000000..6f9ec1f
--- /dev/null
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_model.py
@@ -0,0 +1,77 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# DeepSORT uses a CNN to create appearance-based IDs; this augments SORT by allowing the model to re-identify occluded objects.
+#This CNN focuses on learning spatial features.
+class CNNDeepSORT(nn.Module):
+
+ #Embedding_dim is the number of features or dimensions (independent variables) in the representation space.
+ #We chose 128 because it is a sweet spot, small enough for efficient training but large enough to capture unique features.
+ #128 dimensional embeddings is a standard in object tracking/re-identification models.
+
+ def __init__(self, embedding_dim = 128):
+ super(CNNDeepSORT, self).__init__()
+ '''Convolution Parameters:
+
+ in_channels: 3 Channels, RGB
+ out_channels: Number of feature maps the layer will produce, doubles per feature.
+ kernel_size: A filter that will essentially slide along each channel and capture features using dot product.
+ stride: The distance in which the kernel travels across the input channels.
+ padding: Extra pixels that are added around the input image before applying convolution, this is so image details aren't lost.
+ '''
+ # Batch Normalization normalizes activations between the layers during training. This reduces the covariate shift
+ # aka the change of the distribution of inputs from one layer to the next. Which ensures faster training.
+ #x(hat) = (x - mean)/std dev
+ #then y = w(x hat) + b
+ #We do this to normalize the embedding values before the activation function is applied, this also reduces overfitting (by regularizing the data)
+
+ #The first convolutional layer where the convolution math operation is performed. We are performing this in a 2D space.
+ self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
+ self.bn1 = nn.BatchNorm2d(32)
+
+ self.conv2 = nn.Conv2d(in_channels=32,out_channels=64, kernel_size=3, stride=1, padding=1)
+ self.bn2 = nn.BatchNorm2d(64)
+
+ self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
+ self.bn3 = nn.BatchNorm2d(128)
+
+ '''Global Average Pooling:
+ Used instead of multiple pooling layers because it reduces the spatial dimensions of
+ the feature map to 1x1, summarizing the entire feature map into 1 vector per channel. This reduces the loss of spatial detail, which is very important
+ for appearance embedding.
+ '''
+ #Since DeepSORT's goal is feature extraction and not classification, we only need a single fully connected layer with linear connections.
+ #The linear transformation of the neurons in the last conv layer to the dense layer is y=Wx+b, which extracts the features into an
+ #embedding space which is perfect for cosine similarity matching.
+ self.pool = nn.AdaptiveAvgPool2d((1,1))
+ self.fc = nn.Linear(in_features=128, out_features=embedding_dim)
+
+ #Forward propagation method for training the CNN.
+ def forward(self,x):
+ '''
+ Convolutional Layer:
+ input: Image
+ output: Feature maps that highlight different patterns in the image/frame
+
+ Batch Normalization:
+ input: Feature map and number of features
+ output: A feature map that will converge faster and reduce covariate shift.
+
+ Rectified Linear Unit:
+        input: The summation of all neurons in one layer's activations, multiplied by their weights and added to their biases
+ output: If input < 0, output is 0. If input > 0, output is input. This removes any negative values that do not meet a threshold.
+ It also introduces non-linearity to the network, allowing for complex learning.
+ '''
+ x = F.relu(self.bn1(self.conv1(x)))
+ x = F.relu(self.bn2(self.conv2(x)))
+ x = F.relu(self.bn3(self.conv3(x)))
+
+ x = self.pool(x) #Downsizes the spatial dimensions of the feature map to 1x1 using Global Average Pooling
+ x = torch.flatten(x,1) #Converts the multi-dimensional tensors into a 2D tensor, (batch size * features)
+ x = self.fc(x) #A linear layer that matches the feature map to the desired embedding dimension (128)
+ return F.normalize(x,p=2, dim=1) #Uses euclidean norm to normalize a tensor along a specific dimension
+
+#instantiate the model
+model = CNNDeepSORT()
+#print(model)
\ No newline at end of file
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/dataset.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/dataset.py
new file mode 100644
index 0000000..47b57d2
--- /dev/null
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/dataset.py
@@ -0,0 +1,81 @@
+import os
+import torch
+from torch.utils.data import Dataset # The base class we will inherit from to create our custom dataset
+from PIL import Image # Python Imaging Library (Pillow), used for opening and manipulating image files
+
+class Market1501(Dataset):
+ """
+ Custom PyTorch Dataset for the Market-1501 dataset.
+
+ This class handles loading images and parsing their person IDs (PIDs)
+ from the filenames to be used as integer labels for training a classification model.
+ """
+
+ # CHANGED: Added 'transform=None' to accept an image transformation pipeline.
+ def __init__(self, image_dir, transform=None):
+ """
+ Args:
+ image_dir (string): Directory path with all the training images.
+ transform (callable, optional): A function/transform to be applied to each image.
+ """
+ # Store the directory path for later use in __getitem__.
+ self.image_dir = image_dir
+
+ # Store the transform pipeline (e.g., resize, normalize) for later use.
+ self.transform = transform
+
+ # Create a list of all filenames in the directory that end with '.jpg'.
+ # This is a 'list comprehension', a compact way to build a list.
+ # It filters out any non-JPEG files (like '.txt' or system files) that might cause errors.
+ self.image_paths = [file for file in os.listdir(image_dir) if file.endswith(".jpg")]
+
+ # Create a set of unique Person IDs (PIDs) from the filenames.
+ # Example filename: '0002_c1s1_000451_01.jpg'. We split by '_' and take the first part '0002'.
+ # Using a 'set' automatically removes all duplicate PIDs, giving us a list of unique people.
+ pids_in_dataset = set([path.split("_")[0] for path in self.image_paths])
+
+ # Create a mapping dictionary to convert string PIDs to integer labels (0, 1, 2, ...).
+ # Neural networks and loss functions require integer labels, not strings.
+ # 'enumerate' provides a counter 'i' for each unique 'pid'.
+ # Example: {'0002': 0, '0007': 1, '-1': 2}
+ self.pid_to_label = {pid: i for i, pid in enumerate(pids_in_dataset)}
+
+ # Store the total number of unique classes (people).
+ # This is needed to correctly configure the output layer of our neural network.
+ self.num_classes = len(self.pid_to_label)
+
+ # The __getitem__ method defines how to retrieve a single sample (one image and its label) from the dataset.
+ # The DataLoader will call this method automatically for each index from 0 to len(dataset)-1.
+ def __getitem__(self, index):
+ # Retrieve the filename for the given index from our list of paths.
+ name = self.image_paths[index]
+
+ # Construct the full, platform-independent path to the image file.
+ # os.path.join is used so this code works on both Windows ('\') and Linux/Mac ('/').
+ path = os.path.join(self.image_dir, name)
+
+ # Open the image file using Pillow and ensure it's in RGB format (corresponds to 3 channels).
+ # This is important because some images might be in grayscale or have an alpha channel.
+ image = Image.open(path).convert("RGB")
+
+ # CHANGED: Apply the transformations to the image.
+ # This is a CRITICAL step. It will resize, normalize, and convert the PIL Image to a PyTorch Tensor.
+ if self.transform:
+ image = self.transform(image)
+
+ # Parse the filename again to get the string Person ID.
+ pid = name.split("_")[0]
+
+ # Use our mapping dictionary to look up the integer label for the corresponding PID.
+ label = self.pid_to_label[pid]
+
+ # Return the processed image tensor and its corresponding label as a tensor.
+ # The label must be a LongTensor (64-bit integer) for PyTorch's CrossEntropyLoss function.
+ return image, torch.tensor(label, dtype=torch.long)
+
+ # The __len__ method must return the total number of samples in the dataset.
+ # The DataLoader needs this to know the dataset's size for batching, shuffling, and epoch completion.
+ def __len__(self):
+ # Return the total count of the image paths we found during initialization.
+ return len(self.image_paths)
+
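+# Minimal usage sketch (illustrative; the dataset path is an assumption):
+#
+#   from torch.utils.data import DataLoader
+#   from torchvision import transforms
+#
+#   tfm = transforms.Compose([transforms.Resize((128, 64)), transforms.ToTensor()])
+#   ds = Market1501("Market-1501-v15.09.15/bounding_box_train", transform=tfm)
+#   loader = DataLoader(ds, batch_size=32, shuffle=True)
+#   images, labels = next(iter(loader))   # images: [32, 3, 128, 64], labels: [32]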
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/model.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/model.py
new file mode 100644
index 0000000..a1ac40c
--- /dev/null
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/model.py
@@ -0,0 +1,42 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class CNNdeepSORT(nn.Module):
+
+ def __init__(self, embedding_dim):
+ super().__init__()
+
+ self.convolution = nn.Sequential(
+ #Input Dimensions: [B, 3, H, W]
+            # in_channels = 3 (RGB); out_channels = the number of kernels / feature maps produced by each layer
+
+ # Stage 1
+ nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), # without padding, convolution shrinks images quickly, potentially losing info @ edges.
+ nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True),
+            nn.MaxPool2d(2),  # downsample by taking the maximum value in each 2x2 window -> H/2 x W/2
+
+ # Stage 2
+ nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True),
+ nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True),
+ nn.MaxPool2d(2), # H/2 x W/2
+
+ # Stage 3
+ nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(True),
+ nn.Conv2d(in_channels=256, out_channels=embedding_dim, kernel_size=3, padding=1), nn.BatchNorm2d(embedding_dim), nn.ReLU(True),
+
+            # Final reduction layer into a 1x1 embedding: [B, embedding_dim, 1, 1]
+ nn.AdaptiveAvgPool2d((1,1))
+ )
+
+ # Fully connected classifier for Re-ID training
+ #self.classifier = nn.Linear(in_features = embedding_dim, out_features=num_classes)
+
+ def forward(self, inputTensor):
+
+ output = self.convolution(inputTensor) # CNN encoder block -> [B, emb_dim, 1, 1]
+ output = torch.flatten(output,1) # [B, emb_dim] -> person's appearance vector
+
+        # The classifier head is commented out above, so the model acts as a feature
+        # extractor and returns the appearance embedding directly.
+        return output
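+
+# Quick shape check (illustrative):
+#   model = CNNdeepSORT(embedding_dim=128)
+#   crops = torch.randn(4, 3, 128, 64)   # a batch of 4 person crops
+#   embeddings = model(crops)            # -> torch.Size([4, 128])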
\ No newline at end of file
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-03-15/events.out.tfevents.1752418995.1660-SUPER.4968.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-03-15/events.out.tfevents.1752418995.1660-SUPER.4968.0
new file mode 100644
index 0000000..72213b0
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-03-15/events.out.tfevents.1752418995.1660-SUPER.4968.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-49-46/events.out.tfevents.1752421786.1660-SUPER.17424.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-49-46/events.out.tfevents.1752421786.1660-SUPER.17424.0
new file mode 100644
index 0000000..b46e664
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-49-46/events.out.tfevents.1752421786.1660-SUPER.17424.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_12-08-54/events.out.tfevents.1752422934.1660-SUPER.38948.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_12-08-54/events.out.tfevents.1752422934.1660-SUPER.38948.0
new file mode 100644
index 0000000..decb8d0
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_12-08-54/events.out.tfevents.1752422934.1660-SUPER.38948.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267721.1660-SUPER.43972.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267721.1660-SUPER.43972.0
new file mode 100644
index 0000000..a8e40c0
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267721.1660-SUPER.43972.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267725.1660-SUPER.16484.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267725.1660-SUPER.16484.0
new file mode 100644
index 0000000..a41083e
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267725.1660-SUPER.16484.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267865.1660-SUPER.45020.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267865.1660-SUPER.45020.0
new file mode 100644
index 0000000..3ab3372
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267865.1660-SUPER.45020.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267868.1660-SUPER.47964.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267868.1660-SUPER.47964.0
new file mode 100644
index 0000000..056e3ea
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267868.1660-SUPER.47964.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752268438.1660-SUPER.38940.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752268438.1660-SUPER.38940.0
new file mode 100644
index 0000000..d84f3e0
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752268438.1660-SUPER.38940.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269132.1660-SUPER.19448.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269132.1660-SUPER.19448.0
new file mode 100644
index 0000000..9752b05
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269132.1660-SUPER.19448.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269766.1660-SUPER.42140.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269766.1660-SUPER.42140.0
new file mode 100644
index 0000000..adf7856
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269766.1660-SUPER.42140.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752289904.1660-SUPER.17300.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752289904.1660-SUPER.17300.0
new file mode 100644
index 0000000..70cb2ef
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752289904.1660-SUPER.17300.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752346955.1660-SUPER.2188.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752346955.1660-SUPER.2188.0
new file mode 100644
index 0000000..e1b6b03
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752346955.1660-SUPER.2188.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347004.1660-SUPER.16044.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347004.1660-SUPER.16044.0
new file mode 100644
index 0000000..ce662cc
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347004.1660-SUPER.16044.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347084.1660-SUPER.6076.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347084.1660-SUPER.6076.0
new file mode 100644
index 0000000..ae6e6c1
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347084.1660-SUPER.6076.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752417153.1660-SUPER.30424.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752417153.1660-SUPER.30424.0
new file mode 100644
index 0000000..4a06bdd
Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752417153.1660-SUPER.30424.0 differ
diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/train.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/train.py
new file mode 100644
index 0000000..0d99346
--- /dev/null
+++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/train.py
@@ -0,0 +1,292 @@
+import os
+import sys
+import time
+import torch
+import torch.nn as nn
+import torchvision
+from datetime import datetime
+from torchvision import transforms
+from tqdm import tqdm
+from torch.utils.data import DataLoader
+from torch.utils.data import random_split
+from torch.utils.tensorboard import SummaryWriter
+from torch.amp import autocast, GradScaler # instead of torch.cuda.amp -- deprecated
+from torch.nn import TripletMarginWithDistanceLoss
+# Custom Modules
+import dataset
+from model import CNNdeepSORT
+
+# ---------------------------------------------------
+# TRAINING FUNCTION
+# ---------------------------------------------------
+def euclidean_distance(x, y):
+ return torch.norm(x - y, p=2, dim=1)
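+
+# Example: euclidean_distance(torch.zeros(2, 3), torch.ones(2, 3)) -> tensor([1.7321, 1.7321])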
+
+def training(model, dataloader, loss_fn, optimizer, device):
+ model.train() # Set model to training mode
+ total_loss = 0
+ total_correct = 0
+ total_samples = 0
+ progress_bar = tqdm(dataloader, "Training Progress...", unit = "batch")
+
+    for (anchor, positive, negative) in progress_bar:
+        anchor = anchor.to(device, non_blocking=True)
+ positive = positive.to(device, non_blocking=True)
+ negative = negative.to(device, non_blocking=True)
+ # Non_blocking lets compute overlap with data transfer (small but free speed-up). Launches host --> GPU copy async if tensor already pinned (it is)
+
+ with autocast(device_type=device):
+ anchor_out = model(anchor)
+ positive_out = model(positive)
+ negative_out = model(negative)
+
+ loss = loss_fn(anchor_out, positive_out, negative_out)
+
+        # Backward pass and optimization
+        scaler.scale(loss).backward()  # 1. Calculate gradients from the scaled loss.
+        scaler.unscale_(optimizer)  # Unscale gradients in place so clipping sees their true magnitudes.
+        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_val)  # Computes the total L2 norm of all gradients; if it exceeds `clip_val`, rescales them. Prevents gradient explosion.
+        scaler.step(optimizer)  # 2. Update the model's weights; the step is skipped if gradients overflowed (inf).
+        scaler.update()  # Adjusts the scale factor up/down depending on whether overflow was detected, keeping training stable.
+        optimizer.zero_grad(set_to_none=True)  # 3. Reset gradients for the next iteration. Setting .grad to None instead of zeroing saves a full CUDA memset each iteration.
+
+ # Metrics
+ total_loss += loss.item()
+ d_ap = euclidean_distance(anchor_out, positive_out)
+ d_an = euclidean_distance(anchor_out, negative_out)
+ total_correct += (d_ap < d_an).sum().item()
+ total_samples += anchor.size(0)
+
+ progress_bar.set_postfix(loss=loss.item())
+
+ avg_loss = total_loss / len(dataloader)
+ accuracy = total_correct / total_samples
+ return avg_loss, accuracy
+
+# ---------------------------------------------------
+# 1. SETUP
+# ---------------------------------------------------
+
+# This code will only run when you execute `python train.py` directly
+if __name__ == '__main__':
+ # Hyperparameters
+ LEARNING_RATE = 1e-3
+ EPOCHS = 50
+ BATCH_SIZE = 128 # e.g. 32, 64, 128
+ # Increased from 64. AMP frees memory, and more data per step = fewer GPU idles
+
+
+ # Move the model to the appropriate device (GPU if available)
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ print(f"Using device: {device}")
+
+ # Tries several convolution algorithms on the first batch, then uses fastest one thereafter.
+ # Pays off only if input sizes stay fixed (normalized to 256x128)
+ torch.backends.cudnn.benchmark = True
+
+    scaler = GradScaler()  # With FP16, gradients can under-flow to 0. The scaler multiplies the loss by a large factor, runs backward, then divides the gradients back down before the optimizer step.
+ clip_val = 1.0 # gradient-clip norm
+
+ # ---------------------------------------------------
+ # 2. MODEL, LOSS, OPTIMIZER
+ # ---------------------------------------------------
+
+ # Instantiate the model and move to device
+    # (Market-1501 has 751 training identities; here the network is used purely as a 128-d embedding extractor.)
+ model = CNNdeepSORT(embedding_dim=128).to(device)
+
+ # Define loss function, optimizer, and learning rate scheduler
+
+    # TODO: revisit the distance function; the appropriate margin depends on the distance chosen
+
+    loss_function = nn.TripletMarginWithDistanceLoss(distance_function=euclidean_distance, margin=0.3, swap=True)  # swap=True also considers the positive-negative distance, which effectively mines harder negatives
+ optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE) # AdamW is an upgraded version of Adam
+
+ scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # Decays LR by a factor of 0.1 every 10 epochs
+ # Gamma of 0.1 is common -- sharp reduction in LR, allowing model to switch from large adjustments to fine-tuning
+
+ # Load from saved checkpoint if found (most generalized model)
+ checkpoint_path = 'best_model_checkpoint.pth'
+ start_epoch = 0
+ if(len(sys.argv) > 1 and sys.argv[1].lower() == "load"):
+ if(os.path.exists(checkpoint_path)):
+ print(f"Loading checkpoint from {checkpoint_path}...")
+ checkpoint = torch.load(checkpoint_path)
+ model.load_state_dict(checkpoint['model_state_dict'])
+ optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
+ scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
+            if 'scaler_state_dict' in checkpoint:
+                scaler.load_state_dict(checkpoint['scaler_state_dict'])
+ start_epoch = checkpoint['epoch'] + 1
+ print(f"Resumed at epoch {start_epoch} | val_loss={checkpoint['loss']:.4f}")
+ else:
+ print("No checkpoint found, starting from scratch.")
+ else:
+ print("Training from scratch...")
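+    # Usage: `python train.py` trains from scratch; `python train.py load` resumes from best_model_checkpoint.pth.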
+
+ #TODO
+
+
+ # ---------------------------------------------------
+ # 3. DATA LOADING & SMALL VALIDATION SPLIT
+ # ---------------------------------------------------
+
+ # Define more robust transforms for training
+ # Images are resized and normalized for better model performance.
+ transform = transforms.Compose([
+ transforms.Resize((128, 64)), # native Market-1501
+ transforms.RandomHorizontalFlip(p=0.5), # Adds ~1% mAP with no speed cost
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # mean and std are pre-calculated mean/stdev of the ImageNet dataset.
+ # For each channel of the image: output[channel] = (input[channel] - mean[channel]) / std[channel]
+ # Standardizes the pixel value range to train more effectively. Helps converge faster
+ ])
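+    # e.g. a red-channel pixel value of 0.5 becomes (0.5 - 0.485) / 0.229 ≈ 0.066 after Normalize.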
+
+ # Load dataset and create DataLoader
+ Market1501_train_file_path = r'C:\Users\david\Downloads\Market-1501-v15.09.15\bounding_box_train'
+ train_data = dataset.Market1501(Market1501_train_file_path, transform=transform)
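+    # NOTE: the training and evaluation loops below expect each DataLoader batch to be an (anchor, positive, negative) triplet.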
+
+ # Only used for early-stopping / LR checks, NOT for final testing. Tiny split keeps epochs fast but still shows over-/under-fitting trends
+ val_len = int(0.05 * len(train_data)) # split 5 % off for quick val
+ train_len = len(train_data) - val_len
+
+ # split dataset before building both dataLoaders
+ train_ds, val_ds = random_split(train_data, [train_len, val_len])
+
+ # Entire training data
+ training_dataloader = DataLoader(
+ dataset=train_ds,
+ batch_size=BATCH_SIZE,
+ shuffle=True,
+ persistent_workers=True,# Keeps DataLoader workers alive across epochs and
+ prefetch_factor=4, # queues 4 batches per worker. Eliminates Windows process-spawn penalty; feeds GPU continuously
+ num_workers=4, # start with 4; 6–8 if CPU has threads to spare
+ pin_memory=True) # pin_memory puts tensors into pinned memory, allowing for faster transfer from CPU to GPU.
+ # Can significantly speed up training. Set to true with GPU.
+
+ val_loader = DataLoader(
+ val_ds,
+ batch_size=BATCH_SIZE,
+ shuffle=False, # no need to shuffle for evaluation
+ num_workers=4,
+ pin_memory=True,
+ persistent_workers=True)
+
+ # Evaluation helper
+ @torch.no_grad() # we don't track gradients here
+ def evaluate(model, loader):
+ """
+ Runs one full pass on `loader` with AMP and returns:
+ (mean_cross_entropy_loss, top-1_accuracy)
+ """
+ model.eval() # turn off dropout / batch-norm update
+ total_loss = 0.0
+ correct = 0
+ samples = 0
+
+ for anchor, positive, negative in loader:
+ anchor = anchor.to(device, non_blocking=True)
+ positive = positive.to(device, non_blocking=True)
+ negative = negative.to(device, non_blocking=True)
+
+ with autocast(device_type=device): # AMP even in eval -> less VRAM, faster
+ anchor_out = model(anchor)
+ positive_out = model(positive)
+ negative_out = model(negative)
+
+ loss = loss_function(anchor_out, positive_out, negative_out)
+
+ total_loss += loss.item()
+ # Triplet "accuracy": a triplet is correct when the anchor sits closer to its positive than to its negative
+ d_ap = euclidean_distance(anchor_out, positive_out)
+ d_an = euclidean_distance(anchor_out, negative_out)
+ correct += (d_ap < d_an).sum().item()
+
+ samples += anchor.size(0)
+
+ mean_loss = total_loss / len(loader)
+ accuracy = correct / samples
+ model.train() # restore training mode for caller
+ return mean_loss, accuracy
+
+ # ---------------------------------------------------
+ # 4. TENSORBOARD LOGGING (Initial)
+ # ---------------------------------------------------
+
+ print("Logging model graph and sample images to TensorBoard...")
+ # Get a single batch from the dataloader to log graph and images
+ anchor, positibe, negative = next(iter(training_dataloader))
+
+ print("Done.")
+
+ # ---------------------------------------------------
+ # 6. MAIN TRAINING LOOP
+ # ---------------------------------------------------
+ best_val_loss = float('inf')
+ patience = 5
+ patience_counter = 0
+
+ for epoch in range(start_epoch, EPOCHS):
+ start_time = time.time()
+
+ # Train for one epoch
+ avg_loss, accuracy = training(model, training_dataloader, loss_function, optimizer, device) #tells you if optimizer is doing its job on the data it sees
+ val_loss, val_acc = evaluate(model, val_loader) #tells you if the network is generalizing; early stopping
+
+ # Update the learning rate schedule
+ scheduler.step() # Without this, the LR never decays and the later fine-tuning phase never happens.
+
+ # ----- save best on *validation* loss -----
+ #if avg_loss < best_val_loss:
+ if val_loss < best_val_loss:
+ #best_val_loss = avg_loss
+ best_val_loss = val_loss
+ torch.save({
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+ 'scheduler_state_dict': scheduler.state_dict(),
+ 'scaler_state_dict': scaler.state_dict(),
+ 'loss': val_loss
+ }, 'best_model_checkpoint.pth')
+ print(f"Epoch {epoch+1}: val_loss improved to {val_loss:.4f}")
+ patience_counter = 0
+ else:
+ patience_counter += 1
+ print(f" Epoch {epoch+1}: Loss did not improve. Patience = {patience_counter}/{patience}")
+
+ # Print epoch summary
+ elapsed_time = time.time() - start_time
+ print(f"\nEpoch {epoch+1}/{EPOCHS} | Avg Loss: {avg_loss:.4f} | Acc: {accuracy:.2%} | Time: {elapsed_time:.2f}s")
+
+ # Always save latest weights
+ torch.save({
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+ 'scheduler_state_dict': scheduler.state_dict(),
+ 'scaler_state_dict': scaler.state_dict(),
+ 'loss': avg_loss
+ }, 'latest_model_checkpoint_epoch.pth')
+
+ # Early stopping check
+ if patience_counter >= patience:
+ print(f"\n Early stopping triggered after {epoch+1} epochs.")
+ break
+
+ # Log weight histograms
+
+
+ print("\nTraining Finished!")
+
+
+ # ---------------------------------------------------
+ # 7. CLEANUP
+ # ---------------------------------------------------
+
+
+
+ # To view the logs, open a terminal in your project's root directory and run:
+ # tensorboard --logdir=run
\ No newline at end of file
diff --git a/src/_archive_legacy/models/deepSORT/DataLoading.py b/src/_archive_legacy/models/deepSORT/DataLoading.py
index 59d95de..fc6b3f1 100644
--- a/src/_archive_legacy/models/deepSORT/DataLoading.py
+++ b/src/_archive_legacy/models/deepSORT/DataLoading.py
@@ -1,17 +1,17 @@
-import pandas as pd
-import torch
-class DataLoading:
- def load_youtube_data(self, csv_path):
- '''
- This method will load data from the Youtube BoundingBoxes data set from google.
- We'll use the data from this dataset to help train and test the data association metric for deepSORT
- '''
- df = pd.read_csv(csv_path, header=None, names=["youtube_id", "timestamp_ms", "class_id", "class_name", "object_id", "object_presence", "xmin", "xmax", "ymin", "ymax"])
- tracking_data = {}
-
- #Assuming the videos are in 30 FPS, we want to normalize the timestamp to frame indexes since they're in ms
-
-
-csv_path = "C:\Users\adamm\OneDrive\Documents\AI Projects\Kestrel\yt_bb_classification_validation.csv"
-load_data = DataLoading()
+import pandas as pd
+import torch
+class DataLoading:
+ def load_youtube_data(self, csv_path):
+ '''
+ This method will load data from the Youtube BoundingBoxes data set from google.
+ We'll use the data from this dataset to help train and test the data association metric for deepSORT
+ '''
+ df = pd.read_csv(csv_path, header=None, names=["youtube_id", "timestamp_ms", "class_id", "class_name", "object_id", "object_presence", "xmin", "xmax", "ymin", "ymax"])
+ tracking_data = {}
+
+ #Assuming the videos are in 30 FPS, we want to normalize the timestamp to frame indexes since they're in ms
+
+
+csv_path = "C:\Users\adamm\OneDrive\Documents\AI Projects\Kestrel\yt_bb_classification_validation.csv"
+load_data = DataLoading()
tracking_data = load_data.load_youtube_data(csv_path)
\ No newline at end of file
diff --git a/src/_archive_legacy/models/deepSORT/README.md b/src/_archive_legacy/models/deepSORT/README.md
index 983b7cc..57c5865 100644
--- a/src/_archive_legacy/models/deepSORT/README.md
+++ b/src/_archive_legacy/models/deepSORT/README.md
@@ -1,9 +1,9 @@
-# Project Kestrel: Deep-SORT
-________________________
-* Deep-SORT is a MOT algorithm that improves on SORT's ability to handle occlusion by using deep learning. A convolutional neural network is used to extract appearance based vectors from each object, then the data association algorithm can associate apperance vectors to tracks.
-## Adam Mouedden's Work Explained:
-* CNN_model.py: Contains the CNN model.
-* CNN_Data_Processing.py: Contains the data loader, trained on MOT 16.
-* CNN_Training_Script.py: Contains the training loop.
-* Data Association.py: The skeleton code for deep-SORT's data association algorithm.
+# Project Kestrel: Deep-SORT
+________________________
+* Deep-SORT is a MOT algorithm that improves on SORT's ability to handle occlusion by using deep learning. A convolutional neural network extracts appearance-based vectors from each object, and the data association algorithm then matches those appearance vectors to tracks.
+## Adam Mouedden's Work Explained:
+* CNN_model.py: Contains the CNN model.
+* CNN_Data_Processing.py: Contains the data loader, trained on MOT 16.
+* CNN_Training_Script.py: Contains the training loop.
+* Data Association.py: The skeleton code for deep-SORT's data association algorithm.
* Cost functions implemented from research paper: https://www.mdpi.com/2076-3417/12/3/1319
\ No newline at end of file
diff --git a/src/kestrel_clustering/kestrel_clustering/clustering_node.py b/src/kestrel_clustering/kestrel_clustering/clustering_node.py
new file mode 100644
index 0000000..143d5f9
--- /dev/null
+++ b/src/kestrel_clustering/kestrel_clustering/clustering_node.py
@@ -0,0 +1,105 @@
+# Clustering node: subscribes to the whitelisted tracks and emits a TrackCenter.msg (ROS 2 Jazzy)
+'''
+This file creates the clustering node, which sends a TrackCenter message to the next node in the pipeline.
+It subscribes to the input node's whitelisted tracks and publishes the center of their minimum bounding rectangle.
+'''
+import rclpy #ROS Common Library Python
+from rclpy.node import Node
+from kestrel_msgs.msg import TrackCenter
+from kestrel_msgs.msg import TrackArray
+
+
+#GLOBAL VARIABLES
+#Topics
+subscribed_topic = "/kestrel/whitelist_tracks" #David may need to change this once the input node is implemented
+published_topic = "/kestrel/track_center" #The next node in the ROS graph needs to subscribe to this one
+
+
+class ClusteringNode(Node):
+ def __init__(self): #Constructor
+ super().__init__('clustering_node') #Create a node in the ROS graph
+
+ queue_size = 10 # Default queue size
+
+ self.subscription = self.create_subscription(TrackArray, subscribed_topic, self.vision_callback, queue_size)
+ self.publisher_ = self.create_publisher(TrackCenter, published_topic, queue_size)
+
+
+ def _calculate_center(self, x1, y1, x2, y2):
+ return (x1+x2)/2 , (y1+y2)/2
+
+
+ def _minimum_bounding_rectangle(self, input_message):
+ #Image coordinate system involves top left of the frame being the origin
+ min_x = float('inf')
+ max_x = float('-inf')
+ min_y = float('inf')
+ max_y = float('-inf')
+
+ for track in input_message.tracks:
+
+ #Instead of comparing all 8 values, we can halve that number with the following logic:
+ #Take max_x for example: x2 is the bottom-right corner of any bbox, so its x value is always larger than x1.
+ #So we only need to compare max_x against x2, never against x1. The same reasoning applies to the other three bounds.
+ max_x = max(max_x, track.x2)
+ min_x = min(min_x, track.x1)
+ max_y = max(max_y, track.y2)
+ min_y = min(min_y, track.y1)
+
+
+ if min_x == float('inf'):
+ return None
+
+ return (min_x, min_y, max_x, max_y)
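+ # Illustrative example (hypothetical boxes): tracks (10, 20, 50, 80) and (30, 10, 70, 60)
+ # -> minimum bounding rectangle (10, 10, 70, 80), whose center is (40.0, 45.0)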
+
+
+ def _publish_center(self, input_message: TrackArray):
+ rectangle = self._minimum_bounding_rectangle(input_message)
+
+ if rectangle is None:
+ self.get_logger().debug("No tracks in message. Skipping publish.")
+ return
+
+ x1, y1, x2, y2 = rectangle
+
+ #Calculate the center and the extents of the minimum bounding rectangle
+ center_x, center_y = self._calculate_center(x1, y1, x2, y2)
+
+ #Initialize the output message
+ output_message = TrackCenter()
+
+ #Default parameters that come from the TrackArray message
+ output_message.header = input_message.header
+
+ #Newly calculated parameters, unique to the clustering node:
+ #Top left corner and bottom right corner
+ output_message.x1 = x1
+ output_message.y1 = y1
+ output_message.x2 = x2
+ output_message.y2 = y2
+
+ #Center of the minimum bounding rectangle
+ output_message.center_x = center_x
+ output_message.center_y = center_y
+
+ self.publisher_.publish(output_message)
+ self.get_logger().info(f'Publishing: {output_message}')
+
+
+ def vision_callback(self, input_message: TrackArray):
+ self.get_logger().info(f'Received: {input_message}')
+ self._publish_center(input_message) #Call the function that will then create the TrackCenter message and publish it
+
+
+def main():
+ rclpy.init(args=None)
+ print("Running the clustering node!")
+ clustering_node = ClusteringNode()
+ rclpy.spin(clustering_node)
+
+ clustering_node.destroy_node()
+ rclpy.shutdown()
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/src/kestrel_clustering/package.xml b/src/kestrel_clustering/package.xml
index c485d9d..7dc8d66 100644
--- a/src/kestrel_clustering/package.xml
+++ b/src/kestrel_clustering/package.xml
@@ -1,22 +1,22 @@
-
-
- kestrel_clustering
- 0.0.1
- ROS 2 node for clustering PEV group centroids
- David Orjuela
- MIT
-
- ament_python
-
- rclpy
- kestrel_msgs
-
- ament_copyright
- ament_flake8
- ament_pep257
- python3-pytest
-
-
- ament_python
-
-
+
+
+ kestrel_clustering
+ 0.0.1
+ ROS 2 node for clustering PEV group centroids
+ David Orjuela
+ MIT
+
+ ament_python
+
+ rclpy
+ kestrel_msgs
+
+ ament_copyright
+ ament_flake8
+ ament_pep257
+ python3-pytest
+
+
+ ament_python
+
+
diff --git a/src/kestrel_clustering/setup.cfg b/src/kestrel_clustering/setup.cfg
index 4a35092..8654622 100644
--- a/src/kestrel_clustering/setup.cfg
+++ b/src/kestrel_clustering/setup.cfg
@@ -1,4 +1,4 @@
-[develop]
-script_dir=$base/lib/kestrel_clustering
-[install]
-install_scripts=$base/lib/kestrel_clustering
+[develop]
+script_dir=$base/lib/kestrel_clustering
+[install]
+install_scripts=$base/lib/kestrel_clustering
diff --git a/src/kestrel_clustering/setup.py b/src/kestrel_clustering/setup.py
index 1887f71..7051a08 100644
--- a/src/kestrel_clustering/setup.py
+++ b/src/kestrel_clustering/setup.py
@@ -1,25 +1,26 @@
-from setuptools import find_packages, setup
-
-package_name = 'kestrel_clustering'
-
-setup(
- name=package_name,
- version='0.0.0',
- packages=find_packages(exclude=['test']),
- data_files=[
- ('share/ament_index/resource_index/packages',
- ['resource/' + package_name]),
- ('share/' + package_name, ['package.xml']),
- ],
- install_requires=['setuptools'],
- zip_safe=True,
- maintainer='davidorjuela',
- maintainer_email='davidorjuela@todo.todo',
- description='TODO: Package description',
- license='TODO: License declaration',
- tests_require=['pytest'],
- entry_points={
- 'console_scripts': [
- ],
- },
-)
+from setuptools import find_packages, setup
+
+package_name = 'kestrel_clustering'
+
+setup(
+ name=package_name,
+ version='0.0.0',
+ packages=find_packages(exclude=['test']),
+ data_files=[
+ ('share/ament_index/resource_index/packages',
+ ['resource/' + package_name]),
+ ('share/' + package_name, ['package.xml']),
+ ],
+ install_requires=['setuptools'],
+ zip_safe=True,
+ maintainer='davidorjuela',
+ maintainer_email='davidorjuela@todo.todo',
+ description='TODO: Package description',
+ license='TODO: License declaration',
+ tests_require=['pytest'],
+ entry_points={
+ 'console_scripts': [
+ 'clustering_node = kestrel_clustering.clustering_node:main',
+ ],
+ },
+)
diff --git a/src/kestrel_clustering/test/test_copyright.py b/src/kestrel_clustering/test/test_copyright.py
index 97a3919..282981a 100644
--- a/src/kestrel_clustering/test/test_copyright.py
+++ b/src/kestrel_clustering/test/test_copyright.py
@@ -1,25 +1,25 @@
-# Copyright 2015 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_copyright.main import main
-import pytest
-
-
-# Remove the `skip` decorator once the source file(s) have a copyright header
-@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.')
-@pytest.mark.copyright
-@pytest.mark.linter
-def test_copyright():
- rc = main(argv=['.', 'test'])
- assert rc == 0, 'Found errors'
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_copyright.main import main
+import pytest
+
+
+# Remove the `skip` decorator once the source file(s) have a copyright header
+@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.')
+@pytest.mark.copyright
+@pytest.mark.linter
+def test_copyright():
+ rc = main(argv=['.', 'test'])
+ assert rc == 0, 'Found errors'
diff --git a/src/kestrel_clustering/test/test_flake8.py b/src/kestrel_clustering/test/test_flake8.py
index 27ee107..625324e 100644
--- a/src/kestrel_clustering/test/test_flake8.py
+++ b/src/kestrel_clustering/test/test_flake8.py
@@ -1,25 +1,25 @@
-# Copyright 2017 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_flake8.main import main_with_errors
-import pytest
-
-
-@pytest.mark.flake8
-@pytest.mark.linter
-def test_flake8():
- rc, errors = main_with_errors(argv=[])
- assert rc == 0, \
- 'Found %d code style errors / warnings:\n' % len(errors) + \
- '\n'.join(errors)
+# Copyright 2017 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_flake8.main import main_with_errors
+import pytest
+
+
+@pytest.mark.flake8
+@pytest.mark.linter
+def test_flake8():
+ rc, errors = main_with_errors(argv=[])
+ assert rc == 0, \
+ 'Found %d code style errors / warnings:\n' % len(errors) + \
+ '\n'.join(errors)
diff --git a/src/kestrel_clustering/test/test_pep257.py b/src/kestrel_clustering/test/test_pep257.py
index b234a38..4569717 100644
--- a/src/kestrel_clustering/test/test_pep257.py
+++ b/src/kestrel_clustering/test/test_pep257.py
@@ -1,23 +1,23 @@
-# Copyright 2015 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_pep257.main import main
-import pytest
-
-
-@pytest.mark.linter
-@pytest.mark.pep257
-def test_pep257():
- rc = main(argv=['.', 'test'])
- assert rc == 0, 'Found code style errors / warnings'
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_pep257.main import main
+import pytest
+
+
+@pytest.mark.linter
+@pytest.mark.pep257
+def test_pep257():
+ rc = main(argv=['.', 'test'])
+ assert rc == 0, 'Found code style errors / warnings'
diff --git a/src/kestrel_input/kestrel_input/PartB_00000.jpg b/src/kestrel_input/kestrel_input/PartB_00000.jpg
new file mode 100644
index 0000000..73dd415
Binary files /dev/null and b/src/kestrel_input/kestrel_input/PartB_00000.jpg differ
diff --git a/src/kestrel_msgs/msg/ObjectTrack.msg b/src/kestrel_input/kestrel_input/__init__.py
similarity index 100%
rename from src/kestrel_msgs/msg/ObjectTrack.msg
rename to src/kestrel_input/kestrel_input/__init__.py
diff --git a/src/kestrel_input/kestrel_input/testing_GUI.py b/src/kestrel_input/kestrel_input/testing_GUI.py
new file mode 100644
index 0000000..6adcea8
--- /dev/null
+++ b/src/kestrel_input/kestrel_input/testing_GUI.py
@@ -0,0 +1,68 @@
+import dearpygui.dearpygui as dpg
+import dearpygui.demo as demo
+import cv2
+import numpy as np
+
+# For Testing
+image_path = "PartB_00000.jpg"
+image_array = cv2.imread(image_path, cv2.IMREAD_COLOR) # 3-channel BGR
+image_array = cv2.resize(image_array, (100, 100)) # if needed
+image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB) # OpenCV loads BGR; convert so the texture really is RGBA
+alpha = np.full((image_array.shape[0], image_array.shape[1], 1), 255, dtype=np.uint8)
+rgba = np.concatenate([image_array, alpha], axis=2) # HxWx4, RGBA uint8
+tex = (rgba.astype(np.float32) / 255.0).ravel() # flat float32 in [0,1]
+
+dpg.create_context()
+dpg.create_viewport(title='Kestrel Tracking Management', width=1080, height=1920)
+
+texture_data = image_array
+
+# Test add texture
+# for i in range(0, 100 * 100):
+# texture_data.append(255 / 255)
+# texture_data.append(0)
+# texture_data.append(255 / 255)
+# texture_data.append(255 / 255)
+
+with dpg.texture_registry():
+ texture_id = dpg.add_dynamic_texture(width=100, height=100, default_value=tex, tag="texture_tag") # Textures cannot be resized once created
+
+def _update_dynamic_textures(sender, app_data, user_data):
+ new_color = dpg.get_value(sender)
+ new_color[0] = new_color[0] / 255
+ new_color[1] = new_color[1] / 255
+ new_color[2] = new_color[2] / 255
+ new_color[3] = new_color[3] / 255
+
+ new_texture_data = []
+ for i in range(0, 100 * 100):
+ new_texture_data.append(new_color[0])
+ new_texture_data.append(new_color[1])
+ new_texture_data.append(new_color[2])
+ new_texture_data.append(new_color[3])
+
+ dpg.set_value("texture_tag", new_texture_data)
+
+with dpg.window(tag="Primary Window", label="Whitelist Tracks"):
+ dpg.add_text("Select the bounding boxes you wish to track.")
+ dpg.set_primary_window("Primary Window", True)
+ with dpg.table(header_row=False):
+ dpg.add_table_column()
+ dpg.add_table_column()
+ dpg.add_table_column()
+ dpg.add_table_column()
+ dpg.add_table_column()
+
+ for i in range(0, 7):
+ with dpg.table_row():
+ for j in range(0, 6):
+ dpg.add_image("texture_tag")
+
+ # dpg.add_color_picker((255, 0, 255, 255), label="Texture",
+ # no_side_preview=True, alpha_bar=True, width=200,
+ # callback=_update_dynamic_textures)
+
+
+dpg.setup_dearpygui()
+dpg.show_viewport()
+dpg.start_dearpygui()
+dpg.destroy_context()
\ No newline at end of file
diff --git a/src/kestrel_input/kestrel_input/whitelistTracks.py b/src/kestrel_input/kestrel_input/whitelistTracks.py
new file mode 100644
index 0000000..3f2aff6
--- /dev/null
+++ b/src/kestrel_input/kestrel_input/whitelistTracks.py
@@ -0,0 +1,296 @@
+import rclpy
+from rclpy.node import Node
+import cv2
+import dearpygui.dearpygui as dpg
+import numpy as np
+import threading
+from sensor_msgs.msg import Image
+from kestrel_msgs.msg import TrackArray, Track as TrackMsg
+from std_msgs.msg import Header
+from builtin_interfaces.msg import Time
+from collections import OrderedDict
+from typing import Dict, List, Optional, Tuple, Set
+
+from cv_bridge import CvBridge
+
+def clip_bbox_to_image(bbox, w: int, h: int) -> Optional[Tuple[int, int, int, int]]:
+ """Clip [x1,y1,x2,y2] to image bounds; return None if invalid/empty."""
+ x1, y1, x2, y2 = bbox
+ x1 = int(np.clip(x1, 0, max(0, w - 1)))
+ x2 = int(np.clip(x2, 0, max(0, w - 1)))
+ y1 = int(np.clip(y1, 0, max(0, h - 1)))
+ y2 = int(np.clip(y2, 0, max(0, h - 1)))
+ if x2 <= x1 or y2 <= y1:
+ return None
+ return x1, y1, x2, y2
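+ # e.g. clip_bbox_to_image((-5, 10, 700, 480), w=640, h=480) -> (0, 10, 639, 479)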
+
+def ns_to_time(timestamp_ns: int) -> Time:
+ t = Time()
+ t.sec = int(timestamp_ns // 1_000_000_000)
+ t.nanosec = int(timestamp_ns % 1_000_000_000)
+ return t
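+ # e.g. ns_to_time(1_500_000_000) -> Time(sec=1, nanosec=500_000_000)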
+
+class InputNode(Node):
+ """
+ Subscribes to camera frames and TrackArray; time-matches them,
+ shows per-ID crops in a GUI, and publishes a whitelist of selected IDs.
+ """
+ def __init__(self):
+ super().__init__('input_node')
+
+ # --- Buffers / state ---
+ self.images: "OrderedDict[int, np.ndarray]" = OrderedDict()
+ self.track_batches: "OrderedDict[int, List[TrackMsg]]" = OrderedDict()
+
+ self.max_len: int = 30
+ self.delta_ns: int = int(30e6) # 30 ms tolerance
+
+ self.current_ts: Optional[int] = None
+ self.current_tracks_by_id: Dict[int, TrackMsg] = {}
+ # display_items: id -> {"crop": np.ndarray, "conf": float, "class": str}
+ self.display_items: Dict[int, Dict[str, object]] = {}
+
+ # Selected (whitelisted) IDs
+ self.selected_tracks: Set[int] = set()
+
+ # GUI assets
+ self.bridge = CvBridge()
+ self.thumb_size = (128, 128) # W,H for thumbnails
+ self.tex_tags_by_id: Dict[int, str] = {} # id -> texture tag
+ self.image_item_by_id: Dict[int, str] = {} # id -> image item tag
+ self.gui_initialized = False
+
+ # --- Initialize ROS publishers/subscribers ---
+
+ # Raw images from YOLO, with timestamp
+ self.image_subscriber = self.create_subscription(Image, '/camera/image_raw', self.image_callback, 10)
+ # Array of Tracks from tracking_node -- each track is a bounding box with timestamp
+ self.tracking_subscriber = self.create_subscription(TrackArray, '/kestrel/tracks', self.tracking_callback, 10)
+ # Publish whitelisted array of tracks for the camera to track
+ self.pub_input = self.create_publisher(TrackArray, '/kestrel/whitelist_tracks', 10)
+
+ # ----------------- Callbacks -----------------
+
+ # receives raw image from /camera/image_raw
+ def image_callback(self, msg: Image):
+ # convert ROS stamp (sec, nanosec) to combined timestamp in nano sec
+ ts = msg.header.stamp.sec * 1_000_000_000 + msg.header.stamp.nanosec # keep ints so buffer keys stay exact
+
+ # Convert to RGBA for dearpygui
+ frame = self.bridge.imgmsg_to_cv2(msg, "rgba8")
+
+ # Buffer and evict
+ # mapping timestamp in nanosec as key, to the actual image as value
+ self.images[ts] = frame
+ while len(self.images) > self.max_len:
+ self.images.popitem(last=False)
+
+ # Try to match with an existing track batch
+ self._try_match_after_new_image(ts)
+
+ # receives TrackArray.msg from /kestrel/tracks
+ def tracking_callback(self, msg: TrackArray):
+ # All individual track timestamps should match TrackArray (msg).header timestamp
+ ts = msg.header.stamp.sec * 1_000_000_000 + msg.header.stamp.nanosec
+
+ # Buffer and evict
+ self.track_batches[ts] = list(msg.tracks)
+ while len(self.track_batches) > self.max_len:
+ self.track_batches.popitem(last=False)
+
+ # Try to match with an existing image
+ self._try_match_after_new_tracks(ts)
+
+ # ----------------- Matching & View Build -----------------
+ def _find_nearest_key(self, target_ts: int, keys: List[int], tol_ns: int) -> Optional[int]:
+ """Return key with smallest |key - target_ts| within tol_ns, else None."""
+ if not keys:
+ return None
+ diffs = [(abs(k - target_ts), k) for k in keys]
+ diffs.sort()
+ best_diff, best_key = diffs[0]
+ return best_key if best_diff <= tol_ns else None
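+ # e.g. target_ts=1_000_000_000, keys=[960_000_000, 1_020_000_000], tol_ns=30_000_000
+ # -> returns 1_020_000_000 (20 ms away); returns None if nothing is within the tolerance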
+
+ def _try_match_after_new_image(self, image_ts: int):
+ # Find nearest track batch
+ nearest_tracks_ts = self._find_nearest_key(image_ts, list(self.track_batches.keys()), self.delta_ns)
+ if nearest_tracks_ts is None:
+ return
+ self._build_current_view(image_ts, nearest_tracks_ts)
+
+ def _try_match_after_new_tracks(self, tracks_ts: int):
+ # Find nearest image
+ nearest_image_ts = self._find_nearest_key(tracks_ts, list(self.images.keys()), self.delta_ns)
+ if nearest_image_ts is None:
+ return
+ self._build_current_view(nearest_image_ts, tracks_ts)
+
+ def _build_current_view(self, image_ts: int, tracks_ts: int):
+ """Set current view: compute crops for each track and refresh GUI."""
+ frame = self.images.get(image_ts, None)
+ tracks = self.track_batches.get(tracks_ts, None)
+ if frame is None or tracks is None:
+ return
+
+ h, w = frame.shape[:2]
+ self.current_ts = tracks_ts
+ self.current_tracks_by_id = {}
+ self.display_items = {}
+
+ for t in tracks:
+ # Map ID -> message
+ self.current_tracks_by_id[int(t.id)] = t
+
+ # Clip and crop
+ clipped = clip_bbox_to_image((t.x1, t.y1, t.x2, t.y2), w, h)
+ if clipped is None:
+ continue # skip invalid boxes
+ x1, y1, x2, y2 = clipped
+ crop = frame[y1:y2, x1:x2]
+
+ # Resize to thumb for GUI
+ if crop.size == 0:
+ continue
+ crop_resized = cv2.resize(crop, self.thumb_size, interpolation=cv2.INTER_LINEAR)
+
+ self.display_items[int(t.id)] = {
+ "crop": crop_resized,
+ "conf": float(t.conf),
+ "class": t.class_name
+ }
+
+ # Update GUI from display_items
+ if self.gui_initialized:
+ self._refresh_gui()
+
+
+ # ----------------- GUI -----------------
+
+ def run_gui(self):
+ """Build GUI once and then rely on _refresh_gui() to update textures."""
+ dpg.create_context()
+ dpg.create_viewport(title='Kestrel Tracking Management', width=900, height=700)
+
+ # Texture registry; dynamic textures are attached to it later via parent="tex_registry"
+ dpg.add_texture_registry(tag="tex_registry", show=True)
+
+ with dpg.window(tag="Primary Window", label="Whitelist Tracks", width=880, height=660):
+ dpg.set_primary_window("Primary Window", True)
+ dpg.add_text("Click a thumbnail to toggle selection; selected items are highlighted.")
+ dpg.add_separator()
+ # A grid container; we'll add image buttons per ID
+ dpg.add_child_window(tag="grid", width=-1, height=-1)
+
+ dpg.setup_dearpygui()
+ dpg.show_viewport()
+ self.gui_initialized = True
+
+ # Initial fill (in case we had a match before GUI launched)
+ self._refresh_gui()
+
+ dpg.start_dearpygui()
+ dpg.destroy_context()
+
+ # On GUI close, publish current whitelist once (optional)
+ self.publish_whitelist()
+
+ def _refresh_gui(self):
+ """Create/update textures & image widgets per track ID from self.display_items."""
+ if not dpg.does_item_exist("grid"):
+ return
+
+ # Remove widgets for IDs no longer present
+ for old_id in list(self.image_item_by_id.keys()):
+ if old_id not in self.display_items:
+ # delete image widget and texture
+ if dpg.does_item_exist(self.image_item_by_id[old_id]):
+ dpg.delete_item(self.image_item_by_id[old_id])
+ if dpg.does_item_exist(self.tex_tags_by_id[old_id]):
+ dpg.delete_item(self.tex_tags_by_id[old_id])
+ self.image_item_by_id.pop(old_id, None)
+ self.tex_tags_by_id.pop(old_id, None)
+
+ # Add/update current IDs
+ for tid, item in self.display_items.items():
+ crop = item["crop"] # RGBA 128x128
+ # TODO: Ensure RGBA; if source is BGRA, convert here
+ if crop.shape[2] == 3:
+ crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGBA)
+
+ # Normalize to [0,1] and flatten for Dear PyGui texture
+ tex_data = (crop.astype(np.float32) / 255.0).reshape(-1)
+
+ # Create texture if missing
+ if tid not in self.tex_tags_by_id or not dpg.does_item_exist(self.tex_tags_by_id[tid]):
+ tex_tag = f"tex_{tid}"
+ dpg.add_dynamic_texture(
+ width=self.thumb_size[0], height=self.thumb_size[1],
+ default_value=tex_data, tag=tex_tag, parent="tex_registry"
+ )
+ self.tex_tags_by_id[tid] = tex_tag
+ else:
+ dpg.set_value(self.tex_tags_by_id[tid], tex_data)
+
+ # Create/Update image button widget
+ selected = (tid in self.selected_tracks)
+ tint = (51, 255, 51, 255) if selected else (255, 255, 255, 255) # Dear PyGui colors are 0-255 RGBA
+ tooltip = f"ID {tid} | conf {item['conf']:.2f} | {item['class']}"
+
+ if tid not in self.image_item_by_id or not dpg.does_item_exist(self.image_item_by_id[tid]):
+ btn_tag = f"img_{tid}"
+ with dpg.group(parent="grid"):
+ dpg.add_image_button(
+ texture_tag=self.tex_tags_by_id[tid],
+ tag=btn_tag,
+ width=self.thumb_size[0],
+ height=self.thumb_size[1],
+ tint_color=tint,
+ callback=self._on_tile_click,
+ user_data=tid
+ )
+ dpg.add_text(tooltip)
+ self.image_item_by_id[tid] = btn_tag
+ else:
+ dpg.configure_item(self.image_item_by_id[tid], texture_tag=self.tex_tags_by_id[tid], tint_color=tint)
+ # The text below the button isn't stored; simplest is to ignore or recreate group if needed.
+
+ def _on_tile_click(self, sender, app_data, user_data):
+ """Toggle selection for a given track ID and refresh the tile tint."""
+ tid = int(user_data)
+ if tid in self.selected_tracks:
+ self.selected_tracks.remove(tid)
+ else:
+ self.selected_tracks.add(tid)
+ # Update tint immediately
+ if tid in self.image_item_by_id:
+ tint = (51, 255, 51, 255) if tid in self.selected_tracks else (255, 255, 255, 255)
+ dpg.configure_item(self.image_item_by_id[tid], tint_color=tint)
+
+ # ----------------- Whitelist publisher -----------------
+
+ def publish_whitelist(self):
+ """Publish currently selected tracks as a TrackArray."""
+ if not self.selected_tracks or not self.current_tracks_by_id or self.current_ts is None:
+ return
+
+ whitelisted = [self.current_tracks_by_id[tid] for tid in self.selected_tracks if tid in self.current_tracks_by_id]
+ if not whitelisted:
+ return
+
+ arr = TrackArray()
+ hdr = Header()
+ hdr.stamp = ns_to_time(self.current_ts)
+ # hdr.frame_id could be set if you carry it through your pipeline
+ arr.header = hdr
+ arr.tracks = whitelisted
+ self.pub_input.publish(arr)
+
+
+def main(args=None):
+ rclpy.init(args=args)
+ node = InputNode()
+ # Run the ROS 2 executor in a background thread so the blocking Dear PyGui loop can own the main thread.
+ # (Sketch: assumes cross-thread dpg.set_value/configure_item calls are acceptable here, as the callbacks above already make them.)
+ spin_thread = threading.Thread(target=rclpy.spin, args=(node,), daemon=True)
+ spin_thread.start()
+ node.run_gui() # blocks until the GUI window is closed, then publishes the current whitelist
+ node.destroy_node()
+ rclpy.shutdown()
+
+if __name__ == '__main__':
+ main()
diff --git a/src/kestrel_input/package.xml b/src/kestrel_input/package.xml
new file mode 100644
index 0000000..354ed12
--- /dev/null
+++ b/src/kestrel_input/package.xml
@@ -0,0 +1,18 @@
+
+
+
+ kestrel_input
+ 0.0.0
+ TODO: Package description
+ davidorjuela
+ TODO: License declaration
+
+ ament_copyright
+ ament_flake8
+ ament_pep257
+ python3-pytest
+
+
+ ament_python
+
+
diff --git a/src/kestrel_input/requirements.txt b/src/kestrel_input/requirements.txt
new file mode 100644
index 0000000..873c0fc
--- /dev/null
+++ b/src/kestrel_input/requirements.txt
@@ -0,0 +1,3 @@
+opencv-python
+dearpygui
+cv_bridge
diff --git a/src/kestrel_msgs/msg/TrackCentroid.msg b/src/kestrel_input/resource/kestrel_input
similarity index 100%
rename from src/kestrel_msgs/msg/TrackCentroid.msg
rename to src/kestrel_input/resource/kestrel_input
diff --git a/src/kestrel_input/setup.cfg b/src/kestrel_input/setup.cfg
new file mode 100644
index 0000000..786b8a7
--- /dev/null
+++ b/src/kestrel_input/setup.cfg
@@ -0,0 +1,4 @@
+[develop]
+script_dir=$base/lib/kestrel_input
+[install]
+install_scripts=$base/lib/kestrel_input
diff --git a/src/kestrel_input/setup.py b/src/kestrel_input/setup.py
new file mode 100644
index 0000000..f9238e2
--- /dev/null
+++ b/src/kestrel_input/setup.py
@@ -0,0 +1,25 @@
+from setuptools import find_packages, setup
+
+package_name = 'kestrel_input'
+
+setup(
+ name=package_name,
+ version='0.0.0',
+ packages=find_packages(exclude=['test']),
+ data_files=[
+ ('share/ament_index/resource_index/packages',
+ ['resource/' + package_name]),
+ ('share/' + package_name, ['package.xml']),
+ ],
+ install_requires=['setuptools'],
+ zip_safe=True,
+ maintainer='davidorjuela',
+ maintainer_email='davidorjuela@todo.todo',
+ description='TODO: Package description',
+ license='TODO: License declaration',
+ entry_points={
+ 'console_scripts': [
+ 'whitelistTracks = kestrel_input.whitelistTracks:main',
+ ],
+ },
+)
diff --git a/src/kestrel_input/test/test_copyright.py b/src/kestrel_input/test/test_copyright.py
new file mode 100644
index 0000000..97a3919
--- /dev/null
+++ b/src/kestrel_input/test/test_copyright.py
@@ -0,0 +1,25 @@
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_copyright.main import main
+import pytest
+
+
+# Remove the `skip` decorator once the source file(s) have a copyright header
+@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.')
+@pytest.mark.copyright
+@pytest.mark.linter
+def test_copyright():
+ rc = main(argv=['.', 'test'])
+ assert rc == 0, 'Found errors'
diff --git a/src/kestrel_input/test/test_flake8.py b/src/kestrel_input/test/test_flake8.py
new file mode 100644
index 0000000..27ee107
--- /dev/null
+++ b/src/kestrel_input/test/test_flake8.py
@@ -0,0 +1,25 @@
+# Copyright 2017 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_flake8.main import main_with_errors
+import pytest
+
+
+@pytest.mark.flake8
+@pytest.mark.linter
+def test_flake8():
+ rc, errors = main_with_errors(argv=[])
+ assert rc == 0, \
+ 'Found %d code style errors / warnings:\n' % len(errors) + \
+ '\n'.join(errors)
diff --git a/src/kestrel_input/test/test_pep257.py b/src/kestrel_input/test/test_pep257.py
new file mode 100644
index 0000000..b234a38
--- /dev/null
+++ b/src/kestrel_input/test/test_pep257.py
@@ -0,0 +1,23 @@
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_pep257.main import main
+import pytest
+
+
+@pytest.mark.linter
+@pytest.mark.pep257
+def test_pep257():
+ rc = main(argv=['.', 'test'])
+ assert rc == 0, 'Found code style errors / warnings'
diff --git a/src/kestrel_msgs/CMakeLists.txt b/src/kestrel_msgs/CMakeLists.txt
index c65d346..06b1623 100644
--- a/src/kestrel_msgs/CMakeLists.txt
+++ b/src/kestrel_msgs/CMakeLists.txt
@@ -1,38 +1,37 @@
-message(STATUS "PACKAGE_XML_PATH=${CMAKE_CURRENT_SOURCE_DIR}/package.xml")
-file(READ "${CMAKE_CURRENT_SOURCE_DIR}/package.xml" PKGXML)
-message(STATUS "XML_CONTENT=${PKGXML}")
-
-cmake_minimum_required(VERSION 3.10)
-project(kestrel_msgs)
-
-find_package(ament_cmake REQUIRED)
-find_package(rosidl_default_generators REQUIRED)
-find_package(std_msgs REQUIRED)
-find_package(geometry_msgs REQUIRED)
-find_package(diagnostic_msgs REQUIRED)
-#find_package(vision_msgs REQUIRED)
-
-rosidl_generate_interfaces(${PROJECT_NAME}
- "msg/EmbeddedDetection2D.msg"
- "msg/EmbeddedDetection2DArray.msg"
- "msg/CameraCommand.msg"
- "msg/FlightStatus.msg"
- "msg/ObjectTrack.msg"
- "msg/ObstacleGrid.msg"
- "msg/ProximityAlert.msg"
- "msg/SensorArray.msg"
- "msg/SensorReading.msg"
- "msg/SystemHealth.msg"
- "msg/TrackCentroid.msg"
- "srv/CalibrateCamera.srv"
- "srv/GetSystemStatus.srv"
- "srv/ResetSensors.srv"
- "srv/SetFlightMode.srv"
- "srv/TriggerEmergencyStop.srv"
- "action/FollowTarget.action"
- "action/NavigateToWaypoint.action"
- DEPENDENCIES std_msgs geometry_msgs diagnostic_msgs#vision_msgs
-)
-
-ament_export_dependencies(rosidl_default_runtime diagnostic_msgs)
-ament_package()
+message(STATUS "PACKAGE_XML_PATH=${CMAKE_CURRENT_SOURCE_DIR}/package.xml")
+file(READ "${CMAKE_CURRENT_SOURCE_DIR}/package.xml" PKGXML)
+message(STATUS "XML_CONTENT=${PKGXML}")
+
+cmake_minimum_required(VERSION 3.10)
+project(kestrel_msgs)
+
+find_package(ament_cmake REQUIRED)
+find_package(rosidl_default_generators REQUIRED)
+find_package(std_msgs REQUIRED)
+find_package(geometry_msgs REQUIRED)
+
+rosidl_generate_interfaces(${PROJECT_NAME}
+ "msg/Detection.msg"
+ "msg/DetectionArray.msg"
+ "msg/Track.msg"
+ "msg/TrackArray.msg"
+ "msg/CameraCommand.msg"
+ "msg/FlightStatus.msg"
+ "msg/ObstacleGrid.msg"
+ "msg/ProximityAlert.msg"
+ "msg/SensorArray.msg"
+ "msg/SensorReading.msg"
+ "msg/SystemHealth.msg"
+ "msg/TrackCenter.msg"
+ "srv/CalibrateCamera.srv"
+ "srv/GetSystemStatus.srv"
+ "srv/ResetSensors.srv"
+ "srv/SetFlightMode.srv"
+ "srv/TriggerEmergencyStop.srv"
+ "action/FollowTarget.action"
+ "action/NavigateToWaypoint.action"
+ DEPENDENCIES std_msgs geometry_msgs
+)
+
+ament_export_dependencies(rosidl_default_runtime)
+ament_package()
diff --git a/src/kestrel_msgs/action/FollowTarget.action b/src/kestrel_msgs/action/FollowTarget.action
index ea915eb..b48d6b9 100644
--- a/src/kestrel_msgs/action/FollowTarget.action
+++ b/src/kestrel_msgs/action/FollowTarget.action
@@ -1,11 +1,11 @@
-# Minimal Placeholder for build
-# Goal
-int32 dummy_goal
----
-
-# Result
-bool dummy_result
----
-
-# Feedback
-float32 dummy_feedback
+# Minimal Placeholder for build
+# Goal
+int32 dummy_goal
+---
+
+# Result
+bool dummy_result
+---
+
+# Feedback
+float32 dummy_feedback
diff --git a/src/kestrel_msgs/action/NavigateToWaypoint.action b/src/kestrel_msgs/action/NavigateToWaypoint.action
index ea915eb..b48d6b9 100644
--- a/src/kestrel_msgs/action/NavigateToWaypoint.action
+++ b/src/kestrel_msgs/action/NavigateToWaypoint.action
@@ -1,11 +1,11 @@
-# Minimal Placeholder for build
-# Goal
-int32 dummy_goal
----
-
-# Result
-bool dummy_result
----
-
-# Feedback
-float32 dummy_feedback
+# Minimal Placeholder for build
+# Goal
+int32 dummy_goal
+---
+
+# Result
+bool dummy_result
+---
+
+# Feedback
+float32 dummy_feedback
diff --git a/src/kestrel_msgs/msg/CameraCommand.msg b/src/kestrel_msgs/msg/CameraCommand.msg
index 6db9c42..e69de29 100644
--- a/src/kestrel_msgs/msg/CameraCommand.msg
+++ b/src/kestrel_msgs/msg/CameraCommand.msg
@@ -1,8 +0,0 @@
-# instructions to move the camera up ig
-
-# the values are typically in radians
-# Positive pan: right, Positive tilt: up
-
-float64 pan # Pan angle
-float64 tilt # Tilt angle
-
diff --git a/src/kestrel_msgs/msg/EmbeddedDetection2D.msg b/src/kestrel_msgs/msg/Detection.msg
similarity index 92%
rename from src/kestrel_msgs/msg/EmbeddedDetection2D.msg
rename to src/kestrel_msgs/msg/Detection.msg
index 8b7c410..fe0be05 100644
--- a/src/kestrel_msgs/msg/EmbeddedDetection2D.msg
+++ b/src/kestrel_msgs/msg/Detection.msg
@@ -1,16 +1,16 @@
-std_msgs/Header header
-float32 x1
-float32 y1
-float32 x2
-float32 y2
-
-float32 w
-float32 h
-
-float32 center_x
-float32 center_y
-
-float32 conf
-string class_name
-
+std_msgs/Header header
+float32 x1
+float32 y1
+float32 x2
+float32 y2
+
+float32 w
+float32 h
+
+float32 center_x
+float32 center_y
+
+float32 conf
+string class_name
+
float32[128] embedding
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/DetectionArray.msg b/src/kestrel_msgs/msg/DetectionArray.msg
new file mode 100644
index 0000000..6767302
--- /dev/null
+++ b/src/kestrel_msgs/msg/DetectionArray.msg
@@ -0,0 +1,2 @@
+std_msgs/Header header
+kestrel_msgs/Detection[] detections
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/EmbeddedDetection2DArray.msg b/src/kestrel_msgs/msg/EmbeddedDetection2DArray.msg
deleted file mode 100644
index 663e236..0000000
--- a/src/kestrel_msgs/msg/EmbeddedDetection2DArray.msg
+++ /dev/null
@@ -1,2 +0,0 @@
-std_msgs/Header header
-kestrel_msgs/EmbeddedDetection2D[] detections
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/FlightStatus.msg b/src/kestrel_msgs/msg/FlightStatus.msg
index 887f326..e69de29 100644
--- a/src/kestrel_msgs/msg/FlightStatus.msg
+++ b/src/kestrel_msgs/msg/FlightStatus.msg
@@ -1,26 +0,0 @@
-# Current state of the drone (flying, landed, etc.)
-
-# Header for timestamp
-
-std_msgs/Header header
-
-#Flight mode constraints for clarity
-
-uint8 MODE_UNKNOWN = 0
-uint8 MODE_MANUAL = 1
-uint8 MODE_AUTO = 2
-uint8 MODE_LANDING = 3
-uint8 MODE_EMERGENCY = 4
-uint8 MODE_LANDED = 5
-
-# Current flight mode
-uint8 flight_mode
-
-# Armed state of the vehicle
-bool armed
-
-# Current battery percentage
-float32 battery_percent
-
-# Propeller speeds in RPM. The array order SHOULD be consistent.
-uint8[] propeller_speeds
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/ObstacleGrid.msg b/src/kestrel_msgs/msg/ObstacleGrid.msg
index 5b9ae7b..e69de29 100644
--- a/src/kestrel_msgs/msg/ObstacleGrid.msg
+++ b/src/kestrel_msgs/msg/ObstacleGrid.msg
@@ -1,18 +0,0 @@
-# Represents a 3D map showing where obstacles are in space
-
-# Header for timestamp and frame ID
-std_msgs/Header header
-
-# Grid metadata
-
-float32 resolution # Resolution of the grid in meters/cell
-uint32 width # self explanitory
-uint32 height # self explanitory
-uint32 depth #depth of the grid
-
-# Origin of the grid [m] in the frame given in the Header
-geometry_msgs/Pose origin
-
-# The grid data is stored in a 1D array in a row-major order lol
-# 0 means free, 100 means occupied, -1 means unknown
-int8[] data
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/ProximityAlert.msg b/src/kestrel_msgs/msg/ProximityAlert.msg
index 2c59ed3..e69de29 100644
--- a/src/kestrel_msgs/msg/ProximityAlert.msg
+++ b/src/kestrel_msgs/msg/ProximityAlert.msg
@@ -1,17 +0,0 @@
-# Warns when something gets too close
-
-# Header for timestamp and frame ID
-
-std_msgs/Header header
-
-# Distance to the closest detected object
-
-float32 distance
-
-# Severity level of the alert
-
-uint8 SEVERITY_LOW = 1 # CAUTION
-uint8 SEVERITY_MEDIUM = 2 # WARNING
-uint8 SEVERITY_HIGH = 3 # CRITICAL
-
-uint8 severity
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/SensorArray.msg b/src/kestrel_msgs/msg/SensorArray.msg
index 2b0db85..e69de29 100644
--- a/src/kestrel_msgs/msg/SensorArray.msg
+++ b/src/kestrel_msgs/msg/SensorArray.msg
@@ -1,7 +0,0 @@
-# Collection of data from multiple sensors
-
-# Header for timestamp
-std_msgs/Header header
-
-# This is an array of individual sensor readings
-kestrel_msgs/SensorReading[] readings
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/SensorReading.msg b/src/kestrel_msgs/msg/SensorReading.msg
index 0134cc5..e69de29 100644
--- a/src/kestrel_msgs/msg/SensorReading.msg
+++ b/src/kestrel_msgs/msg/SensorReading.msg
@@ -1,14 +0,0 @@
-# Data from one sensor (Like distance or temperature)
-
-# Header for timestamp and frame_id of the sensor
-
-std_msgs/Header header
-
-# Type of the sensor (Like ToF and crap)
-string sensor_type
-
-# The actual sensor values
-float64 value
-
-# Variance of the reading, if available
-float64 variance
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/SystemHealth.msg b/src/kestrel_msgs/msg/SystemHealth.msg
index eca2a1a..e69de29 100644
--- a/src/kestrel_msgs/msg/SystemHealth.msg
+++ b/src/kestrel_msgs/msg/SystemHealth.msg
@@ -1,22 +0,0 @@
-# Report on how well all systems are functioning
-
-# Header for timestamp
-std_msgs/Header header
-
-# Overall system status lole
-
-uint8 STATUS_OK = 0
-uint8 STATUS_WARN = 1
-uint8 STATUS_ERROR = 2
-uint8 STATUS_CRITICAL = 3
-uint8 status
-
-# CPU and Memory usage
-float32 cpu_usage_percent
-float32 memory_usage_percent
-
-# System temperature IN CELSIUS!!
-float32 temperature_celsius
-
-# Ok so this is a list of key-value pairs for other component-specific status lol
-diagnostic_msgs/KeyValue[] my_key_values
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/Track.msg b/src/kestrel_msgs/msg/Track.msg
new file mode 100644
index 0000000..eb57306
--- /dev/null
+++ b/src/kestrel_msgs/msg/Track.msg
@@ -0,0 +1,16 @@
+std_msgs/Header header
+
+int16 id
+
+float32 x1
+float32 y1
+float32 x2
+float32 y2
+
+float32 center_x
+float32 center_y
+
+float32 conf
+string class_name
+
+float32[128] embedding
diff --git a/src/kestrel_msgs/msg/TrackArray.msg b/src/kestrel_msgs/msg/TrackArray.msg
new file mode 100644
index 0000000..99da94b
--- /dev/null
+++ b/src/kestrel_msgs/msg/TrackArray.msg
@@ -0,0 +1,2 @@
+std_msgs/Header header
+kestrel_msgs/Track[] tracks
\ No newline at end of file
diff --git a/src/kestrel_msgs/msg/TrackCenter.msg b/src/kestrel_msgs/msg/TrackCenter.msg
new file mode 100644
index 0000000..40536f4
--- /dev/null
+++ b/src/kestrel_msgs/msg/TrackCenter.msg
@@ -0,0 +1,10 @@
+std_msgs/Header header
+
+float32 x1
+float32 y1
+float32 x2
+float32 y2
+
+float32 center_x
+float32 center_y
+
diff --git a/src/kestrel_msgs/package.xml b/src/kestrel_msgs/package.xml
index 6c2bfa0..7f19585 100644
--- a/src/kestrel_msgs/package.xml
+++ b/src/kestrel_msgs/package.xml
@@ -1,27 +1,24 @@
-
-
- kestrel_msgs
- 0.1.0
- Custom messages, services, and actions for the Kestrel Drone
- Rafeed Khan
- MIT
-
- ament_cmake
-
- rosidl_default_generators
- rosidl_default_runtime
-
- std_msgs
- std_msgs
- geometry_msgs
- geometry_msgs
- diagnostic_msgs
-
-
- rosidl_interface_packages
-
-
- ament_cmake
-
+
+
+ kestrel_msgs
+ 0.1.0
+ Custom messages, services, and actions for the Kestrel Drone
+ David Orjuela
+ MIT
+
+ ament_cmake
+
+ rosidl_default_generators
+ rosidl_default_runtime
+
+ std_msgs
+ std_msgs
+ geometry_msgs
+ geometry_msgs
+
+ rosidl_interface_packages
+
+
+ ament_cmake
+
\ No newline at end of file
diff --git a/src/kestrel_msgs/srv/CalibrateCamera.srv b/src/kestrel_msgs/srv/CalibrateCamera.srv
index d139c88..0c4f109 100644
--- a/src/kestrel_msgs/srv/CalibrateCamera.srv
+++ b/src/kestrel_msgs/srv/CalibrateCamera.srv
@@ -1,13 +1,4 @@
-# Service to trigger a camera calibration routine
-
-# Request
-# Calibration mode constants
-uint8 CALIBRATE_INTRINSIC = 0
-uint8 CALIBRATE_EXTRINSIC = 1
-uint8 CALIBRATE_WHITE_BALANCE = 2
-
-uint8 calibration_mode
----
-# Response
-bool success
-string message # Provides details on success or failure (like "Calibration complete" or "FAILED: Camera not found")
+# Empty request
+---
+
+# Empty response
diff --git a/src/kestrel_msgs/srv/GetSystemStatus.srv b/src/kestrel_msgs/srv/GetSystemStatus.srv
index 990125e..0c4f109 100644
--- a/src/kestrel_msgs/srv/GetSystemStatus.srv
+++ b/src/kestrel_msgs/srv/GetSystemStatus.srv
@@ -1,7 +1,4 @@
-# Service to retrieve the latest system health status.
-
-# Request (should be empty)
----
-# Response
-# Returns the complete system health message
-kestrel_msgs/SystemHealth system_health
\ No newline at end of file
+# Empty request
+---
+
+# Empty response
diff --git a/src/kestrel_msgs/srv/ResetSensors.srv b/src/kestrel_msgs/srv/ResetSensors.srv
index 757fdc8..0c4f109 100644
--- a/src/kestrel_msgs/srv/ResetSensors.srv
+++ b/src/kestrel_msgs/srv/ResetSensors.srv
@@ -1,9 +1,4 @@
-# Service to restart and re-initialize one or more sensors.
-
-# Request
-# A list of sensor_ids to reset. If empty, reset all sensors.
-string[] sensor_ids
----
-# Response
-bool success
-string message # example.. like "Successfully reset 3 sensors" or "Error: sensor 'tof_front' failed to respond"
\ No newline at end of file
+# Empty request
+---
+
+# Empty response
diff --git a/src/kestrel_msgs/srv/SetFlightMode.srv b/src/kestrel_msgs/srv/SetFlightMode.srv
index a46d878..0c4f109 100644
--- a/src/kestrel_msgs/srv/SetFlightMode.srv
+++ b/src/kestrel_msgs/srv/SetFlightMode.srv
@@ -1,9 +1,4 @@
-# Service to switch the drones flight mode
-
-# Request
-# ThIS uses the flight mode constants defined in kestrel_msgs/FlightStatus
-uint8 flight_mode
----
-# Response
-bool success
-string message # ex... "Flight mode set to AUTO" or "Failed: Transition to AUTO not possible in current state"
\ No newline at end of file
+# Empty request
+---
+
+# Empty response
diff --git a/src/kestrel_msgs/srv/TriggerEmergencyStop.srv b/src/kestrel_msgs/srv/TriggerEmergencyStop.srv
index cb9b6ec..0c4f109 100644
--- a/src/kestrel_msgs/srv/TriggerEmergencyStop.srv
+++ b/src/kestrel_msgs/srv/TriggerEmergencyStop.srv
@@ -1,9 +1,4 @@
-# Service to immediately stop the drone in an emergency
-
-# Request
-# A brief HUMAN READABLE reason for the emergency stop for logging purposes
-string reason
----
-# Response
-bool acknowledged # so this confirms that the emergency stop command has been received and is being executed
-string message # ex... "Emergency stop acknowledged, Reason: Proximity breach"
+# Empty request
+---
+
+# Empty response
diff --git a/src/_archive_legacy/models/deepSORT/DataAssociation.py b/src/kestrel_tracking/kestrel_tracking/DataAssociation.py
similarity index 51%
rename from src/_archive_legacy/models/deepSORT/DataAssociation.py
rename to src/kestrel_tracking/kestrel_tracking/DataAssociation.py
index bb77623..093dde6 100644
--- a/src/_archive_legacy/models/deepSORT/DataAssociation.py
+++ b/src/kestrel_tracking/kestrel_tracking/DataAssociation.py
@@ -1,366 +1,397 @@
-#This skeleton code's structure comes from the research paper: https://www.mdpi.com/2076-3417/12/3/1319
-
-#Contain the CNN that deepSORT uses in replace of the Hungarion based Cost Matrix
-from filterpy.kalman import KalmanFilter
-import numpy as np
-from numpy.typing import NDArray
-
-class DataAssociation:
- """
- For all parameters:
- detections : List[List[float]] or List[np.ndarray]
- A list of bounding boxes representing current detections, each in the format [x, y, w, h].
- tracks : List[List[float]] or List[np.ndarray]
- A list of bounding boxes representing predicted tracks, each in the format [x, y, w, h].
- """
-
-
- #Euclidean Distance Based Cost Matrix (𝐷𝐸(𝐷,𝑃))
- def euclidean_cost(
- self,
- detections: NDArray[np.float_],
- tracks: NDArray[np.float_],
- image_dims: tuple[int, int]
- ) -> NDArray[np.float_]:
- """
- Computes the Euclidean distance cost matrix (𝐷𝐸(𝐷,𝑃)), which represents
- the distance between bounding box central points normalized into half
- of the image dimension. To formulate the problem as a maximization
- problem, the distance is obtained by the difference between 1 and the
- normalized Euclidean distance.
-
- d(Di, Pi) = 1 - [sqrt((u_Di - u_Pi)^2 + (v_Di - v_Pi)^2) / (1/2) * sqrt(h^2 + w^2)]
-
- where (h, w) are the height and width of the input image.
- """
- #Retrieve lengths
- detections = np.array(detections)
- tracks = np.array(tracks)
-
- #Store bounding boxes centers for computation
- detections_pos = detections[:, 0:2] + detections[:, 2:4] / 2.0 # (D, 2) - Center of each detection
- tracks_pos = tracks[:, 0:2] + tracks[:, 2:4] / 2.0 # (T, 2) - Center of each track
-
- #Calculate norm based off image size
- norm = 0.5 * np.sqrt(image_dims[0]**2 + image_dims[1]**2)
-
- #Subtract so u_Di - u_Pi & v_Di - v_Pi
- delta = detections_pos[:, None, :] - tracks_pos[None, :, :]
-
- #Perform linear norm of sum of deltas
- dist_matrix = np.linalg.norm(delta, axis=2)
-
- #Compute cost matrix
- euclidean_cost_matrix = np.clip((dist_matrix / norm), 0.0, 1.0) # Ensures all values are bounded between 0 and 1.
-
- return euclidean_cost_matrix
-
- #Bounding Box Ratio Based Cost Matrix (𝑅(𝐷,𝑃))
- def bbox_ratio_cost(
- self,
- detections: NDArray[np.float_],
- tracks: NDArray[np.float_]
- ) -> NDArray[np.float_]:
-
- """
- Computes the bounding box ratio-based cost matrix (𝑅(𝐷,𝑃)), which is
- implemented as a ratio between the product of each width and height.
-
- r(Di, Pi) = min( (w_Di * h_Di) / (w_Pi * h_Pi), (w_Pi * h_Pi) / (w_Di * h_Di) )
-
- Returns a cost matrix where lower values indicate better box shape alignment.
-
- Box shape similarity ranges from 0 (different) to 1 (identical), and is converted to cost as:
- cost_r = 1.0 - similarity_r.
-
- Note: This implementation matches the paper definition, which compares area ratios only.
- This may return cost = 0 even for mismatched shapes (e.g., 10x1 vs 1x10), since area = same.
-
- """
- if len(detections) == 0 or len(tracks) == 0:
- return np.array([])
-
- detections = np.array(detections) # (D, 4)
- tracks = np.array(tracks) # (T, 4)
-
- # Gets every width and height from each row into 2 1D arrays and calculates area
- detection_areas = detections[:, 2] * detections[:, 3]
- track_areas = tracks[:, 2] * tracks[:, 3] # (T,) = (T,) * (T,)
-
- # Transform the 1D arrays to broadcast into (D, T)
- detection_areas = detection_areas[:, None]# (D, 1): [[1], [2], [3]]
- track_areas = track_areas[None, :] # (1, T): [[1, 2, 3, 4]]
-
- # Prevent divide-by-zero
- eps = 1e-6
- detection_areas = np.maximum(detection_areas, eps)
- track_areas = np.maximum(track_areas, eps)
-
- if np.any(detection_areas == eps) or np.any(track_areas == eps):
- print("[Warning] Zero-area bounding box encountered. Auto-corrected to epsilon.")
-
- # Calculates ratio and broadcasts to (D, T)
- ratio1 = detection_areas / track_areas
- ratio2 = track_areas / detection_areas
-
- # Calculates cost at every [i, j]
- bbox_cost_matrix = 1.0 - np.minimum(ratio1, ratio2)
- return bbox_cost_matrix
-
-
- #SORT’s IoU Cost Matrix
- def iou_cost(
- self,
- detections: NDArray[np.float_],
- tracks: NDArray[np.float_]
- ) -> NDArray[np.float_]:
-
- detections = np.array(detections)
- tracks = np.array(tracks)
-
- det_x1 = detections[:, 0:1]
- det_y1 = detections[:, 1:2]
- det_x2 = det_x1 + detections[:, 2:3]
- det_y2 = det_y1 + detections[:, 3:4]
-
- trk_x1 = tracks[:, 0].reshape(1,-1)
- trk_y1 = tracks[:, 1].reshape(1,-1)
- trk_x2 = trk_x1 + tracks[:, 2].reshape(1,-1)
- trk_y2 = trk_y1 + tracks[:, 3].reshape(1,-1)
-
- detectionWidth = detections[:,2:3]
- detectionHeight = detections[:,3:4]
-
- areaDetection = detectionWidth * detectionHeight
- areaTrack = tracks[:, 2].reshape(1, -1) * tracks[:, 3].reshape(1, -1)
-
- inter_x1 = np.maximum(det_x1, trk_x1)
- inter_y1 = np.maximum(det_y1, trk_y1)
- inter_x2 = np.minimum(det_x2, trk_x2)
- inter_y2 = np.minimum(det_y2, trk_y2)
-
- inter_w = np.maximum(0.0, inter_x2 - inter_x1)
- inter_h = np.maximum(0.0, inter_y2 - inter_y1)
-
- # AoI = Area of Intersection, AoU = Area of Union
- AoI = inter_w * inter_h
- AoU = areaDetection + areaTrack - AoI
-
-
- # Explicitly Guard Against Divide-by-Zero
- with np.errstate(divide='ignore', invalid='ignore'):
- iou_raw = np.divide(AoI, AoU, out=np.zeros_like(AoI), where=AoU > 0)
- if np.any(AoU == 0):
- print("[Warning] Divide by zero encountered in IoU computation.") # Consider removal when production ready!
-
- # Convert NaN to 0 before computing cost
- iou_raw = np.nan_to_num(iou_raw, nan=0.0, posinf=0.0, neginf=0.0)
-
- iou_matrix = np.clip(iou_raw, 0.0, 1.0)
-
- return 1.0 - iou_matrix
-
- #SORT’s IoU Cost Matrix Combined with the Euclidean Distance Cost Matrix (𝐸𝐼𝑜𝑈𝐷(𝐷,𝑃))
- def iou_euclidean_cost(self, detections, tracks, image_dims):
- """
- Computes the IoU cost matrix combined with the Euclidean distance cost
- matrix using the Hadamard (element-wise) product:
-
- EIoUD(D, P) = IoU(D, P) ∘ DE(D, P)
-
- where ∘ represents element-wise multiplication.
- """
- #Call iou cost matrix
- iou_matrix = np.array(self.iou_cost(detections, tracks))
-
- #Call euclidean cost matrix
- euclidean_matrix = np.array(self.euclidean_cost(detections, tracks, image_dims))
-
- #Perform Hadamard product
- iou_euclidean_cost_matrix = iou_matrix * euclidean_matrix
-
- return iou_euclidean_cost_matrix
-
-
- #SORT’s IoU Cost Matrix Combined with the Bounding Box Ratio Based Cost Matrix (𝑅𝐼𝑜𝑈(𝐷,𝑃))
- def iou_bbox_ratio_cost(self, detections, tracks):
- """
- Computes the IoU cost matrix combined with the bounding box ratio-based
- cost matrix using the Hadamard (element-wise) product:
-
- RIoU(D, P) = IoU(D, P) ∘ R(D, P)
-
- where ∘ represents element-wise multiplication.
- """
- num_detections, num_tracks = len(detections), len(tracks)
-
- if num_detections == 0 or num_tracks == 0:
- return np.array([])
-
- cost_iou = np.asarray(self.iou_cost(detections, tracks)) # IoU cost matrix
- cost_bbr = np.asarray(self.bbox_ratio_cost(detections, tracks)) # Bounding box ratio cost matrix
-
- if np.shape(cost_iou) != np.shape(cost_bbr):
- raise ValueError("IoU cost matrix and bbox ratio cost matrix are of different shapes")
-
- return cost_iou * cost_bbr # Element-wise multiplication
-
-
- #Euclidean Distance Cost Matrix Combined with the Bounding Box Ratio Based Cost Matrix (𝑅𝐷𝐸(𝐷,𝑃))
- def euclidean_bbox_ratio_cost(self, detections, tracks, image_dims):
- """
- Computes the Euclidean distance cost matrix combined with the bounding box
- ratio-based cost matrix using the Hadamard (element-wise) product:
- RDE(D, P) = DE(D, P) ∘ R(D, P)
- where ∘ represents element-wise multiplication.
- """
- num_detections, num_tracks = len(detections), len(tracks)
-
- if num_detections == 0 or num_tracks == 0:
- return np.array([])
-
- cost_de = np.asarray(self.euclidean_cost(detections, tracks, image_dims))
- cost_r = np.asarray(self.bbox_ratio_cost(detections, tracks))
-
- if np.shape(cost_de) != np.shape(cost_r):
- raise ValueError("Euclidean cost matrix and bbox ratio cost matrix are of different shapes")
-
- # performs element-wise multiplication
- cost_rde = np.multiply(cost_de, cost_r)
-
- return cost_rde
-
-
- #Step 7: SORT's IoU Cost Matrix Combined with the Euclidean Distance Cost Matrix and the Bounding Box Ratio Based Cost Matrix (𝑀(𝐷,𝑃))
- def combined_cost_matrix(self, detections, tracks, image_dims):
- """
- Computes the IoU cost matrix combined with the Euclidean distance cost
- matrix and the bounding box ratio-based cost matrix using the Hadamard
- (element-wise) product:
-
- M(D, P) = IoU(D, P) ∘ DE(D, P) ∘ R(D, P)
-
- where ∘ represents element-wise multiplication.
- """
-
- num_detections = len(detections)
- num_tracks = len(tracks)
-
- if num_detections == 0 or num_tracks == 0:
- return np.array([])
-
- matrix_iou = self.iou_cost(detections, tracks)
- matrix_de = self.euclidean_cost(detections, tracks, image_dims)
- matrix_r = self.bbox_ratio_cost(detections, tracks)
-
- # Ensure all matrices have the same shape
- if not (matrix_iou.shape == matrix_de.shape == matrix_r.shape == (num_detections, num_tracks)):
- raise ValueError("Cost matrices must have the same shape.")
-
- # Compute the combined cost matrix using element-wise multiplication
- # Each component is already a cost (lower = better), so multiplication is safe
- combined_matrix = matrix_iou * matrix_de * matrix_r
-
- return combined_matrix
-
-
- #Element-wise Average of Every Cost Matrix (𝐴(𝐷,𝑃))
- def average_cost_matrix(self, detections, tracks, image_dims):
- """
- Computes the element-wise average of every cost matrix:
-
- A(Di, Pi) = (IoU(Di, Pj) + DE(Di, Pj) + R(Di, Pj)) / 3, for i ∈ D, j ∈ P
- """
- num_detections = len(detections)
- num_tracks = len(tracks)
-
- if num_detections == 0 or num_tracks == 0:
- return np.array([]) #Return an empty array if there are no tracks or detections
-
- cost_iou = self.iou_cost(detections, tracks)
- cost_de = self.euclidean_cost(detections, tracks, image_dims)
- cost_r = self.bbox_ratio_cost(detections, tracks)
-
- # Ensure all shapes match
- if not (cost_iou.shape == cost_de.shape == cost_r.shape):
- raise ValueError("Cost matrices are not aligned in shape.")
-
- #Final cost matrix
- return (cost_iou + cost_de + cost_r) / 3
-
-
- #Element-wise Weighted Mean of Every Cost Matrix Value (𝑊𝑀(𝐷,𝑃))
- def weighted_mean_cost_matrix(self, detections, tracks, image_dims, lambda_iou=0.33, lambda_de=0.33, lambda_r=0.34):
- """
- Computes the element-wise weighted mean of every cost matrix value:
-
- WM(Di, Pi) = (λ_IoU * IoU(Di, Pi) + λ_DE * DE(Di, Pi) + λ_R * R(Di, Pi))
-
- where λ_IoU + λ_DE + λ_R = 1.
- """
- '''
- It calculates a combined cost based on the Intersection over Union (IoU),
- Euclidean distance, and bounding box ratio metrics, using specified weights.
- '''
- num_detections = len(detections)
- num_tracks = len(tracks)
-
- if num_detections == 0 or num_tracks == 0:
- return np.array([]) #Return an empty array if there are no tracks or detections
-
- cost_matrix = np.zeros((num_detections, num_tracks))
-
- #Ensure the weights sum to 1.0
- sum_lambdas = lambda_iou + lambda_de + lambda_r
- if not np.isclose(sum_lambdas, 1.0):
- print("Warning: Lambda weights do not sum to 1.0. I will normalize them.")
- lambda_iou /= sum_lambdas
- lambda_de /= sum_lambdas
- lambda_r /= sum_lambdas
-
- #Compute the cost matrices using other cost functions. All other functions SHOULD return cost matrices, if not change to: 1.0 - output_matrix
- cost_iou = self.iou_cost(detections, tracks)
- cost_euclidean = self.euclidean_cost(detections, tracks, image_dims)
- cost_ratio = self.bbox_ratio_cost(detections, tracks)
-
- #Vectorized weight sum. NumPy arrays are implemented in C under the hood.
- #So with these arrays, math operations are executed in compiled C code, not interpreted Python
- #Rather than iterating through with nested loops, we can perform vector/matrix multiplication on the arrays as a whole
- cost_matrix = (
- lambda_iou * cost_iou +
- lambda_de * cost_euclidean +
- lambda_r * cost_ratio
- )
-
- return cost_matrix
-
-
- #Class Gate Update Based on Object Class Match (𝐶∗(𝐷,𝑃))
- def class_gate_cost_matrix(self, cost_matrix, detection_classes, track_classes):
- """
- Updates the cost matrix based on the match between predicted and detected
- object class. If the class labels do not match, the cost is set to 0:
-
- C*(Ci, j, Di, Pi) = { Ci, j if Class_Di = Class_Pi, 0 otherwise }
-
- for i ∈ D, j ∈ P.
- """
- '''
- This function updates cost matrices based on the match between
- predicted and detected object classes'''
-
- num_detections = cost_matrix.shape[0]
- num_tracks = cost_matrix.shape[1]
-
- if num_detections != len(detection_classes) or num_tracks != len(track_classes):
- raise ValueError("Dimensions of cost_matrix, detection_classes, and track_classes do not match - Class Gate Cost Matrix")
-
- #Create a boolean mask where classses match
- # Reshapes to (num_detections, 1) Reshapes to (1, num_tracks) -> (D, T)
- match_mask = (np.array(detection_classes)[:, None] == np.array(track_classes)[None, :]) #Shape = [num_detections, num_tracks]
- #Because detection_classes has the same number of rows as track_classes has columns, we can perform matrix multiplication
-
- #Apply the mask and keep the values where classes match, zero where they do not
- gated_cost_matrix = cost_matrix * match_mask
-
- return gated_cost_matrix
\ No newline at end of file
+#This skeleton code's structure comes from the research paper: https://www.mdpi.com/2076-3417/12/3/1319
+
+import numpy as np
+from numpy.typing import NDArray
+
+class DataAssociation:
+ """
+ All bbox arrays are expected in format:
+ [cx, cy, w, h] (center coordinates + width/height)
+
+ All cost matrices returned are shape:
+ (T, D) = (num_tracks, num_detections)
+
+ Convention:
+ - Lower cost is better
+ - Costs are clipped/bounded into [0, 1] for the included costs
+ """
+
+
+ @staticmethod
+ def _cxcywh_to_xyxy(boxes: NDArray[np.float_]) -> tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]:
+ """
+        Convert boxes (N,4) in cxcywh into x1, y1, x2, y2 (each a (N,) array; callers reshape for broadcasting).
+ """
+ boxes = np.asarray(boxes, dtype=np.float32)
+ cx = boxes[:, 0]
+ cy = boxes[:, 1]
+ w = boxes[:, 2]
+ h = boxes[:, 3]
+
+ x1 = cx - w / 2.0
+ y1 = cy - h / 2.0
+ x2 = cx + w / 2.0
+ y2 = cy + h / 2.0
+ return x1, y1, x2, y2
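+
+    # Example (editor's illustration): a box (cx=10, cy=20, w=4, h=6) converts to
+    # corners x1=8, y1=17, x2=12, y2=23.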
+
+ #Euclidean Distance Based Cost Matrix (𝐷𝐸(𝐷,𝑃))
+ def euclidean_cost(
+ self,
+ tracks: NDArray[np.float_], # (T,4) cxcywh
+ detections: NDArray[np.float_], # (D,4) cxcywh
+ image_dims: tuple[int, int],
+ ) -> NDArray[np.float_]:
+ """
+ Euclidean distance between centers normalized by half the image diagonal.
+ Returns (T, D) in [0, 1] where 0 is best (same center), 1 is worst (far).
+
+        cost(P_i, D_j) = sqrt((u_Dj - u_Pi)^2 + (v_Dj - v_Pi)^2) / ((1/2) * sqrt(h^2 + w^2))
+
+        where (h, w) are the height and width of the input image. The paper's DE(D, P)
+        is the similarity 1 - cost; since this class uses a cost convention (lower is
+        better), the normalized distance itself is returned.
+ """
+ tracks = np.asarray(tracks, dtype=np.float32)
+ detections = np.asarray(detections, dtype=np.float32)
+
+ if tracks.size == 0 or detections.size == 0:
+ return np.zeros((tracks.shape[0], detections.shape[0]), dtype=np.float32)
+
+ # Centers are already (cx, cy)
+ trk_pos = tracks[:, 0:2] # (T,2)
+ det_pos = detections[:, 0:2] # (D,2)
+
+ # Normalization constant: half the image diagonal
+ H, W = image_dims
+ norm = 0.5 * np.sqrt(float(H) ** 2 + float(W) ** 2)
+
+ # Broadcast subtract:
+ # trk_pos[:, None, :] -> (T,1,2)
+ # det_pos[None, :, :] -> (1,D,2)
+ delta = trk_pos[:, None, :] - det_pos[None, :, :] # (T,D,2)
+ dist = np.linalg.norm(delta, axis=2) # (T,D)
+
+ cost = np.clip(dist / (norm + 1e-12), 0.0, 1.0).astype(np.float32) # (T,D)
+ return cost
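+
+    # Worked example (editor's illustration): for a 720x1280 image the half-diagonal is
+    # 0.5 * sqrt(720^2 + 1280^2) ~= 734.3 px, so a 500 px center offset between a track
+    # and a detection yields a cost of roughly 500 / 734.3 ~= 0.68.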
+
+ #Bounding Box Ratio Based Cost Matrix (𝑅(𝐷,𝑃))
+ def bbox_ratio_cost(
+ self,
+ tracks: NDArray[np.float_], # (T,4) cxcywh
+ detections: NDArray[np.float_], # (D,4) cxcywh
+ ) -> NDArray[np.float_]:
+
+ """
+        Computes the bounding box ratio-based cost matrix (𝑅(𝐷,𝑃)), implemented
+        as the ratio between the two boxes' areas (width * height).
+
+ r(Di, Pi) = min( (w_Di * h_Di) / (w_Pi * h_Pi), (w_Pi * h_Pi) / (w_Di * h_Di) )
+
+ cost = 1 - min(area_det/area_trk, area_trk/area_det)
+
+ Note: this is area-only, so shapes like 10x1 and 1x10 have same area.
+ """
+ tracks = np.asarray(tracks, dtype=np.float32)
+ detections = np.asarray(detections, dtype=np.float32)
+
+ T = tracks.shape[0]
+ D = detections.shape[0]
+ if T == 0 or D == 0:
+ return np.zeros((T, D), dtype=np.float32)
+
+ trk_areas = tracks[:, 2] * tracks[:, 3] # (T,)
+ det_areas = detections[:, 2] * detections[:, 3] # (D,)
+
+ eps = 1e-6
+ trk_areas = np.maximum(trk_areas, eps)
+ det_areas = np.maximum(det_areas, eps)
+
+ # Broadcast to (T,D)
+ # trk_areas[:,None] => (T,1)
+ # det_areas[None,:] => (1,D)
+ ratio1 = det_areas[None, :] / trk_areas[:, None] # (T,D) det/trk
+ ratio2 = trk_areas[:, None] / det_areas[None, :] # (T,D) trk/det
+
+ cost = 1.0 - np.minimum(ratio1, ratio2)
+ cost = np.clip(cost, 0.0, 1.0).astype(np.float32)
+ return cost
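+
+    # Worked example (editor's illustration): a 10x10 track box (area 100) against a 10x5
+    # detection box (area 50) gives min(50/100, 100/50) = 0.5, so cost = 1 - 0.5 = 0.5.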
+
+
+ # SORT's IoU Cost
+ def iou_cost(
+ self,
+ tracks: NDArray[np.float_], # (T,4) cxcywh
+ detections: NDArray[np.float_], # (D,4) cxcywh
+ ) -> NDArray[np.float_]:
+ """
+ IoU cost = 1 - IoU, returned as (T, D).
+ """
+ tracks = np.asarray(tracks, dtype=np.float32)
+ detections = np.asarray(detections, dtype=np.float32)
+
+ T = tracks.shape[0]
+ D = detections.shape[0]
+ if T == 0 or D == 0:
+ return np.zeros((T, D), dtype=np.float32)
+
+ # Convert both to xyxy
+ trk_x1, trk_y1, trk_x2, trk_y2 = self._cxcywh_to_xyxy(tracks) # each (T,)
+ det_x1, det_y1, det_x2, det_y2 = self._cxcywh_to_xyxy(detections) # each (D,)
+
+ # Reshape for broadcasting:
+ # Tracks along rows (T,1), detections along cols (1,D)
+ trk_x1 = trk_x1[:, None]
+ trk_y1 = trk_y1[:, None]
+ trk_x2 = trk_x2[:, None]
+ trk_y2 = trk_y2[:, None]
+
+ det_x1 = det_x1[None, :]
+ det_y1 = det_y1[None, :]
+ det_x2 = det_x2[None, :]
+ det_y2 = det_y2[None, :]
+
+ inter_x1 = np.maximum(trk_x1, det_x1)
+ inter_y1 = np.maximum(trk_y1, det_y1)
+ inter_x2 = np.minimum(trk_x2, det_x2)
+ inter_y2 = np.minimum(trk_y2, det_y2)
+
+ inter_w = np.maximum(0.0, inter_x2 - inter_x1)
+ inter_h = np.maximum(0.0, inter_y2 - inter_y1)
+ inter_area = inter_w * inter_h # (T,D)
+
+ trk_area = np.maximum(0.0, (trk_x2 - trk_x1)) * np.maximum(0.0, (trk_y2 - trk_y1)) # (T,1)
+ det_area = np.maximum(0.0, (det_x2 - det_x1)) * np.maximum(0.0, (det_y2 - det_y1)) # (1,D)
+
+ union = trk_area + det_area - inter_area # (T,D)
+
+ with np.errstate(divide="ignore", invalid="ignore"):
+ iou = np.divide(inter_area, union, out=np.zeros_like(inter_area), where=union > 0)
+
+ iou = np.nan_to_num(iou, nan=0.0, posinf=0.0, neginf=0.0)
+ iou = np.clip(iou, 0.0, 1.0).astype(np.float32)
+
+ cost = (1.0 - iou).astype(np.float32) # (T,D)
+ return cost
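+
+    # Worked example (editor's illustration): identical boxes give IoU = 1, so cost = 0;
+    # equal-sized boxes overlapping by half their area give IoU = 1/3, so cost ~= 0.67.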
+
+ #SORT's IoU Cost Matrix Combined with the Euclidean Distance Cost Matrix (𝐸𝐼𝑜𝑈𝐷(𝐷,𝑃))
+    def iou_euclidean_cost(self, tracks, detections, image_dims):
+        """
+        Computes the IoU cost matrix combined with the Euclidean distance cost
+        matrix using the Hadamard (element-wise) product:
+
+        EIoUD(D, P) = IoU(D, P) ∘ DE(D, P)
+
+        where ∘ represents element-wise multiplication. Returns (T, D).
+        """
+        # IoU cost matrix (T, D)
+        iou_matrix = self.iou_cost(tracks, detections)
+
+        # Euclidean cost matrix (T, D)
+        euclidean_matrix = self.euclidean_cost(tracks, detections, image_dims)
+
+        # Hadamard product
+        iou_euclidean_cost_matrix = iou_matrix * euclidean_matrix
+
+        return iou_euclidean_cost_matrix
+
+
+ #SORT’s IoU Cost Matrix Combined with the Bounding Box Ratio Based Cost Matrix (𝑅𝐼𝑜𝑈(𝐷,𝑃))
+    def iou_bbox_ratio_cost(self, tracks, detections):
+        """
+        Computes the IoU cost matrix combined with the bounding box ratio-based
+        cost matrix using the Hadamard (element-wise) product:
+
+        RIoU(D, P) = IoU(D, P) ∘ R(D, P)
+
+        where ∘ represents element-wise multiplication. Returns (T, D).
+        """
+        num_tracks, num_detections = len(tracks), len(detections)
+
+        if num_tracks == 0 or num_detections == 0:
+            return np.zeros((num_tracks, num_detections), dtype=np.float32)
+
+        cost_iou = self.iou_cost(tracks, detections)         # IoU cost matrix (T, D)
+        cost_bbr = self.bbox_ratio_cost(tracks, detections)  # Bounding box ratio cost matrix (T, D)
+
+        if cost_iou.shape != cost_bbr.shape:
+            raise ValueError("IoU cost matrix and bbox ratio cost matrix are of different shapes")
+
+        return cost_iou * cost_bbr  # Element-wise multiplication
+
+
+ #Euclidean Distance Cost Matrix Combined with the Bounding Box Ratio Based Cost Matrix (𝑅𝐷𝐸(𝐷,𝑃))
+    def euclidean_bbox_ratio_cost(self, tracks, detections, image_dims):
+        """
+        Computes the Euclidean distance cost matrix combined with the bounding box
+        ratio-based cost matrix using the Hadamard (element-wise) product:
+        RDE(D, P) = DE(D, P) ∘ R(D, P)
+        where ∘ represents element-wise multiplication. Returns (T, D).
+        """
+        num_tracks, num_detections = len(tracks), len(detections)
+
+        if num_tracks == 0 or num_detections == 0:
+            return np.zeros((num_tracks, num_detections), dtype=np.float32)
+
+        cost_de = self.euclidean_cost(tracks, detections, image_dims)
+        cost_r = self.bbox_ratio_cost(tracks, detections)
+
+        if cost_de.shape != cost_r.shape:
+            raise ValueError("Euclidean cost matrix and bbox ratio cost matrix are of different shapes")
+
+        # Element-wise multiplication
+        cost_rde = np.multiply(cost_de, cost_r)
+
+        return cost_rde
+
+
+ #Step 7: SORT's IoU Cost Matrix Combined with the Euclidean Distance Cost Matrix and the Bounding Box Ratio Based Cost Matrix (𝑀(𝐷,𝑃))
+    def combined_cost_matrix(self, tracks, detections, image_dims):
+        """
+        Computes the IoU cost matrix combined with the Euclidean distance cost
+        matrix and the bounding box ratio-based cost matrix using the Hadamard
+        (element-wise) product:
+
+        M(D, P) = IoU(D, P) ∘ DE(D, P) ∘ R(D, P)
+
+        where ∘ represents element-wise multiplication. Returns (T, D).
+        """
+
+        num_tracks = len(tracks)
+        num_detections = len(detections)
+
+        if num_tracks == 0 or num_detections == 0:
+            return np.zeros((num_tracks, num_detections), dtype=np.float32)
+
+        matrix_iou = self.iou_cost(tracks, detections)
+        matrix_de = self.euclidean_cost(tracks, detections, image_dims)
+        matrix_r = self.bbox_ratio_cost(tracks, detections)
+
+        # Ensure all matrices have the same shape
+        if not (matrix_iou.shape == matrix_de.shape == matrix_r.shape == (num_tracks, num_detections)):
+            raise ValueError("Cost matrices must have the same shape.")
+
+        # Compute the combined cost matrix using element-wise multiplication
+        # Each component is already a cost (lower = better), so multiplication is safe
+        combined_matrix = matrix_iou * matrix_de * matrix_r
+
+        return combined_matrix
+
+
+ #Element-wise Average of Every Cost Matrix (𝐴(𝐷,𝑃))
+    def average_cost_matrix(self, tracks, detections, image_dims):
+        """
+        Computes the element-wise average of every cost matrix:
+
+        A(D_i, P_j) = (IoU(D_i, P_j) + DE(D_i, P_j) + R(D_i, P_j)) / 3, for i ∈ D, j ∈ P
+
+        Returns (T, D).
+        """
+        num_tracks = len(tracks)
+        num_detections = len(detections)
+
+        if num_tracks == 0 or num_detections == 0:
+            return np.zeros((num_tracks, num_detections), dtype=np.float32)  # No tracks or detections
+
+        cost_iou = self.iou_cost(tracks, detections)
+        cost_de = self.euclidean_cost(tracks, detections, image_dims)
+        cost_r = self.bbox_ratio_cost(tracks, detections)
+
+        # Ensure all shapes match
+        if not (cost_iou.shape == cost_de.shape == cost_r.shape):
+            raise ValueError("Cost matrices are not aligned in shape.")
+
+        # Final cost matrix
+        return (cost_iou + cost_de + cost_r) / 3
+
+
+ #Element-wise Weighted Mean of Every Cost Matrix Value (𝑊𝑀(𝐷,𝑃))
+ def weighted_mean_cost_matrix(
+ self,
+ tracks: NDArray[np.float_], # (T,4) cxcywh
+ detections: NDArray[np.float_], # (D,4) cxcywh
+ image_dims: tuple[int, int],
+ lambda_iou: float = 7 / 10,
+ lambda_de: float = 2 / 10,
+ lambda_r: float = 1 / 10,
+ ) -> NDArray[np.float_]:
+ """
+ WM = lambda_iou * IoU_cost + lambda_de * Euclidean_cost + lambda_r * Ratio_cost
+ Returns (T, D).
+ """
+ tracks = np.asarray(tracks, dtype=np.float32)
+ detections = np.asarray(detections, dtype=np.float32)
+
+ T = tracks.shape[0]
+ D = detections.shape[0]
+ if T == 0 or D == 0:
+ return np.zeros((T, D), dtype=np.float32)
+
+ # Ensure weights sum to 1.0
+ s = float(lambda_iou + lambda_de + lambda_r)
+ if not np.isclose(s, 1.0):
+ lambda_iou /= s
+ lambda_de /= s
+ lambda_r /= s
+
+ cost_iou = self.iou_cost(tracks, detections) # (T,D)
+ cost_de = self.euclidean_cost(tracks, detections, image_dims) # (T,D)
+ cost_r = self.bbox_ratio_cost(tracks, detections) # (T,D)
+
+ wm = (lambda_iou * cost_iou) + (lambda_de * cost_de) + (lambda_r * cost_r)
+ wm = np.clip(wm, 0.0, 1.0).astype(np.float32)
+ return wm
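+
+    # Worked example (editor's illustration) with the default weights (0.7, 0.2, 0.1):
+    # per-pair costs iou=0.4, de=0.2, r=0.1 combine to 0.7*0.4 + 0.2*0.2 + 0.1*0.1 = 0.33.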
+
+
+ #Class Gate Update Based on Object Class Match (𝐶∗(𝐷,𝑃))
+    def class_gate_cost_matrix(self, cost_matrix, detection_classes, track_classes):
+        """
+        Gates the cost matrix on the match between predicted and detected object
+        classes. Following the paper (which uses similarity matrices), entries whose
+        class labels do not match are zeroed:
+
+        C*_{i,j} = C_{i,j} if Class(P_i) == Class(D_j), else 0
+
+        Note: under this class's cost convention (lower is better), callers that
+        minimize the result should assign a large cost to mismatched pairs (e.g.
+        mask before assignment) instead of zeroing them.
+        """
+        # cost_matrix follows the (T, D) convention: rows = tracks, columns = detections
+        num_tracks = cost_matrix.shape[0]
+        num_detections = cost_matrix.shape[1]
+
+        if num_tracks != len(track_classes) or num_detections != len(detection_classes):
+            raise ValueError("Dimensions of cost_matrix, track_classes, and detection_classes do not match - Class Gate Cost Matrix")
+
+        # Boolean mask where classes match:
+        # track_classes -> (T, 1), detection_classes -> (1, D), broadcast to (T, D)
+        match_mask = (np.array(track_classes)[:, None] == np.array(detection_classes)[None, :])
+
+        # Keep the values where classes match, zero where they do not
+        gated_cost_matrix = cost_matrix * match_mask
+
+        return gated_cost_matrix
+
+
+_DA = DataAssociation()
+
+def weighted_mean_cost_matrix(
+ tracks: NDArray[np.float_],
+ detections: NDArray[np.float_],
+ image_dims: tuple[int, int],
+ lambda_iou: float = 7 / 10,
+ lambda_de: float = 2 / 10,
+ lambda_r: float = 1 / 10,
+) -> NDArray[np.float_]:
+ """
+ Pure NumPy in/out helper.
+ Expects:
+ tracks: (T,4) cxcywh
+ detections: (D,4) cxcywh
+ Returns:
+ (T,D) cost matrix
+ """
+ return _DA.weighted_mean_cost_matrix(
+ tracks=tracks,
+ detections=detections,
+ image_dims=image_dims,
+ lambda_iou=lambda_iou,
+ lambda_de=lambda_de,
+ lambda_r=lambda_r,
+ )
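+
+
+if __name__ == "__main__":
+    # Editor's illustrative sanity check with made-up boxes (not part of the pipeline):
+    _tracks = np.array([[100.0, 100.0, 50.0, 80.0]], dtype=np.float32)  # (1,4) cxcywh
+    _dets = np.array([[105.0, 102.0, 52.0, 78.0],
+                      [400.0, 300.0, 60.0, 90.0]], dtype=np.float32)    # (2,4) cxcywh
+    _cost = weighted_mean_cost_matrix(_tracks, _dets, image_dims=(720, 1280))
+    # Expect shape (1, 2); the near-identical pair should have the smaller cost.
+    print(_cost)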
\ No newline at end of file
diff --git a/src/kestrel_tracking/kestrel_tracking/tracking_node.py b/src/kestrel_tracking/kestrel_tracking/tracking_node.py
new file mode 100644
index 0000000..0579cdd
--- /dev/null
+++ b/src/kestrel_tracking/kestrel_tracking/tracking_node.py
@@ -0,0 +1,555 @@
+import rclpy
+from rclpy.node import Node
+
+from kestrel_msgs.msg import DetectionArray, Track as TrackMsg, TrackArray
+from kestrel_tracking.DataAssociation import weighted_mean_cost_matrix
+
+import numpy as np
+from dataclasses import dataclass
+from typing import List, Sequence, Tuple
+from scipy.optimize import linear_sum_assignment
+
+from filterpy.kalman import KalmanFilter as FP_KalmanFilter
+
+
+# -----------------------------
+# Detection struct + adapters
+# -----------------------------
+@dataclass(frozen=True)
+class Det:
+ """One detection in convenient tracker format."""
+ x1: float
+ y1: float
+ x2: float
+ y2: float
+ z: np.ndarray # (4,1) measurement: [cx, cy, w, h]^T
+ embedding: np.ndarray # (128,) appearance embedding
+ conf: float = 0.0
+ class_name: str = ""
+
+def det_from_ros(det_msg) -> Det:
+ """Convert kestrel_msgs/Detection -> Det."""
+ z = np.array(
+ [det_msg.center_x, det_msg.center_y, det_msg.w, det_msg.h],
+ dtype=np.float32
+ ).reshape(4,1)
+
+ emb = np.array(det_msg.embedding, dtype=np.float32)
+
+ return Det(
+ x1=float(det_msg.x1),
+ y1=float(det_msg.y1),
+ x2=float(det_msg.x2),
+ y2=float(det_msg.y2),
+ z=z,
+ embedding=emb,
+ conf=float(det_msg.conf),
+ class_name=str(det_msg.class_name),
+ )
+
+
+# -----------------------------
+# Geometry helpers
+# -----------------------------
+def state2bbox_xyxy(state: Sequence[float]) -> List[float]:
+ """
+ Convert a KF state -> bbox [x1, y1, x2, y2] using state[:4] = [cx, cy, w, h].
+ Keep as floats; cast to int only for drawing.
+ """
+ cx, cy, w, h = state[:4]
+ x1 = cx - w / 2.0
+ y1 = cy - h / 2.0
+ x2 = cx + w / 2.0
+ y2 = cy + h / 2.0
+ return [float(x1), float(y1), float(x2), float(y2)]
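+
+# Example (editor's illustration): state [100, 50, 40, 20, ...] -> bbox [80.0, 40.0, 120.0, 60.0].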
+
+
+# -----------------------------
+# Costs + gating (vectorized)
+# -----------------------------
+def mahalanobis_gate_mask(
+ tracks: List["Track"],
+ det_z: np.ndarray,
+ gate_d2_thresh: float
+) -> np.ndarray:
+ """
+ Build a (T,D) boolean mask where True means the pairing is allowed by gating.
+
+ Implementation detail:
+ - We loop over tracks (T loops)
+ - We vectorize over detections per-track (no T*D Python loops)
+
+ Args:
+ tracks: list of Track objects
+ det_z: (D,4) array of detection measurements in cxcywh
+ gate_d2_thresh: squared Mahalanobis threshold
+
+ Returns:
+ mask: (T,D) boolean array
+ """
+ T = len(tracks)
+ D = det_z.shape[0]
+ mask = np.zeros((T, D), dtype=bool)
+
+ if T == 0 or D == 0:
+ return mask
+
+ # Ensure det_z is float32 for speed/consistency
+ det_z = np.asarray(det_z, dtype=np.float32)
+
+ # Precompute det_z as (4,D) once, so each track can reuse it cheaply
+ # det_z_T has columns = detections
+ det_z_T = det_z.T # (4,D)
+
+ for i, trk in enumerate(tracks):
+ kf = trk.kf
+
+ # Innovation covariance S = HPH^T + R (4x4)
+ H = kf.H.astype(np.float32)
+ P = kf.P.astype(np.float32)
+ R = kf.R.astype(np.float32)
+ S = H @ P @ H.T + R # (4,4)
+
+ # Predicted measurement Hx (4,1)
+ Hx = (H @ kf.x).astype(np.float32) # (4,1)
+
+ # Innovations for all detections at once:
+ # y = z - Hx, but z is (4,D) and Hx is (4,1) so broadcast works
+ y = det_z_T - Hx # (4,D)
+
+ # Solve S^{-1} y for all columns at once (4,D)
+ # This avoids explicitly computing inv(S)
+ try:
+ tmp = np.linalg.solve(S, y) # (4,D)
+ except np.linalg.LinAlgError:
+ # If S becomes singular for any reason, fail-safe: gate nothing for this track
+ continue
+
+ # d2 per detection = y^T * S^{-1} * y
+ # Since y and tmp are (4,D), columnwise dot is sum(y * tmp) over rows
+ d2 = np.sum(y * tmp, axis=0) # (D,)
+
+ mask[i, :] = (d2 <= gate_d2_thresh)
+
+ return mask
+
+
+def mahalanobis_distance_squared(kf: FP_KalmanFilter, z: np.ndarray) -> float:
+ """
+ Compute squared Mahalanobis distance:
+ d^2 = (z - Hx)^T S^{-1} (z - Hx),
+ where S = HPH^T + R.
+ """
+ H = kf.H
+ x = kf.x
+ y = z - (H @ x) # innovation
+ S = H @ kf.P @ H.T + kf.R # innovation covariance
+
+ # Solve S^{-1} y without explicitly inverting S
+ tmp = np.linalg.solve(S, y)
+ d2 = float((y.T @ tmp).item())
+ return d2
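+
+# Worked example (editor's illustration): if S were the 4x4 identity and the innovation
+# y = [1, 2, 0, 0]^T, then d^2 = 1 + 4 = 5, which would pass a gate threshold of
+# chi2.ppf(0.95, df=4) ~= 9.49.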
+
+
+def appearance_cost(track_feature: np.ndarray, det_feature: np.ndarray) -> float:
+ """Cosine distance = 1 - cosine similarity."""
+ if track_feature is None:
+ return 1.0 # max cost if no feature
+ f1 = track_feature / (np.linalg.norm(track_feature) + 1e-12)
+ f2 = det_feature / (np.linalg.norm(det_feature) + 1e-12)
+ return float(1.0 - np.dot(f1, f2))
+
+def appearance_cost_matrix(
+ track_feats: np.ndarray,
+ det_feats: np.ndarray,
+ track_has_feat: np.ndarray
+) -> np.ndarray:
+ """
+ Cosine distance matrix (T,D):
+ cost = 1 - cosine_similarity
+
+ If a track has no feature, we force its cost row to 1.0 (max cost),
+ matching your old scalar behavior.
+
+ Args:
+ track_feats: (T,F) float32
+ det_feats: (D,F) float32
+ track_has_feat: (T,) bool
+
+ Returns:
+ (T,D) float32 matrix in [0,2] but practically [0,1] if normalized; we clip to [0,1].
+ """
+ T = track_feats.shape[0]
+ D = det_feats.shape[0]
+
+ if T == 0 or D == 0:
+ return np.zeros((T, D), dtype=np.float32)
+
+ # Normalize features (avoid divide by 0)
+ eps = 1e-12
+
+ trk = track_feats.astype(np.float32, copy=False)
+ det = det_feats.astype(np.float32, copy=False)
+
+ trk_norm = trk / (np.linalg.norm(trk, axis=1, keepdims=True) + eps) # (T,F)
+ det_norm = det / (np.linalg.norm(det, axis=1, keepdims=True) + eps) # (D,F)
+
+ # Cosine similarity: (T,D) = (T,F) @ (F,D)
+ sim = trk_norm @ det_norm.T # (T,D)
+
+ # Convert to cosine distance cost
+ cost = (1.0 - sim).astype(np.float32)
+
+ # Tracks without features => max cost
+    # We set the entire row to 1.0 (same as the scalar appearance_cost() returning 1.0)
+ if track_has_feat is not None and track_has_feat.size == T:
+ cost[~track_has_feat, :] = 1.0
+
+ # Keep bounded for sanity
+ cost = np.clip(cost, 0.0, 1.0)
+ return cost
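+
+# Worked example (editor's illustration): identical embeddings give cosine similarity 1
+# and cost 0; orthogonal embeddings give similarity 0 and cost 1.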
+
+
+# -----------------------------
+# Assignment
+# -----------------------------
+def hungarian_assign(cost: np.ndarray) -> Tuple[List[Tuple[int, int]], List[int], List[int]]:
+ """
+ Solve Hungarian for minimal assignment.
+ Returns:
+ matches: (track_idx, det_idx)
+ unmatched_tracks: [track_idx]
+ unmatched_dets: [det_idx]
+ """
+ if cost.size == 0:
+ # Nothing to match
+ return [], list(range(cost.shape[0])), list(range(cost.shape[1]))
+
+
+ # Run Hungarian
+ row_ind, col_ind = linear_sum_assignment(cost)
+
+ # Build the matched pairs
+ matches = list(zip(row_ind.tolist(), col_ind.tolist()))
+
+ # Determine unmatched pairs
+ all_tracks = set(range(cost.shape[0]))
+ all_dets = set(range(cost.shape[1]))
+ matched_tracks = set(row_ind.tolist())
+ matched_dets = set(col_ind.tolist())
+
+ unmatched_tracks = sorted(list(all_tracks - matched_tracks))
+ unmatched_dets = sorted(list(all_dets - matched_dets))
+
+ return matches, unmatched_tracks, unmatched_dets
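+
+# Editor's illustrative example: with cost = np.array([[0.1, 0.9], [0.8, 0.2], [0.5, 0.6]])
+# (3 tracks x 2 detections), hungarian_assign(cost) returns matches [(0, 0), (1, 1)],
+# unmatched_tracks [2] and unmatched_dets [].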
+
+
+# -----------------------------
+# Track + TrackManager
+# -----------------------------
+class Track:
+ """
+ KF state is [cx, cy, w, h, vx, vy, vw, vh]^T
+ Measurement z is [cx, cy, w, h]^T
+ """
+ def __init__(self, track_id: int, init_det: Det, dt: float = 1.0):
+ self.id = int(track_id)
+ self.kf = FP_KalmanFilter(dim_x=8, dim_z=4)
+ self._init_kf(dt)
+
+
+ # Initialize state from detection measurement
+ cx, cy, w, h = init_det.z.flatten().tolist()
+ self.kf.x = np.array([cx, cy, w, h, 0.0, 0.0, 0.0, 0.0], dtype=np.float32).reshape(8, 1)
+
+ #Initialize other attributes
+ self.conf = init_det.conf # Store conf
+ self.class_name = init_det.class_name # Store class
+ self.bbox_xyxy: List[float] = [init_det.x1, init_det.y1, init_det.x2, init_det.y2]
+ self.feature: np.ndarray | None = init_det.embedding
+
+ self.missed: int = 0 # number of consecutive misses
+ self.hits: int = 1 # total number of hits
+
+
+ def _init_kf(self, dt: float) -> None:
+ # State transition matrix (constant velocity)
+ F = np.eye(8, dtype=np.float32)
+ for i in range(4):
+ F[i, i + 4] = dt
+ self.kf.F = F
+
+ # Measurement matrix: observe [cx, cy, w, h]
+ H = np.zeros((4, 8), dtype=np.float32)
+ H[:, :4] = np.eye(4, dtype=np.float32)
+ self.kf.H = H
+
+ #TODO: Tune Values Later
+ # Initialize covariance P (start uncertain about velocity)
+ self.kf.P *= 1000.0
+
+ # Measurement noise R
+ # Higher value -> trust measurements less
+ self.kf.R = np.eye(4, dtype=np.float32) * 10.0
+
+ # Process noise Q
+        # Small value -> assume near-constant velocity
+ self.kf.Q = np.eye(8, dtype=np.float32) * 0.01
+
+
+ def predict(self) -> None:
+ self.kf.predict()
+ self.bbox_xyxy = state2bbox_xyxy(self.kf.x.flatten())
+        self.missed += 1  # increment; reset to 0 on successful update
+
+
+ def update(self, det: Det) -> None:
+ self.kf.update(det.z)
+ self.bbox_xyxy = state2bbox_xyxy(self.kf.x.flatten())
+ self.conf = det.conf # Update conf
+        self.class_name = det.class_name # Keep the latest class label from the matched detection
+ self.feature = det.embedding
+ self.missed = 0
+ # Update the amount of times a track has been matched to a detection
+ self.hits += 1
+
+ def z_pred(self) -> np.ndarray:
+ """
+ Predicted measurement (4,) in cxcywh:
+ z_pred = Hx
+ Using this keeps motion costs consistent with Mahalanobis gating space.
+ """
+ z = (self.kf.H @ self.kf.x).reshape(4,)
+ return z.astype(np.float32, copy=False)
+
+
+class TrackManager:
+ def __init__(
+ self,
+ max_age: int,
+ min_hits: int,
+ gate_d2_thresh: float,
+ w_motion: float,
+ w_app: float,
+ dt: float = 1.0,
+ image_dims: tuple[int, int] = (720, 1280), # <--- TODO: placeholder dims, must verify
+ ):
+ self.tracks: List[Track] = []
+ self.next_id: int = 0
+
+ self.max_age = int(max_age)
+ self.min_hits = int(min_hits)
+
+ # This threshold is squared Mahalanobis distance
+ self.gate_d2_thresh = float(gate_d2_thresh)
+
+ self.w_motion = float(w_motion)
+ self.w_app = float(w_app)
+ self.dt = float(dt)
+
+ self.image_dims = image_dims
+
+
+ # ---- tiny adapters (for use with DataAssociation) ----
+ @staticmethod
+ def _stack_track_z(tracks: List[Track]) -> np.ndarray:
+ """
+ Stack predicted track measurements into (T,4) cxcywh.
+ """
+ if len(tracks) == 0:
+ return np.zeros((0, 4), dtype=np.float32)
+ return np.stack([t.z_pred() for t in tracks], axis=0).astype(np.float32)
+
+ @staticmethod
+ def _stack_det_z(detections: List[Det]) -> np.ndarray:
+ """
+ Stack detection measurements into (D,4) cxcywh.
+ """
+ if len(detections) == 0:
+ return np.zeros((0, 4), dtype=np.float32)
+ return np.stack([d.z.reshape(4,) for d in detections], axis=0).astype(np.float32)
+
+ @staticmethod
+ def _stack_track_features(tracks: List[Track], feat_dim: int = 128) -> tuple[np.ndarray, np.ndarray]:
+ """
+ Stack track features into (T,F). If a track has no feature, fill zeros and mark it invalid.
+
+ Returns:
+ feats: (T,F) float32
+ has_feat: (T,) bool
+ """
+ T = len(tracks)
+ if T == 0:
+ return np.zeros((0, feat_dim), dtype=np.float32), np.zeros((0,), dtype=bool)
+
+ feats = np.zeros((T, feat_dim), dtype=np.float32)
+ has_feat = np.zeros((T,), dtype=bool)
+
+ for i, t in enumerate(tracks):
+ if t.feature is None:
+ continue
+ feats[i, :] = t.feature.astype(np.float32, copy=False)
+ has_feat[i] = True
+
+ return feats, has_feat
+
+ @staticmethod
+ def _stack_det_features(detections: List[Det], feat_dim: int = 128) -> np.ndarray:
+ """
+ Stack detection embeddings into (D,F).
+ """
+ D = len(detections)
+ if D == 0:
+ return np.zeros((0, feat_dim), dtype=np.float32)
+ return np.stack([d.embedding.astype(np.float32, copy=False) for d in detections], axis=0)
+
+
+ def step(self, detections: List[Det]) -> List[Track]:
+ # 1) Predict existing tracks forward
+ for trk in self.tracks:
+ trk.predict()
+
+ # If no tracks exist, create from all detections
+ if len(self.tracks) == 0:
+ for det in detections:
+ self.tracks.append(Track(self.next_id, det, dt=self.dt))
+ self.next_id += 1
+ return [t for t in self.tracks if t.hits >= self.min_hits]
+
+ # If no detections, just age/prune tracks
+ if len(detections) == 0:
+ self.tracks = [t for t in self.tracks if t.missed <= self.max_age]
+ return [t for t in self.tracks if t.hits >= self.min_hits]
+
+ # ---- adapters (build arrays once per frame) ----
+ Zt = self._stack_track_z(self.tracks) # (T,4) cxcywh
+ Zd = self._stack_det_z(detections) # (D,4) cxcywh
+
+ Ft, track_has_feat = self._stack_track_features(self.tracks, feat_dim=128) # (T,128), (T,)
+ Fd = self._stack_det_features(detections, feat_dim=128) # (D,128)
+
+ # ---- vectorized motion cost via DataAssociation (returns (T,D)) ----
+ mc = weighted_mean_cost_matrix(
+ tracks=Zt,
+ detections=Zd,
+ image_dims=self.image_dims,
+ # You can pass custom lambdas here if you want:
+ # lambda_iou=0.7, lambda_de=0.2, lambda_r=0.1
+ ).astype(np.float32, copy=False) # (T,D)
+
+ # ---- vectorized appearance cost (returns (T,D)) ----
+ ac = appearance_cost_matrix(Ft, Fd, track_has_feat).astype(np.float32, copy=False) # (T,D)
+
+ # ---- gating mask (returns (T,D) bool) ----
+ gate = mahalanobis_gate_mask(self.tracks, Zd, self.gate_d2_thresh) # (T,D)
+
+ # ---- combine costs ----
+ cost = (self.w_motion * mc) + (self.w_app * ac) # (T,D)
+
+ # Any disallowed pairing becomes "impossible"
+ cost = cost.astype(np.float32, copy=False)
+ cost[~gate] = 1e6
+
+ # 3) Hungarian assignment
+ matches, unmatched_tracks, unmatched_dets = hungarian_assign(cost)
+
+ # 4) Update matched tracks
+ for trk_idx, det_idx in matches:
+ if cost[trk_idx, det_idx] >= 1e6:
+ unmatched_tracks.append(trk_idx)
+ unmatched_dets.append(det_idx)
+ continue
+ self.tracks[trk_idx].update(detections[det_idx])
+
+ # 5) Unmatched tracks: do nothing (they already had missed++ in predict())
+ unmatched_dets = sorted(set(unmatched_dets))
+
+ # 6) Create new tracks for unmatched detections
+ for det_idx in unmatched_dets:
+ self.tracks.append(Track(self.next_id, detections[det_idx], dt=self.dt))
+ self.next_id += 1
+
+ # 7) Prune dead tracks
+ self.tracks = [t for t in self.tracks if t.missed <= self.max_age]
+
+ # 8) Return only publishable tracks
+ return [t for t in self.tracks if t.hits >= self.min_hits]
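+
+    # Editor's illustrative offline usage sketch (made-up detection, not part of the ROS node):
+    #   mgr = TrackManager(max_age=30, min_hits=1, gate_d2_thresh=9.4877,
+    #                      w_motion=0.5, w_app=0.5)
+    #   det = Det(x1=80, y1=40, x2=120, y2=60,
+    #             z=np.array([100.0, 50.0, 40.0, 20.0], dtype=np.float32).reshape(4, 1),
+    #             embedding=np.zeros(128, dtype=np.float32))
+    #   mgr.step([det])  # creates track id 0; call once per frame with that frame's detections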
+
+
+
+# -----------------------------
+# ROS Node
+# -----------------------------
+class TrackingNode(Node):
+ def __init__(self):
+ super().__init__('tracking_node')
+
+        image_dims = (720, 1280) # (H, W) <-- TODO: set to the camera feed size; consider exposing as ROS parameters
+
+ self.track_manager = TrackManager(
+            max_age=30, # Delete a track if it has not been updated in 30 frames
+            min_hits=1, # Only publish tracks that have been matched at least this many times
+            gate_d2_thresh=9.4877, # chi2.ppf(0.95, df=4) is ~9.49
+            w_motion=0.5, # Weight for motion cost
+            w_app=0.5, # Weight for appearance cost
+ dt=1.0,
+ image_dims=image_dims,
+ )
+
+ self.sub_det = self.create_subscription(
+ DetectionArray,
+ '/kestrel/detections',
+ self.dets_cb,
+ 10,
+ )
+ self.pub_tracks = self.create_publisher(TrackArray, '/kestrel/tracks', 10)
+
+ def publish_tracks(self, header, tracks: List[Track]) -> None:
+ """Turn internal tracks into TrackArray message and publish."""
+ msg = TrackArray()
+ msg.header = header
+
+ for trk in tracks:
+ t = TrackMsg()
+ t.id = int(trk.id)
+ # Publish current bbox (xyxy). Keep float.
+ t.x1 = float(trk.bbox_xyxy[0])
+ t.y1 = float(trk.bbox_xyxy[1])
+ t.x2 = float(trk.bbox_xyxy[2])
+ t.y2 = float(trk.bbox_xyxy[3])
+
+ t.center_x = (t.x1 + t.x2) / 2.0
+ t.center_y = (t.y1 + t.y2) / 2.0
+
+ t.conf = float(trk.conf)
+ t.class_name = str(trk.class_name)
+
+ # Copy the embedding from the internal Track object to the ROS message
+ if trk.feature is not None:
+ # trk.feature is a numpy array, convert to list for ROS
+ t.embedding = trk.feature.tolist()
+
+ msg.tracks.append(t)
+
+ self.pub_tracks.publish(msg)
+
+
+ def dets_cb(self, det_msg: DetectionArray) -> None:
+ dets = [det_from_ros(d) for d in det_msg.detections]
+ active_tracks = self.track_manager.step(dets)
+ self.publish_tracks(det_msg.header, active_tracks)
+
+
+def main(args=None):
+ rclpy.init(args=args)
+ node = TrackingNode()
+ try:
+ rclpy.spin(node)
+ except KeyboardInterrupt:
+ pass
+ finally:
+ node.destroy_node()
+ rclpy.shutdown()
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/src/kestrel_tracking/package.xml b/src/kestrel_tracking/package.xml
index dc60420..0a4bdd4 100644
--- a/src/kestrel_tracking/package.xml
+++ b/src/kestrel_tracking/package.xml
@@ -1,23 +1,22 @@
-<?xml version="1.0"?>
-<package format="3">
-  <name>kestrel_tracking</name>
-  <version>0.1.0</version>
-  <description>Real-time object tracking node for Kestrel using DeepSORT and vision_msgs</description>
-  <maintainer email="davidorjuela@todo.todo">David Orjuela</maintainer>
-  <license>MIT</license>
-
-  <buildtool_depend>ament_python</buildtool_depend>
-
-  <depend>rclpy</depend>
-  <depend>vision_msgs</depend>
-  <depend>kestrel_msgs</depend>
-
-  <test_depend>ament_copyright</test_depend>
-  <test_depend>ament_flake8</test_depend>
-  <test_depend>ament_pep257</test_depend>
-  <test_depend>python3-pytest</test_depend>
-
-  <export>
-    <build_type>ament_python</build_type>
-  </export>
-</package>
+<?xml version="1.0"?>
+<package format="3">
+  <name>kestrel_tracking</name>
+  <version>0.1.0</version>
+  <description>Real-time object tracking node for Kestrel using DeepSORT and vision_msgs</description>
+  <maintainer email="davidorjuela@todo.todo">David Orjuela</maintainer>
+  <license>MIT</license>
+
+  <buildtool_depend>ament_python</buildtool_depend>
+
+  <depend>rclpy</depend>
+  <depend>kestrel_msgs</depend>
+
+  <test_depend>ament_copyright</test_depend>
+  <test_depend>ament_flake8</test_depend>
+  <test_depend>ament_pep257</test_depend>
+  <test_depend>python3-pytest</test_depend>
+
+  <export>
+    <build_type>ament_python</build_type>
+  </export>
+</package>
diff --git a/src/kestrel_tracking/setup.cfg b/src/kestrel_tracking/setup.cfg
index 65f004a..a45a703 100644
--- a/src/kestrel_tracking/setup.cfg
+++ b/src/kestrel_tracking/setup.cfg
@@ -1,4 +1,4 @@
-[develop]
-script_dir=$base/lib/kestrel_tracking
-[install]
-install_scripts=$base/lib/kestrel_tracking
+[develop]
+script_dir=$base/lib/kestrel_tracking
+[install]
+install_scripts=$base/lib/kestrel_tracking
diff --git a/src/kestrel_tracking/setup.py b/src/kestrel_tracking/setup.py
index a6edecb..7912fef 100644
--- a/src/kestrel_tracking/setup.py
+++ b/src/kestrel_tracking/setup.py
@@ -1,25 +1,25 @@
-from setuptools import find_packages, setup
-
-package_name = 'kestrel_tracking'
-
-setup(
- name=package_name,
- version='0.0.0',
- packages=find_packages(exclude=['test']),
- data_files=[
- ('share/ament_index/resource_index/packages',
- ['resource/' + package_name]),
- ('share/' + package_name, ['package.xml']),
- ],
- install_requires=['setuptools'],
- zip_safe=True,
- maintainer='davidorjuela',
- maintainer_email='davidorjuela@todo.todo',
- description='TODO: Package description',
- license='TODO: License declaration',
- tests_require=['pytest'],
- entry_points={
- 'console_scripts': [
- ],
- },
-)
+from setuptools import find_packages, setup
+
+package_name = 'kestrel_tracking'
+
+setup(
+ name=package_name,
+ version='0.0.0',
+ packages=find_packages(exclude=['test']),
+ data_files=[
+ ('share/ament_index/resource_index/packages',
+ ['resource/' + package_name]),
+ ('share/' + package_name, ['package.xml']),
+ ],
+ install_requires=['setuptools'],
+ zip_safe=True,
+ maintainer='',
+ maintainer_email='davidorjuela@todo.todo',
+ description='TODO: Package description',
+ license='TODO: License declaration',
+ entry_points={
+ 'console_scripts': [
+ 'tracking_node = kestrel_tracking.tracking_node:main',
+ ],
+ },
+)
diff --git a/src/kestrel_tracking/test/test_copyright.py b/src/kestrel_tracking/test/test_copyright.py
index 97a3919..282981a 100644
--- a/src/kestrel_tracking/test/test_copyright.py
+++ b/src/kestrel_tracking/test/test_copyright.py
@@ -1,25 +1,25 @@
-# Copyright 2015 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_copyright.main import main
-import pytest
-
-
-# Remove the `skip` decorator once the source file(s) have a copyright header
-@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.')
-@pytest.mark.copyright
-@pytest.mark.linter
-def test_copyright():
- rc = main(argv=['.', 'test'])
- assert rc == 0, 'Found errors'
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_copyright.main import main
+import pytest
+
+
+# Remove the `skip` decorator once the source file(s) have a copyright header
+@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.')
+@pytest.mark.copyright
+@pytest.mark.linter
+def test_copyright():
+ rc = main(argv=['.', 'test'])
+ assert rc == 0, 'Found errors'
diff --git a/src/kestrel_tracking/test/test_flake8.py b/src/kestrel_tracking/test/test_flake8.py
index 27ee107..625324e 100644
--- a/src/kestrel_tracking/test/test_flake8.py
+++ b/src/kestrel_tracking/test/test_flake8.py
@@ -1,25 +1,25 @@
-# Copyright 2017 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_flake8.main import main_with_errors
-import pytest
-
-
-@pytest.mark.flake8
-@pytest.mark.linter
-def test_flake8():
- rc, errors = main_with_errors(argv=[])
- assert rc == 0, \
- 'Found %d code style errors / warnings:\n' % len(errors) + \
- '\n'.join(errors)
+# Copyright 2017 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_flake8.main import main_with_errors
+import pytest
+
+
+@pytest.mark.flake8
+@pytest.mark.linter
+def test_flake8():
+ rc, errors = main_with_errors(argv=[])
+ assert rc == 0, \
+ 'Found %d code style errors / warnings:\n' % len(errors) + \
+ '\n'.join(errors)
diff --git a/src/kestrel_tracking/test/test_pep257.py b/src/kestrel_tracking/test/test_pep257.py
index b234a38..4569717 100644
--- a/src/kestrel_tracking/test/test_pep257.py
+++ b/src/kestrel_tracking/test/test_pep257.py
@@ -1,23 +1,23 @@
-# Copyright 2015 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_pep257.main import main
-import pytest
-
-
-@pytest.mark.linter
-@pytest.mark.pep257
-def test_pep257():
- rc = main(argv=['.', 'test'])
- assert rc == 0, 'Found code style errors / warnings'
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_pep257.main import main
+import pytest
+
+
+@pytest.mark.linter
+@pytest.mark.pep257
+def test_pep257():
+ rc = main(argv=['.', 'test'])
+ assert rc == 0, 'Found code style errors / warnings'
diff --git a/src/kestrel_vision/kestrel_vision/dataset.py b/src/kestrel_vision/kestrel_vision/dataset.py
index 97301a8..cc954e1 100644
--- a/src/kestrel_vision/kestrel_vision/dataset.py
+++ b/src/kestrel_vision/kestrel_vision/dataset.py
@@ -1,81 +1,81 @@
-import os
-import torch
-from torch.utils.data import Dataset # The base class we will inherit from to create our custom dataset
-from PIL import Image # Python Imaging Library (Pillow), used for opening and manipulating image files
-
-class Market1501(Dataset):
- """
- Custom PyTorch Dataset for the Market-1501 dataset.
-
- This class handles loading images and parsing their person IDs (PIDs)
- from the filenames to be used as integer labels for training a classification model.
- """
-
- # CHANGED: Added 'transform=None' to accept an image transformation pipeline.
- def __init__(self, image_dir, transform=None):
- """
- Args:
- image_dir (string): Directory path with all the training images.
- transform (callable, optional): A function/transform to be applied to each image.
- """
- # Store the directory path for later use in __getitem__.
- self.image_dir = image_dir
-
- # Store the transform pipeline (e.g., resize, normalize) for later use.
- self.transform = transform
-
- # Create a list of all filenames in the directory that end with '.jpg'.
- # This is a 'list comprehension', a compact way to build a list.
- # It filters out any non-JPEG files (like '.txt' or system files) that might cause errors.
- self.image_paths = [file for file in os.listdir(image_dir) if file.endswith(".jpg")]
-
- # Create a set of unique Person IDs (PIDs) from the filenames.
- # Example filename: '0002_c1s1_000451_01.jpg'. We split by '_' and take the first part '0002'.
- # Using a 'set' automatically removes all duplicate PIDs, giving us a list of unique people.
- pids_in_dataset = set([path.split("_")[0] for path in self.image_paths])
-
- # Create a mapping dictionary to convert string PIDs to integer labels (0, 1, 2, ...).
- # Neural networks and loss functions require integer labels, not strings.
- # 'enumerate' provides a counter 'i' for each unique 'pid'.
- # Example: {'0002': 0, '0007': 1, '-1': 2}
- self.pid_to_label = {pid: i for i, pid in enumerate(pids_in_dataset)}
-
- # Store the total number of unique classes (people).
- # This is needed to correctly configure the output layer of our neural network.
- self.num_classes = len(self.pid_to_label)
-
- # The __getitem__ method defines how to retrieve a single sample (one image and its label) from the dataset.
- # The DataLoader will call this method automatically for each index from 0 to len(dataset)-1.
- def __getitem__(self, index):
- # Retrieve the filename for the given index from our list of paths.
- name = self.image_paths[index]
-
- # Construct the full, platform-independent path to the image file.
- # os.path.join is used so this code works on both Windows ('\') and Linux/Mac ('/').
- path = os.path.join(self.image_dir, name)
-
- # Open the image file using Pillow and ensure it's in RGB format (corresponds to 3 channels).
- # This is important because some images might be in grayscale or have an alpha channel.
- image = Image.open(path).convert("RGB")
-
- # CHANGED: Apply the transformations to the image.
- # This is a CRITICAL step. It will resize, normalize, and convert the PIL Image to a PyTorch Tensor.
- if self.transform:
- image = self.transform(image)
-
- # Parse the filename again to get the string Person ID.
- pid = name.split("_")[0]
-
- # Use our mapping dictionary to look up the integer label for the corresponding PID.
- label = self.pid_to_label[pid]
-
- # Return the processed image tensor and its corresponding label as a tensor.
- # The label must be a LongTensor (64-bit integer) for PyTorch's CrossEntropyLoss function.
- return image, torch.tensor(label, dtype=torch.long)
-
- # The __len__ method must return the total number of samples in the dataset.
- # The DataLoader needs this to know the dataset's size for batching, shuffling, and epoch completion.
- def __len__(self):
- # Return the total count of the image paths we found during initialization.
- return len(self.image_paths)
+import os
+import torch
+from torch.utils.data import Dataset # The base class we will inherit from to create our custom dataset
+from PIL import Image # Python Imaging Library (Pillow), used for opening and manipulating image files
+
+class Market1501(Dataset):
+ """
+ Custom PyTorch Dataset for the Market-1501 dataset.
+
+ This class handles loading images and parsing their person IDs (PIDs)
+ from the filenames to be used as integer labels for training a classification model.
+ """
+
+ # CHANGED: Added 'transform=None' to accept an image transformation pipeline.
+ def __init__(self, image_dir, transform=None):
+ """
+ Args:
+ image_dir (string): Directory path with all the training images.
+ transform (callable, optional): A function/transform to be applied to each image.
+ """
+ # Store the directory path for later use in __getitem__.
+ self.image_dir = image_dir
+
+ # Store the transform pipeline (e.g., resize, normalize) for later use.
+ self.transform = transform
+
+ # Create a list of all filenames in the directory that end with '.jpg'.
+ # This is a 'list comprehension', a compact way to build a list.
+ # It filters out any non-JPEG files (like '.txt' or system files) that might cause errors.
+ self.image_paths = [file for file in os.listdir(image_dir) if file.endswith(".jpg")]
+
+ # Create a set of unique Person IDs (PIDs) from the filenames.
+ # Example filename: '0002_c1s1_000451_01.jpg'. We split by '_' and take the first part '0002'.
+ # Using a 'set' automatically removes all duplicate PIDs, giving us a list of unique people.
+ pids_in_dataset = set([path.split("_")[0] for path in self.image_paths])
+
+ # Create a mapping dictionary to convert string PIDs to integer labels (0, 1, 2, ...).
+ # Neural networks and loss functions require integer labels, not strings.
+ # 'enumerate' provides a counter 'i' for each unique 'pid'.
+ # Example: {'0002': 0, '0007': 1, '-1': 2}
+ self.pid_to_label = {pid: i for i, pid in enumerate(pids_in_dataset)}
+
+ # Store the total number of unique classes (people).
+ # This is needed to correctly configure the output layer of our neural network.
+ self.num_classes = len(self.pid_to_label)
+
+ # The __getitem__ method defines how to retrieve a single sample (one image and its label) from the dataset.
+ # The DataLoader will call this method automatically for each index from 0 to len(dataset)-1.
+ def __getitem__(self, index):
+ # Retrieve the filename for the given index from our list of paths.
+ name = self.image_paths[index]
+
+ # Construct the full, platform-independent path to the image file.
+ # os.path.join is used so this code works on both Windows ('\') and Linux/Mac ('/').
+ path = os.path.join(self.image_dir, name)
+
+ # Open the image file using Pillow and ensure it's in RGB format (corresponds to 3 channels).
+ # This is important because some images might be in grayscale or have an alpha channel.
+ image = Image.open(path).convert("RGB")
+
+ # CHANGED: Apply the transformations to the image.
+ # This is a CRITICAL step. It will resize, normalize, and convert the PIL Image to a PyTorch Tensor.
+ if self.transform:
+ image = self.transform(image)
+
+ # Parse the filename again to get the string Person ID.
+ pid = name.split("_")[0]
+
+ # Use our mapping dictionary to look up the integer label for the corresponding PID.
+ label = self.pid_to_label[pid]
+
+ # Return the processed image tensor and its corresponding label as a tensor.
+ # The label must be a LongTensor (64-bit integer) for PyTorch's CrossEntropyLoss function.
+ return image, torch.tensor(label, dtype=torch.long)
+
+ # The __len__ method must return the total number of samples in the dataset.
+ # The DataLoader needs this to know the dataset's size for batching, shuffling, and epoch completion.
+ def __len__(self):
+ # Return the total count of the image paths we found during initialization.
+ return len(self.image_paths)
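+
+
+# Editor's illustrative usage sketch (directory path and transform values are assumptions):
+#   from torchvision import transforms
+#   tf = transforms.Compose([transforms.Resize((128, 64)), transforms.ToTensor()])
+#   ds = Market1501("Market-1501/bounding_box_train", transform=tf)
+#   img, label = ds[0]  # img: float tensor of shape (3, 128, 64); label: int64 tensor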
\ No newline at end of file
diff --git a/src/kestrel_vision/kestrel_vision/model.py b/src/kestrel_vision/kestrel_vision/model.py
index e20fa20..e0254cb 100644
--- a/src/kestrel_vision/kestrel_vision/model.py
+++ b/src/kestrel_vision/kestrel_vision/model.py
@@ -1,42 +1,42 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-class CNNdeepSORT(nn.Module):
-
- def __init__(self, embedding_dim):
- super().__init__()
-
- self.convolution = nn.Sequential(
- #Input Dimensions: [B, 3, H, W]
- #inChannels= 3 (RGB), outChannels = embedding_dim/4 (out being # of kernels)
-
- # Stage 1
- nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), # without padding, convolution shrinks images quickly, potentially losing info @ edges.
- nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True),
- nn.MaxPool2d(2), #downsample using maxmimum values in kernel -> H/2 x W/2
-
- # Stage 2
- nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True),
- nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True),
- nn.MaxPool2d(2), # H/2 x W/2
-
- # Stage 3
- nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(True),
- nn.Conv2d(in_channels=256, out_channels=embedding_dim, kernel_size=3, padding=1), nn.BatchNorm2d(embedding_dim), nn.ReLU(True),
-
- #Final reduction later into 1x1 embedding: [B, embedding_dim, 1, 1]
- nn.AdaptiveAvgPool2d((1,1))
- )
-
- # Fully connected classifier for Re-ID training
- #self.classifier = nn.Linear(in_features = embedding_dim, out_features=num_classes)
-
- def forward(self, inputTensor):
-
- output = self.convolution(inputTensor) # CNN encoder block -> [B, emb_dim, 1, 1]
- output = torch.flatten(output,1) # [B, emb_dim] -> person's appearance vector
-
- output = self.classifier(output) # For use as classifier -> [B, num_classes]
- # else: For use as feature extractor
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class CNNdeepSORT(nn.Module):
+
+ def __init__(self, embedding_dim):
+ super().__init__()
+
+ self.convolution = nn.Sequential(
+ #Input Dimensions: [B, 3, H, W]
+            # in_channels = 3 (RGB); out_channels = number of learned kernels per layer, growing 64 -> 128 -> 256 -> embedding_dim
+
+ # Stage 1
+ nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), # without padding, convolution shrinks images quickly, potentially losing info @ edges.
+ nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True),
+            nn.MaxPool2d(2), # downsample using maximum values in each kernel window -> H/2 x W/2
+
+ # Stage 2
+ nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True),
+ nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True),
+ nn.MaxPool2d(2), # H/2 x W/2
+
+ # Stage 3
+ nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(True),
+ nn.Conv2d(in_channels=256, out_channels=embedding_dim, kernel_size=3, padding=1), nn.BatchNorm2d(embedding_dim), nn.ReLU(True),
+
+            # Final reduction layer into a 1x1 embedding: [B, embedding_dim, 1, 1]
+ nn.AdaptiveAvgPool2d((1,1))
+ )
+
+ # Fully connected classifier for Re-ID training
+ #self.classifier = nn.Linear(in_features = embedding_dim, out_features=num_classes)
+
+ def forward(self, inputTensor):
+
+ output = self.convolution(inputTensor) # CNN encoder block -> [B, emb_dim, 1, 1]
+ output = torch.flatten(output,1) # [B, emb_dim] -> person's appearance vector
+
+ #output = self.classifier(output) # For use as classifier -> [B, num_classes]
+ # else: For use as feature extractor
return output
\ No newline at end of file
diff --git a/src/kestrel_vision/kestrel_vision/processing.py b/src/kestrel_vision/kestrel_vision/processing.py
index 023d3c0..0dac5c1 100644
--- a/src/kestrel_vision/kestrel_vision/processing.py
+++ b/src/kestrel_vision/kestrel_vision/processing.py
@@ -1,27 +1,27 @@
-import cv2
-import torch
-import numpy as np
-
-def extract_person_embedding(frame: np.ndarray, box, cnn_model) -> np.ndarray | None:
- """
- Crops a person from the frame, runs pre-processing, and extracts
- the appearance embedding using the CNN model.
- """
- x1, y1, x2, y2 = map(int, box.xyxy[0])
- person_crop = frame[y1:y2, x1:x2]
-
- if person_crop.size == 0:
- return None
-
- # Pre-process the crop for the CNN
- crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB) # get rgb instead of bgr
- crop_tensor = torch.from_numpy(crop_rgb).permute(2, 0, 1).unsqueeze(0).float() / 255.0
-
- # Run CNN inference to get the appearance vector
- with torch.no_grad():
- appearance_vector = cnn_model(crop_tensor)
- if hasattr(appearance_vector, "detach"):
- appearance_vector = appearance_vector.detach().cpu().numpy()
-
- # Squeeze the batch dimension out
+import cv2
+import torch
+import numpy as np
+
+def extract_person_embedding(frame: np.ndarray, box, cnn_model) -> np.ndarray | None:
+ """
+ Crops a person from the frame, runs pre-processing, and extracts
+ the appearance embedding using the CNN model.
+ """
+ x1, y1, x2, y2 = map(int, box.xyxy[0])
+ person_crop = frame[y1:y2, x1:x2]
+
+ if person_crop.size == 0:
+ return None
+
+ # Pre-process the crop for the CNN
+ crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB) # get rgb instead of bgr
+ crop_tensor = torch.from_numpy(crop_rgb).permute(2, 0, 1).unsqueeze(0).float() / 255.0
+
+ # Run CNN inference to get the appearance vector
+ with torch.no_grad():
+ appearance_vector = cnn_model(crop_tensor)
+ if hasattr(appearance_vector, "detach"):
+ appearance_vector = appearance_vector.detach().cpu().numpy()
+
+ # Squeeze the batch dimension out
return np.squeeze(appearance_vector)
\ No newline at end of file
diff --git a/src/kestrel_vision/kestrel_vision/train.py b/src/kestrel_vision/kestrel_vision/train.py
index f78a55b..0d99346 100644
--- a/src/kestrel_vision/kestrel_vision/train.py
+++ b/src/kestrel_vision/kestrel_vision/train.py
@@ -1,292 +1,292 @@
-import os
-import sys
-import time
-import torch
-import torch.nn as nn
-import torchvision
-from datetime import datetime
-from torchvision import transforms
-from tqdm import tqdm
-from torch.utils.data import DataLoader
-from torch.utils.data import random_split
-from torch.utils.tensorboard import SummaryWriter
-from torch.amp import autocast, GradScaler # instead of torch.cuda.amp -- deprecated
-from torch.nn import TripletMarginWithDistanceLoss
-# Custom Modules
-import dataset
-from model import CNNdeepSORT
-
-# ---------------------------------------------------
-# TRAINING FUNCTION
-# ---------------------------------------------------
-def euclidean_distance(x, y):
- return torch.norm(x - y, p=2, dim=1)
-
-def training(model, dataloader, loss_fn, optimizer, device):
- model.train() # Set model to training mode
- total_loss = 0
- total_correct = 0
- total_samples = 0
- progress_bar = tqdm(dataloader, "Training Progress...", unit = "batch")
-
- for (archor, positive, negative) in progress_bar:
- anchor.to(device, non_blocking=True)
- positive = positive.to(device, non_blocking=True)
- negative = negative.to(device, non_blocking=True)
- # Non_blocking lets compute overlap with data transfer (small but free speed-up). Launches host --> GPU copy async if tensor already pinned (it is)
-
- with autocast(device_type=device):
- anchor_out = model(anchor)
- positive_out = model(positive)
- negative_out = model(negative)
-
- loss = loss_fn(anchor_out, positive_out, negative_out)
-
- # Backward pass and optimization
- scaler.scale(loss).backward() # 1. Calculate gradients based on the current loss.
- torch.nn.utils.clip_grad_norm_(model.parameters(), clip_val) # Compues total L2 norm for all gradients. If exceeds `clip_val`, rescales them to be equal. Prevents gradient explosion.
- scaler.step(optimizer) # 2. Update the model's weights using the calculated gradients. With scaler, undoes the scaling and skips the step if gradients overflowed (inf)
- scaler.update() # Adjusts the scale factor up/down depending on whether overflow was detected, keeping training stable.
- optimizer.zero_grad(set_to_none=True)# 3. Reset gradients to zero for the next iteration. Instead of writing 0s into every grad tensor, it just sets the .grad pointer to None (saving a full CUDA memset each iteration)
-
- # Metrics
- total_loss += loss.item()
- d_ap = euclidean_distance(anchor_out, positive_out)
- d_an = euclidean_distance(anchor_out, negative_out)
- total_correct += (d_ap < d_an).sum().item()
- total_samples += anchor.size(0)
-
- progress_bar.set_postfix(loss=loss.item())
-
- avg_loss = total_loss / len(dataloader)
- accuracy = total_correct / total_samples
- return avg_loss, accuracy
-
-# ---------------------------------------------------
-# 1. SETUP
-# ---------------------------------------------------
-
-# This code will only run when you execute `python train.py` directly
-if __name__ == '__main__':
- # Hyperparameters
- LEARNING_RATE = 1e-3
- EPOCHS = 50
- BATCH_SIZE = 128 # e.g. 32, 64, 128
- # Increased from 64. AMP frees memory, and more data per step = fewer GPU idles
-
-
- # Move the model to the appropriate device (GPU if available)
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
- print(f"Using device: {device}")
-
- # Tries several convolution algorithms on the first batch, then uses fastest one thereafter.
- # Pays off only if input sizes stay fixed (normalized to 256x128)
- torch.backends.cudnn.benchmark = True
-
- scaler = GradScaler() # WIth FP16, gradients can under-flow to 0. This multiplies the loss by a large factor, performs backward, then divides them back down during optimizer.
- clip_val = 1.0 # gradient-clip norm
-
- # ---------------------------------------------------
- # 2. MODEL, LOSS, OPTIMIZER
- # ---------------------------------------------------
-
- # Instantiate the model and move to device
- # 751 is the number of classes for Market-1501
- model = CNNdeepSORT(embedding_dim=128).to(device)
-
- # Define loss function, optimizer, and learning rate scheduler
-
- #TODO distance function, margin depends on distance choosen
-
- loss_function = nn.TripletMarginWithDistanceLoss(distance_function=euclidean_distance, margin=0.3, swap = True ) # Cross Entropy Loss is simpler, so more suitable for validation
- optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE) # AdamW is an upgraded version of Adam
-
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # Decays LR by a factor of 0.1 every 10 epochs
- # Gamma of 0.1 is common -- sharp reduction in LR, allowing model to switch from large adjustments to fine-tuning
-
- # Load from saved checkpoint if found (most generalized model)
- checkpoint_path = 'best_model_checkpoint.pth'
- start_epoch = 0
- if(len(sys.argv) > 1 and sys.argv[1].lower() == "load"):
- if(os.path.exists(checkpoint_path)):
- print(f"Loading checkpoint from {checkpoint_path}...")
- checkpoint = torch.load(checkpoint_path)
- model.load_state_dict(checkpoint['model_state_dict'])
- optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
- scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
- scaler.load_state_dict(checkpoint.get('scaler_state_dict', {}))
- start_epoch = checkpoint['epoch'] + 1
- print(f"Resumed at epoch {start_epoch} | val_loss={checkpoint['loss']:.4f}")
- else:
- print("No checkpoint found, starting from scratch.")
- else:
- print("Training from scratch...")
-
- #TODO
-
-
- # ---------------------------------------------------
- # 3. DATA LOADING & SMALL VALIDATION SPLIT
- # ---------------------------------------------------
-
- # Define more robust transforms for training
- # Images are resized and normalized for better model performance.
- transform = transforms.Compose([
- transforms.Resize((128, 64)), # native Market-1501
- transforms.RandomHorizontalFlip(p=0.5), # Adds ~1% mAP with no speed cost
- transforms.ToTensor(),
- transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # mean and std are pre-calculated mean/stdev of the ImageNet dataset.
- # For each channel of the image: output[channel] = (input[channel] - mean[channel]) / std[channel]
- # Standardizes the pixel value range to train more effectively. Helps converge faster
- ])
-
- # Load dataset and create DataLoader
- Market1501_train_file_path = r'C:\Users\david\Downloads\Market-1501-v15.09.15\bounding_box_train'
- train_data = dataset.Market1501(Market1501_train_file_path, transform=transform)
-
- # Only used for early-stopping / LR checks, NOT for final testing. Tiny split keeps epochs fast but still shows over-/under-fitting trends
- val_len = int(0.05 * len(train_data)) # split 5 % off for quick val
- train_len = len(train_data) - val_len
-
- # split dataset before building both dataLoaders
- train_ds, val_ds = random_split(train_data, [train_len, val_len])
-
- # Entire training data
- training_dataloader = DataLoader(
- dataset=train_ds,
- batch_size=BATCH_SIZE,
- shuffle=True,
- persistent_workers=True,# Keeps DataLoader workers alive across epochs and
- prefetch_factor=4, # queues 4 batches per worker. Eliminates Windows process-spawn penalty; feeds GPU continuously
- num_workers=4, # start with 4; 6–8 if CPU has threads to spare
- pin_memory=True) # pin_memory puts tensors into pinned memory, allowing for faster transfer from CPU to GPU.
- # Can significantly speed up training. Set to true with GPU.
-
- val_loader = DataLoader(
- val_ds,
- batch_size=BATCH_SIZE,
- shuffle=False, # no need to shuffle for evaluation
- num_workers=4,
- pin_memory=True,
- persistent_workers=True)
-
- # Evaluation helper
- @torch.no_grad() # we don't track gradients here
- def evaluate(model, loader):
- """
- Runs one full pass on `loader` with AMP and returns:
- (mean_cross_entropy_loss, top-1_accuracy)
- """
- model.eval() # turn off dropout / batch-norm update
- total_loss = 0.0
- correct = 0
- samples = 0
-
- for anchor, positive, negative in loader:
- anchor = anchor.to(device, non_blocking=True)
- positive = positive.to(device, non_blocking=True)
- negative = negative.to(device, non_blocking=True)
-
- with autocast(device_type=device): # AMP even in eval -> less VRAM, faster
- anchor_out = model(anchor)
- positive_out = model(positive)
- negative_out = model(negative)
-
-
-
-
- loss = loss_function(anchor_out, positive_out, negative_out)
-
- total_loss += loss.item()
- #TODO
- d_ap = euclidean_distance(anchor_out, positive_out)
- d_an = euclidean_distance(anchor_out, negative_out)
- correct += (d_ap < d_an).sum().item()
-
- samples += anchor.size(0)
-
- mean_loss = total_loss / len(loader)
- accuracy = correct / samples
- model.train() # restore training mode for caller
- return mean_loss, accuracy
-
- # ---------------------------------------------------
- # 4. TENSORBOARD LOGGING (Initial)
- # ---------------------------------------------------
-
- print("Logging model graph and sample images to TensorBoard...")
- # Get a single batch from the dataloader to log graph and images
- anchor, positibe, negative = next(iter(training_dataloader))
-
- print("Done.")
-
- # ---------------------------------------------------
- # 6. MAIN TRAINING LOOP
- # ---------------------------------------------------
- best_val_loss = float('inf')
- patience = 5
- patience_counter = 0
-
- for epoch in range(start_epoch, EPOCHS):
- start_time = time.time()
-
- # Train for one epoch
- avg_loss, accuracy = training(model, training_dataloader, loss_function, optimizer, device) #tells you if optimizer is doing its job on the data it sees
- val_loss, val_acc = evaluate(model, val_loader) #tells you if the network is generalizing; early stopping
-
- #*NEW* Update the learning rate
- scheduler.step() # Without this, the model is unable to converge.
-
-
-
- # ----- save best on *validation* loss -----
- #if avg_loss < best_val_loss:
- if val_loss < best_val_loss:
- #best_val_loss = avg_loss
- best_val_loss = val_loss
- torch.save({
- 'epoch': epoch,
- 'model_state_dict': model.state_dict(),
- 'optimizer_state_dict': optimizer.state_dict(),
- 'scheduler_state_dict': scheduler.state_dict(),
- 'loss': val_loss
- }, 'best_model_checkpoint.pth')
- print(f"Epoch {epoch+1}: val_loss improved to {val_loss:.4f}")
- patience_counter = 0
- else:
- patience_counter += 1
- print(f" Epoch {epoch+1}: Loss did not improve. Patience = {patience_counter}/{patience}")
-
- # Print epoch summary
- elapsed_time = time.time() - start_time
- print(f"\nEpoch {epoch+1}/{EPOCHS} | Avg Loss: {avg_loss:.4f} | Acc: {accuracy:.2%} | Time: {elapsed_time:.2f}s")
-
- # Always save latest weights
- torch.save({
- 'epoch': epoch,
- 'model_state_dict': model.state_dict(),
- 'optimizer_state_dict': optimizer.state_dict(),
- 'scheduler_state_dict': scheduler.state_dict(),
- 'scaler_state_dict': scaler.state_dict(),
- 'loss': avg_loss
- }, 'latest_model_checkpoint_epoch.pth')
-
- # Early stopping check
- if patience_counter >= patience:
- print(f"\n Early stopping triggered after {epoch+1} epochs.")
- break
-
- # Log weight histograms
-
-
- print("\nTraining Finished!")
-
-
- # ---------------------------------------------------
- # 7. CLEANUP
- # ---------------------------------------------------
-
-
-
- # To view the logs, open a terminal in your project's root directory and run:
+import os
+import sys
+import time
+import torch
+import torch.nn as nn
+import torchvision
+from datetime import datetime
+from torchvision import transforms
+from tqdm import tqdm
+from torch.utils.data import DataLoader
+from torch.utils.data import random_split
+from torch.utils.tensorboard import SummaryWriter
+from torch.amp import autocast, GradScaler # instead of torch.cuda.amp -- deprecated
+from torch.nn import TripletMarginWithDistanceLoss
+# Custom Modules
+import dataset
+from model import CNNdeepSORT
+
+# ---------------------------------------------------
+# TRAINING FUNCTION
+# ---------------------------------------------------
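+# Per-sample L2 (Euclidean) distance between two [B, D] batches of embeddings; returns a [B] tensor of distances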
+def euclidean_distance(x, y):
+ return torch.norm(x - y, p=2, dim=1)
+
+def training(model, dataloader, loss_fn, optimizer, device):
+ model.train() # Set model to training mode
+ total_loss = 0
+ total_correct = 0
+ total_samples = 0
+ progress_bar = tqdm(dataloader, "Training Progress...", unit = "batch")
+
+    for anchor, positive, negative in progress_bar:
+        anchor = anchor.to(device, non_blocking=True)
+ positive = positive.to(device, non_blocking=True)
+ negative = negative.to(device, non_blocking=True)
+ # Non_blocking lets compute overlap with data transfer (small but free speed-up). Launches host --> GPU copy async if tensor already pinned (it is)
+
+ with autocast(device_type=device):
+ anchor_out = model(anchor)
+ positive_out = model(positive)
+ negative_out = model(negative)
+
+ loss = loss_fn(anchor_out, positive_out, negative_out)
+
+ # Backward pass and optimization
+        scaler.scale(loss).backward() # 1. Calculate gradients based on the current (scaled) loss.
+        scaler.unscale_(optimizer) # Unscale gradients in-place so clipping operates on their true magnitudes.
+        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_val) # Computes the total L2 norm of all gradients. If it exceeds `clip_val`, rescales them so the norm equals `clip_val`. Prevents gradient explosion.
+ scaler.step(optimizer) # 2. Update the model's weights using the calculated gradients. With scaler, undoes the scaling and skips the step if gradients overflowed (inf)
+ scaler.update() # Adjusts the scale factor up/down depending on whether overflow was detected, keeping training stable.
+ optimizer.zero_grad(set_to_none=True)# 3. Reset gradients to zero for the next iteration. Instead of writing 0s into every grad tensor, it just sets the .grad pointer to None (saving a full CUDA memset each iteration)
+
+ # Metrics
+ total_loss += loss.item()
+ d_ap = euclidean_distance(anchor_out, positive_out)
+ d_an = euclidean_distance(anchor_out, negative_out)
+ total_correct += (d_ap < d_an).sum().item()
+ total_samples += anchor.size(0)
+
+ progress_bar.set_postfix(loss=loss.item())
+
+ avg_loss = total_loss / len(dataloader)
+ accuracy = total_correct / total_samples
+ return avg_loss, accuracy
+
+# ---------------------------------------------------
+# 1. SETUP
+# ---------------------------------------------------
+
+# This code will only run when you execute `python train.py` directly
+if __name__ == '__main__':
+ # Hyperparameters
+ LEARNING_RATE = 1e-3
+ EPOCHS = 50
+ BATCH_SIZE = 128 # e.g. 32, 64, 128
+ # Increased from 64. AMP frees memory, and more data per step = fewer GPU idles
+
+
+ # Move the model to the appropriate device (GPU if available)
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ print(f"Using device: {device}")
+
+ # Tries several convolution algorithms on the first batch, then uses fastest one thereafter.
+    # Pays off only if input sizes stay fixed (all crops are resized to 128x64 by the training transforms)
+ torch.backends.cudnn.benchmark = True
+
+    scaler = GradScaler() # With FP16, gradients can under-flow to 0. This multiplies the loss by a large factor, performs backward, then divides the gradients back down before the optimizer step.
+ clip_val = 1.0 # gradient-clip norm
+
+ # ---------------------------------------------------
+ # 2. MODEL, LOSS, OPTIMIZER
+ # ---------------------------------------------------
+
+    # Instantiate the model and move it to the device.
+    # embedding_dim=128 matches the 128-D embedding length expected by vision_node.py.
+ model = CNNdeepSORT(embedding_dim=128).to(device)
+
+ # Define loss function, optimizer, and learning rate scheduler
+
+    #TODO: the appropriate margin depends on the distance function chosen
+
+    loss_function = nn.TripletMarginWithDistanceLoss(distance_function=euclidean_distance, margin=0.3, swap=True) # swap=True also measures d(positive, negative) and uses it as the negative distance when it is the harder (smaller) of the two
+ optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE) # AdamW is an upgraded version of Adam
+
+ scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # Decays LR by a factor of 0.1 every 10 epochs
+ # Gamma of 0.1 is common -- sharp reduction in LR, allowing model to switch from large adjustments to fine-tuning
+
+ # Load from saved checkpoint if found (most generalized model)
+ checkpoint_path = 'best_model_checkpoint.pth'
+ start_epoch = 0
+ if(len(sys.argv) > 1 and sys.argv[1].lower() == "load"):
+ if(os.path.exists(checkpoint_path)):
+ print(f"Loading checkpoint from {checkpoint_path}...")
+ checkpoint = torch.load(checkpoint_path)
+ model.load_state_dict(checkpoint['model_state_dict'])
+ optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
+ scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
+ scaler.load_state_dict(checkpoint.get('scaler_state_dict', {}))
+ start_epoch = checkpoint['epoch'] + 1
+ print(f"Resumed at epoch {start_epoch} | val_loss={checkpoint['loss']:.4f}")
+ else:
+ print("No checkpoint found, starting from scratch.")
+ else:
+ print("Training from scratch...")
+
+ #TODO
+
+
+ # ---------------------------------------------------
+ # 3. DATA LOADING & SMALL VALIDATION SPLIT
+ # ---------------------------------------------------
+
+ # Define more robust transforms for training
+ # Images are resized and normalized for better model performance.
+ transform = transforms.Compose([
+ transforms.Resize((128, 64)), # native Market-1501
+ transforms.RandomHorizontalFlip(p=0.5), # Adds ~1% mAP with no speed cost
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # mean and std are pre-calculated mean/stdev of the ImageNet dataset.
+ # For each channel of the image: output[channel] = (input[channel] - mean[channel]) / std[channel]
+ # Standardizes the pixel value range to train more effectively. Helps converge faster
+ ])
+
+ # Load dataset and create DataLoader
+ Market1501_train_file_path = r'C:\Users\david\Downloads\Market-1501-v15.09.15\bounding_box_train'
+ train_data = dataset.Market1501(Market1501_train_file_path, transform=transform)
+
+ # Only used for early-stopping / LR checks, NOT for final testing. Tiny split keeps epochs fast but still shows over-/under-fitting trends
+ val_len = int(0.05 * len(train_data)) # split 5 % off for quick val
+ train_len = len(train_data) - val_len
+
+ # split dataset before building both dataLoaders
+ train_ds, val_ds = random_split(train_data, [train_len, val_len])
+
+ # Entire training data
+ training_dataloader = DataLoader(
+ dataset=train_ds,
+ batch_size=BATCH_SIZE,
+ shuffle=True,
+ persistent_workers=True,# Keeps DataLoader workers alive across epochs and
+ prefetch_factor=4, # queues 4 batches per worker. Eliminates Windows process-spawn penalty; feeds GPU continuously
+ num_workers=4, # start with 4; 6–8 if CPU has threads to spare
+ pin_memory=True) # pin_memory puts tensors into pinned memory, allowing for faster transfer from CPU to GPU.
+ # Can significantly speed up training. Set to true with GPU.
+
+ val_loader = DataLoader(
+ val_ds,
+ batch_size=BATCH_SIZE,
+ shuffle=False, # no need to shuffle for evaluation
+ num_workers=4,
+ pin_memory=True,
+ persistent_workers=True)
+
+ # Evaluation helper
+ @torch.no_grad() # we don't track gradients here
+ def evaluate(model, loader):
+ """
+ Runs one full pass on `loader` with AMP and returns:
+        (mean_triplet_loss, triplet_accuracy), where accuracy is the fraction of triplets with d(anchor, positive) < d(anchor, negative)
+ """
+ model.eval() # turn off dropout / batch-norm update
+ total_loss = 0.0
+ correct = 0
+ samples = 0
+
+ for anchor, positive, negative in loader:
+ anchor = anchor.to(device, non_blocking=True)
+ positive = positive.to(device, non_blocking=True)
+ negative = negative.to(device, non_blocking=True)
+
+ with autocast(device_type=device): # AMP even in eval -> less VRAM, faster
+ anchor_out = model(anchor)
+ positive_out = model(positive)
+ negative_out = model(negative)
+
+
+
+
+ loss = loss_function(anchor_out, positive_out, negative_out)
+
+ total_loss += loss.item()
+ #TODO
+ d_ap = euclidean_distance(anchor_out, positive_out)
+ d_an = euclidean_distance(anchor_out, negative_out)
+ correct += (d_ap < d_an).sum().item()
+
+ samples += anchor.size(0)
+
+ mean_loss = total_loss / len(loader)
+ accuracy = correct / samples
+ model.train() # restore training mode for caller
+ return mean_loss, accuracy
+
+ # ---------------------------------------------------
+ # 4. TENSORBOARD LOGGING (Initial)
+ # ---------------------------------------------------
+
+    print("Logging sample training images to TensorBoard...")
+    writer = SummaryWriter(log_dir='run')  # matches the `tensorboard --logdir=run` hint at the end of this file
+    # Get a single batch from the dataloader and log the anchor images as a sanity check
+    anchor, positive, negative = next(iter(training_dataloader))
+    writer.add_images('sample_anchors', anchor[:16], 0)
+
+    print("Done.")
+
+ # ---------------------------------------------------
+ # 6. MAIN TRAINING LOOP
+ # ---------------------------------------------------
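+    # Early-stopping state: training stops once validation loss has not improved for 'patience' consecutive epochs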
+ best_val_loss = float('inf')
+ patience = 5
+ patience_counter = 0
+
+ for epoch in range(start_epoch, EPOCHS):
+ start_time = time.time()
+
+ # Train for one epoch
+ avg_loss, accuracy = training(model, training_dataloader, loss_function, optimizer, device) #tells you if optimizer is doing its job on the data it sees
+ val_loss, val_acc = evaluate(model, val_loader) #tells you if the network is generalizing; early stopping
+
+ #*NEW* Update the learning rate
+        scheduler.step() # Step the LR schedule once per epoch; without the decay the model struggles to converge.
+
+
+
+ # ----- save best on *validation* loss -----
+ #if avg_loss < best_val_loss:
+ if val_loss < best_val_loss:
+ #best_val_loss = avg_loss
+ best_val_loss = val_loss
+ torch.save({
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+                'scheduler_state_dict': scheduler.state_dict(),
+                'scaler_state_dict': scaler.state_dict(),
+                'loss': val_loss
+ }, 'best_model_checkpoint.pth')
+ print(f"Epoch {epoch+1}: val_loss improved to {val_loss:.4f}")
+ patience_counter = 0
+ else:
+ patience_counter += 1
+ print(f" Epoch {epoch+1}: Loss did not improve. Patience = {patience_counter}/{patience}")
+
+ # Print epoch summary
+ elapsed_time = time.time() - start_time
+ print(f"\nEpoch {epoch+1}/{EPOCHS} | Avg Loss: {avg_loss:.4f} | Acc: {accuracy:.2%} | Time: {elapsed_time:.2f}s")
+
+ # Always save latest weights
+ torch.save({
+ 'epoch': epoch,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+ 'scheduler_state_dict': scheduler.state_dict(),
+ 'scaler_state_dict': scaler.state_dict(),
+ 'loss': avg_loss
+ }, 'latest_model_checkpoint_epoch.pth')
+
+ # Early stopping check
+ if patience_counter >= patience:
+ print(f"\n Early stopping triggered after {epoch+1} epochs.")
+ break
+
+ # Log weight histograms
+
+
+ print("\nTraining Finished!")
+
+
+ # ---------------------------------------------------
+ # 7. CLEANUP
+ # ---------------------------------------------------
+
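+    # Close the TensorBoard writer so any buffered events are flushed to disk
+    writer.close()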
+
+
+ # To view the logs, open a terminal in your project's root directory and run:
# tensorboard --logdir=run
\ No newline at end of file
diff --git a/src/kestrel_vision/kestrel_vision/vision_node.py b/src/kestrel_vision/kestrel_vision/vision_node.py
index aab9392..797be19 100644
--- a/src/kestrel_vision/kestrel_vision/vision_node.py
+++ b/src/kestrel_vision/kestrel_vision/vision_node.py
@@ -1,113 +1,191 @@
-# ROS and system libraries
-import rclpy
-from rclpy.node import Node
-import sys
-from pathlib import Path
-
-# ROS messages
-from sensor_msgs.msg import Image
-from kestrel_msgs.msg import DetectionWithEmbedding, DetectionWithEmbeddingArray
-from vision_msgs.msg import Detection2D, ObjectHypothesisWithPose, BoundingBox2D
-
-# Computer vision and deep learning libraries
-import cv2
-import numpy as np
-import torch
-from cv_bridge import CvBridge
-from ultralytics import YOLO
-
-
-from model import CNNdeepSORT
-from . import processing
-
-
-class VisionNode(Node):
- """
- This node subscribes to an image topic, runs YOLO and cnn,
- and publishes the detections with their feature embeddings.
- """
- def __init__(self):
- super().__init__('vision_node')
-
- # initialize models
- self.yolo_model = YOLO('yolo11m.pt')
- self.cnn_model = CNNdeepSORT(embedding_dim=128)
- self.get_logger().info("Loaded YOLO and CNNdeepSORT models successfully.")
-
- self.bridge = CvBridge()
- # threshold to stop certain stuff to go into cnn
- self.conf_threshold = self.declare_parameter('conf_threshold', 0.5).get_parameter_value().double_value
-
- # create publishers and subscription
- self.pub_det = self.create_publisher(DetectionWithEmbeddingArray, '/kestrel/detections', 10)
- self.pub_dbg = self.create_publisher(Image, '/kestrel/debug/frame', 10)
- self.image_subscriber = self.create_subscription(Image, '/camera/image_raw', self.image_callback, 10)
-
- def image_callback(self, msg: Image):
- """
- Callback to process every frame
- """
- frame = self.bridge.imgmsg_to_cv2(msg, 'bgr8')
- yolo_results = self.yolo_model(frame, verbose=False)[0]
-
- det_array_msg = DetectionWithEmbeddingArray()
- det_array_msg.header = msg.header
-
- for box in yolo_results.boxes:
- conf = float(box.conf[0])
- class_name = self.yolo_model.names[int(box.cls[0])]
-
- # checks if person and confidence is higher than threshold
- if class_name == 'person' and conf >= self.conf_threshold:
-
- appearance_vector = processing.extract_person_embedding(
- frame, box, self.cnn_model
- )
-
- if appearance_vector is None:
- continue
-
- # Populate the custom ROS message
- custom_detection = DetectionWithEmbedding()
- custom_detection.detection.header = msg.header
-
- # standard hypothesis msg from ros with info on detection
- hypothesis = ObjectHypothesisWithPose()
- hypothesis.hypothesis.class_id = class_name
- hypothesis.hypothesis.score = conf
- custom_detection.detection.results.append(hypothesis)
-
- x1, y1, x2, y2 = map(int, box.xyxy[0]) # get corners of bbox
- custom_detection.detection.bbox = BoundingBox2D(
- center=rclpy.geometry.Pose2D(x=float((x1+x2)/2), y=float((y1+y2)/2)),
- size_x=float(x2-x1),
- size_y=float(y2-y1)
- )
-
- # add this detection to rest of all detections (the array)
- custom_detection.embedding = appearance_vector.tolist()
- det_array_msg.detections.append(custom_detection)
-
- # draws bbox on the frame
- cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
- cv2.putText(frame, f'Person {conf:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
-
- # Publish the results
- self.pub_det.publish(det_array_msg)
- self.pub_dbg.publish(self.bridge.cv2_to_imgmsg(frame, 'bgr8'))
-
-
-def main(args=None):
- """Standard entry point for the ROS node."""
- rclpy.init(args=args)
- node = VisionNode()
- try:
- rclpy.spin(node)
- except KeyboardInterrupt:
- pass
- finally:
- node.destroy_node()
- rclpy.shutdown()
-
-if __name__ == '__main__':
+# ROS and system libraries
+import rclpy
+from rclpy.node import Node
+from pathlib import Path
+
+# ROS messages
+from sensor_msgs.msg import Image
+from kestrel_msgs.msg import Detection, DetectionArray
+
+# Computer vision and deep learning libraries
+import cv2
+import numpy as np
+import torch
+from cv_bridge import CvBridge
+from ultralytics import YOLO
+
+
+from .model import CNNdeepSORT
+from . import processing
+
+
+class VisionNode(Node):
+ """
+ This node subscribes to an image topic, runs YOLO and CNN,
+ and publishes the detections with their feature embeddings.
+ """
+ def __init__(self):
+ super().__init__('vision_node')
+
+ # initialize models
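+        # parents[3] walks up from src/kestrel_vision/kestrel_vision/vision_node.py to the repository root, where the models/ folder lives in a source-tree checkout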
+ default_model_path = Path(__file__).resolve().parents[3] / 'models' / 'yolov8n.pt'
+
+ model_path = self.declare_parameter(
+ 'yolo_model_path',
+ str(default_model_path)
+ ).value
+
+ self.yolo_model = YOLO(model_path)
+ self.yolo_model.to('cpu') #TODO: why CPU?
+
+ # ---- CNN init ----
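+        # 128-D appearance embedding; must match the embedding-length check in image_callback below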
+ self.cnn_model = CNNdeepSORT(embedding_dim=128)
+
+ checkpoint_path = Path(__file__).resolve().parents[3] / 'models' / 'best_model_checkpoint.pth'
+ if checkpoint_path.is_file():
+ try:
+ # torch.load with weights_only when available (2.1+), else fallback
+ try:
+ ckpt = torch.load(checkpoint_path, map_location='cpu', weights_only=True)
+ except TypeError:
+ ckpt = torch.load(checkpoint_path, map_location='cpu')
+
+ # Find the actual state_dict inside common wrappers
+ state = None
+ if isinstance(ckpt, dict):
+ for k in ('model_state_dict', 'state_dict', 'net', 'model'):
+ if k in ckpt and isinstance(ckpt[k], dict):
+ state = ckpt[k]
+ break
+ if state is None:
+ # maybe it's already a raw state_dict
+ state = {k: v for k, v in ckpt.items() if hasattr(v, 'dtype')}
+                if not state:
+ raise RuntimeError("Unrecognized checkpoint format")
+
+ # Clean keys: strip 'module.' and drop classifier / batch-tracking
+ from collections import OrderedDict
+ cleaned = OrderedDict()
+ for k, v in state.items():
+ if k.startswith('module.'):
+ k = k[len('module.'):]
+ if k.startswith('classifier.') or k.endswith('num_batches_tracked'):
+ continue
+ cleaned[k] = v
+
+ # Filter to keys that exist in the current model
+ model_sd = self.cnn_model.state_dict()
+ intersect = {k: v for k, v in cleaned.items() if k in model_sd and v.shape == model_sd[k].shape}
+
+ # Informative logs
+ dropped_unexpected = sorted(set(cleaned.keys()) - set(intersect.keys()))
+ missing_in_ckpt = sorted(set(model_sd.keys()) - set(intersect.keys()))
+ if dropped_unexpected:
+ self.get_logger().warn(f"Dropping {len(dropped_unexpected)} checkpoint keys not in the current model (e.g., {dropped_unexpected[:3]})")
+ if missing_in_ckpt:
+ self.get_logger().warn(f"{len(missing_in_ckpt)} model keys not found/mismatched in checkpoint (e.g., {missing_in_ckpt[:3]})")
+
+ # Load with strict=False so non-intersecting layers stay at init
+ self.cnn_model.load_state_dict(intersect, strict=False)
+ self.get_logger().info("Loaded CNN weights (filtered to matching layers).")
+ except Exception as e:
+ self.get_logger().error(f"Failed to load CNN checkpoint: {e}. Using randomly initialized weights.")
+ else:
+ self.get_logger().warn(f"Could not find CNN checkpoint at {checkpoint_path}. Using untrained model.")
+
+ self.cnn_model.eval()
+ self.cnn_model.to('cpu') # or to('cuda') later via a device param
+
+ # -----------------------------------------------------------------------
+
+ self.bridge = CvBridge()
+        # minimum YOLO confidence for a detection to be forwarded to the CNN
+ self.conf_threshold = self.declare_parameter('conf_threshold', 0.5).get_parameter_value().double_value
+
+ # create publishers and subscription
+ self.pub_det = self.create_publisher(DetectionArray, '/kestrel/detections', 10)
+ self.pub_dbg = self.create_publisher(Image, '/kestrel/debug/frame', 10)
+ self.image_subscriber = self.create_subscription(Image, '/camera/image_raw', self.image_callback, 10)
+
+ def image_callback(self, msg: Image):
+ """
+ Callback to process every frame
+ """
+ frame = self.bridge.imgmsg_to_cv2(msg, 'bgr8')
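+        # Run YOLO on the BGR frame; [0] selects the Results object for this single image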
+ yolo_results = self.yolo_model(frame, verbose=False)[0]
+
+ det_array_msg = DetectionArray()
+ det_array_msg.header = msg.header
+
+ for box in yolo_results.boxes:
+ conf = float(box.conf[0])
+ class_name = self.yolo_model.names[int(box.cls[0])]
+
+ # checks if person and confidence is higher than threshold
+ if class_name == 'person' and conf >= self.conf_threshold:
+
+ appearance_vector = processing.extract_person_embedding(
+ frame, box, self.cnn_model
+ )
+
+ if appearance_vector is None:
+ continue
+
+ # Populate the custom ROS message
+ detection_msg = Detection()
+ detection_msg.header = msg.header
+
+ # populate msg with info on detection
+ detection_msg.class_name = class_name
+ detection_msg.conf = conf
+
+ x1, y1, x2, y2 = [float(v) for v in box.xyxy[0].tolist()] # get corners of bbox
+ detection_msg.x1 = float(x1)
+ detection_msg.y1 = float(y1)
+ detection_msg.x2 = float(x2)
+ detection_msg.y2 = float(y2)
+
+ detection_msg.w = float(x2 - x1)
+ detection_msg.h = float(y2 - y1)
+
+ detection_msg.center_x = float((x1 + x2) / 2.0)
+ detection_msg.center_y = float((y1 + y2) / 2.0)
+
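+                # Flatten the CNN output to a 1-D vector and validate its length before publishing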
+ emb = appearance_vector.reshape(-1)
+
+ if emb.shape[0] != 128:
+ self.get_logger().warn(f"Bad embedding length: {emb.shape[0]}")
+ continue
+
+ # add this detection to rest of all detections (the array)
+ detection_msg.embedding = [float(v) for v in emb.tolist()]
+ det_array_msg.detections.append(detection_msg)
+
+ # draws bbox on the frame
+ x1i, y1i, x2i, y2i = map(int, [x1, y1, x2, y2])
+ cv2.rectangle(frame, (x1i, y1i), (x2i, y2i), (0,255,0), 2)
+ cv2.putText(frame, f'Person {conf:.2f}', (x1i, y1i - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+ # Publish the results
+ self.pub_det.publish(det_array_msg)
+ self.pub_dbg.publish(self.bridge.cv2_to_imgmsg(frame, 'bgr8'))
+
+
+def main(args=None):
+ """Standard entry point for the ROS node."""
+ rclpy.init(args=args)
+ node = VisionNode()
+ try:
+ rclpy.spin(node)
+ except KeyboardInterrupt:
+ node.get_logger().info("Shutting down on Ctrl-C")
+ finally:
+ node.destroy_node()
+ try:
+ if rclpy.ok(): # not already shut down
+ rclpy.shutdown()
+ except Exception:
+ pass
+
+if __name__ == '__main__':
main()
\ No newline at end of file
diff --git a/src/kestrel_vision/package.xml b/src/kestrel_vision/package.xml
index bc3f8d0..0f8fa21 100644
--- a/src/kestrel_vision/package.xml
+++ b/src/kestrel_vision/package.xml
@@ -1,25 +1,24 @@
-<?xml version="1.0"?>
-<package format="3">
-  <name>kestrel_vision</name>
-  <version>0.1.0</version>
-  <description>YOLO-based vision detection node for Kestrel Drone using rclpy and OpenCV</description>
-  <maintainer email="da364739@ucf.edu">David Orjuela</maintainer>
-  <license>MIT</license>
-
-  <buildtool_depend>ament_python</buildtool_depend>
-
-  <exec_depend>rclpy</exec_depend>
-  <exec_depend>sensor_msgs</exec_depend>
-  <exec_depend>cv_bridge</exec_depend>
-  <exec_depend>vision_msgs</exec_depend>
-  <exec_depend>ultralytics</exec_depend>
-
-  <test_depend>ament_copyright</test_depend>
-  <test_depend>ament_flake8</test_depend>
-  <test_depend>ament_pep257</test_depend>
-  <test_depend>python3-pytest</test_depend>
-
-  <export>
-    <build_type>ament_python</build_type>
-  </export>
-</package>
+<?xml version="1.0"?>
+<package format="3">
+  <name>kestrel_vision</name>
+  <version>0.1.0</version>
+  <description>YOLO-based vision detection node for Kestrel Drone using rclpy and OpenCV</description>
+  <maintainer email="da364739@ucf.edu">David Orjuela</maintainer>
+  <license>MIT</license>
+
+  <buildtool_depend>ament_python</buildtool_depend>
+
+  <exec_depend>rclpy</exec_depend>
+  <exec_depend>sensor_msgs</exec_depend>
+  <exec_depend>cv_bridge</exec_depend>
+  <exec_depend>ultralytics</exec_depend>
+
+  <test_depend>ament_copyright</test_depend>
+  <test_depend>ament_flake8</test_depend>
+  <test_depend>ament_pep257</test_depend>
+  <test_depend>python3-pytest</test_depend>
+
+  <export>
+    <build_type>ament_python</build_type>
+  </export>
+</package>
diff --git a/src/kestrel_vision/setup.cfg b/src/kestrel_vision/setup.cfg
index ff7a8e2..27c1f88 100644
--- a/src/kestrel_vision/setup.cfg
+++ b/src/kestrel_vision/setup.cfg
@@ -1,4 +1,4 @@
-[develop]
-script_dir=$base/lib/kestrel_vision
-[install]
-install_scripts=$base/lib/kestrel_vision
+[develop]
+script_dir=$base/lib/kestrel_vision
+[install]
+install_scripts=$base/lib/kestrel_vision
diff --git a/src/kestrel_vision/setup.py b/src/kestrel_vision/setup.py
index 6b75bde..aef3709 100644
--- a/src/kestrel_vision/setup.py
+++ b/src/kestrel_vision/setup.py
@@ -1,26 +1,25 @@
-from setuptools import setup
-
-package_name = 'kestrel_vision'
-
-setup(
- name=package_name,
- version='0.1.0',
- packages=[package_name],
- data_files=[
- ('share/ament_index/resource_index/packages',
- ['resource/' + package_name]),
- ('share/' + package_name, ['package.xml']),
- ],
- install_requires=['setuptools'],
- zip_safe=True,
- maintainer='David Orjuela',
- maintainer_email='da364739@ucf.edu',
- description='Vision node for Kestrel drone using YOLO and DeepSORT',
- license='MIT',
- tests_require=['pytest'],
- entry_points={
- 'console_scripts': [
- 'vision_node = kestrel_vision.vision_node:main',
- ],
- },
-)
+from setuptools import setup
+
+package_name = 'kestrel_vision'
+
+setup(
+ name=package_name,
+ version='0.1.0',
+ packages=[package_name],
+ data_files=[
+ ('share/ament_index/resource_index/packages',
+ ['resource/' + package_name]),
+ ('share/' + package_name, ['package.xml']),
+ ],
+ install_requires=['setuptools'],
+ zip_safe=True,
+ maintainer='David Orjuela',
+ maintainer_email='da364739@ucf.edu',
+ description='Vision node for Kestrel drone using YOLO and DeepSORT',
+ license='MIT',
+ entry_points={
+ 'console_scripts': [
+ 'vision_node = kestrel_vision.vision_node:main',
+ ],
+ },
+)
diff --git a/src/kestrel_vision/test/test_copyright.py b/src/kestrel_vision/test/test_copyright.py
index 97a3919..282981a 100644
--- a/src/kestrel_vision/test/test_copyright.py
+++ b/src/kestrel_vision/test/test_copyright.py
@@ -1,25 +1,25 @@
-# Copyright 2015 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_copyright.main import main
-import pytest
-
-
-# Remove the `skip` decorator once the source file(s) have a copyright header
-@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.')
-@pytest.mark.copyright
-@pytest.mark.linter
-def test_copyright():
- rc = main(argv=['.', 'test'])
- assert rc == 0, 'Found errors'
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_copyright.main import main
+import pytest
+
+
+# Remove the `skip` decorator once the source file(s) have a copyright header
+@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.')
+@pytest.mark.copyright
+@pytest.mark.linter
+def test_copyright():
+ rc = main(argv=['.', 'test'])
+ assert rc == 0, 'Found errors'
diff --git a/src/kestrel_vision/test/test_flake8.py b/src/kestrel_vision/test/test_flake8.py
index 27ee107..625324e 100644
--- a/src/kestrel_vision/test/test_flake8.py
+++ b/src/kestrel_vision/test/test_flake8.py
@@ -1,25 +1,25 @@
-# Copyright 2017 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_flake8.main import main_with_errors
-import pytest
-
-
-@pytest.mark.flake8
-@pytest.mark.linter
-def test_flake8():
- rc, errors = main_with_errors(argv=[])
- assert rc == 0, \
- 'Found %d code style errors / warnings:\n' % len(errors) + \
- '\n'.join(errors)
+# Copyright 2017 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_flake8.main import main_with_errors
+import pytest
+
+
+@pytest.mark.flake8
+@pytest.mark.linter
+def test_flake8():
+ rc, errors = main_with_errors(argv=[])
+ assert rc == 0, \
+ 'Found %d code style errors / warnings:\n' % len(errors) + \
+ '\n'.join(errors)
diff --git a/src/kestrel_vision/test/test_pep257.py b/src/kestrel_vision/test/test_pep257.py
index b234a38..4569717 100644
--- a/src/kestrel_vision/test/test_pep257.py
+++ b/src/kestrel_vision/test/test_pep257.py
@@ -1,23 +1,23 @@
-# Copyright 2015 Open Source Robotics Foundation, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ament_pep257.main import main
-import pytest
-
-
-@pytest.mark.linter
-@pytest.mark.pep257
-def test_pep257():
- rc = main(argv=['.', 'test'])
- assert rc == 0, 'Found code style errors / warnings'
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_pep257.main import main
+import pytest
+
+
+@pytest.mark.linter
+@pytest.mark.pep257
+def test_pep257():
+ rc = main(argv=['.', 'test'])
+ assert rc == 0, 'Found code style errors / warnings'