diff --git a/.gitignore b/.gitignore
index 1b01344..063e1ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,3 @@
-# ignore all mp4/mov in the YOLO Demo subfolder
-src/_archive_legacy/models/deepSORT/CNN/YOLO Demo/*.mp4
-src/_archive_legacy/models/deepSORT/CNN/YOLO Demo/*.mov
-
-# but keep this one file
-!src/_archive_legacy/models/deepSORT/CNN/YOLO Demo/Kestrel_Tracking_demo_Clip.mp4
-
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/docs/Model Team/README.md b/docs/Model Team/README.md
new file mode 100644
index 0000000..f5388b4
--- /dev/null
+++ b/docs/Model Team/README.md
@@ -0,0 +1,15 @@
+# Model Team Documentation
+## Pipeline Overview
+The model team is responsible for tracking people (PEV riders) in front of the drone.
+- The pipeline starts by feeding the drone's camera frames to the `kestrel_vision` ROS2 package.
+- `kestrel_vision` contains `vision_node.py`, which creates the ROS2 node that subscribes to the camera, runs YOLO and our CNN, and publishes the resulting detections (bounding boxes + feature embeddings).
+- Next, `kestrel_tracking` contains `tracking_node.py`, which similarly creates the node that subscribes to the vision node's detections and begins tracking them. This involves instantiating a TrackManager and publishing the associated Tracks.
+
+## Pipeline Specifics
+### kestrel_vision
+
+## ROS Topics/Services
+
+## Key Parameters
+
+## Debug Checklist
diff --git a/models/best_model_checkpoint.pth b/models/best_model_checkpoint.pth
new file mode 100644
index 0000000..5f704df
Binary files a/models/best_model_checkpoint.pth and b/models/best_model_checkpoint.pth differ
diff --git a/models/requirements.txt b/models/requirements.txt
index beb9057..bf87520 100644
--- a/models/requirements.txt
+++ b/models/requirements.txt
@@ -1,210 +1,227 @@
 # To ensure app dependencies are ported from your virtual environment/host machine into your container, run 'pip freeze > requirements.txt' in the terminal to overwrite this file
-absl-py==2.1.0
-annotated-types==0.7.0
-asttokens==2.4.1
-astunparse==1.6.3
-attrs==24.2.0
-babel==2.16.0
-backcall==0.2.0
-beautifulsoup4==4.12.3
-bleach==6.1.0
-certifi==2024.8.30
-charset-normalizer==3.3.2
-click==8.1.7
-colorama==0.4.6
-coloredlogs==15.0.1
-comet-ml==3.47.0
-comm==0.2.2
-configobj==5.0.9
-contourpy==1.3.0
-coverage==7.6.1
+action-msgs==2.0.3
+action-tutorials-interfaces==0.33.6
+action-tutorials-py==0.33.6
+actionlib-msgs==5.3.6
+ament-cmake-test==2.5.4
+ament-copyright==0.17.3
+ament-cppcheck==0.17.3
+ament-cpplint==0.17.3
+ament-flake8==0.17.3
+ament-index-python==1.8.1
+ament-lint==0.17.3
+ament-lint-cmake==0.17.3
+ament-package==0.16.4
+ament-pep257==0.17.3
+ament-uncrustify==0.17.3
+ament-xmllint==0.17.3
+angles==1.16.0
+argcomplete==3.6.3
+builtin-interfaces==2.0.3
+catkin-pkg==1.1.0
+certifi==2025.11.12
+charset-normalizer==3.4.4
+colcon-argcomplete==0.3.3
+colcon-bash==0.5.0
+colcon-cd==0.1.1
+colcon-cmake==0.2.29
+colcon-common-extensions==0.3.0
+colcon-core==0.20.1
+colcon-defaults==0.2.9
+colcon-devtools==0.3.0
+colcon-library-path==0.2.1
+colcon-metadata==0.2.5
+colcon-notification==0.3.0
+colcon-output==0.2.13
+colcon-package-information==0.4.0
+colcon-package-selection==0.2.10
+colcon-parallel-executor==0.4.0
+colcon-pkg-config==0.1.0
+colcon-powershell==0.4.0
+colcon-python-setup-py==0.2.9
+colcon-recursive-crawl==0.2.3
+colcon-ros==0.5.0
+colcon-test-result==0.3.8
+colcon-zsh==0.5.0
+composition-interfaces==2.0.3
+contourpy==1.3.3
+coverage==7.13.0
+cv-bridge==4.1.0
 cycler==0.12.1
-Cython==3.0.11
-debugpy==1.8.6
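For orientation, the vision/tracking split described in the Model Team README above can be pictured as a pair of small rclpy nodes. The sketch below is a minimal, hypothetical version of the detection side only: the topic names (`/camera/image_raw`, `/kestrel/detections`), the `Float32MultiArray` stand-in for the detection message, and the weight path are assumptions for illustration, not the actual contents of `vision_node.py`.

```python
# Hypothetical sketch only: topics, message types, and weight path are placeholders,
# not the real kestrel_vision implementation (which is not part of this diff).
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image
from std_msgs.msg import Float32MultiArray
from cv_bridge import CvBridge
from ultralytics import YOLO


class VisionNode(Node):
    def __init__(self):
        super().__init__('vision_node')
        self.bridge = CvBridge()
        self.model = YOLO('yolov8n.pt')  # detector weights; the CNN embedder would be loaded similarly
        self.sub = self.create_subscription(Image, '/camera/image_raw', self.on_image, 10)
        self.pub = self.create_publisher(Float32MultiArray, '/kestrel/detections', 10)

    def on_image(self, msg: Image):
        frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
        result = self.model.predict(frame, verbose=False)[0]
        boxes = result.boxes.xywh.cpu().numpy()  # one (x, y, w, h) row per detection
        out = Float32MultiArray()
        out.data = [float(v) for v in boxes.flatten()]  # stand-in for the real detection message
        self.pub.publish(out)


def main():
    rclpy.init()
    node = VisionNode()
    rclpy.spin(node)
    node.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()
```

The tracking node described in the README would follow the same pattern, subscribing to the detections topic and feeding each message into its TrackManager before publishing the resulting Tracks.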
-decorator==5.1.1 -deepsparse==1.8.0 -defusedxml==0.7.1 -dulwich==0.22.1 -easydict==1.13 -everett==3.1.0 -exceptiongroup==1.2.2 -executing==2.1.0 -fastjsonschema==2.20.0 -filelock==3.16.1 -filterpy==1.4.5 -flatbuffers==24.3.25 -fonttools==4.54.1 -fsspec==2024.9.0 -future==1.0.0 -gast==0.6.0 -geocoder==1.38.1 -ghp-import==2.1.0 -gitdb==4.0.11 -GitPython==3.1.43 -google-pasta==0.2.0 -GPUtil==1.4.0 -gputils==1.0.6 -griffe==1.3.2 -grpcio==1.66.2 -h5py==3.12.1 -humanfriendly==10.0 -idna==3.10 -imageio==2.36.0 -img2unicode==0.1a11 -iniconfig==2.0.0 -ipykernel==6.29.5 -ipython==8.12.0 -jedi==0.19.1 -Jinja2==3.1.4 -joblib==1.4.2 -jsonschema==4.23.0 -jsonschema-specifications==2023.12.1 -jupyter_client==8.6.3 -jupyter_core==5.7.2 -jupyterlab_pygments==0.3.0 -jupytext==1.16.4 -keras==3.5.0 -kiwisolver==1.4.7 -lapx==0.5.10.post1 -lazy_loader==0.4 -libclang==18.1.1 -Markdown==3.7 -markdown-it-py==3.0.0 -MarkupSafe==2.1.5 -matplotlib==3.9.2 -matplotlib-inline==0.1.7 -matplotlib-terminal==0.1a4 -mdit-py-plugins==0.4.2 -mdurl==0.1.2 -merge-args==0.1.5 -mergedeep==1.3.4 -mistune==3.0.2 -mkdocs==1.6.1 -mkdocs-autorefs==1.2.0 -mkdocs-get-deps==0.2.0 -mkdocs-jupyter==0.25.0 -mkdocs-macros-plugin==1.2.0 -mkdocs-material==9.5.39 -mkdocs-material-extensions==1.3.1 -mkdocs-redirects==1.2.1 -mkdocs-ultralytics-plugin==0.1.9 -mkdocstrings==0.26.1 -mkdocstrings-python==1.11.1 -ml-dtypes==0.4.1 +demo-nodes-py==0.33.6 +diagnostic-msgs==5.3.6 +distlib==0.4.0 +docutils==0.22.4 +domain-coordinator==0.12.0 +empy==4.2 +example-interfaces==0.12.0 +examples-rclpy-executors==0.19.6 +examples-rclpy-minimal-action-client==0.19.6 +examples-rclpy-minimal-action-server==0.19.6 +examples-rclpy-minimal-client==0.19.6 +examples-rclpy-minimal-publisher==0.19.6 +examples-rclpy-minimal-service==0.19.6 +examples-rclpy-minimal-subscriber==0.19.6 +filelock==3.20.1 +fonttools==4.61.1 +fsspec==2025.12.0 +geometry-msgs==5.3.6 +idna==3.11 +image-geometry==4.1.0 +iniconfig==2.3.0 +interactive-markers==2.5.4 +Jinja2==3.1.6 +kiwisolver==1.4.9 +lark-parser==0.12.0 +laser-geometry==2.7.1 +launch==3.4.6 +launch-ros==0.26.8 +launch-testing==3.4.6 +launch-testing-ros==0.26.8 +launch-xml==3.4.6 +launch-yaml==3.4.6 +lifecycle-msgs==2.0.3 +logging-demo==0.33.6 +map-msgs==2.4.1 +MarkupSafe==3.0.3 +matplotlib==3.10.8 +message-filters==4.11.7 mpmath==1.3.0 -n2==0.1.7 -namex==0.0.8 -nbclient==0.10.0 -nbconvert==7.16.4 -nbformat==5.10.4 -nest-asyncio==1.6.0 -networkx==3.3 -nm-yolov5==1.7.0.60200 -numpy==1.23.5 -nvidia-cublas-cu12==12.4.5.8 -nvidia-cuda-cupti-cu12==12.4.127 -nvidia-cuda-nvrtc-cu12==12.4.127 -nvidia-cuda-runtime-cu12==12.4.127 -nvidia-cudnn-cu12==9.1.0.70 -nvidia-cufft-cu12==11.2.1.3 -nvidia-curand-cu12==10.3.5.147 -nvidia-cusolver-cu12==11.6.1.9 -nvidia-cusparse-cu12==12.3.1.170 -nvidia-nccl-cu12==2.21.5 -nvidia-nvjitlink-cu12==12.4.127 -nvidia-nvtx-cu12==12.4.127 -onnx==1.14.1 -onnxruntime==1.19.2 -onnxslim==0.1.34 -opencv-python==4.6.0.66 -opt_einsum==3.4.0 -optree==0.12.1 -packaging==24.1 -paginate==0.5.7 -pandas==2.2.3 -pandocfilters==1.5.1 -parso==0.8.4 -pathspec==0.12.1 -pexpect==4.9.0 -pickleshare==0.7.5 -Pillow==9.5.0 -platformdirs==4.3.6 -pluggy==1.5.0 -prompt_toolkit==3.0.48 -protobuf==3.20.3 -psutil==6.0.0 -ptyprocess==0.7.0 -pure_eval==0.2.3 -py-cpuinfo==9.0.0 -py-machineid==0.6.0 -pydantic==2.7.4 -pydantic_core==2.18.4 -Pygments==2.18.0 -pymdown-extensions==10.11.2 -pyparsing==3.1.4 -PyQt5==5.15.9 -pyqt5-plugins==5.15.9.2.3 -PyQt5-Qt5==5.15.2 -pyqt5-tools==5.15.9.3.3 -PyQt5_sip==12.15.0 -pytest==8.3.3 -pytest-cov==5.0.0 
-python-box==6.1.0 +nav-msgs==5.3.6 +networkx==3.6.1 +notify2==0.3.1 +numpy==1.26.4 # Required for compatibility ("numpy < 2") +nvidia-cublas-cu12==12.8.4.1 +nvidia-cuda-cupti-cu12==12.8.90 +nvidia-cuda-nvrtc-cu12==12.8.93 +nvidia-cuda-runtime-cu12==12.8.90 +nvidia-cudnn-cu12==9.10.2.21 +nvidia-cufft-cu12==11.3.3.83 +nvidia-cufile-cu12==1.13.1.3 +nvidia-curand-cu12==10.3.9.90 +nvidia-cusolver-cu12==11.7.3.90 +nvidia-cusparse-cu12==12.5.8.93 +nvidia-cusparselt-cu12==0.7.1 +nvidia-nccl-cu12==2.27.5 +nvidia-nvjitlink-cu12==12.8.93 +nvidia-nvshmem-cu12==3.3.20 +nvidia-nvtx-cu12==12.8.90 +opencv-python==4.12.0.88 +osrf-pycommon==2.1.7 +packaging==25.0 +pcl-msgs==1.0.0 +pendulum-msgs==0.33.6 +pillow==12.0.0 +pluggy==1.6.0 +polars==1.36.1 +polars-runtime-32==1.36.1 +psutil==7.2.0 +Pygments==2.19.2 +pyparsing==3.3.1 +pytest==9.0.2 +pytest-cov==7.0.0 +pytest-repeat==0.9.4 +pytest-rerunfailures==16.1 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -pytz==2024.2 -PyYAML==6.0.2 -pyyaml_env_tag==0.1 -pyzmq==26.2.0 -qt5-applications==5.15.2.2.3 -qt5-tools==5.15.2.1.3 -ratelim==0.1.6 -referencing==0.35.1 -regex==2024.9.11 -requests==2.32.3 -requests-toolbelt==1.0.0 -rich==13.8.1 -rpds-py==0.20.0 -scikit-image==0.24.0 -scikit-learn==1.5.2 -scipy==1.14.1 -seaborn==0.13.2 -semantic-version==2.10.0 -sentry-sdk==2.15.0 -simplejson==3.19.3 -six==1.16.0 -smmap==5.0.1 -soupsieve==2.6 -sparseml==1.8.0 -sparsezoo==1.8.1 -stack-data==0.6.3 -sympy==1.13.1 -tensorboard==2.17.1 -tensorboard-data-server==0.7.2 -tensorflow==2.17.0 -tensorflow-io-gcs-filesystem==0.37.1 -termcolor==2.4.0 -thop==0.1.1.post2209072238 -threadpoolctl==3.5.0 -tifffile==2024.9.20 -tinycss2==1.3.0 -tomli==2.0.2 -toposort==1.10 -torch==2.5.1 -torchvision==0.20.1 -tornado==6.4.1 -tqdm==4.66.5 -traitlets==5.14.3 -triton==3.1.0 -typing_extensions==4.12.2 -tzdata==2024.2 -ultralytics @ git+https://github.com/ultralytics/ultralytics.git@cece2ee2cf661502d0fe15656574f486625d8490 -ultralytics-thop==2.0.8 -urllib3==2.2.3 -watchdog==5.0.3 -wcwidth==0.2.13 -webencodings==0.5.1 -Werkzeug==3.0.4 -wrapt==1.16.0 -wurlitzer==3.1.1 \ No newline at end of file +python-qt-binding==2.2.1 +PyYAML==6.0.3 +qt-dotgraph==2.7.5 +qt-gui==2.7.5 +qt-gui-cpp==2.7.5 +qt-gui-py-common==2.7.5 +quality-of-service-demo-py==0.33.6 +rcl-interfaces==2.0.3 +rclpy==7.1.5 +rcutils==6.7.3 +requests==2.32.5 +resource-retriever==3.4.4 +rmw-dds-common==3.1.0 +ros2action==0.32.5 +ros2bag==0.26.9 +ros2bag-mcap-cli==0.26.9 +ros2bag-sqlite3-cli==0.26.9 +ros2cli==0.32.5 +ros2component==0.32.5 +ros2doctor==0.32.5 +ros2interface==0.32.5 +ros2launch==0.26.8 +ros2lifecycle==0.32.5 +ros2multicast==0.32.5 +ros2node==0.32.5 +ros2param==0.32.5 +ros2pkg==0.32.5 +ros2run==0.32.5 +ros2service==0.32.5 +ros2topic==0.32.5 +rosbag2-interfaces==0.26.9 +rosbag2-py==0.26.9 +rosgraph-msgs==2.0.3 +rosidl-adapter==4.6.6 +rosidl-cli==4.6.6 +rosidl-cmake==4.6.6 +rosidl-generator-c==4.6.6 +rosidl-generator-cpp==4.6.6 +rosidl-generator-py==0.22.1 +rosidl-generator-type-description==4.6.6 +rosidl-parser==4.6.6 +rosidl-pycommon==4.6.6 +rosidl-runtime-py==0.13.1 +rosidl-typesupport-c==3.2.2 +rosidl-typesupport-cpp==3.2.2 +rosidl-typesupport-fastrtps-c==3.6.2 +rosidl-typesupport-fastrtps-cpp==3.6.2 +rosidl-typesupport-introspection-c==4.6.6 +rosidl-typesupport-introspection-cpp==4.6.6 +rpyutils==0.4.1 +rqt-action==2.2.0 +rqt-bag==1.5.5 +rqt-bag-plugins==1.5.5 +rqt-console==2.2.1 +rqt-graph==1.5.5 +rqt-gui==1.6.0 +rqt-gui-py==1.6.0 +rqt-msg==1.5.1 +rqt-plot==1.4.4 +rqt-publisher==1.7.2 +rqt-py-common==1.6.0 
+rqt-py-console==1.2.2 +rqt-reconfigure==1.6.2 +rqt-service-caller==1.2.1 +rqt-shell==1.2.2 +rqt-srv==1.2.2 +rqt-topic==1.7.3 +scipy==1.16.3 +sensor-msgs==5.3.6 +sensor-msgs-py==5.3.6 +service-msgs==2.0.3 +setuptools==79.0.1 +shape-msgs==5.3.6 +six==1.17.0 +sros2==0.13.4 +statistics-msgs==2.0.3 +std-msgs==5.3.6 +std-srvs==5.3.6 +stereo-msgs==5.3.6 +sympy==1.14.0 +teleop-twist-keyboard==2.4.0 +tf2-geometry-msgs==0.36.14 +tf2-kdl==0.36.14 +tf2-msgs==0.36.14 +tf2-py==0.36.14 +tf2-ros-py==0.36.14 +tf2-sensor-msgs==0.36.14 +tf2-tools==0.36.14 +topic-monitor==0.33.6 +torch==2.9.1 +torchvision==0.24.1 +trajectory-msgs==5.3.6 +triton==3.5.1 +turtlesim==1.8.3 +type-description-interfaces==2.0.3 +typing_extensions==4.15.0 +ultralytics==8.3.241 +ultralytics-thop==2.0.18 +unique-identifier-msgs==2.5.0 +urllib3==2.6.2 +visualization-msgs==5.3.6 \ No newline at end of file diff --git a/models/yolov8n.pt b/models/yolov8n.pt index e69de29..0db4ca4 100644 Binary files a/models/yolov8n.pt and b/models/yolov8n.pt differ diff --git a/src/_archive_legacy/models/SORT/kalmanTest.py b/src/_archive_legacy/models/SORT/kalmanTest.py index 00c279a..315bf7c 100644 --- a/src/_archive_legacy/models/SORT/kalmanTest.py +++ b/src/_archive_legacy/models/SORT/kalmanTest.py @@ -1,111 +1,111 @@ -import cv2 -from ultralytics import YOLO -# https://github.com/rlabbe/filterpy/blob/master/filterpy/kalman/kalman_filter.py -from yoloKalman import KalmanTracker, convert_bbox_to_z -# import numpy as np -import sys -from time import perf_counter - -trt_model = YOLO('yolo11m.pt') - -# Open the video file -video_path = 'noResult.mp4' -# video_path = './cars-stock-footage.mp4' -cap = cv2.VideoCapture(video_path) - - -# We need to set resolutions. -# so, convert them from float to integer. -frame_width = int(cap.get(3)) -frame_height = int(cap.get(4)) -fps = cap.get(cv2.CAP_PROP_FPS) - -size = (frame_width, frame_height) - -# Define the codec and create a VideoWriter object -fourcc = cv2.VideoWriter_fourcc(*'mp4v') -out = cv2.VideoWriter('stream.avi', fourcc, fps, size) - -fc = 0 - -# Frame delay so that the video is displayed with original fps -# Use "python kalmanTest.py --show" to use -if len(sys.argv) > 1: - if sys.argv[1] == "--show": - delay = int(1000 / fps) - show_video = True - else: - print(f"Invalid argument: \"{sys.argv[1]}\"") - exit() -else: - delay = 1 - show_video = False - -while True: - start_time = perf_counter() - - ret, frame = cap.read() - if not ret: - break - fc += 1 - - # Run inference - results = trt_model.predict(frame) - - # Process results (similar to your existing code) - for result in results: - bboxes = result.boxes.xywh.cpu().numpy().astype(int) - if fc == 1: predBBoxes = KalmanTracker() - tracks = predBBoxes.bboxes_to_tracks(bboxes) - - # TEST YOLO's DETECTIONS - # for nbbox in bboxes: - # x1 = int(nbbox[0] - nbbox[2]/2) - # y1 = int(nbbox[1] - nbbox[3]/2) - # x2 = int(nbbox[0] + nbbox[2]/2) - # y2 = int(nbbox[1] + nbbox[3]/2) - # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) - - # TEST TRACK CREATION AND DELETION ALGORITHM - # for track in tracks: - # nbbox = track["history"][-1] - # x1 = int(nbbox[0] - nbbox[2]/2) - # y1 = int(nbbox[1] - nbbox[3]/2) - # x2 = int(nbbox[0] + nbbox[2]/2) - # y2 = int(nbbox[1] + nbbox[3]/2) - # cv2.putText( - # frame, - # str(track["id"]), - # (int(x1), int(y1) - 10), - # fontFace = cv2.FONT_HERSHEY_SIMPLEX, - # fontScale = 0.6, - # color = (0, 255, 0), - # thickness=2 - # ) - # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) - - # TEST KALMAN FILTER 
PREDICTIONS - for nbbox in predBBoxes.pred_tracks(): - x1 = int(nbbox[0] - nbbox[2]/2) - y1 = int(nbbox[1] - nbbox[3]/2) - x2 = int(nbbox[0] + nbbox[2]/2) - y2 = int(nbbox[1] + nbbox[3]/2) - cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) - - # Write the frame to the VideoWriter object - out.write(frame) - - if show_video: cv2.imshow("Video with BBoxes (press q to quit)", frame) - - end_time = perf_counter() - op_time_delay = int((end_time - start_time) * 1000) - adjusted_delay = max(1, int(delay - op_time_delay)) - # If the video is shown, then the delay on the output of each frame is adjusted - # to keep the video at the original fps. - if cv2.waitKey(adjusted_delay) & 0xFF == ord('q'): - break - - -out.release() -cap.release() -cv2.destroyAllWindows() +import cv2 +from ultralytics import YOLO +# https://github.com/rlabbe/filterpy/blob/master/filterpy/kalman/kalman_filter.py +from yoloKalman import KalmanTracker, convert_bbox_to_z +# import numpy as np +import sys +from time import perf_counter + +trt_model = YOLO('yolo11m.pt') + +# Open the video file +video_path = 'noResult.mp4' +# video_path = './cars-stock-footage.mp4' +cap = cv2.VideoCapture(video_path) + + +# We need to set resolutions. +# so, convert them from float to integer. +frame_width = int(cap.get(3)) +frame_height = int(cap.get(4)) +fps = cap.get(cv2.CAP_PROP_FPS) + +size = (frame_width, frame_height) + +# Define the codec and create a VideoWriter object +fourcc = cv2.VideoWriter_fourcc(*'mp4v') +out = cv2.VideoWriter('stream.avi', fourcc, fps, size) + +fc = 0 + +# Frame delay so that the video is displayed with original fps +# Use "python kalmanTest.py --show" to use +if len(sys.argv) > 1: + if sys.argv[1] == "--show": + delay = int(1000 / fps) + show_video = True + else: + print(f"Invalid argument: \"{sys.argv[1]}\"") + exit() +else: + delay = 1 + show_video = False + +while True: + start_time = perf_counter() + + ret, frame = cap.read() + if not ret: + break + fc += 1 + + # Run inference + results = trt_model.predict(frame) + + # Process results (similar to your existing code) + for result in results: + bboxes = result.boxes.xywh.cpu().numpy().astype(int) + if fc == 1: predBBoxes = KalmanTracker() + tracks = predBBoxes.bboxes_to_tracks(bboxes) + + # TEST YOLO's DETECTIONS + # for nbbox in bboxes: + # x1 = int(nbbox[0] - nbbox[2]/2) + # y1 = int(nbbox[1] - nbbox[3]/2) + # x2 = int(nbbox[0] + nbbox[2]/2) + # y2 = int(nbbox[1] + nbbox[3]/2) + # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + + # TEST TRACK CREATION AND DELETION ALGORITHM + # for track in tracks: + # nbbox = track["history"][-1] + # x1 = int(nbbox[0] - nbbox[2]/2) + # y1 = int(nbbox[1] - nbbox[3]/2) + # x2 = int(nbbox[0] + nbbox[2]/2) + # y2 = int(nbbox[1] + nbbox[3]/2) + # cv2.putText( + # frame, + # str(track["id"]), + # (int(x1), int(y1) - 10), + # fontFace = cv2.FONT_HERSHEY_SIMPLEX, + # fontScale = 0.6, + # color = (0, 255, 0), + # thickness=2 + # ) + # cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + + # TEST KALMAN FILTER PREDICTIONS + for nbbox in predBBoxes.pred_tracks(): + x1 = int(nbbox[0] - nbbox[2]/2) + y1 = int(nbbox[1] - nbbox[3]/2) + x2 = int(nbbox[0] + nbbox[2]/2) + y2 = int(nbbox[1] + nbbox[3]/2) + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + + # Write the frame to the VideoWriter object + out.write(frame) + + if show_video: cv2.imshow("Video with BBoxes (press q to quit)", frame) + + end_time = perf_counter() + op_time_delay = int((end_time - start_time) * 1000) + adjusted_delay = max(1, 
int(delay - op_time_delay)) + # If the video is shown, then the delay on the output of each frame is adjusted + # to keep the video at the original fps. + if cv2.waitKey(adjusted_delay) & 0xFF == ord('q'): + break + + +out.release() +cap.release() +cv2.destroyAllWindows() diff --git a/src/_archive_legacy/models/SORT/new_test_sort.py b/src/_archive_legacy/models/SORT/new_test_sort.py index 1ada4e9..7678ec6 100644 --- a/src/_archive_legacy/models/SORT/new_test_sort.py +++ b/src/_archive_legacy/models/SORT/new_test_sort.py @@ -1,109 +1,109 @@ -import cv2 -from ultralytics import YOLO -from sort import Sort -import numpy as np -from sklearn.cluster import KMeans - -trt_model = YOLO('yolo11s.pt') - -# Open the video file -video_path = 'people.avi' -cap = cv2.VideoCapture(video_path) - - -# get the video's dimmension -frame_width = int(cap.get(3)) -frame_height = int(cap.get(4)) -fps = cap.get(cv2.CAP_PROP_FPS) - -size = (frame_width, frame_height) - -# Define the codec and create a VideoWriter object -fourcc = cv2.VideoWriter_fourcc(*'MJPG') -out = cv2.VideoWriter('stream.avi', fourcc, 20, size) - - -fc = 0 -sort = Sort() -while True: - ret, frame = cap.read() - if not ret: - break - fc += 1 - - - #initialization sequence - if (fc == 1): - blurred_frame = cv2.GaussianBlur(frame, (45, 45), 0) - point_of_ref = (frame_width//2, frame_height//2) - mask = np.zeros((frame_height, frame_width, 3), dtype=np.uint8) - mask = cv2.circle(mask, point_of_ref, 150, color=(255, 255, 255), thickness=-1) - frame = np.where(mask==np.array([255, 255, 255]), frame, blurred_frame) - # for each subsequent frame - else: - blurred_frame = cv2.GaussianBlur(frame, (45, 45), 0) - mask = np.zeros((frame_height, frame_width, 3), dtype=np.uint8) - mask = cv2.circle(mask, point_of_ref, 150, color=(255, 255, 255), thickness=-1) - frame = np.where(mask==np.array([255, 255, 255]), frame, blurred_frame) - - - # Run inference - results = trt_model.predict(frame) - # Process results - for result in results: - # process results as a dataframe (convinience) - df = result.to_df() - # filter out objects based on class and confidence score - df.drop(df[(df['class'] != 0) | (df['confidence'] < .6)].index , inplace=True) - # get a new array of bounding boxes - bboxes = np.array([result.boxes.xywh.cpu().numpy().astype(int)[i] for i in df.index.values]) - # get the coordinates of each bbox - positions = np.array([(x, y) for x, y, _, _, in bboxes]) - - try: - # do a kmeans clustering with only 1 cluster - kmeans = KMeans(n_clusters=1, random_state=0, n_init='auto') - labels = kmeans.fit_predict(positions).tolist() - # get the center of that cluster - centroid = kmeans.cluster_centers_[labels[0]] - # update our point_of_reference (center of the circle) - point_of_ref = tuple(centroid.astype(int).tolist()) - # if there are no objects to cluster - except ValueError: - # set the point of reference to the center of the screen - point_of_ref = (frame_width//2, frame_height//2) - - # pass the bboxes to SORT - try: - tracks = sort.manage_tracks(bboxes, 0.5) - # go to the next frame if there aren't any - except: - break - - # draw a bounding box for each track - for track in tracks: - nbbox = track["history"][-1] - x1 = int(nbbox[0] - nbbox[2]/2) - y1 = int(nbbox[1] - nbbox[3]/2) - x2 = int(nbbox[0] + nbbox[2]/2) - y2 = int(nbbox[1] + nbbox[3]/2) - cv2.putText( - frame, - str(track["id"]), - (int(x1), int(y1) - 10), - fontFace = cv2.FONT_HERSHEY_SIMPLEX, - fontScale = 0.6, - color = (0, 255, 0), - thickness=2 - ) - cv2.rectangle(frame, (x1, y1), (x2, 
y2), (0, 255, 0), 2) - - # Write the frame to the VideoWriter object - out.write(frame) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - - -out.release() -cap.release() +import cv2 +from ultralytics import YOLO +from sort import Sort +import numpy as np +from sklearn.cluster import KMeans + +trt_model = YOLO('yolo11s.pt') + +# Open the video file +video_path = 'people.avi' +cap = cv2.VideoCapture(video_path) + + +# get the video's dimmension +frame_width = int(cap.get(3)) +frame_height = int(cap.get(4)) +fps = cap.get(cv2.CAP_PROP_FPS) + +size = (frame_width, frame_height) + +# Define the codec and create a VideoWriter object +fourcc = cv2.VideoWriter_fourcc(*'MJPG') +out = cv2.VideoWriter('stream.avi', fourcc, 20, size) + + +fc = 0 +sort = Sort() +while True: + ret, frame = cap.read() + if not ret: + break + fc += 1 + + + #initialization sequence + if (fc == 1): + blurred_frame = cv2.GaussianBlur(frame, (45, 45), 0) + point_of_ref = (frame_width//2, frame_height//2) + mask = np.zeros((frame_height, frame_width, 3), dtype=np.uint8) + mask = cv2.circle(mask, point_of_ref, 150, color=(255, 255, 255), thickness=-1) + frame = np.where(mask==np.array([255, 255, 255]), frame, blurred_frame) + # for each subsequent frame + else: + blurred_frame = cv2.GaussianBlur(frame, (45, 45), 0) + mask = np.zeros((frame_height, frame_width, 3), dtype=np.uint8) + mask = cv2.circle(mask, point_of_ref, 150, color=(255, 255, 255), thickness=-1) + frame = np.where(mask==np.array([255, 255, 255]), frame, blurred_frame) + + + # Run inference + results = trt_model.predict(frame) + # Process results + for result in results: + # process results as a dataframe (convinience) + df = result.to_df() + # filter out objects based on class and confidence score + df.drop(df[(df['class'] != 0) | (df['confidence'] < .6)].index , inplace=True) + # get a new array of bounding boxes + bboxes = np.array([result.boxes.xywh.cpu().numpy().astype(int)[i] for i in df.index.values]) + # get the coordinates of each bbox + positions = np.array([(x, y) for x, y, _, _, in bboxes]) + + try: + # do a kmeans clustering with only 1 cluster + kmeans = KMeans(n_clusters=1, random_state=0, n_init='auto') + labels = kmeans.fit_predict(positions).tolist() + # get the center of that cluster + centroid = kmeans.cluster_centers_[labels[0]] + # update our point_of_reference (center of the circle) + point_of_ref = tuple(centroid.astype(int).tolist()) + # if there are no objects to cluster + except ValueError: + # set the point of reference to the center of the screen + point_of_ref = (frame_width//2, frame_height//2) + + # pass the bboxes to SORT + try: + tracks = sort.manage_tracks(bboxes, 0.5) + # go to the next frame if there aren't any + except: + break + + # draw a bounding box for each track + for track in tracks: + nbbox = track["history"][-1] + x1 = int(nbbox[0] - nbbox[2]/2) + y1 = int(nbbox[1] - nbbox[3]/2) + x2 = int(nbbox[0] + nbbox[2]/2) + y2 = int(nbbox[1] + nbbox[3]/2) + cv2.putText( + frame, + str(track["id"]), + (int(x1), int(y1) - 10), + fontFace = cv2.FONT_HERSHEY_SIMPLEX, + fontScale = 0.6, + color = (0, 255, 0), + thickness=2 + ) + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + + # Write the frame to the VideoWriter object + out.write(frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + +out.release() +cap.release() cv2.destroyAllWindows() \ No newline at end of file diff --git a/src/_archive_legacy/models/SORT/sort.py b/src/_archive_legacy/models/SORT/sort.py index d33910d..28dae50 100644 --- 
a/src/_archive_legacy/models/SORT/sort.py +++ b/src/_archive_legacy/models/SORT/sort.py @@ -1,263 +1,263 @@ -from filterpy.kalman import KalmanFilter -import numpy as np -from math import sqrt, pow -import scipy -import scipy.optimize - -def cost_matrix(dect: np.ndarray, pred: np.ndarray): - """ - This functions takes in a numpy array of detections from the YOLO models - and a list of predictions from the kalman filter tracker, and returns a iou cost - matrix. - """ - - # avoid broadcasting issues - dect = np.expand_dims(dect, axis=1) - pred = np.expand_dims(pred, axis=0) - - # get two points from each detected bbox - x1D = dect[..., 0] - dect[..., 2]/2 - y1D = dect[..., 1] - dect[..., 3]/2 - x2D = dect[..., 0] + dect[..., 2]/2 - y2D = dect[..., 1] + dect[..., 3]/2 - - # get two points from each predicted bbox - x1P = pred[..., 0] - pred[..., 2]/2 - y1P = pred[..., 1] - pred[..., 3]/2 - x2P = pred[..., 0] + pred[..., 2]/2 - y2P = pred[..., 1] + pred[..., 3]/2 - - # choose the maximum bewteen the two points - x1 = np.maximum(x1D, x1P) - y1 = np.maximum(y1D, y1P) - x2 = np.minimum(x2D, x2P) - y2 = np.minimum(y2D, y2P) - intersection_width = np.maximum(0, x2-x1) - intersection_height = np.maximum(0, y2-y1) - area_of_intersection = intersection_height * intersection_width - - dect_area = dect[..., 3] * dect[..., 2] - pred_area = pred[..., 3] * pred[..., 2] - area_of_union = dect_area + pred_area - area_of_intersection - - iou_matrix = np.where(area_of_union > 0, area_of_intersection / area_of_union, 0.0) - - return iou_matrix - -def linear_assignment(costMatrix: np.float64): - """ - This function will take in a cost matrix and return an array of matched - indeces for the rows and columns of the matrix throught linear assignment - """ - # do a linear assignment based on the cost matrix - # the return is a list of row and column indexes with the optimal assignment - dect, pred = scipy.optimize.linear_sum_assignment(costMatrix, True) - # make an array of indexes of detections matched with indexes of predictions - match_indxs = np.array(list(zip(dect,pred))) - - return match_indxs - - -def convert_xywh_to_xyxy(bbox: np.int32): - x1 = bbox[0] - bbox[2]/2 - x2 = bbox[0] + bbox[2]/2 - y1 = bbox[1] - bbox[3]/2 - y2 = bbox[1] + bbox[3]/2 - z = np.array([x1,y1,x2,y2]).astype(int) - return z - -def convert_xyxy_to_xywh(bbox: np.int32): - w = bbox[2]-bbox[0] - h = bbox[3]-bbox[1] - x = bbox[2] - w/2 - y = bbox[3] - h/2 - z = np.array([x,y,w,h]).astype(int) - return z - -def convert_bbox_to_z(bbox: np.int32): - w=bbox[2] - h=bbox[3] - area = w*h - aspect_ratio = w/h - z = np.array([bbox[0], bbox[1], area, aspect_ratio]) - return z - -def convert_z_to_bbox(bbox: np.int32): - w = int(sqrt(abs(bbox[2]*bbox[3]))) - h = int(sqrt(abs( bbox[2]*pow(bbox[3], -1) ))) - new_bbox = np.array([bbox[0], bbox[1], w, h]) - return new_bbox - -class Kfilter(): - def __init__(self): - - - # create a kalman filter instance - self.dim_x = 7 - self.kalman = KalmanFilter(dim_x=self.dim_x, dim_z=4) - - #state transition matrix - self.kalman.F = np.array([[1, 0, 0, 0, 1, 0, 0], - [0, 1, 0, 0, 0, 1, 0], - [0, 0, 1, 0, 0, 0, 1], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 0, 0, 0, 1]]) - - # measurement function - self.kalman.H = np.array([[1, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0]]) - - #covariance matrix, P already contains np.eye(dim_x) - self.kalman.P *= 1000. - - # measurement noise - self.kalman.R = np.eye(4) - self.kalman.R *= 10. 
- - # assign the process noise - self.kalman.Q = np.eye(self.dim_x) - self.kalman.Q *= 0.01 - - #set arbitrary initial value - self.kalman.x = np.array([0, 0, 0, 0, 0, 0, 0]) - - # make a prediction - def predict(self): - self.kalman.predict() - bbox = convert_z_to_bbox(self.kalman.x) - - return bbox - - def update(self, bbox): - bbox = convert_bbox_to_z(bbox) - self.kalman.update(bbox) - -class KalmanTracker(): - def __init__(self): - self.tracks = list() - self.id = 0 - - def create_track(self, bbox): - track = { - "id": self.id, - "filter": Kfilter(), - "history": [bbox], - } - self.id += 1 - return track - - def associate_detections(self, preds: list, dects: np.int32, iou_threshold = 0.3): - """ - This function is in charged of associating the detections matched by the linear - sum assignment to their corresponding track, and return a list of matched bboxes, - unmatched bboxes, and unmatched tracks to be deleted - """ - - - iou_matrix = cost_matrix(dects, preds) - matched_indexes = linear_assignment(iou_matrix) - - # get unmatched bounding boxes list from the detections by comparing - # if any of the indexes in the detections array are not in the - # matched indeces array - unmatched_bboxes = list() - for i, dect in enumerate(dects): - if i not in matched_indexes[:, 0]: - unmatched_bboxes.append(i) - - # similarly, get the unmatched tracks - unmatched_tracks = list() - for j, pred in enumerate(preds): - if j not in matched_indexes[:, 1]: - unmatched_tracks.append(j) - - # get the matched detections and filter out the low ious - matched_detections = list() - for matched in matched_indexes: - # if the detection doesn't pass the threshold - if (iou_matrix[matched[0], matched[1]] < iou_threshold): - # append the index of the detection to the unmatched detection list - unmatched_bboxes.append(matched[0]) - # append the index of the prediction to the unmatched track list - unmatched_tracks.append(matched[1]) - - else: - # append it to the matched detections list - matched_detections.append(matched) - - # sort the unmatched track list to avoid indexing errors when deleting tracks - unmatched_tracks.sort(reverse=True) - - return matched_detections, unmatched_bboxes, unmatched_tracks - - def pred_tracks(self): - lstOfPred = list() - if len(self.tracks) == 0: - return - - for track in self.tracks: - # if the length of the track is just 1, then initialize the kalman filter with that value - if len(track["history"]) == 1: - track["filter"].kalman.x = np.append(convert_bbox_to_z(track["history"][-1]), [0,0,0]) - # predict the kalman filter, get the new bounding box - nbbox = track["filter"].predict() - else: - nbbox = track["filter"].predict() - - lstOfPred.append(nbbox) - - return lstOfPred - - def update_tracks(self, matched_indexes: list, bboxes: np.ndarray): - for m in matched_indexes: - # add on the corresponding dectections to each track's history - self.tracks[m[1]]["history"].append(bboxes[m[0]]) - # update each matched track filter - self.tracks[m[1]]["filter"].update(self.tracks[m[1]]["history"][-1]) - -class Sort(KalmanTracker): - def __init__(self): - super().__init__() - self.bboxes = np.empty((2,3)) - - def manage_tracks(self, bboxes: np.ndarray, iou_threshold=0.3): - """ - This function is in charged of managing all the tracks correctly and returns - a list of tracks - """ - # get new bounding boxes - self.bboxes = bboxes - - # check to see if there are no tracks - if (len(self.tracks) == 0): - for bbox in self.bboxes: - self.tracks.append(self.create_track(bbox)) - else: - - # do a 
prediction using the kalman filter - list_predictions = self.pred_tracks() - - # get list of matched detections, unmatched detections, and unmatched tracks - matches, unmatched_detections, unmatched_tracks = self.associate_detections(list_predictions, bboxes, iou_threshold) - - # update kalman filter for each track - self.update_tracks(matches, bboxes) - - # if a detection was not associated to any track - # create a new track for it - if len(unmatched_detections) > 0: - for m in unmatched_detections: - self.tracks.append(self.create_track(bboxes[m])) - - # if there is a track that was not matched to any - # detection, delete it - if len(unmatched_tracks) > 0: - for t in unmatched_tracks: - del self.tracks[t] - - - return self.tracks +from filterpy.kalman import KalmanFilter +import numpy as np +from math import sqrt, pow +import scipy +import scipy.optimize + +def cost_matrix(dect: np.ndarray, pred: np.ndarray): + """ + This functions takes in a numpy array of detections from the YOLO models + and a list of predictions from the kalman filter tracker, and returns a iou cost + matrix. + """ + + # avoid broadcasting issues + dect = np.expand_dims(dect, axis=1) + pred = np.expand_dims(pred, axis=0) + + # get two points from each detected bbox + x1D = dect[..., 0] - dect[..., 2]/2 + y1D = dect[..., 1] - dect[..., 3]/2 + x2D = dect[..., 0] + dect[..., 2]/2 + y2D = dect[..., 1] + dect[..., 3]/2 + + # get two points from each predicted bbox + x1P = pred[..., 0] - pred[..., 2]/2 + y1P = pred[..., 1] - pred[..., 3]/2 + x2P = pred[..., 0] + pred[..., 2]/2 + y2P = pred[..., 1] + pred[..., 3]/2 + + # choose the maximum bewteen the two points + x1 = np.maximum(x1D, x1P) + y1 = np.maximum(y1D, y1P) + x2 = np.minimum(x2D, x2P) + y2 = np.minimum(y2D, y2P) + intersection_width = np.maximum(0, x2-x1) + intersection_height = np.maximum(0, y2-y1) + area_of_intersection = intersection_height * intersection_width + + dect_area = dect[..., 3] * dect[..., 2] + pred_area = pred[..., 3] * pred[..., 2] + area_of_union = dect_area + pred_area - area_of_intersection + + iou_matrix = np.where(area_of_union > 0, area_of_intersection / area_of_union, 0.0) + + return iou_matrix + +def linear_assignment(costMatrix: np.float64): + """ + This function will take in a cost matrix and return an array of matched + indeces for the rows and columns of the matrix throught linear assignment + """ + # do a linear assignment based on the cost matrix + # the return is a list of row and column indexes with the optimal assignment + dect, pred = scipy.optimize.linear_sum_assignment(costMatrix, True) + # make an array of indexes of detections matched with indexes of predictions + match_indxs = np.array(list(zip(dect,pred))) + + return match_indxs + + +def convert_xywh_to_xyxy(bbox: np.int32): + x1 = bbox[0] - bbox[2]/2 + x2 = bbox[0] + bbox[2]/2 + y1 = bbox[1] - bbox[3]/2 + y2 = bbox[1] + bbox[3]/2 + z = np.array([x1,y1,x2,y2]).astype(int) + return z + +def convert_xyxy_to_xywh(bbox: np.int32): + w = bbox[2]-bbox[0] + h = bbox[3]-bbox[1] + x = bbox[2] - w/2 + y = bbox[3] - h/2 + z = np.array([x,y,w,h]).astype(int) + return z + +def convert_bbox_to_z(bbox: np.int32): + w=bbox[2] + h=bbox[3] + area = w*h + aspect_ratio = w/h + z = np.array([bbox[0], bbox[1], area, aspect_ratio]) + return z + +def convert_z_to_bbox(bbox: np.int32): + w = int(sqrt(abs(bbox[2]*bbox[3]))) + h = int(sqrt(abs( bbox[2]*pow(bbox[3], -1) ))) + new_bbox = np.array([bbox[0], bbox[1], w, h]) + return new_bbox + +class Kfilter(): + def __init__(self): + + + # create a 
kalman filter instance + self.dim_x = 7 + self.kalman = KalmanFilter(dim_x=self.dim_x, dim_z=4) + + #state transition matrix + self.kalman.F = np.array([[1, 0, 0, 0, 1, 0, 0], + [0, 1, 0, 0, 0, 1, 0], + [0, 0, 1, 0, 0, 0, 1], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 1]]) + + # measurement function + self.kalman.H = np.array([[1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0]]) + + #covariance matrix, P already contains np.eye(dim_x) + self.kalman.P *= 1000. + + # measurement noise + self.kalman.R = np.eye(4) + self.kalman.R *= 10. + + # assign the process noise + self.kalman.Q = np.eye(self.dim_x) + self.kalman.Q *= 0.01 + + #set arbitrary initial value + self.kalman.x = np.array([0, 0, 0, 0, 0, 0, 0]) + + # make a prediction + def predict(self): + self.kalman.predict() + bbox = convert_z_to_bbox(self.kalman.x) + + return bbox + + def update(self, bbox): + bbox = convert_bbox_to_z(bbox) + self.kalman.update(bbox) + +class KalmanTracker(): + def __init__(self): + self.tracks = list() + self.id = 0 + + def create_track(self, bbox): + track = { + "id": self.id, + "filter": Kfilter(), + "history": [bbox], + } + self.id += 1 + return track + + def associate_detections(self, preds: list, dects: np.int32, iou_threshold = 0.3): + """ + This function is in charged of associating the detections matched by the linear + sum assignment to their corresponding track, and return a list of matched bboxes, + unmatched bboxes, and unmatched tracks to be deleted + """ + + + iou_matrix = cost_matrix(dects, preds) + matched_indexes = linear_assignment(iou_matrix) + + # get unmatched bounding boxes list from the detections by comparing + # if any of the indexes in the detections array are not in the + # matched indeces array + unmatched_bboxes = list() + for i, dect in enumerate(dects): + if i not in matched_indexes[:, 0]: + unmatched_bboxes.append(i) + + # similarly, get the unmatched tracks + unmatched_tracks = list() + for j, pred in enumerate(preds): + if j not in matched_indexes[:, 1]: + unmatched_tracks.append(j) + + # get the matched detections and filter out the low ious + matched_detections = list() + for matched in matched_indexes: + # if the detection doesn't pass the threshold + if (iou_matrix[matched[0], matched[1]] < iou_threshold): + # append the index of the detection to the unmatched detection list + unmatched_bboxes.append(matched[0]) + # append the index of the prediction to the unmatched track list + unmatched_tracks.append(matched[1]) + + else: + # append it to the matched detections list + matched_detections.append(matched) + + # sort the unmatched track list to avoid indexing errors when deleting tracks + unmatched_tracks.sort(reverse=True) + + return matched_detections, unmatched_bboxes, unmatched_tracks + + def pred_tracks(self): + lstOfPred = list() + if len(self.tracks) == 0: + return + + for track in self.tracks: + # if the length of the track is just 1, then initialize the kalman filter with that value + if len(track["history"]) == 1: + track["filter"].kalman.x = np.append(convert_bbox_to_z(track["history"][-1]), [0,0,0]) + # predict the kalman filter, get the new bounding box + nbbox = track["filter"].predict() + else: + nbbox = track["filter"].predict() + + lstOfPred.append(nbbox) + + return lstOfPred + + def update_tracks(self, matched_indexes: list, bboxes: np.ndarray): + for m in matched_indexes: + # add on the corresponding dectections to each track's history + 
self.tracks[m[1]]["history"].append(bboxes[m[0]]) + # update each matched track filter + self.tracks[m[1]]["filter"].update(self.tracks[m[1]]["history"][-1]) + +class Sort(KalmanTracker): + def __init__(self): + super().__init__() + self.bboxes = np.empty((2,3)) + + def manage_tracks(self, bboxes: np.ndarray, iou_threshold=0.3): + """ + This function is in charged of managing all the tracks correctly and returns + a list of tracks + """ + # get new bounding boxes + self.bboxes = bboxes + + # check to see if there are no tracks + if (len(self.tracks) == 0): + for bbox in self.bboxes: + self.tracks.append(self.create_track(bbox)) + else: + + # do a prediction using the kalman filter + list_predictions = self.pred_tracks() + + # get list of matched detections, unmatched detections, and unmatched tracks + matches, unmatched_detections, unmatched_tracks = self.associate_detections(list_predictions, bboxes, iou_threshold) + + # update kalman filter for each track + self.update_tracks(matches, bboxes) + + # if a detection was not associated to any track + # create a new track for it + if len(unmatched_detections) > 0: + for m in unmatched_detections: + self.tracks.append(self.create_track(bboxes[m])) + + # if there is a track that was not matched to any + # detection, delete it + if len(unmatched_tracks) > 0: + for t in unmatched_tracks: + del self.tracks[t] + + + return self.tracks diff --git a/src/_archive_legacy/models/SORT/tempCodeRunnerFile.py b/src/_archive_legacy/models/SORT/tempCodeRunnerFile.py index 8b13789..d3f5a12 100644 --- a/src/_archive_legacy/models/SORT/tempCodeRunnerFile.py +++ b/src/_archive_legacy/models/SORT/tempCodeRunnerFile.py @@ -1 +1 @@ - + diff --git a/src/_archive_legacy/models/SORT/test_sort.py b/src/_archive_legacy/models/SORT/test_sort.py index 828ce0d..c905ef6 100644 --- a/src/_archive_legacy/models/SORT/test_sort.py +++ b/src/_archive_legacy/models/SORT/test_sort.py @@ -1,76 +1,76 @@ -import cv2 -from ultralytics import YOLO -from sort import Sort - -import os -print("Current working directory:", os.getcwd()) - - -trt_model = YOLO('yolo11m.pt') - -# Open the video file -video_path = "C:/Users/dtj10/02-CLUBS_AND_PROJECTS/ACM/DroneProject/Repo/Main/SORT/bestResult.mp4" -cap = cv2.VideoCapture(video_path) - -#Check if cap actually opened -if not cap.isOpened(): - print('CAP NOT OPENED') - exit() - -# We need to set resolutions. -# so, convert them from float to integer. 
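The `sort.py` changes above re-commit the association logic: detections are matched to Kalman-predicted boxes by building an IoU matrix and solving it as a linear assignment problem, with low-IoU pairs rejected. Below is a small standalone sketch of that step; the box values are made up, the 0.3 threshold mirrors the default in `associate_detections`, and the helper reimplements the same centre-based xywh IoU as `cost_matrix`.

```python
# Tiny illustration of the SORT association step: IoU cost matrix + Hungarian assignment.
import numpy as np
from scipy.optimize import linear_sum_assignment

def iou_matrix(dets, preds):
    # boxes are (x_center, y_center, w, h), as in sort.py's cost_matrix
    dets = np.expand_dims(dets, 1).astype(float)
    preds = np.expand_dims(preds, 0).astype(float)
    x1 = np.maximum(dets[..., 0] - dets[..., 2] / 2, preds[..., 0] - preds[..., 2] / 2)
    y1 = np.maximum(dets[..., 1] - dets[..., 3] / 2, preds[..., 1] - preds[..., 3] / 2)
    x2 = np.minimum(dets[..., 0] + dets[..., 2] / 2, preds[..., 0] + preds[..., 2] / 2)
    y2 = np.minimum(dets[..., 1] + dets[..., 3] / 2, preds[..., 1] + preds[..., 3] / 2)
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    union = dets[..., 2] * dets[..., 3] + preds[..., 2] * preds[..., 3] - inter
    return np.where(union > 0, inter / union, 0.0)

detections = np.array([[100, 100, 40, 80], [300, 120, 40, 80]])   # from YOLO (illustrative)
predictions = np.array([[105, 102, 40, 80], [500, 400, 40, 80]])  # from the Kalman tracks (illustrative)

iou = iou_matrix(detections, predictions)
rows, cols = linear_sum_assignment(iou, maximize=True)  # maximise total IoU
for d, p in zip(rows, cols):
    matched = iou[d, p] >= 0.3  # same default threshold as associate_detections
    print(f"detection {d} -> track {p}, IoU={iou[d, p]:.2f}, matched={matched}")
```

In this toy case the first detection matches its track with an IoU around 0.74, while the second pair has zero overlap and would be routed to the unmatched lists, creating a new track and deleting the stale one.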
-frame_width = int(cap.get(3)) -frame_height = int(cap.get(4)) -fps = cap.get(cv2.CAP_PROP_FPS) - -size = (frame_width, frame_height) - -# Define the codec and create a VideoWriter object -fourcc = cv2.VideoWriter_fourcc(*'mp4v') -out = cv2.VideoWriter('./stream.mp4', fourcc, 24, size) - -fc = 0 -sort = Sort() -while True: - ret, frame = cap.read() - if not ret: - break - fc += 1 - - # Run inference - # results = yolo_pipeline(images=frame) - results = trt_model.predict(frame) - - # Process results (similar to your existing code) - for result in results: - bboxes = result.boxes.xywh.cpu().numpy().astype(int) - #if fc == 1: predBBoxes = KalmanTracker() - #tracks = predBBoxes.bboxes_to_tracks(bboxes) - tracks = sort.manage_tracks(bboxes) - - for track in tracks: - nbbox = track["history"][-1] - x1 = int(nbbox[0] - nbbox[2]/2) - y1 = int(nbbox[1] - nbbox[3]/2) - x2 = int(nbbox[0] + nbbox[2]/2) - y2 = int(nbbox[1] + nbbox[3]/2) - cv2.putText( - frame, - str(track["id"]), - (int(x1), int(y1) - 10), - fontFace = cv2.FONT_HERSHEY_SIMPLEX, - fontScale = 0.6, - color = (0, 255, 0), - thickness=2 - ) - cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) - - # Write the frame to the VideoWriter object - out.write(frame) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - - -out.release() -cap.release() +import cv2 +from ultralytics import YOLO +from sort import Sort + +import os +print("Current working directory:", os.getcwd()) + + +trt_model = YOLO('yolo11m.pt') + +# Open the video file +video_path = "C:/Users/dtj10/02-CLUBS_AND_PROJECTS/ACM/DroneProject/Repo/Main/SORT/bestResult.mp4" +cap = cv2.VideoCapture(video_path) + +#Check if cap actually opened +if not cap.isOpened(): + print('CAP NOT OPENED') + exit() + +# We need to set resolutions. +# so, convert them from float to integer. 
+frame_width = int(cap.get(3)) +frame_height = int(cap.get(4)) +fps = cap.get(cv2.CAP_PROP_FPS) + +size = (frame_width, frame_height) + +# Define the codec and create a VideoWriter object +fourcc = cv2.VideoWriter_fourcc(*'mp4v') +out = cv2.VideoWriter('./stream.mp4', fourcc, 24, size) + +fc = 0 +sort = Sort() +while True: + ret, frame = cap.read() + if not ret: + break + fc += 1 + + # Run inference + # results = yolo_pipeline(images=frame) + results = trt_model.predict(frame) + + # Process results (similar to your existing code) + for result in results: + bboxes = result.boxes.xywh.cpu().numpy().astype(int) + #if fc == 1: predBBoxes = KalmanTracker() + #tracks = predBBoxes.bboxes_to_tracks(bboxes) + tracks = sort.manage_tracks(bboxes) + + for track in tracks: + nbbox = track["history"][-1] + x1 = int(nbbox[0] - nbbox[2]/2) + y1 = int(nbbox[1] - nbbox[3]/2) + x2 = int(nbbox[0] + nbbox[2]/2) + y2 = int(nbbox[1] + nbbox[3]/2) + cv2.putText( + frame, + str(track["id"]), + (int(x1), int(y1) - 10), + fontFace = cv2.FONT_HERSHEY_SIMPLEX, + fontScale = 0.6, + color = (0, 255, 0), + thickness=2 + ) + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + + # Write the frame to the VideoWriter object + out.write(frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + +out.release() +cap.release() cv2.destroyAllWindows() \ No newline at end of file diff --git a/src/_archive_legacy/models/SORT/yoloKalman.py b/src/_archive_legacy/models/SORT/yoloKalman.py index b0a8a85..0267420 100644 --- a/src/_archive_legacy/models/SORT/yoloKalman.py +++ b/src/_archive_legacy/models/SORT/yoloKalman.py @@ -1,251 +1,251 @@ -import cv2 -import time -from ultralytics import YOLO -from filterpy.kalman import KalmanFilter -from filterpy.common import Q_discrete_white_noise -import numpy as np -from math import sqrt, pow - -def iou_calculator(bb_test: np.int32, bb_gt: np.int32): - """ - From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] - """ - # get the length and width of the box of the intersection - x1 = max(bb_test[0], bb_gt[0]) - y1 = max(bb_test[1], bb_gt[1]) - x2 = min(bb_test[2], bb_gt[2]) - y2 = min(bb_test[3], bb_gt[3]) - intersection_width = max(0, x2-x1) - intersection_height = max(0, y2-y1) - area_of_intersection = intersection_height * intersection_width - #get the area of the ground truth - gt_bb_len = abs(bb_gt[2] - bb_gt[0]) - gt_bb_width = abs(bb_gt[3] - bb_gt[1]) - gt_bb_area = gt_bb_len * gt_bb_width - #get the area of the prediction - det_bb_len = abs(bb_test[2] - bb_test[0]) - det_bb_width = abs(bb_test[3] - bb_test[1]) - det_bb_area = det_bb_len * det_bb_width - #get area of the union - area_of_union = gt_bb_area + det_bb_area - area_of_intersection - - iou = area_of_intersection / area_of_union - - return iou - -def convert_xywh_to_xyxy(bbox: np.int32): - x1 = bbox[0] - bbox[2]/2 - x2 = bbox[0] + bbox[2]/2 - y1 = bbox[1] - bbox[3]/2 - y2 = bbox[1] + bbox[3]/2 - z = np.array([x1,y1,x2,y2]).astype(int) - return z - -def convert_xyxy_to_xywh(bbox: np.int32): - w = bbox[2]-bbox[0] - h = bbox[3]-bbox[1] - x = bbox[2] - w/2 - y = bbox[3] - h/2 - z = np.array([x,y,w,h]).astype(int) - return z - -def convert_bbox_to_z(bbox: np.int32): - w=bbox[2] - h=bbox[3] - area = w*h - aspect_ratio = w/h - z = np.array([bbox[0], bbox[1], area, aspect_ratio]) - return z - -def convert_z_to_bbox(bbox: np.int32): - w = int(sqrt(abs(bbox[2]*bbox[3]))) - h = int(sqrt(abs( bbox[2]*pow(bbox[3], -1) ))) - new_bbox = np.array([bbox[0], bbox[1], w, h]) - return new_bbox - -class Kfilter(): - def 
__init__(self): - - - # create a kalman filter instance - self.dim_x = 7 - self.kalman = KalmanFilter(dim_x=self.dim_x, dim_z=4) - - #state transition matrix - self.kalman.F = np.array([[1, 0, 0, 0, 1, 0, 0], - [0, 1, 0, 0, 0, 1, 0], - [0, 0, 1, 0, 0, 0, 1], - [0, 0, 0, 1, 0, 0, 0], - [0, 0, 0, 0, 1, 0, 0], - [0, 0, 0, 0, 0, 1, 0], - [0, 0, 0, 0, 0, 0, 1]]) - - # measurement function - self.kalman.H = np.array([[1, 0, 0, 0, 0, 0, 0], - [0, 1, 0, 0, 0, 0, 0], - [0, 0, 1, 0, 0, 0, 0], - [0, 0, 0, 1, 0, 0, 0]]) - - #covariance matrix, P already contains np.eye(dim_x) - self.kalman.P *= 1000. - - # measurement noise - self.kalman.R = np.eye(4) - self.kalman.R *= 10. - - # assign the process noise - self.kalman.Q = np.eye(self.dim_x) - self.kalman.Q *= 0.01 - - #set arbitrary initial value - # center of bounding box - # self.kalman.x = np.array([self.bbox[0], self.bbox[1], - # # area and aspect ratio - # self.bbox[2]*self.bbox[3], self.bbox[2]/self.bbox[3], - # # change in position over time of center and area - # 0, 0, 0]) - self.kalman.x = np.array([0, 0, 0, 0, 0, 0, 0]) - - # make a prediction - def predict(self): - self.kalman.predict() - bbox = convert_z_to_bbox(self.kalman.x) - - return bbox - - def update(self, bbox): - bbox = convert_bbox_to_z(bbox) - self.kalman.update(bbox) - -class KalmanTracker(): - def __init__(self): - self.tracks = list() - self.id = 0 - - def create_track(self, bbox): - track = { - "id": self.id, - "filter": Kfilter(), - "history": [bbox] - } - self.id += 1 - return track - - def bboxes_to_tracks(self, bboxes: np.int32): - """ - This function must handle: - - creating tracks - - extending tracks - - ensure no two tracks have the same bounding box - - iou calculation - - eliminating uncessary tracks - - controling tracks' size""" - - # if tracks is 0, then we must be at the first frame - if len(self.tracks) == 0: - for bbox in bboxes: - track = self.create_track(bbox) - self.tracks.append(track) - - return self.tracks - - convBBoxes = [convert_xywh_to_xyxy(bbox) for bbox in bboxes] - convTrkedObj = [convert_xywh_to_xyxy(track["history"][-1]) for track in self.tracks] - #print(f"Track Obj: {convTrkedObj}") - - # if there are more tracks in the list than bounding boxes, we have to delete some tracks - if len(self.tracks) > len(bboxes): - # find how many tracks are extra - exTrack = len(self.tracks) - len(bboxes) - iouLst = list() - # Calculate each track's iou with each bounding box, find the maximum - for track in convTrkedObj: - maxIoU = 0 - for bbox in convBBoxes: - iou = iou_calculator(track, bbox) - if iou > maxIoU: - maxIoU = iou - # store max in a list - iouLst.append(maxIoU) - # NOTE: the id of the max value will correspond with the id of the track - # delete the tracks with the lowests IoUs - trkEnumSorted = sorted(enumerate(iouLst), key= lambda x: x[1]) - self.tracks = [self.tracks[i] for i, _ in trkEnumSorted[exTrack:]] - return self.tracks - # elif there are less tracks in the list than bounding boxes, we have to create some tracks - elif len(self.tracks) < len(bboxes): - # find how many tracks are needed - nTracks = len(bboxes) - len(self.tracks) - iouLst = list() - # Calculate each bounding box's iou with each track, find the maximum - for bbox in convBBoxes: - maxIoU = 0 - for track in convTrkedObj: - iou = iou_calculator(bbox, track) - if iou > maxIoU: - maxIoU = iou - # store max in a list - iouLst.append(maxIoU) - # NOTE: the id of the max value will correspond with the id of the bounding box - # append to the tracks list new tracks with the 
bounding boxes with the lowest IoUs - iouLow = sorted(enumerate(iouLst), key= lambda x: x[1])[:nTracks] - #print(f"iouLow: {iouLow}") - self.tracks.extend([self.create_track(bboxes[i]) for i, _ in iouLow]) - return self.tracks - - # if the number of tracks equals the number of bounding boxes, then we don't need more tracks - # Calculate the track's iou with each bounding box, store it in a list - used = set() - for _, track in enumerate(self.tracks): - lasttrkobj = convert_xywh_to_xyxy(track["history"][-1]) - maxIoU = 0 - idx = -1 - for i, bbox in enumerate(convBBoxes): - if i in used: - continue - iou = iou_calculator(lasttrkobj, bbox) - if iou > maxIoU: - # get the max value - maxIoU = iou - # find the index - idx = i - - # append the bounding box to the corresponding track - if idx != -1: - track["history"].append(bboxes[idx]) - used.add(idx) - # delete the track's previous bounding box - #print(len(track)) - max_track_size = 3 - if len(track["history"]) > max_track_size: - # keep the last three delete the rest - track["history"] = track["history"][-max_track_size:] - # return the tracks - return self.tracks - - def pred_tracks(self): - lstOfPred = list() - if len(self.tracks) == 0: - return [] - - for track in self.tracks: - # if the length of the track is just 1, then initialize the kalman filter with that value - if len(track["history"]) == 1: - track["filter"].kalman.x = np.append(convert_bbox_to_z(track["history"][-1]), [0,0,0]) - # predict the kalman filter, get the new bounding box - nbbox = track["filter"].predict() - #update the kalman filter - track["filter"].update(track["history"][-1]) - else: - # do a prediction and update the filter based on the most current bbox in the track's history - nbbox = track["filter"].predict() - track["filter"].update(track["history"][-1]) - - lstOfPred.append(nbbox) - - return lstOfPred - - - - - +import cv2 +import time +from ultralytics import YOLO +from filterpy.kalman import KalmanFilter +from filterpy.common import Q_discrete_white_noise +import numpy as np +from math import sqrt, pow + +def iou_calculator(bb_test: np.int32, bb_gt: np.int32): + """ + From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] + """ + # get the length and width of the box of the intersection + x1 = max(bb_test[0], bb_gt[0]) + y1 = max(bb_test[1], bb_gt[1]) + x2 = min(bb_test[2], bb_gt[2]) + y2 = min(bb_test[3], bb_gt[3]) + intersection_width = max(0, x2-x1) + intersection_height = max(0, y2-y1) + area_of_intersection = intersection_height * intersection_width + #get the area of the ground truth + gt_bb_len = abs(bb_gt[2] - bb_gt[0]) + gt_bb_width = abs(bb_gt[3] - bb_gt[1]) + gt_bb_area = gt_bb_len * gt_bb_width + #get the area of the prediction + det_bb_len = abs(bb_test[2] - bb_test[0]) + det_bb_width = abs(bb_test[3] - bb_test[1]) + det_bb_area = det_bb_len * det_bb_width + #get area of the union + area_of_union = gt_bb_area + det_bb_area - area_of_intersection + + iou = area_of_intersection / area_of_union + + return iou + +def convert_xywh_to_xyxy(bbox: np.int32): + x1 = bbox[0] - bbox[2]/2 + x2 = bbox[0] + bbox[2]/2 + y1 = bbox[1] - bbox[3]/2 + y2 = bbox[1] + bbox[3]/2 + z = np.array([x1,y1,x2,y2]).astype(int) + return z + +def convert_xyxy_to_xywh(bbox: np.int32): + w = bbox[2]-bbox[0] + h = bbox[3]-bbox[1] + x = bbox[2] - w/2 + y = bbox[3] - h/2 + z = np.array([x,y,w,h]).astype(int) + return z + +def convert_bbox_to_z(bbox: np.int32): + w=bbox[2] + h=bbox[3] + area = w*h + aspect_ratio = w/h + z = np.array([bbox[0], bbox[1], area, 
aspect_ratio]) + return z + +def convert_z_to_bbox(bbox: np.int32): + w = int(sqrt(abs(bbox[2]*bbox[3]))) + h = int(sqrt(abs( bbox[2]*pow(bbox[3], -1) ))) + new_bbox = np.array([bbox[0], bbox[1], w, h]) + return new_bbox + +class Kfilter(): + def __init__(self): + + + # create a kalman filter instance + self.dim_x = 7 + self.kalman = KalmanFilter(dim_x=self.dim_x, dim_z=4) + + #state transition matrix + self.kalman.F = np.array([[1, 0, 0, 0, 1, 0, 0], + [0, 1, 0, 0, 0, 1, 0], + [0, 0, 1, 0, 0, 0, 1], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 1]]) + + # measurement function + self.kalman.H = np.array([[1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0]]) + + #covariance matrix, P already contains np.eye(dim_x) + self.kalman.P *= 1000. + + # measurement noise + self.kalman.R = np.eye(4) + self.kalman.R *= 10. + + # assign the process noise + self.kalman.Q = np.eye(self.dim_x) + self.kalman.Q *= 0.01 + + #set arbitrary initial value + # center of bounding box + # self.kalman.x = np.array([self.bbox[0], self.bbox[1], + # # area and aspect ratio + # self.bbox[2]*self.bbox[3], self.bbox[2]/self.bbox[3], + # # change in position over time of center and area + # 0, 0, 0]) + self.kalman.x = np.array([0, 0, 0, 0, 0, 0, 0]) + + # make a prediction + def predict(self): + self.kalman.predict() + bbox = convert_z_to_bbox(self.kalman.x) + + return bbox + + def update(self, bbox): + bbox = convert_bbox_to_z(bbox) + self.kalman.update(bbox) + +class KalmanTracker(): + def __init__(self): + self.tracks = list() + self.id = 0 + + def create_track(self, bbox): + track = { + "id": self.id, + "filter": Kfilter(), + "history": [bbox] + } + self.id += 1 + return track + + def bboxes_to_tracks(self, bboxes: np.int32): + """ + This function must handle: + - creating tracks + - extending tracks + - ensure no two tracks have the same bounding box + - iou calculation + - eliminating uncessary tracks + - controling tracks' size""" + + # if tracks is 0, then we must be at the first frame + if len(self.tracks) == 0: + for bbox in bboxes: + track = self.create_track(bbox) + self.tracks.append(track) + + return self.tracks + + convBBoxes = [convert_xywh_to_xyxy(bbox) for bbox in bboxes] + convTrkedObj = [convert_xywh_to_xyxy(track["history"][-1]) for track in self.tracks] + #print(f"Track Obj: {convTrkedObj}") + + # if there are more tracks in the list than bounding boxes, we have to delete some tracks + if len(self.tracks) > len(bboxes): + # find how many tracks are extra + exTrack = len(self.tracks) - len(bboxes) + iouLst = list() + # Calculate each track's iou with each bounding box, find the maximum + for track in convTrkedObj: + maxIoU = 0 + for bbox in convBBoxes: + iou = iou_calculator(track, bbox) + if iou > maxIoU: + maxIoU = iou + # store max in a list + iouLst.append(maxIoU) + # NOTE: the id of the max value will correspond with the id of the track + # delete the tracks with the lowests IoUs + trkEnumSorted = sorted(enumerate(iouLst), key= lambda x: x[1]) + self.tracks = [self.tracks[i] for i, _ in trkEnumSorted[exTrack:]] + return self.tracks + # elif there are less tracks in the list than bounding boxes, we have to create some tracks + elif len(self.tracks) < len(bboxes): + # find how many tracks are needed + nTracks = len(bboxes) - len(self.tracks) + iouLst = list() + # Calculate each bounding box's iou with each track, find the maximum + for bbox in convBBoxes: + maxIoU = 0 + for track in convTrkedObj: + iou = 
iou_calculator(bbox, track) + if iou > maxIoU: + maxIoU = iou + # store max in a list + iouLst.append(maxIoU) + # NOTE: the id of the max value will correspond with the id of the bounding box + # append to the tracks list new tracks with the bounding boxes with the lowest IoUs + iouLow = sorted(enumerate(iouLst), key= lambda x: x[1])[:nTracks] + #print(f"iouLow: {iouLow}") + self.tracks.extend([self.create_track(bboxes[i]) for i, _ in iouLow]) + return self.tracks + + # if the number of tracks equals the number of bounding boxes, then we don't need more tracks + # Calculate the track's iou with each bounding box, store it in a list + used = set() + for _, track in enumerate(self.tracks): + lasttrkobj = convert_xywh_to_xyxy(track["history"][-1]) + maxIoU = 0 + idx = -1 + for i, bbox in enumerate(convBBoxes): + if i in used: + continue + iou = iou_calculator(lasttrkobj, bbox) + if iou > maxIoU: + # get the max value + maxIoU = iou + # find the index + idx = i + + # append the bounding box to the corresponding track + if idx != -1: + track["history"].append(bboxes[idx]) + used.add(idx) + # delete the track's previous bounding box + #print(len(track)) + max_track_size = 3 + if len(track["history"]) > max_track_size: + # keep the last three delete the rest + track["history"] = track["history"][-max_track_size:] + # return the tracks + return self.tracks + + def pred_tracks(self): + lstOfPred = list() + if len(self.tracks) == 0: + return [] + + for track in self.tracks: + # if the length of the track is just 1, then initialize the kalman filter with that value + if len(track["history"]) == 1: + track["filter"].kalman.x = np.append(convert_bbox_to_z(track["history"][-1]), [0,0,0]) + # predict the kalman filter, get the new bounding box + nbbox = track["filter"].predict() + #update the kalman filter + track["filter"].update(track["history"][-1]) + else: + # do a prediction and update the filter based on the most current bbox in the track's history + nbbox = track["filter"].predict() + track["filter"].update(track["history"][-1]) + + lstOfPred.append(nbbox) + + return lstOfPred + + + + + diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Data_Processing.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Data_Processing.py index c5659d8..24c9479 100644 --- a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Data_Processing.py +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Data_Processing.py @@ -1,138 +1,138 @@ -import os -import cv2 -import torch -from torch.utils.data import Dataset, DataLoader -import torchvision.transforms as transforms - -class TrainingData(Dataset): - def __init__(self, img_dirs, det_files, transform=None): - """ - Args: - img_dirs (list): List of image directories (one per sequence) - det_files (list): List of detection files corresponding to img_dirs - transform (callable, optional): Optional transform to apply to images - """ - self.img_dirs = img_dirs - self.det_files = det_files - self.transform = transform - self.detections = self.read_detections() # Read detections for all files - self.image_paths, self.labels = self.get_all_image_paths_and_labels() # Gather images and labels - - def read_detections(self): - """ - Reads detection data from multiple MOT16 detection files. 
- Returns: - Dictionary mapping (frame_id, img_dir) -> list of (object_id, bbox) - """ - detections = {} - - for img_dir, det_file in zip(self.img_dirs, self.det_files): - if not os.path.exists(det_file): - print(f"Warning: Detection file {det_file} not found, skipping.") - continue # Skip missing files - - with open(det_file, 'r') as f: - for line in f: - parts = line.strip().split(',') - frame_id = int(parts[0]) # Frame ID - object_id = int(parts[1]) # Object ID (LABEL) - - if object_id == -1: # Ignore unknown object IDs - continue - - bbox = list(map(float, parts[2:6])) # Bounding Box (x, y, w, h) - key = (frame_id, img_dir) - - if key not in detections: - detections[key] = [] - detections[key].append((object_id, bbox)) - - print(f"Total valid detections loaded: {sum(len(v) for v in detections.values())}") - return detections - - def get_all_image_paths_and_labels(self): - """ - Collects all image paths and their corresponding object labels. - Returns: - List of image paths and their associated object IDs (labels). - """ - image_paths = [] - labels = [] - - for img_dir in self.img_dirs: - if not os.path.exists(img_dir): - print(f"Warning: Image directory {img_dir} not found, skipping.") - continue - - image_files = sorted( - [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith('.jpg') or f.endswith('.png')] - ) - - print(f"Found {len(image_files)} images in {img_dir}") - - for image_file in image_files: - frame_id = int(os.path.basename(image_file).split('.')[0]) # Extract frame ID from filename - key = (frame_id, img_dir) - - if key in self.detections: - for obj_id, bbox in self.detections[key]: - image_paths.append(image_file) - labels.append(obj_id) # Store the object ID as the label - - print(f"Total valid images found: {len(image_paths)}") - return image_paths, labels - - def __len__(self): - return len(self.image_paths) - - def __getitem__(self, idx): - image_path = self.image_paths[idx] - label = self.labels[idx] # Retrieve the corresponding object ID - - if not os.path.exists(image_path): - print(f"Warning: Image {image_path} not found, skipping.") - return None, None - - image = cv2.imread(image_path) - if image is None: - print(f"Warning: Failed to load {image_path}, skipping.") - return None, None - - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - if self.transform: - image = self.transform(image) - - return image, label # Return both image and object ID - -# Custom collate function to remove None values -def custom_collate_fn(batch): - batch = [b for b in batch if b[0] is not None] # Remove None values - if len(batch) == 0: - return torch.empty(0), torch.empty(0) # Return empty tensors if no valid samples - return torch.utils.data.dataloader.default_collate(batch) - -# MOT16 Dataset File Paths -img_dirs = [ - r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\img1', - r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\img1', - r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\img1', -] - -det_files = [ - r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\det\det.txt', - r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\det\det.txt', - r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\det\det.txt', -] - -# Define Image Transformations -transform = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize((64, 64)), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), -]) - -# 
Create Dataset and DataLoader -train_dataset = TrainingData(img_dirs, det_files, transform) -train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn) +import os +import cv2 +import torch +from torch.utils.data import Dataset, DataLoader +import torchvision.transforms as transforms + +class TrainingData(Dataset): + def __init__(self, img_dirs, det_files, transform=None): + """ + Args: + img_dirs (list): List of image directories (one per sequence) + det_files (list): List of detection files corresponding to img_dirs + transform (callable, optional): Optional transform to apply to images + """ + self.img_dirs = img_dirs + self.det_files = det_files + self.transform = transform + self.detections = self.read_detections() # Read detections for all files + self.image_paths, self.labels = self.get_all_image_paths_and_labels() # Gather images and labels + + def read_detections(self): + """ + Reads detection data from multiple MOT16 detection files. + Returns: + Dictionary mapping (frame_id, img_dir) -> list of (object_id, bbox) + """ + detections = {} + + for img_dir, det_file in zip(self.img_dirs, self.det_files): + if not os.path.exists(det_file): + print(f"Warning: Detection file {det_file} not found, skipping.") + continue # Skip missing files + + with open(det_file, 'r') as f: + for line in f: + parts = line.strip().split(',') + frame_id = int(parts[0]) # Frame ID + object_id = int(parts[1]) # Object ID (LABEL) + + if object_id == -1: # Ignore unknown object IDs + continue + + bbox = list(map(float, parts[2:6])) # Bounding Box (x, y, w, h) + key = (frame_id, img_dir) + + if key not in detections: + detections[key] = [] + detections[key].append((object_id, bbox)) + + print(f"Total valid detections loaded: {sum(len(v) for v in detections.values())}") + return detections + + def get_all_image_paths_and_labels(self): + """ + Collects all image paths and their corresponding object labels. + Returns: + List of image paths and their associated object IDs (labels). 
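+
+        Note:
+            Frame IDs are recovered from the image filename (e.g. "000451.jpg" -> frame 451)
+            and looked up against the (frame_id, img_dir) keys built in read_detections;
+            images with no matching detection are skipped, and one (image_path, object_id)
+            pair is emitted per detection, so a frame containing several people contributes
+            several training samples.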
+ """ + image_paths = [] + labels = [] + + for img_dir in self.img_dirs: + if not os.path.exists(img_dir): + print(f"Warning: Image directory {img_dir} not found, skipping.") + continue + + image_files = sorted( + [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith('.jpg') or f.endswith('.png')] + ) + + print(f"Found {len(image_files)} images in {img_dir}") + + for image_file in image_files: + frame_id = int(os.path.basename(image_file).split('.')[0]) # Extract frame ID from filename + key = (frame_id, img_dir) + + if key in self.detections: + for obj_id, bbox in self.detections[key]: + image_paths.append(image_file) + labels.append(obj_id) # Store the object ID as the label + + print(f"Total valid images found: {len(image_paths)}") + return image_paths, labels + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image_path = self.image_paths[idx] + label = self.labels[idx] # Retrieve the corresponding object ID + + if not os.path.exists(image_path): + print(f"Warning: Image {image_path} not found, skipping.") + return None, None + + image = cv2.imread(image_path) + if image is None: + print(f"Warning: Failed to load {image_path}, skipping.") + return None, None + + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + if self.transform: + image = self.transform(image) + + return image, label # Return both image and object ID + +# Custom collate function to remove None values +def custom_collate_fn(batch): + batch = [b for b in batch if b[0] is not None] # Remove None values + if len(batch) == 0: + return torch.empty(0), torch.empty(0) # Return empty tensors if no valid samples + return torch.utils.data.dataloader.default_collate(batch) + +# MOT16 Dataset File Paths +img_dirs = [ + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\img1', + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\img1', + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\img1', +] + +det_files = [ + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\det\det.txt', + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\det\det.txt', + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\det\det.txt', +] + +# Define Image Transformations +transform = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((64, 64)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), +]) + +# Create Dataset and DataLoader +train_dataset = TrainingData(img_dirs, det_files, transform) +train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn) diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Training_Script.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Training_Script.py index f79be9a..991d3bd 100644 --- a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Training_Script.py +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_Training_Script.py @@ -1,46 +1,46 @@ -import torch -import torch.optim as optim -import torch.nn as nn -from CNN_Data_Processing import train_loader -from CNN_model import CNNDeepSORT # Assuming this is your CNN model -import cv2 - -DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -# Define Triplet Loss -criterion = nn.TripletMarginLoss(margin=1.0, p=2) - -# Initialize Model -model = CNNDeepSORT().to(DEVICE) -optimizer = optim.Adam(model.parameters(), lr=0.0001) - -def 
train(model, train_loader, criterion, optimizer, num_epochs): - for epoch in range(num_epochs): - model.train() - running_loss = 0.0 - - for i, (anchor, positive, negative) in enumerate(train_loader): - if anchor.shape[0] == 0: - continue # Skip empty batches - - anchor, positive, negative = anchor.to(DEVICE), positive.to(DEVICE), negative.to(DEVICE) - optimizer.zero_grad() - - anchor_embedding = model(anchor) - positive_embedding = model(positive) - negative_embedding = model(negative) - - loss = criterion(anchor_embedding, positive_embedding, negative_embedding) - loss.backward() - optimizer.step() - - running_loss += loss.item() - if i % 10 == 0: - print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}") - - print(f"Epoch [{epoch+1}/{num_epochs}] - Average Loss: {running_loss / len(train_loader):.4f}") - - torch.save(model.state_dict(), "CNNDeepSORT_final.pth") - print("Training complete! Model saved to CNNDeepSORT_final.pth") - -train(model, train_loader, criterion, optimizer, num_epochs=10) +import torch +import torch.optim as optim +import torch.nn as nn +from CNN_Data_Processing import train_loader +from CNN_model import CNNDeepSORT # Assuming this is your CNN model +import cv2 + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Define Triplet Loss +criterion = nn.TripletMarginLoss(margin=1.0, p=2) + +# Initialize Model +model = CNNDeepSORT().to(DEVICE) +optimizer = optim.Adam(model.parameters(), lr=0.0001) + +def train(model, train_loader, criterion, optimizer, num_epochs): + for epoch in range(num_epochs): + model.train() + running_loss = 0.0 + + for i, (anchor, positive, negative) in enumerate(train_loader): + if anchor.shape[0] == 0: + continue # Skip empty batches + + anchor, positive, negative = anchor.to(DEVICE), positive.to(DEVICE), negative.to(DEVICE) + optimizer.zero_grad() + + anchor_embedding = model(anchor) + positive_embedding = model(positive) + negative_embedding = model(negative) + + loss = criterion(anchor_embedding, positive_embedding, negative_embedding) + loss.backward() + optimizer.step() + + running_loss += loss.item() + if i % 10 == 0: + print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}") + + print(f"Epoch [{epoch+1}/{num_epochs}] - Average Loss: {running_loss / len(train_loader):.4f}") + + torch.save(model.state_dict(), "CNNDeepSORT_final.pth") + print("Training complete! Model saved to CNNDeepSORT_final.pth") + +train(model, train_loader, criterion, optimizer, num_epochs=10) diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_model.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_model.py index 5541779..6f9ec1f 100644 --- a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_model.py +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/CNN_model.py @@ -1,77 +1,77 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -#deepSORT uses a CNN to create appearance based I.D.'s, this augments SORT by allowing the model to re-identify occluded objects. -#This CNN focuses on learning spatial features. -class CNNDeepSORT(nn.Module): - - #Embedding_dim is the number of features or dimensions (independent variables) in the representation space. - #We chose 128 because it is a sweet spot, small enough for efficient training but large enough to capture unique features. - #128 dimensional embeddings is a standard in object tracking/re-identification models. 
- - def __init__(self, embedding_dim = 128): - super(CNNDeepSORT, self).__init__() - '''Convolution Parameters: - - in_channels: 3 Channels, RGB - out_channels: Number of feature maps the layer will produce, doubles per feature. - kernel_size: A filter that will essentially slide along each channel and capture features using dot product. - stride: The distance in which the kernel travels across the input channels. - padding: Extra pixels that are added around the input image before applying convolution, this is so image details aren't lost. - ''' - # Batch Normalization normalizes activations between the layers during training. This reduces the covariate shift - # aka the change of the distribution of inputs from one layer to the next. Which ensures faster training. - #x(hat) = (x - mean)/std dev - #then y = w(x hat) + b - #We do this to normalize the embedding values before the activation function is applied, this also reduces overfitting (by regularizing the data) - - #The first convolutional layer where the convolution math operation is performed. We are performing this in a 2D space. - self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1) - self.bn1 = nn.BatchNorm2d(32) - - self.conv2 = nn.Conv2d(in_channels=32,out_channels=64, kernel_size=3, stride=1, padding=1) - self.bn2 = nn.BatchNorm2d(64) - - self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) - self.bn3 = nn.BatchNorm2d(128) - - '''Global Average Pooling: - Used instead of multiple pooling layers because it reduces the spatial dimensions of - the feature map to 1x1, summarizing the entire feature map into 1 vector per channel. This reduces the loss of spatial detail, which is very important - for appearance embedding. - ''' - #Since DeepSORT's goal is feature extraction and not classification, we only need a single fully connected layer with linear connections. - #The linear transformation of the neurons in the last conv layer to the dense layer is y=Wx+b, which extracts the features into an - #embedding space which is perfect for cosine similarity matching. - self.pool = nn.AdaptiveAvgPool2d((1,1)) - self.fc = nn.Linear(in_features=128, out_features=embedding_dim) - - #Forward propagation method for training the CNN. - def forward(self,x): - ''' - Convolutional Layer: - input: Image - output: Feature maps that highlight different patterns in the image/frame - - Batch Normalization: - input: Feature map and number of features - output: A feature map that will converge faster and reduce covariate shift. - - Rectified Linear Unit: - input: The summation of all neurons in one layers' activations multplied by their weights and added their biases - output: If input < 0, output is 0. If input > 0, output is input. This removes any negative values that do not meet a threshold. - It also introduces non-linearity to the network, allowing for complex learning. 
- ''' - x = F.relu(self.bn1(self.conv1(x))) - x = F.relu(self.bn2(self.conv2(x))) - x = F.relu(self.bn3(self.conv3(x))) - - x = self.pool(x) #Downsizes the spatial dimensions of the feature map to 1x1 using Global Average Pooling - x = torch.flatten(x,1) #Converts the multi-dimensional tensors into a 2D tensor, (batch size * features) - x = self.fc(x) #A linear layer that matches the feature map to the desired embedding dimension (128) - return F.normalize(x,p=2, dim=1) #Uses euclidean norm to normalize a tensor along a specific dimension - -#instantiate the model -model = CNNDeepSORT() +import torch +import torch.nn as nn +import torch.nn.functional as F + +#deepSORT uses a CNN to create appearance based I.D.'s, this augments SORT by allowing the model to re-identify occluded objects. +#This CNN focuses on learning spatial features. +class CNNDeepSORT(nn.Module): + + #Embedding_dim is the number of features or dimensions (independent variables) in the representation space. + #We chose 128 because it is a sweet spot, small enough for efficient training but large enough to capture unique features. + #128 dimensional embeddings is a standard in object tracking/re-identification models. + + def __init__(self, embedding_dim = 128): + super(CNNDeepSORT, self).__init__() + '''Convolution Parameters: + + in_channels: 3 Channels, RGB + out_channels: Number of feature maps the layer will produce, doubles per feature. + kernel_size: A filter that will essentially slide along each channel and capture features using dot product. + stride: The distance in which the kernel travels across the input channels. + padding: Extra pixels that are added around the input image before applying convolution, this is so image details aren't lost. + ''' + # Batch Normalization normalizes activations between the layers during training. This reduces the covariate shift + # aka the change of the distribution of inputs from one layer to the next. Which ensures faster training. + #x(hat) = (x - mean)/std dev + #then y = w(x hat) + b + #We do this to normalize the embedding values before the activation function is applied, this also reduces overfitting (by regularizing the data) + + #The first convolutional layer where the convolution math operation is performed. We are performing this in a 2D space. + self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1) + self.bn1 = nn.BatchNorm2d(32) + + self.conv2 = nn.Conv2d(in_channels=32,out_channels=64, kernel_size=3, stride=1, padding=1) + self.bn2 = nn.BatchNorm2d(64) + + self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) + self.bn3 = nn.BatchNorm2d(128) + + '''Global Average Pooling: + Used instead of multiple pooling layers because it reduces the spatial dimensions of + the feature map to 1x1, summarizing the entire feature map into 1 vector per channel. This reduces the loss of spatial detail, which is very important + for appearance embedding. + ''' + #Since DeepSORT's goal is feature extraction and not classification, we only need a single fully connected layer with linear connections. + #The linear transformation of the neurons in the last conv layer to the dense layer is y=Wx+b, which extracts the features into an + #embedding space which is perfect for cosine similarity matching. + self.pool = nn.AdaptiveAvgPool2d((1,1)) + self.fc = nn.Linear(in_features=128, out_features=embedding_dim) + + #Forward propagation method for training the CNN. 
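+    # Shape walk-through (illustrative, assuming the 64x64 RGB crops produced by the
+    # Resize((64, 64)) transform in CNN_Data_Processing.py):
+    #   input                   [B, 3, 64, 64]
+    #   conv1 + bn1 + ReLU  ->  [B, 32, 64, 64]   (kernel 3, stride 1, padding 1 keep H and W)
+    #   conv2 + bn2 + ReLU  ->  [B, 64, 64, 64]
+    #   conv3 + bn3 + ReLU  ->  [B, 128, 64, 64]
+    #   AdaptiveAvgPool2d   ->  [B, 128, 1, 1]  -> flatten ->  [B, 128]
+    #   fc + L2 normalise   ->  [B, embedding_dim] unit-length appearance embedding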
+ def forward(self,x): + ''' + Convolutional Layer: + input: Image + output: Feature maps that highlight different patterns in the image/frame + + Batch Normalization: + input: Feature map and number of features + output: A feature map that will converge faster and reduce covariate shift. + + Rectified Linear Unit: + input: The summation of all neurons in one layers' activations multplied by their weights and added their biases + output: If input < 0, output is 0. If input > 0, output is input. This removes any negative values that do not meet a threshold. + It also introduces non-linearity to the network, allowing for complex learning. + ''' + x = F.relu(self.bn1(self.conv1(x))) + x = F.relu(self.bn2(self.conv2(x))) + x = F.relu(self.bn3(self.conv3(x))) + + x = self.pool(x) #Downsizes the spatial dimensions of the feature map to 1x1 using Global Average Pooling + x = torch.flatten(x,1) #Converts the multi-dimensional tensors into a 2D tensor, (batch size * features) + x = self.fc(x) #A linear layer that matches the feature map to the desired embedding dimension (128) + return F.normalize(x,p=2, dim=1) #Uses euclidean norm to normalize a tensor along a specific dimension + +#instantiate the model +model = CNNDeepSORT() #print(model) \ No newline at end of file diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Data_Processing.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Data_Processing.py new file mode 100644 index 0000000..24c9479 --- /dev/null +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Data_Processing.py @@ -0,0 +1,138 @@ +import os +import cv2 +import torch +from torch.utils.data import Dataset, DataLoader +import torchvision.transforms as transforms + +class TrainingData(Dataset): + def __init__(self, img_dirs, det_files, transform=None): + """ + Args: + img_dirs (list): List of image directories (one per sequence) + det_files (list): List of detection files corresponding to img_dirs + transform (callable, optional): Optional transform to apply to images + """ + self.img_dirs = img_dirs + self.det_files = det_files + self.transform = transform + self.detections = self.read_detections() # Read detections for all files + self.image_paths, self.labels = self.get_all_image_paths_and_labels() # Gather images and labels + + def read_detections(self): + """ + Reads detection data from multiple MOT16 detection files. + Returns: + Dictionary mapping (frame_id, img_dir) -> list of (object_id, bbox) + """ + detections = {} + + for img_dir, det_file in zip(self.img_dirs, self.det_files): + if not os.path.exists(det_file): + print(f"Warning: Detection file {det_file} not found, skipping.") + continue # Skip missing files + + with open(det_file, 'r') as f: + for line in f: + parts = line.strip().split(',') + frame_id = int(parts[0]) # Frame ID + object_id = int(parts[1]) # Object ID (LABEL) + + if object_id == -1: # Ignore unknown object IDs + continue + + bbox = list(map(float, parts[2:6])) # Bounding Box (x, y, w, h) + key = (frame_id, img_dir) + + if key not in detections: + detections[key] = [] + detections[key].append((object_id, bbox)) + + print(f"Total valid detections loaded: {sum(len(v) for v in detections.values())}") + return detections + + def get_all_image_paths_and_labels(self): + """ + Collects all image paths and their corresponding object labels. + Returns: + List of image paths and their associated object IDs (labels). 
+ """ + image_paths = [] + labels = [] + + for img_dir in self.img_dirs: + if not os.path.exists(img_dir): + print(f"Warning: Image directory {img_dir} not found, skipping.") + continue + + image_files = sorted( + [os.path.join(img_dir, f) for f in os.listdir(img_dir) if f.endswith('.jpg') or f.endswith('.png')] + ) + + print(f"Found {len(image_files)} images in {img_dir}") + + for image_file in image_files: + frame_id = int(os.path.basename(image_file).split('.')[0]) # Extract frame ID from filename + key = (frame_id, img_dir) + + if key in self.detections: + for obj_id, bbox in self.detections[key]: + image_paths.append(image_file) + labels.append(obj_id) # Store the object ID as the label + + print(f"Total valid images found: {len(image_paths)}") + return image_paths, labels + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image_path = self.image_paths[idx] + label = self.labels[idx] # Retrieve the corresponding object ID + + if not os.path.exists(image_path): + print(f"Warning: Image {image_path} not found, skipping.") + return None, None + + image = cv2.imread(image_path) + if image is None: + print(f"Warning: Failed to load {image_path}, skipping.") + return None, None + + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + if self.transform: + image = self.transform(image) + + return image, label # Return both image and object ID + +# Custom collate function to remove None values +def custom_collate_fn(batch): + batch = [b for b in batch if b[0] is not None] # Remove None values + if len(batch) == 0: + return torch.empty(0), torch.empty(0) # Return empty tensors if no valid samples + return torch.utils.data.dataloader.default_collate(batch) + +# MOT16 Dataset File Paths +img_dirs = [ + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\img1', + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\img1', + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\img1', +] + +det_files = [ + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-02\det\det.txt', + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-04\det\det.txt', + r'C:\Users\adamm\PycharmProjects\Kestrel\deepSORT\MOT16\train\MOT16-05\det\det.txt', +] + +# Define Image Transformations +transform = transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize((64, 64)), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), +]) + +# Create Dataset and DataLoader +train_dataset = TrainingData(img_dirs, det_files, transform) +train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn) diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Training_Script.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Training_Script.py new file mode 100644 index 0000000..991d3bd --- /dev/null +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_Training_Script.py @@ -0,0 +1,46 @@ +import torch +import torch.optim as optim +import torch.nn as nn +from CNN_Data_Processing import train_loader +from CNN_model import CNNDeepSORT # Assuming this is your CNN model +import cv2 + +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Define Triplet Loss +criterion = nn.TripletMarginLoss(margin=1.0, p=2) + +# Initialize Model +model = CNNDeepSORT().to(DEVICE) +optimizer = optim.Adam(model.parameters(), lr=0.0001) + +def train(model, train_loader, 
criterion, optimizer, num_epochs): + for epoch in range(num_epochs): + model.train() + running_loss = 0.0 + + for i, (anchor, positive, negative) in enumerate(train_loader): + if anchor.shape[0] == 0: + continue # Skip empty batches + + anchor, positive, negative = anchor.to(DEVICE), positive.to(DEVICE), negative.to(DEVICE) + optimizer.zero_grad() + + anchor_embedding = model(anchor) + positive_embedding = model(positive) + negative_embedding = model(negative) + + loss = criterion(anchor_embedding, positive_embedding, negative_embedding) + loss.backward() + optimizer.step() + + running_loss += loss.item() + if i % 10 == 0: + print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}") + + print(f"Epoch [{epoch+1}/{num_epochs}] - Average Loss: {running_loss / len(train_loader):.4f}") + + torch.save(model.state_dict(), "CNNDeepSORT_final.pth") + print("Training complete! Model saved to CNNDeepSORT_final.pth") + +train(model, train_loader, criterion, optimizer, num_epochs=10) diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_model.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_model.py new file mode 100644 index 0000000..6f9ec1f --- /dev/null +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/Legacy CNN/CNN_model.py @@ -0,0 +1,77 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +#deepSORT uses a CNN to create appearance based I.D.'s, this augments SORT by allowing the model to re-identify occluded objects. +#This CNN focuses on learning spatial features. +class CNNDeepSORT(nn.Module): + + #Embedding_dim is the number of features or dimensions (independent variables) in the representation space. + #We chose 128 because it is a sweet spot, small enough for efficient training but large enough to capture unique features. + #128 dimensional embeddings is a standard in object tracking/re-identification models. + + def __init__(self, embedding_dim = 128): + super(CNNDeepSORT, self).__init__() + '''Convolution Parameters: + + in_channels: 3 Channels, RGB + out_channels: Number of feature maps the layer will produce, doubles per feature. + kernel_size: A filter that will essentially slide along each channel and capture features using dot product. + stride: The distance in which the kernel travels across the input channels. + padding: Extra pixels that are added around the input image before applying convolution, this is so image details aren't lost. + ''' + # Batch Normalization normalizes activations between the layers during training. This reduces the covariate shift + # aka the change of the distribution of inputs from one layer to the next. Which ensures faster training. + #x(hat) = (x - mean)/std dev + #then y = w(x hat) + b + #We do this to normalize the embedding values before the activation function is applied, this also reduces overfitting (by regularizing the data) + + #The first convolutional layer where the convolution math operation is performed. We are performing this in a 2D space. 
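+        # Output-size check (worked example): with kernel_size=3, stride=1, padding=1,
+        # H_out = (H_in + 2*padding - kernel_size) / stride + 1 = H_in, so each conv layer
+        # below preserves height/width and only the channel count grows (3 -> 32 -> 64 -> 128).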
+ self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1) + self.bn1 = nn.BatchNorm2d(32) + + self.conv2 = nn.Conv2d(in_channels=32,out_channels=64, kernel_size=3, stride=1, padding=1) + self.bn2 = nn.BatchNorm2d(64) + + self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1) + self.bn3 = nn.BatchNorm2d(128) + + '''Global Average Pooling: + Used instead of multiple pooling layers because it reduces the spatial dimensions of + the feature map to 1x1, summarizing the entire feature map into 1 vector per channel. This reduces the loss of spatial detail, which is very important + for appearance embedding. + ''' + #Since DeepSORT's goal is feature extraction and not classification, we only need a single fully connected layer with linear connections. + #The linear transformation of the neurons in the last conv layer to the dense layer is y=Wx+b, which extracts the features into an + #embedding space which is perfect for cosine similarity matching. + self.pool = nn.AdaptiveAvgPool2d((1,1)) + self.fc = nn.Linear(in_features=128, out_features=embedding_dim) + + #Forward propagation method for training the CNN. + def forward(self,x): + ''' + Convolutional Layer: + input: Image + output: Feature maps that highlight different patterns in the image/frame + + Batch Normalization: + input: Feature map and number of features + output: A feature map that will converge faster and reduce covariate shift. + + Rectified Linear Unit: + input: The summation of all neurons in one layers' activations multplied by their weights and added their biases + output: If input < 0, output is 0. If input > 0, output is input. This removes any negative values that do not meet a threshold. + It also introduces non-linearity to the network, allowing for complex learning. + ''' + x = F.relu(self.bn1(self.conv1(x))) + x = F.relu(self.bn2(self.conv2(x))) + x = F.relu(self.bn3(self.conv3(x))) + + x = self.pool(x) #Downsizes the spatial dimensions of the feature map to 1x1 using Global Average Pooling + x = torch.flatten(x,1) #Converts the multi-dimensional tensors into a 2D tensor, (batch size * features) + x = self.fc(x) #A linear layer that matches the feature map to the desired embedding dimension (128) + return F.normalize(x,p=2, dim=1) #Uses euclidean norm to normalize a tensor along a specific dimension + +#instantiate the model +model = CNNDeepSORT() +#print(model) \ No newline at end of file diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/dataset.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/dataset.py new file mode 100644 index 0000000..47b57d2 --- /dev/null +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/dataset.py @@ -0,0 +1,81 @@ +import os +import torch +from torch.utils.data import Dataset # The base class we will inherit from to create our custom dataset +from PIL import Image # Python Imaging Library (Pillow), used for opening and manipulating image files + +class Market1501(Dataset): + """ + Custom PyTorch Dataset for the Market-1501 dataset. + + This class handles loading images and parsing their person IDs (PIDs) + from the filenames to be used as integer labels for training a classification model. + """ + + # CHANGED: Added 'transform=None' to accept an image transformation pipeline. + def __init__(self, image_dir, transform=None): + """ + Args: + image_dir (string): Directory path with all the training images. + transform (callable, optional): A function/transform to be applied to each image. 
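+
+        Example (illustrative sketch; the directory path is a placeholder):
+            from torchvision import transforms
+            transform = transforms.Compose([transforms.Resize((128, 64)),
+                                             transforms.ToTensor()])
+            dataset = Market1501("Market-1501/bounding_box_train", transform=transform)
+            image, label = dataset[0]  # image: [3, 128, 64] float tensor; label: 0-d int64 tensor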
+ """ + # Store the directory path for later use in __getitem__. + self.image_dir = image_dir + + # Store the transform pipeline (e.g., resize, normalize) for later use. + self.transform = transform + + # Create a list of all filenames in the directory that end with '.jpg'. + # This is a 'list comprehension', a compact way to build a list. + # It filters out any non-JPEG files (like '.txt' or system files) that might cause errors. + self.image_paths = [file for file in os.listdir(image_dir) if file.endswith(".jpg")] + + # Create a set of unique Person IDs (PIDs) from the filenames. + # Example filename: '0002_c1s1_000451_01.jpg'. We split by '_' and take the first part '0002'. + # Using a 'set' automatically removes all duplicate PIDs, giving us a list of unique people. + pids_in_dataset = set([path.split("_")[0] for path in self.image_paths]) + + # Create a mapping dictionary to convert string PIDs to integer labels (0, 1, 2, ...). + # Neural networks and loss functions require integer labels, not strings. + # 'enumerate' provides a counter 'i' for each unique 'pid'. + # Example: {'0002': 0, '0007': 1, '-1': 2} + self.pid_to_label = {pid: i for i, pid in enumerate(pids_in_dataset)} + + # Store the total number of unique classes (people). + # This is needed to correctly configure the output layer of our neural network. + self.num_classes = len(self.pid_to_label) + + # The __getitem__ method defines how to retrieve a single sample (one image and its label) from the dataset. + # The DataLoader will call this method automatically for each index from 0 to len(dataset)-1. + def __getitem__(self, index): + # Retrieve the filename for the given index from our list of paths. + name = self.image_paths[index] + + # Construct the full, platform-independent path to the image file. + # os.path.join is used so this code works on both Windows ('\') and Linux/Mac ('/'). + path = os.path.join(self.image_dir, name) + + # Open the image file using Pillow and ensure it's in RGB format (corresponds to 3 channels). + # This is important because some images might be in grayscale or have an alpha channel. + image = Image.open(path).convert("RGB") + + # CHANGED: Apply the transformations to the image. + # This is a CRITICAL step. It will resize, normalize, and convert the PIL Image to a PyTorch Tensor. + if self.transform: + image = self.transform(image) + + # Parse the filename again to get the string Person ID. + pid = name.split("_")[0] + + # Use our mapping dictionary to look up the integer label for the corresponding PID. + label = self.pid_to_label[pid] + + # Return the processed image tensor and its corresponding label as a tensor. + # The label must be a LongTensor (64-bit integer) for PyTorch's CrossEntropyLoss function. + return image, torch.tensor(label, dtype=torch.long) + + # The __len__ method must return the total number of samples in the dataset. + # The DataLoader needs this to know the dataset's size for batching, shuffling, and epoch completion. + def __len__(self): + # Return the total count of the image paths we found during initialization. 
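+        # For scale: the standard Market-1501 bounding_box_train split is commonly cited as
+        # 12,936 crops of 751 identities, so with BATCH_SIZE=128 (see train.py) one epoch is
+        # roughly 100 batches.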
+ return len(self.image_paths) + diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/model.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/model.py new file mode 100644 index 0000000..a1ac40c --- /dev/null +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/model.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class CNNdeepSORT(nn.Module): + + def __init__(self, embedding_dim): + super().__init__() + + self.convolution = nn.Sequential( + #Input Dimensions: [B, 3, H, W] + #inChannels= 3 (RGB), outChannels = embedding_dim/4 (out being # of kernels) + + # Stage 1 + nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), # without padding, convolution shrinks images quickly, potentially losing info @ edges. + nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), + nn.MaxPool2d(2), #downsample using maxmimum values in kernel -> H/2 x W/2 + + # Stage 2 + nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True), + nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True), + nn.MaxPool2d(2), # H/2 x W/2 + + # Stage 3 + nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(True), + nn.Conv2d(in_channels=256, out_channels=embedding_dim, kernel_size=3, padding=1), nn.BatchNorm2d(embedding_dim), nn.ReLU(True), + + #Final reduction later into 1x1 embedding: [B, embedding_dim, 1, 1] + nn.AdaptiveAvgPool2d((1,1)) + ) + + # Fully connected classifier for Re-ID training + #self.classifier = nn.Linear(in_features = embedding_dim, out_features=num_classes) + + def forward(self, inputTensor): + + output = self.convolution(inputTensor) # CNN encoder block -> [B, emb_dim, 1, 1] + output = torch.flatten(output,1) # [B, emb_dim] -> person's appearance vector + + output = self.classifier(output) # For use as classifier -> [B, num_classes] + # else: For use as feature extractor + return output \ No newline at end of file diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-03-15/events.out.tfevents.1752418995.1660-SUPER.4968.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-03-15/events.out.tfevents.1752418995.1660-SUPER.4968.0 new file mode 100644 index 0000000..72213b0 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-03-15/events.out.tfevents.1752418995.1660-SUPER.4968.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-49-46/events.out.tfevents.1752421786.1660-SUPER.17424.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-49-46/events.out.tfevents.1752421786.1660-SUPER.17424.0 new file mode 100644 index 0000000..b46e664 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_11-49-46/events.out.tfevents.1752421786.1660-SUPER.17424.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_12-08-54/events.out.tfevents.1752422934.1660-SUPER.38948.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_12-08-54/events.out.tfevents.1752422934.1660-SUPER.38948.0 new file 
mode 100644 index 0000000..decb8d0 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_B128_LR0.001_2025-07-13_12-08-54/events.out.tfevents.1752422934.1660-SUPER.38948.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267721.1660-SUPER.43972.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267721.1660-SUPER.43972.0 new file mode 100644 index 0000000..a8e40c0 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267721.1660-SUPER.43972.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267725.1660-SUPER.16484.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267725.1660-SUPER.16484.0 new file mode 100644 index 0000000..a41083e Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267725.1660-SUPER.16484.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267865.1660-SUPER.45020.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267865.1660-SUPER.45020.0 new file mode 100644 index 0000000..3ab3372 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267865.1660-SUPER.45020.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267868.1660-SUPER.47964.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267868.1660-SUPER.47964.0 new file mode 100644 index 0000000..056e3ea Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752267868.1660-SUPER.47964.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752268438.1660-SUPER.38940.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752268438.1660-SUPER.38940.0 new file mode 100644 index 0000000..d84f3e0 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752268438.1660-SUPER.38940.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269132.1660-SUPER.19448.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269132.1660-SUPER.19448.0 new file mode 100644 index 0000000..9752b05 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269132.1660-SUPER.19448.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269766.1660-SUPER.42140.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752269766.1660-SUPER.42140.0 new file mode 100644 index 0000000..adf7856 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy 
CNN/runs/market1501_experiment_1/events.out.tfevents.1752269766.1660-SUPER.42140.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752289904.1660-SUPER.17300.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752289904.1660-SUPER.17300.0 new file mode 100644 index 0000000..70cb2ef Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752289904.1660-SUPER.17300.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752346955.1660-SUPER.2188.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752346955.1660-SUPER.2188.0 new file mode 100644 index 0000000..e1b6b03 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752346955.1660-SUPER.2188.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347004.1660-SUPER.16044.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347004.1660-SUPER.16044.0 new file mode 100644 index 0000000..ce662cc Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347004.1660-SUPER.16044.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347084.1660-SUPER.6076.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347084.1660-SUPER.6076.0 new file mode 100644 index 0000000..ae6e6c1 Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752347084.1660-SUPER.6076.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752417153.1660-SUPER.30424.0 b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752417153.1660-SUPER.30424.0 new file mode 100644 index 0000000..4a06bdd Binary files /dev/null and b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/runs/market1501_experiment_1/events.out.tfevents.1752417153.1660-SUPER.30424.0 differ diff --git a/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/train.py b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/train.py new file mode 100644 index 0000000..0d99346 --- /dev/null +++ b/src/_archive_legacy/models/deepSORT/CNN/Legacy CNN/train.py @@ -0,0 +1,292 @@ +import os +import sys +import time +import torch +import torch.nn as nn +import torchvision +from datetime import datetime +from torchvision import transforms +from tqdm import tqdm +from torch.utils.data import DataLoader +from torch.utils.data import random_split +from torch.utils.tensorboard import SummaryWriter +from torch.amp import autocast, GradScaler # instead of torch.cuda.amp -- deprecated +from torch.nn import TripletMarginWithDistanceLoss +# Custom Modules +import dataset +from model import CNNdeepSORT + +# --------------------------------------------------- +# TRAINING FUNCTION +# --------------------------------------------------- +def euclidean_distance(x, y): + return torch.norm(x - y, p=2, dim=1) + +def training(model, dataloader, loss_fn, 
optimizer, device): + model.train() # Set model to training mode + total_loss = 0 + total_correct = 0 + total_samples = 0 + progress_bar = tqdm(dataloader, "Training Progress...", unit = "batch") + + for (archor, positive, negative) in progress_bar: + anchor.to(device, non_blocking=True) + positive = positive.to(device, non_blocking=True) + negative = negative.to(device, non_blocking=True) + # Non_blocking lets compute overlap with data transfer (small but free speed-up). Launches host --> GPU copy async if tensor already pinned (it is) + + with autocast(device_type=device): + anchor_out = model(anchor) + positive_out = model(positive) + negative_out = model(negative) + + loss = loss_fn(anchor_out, positive_out, negative_out) + + # Backward pass and optimization + scaler.scale(loss).backward() # 1. Calculate gradients based on the current loss. + torch.nn.utils.clip_grad_norm_(model.parameters(), clip_val) # Compues total L2 norm for all gradients. If exceeds `clip_val`, rescales them to be equal. Prevents gradient explosion. + scaler.step(optimizer) # 2. Update the model's weights using the calculated gradients. With scaler, undoes the scaling and skips the step if gradients overflowed (inf) + scaler.update() # Adjusts the scale factor up/down depending on whether overflow was detected, keeping training stable. + optimizer.zero_grad(set_to_none=True)# 3. Reset gradients to zero for the next iteration. Instead of writing 0s into every grad tensor, it just sets the .grad pointer to None (saving a full CUDA memset each iteration) + + # Metrics + total_loss += loss.item() + d_ap = euclidean_distance(anchor_out, positive_out) + d_an = euclidean_distance(anchor_out, negative_out) + total_correct += (d_ap < d_an).sum().item() + total_samples += anchor.size(0) + + progress_bar.set_postfix(loss=loss.item()) + + avg_loss = total_loss / len(dataloader) + accuracy = total_correct / total_samples + return avg_loss, accuracy + +# --------------------------------------------------- +# 1. SETUP +# --------------------------------------------------- + +# This code will only run when you execute `python train.py` directly +if __name__ == '__main__': + # Hyperparameters + LEARNING_RATE = 1e-3 + EPOCHS = 50 + BATCH_SIZE = 128 # e.g. 32, 64, 128 + # Increased from 64. AMP frees memory, and more data per step = fewer GPU idles + + + # Move the model to the appropriate device (GPU if available) + device = 'cuda' if torch.cuda.is_available() else 'cpu' + print(f"Using device: {device}") + + # Tries several convolution algorithms on the first batch, then uses fastest one thereafter. + # Pays off only if input sizes stay fixed (normalized to 256x128) + torch.backends.cudnn.benchmark = True + + scaler = GradScaler() # WIth FP16, gradients can under-flow to 0. This multiplies the loss by a large factor, performs backward, then divides them back down during optimizer. + clip_val = 1.0 # gradient-clip norm + + # --------------------------------------------------- + # 2. 
MODEL, LOSS, OPTIMIZER + # --------------------------------------------------- + + # Instantiate the model and move to device + # 751 is the number of classes for Market-1501 + model = CNNdeepSORT(embedding_dim=128).to(device) + + # Define loss function, optimizer, and learning rate scheduler + + #TODO distance function, margin depends on distance choosen + + loss_function = nn.TripletMarginWithDistanceLoss(distance_function=euclidean_distance, margin=0.3, swap = True ) # Cross Entropy Loss is simpler, so more suitable for validation + optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE) # AdamW is an upgraded version of Adam + + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # Decays LR by a factor of 0.1 every 10 epochs + # Gamma of 0.1 is common -- sharp reduction in LR, allowing model to switch from large adjustments to fine-tuning + + # Load from saved checkpoint if found (most generalized model) + checkpoint_path = 'best_model_checkpoint.pth' + start_epoch = 0 + if(len(sys.argv) > 1 and sys.argv[1].lower() == "load"): + if(os.path.exists(checkpoint_path)): + print(f"Loading checkpoint from {checkpoint_path}...") + checkpoint = torch.load(checkpoint_path) + model.load_state_dict(checkpoint['model_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + scheduler.load_state_dict(checkpoint['scheduler_state_dict']) + scaler.load_state_dict(checkpoint.get('scaler_state_dict', {})) + start_epoch = checkpoint['epoch'] + 1 + print(f"Resumed at epoch {start_epoch} | val_loss={checkpoint['loss']:.4f}") + else: + print("No checkpoint found, starting from scratch.") + else: + print("Training from scratch...") + + #TODO + + + # --------------------------------------------------- + # 3. DATA LOADING & SMALL VALIDATION SPLIT + # --------------------------------------------------- + + # Define more robust transforms for training + # Images are resized and normalized for better model performance. + transform = transforms.Compose([ + transforms.Resize((128, 64)), # native Market-1501 + transforms.RandomHorizontalFlip(p=0.5), # Adds ~1% mAP with no speed cost + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # mean and std are pre-calculated mean/stdev of the ImageNet dataset. + # For each channel of the image: output[channel] = (input[channel] - mean[channel]) / std[channel] + # Standardizes the pixel value range to train more effectively. Helps converge faster + ]) + + # Load dataset and create DataLoader + Market1501_train_file_path = r'C:\Users\david\Downloads\Market-1501-v15.09.15\bounding_box_train' + train_data = dataset.Market1501(Market1501_train_file_path, transform=transform) + + # Only used for early-stopping / LR checks, NOT for final testing. Tiny split keeps epochs fast but still shows over-/under-fitting trends + val_len = int(0.05 * len(train_data)) # split 5 % off for quick val + train_len = len(train_data) - val_len + + # split dataset before building both dataLoaders + train_ds, val_ds = random_split(train_data, [train_len, val_len]) + + # Entire training data + training_dataloader = DataLoader( + dataset=train_ds, + batch_size=BATCH_SIZE, + shuffle=True, + persistent_workers=True,# Keeps DataLoader workers alive across epochs and + prefetch_factor=4, # queues 4 batches per worker. 
Eliminates Windows process-spawn penalty; feeds GPU continuously + num_workers=4, # start with 4; 6–8 if CPU has threads to spare + pin_memory=True) # pin_memory puts tensors into pinned memory, allowing for faster transfer from CPU to GPU. + # Can significantly speed up training. Set to true with GPU. + + val_loader = DataLoader( + val_ds, + batch_size=BATCH_SIZE, + shuffle=False, # no need to shuffle for evaluation + num_workers=4, + pin_memory=True, + persistent_workers=True) + + # Evaluation helper + @torch.no_grad() # we don't track gradients here + def evaluate(model, loader): + """ + Runs one full pass on `loader` with AMP and returns: + (mean_cross_entropy_loss, top-1_accuracy) + """ + model.eval() # turn off dropout / batch-norm update + total_loss = 0.0 + correct = 0 + samples = 0 + + for anchor, positive, negative in loader: + anchor = anchor.to(device, non_blocking=True) + positive = positive.to(device, non_blocking=True) + negative = negative.to(device, non_blocking=True) + + with autocast(device_type=device): # AMP even in eval -> less VRAM, faster + anchor_out = model(anchor) + positive_out = model(positive) + negative_out = model(negative) + + + + + loss = loss_function(anchor_out, positive_out, negative_out) + + total_loss += loss.item() + #TODO + d_ap = euclidean_distance(anchor_out, positive_out) + d_an = euclidean_distance(anchor_out, negative_out) + correct += (d_ap < d_an).sum().item() + + samples += anchor.size(0) + + mean_loss = total_loss / len(loader) + accuracy = correct / samples + model.train() # restore training mode for caller + return mean_loss, accuracy + + # --------------------------------------------------- + # 4. TENSORBOARD LOGGING (Initial) + # --------------------------------------------------- + + print("Logging model graph and sample images to TensorBoard...") + # Get a single batch from the dataloader to log graph and images + anchor, positibe, negative = next(iter(training_dataloader)) + + print("Done.") + + # --------------------------------------------------- + # 6. MAIN TRAINING LOOP + # --------------------------------------------------- + best_val_loss = float('inf') + patience = 5 + patience_counter = 0 + + for epoch in range(start_epoch, EPOCHS): + start_time = time.time() + + # Train for one epoch + avg_loss, accuracy = training(model, training_dataloader, loss_function, optimizer, device) #tells you if optimizer is doing its job on the data it sees + val_loss, val_acc = evaluate(model, val_loader) #tells you if the network is generalizing; early stopping + + #*NEW* Update the learning rate + scheduler.step() # Without this, the model is unable to converge. + + + + # ----- save best on *validation* loss ----- + #if avg_loss < best_val_loss: + if val_loss < best_val_loss: + #best_val_loss = avg_loss + best_val_loss = val_loss + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'scheduler_state_dict': scheduler.state_dict(), + 'loss': val_loss + }, 'best_model_checkpoint.pth') + print(f"Epoch {epoch+1}: val_loss improved to {val_loss:.4f}") + patience_counter = 0 + else: + patience_counter += 1 + print(f" Epoch {epoch+1}: Loss did not improve. 
Patience = {patience_counter}/{patience}") + + # Print epoch summary + elapsed_time = time.time() - start_time + print(f"\nEpoch {epoch+1}/{EPOCHS} | Avg Loss: {avg_loss:.4f} | Acc: {accuracy:.2%} | Time: {elapsed_time:.2f}s") + + # Always save latest weights + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'scheduler_state_dict': scheduler.state_dict(), + 'scaler_state_dict': scaler.state_dict(), + 'loss': avg_loss + }, 'latest_model_checkpoint_epoch.pth') + + # Early stopping check + if patience_counter >= patience: + print(f"\n Early stopping triggered after {epoch+1} epochs.") + break + + # Log weight histograms + + + print("\nTraining Finished!") + + + # --------------------------------------------------- + # 7. CLEANUP + # --------------------------------------------------- + + + + # To view the logs, open a terminal in your project's root directory and run: + # tensorboard --logdir=run \ No newline at end of file diff --git a/src/_archive_legacy/models/deepSORT/DataLoading.py b/src/_archive_legacy/models/deepSORT/DataLoading.py index 59d95de..fc6b3f1 100644 --- a/src/_archive_legacy/models/deepSORT/DataLoading.py +++ b/src/_archive_legacy/models/deepSORT/DataLoading.py @@ -1,17 +1,17 @@ -import pandas as pd -import torch -class DataLoading: - def load_youtube_data(self, csv_path): - ''' - This method will load data from the Youtube BoundingBoxes data set from google. - We'll use the data from this dataset to help train and test the data association metric for deepSORT - ''' - df = pd.read_csv(csv_path, header=None, names=["youtube_id", "timestamp_ms", "class_id", "class_name", "object_id", "object_presence", "xmin", "xmax", "ymin", "ymax"]) - tracking_data = {} - - #Assuming the videos are in 30 FPS, we want to normalize the timestamp to frame indexes since they're in ms - - -csv_path = "C:\Users\adamm\OneDrive\Documents\AI Projects\Kestrel\yt_bb_classification_validation.csv" -load_data = DataLoading() +import pandas as pd +import torch +class DataLoading: + def load_youtube_data(self, csv_path): + ''' + This method will load data from the Youtube BoundingBoxes data set from google. + We'll use the data from this dataset to help train and test the data association metric for deepSORT + ''' + df = pd.read_csv(csv_path, header=None, names=["youtube_id", "timestamp_ms", "class_id", "class_name", "object_id", "object_presence", "xmin", "xmax", "ymin", "ymax"]) + tracking_data = {} + + #Assuming the videos are in 30 FPS, we want to normalize the timestamp to frame indexes since they're in ms + + +csv_path = "C:\Users\adamm\OneDrive\Documents\AI Projects\Kestrel\yt_bb_classification_validation.csv" +load_data = DataLoading() tracking_data = load_data.load_youtube_data(csv_path) \ No newline at end of file diff --git a/src/_archive_legacy/models/deepSORT/README.md b/src/_archive_legacy/models/deepSORT/README.md index 983b7cc..57c5865 100644 --- a/src/_archive_legacy/models/deepSORT/README.md +++ b/src/_archive_legacy/models/deepSORT/README.md @@ -1,9 +1,9 @@ -# Project Kestrel: Deep-SORT -________________________ -* Deep-SORT is a MOT algorithm that improves on SORT's ability to handle occlusion by using deep learning. A convolutional neural network is used to extract appearance based vectors from each object, then the data association algorithm can associate apperance vectors to tracks. -## Adam Mouedden's Work Explained: -* CNN_model.py: Contains the CNN model. 
-* CNN_Data_Processing.py: Contains the data loader, trained on MOT 16. -* CNN_Training_Script.py: Contains the training loop. -* Data Association.py: The skeleton code for deep-SORT's data association algorithm. +# Project Kestrel: Deep-SORT +________________________ +* Deep-SORT is a MOT algorithm that improves on SORT's ability to handle occlusion by using deep learning. A convolutional neural network is used to extract appearance based vectors from each object, then the data association algorithm can associate apperance vectors to tracks. +## Adam Mouedden's Work Explained: +* CNN_model.py: Contains the CNN model. +* CNN_Data_Processing.py: Contains the data loader, trained on MOT 16. +* CNN_Training_Script.py: Contains the training loop. +* Data Association.py: The skeleton code for deep-SORT's data association algorithm. * Cost functions implemented from research paper: https://www.mdpi.com/2076-3417/12/3/1319 \ No newline at end of file diff --git a/src/kestrel_clustering/kestrel_clustering/clustering_node.py b/src/kestrel_clustering/kestrel_clustering/clustering_node.py new file mode 100644 index 0000000..143d5f9 --- /dev/null +++ b/src/kestrel_clustering/kestrel_clustering/clustering_node.py @@ -0,0 +1,105 @@ +# Clustering node will subscribe to the tracking node and emit a message called TrackCenter.msg using ROS2 Jazzy +''' +This file will create the clustering node which will send a message called TrackCenter.msg to the next node +This node susbcribes to the input node and publishes to the next node +''' +import rclpy #ROS Common Library Python +from rclpy.node import Node +from kestrel_msgs.msg import TrackCenter +from kestrel_msgs.msg import TrackArray + + +#GLOBAL VARIABLES +#Topics +subscribed_topic = "/kestrel/whitelist_tracks" #David may need to change this once the input node is implemented +published_topic = "/kestrel/track_center" #The next node in the ROS graph needs to subscribe to this one + + +class ClusteringNode(Node): + def __init__(self): #Constructor + super().__init__('clustering_node') #Create a node in the ROS graph + + queue_size = 10 # Default queue size + + self.subscription = self.create_subscription(TrackArray, subscribed_topic, self.vision_callback, queue_size) + self.publisher_ = self.create_publisher(TrackCenter, published_topic, queue_size) + + + def _calculate_center(self, x1, y1, x2, y2): + return (x1+x2)/2 , (y1+y2)/2 + + + def _minimum_bounding_rectangle(self, input_message): + #Image coordinate system involves top left of the frame being the origin + min_x = float('inf') + max_x = float('-inf') + min_y = float('inf') + max_y = float('-inf') + + for track in input_message.tracks: + + #Instead of comparing 8 points, we realized that we can half that number with the following logic: + #Take max_x for example, x2 being the bottom-right corner of any bbox, meaning it's x value will always be larger than x1 + #So we need only compare max_x with x2 and there is no need to compare max_x with x1. This logic applies both ways for all points + max_x = track.x2 if track.x2 > max_x else max_x + min_x = track.x1 if track.x1 < min_x else min_x + max_y = track.y2 if track.y2 > max_y else max_y + min_y = track.y1 if track.y1 < min_y else min_y + + + if min_x == float('inf'): + return None + + return (min_x, min_y, max_x, max_y) + + + def _publish_center(self, input_message: TrackArray): + rectangle = self._minimum_bounding_rectangle(input_message) + + if rectangle == None: + self.get_logger().debug("No tracks in message. 
Skipping publish.") + return + + x1, y1, x2, y2 = rectangle + + #Calculate the center and the extents of the minimum bounding rectangle + center_x, center_y = self._calculate_center(x1, y1, x2, y2) + + #Initialize the output message + output_message = TrackCenter() + + #Default parameters that come from the TrackArray message + output_message.header = input_message.header + + #Newly calculated parameters, unique to the clustering node: + #Top left corner and bottom right corner + output_message.x1 = x1 + output_message.y1 = y1 + output_message.x2 = x2 + output_message.y2 = y2 + + #Center of the minimum bounding rectangle + output_message.center_x = center_x + output_message.center_y = center_y + + self.publisher_.publish(output_message) + self.get_logger().info(f'Publishing: {output_message}') + + + def vision_callback(self, input_message: TrackArray): + self.get_logger().info(f'Received: {input_message}') + self._publish_center(input_message) #Call the function that will then create the TrackCenter message and publish it + + +def main(): + rclpy.init(args=None) + print("Running the clustering node!") + clustering_node = ClusteringNode() + rclpy.spin(clustering_node) + + clustering_node.destroy_node() + rclpy.shutdown() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/kestrel_clustering/package.xml b/src/kestrel_clustering/package.xml index c485d9d..7dc8d66 100644 --- a/src/kestrel_clustering/package.xml +++ b/src/kestrel_clustering/package.xml @@ -1,22 +1,22 @@ - - - kestrel_clustering - 0.0.1 - ROS 2 node for clustering PEV group centroids - David Orjuela - MIT - - ament_python - - rclpy - kestrel_msgs - - ament_copyright - ament_flake8 - ament_pep257 - python3-pytest - - - ament_python - - + + + kestrel_clustering + 0.0.1 + ROS 2 node for clustering PEV group centroids + David Orjuela + MIT + + ament_python + + rclpy + kestrel_msgs + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/src/kestrel_clustering/setup.cfg b/src/kestrel_clustering/setup.cfg index 4a35092..8654622 100644 --- a/src/kestrel_clustering/setup.cfg +++ b/src/kestrel_clustering/setup.cfg @@ -1,4 +1,4 @@ -[develop] -script_dir=$base/lib/kestrel_clustering -[install] -install_scripts=$base/lib/kestrel_clustering +[develop] +script_dir=$base/lib/kestrel_clustering +[install] +install_scripts=$base/lib/kestrel_clustering diff --git a/src/kestrel_clustering/setup.py b/src/kestrel_clustering/setup.py index 1887f71..7051a08 100644 --- a/src/kestrel_clustering/setup.py +++ b/src/kestrel_clustering/setup.py @@ -1,25 +1,26 @@ -from setuptools import find_packages, setup - -package_name = 'kestrel_clustering' - -setup( - name=package_name, - version='0.0.0', - packages=find_packages(exclude=['test']), - data_files=[ - ('share/ament_index/resource_index/packages', - ['resource/' + package_name]), - ('share/' + package_name, ['package.xml']), - ], - install_requires=['setuptools'], - zip_safe=True, - maintainer='davidorjuela', - maintainer_email='davidorjuela@todo.todo', - description='TODO: Package description', - license='TODO: License declaration', - tests_require=['pytest'], - entry_points={ - 'console_scripts': [ - ], - }, -) +from setuptools import find_packages, setup + +package_name = 'kestrel_clustering' + +setup( + name=package_name, + version='0.0.0', + packages=find_packages(exclude=['test']), + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), 
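        # The two entries above install the ament resource marker and package.xml so that
        # colcon/ament_index can discover the package after `colcon build`.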
+ ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='davidorjuela', + maintainer_email='davidorjuela@todo.todo', + description='TODO: Package description', + license='TODO: License declaration', + tests_require=['pytest'], + entry_points={ + 'console_scripts': [ + 'clustering_node = kestrel_clustering.clustering_node:main', + ], + }, +) diff --git a/src/kestrel_clustering/test/test_copyright.py b/src/kestrel_clustering/test/test_copyright.py index 97a3919..282981a 100644 --- a/src/kestrel_clustering/test/test_copyright.py +++ b/src/kestrel_clustering/test/test_copyright.py @@ -1,25 +1,25 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_copyright.main import main -import pytest - - -# Remove the `skip` decorator once the source file(s) have a copyright header -@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') -@pytest.mark.copyright -@pytest.mark.linter -def test_copyright(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found errors' +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_copyright.main import main +import pytest + + +# Remove the `skip` decorator once the source file(s) have a copyright header +@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') +@pytest.mark.copyright +@pytest.mark.linter +def test_copyright(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found errors' diff --git a/src/kestrel_clustering/test/test_flake8.py b/src/kestrel_clustering/test/test_flake8.py index 27ee107..625324e 100644 --- a/src/kestrel_clustering/test/test_flake8.py +++ b/src/kestrel_clustering/test/test_flake8.py @@ -1,25 +1,25 @@ -# Copyright 2017 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from ament_flake8.main import main_with_errors -import pytest - - -@pytest.mark.flake8 -@pytest.mark.linter -def test_flake8(): - rc, errors = main_with_errors(argv=[]) - assert rc == 0, \ - 'Found %d code style errors / warnings:\n' % len(errors) + \ - '\n'.join(errors) +# Copyright 2017 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_flake8.main import main_with_errors +import pytest + + +@pytest.mark.flake8 +@pytest.mark.linter +def test_flake8(): + rc, errors = main_with_errors(argv=[]) + assert rc == 0, \ + 'Found %d code style errors / warnings:\n' % len(errors) + \ + '\n'.join(errors) diff --git a/src/kestrel_clustering/test/test_pep257.py b/src/kestrel_clustering/test/test_pep257.py index b234a38..4569717 100644 --- a/src/kestrel_clustering/test/test_pep257.py +++ b/src/kestrel_clustering/test/test_pep257.py @@ -1,23 +1,23 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_pep257.main import main -import pytest - - -@pytest.mark.linter -@pytest.mark.pep257 -def test_pep257(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found code style errors / warnings' +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ament_pep257.main import main +import pytest + + +@pytest.mark.linter +@pytest.mark.pep257 +def test_pep257(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found code style errors / warnings' diff --git a/src/kestrel_input/kestrel_input/PartB_00000.jpg b/src/kestrel_input/kestrel_input/PartB_00000.jpg new file mode 100644 index 0000000..73dd415 Binary files /dev/null and b/src/kestrel_input/kestrel_input/PartB_00000.jpg differ diff --git a/src/kestrel_msgs/msg/ObjectTrack.msg b/src/kestrel_input/kestrel_input/__init__.py similarity index 100% rename from src/kestrel_msgs/msg/ObjectTrack.msg rename to src/kestrel_input/kestrel_input/__init__.py diff --git a/src/kestrel_input/kestrel_input/testing_GUI.py b/src/kestrel_input/kestrel_input/testing_GUI.py new file mode 100644 index 0000000..6adcea8 --- /dev/null +++ b/src/kestrel_input/kestrel_input/testing_GUI.py @@ -0,0 +1,68 @@ +import dearpygui.dearpygui as dpg +import dearpygui.demo as demo +import cv2 +import numpy as np + +# For Testing +image_path = "PartB_00000.jpg" +image_array = cv2.imread(image_path, 256) +image_array = cv2.resize(image_array, (100, 100)) # if needed +alpha = np.full((image_array.shape[0], image_array.shape[1], 1), 255, dtype=np.uint8) +rgba = np.concatenate([image_array, alpha], axis=2) # HxWx4, RGBA uint8 +tex = (rgba.astype(np.float32) / 255.0).ravel() # flat float32 in [0,1] + +dpg.create_context() +dpg.create_viewport(title='Kestrel Tracking Management', width=1080, height=1920) + +texture_data = image_array + +# Test add texture +# for i in range(0, 100 * 100): +# texture_data.append(255 / 255) +# texture_data.append(0) +# texture_data.append(255 / 255) +# texture_data.append(255 / 255) + +with dpg.texture_registry(): + texture_id = dpg.add_dynamic_texture(width=100, height=100, default_value=tex, tag="texture_tag") # Textures cannot be resized once created + +def _update_dynamic_textures(sender, app_data, user_data): + new_color = dpg.get_value(sender) + new_color[0] = new_color[0] / 255 + new_color[1] = new_color[1] / 255 + new_color[2] = new_color[2] / 255 + new_color[3] = new_color[3] / 255 + + new_texture_data = [] + for i in range(0, 100 * 100): + new_texture_data.append(new_color[0]) + new_texture_data.append(new_color[1]) + new_texture_data.append(new_color[2]) + new_texture_data.append(new_color[3]) + + dpg.set_value("texture_tag", new_texture_data) + +with dpg.window(tag="Primary Window", label="Whitelist Tracks"): + dpg.add_text("Select the bounding boxes you wish to track.") + dpg.set_primary_window("Primary Window", True) + with dpg.table(header_row=False): + dpg.add_table_column() + dpg.add_table_column() + dpg.add_table_column() + dpg.add_table_column() + dpg.add_table_column() + + for i in range(0, 7): + with dpg.table_row(): + for j in range(0, 6): + dpg.add_image("texture_tag") + + # dpg.add_color_picker((255, 0, 255, 255), label="Texture", + # no_side_preview=True, alpha_bar=True, width=200, + # callback=_update_dynamic_textures) + + +dpg.setup_dearpygui() +dpg.show_viewport() +dpg.start_dearpygui() +dpg.destroy_context() \ No newline at end of file diff --git a/src/kestrel_input/kestrel_input/whitelistTracks.py b/src/kestrel_input/kestrel_input/whitelistTracks.py new file mode 100644 index 0000000..3f2aff6 --- /dev/null +++ b/src/kestrel_input/kestrel_input/whitelistTracks.py @@ -0,0 +1,296 @@ +import rclpy +from rclpy.node import Node +import cv2 +import dearpygui.dearpygui as dpg +import numpy as np +from sensor_msgs.msg import Image +from kestrel_msgs.msg import TrackArray, 
Track as TrackMsg +from std_msgs.msg import Header +from builtin_interfaces.msg import Time +from collections import OrderedDict +from typing import Dict, List, Optional, Tuple, Set + +from cv_bridge import CvBridge + +def clip_bbox_to_image(bbox, w: int, h: int) -> Optional[Tuple[int, int, int, int]]: + """Clip [x1,y1,x2,y2] to image bounds; return None if invalid/empty.""" + x1, y1, x2, y2 = bbox + x1 = int(np.clip(x1, 0, max(0, w - 1))) + x2 = int(np.clip(x2, 0, max(0, w - 1))) + y1 = int(np.clip(y1, 0, max(0, h - 1))) + y2 = int(np.clip(y2, 0, max(0, h - 1))) + if x2 <= x1 or y2 <= y1: + return None + return x1, y1, x2, y2 + +def ns_to_time(timestamp_ns: int) -> Time: + t = Time() + t.sec = int(timestamp_ns // 1_000_000_000) + t.nanosec = int(timestamp_ns % 1_000_000_000) + return t + +class InputNode(Node): + """ + Subscribes to camera frames and TrackArray; time-matches them, + shows per-ID crops in a GUI, and publishes a whitelist of selected IDs. + """ + def __init__(self): + super().__init__('input_node') + + # --- Buffers / state --- + self.images: "OrderedDict[int, np.ndarray]" = OrderedDict() + self.track_batches: "OrderedDict[int, List[TrackMsg]]" = OrderedDict() + + self.max_len: int = 30 + self.delta_ns: int = int(30e6) # 30 ms tolerance + + self.current_ts: Optional[int] = None + self.current_tracks_by_id: Dict[int, TrackMsg] = {} + # display_items: id -> {"crop": np.ndarray, "conf": float, "class": str} + self.display_items: Dict[int, Dict[str, object]] = {} + + # Selected (whitelisted) IDs + self.selected_tracks: Set[int] = set() + + # GUI assets + self.bridge = CvBridge() + self.thumb_size = (128, 128) # W,H for thumbnails + self.tex_tags_by_id: Dict[int, str] = {} # id -> texture tag + self.image_item_by_id: Dict[int, str] = {} # id -> image item tag + self.gui_initialized = False + + # --- Initalize ros publishers/subscribers --- + + # Raw images from YOLO, with timestamp + self.image_subscriber = self.create_subscription(Image, '/camera/image_raw', self.image_callback, 10) + # Array of Tracks from tracking_node -- each track is a bounding box with timestamp + self.tracking_subscriber = self.create_subscription(TrackArray, '/kestrel/tracks', self.tracking_callback, 10) + # Publish whitelisted array of tracks for the camera to track + self.pub_input = self.create_publisher(TrackArray, '/kestrel/whitelist_tracks', 10) + + # ----------------- Callbacks ----------------- + + # receives raw image from /camera/image_raw + def image_callback(self, msg: Image): + # convert ROS stamp (sec, nanosec) to combined timestamp in nano sec + ts = msg.header.stamp.sec * 1e9 + msg.header.nanosec + + # Convert to RGBA for dearpygui + frame = self.bridge.imgmsg_to_cv2(msg, "rgba8") + + # Buffer and evict + # mapping timestamp in nanosec as key, to the actual image as value + self.images[ts] = frame + while len(self.images) > self.max_len: + self.images.popitem(last=False) + + # Try to match with an existing track batch + self._try_match_after_new_image(ts) + + # receives TrackArray.msg from /kestrel/tracks + def tracking_callback(self, msg: TrackArray): + # All individual track timestamps should match TrackArray (msg).header timestamp + ts = msg.header.stamp.sec * 1e9 + msg.header.nanosec + + # Buffer and evict + self.track_batches[ts] = list(msg.tracks) + while len(self.track_batches) > self.max_len: + self.track_batches.popitem(last=False) + + # Try to match with an existing image + self._try_match_after_new_tracks(ts) + + # ----------------- Matching & View Build ----------------- + def 
_find_nearest_key(self, target_ts: int, keys: List[int], tol_ns: int) -> Optional[int]: + """Return key with smallest |key - target_ts| within tol_ns, else None.""" + if not keys: + return None + diffs = [(abs(k - target_ts), k) for k in keys] + diffs.sort() + best_diff, best_key = diffs[0] + return best_key if best_diff <= tol_ns else None + + def _try_match_after_new_image(self, image_ts: int): + # Find nearest track batch + nearest_tracks_ts = self._find_nearest_key(image_ts, list(self.track_batches.keys()), self.delta_ns) + if nearest_tracks_ts is None: + return + self._build_current_view(image_ts, nearest_tracks_ts) + + def _try_match_after_new_tracks(self, tracks_ts: int): + # Find nearest image + nearest_image_ts = self._find_nearest_key(tracks_ts, list(self.images.keys()), self.delta_ns) + if nearest_image_ts is None: + return + self._build_current_view(nearest_image_ts, tracks_ts) + + def _build_current_view(self, image_ts: int, tracks_ts: int): + """Set current view: compute crops for each track and refresh GUI.""" + frame = self.images.get(image_ts, None) + tracks = self.track_batches.get(tracks_ts, None) + if frame is None or tracks is None: + return + + h, w = frame.shape[:2] + self.current_ts = tracks_ts + self.current_tracks_by_id = {} + self.display_items = {} + + for t in tracks: + # Map ID -> message + self.current_tracks_by_id[int(t.id)] = t + + # Clip and crop + clipped = clip_bbox_to_image((t.x1, t.y1, t.x2, t.y2), w, h) + if clipped is None: + continue # skip invalid boxes + x1, y1, x2, y2 = clipped + crop = frame[y1:y2, x1:x2] + + # Resize to thumb for GUI + if crop.size == 0: + continue + crop_resized = cv2.resize(crop, self.thumb_size, interpolation=cv2.INTER_LINEAR) + + self.display_items[int(t.id)] = { + "crop": crop_resized, + "conf": float(t.conf), + "class": t.class_name + } + + # Update GUI from display_items + if self.gui_initialized: + self._refresh_gui() + + + # ----------------- GUI ----------------- + + def run_gui(self): + """Build GUI once and then rely on _refresh_gui() to update textures.""" + dpg.create_context() + dpg.create_viewport(title='Kestrel Tracking Management', width=900, height=700) + + with dpg.texture_registry(show=True): + dpg.add_texture_registry(tag="tex_registry") + + with dpg.window(tag="Primary Window", label="Whitelist Tracks", width=880, height=660): + dpg.set_primary_window("Primary Window", True) + dpg.add_text("Click a thumbnail to toggle selection; selected items are highlighted.") + dpg.add_separator() + # A grid container; we'll add image buttons per ID + dpg.add_child_window(tag="grid", width=-1, height=-1) + + dpg.setup_dearpygui() + dpg.show_viewport() + self.gui_initialized = True + + # Initial fill (in case we had a match before GUI launched) + self._refresh_gui() + + dpg.start_dearpygui() + dpg.destroy_context() + + # On GUI close, publish current whitelist once (optional) + self.publish_whitelist() + + def _refresh_gui(self): + """Create/update textures & image widgets per track ID from self.display_items.""" + if not dpg.does_item_exist("grid"): + return + + # Remove widgets for IDs no longer present + for old_id in list(self.image_item_by_id.keys()): + if old_id not in self.display_items: + # delete image widget and texture + if dpg.does_item_exist(self.image_item_by_id[old_id]): + dpg.delete_item(self.image_item_by_id[old_id]) + if dpg.does_item_exist(self.tex_tags_by_id[old_id]): + dpg.delete_item(self.tex_tags_by_id[old_id]) + self.image_item_by_id.pop(old_id, None) + self.tex_tags_by_id.pop(old_id, None) + + # 
Add/update current IDs + for tid, item in self.display_items.items(): + crop = item["crop"] # RGBA 128x128 + # TODO: Ensure RGBA; if source is BGRA, convert here + if crop.shape[2] == 3: + crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGBA) + + # Normalize to [0,1] and flatten for Dear PyGui texture + tex_data = (crop.astype(np.float32) / 255.0).reshape(-1) + + # Create texture if missing + if tid not in self.tex_tags_by_id or not dpg.does_item_exist(self.tex_tags_by_id[tid]): + tex_tag = f"tex_{tid}" + dpg.add_dynamic_texture( + width=self.thumb_size[0], height=self.thumb_size[1], + default_value=tex_data, tag=tex_tag, parent="tex_registry" + ) + self.tex_tags_by_id[tid] = tex_tag + else: + dpg.set_value(self.tex_tags_by_id[tid], tex_data) + + # Create/Update image button widget + selected = (tid in self.selected_tracks) + tint = (0.2, 1.0, 0.2, 1.0) if selected else (1.0, 1.0, 1.0, 1.0) + tooltip = f"ID {tid} | conf {item['conf']:.2f} | {item['class']}" + + if tid not in self.image_item_by_id or not dpg.does_item_exist(self.image_item_by_id[tid]): + btn_tag = f"img_{tid}" + with dpg.group(parent="grid"): + dpg.add_image_button( + texture_tag=self.tex_tags_by_id[tid], + tag=btn_tag, + width=self.thumb_size[0], + height=self.thumb_size[1], + tint_color=tint, + callback=self._on_tile_click, + user_data=tid + ) + dpg.add_text(tooltip) + self.image_item_by_id[tid] = btn_tag + else: + dpg.configure_item(self.image_item_by_id[tid], texture_tag=self.tex_tags_by_id[tid], tint_color=tint) + # The text below the button isn't stored; simplest is to ignore or recreate group if needed. + + def _on_tile_click(self, sender, app_data, user_data): + """Toggle selection for a given track ID and refresh the tile tint.""" + tid = int(user_data) + if tid in self.selected_tracks: + self.selected_tracks.remove(tid) + else: + self.selected_tracks.add(tid) + # Update tint immediately + if tid in self.image_item_by_id: + tint = (0.2, 1.0, 0.2, 1.0) if tid in self.selected_tracks else (1.0, 1.0, 1.0, 1.0) + dpg.configure_item(self.image_item_by_id[tid], tint_color=tint) + + # ----------------- Whitelist publisher ----------------- + + def publish_whitelist(self): + """Publish currently selected tracks as a TrackArray.""" + if not self.selected_tracks or not self.current_tracks_by_id or self.current_ts is None: + return + + whitelisted = [self.current_tracks_by_id[tid] for tid in self.selected_tracks if tid in self.current_tracks_by_id] + if not whitelisted: + return + + arr = TrackArray() + hdr = Header() + hdr.stamp = ns_to_time(self.current_ts) + # hdr.frame_id could be set if you carry it through your pipeline + arr.header = hdr + arr.tracks = whitelisted + self.pub_input.publish(arr) + + +def main(args=None): + rclpy.init(args=args) + node = InputNode() + rclpy.spin(node) + node.destroy_node() + rclpy.shutdown() + +if __name__ == '__main__': + main() diff --git a/src/kestrel_input/package.xml b/src/kestrel_input/package.xml new file mode 100644 index 0000000..354ed12 --- /dev/null +++ b/src/kestrel_input/package.xml @@ -0,0 +1,18 @@ + + + + kestrel_input + 0.0.0 + TODO: Package description + davidorjuela + TODO: License declaration + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/src/kestrel_input/requirements.txt b/src/kestrel_input/requirements.txt new file mode 100644 index 0000000..873c0fc --- /dev/null +++ b/src/kestrel_input/requirements.txt @@ -0,0 +1,3 @@ +opencv-python +dearpygui +cv_bridge diff --git a/src/kestrel_msgs/msg/TrackCentroid.msg 
b/src/kestrel_input/resource/kestrel_input similarity index 100% rename from src/kestrel_msgs/msg/TrackCentroid.msg rename to src/kestrel_input/resource/kestrel_input diff --git a/src/kestrel_input/setup.cfg b/src/kestrel_input/setup.cfg new file mode 100644 index 0000000..786b8a7 --- /dev/null +++ b/src/kestrel_input/setup.cfg @@ -0,0 +1,4 @@ +[develop] +script_dir=$base/lib/kestrel_input +[install] +install_scripts=$base/lib/kestrel_input diff --git a/src/kestrel_input/setup.py b/src/kestrel_input/setup.py new file mode 100644 index 0000000..f9238e2 --- /dev/null +++ b/src/kestrel_input/setup.py @@ -0,0 +1,25 @@ +from setuptools import find_packages, setup + +package_name = 'kestrel_input' + +setup( + name=package_name, + version='0.0.0', + packages=find_packages(exclude=['test']), + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='davidorjuela', + maintainer_email='davidorjuela@todo.todo', + description='TODO: Package description', + license='TODO: License declaration', + entry_points={ + 'console_scripts': [ + 'whitelistTracks = kestrel_input.whitelistTracks:main', + ], + }, +) diff --git a/src/kestrel_input/test/test_copyright.py b/src/kestrel_input/test/test_copyright.py new file mode 100644 index 0000000..97a3919 --- /dev/null +++ b/src/kestrel_input/test/test_copyright.py @@ -0,0 +1,25 @@ +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_copyright.main import main +import pytest + + +# Remove the `skip` decorator once the source file(s) have a copyright header +@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') +@pytest.mark.copyright +@pytest.mark.linter +def test_copyright(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found errors' diff --git a/src/kestrel_input/test/test_flake8.py b/src/kestrel_input/test/test_flake8.py new file mode 100644 index 0000000..27ee107 --- /dev/null +++ b/src/kestrel_input/test/test_flake8.py @@ -0,0 +1,25 @@ +# Copyright 2017 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ament_flake8.main import main_with_errors +import pytest + + +@pytest.mark.flake8 +@pytest.mark.linter +def test_flake8(): + rc, errors = main_with_errors(argv=[]) + assert rc == 0, \ + 'Found %d code style errors / warnings:\n' % len(errors) + \ + '\n'.join(errors) diff --git a/src/kestrel_input/test/test_pep257.py b/src/kestrel_input/test/test_pep257.py new file mode 100644 index 0000000..b234a38 --- /dev/null +++ b/src/kestrel_input/test/test_pep257.py @@ -0,0 +1,23 @@ +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_pep257.main import main +import pytest + + +@pytest.mark.linter +@pytest.mark.pep257 +def test_pep257(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found code style errors / warnings' diff --git a/src/kestrel_msgs/CMakeLists.txt b/src/kestrel_msgs/CMakeLists.txt index c65d346..06b1623 100644 --- a/src/kestrel_msgs/CMakeLists.txt +++ b/src/kestrel_msgs/CMakeLists.txt @@ -1,38 +1,37 @@ -message(STATUS "PACKAGE_XML_PATH=${CMAKE_CURRENT_SOURCE_DIR}/package.xml") -file(READ "${CMAKE_CURRENT_SOURCE_DIR}/package.xml" PKGXML) -message(STATUS "XML_CONTENT=${PKGXML}") - -cmake_minimum_required(VERSION 3.10) -project(kestrel_msgs) - -find_package(ament_cmake REQUIRED) -find_package(rosidl_default_generators REQUIRED) -find_package(std_msgs REQUIRED) -find_package(geometry_msgs REQUIRED) -find_package(diagnostic_msgs REQUIRED) -#find_package(vision_msgs REQUIRED) - -rosidl_generate_interfaces(${PROJECT_NAME} - "msg/EmbeddedDetection2D.msg" - "msg/EmbeddedDetection2DArray.msg" - "msg/CameraCommand.msg" - "msg/FlightStatus.msg" - "msg/ObjectTrack.msg" - "msg/ObstacleGrid.msg" - "msg/ProximityAlert.msg" - "msg/SensorArray.msg" - "msg/SensorReading.msg" - "msg/SystemHealth.msg" - "msg/TrackCentroid.msg" - "srv/CalibrateCamera.srv" - "srv/GetSystemStatus.srv" - "srv/ResetSensors.srv" - "srv/SetFlightMode.srv" - "srv/TriggerEmergencyStop.srv" - "action/FollowTarget.action" - "action/NavigateToWaypoint.action" - DEPENDENCIES std_msgs geometry_msgs diagnostic_msgs#vision_msgs -) - -ament_export_dependencies(rosidl_default_runtime diagnostic_msgs) -ament_package() +message(STATUS "PACKAGE_XML_PATH=${CMAKE_CURRENT_SOURCE_DIR}/package.xml") +file(READ "${CMAKE_CURRENT_SOURCE_DIR}/package.xml" PKGXML) +message(STATUS "XML_CONTENT=${PKGXML}") + +cmake_minimum_required(VERSION 3.10) +project(kestrel_msgs) + +find_package(ament_cmake REQUIRED) +find_package(rosidl_default_generators REQUIRED) +find_package(std_msgs REQUIRED) +find_package(geometry_msgs REQUIRED) + +rosidl_generate_interfaces(${PROJECT_NAME} + "msg/Detection.msg" + "msg/DetectionArray.msg" + "msg/Track.msg" + "msg/TrackArray.msg" + "msg/CameraCommand.msg" + "msg/FlightStatus.msg" + "msg/ObstacleGrid.msg" + "msg/ProximityAlert.msg" + "msg/SensorArray.msg" + "msg/SensorReading.msg" + "msg/SystemHealth.msg" + "msg/TrackCenter.msg" + "srv/CalibrateCamera.srv" + "srv/GetSystemStatus.srv" + "srv/ResetSensors.srv" + "srv/SetFlightMode.srv" + 
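  # NOTE: EmbeddedDetection2D(Array), ObjectTrack and TrackCentroid were replaced by the
  # Detection/DetectionArray, Track/TrackArray and TrackCenter interfaces listed above;
  # Python nodes importing from kestrel_msgs.msg must use the new names.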
"srv/TriggerEmergencyStop.srv" + "action/FollowTarget.action" + "action/NavigateToWaypoint.action" + DEPENDENCIES std_msgs geometry_msgs +) + +ament_export_dependencies(rosidl_default_runtime) +ament_package() diff --git a/src/kestrel_msgs/action/FollowTarget.action b/src/kestrel_msgs/action/FollowTarget.action index ea915eb..b48d6b9 100644 --- a/src/kestrel_msgs/action/FollowTarget.action +++ b/src/kestrel_msgs/action/FollowTarget.action @@ -1,11 +1,11 @@ -# Minimal Placeholder for build -# Goal -int32 dummy_goal ---- - -# Result -bool dummy_result ---- - -# Feedback -float32 dummy_feedback +# Minimal Placeholder for build +# Goal +int32 dummy_goal +--- + +# Result +bool dummy_result +--- + +# Feedback +float32 dummy_feedback diff --git a/src/kestrel_msgs/action/NavigateToWaypoint.action b/src/kestrel_msgs/action/NavigateToWaypoint.action index ea915eb..b48d6b9 100644 --- a/src/kestrel_msgs/action/NavigateToWaypoint.action +++ b/src/kestrel_msgs/action/NavigateToWaypoint.action @@ -1,11 +1,11 @@ -# Minimal Placeholder for build -# Goal -int32 dummy_goal ---- - -# Result -bool dummy_result ---- - -# Feedback -float32 dummy_feedback +# Minimal Placeholder for build +# Goal +int32 dummy_goal +--- + +# Result +bool dummy_result +--- + +# Feedback +float32 dummy_feedback diff --git a/src/kestrel_msgs/msg/CameraCommand.msg b/src/kestrel_msgs/msg/CameraCommand.msg index 6db9c42..e69de29 100644 --- a/src/kestrel_msgs/msg/CameraCommand.msg +++ b/src/kestrel_msgs/msg/CameraCommand.msg @@ -1,8 +0,0 @@ -# instructions to move the camera up ig - -# the values are typically in radians -# Positive pan: right, Positive tilt: up - -float64 pan # Pan angle -float64 tilt # Tilt angle - diff --git a/src/kestrel_msgs/msg/EmbeddedDetection2D.msg b/src/kestrel_msgs/msg/Detection.msg similarity index 92% rename from src/kestrel_msgs/msg/EmbeddedDetection2D.msg rename to src/kestrel_msgs/msg/Detection.msg index 8b7c410..fe0be05 100644 --- a/src/kestrel_msgs/msg/EmbeddedDetection2D.msg +++ b/src/kestrel_msgs/msg/Detection.msg @@ -1,16 +1,16 @@ -std_msgs/Header header -float32 x1 -float32 y1 -float32 x2 -float32 y2 - -float32 w -float32 h - -float32 center_x -float32 center_y - -float32 conf -string class_name - +std_msgs/Header header +float32 x1 +float32 y1 +float32 x2 +float32 y2 + +float32 w +float32 h + +float32 center_x +float32 center_y + +float32 conf +string class_name + float32[128] embedding \ No newline at end of file diff --git a/src/kestrel_msgs/msg/DetectionArray.msg b/src/kestrel_msgs/msg/DetectionArray.msg new file mode 100644 index 0000000..6767302 --- /dev/null +++ b/src/kestrel_msgs/msg/DetectionArray.msg @@ -0,0 +1,2 @@ +std_msgs/Header header +kestrel_msgs/Detection[] detections \ No newline at end of file diff --git a/src/kestrel_msgs/msg/EmbeddedDetection2DArray.msg b/src/kestrel_msgs/msg/EmbeddedDetection2DArray.msg deleted file mode 100644 index 663e236..0000000 --- a/src/kestrel_msgs/msg/EmbeddedDetection2DArray.msg +++ /dev/null @@ -1,2 +0,0 @@ -std_msgs/Header header -kestrel_msgs/EmbeddedDetection2D[] detections \ No newline at end of file diff --git a/src/kestrel_msgs/msg/FlightStatus.msg b/src/kestrel_msgs/msg/FlightStatus.msg index 887f326..e69de29 100644 --- a/src/kestrel_msgs/msg/FlightStatus.msg +++ b/src/kestrel_msgs/msg/FlightStatus.msg @@ -1,26 +0,0 @@ -# Current state of the drone (flying, landed, etc.) 
- -# Header for timestamp - -std_msgs/Header header - -#Flight mode constraints for clarity - -uint8 MODE_UNKNOWN = 0 -uint8 MODE_MANUAL = 1 -uint8 MODE_AUTO = 2 -uint8 MODE_LANDING = 3 -uint8 MODE_EMERGENCY = 4 -uint8 MODE_LANDED = 5 - -# Current flight mode -uint8 flight_mode - -# Armed state of the vehicle -bool armed - -# Current battery percentage -float32 battery_percent - -# Propeller speeds in RPM. The array order SHOULD be consistent. -uint8[] propeller_speeds \ No newline at end of file diff --git a/src/kestrel_msgs/msg/ObstacleGrid.msg b/src/kestrel_msgs/msg/ObstacleGrid.msg index 5b9ae7b..e69de29 100644 --- a/src/kestrel_msgs/msg/ObstacleGrid.msg +++ b/src/kestrel_msgs/msg/ObstacleGrid.msg @@ -1,18 +0,0 @@ -# Represents a 3D map showing where obstacles are in space - -# Header for timestamp and frame ID -std_msgs/Header header - -# Grid metadata - -float32 resolution # Resolution of the grid in meters/cell -uint32 width # self explanitory -uint32 height # self explanitory -uint32 depth #depth of the grid - -# Origin of the grid [m] in the frame given in the Header -geometry_msgs/Pose origin - -# The grid data is stored in a 1D array in a row-major order lol -# 0 means free, 100 means occupied, -1 means unknown -int8[] data \ No newline at end of file diff --git a/src/kestrel_msgs/msg/ProximityAlert.msg b/src/kestrel_msgs/msg/ProximityAlert.msg index 2c59ed3..e69de29 100644 --- a/src/kestrel_msgs/msg/ProximityAlert.msg +++ b/src/kestrel_msgs/msg/ProximityAlert.msg @@ -1,17 +0,0 @@ -# Warns when something gets too close - -# Header for timestamp and frame ID - -std_msgs/Header header - -# Distance to the closest detected object - -float32 distance - -# Severity level of the alert - -uint8 SEVERITY_LOW = 1 # CAUTION -uint8 SEVERITY_MEDIUM = 2 # WARNING -uint8 SEVERITY_HIGH = 3 # CRITICAL - -uint8 severity \ No newline at end of file diff --git a/src/kestrel_msgs/msg/SensorArray.msg b/src/kestrel_msgs/msg/SensorArray.msg index 2b0db85..e69de29 100644 --- a/src/kestrel_msgs/msg/SensorArray.msg +++ b/src/kestrel_msgs/msg/SensorArray.msg @@ -1,7 +0,0 @@ -# Collection of data from multiple sensors - -# Header for timestamp -std_msgs/Header header - -# This is an array of individual sensor readings -kestrel_msgs/SensorReading[] readings \ No newline at end of file diff --git a/src/kestrel_msgs/msg/SensorReading.msg b/src/kestrel_msgs/msg/SensorReading.msg index 0134cc5..e69de29 100644 --- a/src/kestrel_msgs/msg/SensorReading.msg +++ b/src/kestrel_msgs/msg/SensorReading.msg @@ -1,14 +0,0 @@ -# Data from one sensor (Like distance or temperature) - -# Header for timestamp and frame_id of the sensor - -std_msgs/Header header - -# Type of the sensor (Like ToF and crap) -string sensor_type - -# The actual sensor values -float64 value - -# Variance of the reading, if available -float64 variance \ No newline at end of file diff --git a/src/kestrel_msgs/msg/SystemHealth.msg b/src/kestrel_msgs/msg/SystemHealth.msg index eca2a1a..e69de29 100644 --- a/src/kestrel_msgs/msg/SystemHealth.msg +++ b/src/kestrel_msgs/msg/SystemHealth.msg @@ -1,22 +0,0 @@ -# Report on how well all systems are functioning - -# Header for timestamp -std_msgs/Header header - -# Overall system status lole - -uint8 STATUS_OK = 0 -uint8 STATUS_WARN = 1 -uint8 STATUS_ERROR = 2 -uint8 STATUS_CRITICAL = 3 -uint8 status - -# CPU and Memory usage -float32 cpu_usage_percent -float32 memory_usage_percent - -# System temperature IN CELSIUS!! 
-float32 temperature_celsius - -# Ok so this is a list of key-value pairs for other component-specific status lol -diagnostic_msgs/KeyValue[] my_key_values \ No newline at end of file diff --git a/src/kestrel_msgs/msg/Track.msg b/src/kestrel_msgs/msg/Track.msg new file mode 100644 index 0000000..eb57306 --- /dev/null +++ b/src/kestrel_msgs/msg/Track.msg @@ -0,0 +1,16 @@ +std_msgs/Header header + +int16 id + +float32 x1 +float32 y1 +float32 x2 +float32 y2 + +float32 center_x +float32 center_y + +float32 conf +string class_name + +float32[128] embedding diff --git a/src/kestrel_msgs/msg/TrackArray.msg b/src/kestrel_msgs/msg/TrackArray.msg new file mode 100644 index 0000000..99da94b --- /dev/null +++ b/src/kestrel_msgs/msg/TrackArray.msg @@ -0,0 +1,2 @@ +std_msgs/Header header +kestrel_msgs/Track[] tracks \ No newline at end of file diff --git a/src/kestrel_msgs/msg/TrackCenter.msg b/src/kestrel_msgs/msg/TrackCenter.msg new file mode 100644 index 0000000..40536f4 --- /dev/null +++ b/src/kestrel_msgs/msg/TrackCenter.msg @@ -0,0 +1,10 @@ +std_msgs/Header header + +float32 x1 +float32 y1 +float32 x2 +float32 y2 + +float32 center_x +float32 center_y + diff --git a/src/kestrel_msgs/package.xml b/src/kestrel_msgs/package.xml index 6c2bfa0..7f19585 100644 --- a/src/kestrel_msgs/package.xml +++ b/src/kestrel_msgs/package.xml @@ -1,27 +1,24 @@ - - - kestrel_msgs - 0.1.0 - Custom messages, services, and actions for the Kestrel Drone - Rafeed Khan - MIT - - ament_cmake - - rosidl_default_generators - rosidl_default_runtime - - std_msgs - std_msgs - geometry_msgs - geometry_msgs - diagnostic_msgs - - - rosidl_interface_packages - - - ament_cmake - + + + kestrel_msgs + 0.1.0 + Custom messages, services, and actions for the Kestrel Drone + David Orjuela + MIT + + ament_cmake + + rosidl_default_generators + rosidl_default_runtime + + std_msgs + std_msgs + geometry_msgs + geometry_msgs + + rosidl_interface_packages + + + ament_cmake + \ No newline at end of file diff --git a/src/kestrel_msgs/srv/CalibrateCamera.srv b/src/kestrel_msgs/srv/CalibrateCamera.srv index d139c88..0c4f109 100644 --- a/src/kestrel_msgs/srv/CalibrateCamera.srv +++ b/src/kestrel_msgs/srv/CalibrateCamera.srv @@ -1,13 +1,4 @@ -# Service to trigger a camera calibration routine - -# Request -# Calibration mode constants -uint8 CALIBRATE_INTRINSIC = 0 -uint8 CALIBRATE_EXTRINSIC = 1 -uint8 CALIBRATE_WHITE_BALANCE = 2 - -uint8 calibration_mode ---- -# Response -bool success -string message # Provides details on success or failure (like "Calibration complete" or "FAILED: Camera not found") +# Empty request +--- + +# Empty response diff --git a/src/kestrel_msgs/srv/GetSystemStatus.srv b/src/kestrel_msgs/srv/GetSystemStatus.srv index 990125e..0c4f109 100644 --- a/src/kestrel_msgs/srv/GetSystemStatus.srv +++ b/src/kestrel_msgs/srv/GetSystemStatus.srv @@ -1,7 +1,4 @@ -# Service to retrieve the latest system health status. - -# Request (should be empty) ---- -# Response -# Returns the complete system health message -kestrel_msgs/SystemHealth system_health \ No newline at end of file +# Empty request +--- + +# Empty response diff --git a/src/kestrel_msgs/srv/ResetSensors.srv b/src/kestrel_msgs/srv/ResetSensors.srv index 757fdc8..0c4f109 100644 --- a/src/kestrel_msgs/srv/ResetSensors.srv +++ b/src/kestrel_msgs/srv/ResetSensors.srv @@ -1,9 +1,4 @@ -# Service to restart and re-initialize one or more sensors. - -# Request -# A list of sensor_ids to reset. If empty, reset all sensors. 
-string[] sensor_ids ---- -# Response -bool success -string message # example.. like "Successfully reset 3 sensors" or "Error: sensor 'tof_front' failed to respond" \ No newline at end of file +# Empty request +--- + +# Empty response diff --git a/src/kestrel_msgs/srv/SetFlightMode.srv b/src/kestrel_msgs/srv/SetFlightMode.srv index a46d878..0c4f109 100644 --- a/src/kestrel_msgs/srv/SetFlightMode.srv +++ b/src/kestrel_msgs/srv/SetFlightMode.srv @@ -1,9 +1,4 @@ -# Service to switch the drones flight mode - -# Request -# ThIS uses the flight mode constants defined in kestrel_msgs/FlightStatus -uint8 flight_mode ---- -# Response -bool success -string message # ex... "Flight mode set to AUTO" or "Failed: Transition to AUTO not possible in current state" \ No newline at end of file +# Empty request +--- + +# Empty response diff --git a/src/kestrel_msgs/srv/TriggerEmergencyStop.srv b/src/kestrel_msgs/srv/TriggerEmergencyStop.srv index cb9b6ec..0c4f109 100644 --- a/src/kestrel_msgs/srv/TriggerEmergencyStop.srv +++ b/src/kestrel_msgs/srv/TriggerEmergencyStop.srv @@ -1,9 +1,4 @@ -# Service to immediately stop the drone in an emergency - -# Request -# A brief HUMAN READABLE reason for the emergency stop for logging purposes -string reason ---- -# Response -bool acknowledged # so this confirms that the emergency stop command has been received and is being executed -string message # ex... "Emergency stop acknowledged, Reason: Proximity breach" +# Empty request +--- + +# Empty response diff --git a/src/_archive_legacy/models/deepSORT/DataAssociation.py b/src/kestrel_tracking/kestrel_tracking/DataAssociation.py similarity index 51% rename from src/_archive_legacy/models/deepSORT/DataAssociation.py rename to src/kestrel_tracking/kestrel_tracking/DataAssociation.py index bb77623..093dde6 100644 --- a/src/_archive_legacy/models/deepSORT/DataAssociation.py +++ b/src/kestrel_tracking/kestrel_tracking/DataAssociation.py @@ -1,366 +1,397 @@ -#This skeleton code's structure comes from the research paper: https://www.mdpi.com/2076-3417/12/3/1319 - -#Contain the CNN that deepSORT uses in replace of the Hungarion based Cost Matrix -from filterpy.kalman import KalmanFilter -import numpy as np -from numpy.typing import NDArray - -class DataAssociation: - """ - For all parameters: - detections : List[List[float]] or List[np.ndarray] - A list of bounding boxes representing current detections, each in the format [x, y, w, h]. - tracks : List[List[float]] or List[np.ndarray] - A list of bounding boxes representing predicted tracks, each in the format [x, y, w, h]. - """ - - - #Euclidean Distance Based Cost Matrix (𝐷𝐸(𝐷,𝑃)) - def euclidean_cost( - self, - detections: NDArray[np.float_], - tracks: NDArray[np.float_], - image_dims: tuple[int, int] - ) -> NDArray[np.float_]: - """ - Computes the Euclidean distance cost matrix (𝐷𝐸(𝐷,𝑃)), which represents - the distance between bounding box central points normalized into half - of the image dimension. To formulate the problem as a maximization - problem, the distance is obtained by the difference between 1 and the - normalized Euclidean distance. - - d(Di, Pi) = 1 - [sqrt((u_Di - u_Pi)^2 + (v_Di - v_Pi)^2) / (1/2) * sqrt(h^2 + w^2)] - - where (h, w) are the height and width of the input image. 
- """ - #Retrieve lengths - detections = np.array(detections) - tracks = np.array(tracks) - - #Store bounding boxes centers for computation - detections_pos = detections[:, 0:2] + detections[:, 2:4] / 2.0 # (D, 2) - Center of each detection - tracks_pos = tracks[:, 0:2] + tracks[:, 2:4] / 2.0 # (T, 2) - Center of each track - - #Calculate norm based off image size - norm = 0.5 * np.sqrt(image_dims[0]**2 + image_dims[1]**2) - - #Subtract so u_Di - u_Pi & v_Di - v_Pi - delta = detections_pos[:, None, :] - tracks_pos[None, :, :] - - #Perform linear norm of sum of deltas - dist_matrix = np.linalg.norm(delta, axis=2) - - #Compute cost matrix - euclidean_cost_matrix = np.clip((dist_matrix / norm), 0.0, 1.0) # Ensures all values are bounded between 0 and 1. - - return euclidean_cost_matrix - - #Bounding Box Ratio Based Cost Matrix (𝑅(𝐷,𝑃)) - def bbox_ratio_cost( - self, - detections: NDArray[np.float_], - tracks: NDArray[np.float_] - ) -> NDArray[np.float_]: - - """ - Computes the bounding box ratio-based cost matrix (𝑅(𝐷,𝑃)), which is - implemented as a ratio between the product of each width and height. - - r(Di, Pi) = min( (w_Di * h_Di) / (w_Pi * h_Pi), (w_Pi * h_Pi) / (w_Di * h_Di) ) - - Returns a cost matrix where lower values indicate better box shape alignment. - - Box shape similarity ranges from 0 (different) to 1 (identical), and is converted to cost as: - cost_r = 1.0 - similarity_r. - - Note: This implementation matches the paper definition, which compares area ratios only. - This may return cost = 0 even for mismatched shapes (e.g., 10x1 vs 1x10), since area = same. - - """ - if len(detections) == 0 or len(tracks) == 0: - return np.array([]) - - detections = np.array(detections) # (D, 4) - tracks = np.array(tracks) # (T, 4) - - # Gets every width and height from each row into 2 1D arrays and calculates area - detection_areas = detections[:, 2] * detections[:, 3] - track_areas = tracks[:, 2] * tracks[:, 3] # (T,) = (T,) * (T,) - - # Transform the 1D arrays to broadcast into (D, T) - detection_areas = detection_areas[:, None]# (D, 1): [[1], [2], [3]] - track_areas = track_areas[None, :] # (1, T): [[1, 2, 3, 4]] - - # Prevent divide-by-zero - eps = 1e-6 - detection_areas = np.maximum(detection_areas, eps) - track_areas = np.maximum(track_areas, eps) - - if np.any(detection_areas == eps) or np.any(track_areas == eps): - print("[Warning] Zero-area bounding box encountered. 
Auto-corrected to epsilon.") - - # Calculates ratio and broadcasts to (D, T) - ratio1 = detection_areas / track_areas - ratio2 = track_areas / detection_areas - - # Calculates cost at every [i, j] - bbox_cost_matrix = 1.0 - np.minimum(ratio1, ratio2) - return bbox_cost_matrix - - - #SORT’s IoU Cost Matrix - def iou_cost( - self, - detections: NDArray[np.float_], - tracks: NDArray[np.float_] - ) -> NDArray[np.float_]: - - detections = np.array(detections) - tracks = np.array(tracks) - - det_x1 = detections[:, 0:1] - det_y1 = detections[:, 1:2] - det_x2 = det_x1 + detections[:, 2:3] - det_y2 = det_y1 + detections[:, 3:4] - - trk_x1 = tracks[:, 0].reshape(1,-1) - trk_y1 = tracks[:, 1].reshape(1,-1) - trk_x2 = trk_x1 + tracks[:, 2].reshape(1,-1) - trk_y2 = trk_y1 + tracks[:, 3].reshape(1,-1) - - detectionWidth = detections[:,2:3] - detectionHeight = detections[:,3:4] - - areaDetection = detectionWidth * detectionHeight - areaTrack = tracks[:, 2].reshape(1, -1) * tracks[:, 3].reshape(1, -1) - - inter_x1 = np.maximum(det_x1, trk_x1) - inter_y1 = np.maximum(det_y1, trk_y1) - inter_x2 = np.minimum(det_x2, trk_x2) - inter_y2 = np.minimum(det_y2, trk_y2) - - inter_w = np.maximum(0.0, inter_x2 - inter_x1) - inter_h = np.maximum(0.0, inter_y2 - inter_y1) - - # AoI = Area of Intersection, AoU = Area of Union - AoI = inter_w * inter_h - AoU = areaDetection + areaTrack - AoI - - - # Explicitly Guard Against Divide-by-Zero - with np.errstate(divide='ignore', invalid='ignore'): - iou_raw = np.divide(AoI, AoU, out=np.zeros_like(AoI), where=AoU > 0) - if np.any(AoU == 0): - print("[Warning] Divide by zero encountered in IoU computation.") # Consider removal when production ready! - - # Convert NaN to 0 before computing cost - iou_raw = np.nan_to_num(iou_raw, nan=0.0, posinf=0.0, neginf=0.0) - - iou_matrix = np.clip(iou_raw, 0.0, 1.0) - - return 1.0 - iou_matrix - - #SORT’s IoU Cost Matrix Combined with the Euclidean Distance Cost Matrix (𝐸𝐼𝑜𝑈𝐷(𝐷,𝑃)) - def iou_euclidean_cost(self, detections, tracks, image_dims): - """ - Computes the IoU cost matrix combined with the Euclidean distance cost - matrix using the Hadamard (element-wise) product: - - EIoUD(D, P) = IoU(D, P) ∘ DE(D, P) - - where ∘ represents element-wise multiplication. - """ - #Call iou cost matrix - iou_matrix = np.array(self.iou_cost(detections, tracks)) - - #Call euclidean cost matrix - euclidean_matrix = np.array(self.euclidean_cost(detections, tracks, image_dims)) - - #Perform Hadamard product - iou_euclidean_cost_matrix = iou_matrix * euclidean_matrix - - return iou_euclidean_cost_matrix - - - #SORT’s IoU Cost Matrix Combined with the Bounding Box Ratio Based Cost Matrix (𝑅𝐼𝑜𝑈(𝐷,𝑃)) - def iou_bbox_ratio_cost(self, detections, tracks): - """ - Computes the IoU cost matrix combined with the bounding box ratio-based - cost matrix using the Hadamard (element-wise) product: - - RIoU(D, P) = IoU(D, P) ∘ R(D, P) - - where ∘ represents element-wise multiplication. 
- """ - num_detections, num_tracks = len(detections), len(tracks) - - if num_detections == 0 or num_tracks == 0: - return np.array([]) - - cost_iou = np.asarray(self.iou_cost(detections, tracks)) # IoU cost matrix - cost_bbr = np.asarray(self.bbox_ratio_cost(detections, tracks)) # Bounding box ratio cost matrix - - if np.shape(cost_iou) != np.shape(cost_bbr): - raise ValueError("IoU cost matrix and bbox ratio cost matrix are of different shapes") - - return cost_iou * cost_bbr # Element-wise multiplication - - - #Euclidean Distance Cost Matrix Combined with the Bounding Box Ratio Based Cost Matrix (𝑅𝐷𝐸(𝐷,𝑃)) - def euclidean_bbox_ratio_cost(self, detections, tracks, image_dims): - """ - Computes the Euclidean distance cost matrix combined with the bounding box - ratio-based cost matrix using the Hadamard (element-wise) product: - RDE(D, P) = DE(D, P) ∘ R(D, P) - where ∘ represents element-wise multiplication. - """ - num_detections, num_tracks = len(detections), len(tracks) - - if num_detections == 0 or num_tracks == 0: - return np.array([]) - - cost_de = np.asarray(self.euclidean_cost(detections, tracks, image_dims)) - cost_r = np.asarray(self.bbox_ratio_cost(detections, tracks)) - - if np.shape(cost_de) != np.shape(cost_r): - raise ValueError("Euclidean cost matrix and bbox ratio cost matrix are of different shapes") - - # performs element-wise multiplication - cost_rde = np.multiply(cost_de, cost_r) - - return cost_rde - - - #Step 7: SORT's IoU Cost Matrix Combined with the Euclidean Distance Cost Matrix and the Bounding Box Ratio Based Cost Matrix (𝑀(𝐷,𝑃)) - def combined_cost_matrix(self, detections, tracks, image_dims): - """ - Computes the IoU cost matrix combined with the Euclidean distance cost - matrix and the bounding box ratio-based cost matrix using the Hadamard - (element-wise) product: - - M(D, P) = IoU(D, P) ∘ DE(D, P) ∘ R(D, P) - - where ∘ represents element-wise multiplication. 
- """ - - num_detections = len(detections) - num_tracks = len(tracks) - - if num_detections == 0 or num_tracks == 0: - return np.array([]) - - matrix_iou = self.iou_cost(detections, tracks) - matrix_de = self.euclidean_cost(detections, tracks, image_dims) - matrix_r = self.bbox_ratio_cost(detections, tracks) - - # Ensure all matrices have the same shape - if not (matrix_iou.shape == matrix_de.shape == matrix_r.shape == (num_detections, num_tracks)): - raise ValueError("Cost matrices must have the same shape.") - - # Compute the combined cost matrix using element-wise multiplication - # Each component is already a cost (lower = better), so multiplication is safe - combined_matrix = matrix_iou * matrix_de * matrix_r - - return combined_matrix - - - #Element-wise Average of Every Cost Matrix (𝐴(𝐷,𝑃)) - def average_cost_matrix(self, detections, tracks, image_dims): - """ - Computes the element-wise average of every cost matrix: - - A(Di, Pi) = (IoU(Di, Pj) + DE(Di, Pj) + R(Di, Pj)) / 3, for i ∈ D, j ∈ P - """ - num_detections = len(detections) - num_tracks = len(tracks) - - if num_detections == 0 or num_tracks == 0: - return np.array([]) #Return an empty array if there are no tracks or detections - - cost_iou = self.iou_cost(detections, tracks) - cost_de = self.euclidean_cost(detections, tracks, image_dims) - cost_r = self.bbox_ratio_cost(detections, tracks) - - # Ensure all shapes match - if not (cost_iou.shape == cost_de.shape == cost_r.shape): - raise ValueError("Cost matrices are not aligned in shape.") - - #Final cost matrix - return (cost_iou + cost_de + cost_r) / 3 - - - #Element-wise Weighted Mean of Every Cost Matrix Value (𝑊𝑀(𝐷,𝑃)) - def weighted_mean_cost_matrix(self, detections, tracks, image_dims, lambda_iou=0.33, lambda_de=0.33, lambda_r=0.34): - """ - Computes the element-wise weighted mean of every cost matrix value: - - WM(Di, Pi) = (λ_IoU * IoU(Di, Pi) + λ_DE * DE(Di, Pi) + λ_R * R(Di, Pi)) - - where λ_IoU + λ_DE + λ_R = 1. - """ - ''' - It calculates a combined cost based on the Intersection over Union (IoU), - Euclidean distance, and bounding box ratio metrics, using specified weights. - ''' - num_detections = len(detections) - num_tracks = len(tracks) - - if num_detections == 0 or num_tracks == 0: - return np.array([]) #Return an empty array if there are no tracks or detections - - cost_matrix = np.zeros((num_detections, num_tracks)) - - #Ensure the weights sum to 1.0 - sum_lambdas = lambda_iou + lambda_de + lambda_r - if not np.isclose(sum_lambdas, 1.0): - print("Warning: Lambda weights do not sum to 1.0. I will normalize them.") - lambda_iou /= sum_lambdas - lambda_de /= sum_lambdas - lambda_r /= sum_lambdas - - #Compute the cost matrices using other cost functions. All other functions SHOULD return cost matrices, if not change to: 1.0 - output_matrix - cost_iou = self.iou_cost(detections, tracks) - cost_euclidean = self.euclidean_cost(detections, tracks, image_dims) - cost_ratio = self.bbox_ratio_cost(detections, tracks) - - #Vectorized weight sum. NumPy arrays are implemented in C under the hood. 
- #So with these arrays, math operations are executed in compiled C code, not interpreted Python - #Rather than iterating through with nested loops, we can perform vector/matrix multiplication on the arrays as a whole - cost_matrix = ( - lambda_iou * cost_iou + - lambda_de * cost_euclidean + - lambda_r * cost_ratio - ) - - return cost_matrix - - - #Class Gate Update Based on Object Class Match (𝐶∗(𝐷,𝑃)) - def class_gate_cost_matrix(self, cost_matrix, detection_classes, track_classes): - """ - Updates the cost matrix based on the match between predicted and detected - object class. If the class labels do not match, the cost is set to 0: - - C*(Ci, j, Di, Pi) = { Ci, j if Class_Di = Class_Pi, 0 otherwise } - - for i ∈ D, j ∈ P. - """ - ''' - This function updates cost matrices based on the match between - predicted and detected object classes''' - - num_detections = cost_matrix.shape[0] - num_tracks = cost_matrix.shape[1] - - if num_detections != len(detection_classes) or num_tracks != len(track_classes): - raise ValueError("Dimensions of cost_matrix, detection_classes, and track_classes do not match - Class Gate Cost Matrix") - - #Create a boolean mask where classses match - # Reshapes to (num_detections, 1) Reshapes to (1, num_tracks) -> (D, T) - match_mask = (np.array(detection_classes)[:, None] == np.array(track_classes)[None, :]) #Shape = [num_detections, num_tracks] - #Because detection_classes has the same number of rows as track_classes has columns, we can perform matrix multiplication - - #Apply the mask and keep the values where classes match, zero where they do not - gated_cost_matrix = cost_matrix * match_mask - - return gated_cost_matrix \ No newline at end of file +#This skeleton code's structure comes from the research paper: https://www.mdpi.com/2076-3417/12/3/1319 + +import numpy as np +from numpy.typing import NDArray + +class DataAssociation: + """ + All bbox arrays are expected in format: + [cx, cy, w, h] (center coordinates + width/height) + + All cost matrices returned are shape: + (T, D) = (num_tracks, num_detections) + + Convention: + - Lower cost is better + - Costs are clipped/bounded into [0, 1] for the included costs + """ + + + @staticmethod + def _cxcywh_to_xyxy(boxes: NDArray[np.float_]) -> tuple[NDArray[np.float_], NDArray[np.float_], NDArray[np.float_], NDArray[np.float_]]: + """ + Convert boxes (N,4) in cxcywh into x1,y1,x2,y2 (each (N,1) or (1,N) depending on reshape). + """ + boxes = np.asarray(boxes, dtype=np.float32) + cx = boxes[:, 0] + cy = boxes[:, 1] + w = boxes[:, 2] + h = boxes[:, 3] + + x1 = cx - w / 2.0 + y1 = cy - h / 2.0 + x2 = cx + w / 2.0 + y2 = cy + h / 2.0 + return x1, y1, x2, y2 + + #Euclidean Distance Based Cost Matrix (𝐷𝐸(𝐷,𝑃)) + def euclidean_cost( + self, + tracks: NDArray[np.float_], # (T,4) cxcywh + detections: NDArray[np.float_], # (D,4) cxcywh + image_dims: tuple[int, int], + ) -> NDArray[np.float_]: + """ + Euclidean distance between centers normalized by half the image diagonal. + Returns (T, D) in [0, 1] where 0 is best (same center), 1 is worst (far). + + d(Di, Pi) = 1 - [sqrt((u_Di - u_Pi)^2 + (v_Di - v_Pi)^2) / (1/2) * sqrt(h^2 + w^2)] + + where (h, w) are the height and width of the input image. 
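+
+        Note: the implementation below returns the bracketed normalized
+        distance directly as the cost (i.e. the complement of d(Di, Pi)
+        above), so 0 means identical centers. For example, with
+        image_dims = (720, 1280) the half-diagonal is about 734 px, so
+        centers 100 px apart cost roughly 100 / 734 ≈ 0.14, and separations
+        beyond the half-diagonal saturate at 1.0.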
+ """ + tracks = np.asarray(tracks, dtype=np.float32) + detections = np.asarray(detections, dtype=np.float32) + + if tracks.size == 0 or detections.size == 0: + return np.zeros((tracks.shape[0], detections.shape[0]), dtype=np.float32) + + # Centers are already (cx, cy) + trk_pos = tracks[:, 0:2] # (T,2) + det_pos = detections[:, 0:2] # (D,2) + + # Normalization constant: half the image diagonal + H, W = image_dims + norm = 0.5 * np.sqrt(float(H) ** 2 + float(W) ** 2) + + # Broadcast subtract: + # trk_pos[:, None, :] -> (T,1,2) + # det_pos[None, :, :] -> (1,D,2) + delta = trk_pos[:, None, :] - det_pos[None, :, :] # (T,D,2) + dist = np.linalg.norm(delta, axis=2) # (T,D) + + cost = np.clip(dist / (norm + 1e-12), 0.0, 1.0).astype(np.float32) # (T,D) + return cost + + #Bounding Box Ratio Based Cost Matrix (𝑅(𝐷,𝑃)) + def bbox_ratio_cost( + self, + tracks: NDArray[np.float_], # (T,4) cxcywh + detections: NDArray[np.float_], # (D,4) cxcywh + ) -> NDArray[np.float_]: + + """ + Computes the bounding box ratio-based cost matrix (𝑅(𝐷,𝑃)), which is + implemented as a ratio between the product of each width and height. + + r(Di, Pi) = min( (w_Di * h_Di) / (w_Pi * h_Pi), (w_Pi * h_Pi) / (w_Di * h_Di) ) + + cost = 1 - min(area_det/area_trk, area_trk/area_det) + + Note: this is area-only, so shapes like 10x1 and 1x10 have same area. + """ + tracks = np.asarray(tracks, dtype=np.float32) + detections = np.asarray(detections, dtype=np.float32) + + T = tracks.shape[0] + D = detections.shape[0] + if T == 0 or D == 0: + return np.zeros((T, D), dtype=np.float32) + + trk_areas = tracks[:, 2] * tracks[:, 3] # (T,) + det_areas = detections[:, 2] * detections[:, 3] # (D,) + + eps = 1e-6 + trk_areas = np.maximum(trk_areas, eps) + det_areas = np.maximum(det_areas, eps) + + # Broadcast to (T,D) + # trk_areas[:,None] => (T,1) + # det_areas[None,:] => (1,D) + ratio1 = det_areas[None, :] / trk_areas[:, None] # (T,D) det/trk + ratio2 = trk_areas[:, None] / det_areas[None, :] # (T,D) trk/det + + cost = 1.0 - np.minimum(ratio1, ratio2) + cost = np.clip(cost, 0.0, 1.0).astype(np.float32) + return cost + + + # SORT's IoU Cost + def iou_cost( + self, + tracks: NDArray[np.float_], # (T,4) cxcywh + detections: NDArray[np.float_], # (D,4) cxcywh + ) -> NDArray[np.float_]: + """ + IoU cost = 1 - IoU, returned as (T, D). 
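+
+        For example, two identical boxes give IoU = 1 and cost 0; disjoint
+        boxes give IoU = 0 and cost 1; two 10x10 boxes offset by 5 px overlap
+        with IoU = 50 / 150 = 1/3, giving a cost of about 0.67.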
+ """ + tracks = np.asarray(tracks, dtype=np.float32) + detections = np.asarray(detections, dtype=np.float32) + + T = tracks.shape[0] + D = detections.shape[0] + if T == 0 or D == 0: + return np.zeros((T, D), dtype=np.float32) + + # Convert both to xyxy + trk_x1, trk_y1, trk_x2, trk_y2 = self._cxcywh_to_xyxy(tracks) # each (T,) + det_x1, det_y1, det_x2, det_y2 = self._cxcywh_to_xyxy(detections) # each (D,) + + # Reshape for broadcasting: + # Tracks along rows (T,1), detections along cols (1,D) + trk_x1 = trk_x1[:, None] + trk_y1 = trk_y1[:, None] + trk_x2 = trk_x2[:, None] + trk_y2 = trk_y2[:, None] + + det_x1 = det_x1[None, :] + det_y1 = det_y1[None, :] + det_x2 = det_x2[None, :] + det_y2 = det_y2[None, :] + + inter_x1 = np.maximum(trk_x1, det_x1) + inter_y1 = np.maximum(trk_y1, det_y1) + inter_x2 = np.minimum(trk_x2, det_x2) + inter_y2 = np.minimum(trk_y2, det_y2) + + inter_w = np.maximum(0.0, inter_x2 - inter_x1) + inter_h = np.maximum(0.0, inter_y2 - inter_y1) + inter_area = inter_w * inter_h # (T,D) + + trk_area = np.maximum(0.0, (trk_x2 - trk_x1)) * np.maximum(0.0, (trk_y2 - trk_y1)) # (T,1) + det_area = np.maximum(0.0, (det_x2 - det_x1)) * np.maximum(0.0, (det_y2 - det_y1)) # (1,D) + + union = trk_area + det_area - inter_area # (T,D) + + with np.errstate(divide="ignore", invalid="ignore"): + iou = np.divide(inter_area, union, out=np.zeros_like(inter_area), where=union > 0) + + iou = np.nan_to_num(iou, nan=0.0, posinf=0.0, neginf=0.0) + iou = np.clip(iou, 0.0, 1.0).astype(np.float32) + + cost = (1.0 - iou).astype(np.float32) # (T,D) + return cost + + #SORT's IoU Cost Matrix Combined with the Euclidean Distance Cost Matrix (𝐸𝐼𝑜𝑈𝐷(𝐷,𝑃)) + def iou_euclidean_cost(self, detections, tracks, image_dims): + """ + Computes the IoU cost matrix combined with the Euclidean distance cost + matrix using the Hadamard (element-wise) product: + + EIoUD(D, P) = IoU(D, P) ∘ DE(D, P) + + where ∘ represents element-wise multiplication. + """ + #Call iou cost matrix + iou_matrix = np.array(self.iou_cost(detections, tracks)) + + #Call euclidean cost matrix + euclidean_matrix = np.array(self.euclidean_cost(detections, tracks, image_dims)) + + #Perform Hadamard product + iou_euclidean_cost_matrix = iou_matrix * euclidean_matrix + + return iou_euclidean_cost_matrix + + + #SORT’s IoU Cost Matrix Combined with the Bounding Box Ratio Based Cost Matrix (𝑅𝐼𝑜𝑈(𝐷,𝑃)) + def iou_bbox_ratio_cost(self, detections, tracks): + """ + Computes the IoU cost matrix combined with the bounding box ratio-based + cost matrix using the Hadamard (element-wise) product: + + RIoU(D, P) = IoU(D, P) ∘ R(D, P) + + where ∘ represents element-wise multiplication. 
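+
+        Since both factors lie in [0, 1], the product is also in [0, 1]; a
+        pairing keeps a high cost only when both the overlap cost and the
+        area-ratio cost are high (e.g. 0.9 * 0.9 = 0.81, but 0.9 * 0.1 = 0.09).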
+ """ + num_detections, num_tracks = len(detections), len(tracks) + + if num_detections == 0 or num_tracks == 0: + return np.array([]) + + cost_iou = np.asarray(self.iou_cost(detections, tracks)) # IoU cost matrix + cost_bbr = np.asarray(self.bbox_ratio_cost(detections, tracks)) # Bounding box ratio cost matrix + + if np.shape(cost_iou) != np.shape(cost_bbr): + raise ValueError("IoU cost matrix and bbox ratio cost matrix are of different shapes") + + return cost_iou * cost_bbr # Element-wise multiplication + + + #Euclidean Distance Cost Matrix Combined with the Bounding Box Ratio Based Cost Matrix (𝑅𝐷𝐸(𝐷,𝑃)) + def euclidean_bbox_ratio_cost(self, detections, tracks, image_dims): + """ + Computes the Euclidean distance cost matrix combined with the bounding box + ratio-based cost matrix using the Hadamard (element-wise) product: + RDE(D, P) = DE(D, P) ∘ R(D, P) + where ∘ represents element-wise multiplication. + """ + num_detections, num_tracks = len(detections), len(tracks) + + if num_detections == 0 or num_tracks == 0: + return np.array([]) + + cost_de = np.asarray(self.euclidean_cost(detections, tracks, image_dims)) + cost_r = np.asarray(self.bbox_ratio_cost(detections, tracks)) + + if np.shape(cost_de) != np.shape(cost_r): + raise ValueError("Euclidean cost matrix and bbox ratio cost matrix are of different shapes") + + # performs element-wise multiplication + cost_rde = np.multiply(cost_de, cost_r) + + return cost_rde + + + #Step 7: SORT's IoU Cost Matrix Combined with the Euclidean Distance Cost Matrix and the Bounding Box Ratio Based Cost Matrix (𝑀(𝐷,𝑃)) + def combined_cost_matrix(self, detections, tracks, image_dims): + """ + Computes the IoU cost matrix combined with the Euclidean distance cost + matrix and the bounding box ratio-based cost matrix using the Hadamard + (element-wise) product: + + M(D, P) = IoU(D, P) ∘ DE(D, P) ∘ R(D, P) + + where ∘ represents element-wise multiplication. 
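+
+        Multiplying three [0, 1] costs compresses values toward 0: component
+        costs of 0.5, 0.4 and 0.2, for instance, combine to
+        0.5 * 0.4 * 0.2 = 0.04, so only pairings that score badly on every
+        metric keep a high combined cost.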
+ """ + + num_detections = len(detections) + num_tracks = len(tracks) + + if num_detections == 0 or num_tracks == 0: + return np.array([]) + + matrix_iou = self.iou_cost(detections, tracks) + matrix_de = self.euclidean_cost(detections, tracks, image_dims) + matrix_r = self.bbox_ratio_cost(detections, tracks) + + # Ensure all matrices have the same shape + if not (matrix_iou.shape == matrix_de.shape == matrix_r.shape == (num_detections, num_tracks)): + raise ValueError("Cost matrices must have the same shape.") + + # Compute the combined cost matrix using element-wise multiplication + # Each component is already a cost (lower = better), so multiplication is safe + combined_matrix = matrix_iou * matrix_de * matrix_r + + return combined_matrix + + + #Element-wise Average of Every Cost Matrix (𝐴(𝐷,𝑃)) + def average_cost_matrix(self, detections, tracks, image_dims): + """ + Computes the element-wise average of every cost matrix: + + A(Di, Pi) = (IoU(Di, Pj) + DE(Di, Pj) + R(Di, Pj)) / 3, for i ∈ D, j ∈ P + """ + num_detections = len(detections) + num_tracks = len(tracks) + + if num_detections == 0 or num_tracks == 0: + return np.array([]) #Return an empty array if there are no tracks or detections + + cost_iou = self.iou_cost(detections, tracks) + cost_de = self.euclidean_cost(detections, tracks, image_dims) + cost_r = self.bbox_ratio_cost(detections, tracks) + + # Ensure all shapes match + if not (cost_iou.shape == cost_de.shape == cost_r.shape): + raise ValueError("Cost matrices are not aligned in shape.") + + #Final cost matrix + return (cost_iou + cost_de + cost_r) / 3 + + + #Element-wise Weighted Mean of Every Cost Matrix Value (𝑊𝑀(𝐷,𝑃)) + def weighted_mean_cost_matrix( + self, + tracks: NDArray[np.float_], # (T,4) cxcywh + detections: NDArray[np.float_], # (D,4) cxcywh + image_dims: tuple[int, int], + lambda_iou: float = 7 / 10, + lambda_de: float = 2 / 10, + lambda_r: float = 1 / 10, + ) -> NDArray[np.float_]: + """ + WM = lambda_iou * IoU_cost + lambda_de * Euclidean_cost + lambda_r * Ratio_cost + Returns (T, D). + """ + tracks = np.asarray(tracks, dtype=np.float32) + detections = np.asarray(detections, dtype=np.float32) + + T = tracks.shape[0] + D = detections.shape[0] + if T == 0 or D == 0: + return np.zeros((T, D), dtype=np.float32) + + # Ensure weights sum to 1.0 + s = float(lambda_iou + lambda_de + lambda_r) + if not np.isclose(s, 1.0): + lambda_iou /= s + lambda_de /= s + lambda_r /= s + + cost_iou = self.iou_cost(tracks, detections) # (T,D) + cost_de = self.euclidean_cost(tracks, detections, image_dims) # (T,D) + cost_r = self.bbox_ratio_cost(tracks, detections) # (T,D) + + wm = (lambda_iou * cost_iou) + (lambda_de * cost_de) + (lambda_r * cost_r) + wm = np.clip(wm, 0.0, 1.0).astype(np.float32) + return wm + + + #Class Gate Update Based on Object Class Match (𝐶∗(𝐷,𝑃)) + def class_gate_cost_matrix(self, cost_matrix, detection_classes, track_classes): + """ + Updates the cost matrix based on the match between predicted and detected + object class. If the class labels do not match, the cost is set to 0: + + C*(Ci, j, Di, Pi) = { Ci, j if Class_Di = Class_Pi, 0 otherwise } + + for i ∈ D, j ∈ P. 
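+
+        Caution: with the "lower cost is better" convention used by the other
+        matrices in this class, setting mismatched pairs to 0 makes them
+        maximally attractive to a minimizing assignment; if the matrix being
+        gated is a cost rather than a similarity, a large constant is likely
+        the safer gate value.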
+ """ + ''' + This function updates cost matrices based on the match between + predicted and detected object classes''' + + num_detections = cost_matrix.shape[0] + num_tracks = cost_matrix.shape[1] + + if num_detections != len(detection_classes) or num_tracks != len(track_classes): + raise ValueError("Dimensions of cost_matrix, detection_classes, and track_classes do not match - Class Gate Cost Matrix") + + #Create a boolean mask where classses match + # Reshapes to (num_detections, 1) Reshapes to (1, num_tracks) -> (D, T) + match_mask = (np.array(detection_classes)[:, None] == np.array(track_classes)[None, :]) #Shape = [num_detections, num_tracks] + #Because detection_classes has the same number of rows as track_classes has columns, we can perform matrix multiplication + + #Apply the mask and keep the values where classes match, zero where they do not + gated_cost_matrix = cost_matrix * match_mask + + return gated_cost_matrix + + +_DA = DataAssociation() + +def weighted_mean_cost_matrix( + tracks: NDArray[np.float_], + detections: NDArray[np.float_], + image_dims: tuple[int, int], + lambda_iou: float = 7 / 10, + lambda_de: float = 2 / 10, + lambda_r: float = 1 / 10, +) -> NDArray[np.float_]: + """ + Pure NumPy in/out helper. + Expects: + tracks: (T,4) cxcywh + detections: (D,4) cxcywh + Returns: + (T,D) cost matrix + """ + return _DA.weighted_mean_cost_matrix( + tracks=tracks, + detections=detections, + image_dims=image_dims, + lambda_iou=lambda_iou, + lambda_de=lambda_de, + lambda_r=lambda_r, + ) \ No newline at end of file diff --git a/src/kestrel_tracking/kestrel_tracking/tracking_node.py b/src/kestrel_tracking/kestrel_tracking/tracking_node.py new file mode 100644 index 0000000..0579cdd --- /dev/null +++ b/src/kestrel_tracking/kestrel_tracking/tracking_node.py @@ -0,0 +1,555 @@ +import rclpy +from rclpy.node import Node + +from kestrel_msgs.msg import DetectionArray, Track as TrackMsg, TrackArray +from kestrel_tracking.DataAssociation import weighted_mean_cost_matrix + +import numpy as np +from dataclasses import dataclass +from typing import List, Sequence, Tuple +from scipy.optimize import linear_sum_assignment + +from filterpy.kalman import KalmanFilter as FP_KalmanFilter + + +# ----------------------------- +# Detection struct + adapters +# ----------------------------- +@dataclass(frozen=True) +class Det: + """One detection in convenient tracker format.""" + x1: float + y1: float + x2: float + y2: float + z: np.ndarray # (4,1) measurement: [cx, cy, w, h]^T + embedding: np.ndarray # (128,) appearance embedding + conf: float = 0.0 + class_name: str = "" + +def det_from_ros(det_msg) -> Det: + """Convert kestrel_msgs/Detection -> Det.""" + z = np.array( + [det_msg.center_x, det_msg.center_y, det_msg.w, det_msg.h], + dtype=np.float32 + ).reshape(4,1) + + emb = np.array(det_msg.embedding, dtype=np.float32) + + return Det( + x1=float(det_msg.x1), + y1=float(det_msg.y1), + x2=float(det_msg.x2), + y2=float(det_msg.y2), + z=z, + embedding=emb, + conf=float(det_msg.conf), + class_name=str(det_msg.class_name), + ) + + +# ----------------------------- +# Geometry helpers +# ----------------------------- +def state2bbox_xyxy(state: Sequence[float]) -> List[float]: + """ + Convert a KF state -> bbox [x1, y1, x2, y2] using state[:4] = [cx, cy, w, h]. + Keep as floats; cast to int only for drawing. 
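+
+    For example, a state beginning [50., 40., 20., 10., ...] maps to the bbox
+    [40., 35., 60., 45.].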
+ """ + cx, cy, w, h = state[:4] + x1 = cx - w / 2.0 + y1 = cy - h / 2.0 + x2 = cx + w / 2.0 + y2 = cy + h / 2.0 + return [float(x1), float(y1), float(x2), float(y2)] + + +# ----------------------------- +# Costs + gating (vectorized) +# ----------------------------- +def mahalanobis_gate_mask( + tracks: List["Track"], + det_z: np.ndarray, + gate_d2_thresh: float +) -> np.ndarray: + """ + Build a (T,D) boolean mask where True means the pairing is allowed by gating. + + Implementation detail: + - We loop over tracks (T loops) + - We vectorize over detections per-track (no T*D Python loops) + + Args: + tracks: list of Track objects + det_z: (D,4) array of detection measurements in cxcywh + gate_d2_thresh: squared Mahalanobis threshold + + Returns: + mask: (T,D) boolean array + """ + T = len(tracks) + D = det_z.shape[0] + mask = np.zeros((T, D), dtype=bool) + + if T == 0 or D == 0: + return mask + + # Ensure det_z is float32 for speed/consistency + det_z = np.asarray(det_z, dtype=np.float32) + + # Precompute det_z as (4,D) once, so each track can reuse it cheaply + # det_z_T has columns = detections + det_z_T = det_z.T # (4,D) + + for i, trk in enumerate(tracks): + kf = trk.kf + + # Innovation covariance S = HPH^T + R (4x4) + H = kf.H.astype(np.float32) + P = kf.P.astype(np.float32) + R = kf.R.astype(np.float32) + S = H @ P @ H.T + R # (4,4) + + # Predicted measurement Hx (4,1) + Hx = (H @ kf.x).astype(np.float32) # (4,1) + + # Innovations for all detections at once: + # y = z - Hx, but z is (4,D) and Hx is (4,1) so broadcast works + y = det_z_T - Hx # (4,D) + + # Solve S^{-1} y for all columns at once (4,D) + # This avoids explicitly computing inv(S) + try: + tmp = np.linalg.solve(S, y) # (4,D) + except np.linalg.LinAlgError: + # If S becomes singular for any reason, fail-safe: gate nothing for this track + continue + + # d2 per detection = y^T * S^{-1} * y + # Since y and tmp are (4,D), columnwise dot is sum(y * tmp) over rows + d2 = np.sum(y * tmp, axis=0) # (D,) + + mask[i, :] = (d2 <= gate_d2_thresh) + + return mask + + +def mahalanobis_distance_squared(kf: FP_KalmanFilter, z: np.ndarray) -> float: + """ + Compute squared Mahalanobis distance: + d^2 = (z - Hx)^T S^{-1} (z - Hx), + where S = HPH^T + R. + """ + H = kf.H + x = kf.x + y = z - (H @ x) # innovation + S = H @ kf.P @ H.T + kf.R # innovation covariance + + # Solve S^{-1} y without explicitly inverting S + tmp = np.linalg.solve(S, y) + d2 = float((y.T @ tmp).item()) + return d2 + + +def appearance_cost(track_feature: np.ndarray, det_feature: np.ndarray) -> float: + """Cosine distance = 1 - cosine similarity.""" + if track_feature is None: + return 1.0 # max cost if no feature + f1 = track_feature / (np.linalg.norm(track_feature) + 1e-12) + f2 = det_feature / (np.linalg.norm(det_feature) + 1e-12) + return float(1.0 - np.dot(f1, f2)) + +def appearance_cost_matrix( + track_feats: np.ndarray, + det_feats: np.ndarray, + track_has_feat: np.ndarray +) -> np.ndarray: + """ + Cosine distance matrix (T,D): + cost = 1 - cosine_similarity + + If a track has no feature, we force its cost row to 1.0 (max cost), + matching your old scalar behavior. + + Args: + track_feats: (T,F) float32 + det_feats: (D,F) float32 + track_has_feat: (T,) bool + + Returns: + (T,D) float32 matrix in [0,2] but practically [0,1] if normalized; we clip to [0,1]. 
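+
+    For example, identical (parallel) embeddings give cost 0, orthogonal
+    embeddings give cost 1, and anti-parallel embeddings would give 2 before
+    the final clip to [0, 1].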
+ """ + T = track_feats.shape[0] + D = det_feats.shape[0] + + if T == 0 or D == 0: + return np.zeros((T, D), dtype=np.float32) + + # Normalize features (avoid divide by 0) + eps = 1e-12 + + trk = track_feats.astype(np.float32, copy=False) + det = det_feats.astype(np.float32, copy=False) + + trk_norm = trk / (np.linalg.norm(trk, axis=1, keepdims=True) + eps) # (T,F) + det_norm = det / (np.linalg.norm(det, axis=1, keepdims=True) + eps) # (D,F) + + # Cosine similarity: (T,D) = (T,F) @ (F,D) + sim = trk_norm @ det_norm.T # (T,D) + + # Convert to cosine distance cost + cost = (1.0 - sim).astype(np.float32) + + # Tracks without features => max cost + # We set the entire row to 1.0 (same as your scalar function returning 1.0) + if track_has_feat is not None and track_has_feat.size == T: + cost[~track_has_feat, :] = 1.0 + + # Keep bounded for sanity + cost = np.clip(cost, 0.0, 1.0) + return cost + + +# ----------------------------- +# Assignment +# ----------------------------- +def hungarian_assign(cost: np.ndarray) -> Tuple[List[Tuple[int, int]], List[int], List[int]]: + """ + Solve Hungarian for minimal assignment. + Returns: + matches: (track_idx, det_idx) + unmatched_tracks: [track_idx] + unmatched_dets: [det_idx] + """ + if cost.size == 0: + # Nothing to match + return [], list(range(cost.shape[0])), list(range(cost.shape[1])) + + + # Run Hungarian + row_ind, col_ind = linear_sum_assignment(cost) + + # Build the matched pairs + matches = list(zip(row_ind.tolist(), col_ind.tolist())) + + # Determine unmatched pairs + all_tracks = set(range(cost.shape[0])) + all_dets = set(range(cost.shape[1])) + matched_tracks = set(row_ind.tolist()) + matched_dets = set(col_ind.tolist()) + + unmatched_tracks = sorted(list(all_tracks - matched_tracks)) + unmatched_dets = sorted(list(all_dets - matched_dets)) + + return matches, unmatched_tracks, unmatched_dets + + +# ----------------------------- +# Track + TrackManager +# ----------------------------- +class Track: + """ + KF state is [cx, cy, w, h, vx, vy, vw, vh]^T + Measurement z is [cx, cy, w, h]^T + """ + def __init__(self, track_id: int, init_det: Det, dt: float = 1.0): + self.id = int(track_id) + self.kf = FP_KalmanFilter(dim_x=8, dim_z=4) + self._init_kf(dt) + + + # Initialize state from detection measurement + cx, cy, w, h = init_det.z.flatten().tolist() + self.kf.x = np.array([cx, cy, w, h, 0.0, 0.0, 0.0, 0.0], dtype=np.float32).reshape(8, 1) + + #Initialize other attributes + self.conf = init_det.conf # Store conf + self.class_name = init_det.class_name # Store class + self.bbox_xyxy: List[float] = [init_det.x1, init_det.y1, init_det.x2, init_det.y2] + self.feature: np.ndarray | None = init_det.embedding + + self.missed: int = 0 # number of consecutive misses + self.hits: int = 1 # total number of hits + + + def _init_kf(self, dt: float) -> None: + # State transition matrix (constant velocity) + F = np.eye(8, dtype=np.float32) + for i in range(4): + F[i, i + 4] = dt + self.kf.F = F + + # Measurement matrix: observe [cx, cy, w, h] + H = np.zeros((4, 8), dtype=np.float32) + H[:, :4] = np.eye(4, dtype=np.float32) + self.kf.H = H + + #TODO: Tune Values Later + # Initialize covariance P (start uncertain about velocity) + self.kf.P *= 1000.0 + + # Measurement noise R + # Higher value -> trust measurements less + self.kf.R = np.eye(4, dtype=np.float32) * 10.0 + + # Process noise Q + # Small value -> assume near‐constant velocity + self.kf.Q = np.eye(8, dtype=np.float32) * 0.01 + + + def predict(self) -> None: + self.kf.predict() + self.bbox_xyxy = 
state2bbox_xyxy(self.kf.x.flatten()) + self.missed +=1 # increment; reset to 0 on successful update + + + def update(self, det: Det) -> None: + self.kf.update(det.z) + self.bbox_xyxy = state2bbox_xyxy(self.kf.x.flatten()) + self.conf = det.conf # Update conf + self.class_name = det.class_name # Not sure how this could happen but... + self.feature = det.embedding + self.missed = 0 + # Update the amount of times a track has been matched to a detection + self.hits += 1 + + def z_pred(self) -> np.ndarray: + """ + Predicted measurement (4,) in cxcywh: + z_pred = Hx + Using this keeps motion costs consistent with Mahalanobis gating space. + """ + z = (self.kf.H @ self.kf.x).reshape(4,) + return z.astype(np.float32, copy=False) + + +class TrackManager: + def __init__( + self, + max_age: int, + min_hits: int, + gate_d2_thresh: float, + w_motion: float, + w_app: float, + dt: float = 1.0, + image_dims: tuple[int, int] = (720, 1280), # <--- TODO: placeholder dims, must verify + ): + self.tracks: List[Track] = [] + self.next_id: int = 0 + + self.max_age = int(max_age) + self.min_hits = int(min_hits) + + # This threshold is squared Mahalanobis distance + self.gate_d2_thresh = float(gate_d2_thresh) + + self.w_motion = float(w_motion) + self.w_app = float(w_app) + self.dt = float(dt) + + self.image_dims = image_dims + + + # ---- tiny adapters (for use with DataAssociation) ---- + @staticmethod + def _stack_track_z(tracks: List[Track]) -> np.ndarray: + """ + Stack predicted track measurements into (T,4) cxcywh. + """ + if len(tracks) == 0: + return np.zeros((0, 4), dtype=np.float32) + return np.stack([t.z_pred() for t in tracks], axis=0).astype(np.float32) + + @staticmethod + def _stack_det_z(detections: List[Det]) -> np.ndarray: + """ + Stack detection measurements into (D,4) cxcywh. + """ + if len(detections) == 0: + return np.zeros((0, 4), dtype=np.float32) + return np.stack([d.z.reshape(4,) for d in detections], axis=0).astype(np.float32) + + @staticmethod + def _stack_track_features(tracks: List[Track], feat_dim: int = 128) -> tuple[np.ndarray, np.ndarray]: + """ + Stack track features into (T,F). If a track has no feature, fill zeros and mark it invalid. + + Returns: + feats: (T,F) float32 + has_feat: (T,) bool + """ + T = len(tracks) + if T == 0: + return np.zeros((0, feat_dim), dtype=np.float32), np.zeros((0,), dtype=bool) + + feats = np.zeros((T, feat_dim), dtype=np.float32) + has_feat = np.zeros((T,), dtype=bool) + + for i, t in enumerate(tracks): + if t.feature is None: + continue + feats[i, :] = t.feature.astype(np.float32, copy=False) + has_feat[i] = True + + return feats, has_feat + + @staticmethod + def _stack_det_features(detections: List[Det], feat_dim: int = 128) -> np.ndarray: + """ + Stack detection embeddings into (D,F). 
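+
+        feat_dim is only used to shape the empty case; otherwise the stacked
+        width follows the embeddings themselves (e.g. three 128-D embeddings
+        stack into a (3, 128) array).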
+        """
+        D = len(detections)
+        if D == 0:
+            return np.zeros((0, feat_dim), dtype=np.float32)
+        return np.stack([d.embedding.astype(np.float32, copy=False) for d in detections], axis=0)
+
+
+    def step(self, detections: List[Det]) -> List[Track]:
+        # 1) Predict existing tracks forward
+        for trk in self.tracks:
+            trk.predict()
+
+        # If no tracks exist, create from all detections
+        if len(self.tracks) == 0:
+            for det in detections:
+                self.tracks.append(Track(self.next_id, det, dt=self.dt))
+                self.next_id += 1
+            return [t for t in self.tracks if t.hits >= self.min_hits]
+
+        # If no detections, just age/prune tracks
+        if len(detections) == 0:
+            self.tracks = [t for t in self.tracks if t.missed <= self.max_age]
+            return [t for t in self.tracks if t.hits >= self.min_hits]
+
+        # ---- adapters (build arrays once per frame) ----
+        Zt = self._stack_track_z(self.tracks)       # (T,4) cxcywh
+        Zd = self._stack_det_z(detections)          # (D,4) cxcywh
+
+        Ft, track_has_feat = self._stack_track_features(self.tracks, feat_dim=128)  # (T,128), (T,)
+        Fd = self._stack_det_features(detections, feat_dim=128)                     # (D,128)
+
+        # ---- vectorized motion cost via DataAssociation (returns (T,D)) ----
+        mc = weighted_mean_cost_matrix(
+            tracks=Zt,
+            detections=Zd,
+            image_dims=self.image_dims,
+            # You can pass custom lambdas here if you want:
+            # lambda_iou=0.7, lambda_de=0.2, lambda_r=0.1
+        ).astype(np.float32, copy=False)  # (T,D)
+
+        # ---- vectorized appearance cost (returns (T,D)) ----
+        ac = appearance_cost_matrix(Ft, Fd, track_has_feat).astype(np.float32, copy=False)  # (T,D)
+
+        # ---- gating mask (returns (T,D) bool) ----
+        gate = mahalanobis_gate_mask(self.tracks, Zd, self.gate_d2_thresh)  # (T,D)
+
+        # ---- combine costs ----
+        cost = (self.w_motion * mc) + (self.w_app * ac)  # (T,D)
+
+        # Any disallowed pairing becomes "impossible"
+        cost = cost.astype(np.float32, copy=False)
+        cost[~gate] = 1e6
+
+        # 3) Hungarian assignment
+        matches, unmatched_tracks, unmatched_dets = hungarian_assign(cost)
+
+        # 4) Update matched tracks
+        for trk_idx, det_idx in matches:
+            if cost[trk_idx, det_idx] >= 1e6:
+                unmatched_tracks.append(trk_idx)
+                unmatched_dets.append(det_idx)
+                continue
+            self.tracks[trk_idx].update(detections[det_idx])
+
+        # 5) Unmatched tracks: do nothing (they already had missed++ in predict())
+        unmatched_dets = sorted(set(unmatched_dets))
+
+        # 6) Create new tracks for unmatched detections
+        for det_idx in unmatched_dets:
+            self.tracks.append(Track(self.next_id, detections[det_idx], dt=self.dt))
+            self.next_id += 1
+
+        # 7) Prune dead tracks
+        self.tracks = [t for t in self.tracks if t.missed <= self.max_age]
+
+        # 8) Return only publishable tracks
+        return [t for t in self.tracks if t.hits >= self.min_hits]
+
+
+
+# -----------------------------
+# ROS Node
+# -----------------------------
+class TrackingNode(Node):
+    def __init__(self):
+        super().__init__('tracking_node')
+
+        image_dims = (720, 1280)  # (H,W) <-- TODO: set to your camera feed size, consider exposing as ROS params
+
+        self.track_manager = TrackManager(
+            max_age=30,             # Deletes the track if it hasn't been updated in 30 frames
+            min_hits=1,             # Only publishes tracks that have been matched at least this many times
+            gate_d2_thresh=9.4877,  # chi2.ppf(0.95, df=4) is ~9.49
+            w_motion=0.5,           # Weight for motion cost
+            w_app=0.5,              # Weight for appearance cost
+            dt=1.0,
+            image_dims=image_dims,
+        )
+
+        self.sub_det = self.create_subscription(
+            DetectionArray,
+            '/kestrel/detections',
+            self.dets_cb,
+            10,
+        )
+        self.pub_tracks = self.create_publisher(TrackArray, '/kestrel/tracks', 10)
+
+    def 
publish_tracks(self, header, tracks: List[Track]) -> None: + """Turn internal tracks into TrackArray message and publish.""" + msg = TrackArray() + msg.header = header + + for trk in tracks: + t = TrackMsg() + t.id = int(trk.id) + # Publish current bbox (xyxy). Keep float. + t.x1 = float(trk.bbox_xyxy[0]) + t.y1 = float(trk.bbox_xyxy[1]) + t.x2 = float(trk.bbox_xyxy[2]) + t.y2 = float(trk.bbox_xyxy[3]) + + t.center_x = (t.x1 + t.x2) / 2.0 + t.center_y = (t.y1 + t.y2) / 2.0 + + t.conf = float(trk.conf) + t.class_name = str(trk.class_name) + + # Copy the embedding from the internal Track object to the ROS message + if trk.feature is not None: + # trk.feature is a numpy array, convert to list for ROS + t.embedding = trk.feature.tolist() + + msg.tracks.append(t) + + self.pub_tracks.publish(msg) + + + def dets_cb(self, det_msg: DetectionArray) -> None: + dets = [det_from_ros(d) for d in det_msg.detections] + active_tracks = self.track_manager.step(dets) + self.publish_tracks(det_msg.header, active_tracks) + + +def main(args=None): + rclpy.init(args=args) + node = TrackingNode() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/src/kestrel_tracking/package.xml b/src/kestrel_tracking/package.xml index dc60420..0a4bdd4 100644 --- a/src/kestrel_tracking/package.xml +++ b/src/kestrel_tracking/package.xml @@ -1,23 +1,22 @@ - - - kestrel_tracking - 0.1.0 - Real-time object tracking node for Kestrel using DeepSORT and vision_msgs - David Orjuela - MIT - - ament_python - - rclpy - vision_msgs - kestrel_msgs - - ament_copyright - ament_flake8 - ament_pep257 - python3-pytest - - - ament_python - - + + + kestrel_tracking + 0.1.0 + Real-time object tracking node for Kestrel using DeepSORT and vision_msgs + David Orjuela + MIT + + ament_python + + rclpy + kestrel_msgs + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/src/kestrel_tracking/setup.cfg b/src/kestrel_tracking/setup.cfg index 65f004a..a45a703 100644 --- a/src/kestrel_tracking/setup.cfg +++ b/src/kestrel_tracking/setup.cfg @@ -1,4 +1,4 @@ -[develop] -script_dir=$base/lib/kestrel_tracking -[install] -install_scripts=$base/lib/kestrel_tracking +[develop] +script_dir=$base/lib/kestrel_tracking +[install] +install_scripts=$base/lib/kestrel_tracking diff --git a/src/kestrel_tracking/setup.py b/src/kestrel_tracking/setup.py index a6edecb..7912fef 100644 --- a/src/kestrel_tracking/setup.py +++ b/src/kestrel_tracking/setup.py @@ -1,25 +1,25 @@ -from setuptools import find_packages, setup - -package_name = 'kestrel_tracking' - -setup( - name=package_name, - version='0.0.0', - packages=find_packages(exclude=['test']), - data_files=[ - ('share/ament_index/resource_index/packages', - ['resource/' + package_name]), - ('share/' + package_name, ['package.xml']), - ], - install_requires=['setuptools'], - zip_safe=True, - maintainer='davidorjuela', - maintainer_email='davidorjuela@todo.todo', - description='TODO: Package description', - license='TODO: License declaration', - tests_require=['pytest'], - entry_points={ - 'console_scripts': [ - ], - }, -) +from setuptools import find_packages, setup + +package_name = 'kestrel_tracking' + +setup( + name=package_name, + version='0.0.0', + packages=find_packages(exclude=['test']), + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + 
install_requires=['setuptools'], + zip_safe=True, + maintainer='', + maintainer_email='davidorjuela@todo.todo', + description='TODO: Package description', + license='TODO: License declaration', + entry_points={ + 'console_scripts': [ + 'tracking_node = kestrel_tracking.tracking_node:main', + ], + }, +) diff --git a/src/kestrel_tracking/test/test_copyright.py b/src/kestrel_tracking/test/test_copyright.py index 97a3919..282981a 100644 --- a/src/kestrel_tracking/test/test_copyright.py +++ b/src/kestrel_tracking/test/test_copyright.py @@ -1,25 +1,25 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_copyright.main import main -import pytest - - -# Remove the `skip` decorator once the source file(s) have a copyright header -@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') -@pytest.mark.copyright -@pytest.mark.linter -def test_copyright(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found errors' +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_copyright.main import main +import pytest + + +# Remove the `skip` decorator once the source file(s) have a copyright header +@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') +@pytest.mark.copyright +@pytest.mark.linter +def test_copyright(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found errors' diff --git a/src/kestrel_tracking/test/test_flake8.py b/src/kestrel_tracking/test/test_flake8.py index 27ee107..625324e 100644 --- a/src/kestrel_tracking/test/test_flake8.py +++ b/src/kestrel_tracking/test/test_flake8.py @@ -1,25 +1,25 @@ -# Copyright 2017 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from ament_flake8.main import main_with_errors -import pytest - - -@pytest.mark.flake8 -@pytest.mark.linter -def test_flake8(): - rc, errors = main_with_errors(argv=[]) - assert rc == 0, \ - 'Found %d code style errors / warnings:\n' % len(errors) + \ - '\n'.join(errors) +# Copyright 2017 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_flake8.main import main_with_errors +import pytest + + +@pytest.mark.flake8 +@pytest.mark.linter +def test_flake8(): + rc, errors = main_with_errors(argv=[]) + assert rc == 0, \ + 'Found %d code style errors / warnings:\n' % len(errors) + \ + '\n'.join(errors) diff --git a/src/kestrel_tracking/test/test_pep257.py b/src/kestrel_tracking/test/test_pep257.py index b234a38..4569717 100644 --- a/src/kestrel_tracking/test/test_pep257.py +++ b/src/kestrel_tracking/test/test_pep257.py @@ -1,23 +1,23 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_pep257.main import main -import pytest - - -@pytest.mark.linter -@pytest.mark.pep257 -def test_pep257(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found code style errors / warnings' +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ament_pep257.main import main +import pytest + + +@pytest.mark.linter +@pytest.mark.pep257 +def test_pep257(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found code style errors / warnings' diff --git a/src/kestrel_vision/kestrel_vision/dataset.py b/src/kestrel_vision/kestrel_vision/dataset.py index 97301a8..cc954e1 100644 --- a/src/kestrel_vision/kestrel_vision/dataset.py +++ b/src/kestrel_vision/kestrel_vision/dataset.py @@ -1,81 +1,81 @@ -import os -import torch -from torch.utils.data import Dataset # The base class we will inherit from to create our custom dataset -from PIL import Image # Python Imaging Library (Pillow), used for opening and manipulating image files - -class Market1501(Dataset): - """ - Custom PyTorch Dataset for the Market-1501 dataset. - - This class handles loading images and parsing their person IDs (PIDs) - from the filenames to be used as integer labels for training a classification model. - """ - - # CHANGED: Added 'transform=None' to accept an image transformation pipeline. - def __init__(self, image_dir, transform=None): - """ - Args: - image_dir (string): Directory path with all the training images. - transform (callable, optional): A function/transform to be applied to each image. - """ - # Store the directory path for later use in __getitem__. - self.image_dir = image_dir - - # Store the transform pipeline (e.g., resize, normalize) for later use. - self.transform = transform - - # Create a list of all filenames in the directory that end with '.jpg'. - # This is a 'list comprehension', a compact way to build a list. - # It filters out any non-JPEG files (like '.txt' or system files) that might cause errors. - self.image_paths = [file for file in os.listdir(image_dir) if file.endswith(".jpg")] - - # Create a set of unique Person IDs (PIDs) from the filenames. - # Example filename: '0002_c1s1_000451_01.jpg'. We split by '_' and take the first part '0002'. - # Using a 'set' automatically removes all duplicate PIDs, giving us a list of unique people. - pids_in_dataset = set([path.split("_")[0] for path in self.image_paths]) - - # Create a mapping dictionary to convert string PIDs to integer labels (0, 1, 2, ...). - # Neural networks and loss functions require integer labels, not strings. - # 'enumerate' provides a counter 'i' for each unique 'pid'. - # Example: {'0002': 0, '0007': 1, '-1': 2} - self.pid_to_label = {pid: i for i, pid in enumerate(pids_in_dataset)} - - # Store the total number of unique classes (people). - # This is needed to correctly configure the output layer of our neural network. - self.num_classes = len(self.pid_to_label) - - # The __getitem__ method defines how to retrieve a single sample (one image and its label) from the dataset. - # The DataLoader will call this method automatically for each index from 0 to len(dataset)-1. - def __getitem__(self, index): - # Retrieve the filename for the given index from our list of paths. - name = self.image_paths[index] - - # Construct the full, platform-independent path to the image file. - # os.path.join is used so this code works on both Windows ('\') and Linux/Mac ('/'). - path = os.path.join(self.image_dir, name) - - # Open the image file using Pillow and ensure it's in RGB format (corresponds to 3 channels). - # This is important because some images might be in grayscale or have an alpha channel. - image = Image.open(path).convert("RGB") - - # CHANGED: Apply the transformations to the image. - # This is a CRITICAL step. 
It will resize, normalize, and convert the PIL Image to a PyTorch Tensor. - if self.transform: - image = self.transform(image) - - # Parse the filename again to get the string Person ID. - pid = name.split("_")[0] - - # Use our mapping dictionary to look up the integer label for the corresponding PID. - label = self.pid_to_label[pid] - - # Return the processed image tensor and its corresponding label as a tensor. - # The label must be a LongTensor (64-bit integer) for PyTorch's CrossEntropyLoss function. - return image, torch.tensor(label, dtype=torch.long) - - # The __len__ method must return the total number of samples in the dataset. - # The DataLoader needs this to know the dataset's size for batching, shuffling, and epoch completion. - def __len__(self): - # Return the total count of the image paths we found during initialization. - return len(self.image_paths) +import os +import torch +from torch.utils.data import Dataset # The base class we will inherit from to create our custom dataset +from PIL import Image # Python Imaging Library (Pillow), used for opening and manipulating image files + +class Market1501(Dataset): + """ + Custom PyTorch Dataset for the Market-1501 dataset. + + This class handles loading images and parsing their person IDs (PIDs) + from the filenames to be used as integer labels for training a classification model. + """ + + # CHANGED: Added 'transform=None' to accept an image transformation pipeline. + def __init__(self, image_dir, transform=None): + """ + Args: + image_dir (string): Directory path with all the training images. + transform (callable, optional): A function/transform to be applied to each image. + """ + # Store the directory path for later use in __getitem__. + self.image_dir = image_dir + + # Store the transform pipeline (e.g., resize, normalize) for later use. + self.transform = transform + + # Create a list of all filenames in the directory that end with '.jpg'. + # This is a 'list comprehension', a compact way to build a list. + # It filters out any non-JPEG files (like '.txt' or system files) that might cause errors. + self.image_paths = [file for file in os.listdir(image_dir) if file.endswith(".jpg")] + + # Create a set of unique Person IDs (PIDs) from the filenames. + # Example filename: '0002_c1s1_000451_01.jpg'. We split by '_' and take the first part '0002'. + # Using a 'set' automatically removes all duplicate PIDs, giving us a list of unique people. + pids_in_dataset = set([path.split("_")[0] for path in self.image_paths]) + + # Create a mapping dictionary to convert string PIDs to integer labels (0, 1, 2, ...). + # Neural networks and loss functions require integer labels, not strings. + # 'enumerate' provides a counter 'i' for each unique 'pid'. + # Example: {'0002': 0, '0007': 1, '-1': 2} + self.pid_to_label = {pid: i for i, pid in enumerate(pids_in_dataset)} + + # Store the total number of unique classes (people). + # This is needed to correctly configure the output layer of our neural network. + self.num_classes = len(self.pid_to_label) + + # The __getitem__ method defines how to retrieve a single sample (one image and its label) from the dataset. + # The DataLoader will call this method automatically for each index from 0 to len(dataset)-1. + def __getitem__(self, index): + # Retrieve the filename for the given index from our list of paths. + name = self.image_paths[index] + + # Construct the full, platform-independent path to the image file. + # os.path.join is used so this code works on both Windows ('\') and Linux/Mac ('/'). 
+ path = os.path.join(self.image_dir, name) + + # Open the image file using Pillow and ensure it's in RGB format (corresponds to 3 channels). + # This is important because some images might be in grayscale or have an alpha channel. + image = Image.open(path).convert("RGB") + + # CHANGED: Apply the transformations to the image. + # This is a CRITICAL step. It will resize, normalize, and convert the PIL Image to a PyTorch Tensor. + if self.transform: + image = self.transform(image) + + # Parse the filename again to get the string Person ID. + pid = name.split("_")[0] + + # Use our mapping dictionary to look up the integer label for the corresponding PID. + label = self.pid_to_label[pid] + + # Return the processed image tensor and its corresponding label as a tensor. + # The label must be a LongTensor (64-bit integer) for PyTorch's CrossEntropyLoss function. + return image, torch.tensor(label, dtype=torch.long) + + # The __len__ method must return the total number of samples in the dataset. + # The DataLoader needs this to know the dataset's size for batching, shuffling, and epoch completion. + def __len__(self): + # Return the total count of the image paths we found during initialization. + return len(self.image_paths) \ No newline at end of file diff --git a/src/kestrel_vision/kestrel_vision/model.py b/src/kestrel_vision/kestrel_vision/model.py index e20fa20..e0254cb 100644 --- a/src/kestrel_vision/kestrel_vision/model.py +++ b/src/kestrel_vision/kestrel_vision/model.py @@ -1,42 +1,42 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -class CNNdeepSORT(nn.Module): - - def __init__(self, embedding_dim): - super().__init__() - - self.convolution = nn.Sequential( - #Input Dimensions: [B, 3, H, W] - #inChannels= 3 (RGB), outChannels = embedding_dim/4 (out being # of kernels) - - # Stage 1 - nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), # without padding, convolution shrinks images quickly, potentially losing info @ edges. 
- nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), - nn.MaxPool2d(2), #downsample using maxmimum values in kernel -> H/2 x W/2 - - # Stage 2 - nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True), - nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True), - nn.MaxPool2d(2), # H/2 x W/2 - - # Stage 3 - nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(True), - nn.Conv2d(in_channels=256, out_channels=embedding_dim, kernel_size=3, padding=1), nn.BatchNorm2d(embedding_dim), nn.ReLU(True), - - #Final reduction later into 1x1 embedding: [B, embedding_dim, 1, 1] - nn.AdaptiveAvgPool2d((1,1)) - ) - - # Fully connected classifier for Re-ID training - #self.classifier = nn.Linear(in_features = embedding_dim, out_features=num_classes) - - def forward(self, inputTensor): - - output = self.convolution(inputTensor) # CNN encoder block -> [B, emb_dim, 1, 1] - output = torch.flatten(output,1) # [B, emb_dim] -> person's appearance vector - - output = self.classifier(output) # For use as classifier -> [B, num_classes] - # else: For use as feature extractor +import torch +import torch.nn as nn +import torch.nn.functional as F + +class CNNdeepSORT(nn.Module): + + def __init__(self, embedding_dim): + super().__init__() + + self.convolution = nn.Sequential( + #Input Dimensions: [B, 3, H, W] + #inChannels= 3 (RGB), outChannels = embedding_dim/4 (out being # of kernels) + + # Stage 1 + nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), # without padding, convolution shrinks images quickly, potentially losing info @ edges. 
+ nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(True), + nn.MaxPool2d(2), #downsample using maxmimum values in kernel -> H/2 x W/2 + + # Stage 2 + nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True), + nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(True), + nn.MaxPool2d(2), # H/2 x W/2 + + # Stage 3 + nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(True), + nn.Conv2d(in_channels=256, out_channels=embedding_dim, kernel_size=3, padding=1), nn.BatchNorm2d(embedding_dim), nn.ReLU(True), + + #Final reduction later into 1x1 embedding: [B, embedding_dim, 1, 1] + nn.AdaptiveAvgPool2d((1,1)) + ) + + # Fully connected classifier for Re-ID training + #self.classifier = nn.Linear(in_features = embedding_dim, out_features=num_classes) + + def forward(self, inputTensor): + + output = self.convolution(inputTensor) # CNN encoder block -> [B, emb_dim, 1, 1] + output = torch.flatten(output,1) # [B, emb_dim] -> person's appearance vector + + #output = self.classifier(output) # For use as classifier -> [B, num_classes] + # else: For use as feature extractor return output \ No newline at end of file diff --git a/src/kestrel_vision/kestrel_vision/processing.py b/src/kestrel_vision/kestrel_vision/processing.py index 023d3c0..0dac5c1 100644 --- a/src/kestrel_vision/kestrel_vision/processing.py +++ b/src/kestrel_vision/kestrel_vision/processing.py @@ -1,27 +1,27 @@ -import cv2 -import torch -import numpy as np - -def extract_person_embedding(frame: np.ndarray, box, cnn_model) -> np.ndarray | None: - """ - Crops a person from the frame, runs pre-processing, and extracts - the appearance embedding using the CNN model. - """ - x1, y1, x2, y2 = map(int, box.xyxy[0]) - person_crop = frame[y1:y2, x1:x2] - - if person_crop.size == 0: - return None - - # Pre-process the crop for the CNN - crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB) # get rgb instead of bgr - crop_tensor = torch.from_numpy(crop_rgb).permute(2, 0, 1).unsqueeze(0).float() / 255.0 - - # Run CNN inference to get the appearance vector - with torch.no_grad(): - appearance_vector = cnn_model(crop_tensor) - if hasattr(appearance_vector, "detach"): - appearance_vector = appearance_vector.detach().cpu().numpy() - - # Squeeze the batch dimension out +import cv2 +import torch +import numpy as np + +def extract_person_embedding(frame: np.ndarray, box, cnn_model) -> np.ndarray | None: + """ + Crops a person from the frame, runs pre-processing, and extracts + the appearance embedding using the CNN model. 
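+
+    Note: the crop below is only converted to RGB and scaled to [0, 1]; if the
+    CNN was trained with the resize/normalization pipeline used in train.py,
+    the same transform should probably be applied here before inference as well.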
+ """ + x1, y1, x2, y2 = map(int, box.xyxy[0]) + person_crop = frame[y1:y2, x1:x2] + + if person_crop.size == 0: + return None + + # Pre-process the crop for the CNN + crop_rgb = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB) # get rgb instead of bgr + crop_tensor = torch.from_numpy(crop_rgb).permute(2, 0, 1).unsqueeze(0).float() / 255.0 + + # Run CNN inference to get the appearance vector + with torch.no_grad(): + appearance_vector = cnn_model(crop_tensor) + if hasattr(appearance_vector, "detach"): + appearance_vector = appearance_vector.detach().cpu().numpy() + + # Squeeze the batch dimension out return np.squeeze(appearance_vector) \ No newline at end of file diff --git a/src/kestrel_vision/kestrel_vision/train.py b/src/kestrel_vision/kestrel_vision/train.py index f78a55b..0d99346 100644 --- a/src/kestrel_vision/kestrel_vision/train.py +++ b/src/kestrel_vision/kestrel_vision/train.py @@ -1,292 +1,292 @@ -import os -import sys -import time -import torch -import torch.nn as nn -import torchvision -from datetime import datetime -from torchvision import transforms -from tqdm import tqdm -from torch.utils.data import DataLoader -from torch.utils.data import random_split -from torch.utils.tensorboard import SummaryWriter -from torch.amp import autocast, GradScaler # instead of torch.cuda.amp -- deprecated -from torch.nn import TripletMarginWithDistanceLoss -# Custom Modules -import dataset -from model import CNNdeepSORT - -# --------------------------------------------------- -# TRAINING FUNCTION -# --------------------------------------------------- -def euclidean_distance(x, y): - return torch.norm(x - y, p=2, dim=1) - -def training(model, dataloader, loss_fn, optimizer, device): - model.train() # Set model to training mode - total_loss = 0 - total_correct = 0 - total_samples = 0 - progress_bar = tqdm(dataloader, "Training Progress...", unit = "batch") - - for (archor, positive, negative) in progress_bar: - anchor.to(device, non_blocking=True) - positive = positive.to(device, non_blocking=True) - negative = negative.to(device, non_blocking=True) - # Non_blocking lets compute overlap with data transfer (small but free speed-up). Launches host --> GPU copy async if tensor already pinned (it is) - - with autocast(device_type=device): - anchor_out = model(anchor) - positive_out = model(positive) - negative_out = model(negative) - - loss = loss_fn(anchor_out, positive_out, negative_out) - - # Backward pass and optimization - scaler.scale(loss).backward() # 1. Calculate gradients based on the current loss. - torch.nn.utils.clip_grad_norm_(model.parameters(), clip_val) # Compues total L2 norm for all gradients. If exceeds `clip_val`, rescales them to be equal. Prevents gradient explosion. - scaler.step(optimizer) # 2. Update the model's weights using the calculated gradients. With scaler, undoes the scaling and skips the step if gradients overflowed (inf) - scaler.update() # Adjusts the scale factor up/down depending on whether overflow was detected, keeping training stable. - optimizer.zero_grad(set_to_none=True)# 3. Reset gradients to zero for the next iteration. 
Instead of writing 0s into every grad tensor, it just sets the .grad pointer to None (saving a full CUDA memset each iteration) - - # Metrics - total_loss += loss.item() - d_ap = euclidean_distance(anchor_out, positive_out) - d_an = euclidean_distance(anchor_out, negative_out) - total_correct += (d_ap < d_an).sum().item() - total_samples += anchor.size(0) - - progress_bar.set_postfix(loss=loss.item()) - - avg_loss = total_loss / len(dataloader) - accuracy = total_correct / total_samples - return avg_loss, accuracy - -# --------------------------------------------------- -# 1. SETUP -# --------------------------------------------------- - -# This code will only run when you execute `python train.py` directly -if __name__ == '__main__': - # Hyperparameters - LEARNING_RATE = 1e-3 - EPOCHS = 50 - BATCH_SIZE = 128 # e.g. 32, 64, 128 - # Increased from 64. AMP frees memory, and more data per step = fewer GPU idles - - - # Move the model to the appropriate device (GPU if available) - device = 'cuda' if torch.cuda.is_available() else 'cpu' - print(f"Using device: {device}") - - # Tries several convolution algorithms on the first batch, then uses fastest one thereafter. - # Pays off only if input sizes stay fixed (normalized to 256x128) - torch.backends.cudnn.benchmark = True - - scaler = GradScaler() # WIth FP16, gradients can under-flow to 0. This multiplies the loss by a large factor, performs backward, then divides them back down during optimizer. - clip_val = 1.0 # gradient-clip norm - - # --------------------------------------------------- - # 2. MODEL, LOSS, OPTIMIZER - # --------------------------------------------------- - - # Instantiate the model and move to device - # 751 is the number of classes for Market-1501 - model = CNNdeepSORT(embedding_dim=128).to(device) - - # Define loss function, optimizer, and learning rate scheduler - - #TODO distance function, margin depends on distance choosen - - loss_function = nn.TripletMarginWithDistanceLoss(distance_function=euclidean_distance, margin=0.3, swap = True ) # Cross Entropy Loss is simpler, so more suitable for validation - optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE) # AdamW is an upgraded version of Adam - - scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # Decays LR by a factor of 0.1 every 10 epochs - # Gamma of 0.1 is common -- sharp reduction in LR, allowing model to switch from large adjustments to fine-tuning - - # Load from saved checkpoint if found (most generalized model) - checkpoint_path = 'best_model_checkpoint.pth' - start_epoch = 0 - if(len(sys.argv) > 1 and sys.argv[1].lower() == "load"): - if(os.path.exists(checkpoint_path)): - print(f"Loading checkpoint from {checkpoint_path}...") - checkpoint = torch.load(checkpoint_path) - model.load_state_dict(checkpoint['model_state_dict']) - optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - scheduler.load_state_dict(checkpoint['scheduler_state_dict']) - scaler.load_state_dict(checkpoint.get('scaler_state_dict', {})) - start_epoch = checkpoint['epoch'] + 1 - print(f"Resumed at epoch {start_epoch} | val_loss={checkpoint['loss']:.4f}") - else: - print("No checkpoint found, starting from scratch.") - else: - print("Training from scratch...") - - #TODO - - - # --------------------------------------------------- - # 3. 
DATA LOADING & SMALL VALIDATION SPLIT - # --------------------------------------------------- - - # Define more robust transforms for training - # Images are resized and normalized for better model performance. - transform = transforms.Compose([ - transforms.Resize((128, 64)), # native Market-1501 - transforms.RandomHorizontalFlip(p=0.5), # Adds ~1% mAP with no speed cost - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # mean and std are pre-calculated mean/stdev of the ImageNet dataset. - # For each channel of the image: output[channel] = (input[channel] - mean[channel]) / std[channel] - # Standardizes the pixel value range to train more effectively. Helps converge faster - ]) - - # Load dataset and create DataLoader - Market1501_train_file_path = r'C:\Users\david\Downloads\Market-1501-v15.09.15\bounding_box_train' - train_data = dataset.Market1501(Market1501_train_file_path, transform=transform) - - # Only used for early-stopping / LR checks, NOT for final testing. Tiny split keeps epochs fast but still shows over-/under-fitting trends - val_len = int(0.05 * len(train_data)) # split 5 % off for quick val - train_len = len(train_data) - val_len - - # split dataset before building both dataLoaders - train_ds, val_ds = random_split(train_data, [train_len, val_len]) - - # Entire training data - training_dataloader = DataLoader( - dataset=train_ds, - batch_size=BATCH_SIZE, - shuffle=True, - persistent_workers=True,# Keeps DataLoader workers alive across epochs and - prefetch_factor=4, # queues 4 batches per worker. Eliminates Windows process-spawn penalty; feeds GPU continuously - num_workers=4, # start with 4; 6–8 if CPU has threads to spare - pin_memory=True) # pin_memory puts tensors into pinned memory, allowing for faster transfer from CPU to GPU. - # Can significantly speed up training. Set to true with GPU. - - val_loader = DataLoader( - val_ds, - batch_size=BATCH_SIZE, - shuffle=False, # no need to shuffle for evaluation - num_workers=4, - pin_memory=True, - persistent_workers=True) - - # Evaluation helper - @torch.no_grad() # we don't track gradients here - def evaluate(model, loader): - """ - Runs one full pass on `loader` with AMP and returns: - (mean_cross_entropy_loss, top-1_accuracy) - """ - model.eval() # turn off dropout / batch-norm update - total_loss = 0.0 - correct = 0 - samples = 0 - - for anchor, positive, negative in loader: - anchor = anchor.to(device, non_blocking=True) - positive = positive.to(device, non_blocking=True) - negative = negative.to(device, non_blocking=True) - - with autocast(device_type=device): # AMP even in eval -> less VRAM, faster - anchor_out = model(anchor) - positive_out = model(positive) - negative_out = model(negative) - - - - - loss = loss_function(anchor_out, positive_out, negative_out) - - total_loss += loss.item() - #TODO - d_ap = euclidean_distance(anchor_out, positive_out) - d_an = euclidean_distance(anchor_out, negative_out) - correct += (d_ap < d_an).sum().item() - - samples += anchor.size(0) - - mean_loss = total_loss / len(loader) - accuracy = correct / samples - model.train() # restore training mode for caller - return mean_loss, accuracy - - # --------------------------------------------------- - # 4. 
TENSORBOARD LOGGING (Initial) - # --------------------------------------------------- - - print("Logging model graph and sample images to TensorBoard...") - # Get a single batch from the dataloader to log graph and images - anchor, positibe, negative = next(iter(training_dataloader)) - - print("Done.") - - # --------------------------------------------------- - # 6. MAIN TRAINING LOOP - # --------------------------------------------------- - best_val_loss = float('inf') - patience = 5 - patience_counter = 0 - - for epoch in range(start_epoch, EPOCHS): - start_time = time.time() - - # Train for one epoch - avg_loss, accuracy = training(model, training_dataloader, loss_function, optimizer, device) #tells you if optimizer is doing its job on the data it sees - val_loss, val_acc = evaluate(model, val_loader) #tells you if the network is generalizing; early stopping - - #*NEW* Update the learning rate - scheduler.step() # Without this, the model is unable to converge. - - - - # ----- save best on *validation* loss ----- - #if avg_loss < best_val_loss: - if val_loss < best_val_loss: - #best_val_loss = avg_loss - best_val_loss = val_loss - torch.save({ - 'epoch': epoch, - 'model_state_dict': model.state_dict(), - 'optimizer_state_dict': optimizer.state_dict(), - 'scheduler_state_dict': scheduler.state_dict(), - 'loss': val_loss - }, 'best_model_checkpoint.pth') - print(f"Epoch {epoch+1}: val_loss improved to {val_loss:.4f}") - patience_counter = 0 - else: - patience_counter += 1 - print(f" Epoch {epoch+1}: Loss did not improve. Patience = {patience_counter}/{patience}") - - # Print epoch summary - elapsed_time = time.time() - start_time - print(f"\nEpoch {epoch+1}/{EPOCHS} | Avg Loss: {avg_loss:.4f} | Acc: {accuracy:.2%} | Time: {elapsed_time:.2f}s") - - # Always save latest weights - torch.save({ - 'epoch': epoch, - 'model_state_dict': model.state_dict(), - 'optimizer_state_dict': optimizer.state_dict(), - 'scheduler_state_dict': scheduler.state_dict(), - 'scaler_state_dict': scaler.state_dict(), - 'loss': avg_loss - }, 'latest_model_checkpoint_epoch.pth') - - # Early stopping check - if patience_counter >= patience: - print(f"\n Early stopping triggered after {epoch+1} epochs.") - break - - # Log weight histograms - - - print("\nTraining Finished!") - - - # --------------------------------------------------- - # 7. 
CLEANUP - # --------------------------------------------------- - - - - # To view the logs, open a terminal in your project's root directory and run: +import os +import sys +import time +import torch +import torch.nn as nn +import torchvision +from datetime import datetime +from torchvision import transforms +from tqdm import tqdm +from torch.utils.data import DataLoader +from torch.utils.data import random_split +from torch.utils.tensorboard import SummaryWriter +from torch.amp import autocast, GradScaler # instead of torch.cuda.amp -- deprecated +from torch.nn import TripletMarginWithDistanceLoss +# Custom Modules +import dataset +from model import CNNdeepSORT + +# --------------------------------------------------- +# TRAINING FUNCTION +# --------------------------------------------------- +def euclidean_distance(x, y): + return torch.norm(x - y, p=2, dim=1) + +def training(model, dataloader, loss_fn, optimizer, device): + model.train() # Set model to training mode + total_loss = 0 + total_correct = 0 + total_samples = 0 + progress_bar = tqdm(dataloader, "Training Progress...", unit = "batch") + + for (archor, positive, negative) in progress_bar: + anchor.to(device, non_blocking=True) + positive = positive.to(device, non_blocking=True) + negative = negative.to(device, non_blocking=True) + # Non_blocking lets compute overlap with data transfer (small but free speed-up). Launches host --> GPU copy async if tensor already pinned (it is) + + with autocast(device_type=device): + anchor_out = model(anchor) + positive_out = model(positive) + negative_out = model(negative) + + loss = loss_fn(anchor_out, positive_out, negative_out) + + # Backward pass and optimization + scaler.scale(loss).backward() # 1. Calculate gradients based on the current loss. + torch.nn.utils.clip_grad_norm_(model.parameters(), clip_val) # Compues total L2 norm for all gradients. If exceeds `clip_val`, rescales them to be equal. Prevents gradient explosion. + scaler.step(optimizer) # 2. Update the model's weights using the calculated gradients. With scaler, undoes the scaling and skips the step if gradients overflowed (inf) + scaler.update() # Adjusts the scale factor up/down depending on whether overflow was detected, keeping training stable. + optimizer.zero_grad(set_to_none=True)# 3. Reset gradients to zero for the next iteration. Instead of writing 0s into every grad tensor, it just sets the .grad pointer to None (saving a full CUDA memset each iteration) + + # Metrics + total_loss += loss.item() + d_ap = euclidean_distance(anchor_out, positive_out) + d_an = euclidean_distance(anchor_out, negative_out) + total_correct += (d_ap < d_an).sum().item() + total_samples += anchor.size(0) + + progress_bar.set_postfix(loss=loss.item()) + + avg_loss = total_loss / len(dataloader) + accuracy = total_correct / total_samples + return avg_loss, accuracy + +# --------------------------------------------------- +# 1. SETUP +# --------------------------------------------------- + +# This code will only run when you execute `python train.py` directly +if __name__ == '__main__': + # Hyperparameters + LEARNING_RATE = 1e-3 + EPOCHS = 50 + BATCH_SIZE = 128 # e.g. 32, 64, 128 + # Increased from 64. AMP frees memory, and more data per step = fewer GPU idles + + + # Move the model to the appropriate device (GPU if available) + device = 'cuda' if torch.cuda.is_available() else 'cpu' + print(f"Using device: {device}") + + # Tries several convolution algorithms on the first batch, then uses fastest one thereafter. 
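+    # (Editorial aside: cuDNN caches the winning algorithm per distinct input shape,
+    #  so the autotuning cost is paid once for each unique (batch, channels, H, W) it sees.)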
+ # Pays off only if input sizes stay fixed (normalized to 256x128) + torch.backends.cudnn.benchmark = True + + scaler = GradScaler() # WIth FP16, gradients can under-flow to 0. This multiplies the loss by a large factor, performs backward, then divides them back down during optimizer. + clip_val = 1.0 # gradient-clip norm + + # --------------------------------------------------- + # 2. MODEL, LOSS, OPTIMIZER + # --------------------------------------------------- + + # Instantiate the model and move to device + # 751 is the number of classes for Market-1501 + model = CNNdeepSORT(embedding_dim=128).to(device) + + # Define loss function, optimizer, and learning rate scheduler + + #TODO distance function, margin depends on distance choosen + + loss_function = nn.TripletMarginWithDistanceLoss(distance_function=euclidean_distance, margin=0.3, swap = True ) # Cross Entropy Loss is simpler, so more suitable for validation + optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE) # AdamW is an upgraded version of Adam + + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # Decays LR by a factor of 0.1 every 10 epochs + # Gamma of 0.1 is common -- sharp reduction in LR, allowing model to switch from large adjustments to fine-tuning + + # Load from saved checkpoint if found (most generalized model) + checkpoint_path = 'best_model_checkpoint.pth' + start_epoch = 0 + if(len(sys.argv) > 1 and sys.argv[1].lower() == "load"): + if(os.path.exists(checkpoint_path)): + print(f"Loading checkpoint from {checkpoint_path}...") + checkpoint = torch.load(checkpoint_path) + model.load_state_dict(checkpoint['model_state_dict']) + optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + scheduler.load_state_dict(checkpoint['scheduler_state_dict']) + scaler.load_state_dict(checkpoint.get('scaler_state_dict', {})) + start_epoch = checkpoint['epoch'] + 1 + print(f"Resumed at epoch {start_epoch} | val_loss={checkpoint['loss']:.4f}") + else: + print("No checkpoint found, starting from scratch.") + else: + print("Training from scratch...") + + #TODO + + + # --------------------------------------------------- + # 3. DATA LOADING & SMALL VALIDATION SPLIT + # --------------------------------------------------- + + # Define more robust transforms for training + # Images are resized and normalized for better model performance. + transform = transforms.Compose([ + transforms.Resize((128, 64)), # native Market-1501 + transforms.RandomHorizontalFlip(p=0.5), # Adds ~1% mAP with no speed cost + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # mean and std are pre-calculated mean/stdev of the ImageNet dataset. + # For each channel of the image: output[channel] = (input[channel] - mean[channel]) / std[channel] + # Standardizes the pixel value range to train more effectively. Helps converge faster + ]) + + # Load dataset and create DataLoader + Market1501_train_file_path = r'C:\Users\david\Downloads\Market-1501-v15.09.15\bounding_box_train' + train_data = dataset.Market1501(Market1501_train_file_path, transform=transform) + + # Only used for early-stopping / LR checks, NOT for final testing. 
Tiny split keeps epochs fast but still shows over-/under-fitting trends + val_len = int(0.05 * len(train_data)) # split 5 % off for quick val + train_len = len(train_data) - val_len + + # split dataset before building both dataLoaders + train_ds, val_ds = random_split(train_data, [train_len, val_len]) + + # Entire training data + training_dataloader = DataLoader( + dataset=train_ds, + batch_size=BATCH_SIZE, + shuffle=True, + persistent_workers=True,# Keeps DataLoader workers alive across epochs and + prefetch_factor=4, # queues 4 batches per worker. Eliminates Windows process-spawn penalty; feeds GPU continuously + num_workers=4, # start with 4; 6–8 if CPU has threads to spare + pin_memory=True) # pin_memory puts tensors into pinned memory, allowing for faster transfer from CPU to GPU. + # Can significantly speed up training. Set to true with GPU. + + val_loader = DataLoader( + val_ds, + batch_size=BATCH_SIZE, + shuffle=False, # no need to shuffle for evaluation + num_workers=4, + pin_memory=True, + persistent_workers=True) + + # Evaluation helper + @torch.no_grad() # we don't track gradients here + def evaluate(model, loader): + """ + Runs one full pass on `loader` with AMP and returns: + (mean_cross_entropy_loss, top-1_accuracy) + """ + model.eval() # turn off dropout / batch-norm update + total_loss = 0.0 + correct = 0 + samples = 0 + + for anchor, positive, negative in loader: + anchor = anchor.to(device, non_blocking=True) + positive = positive.to(device, non_blocking=True) + negative = negative.to(device, non_blocking=True) + + with autocast(device_type=device): # AMP even in eval -> less VRAM, faster + anchor_out = model(anchor) + positive_out = model(positive) + negative_out = model(negative) + + + + + loss = loss_function(anchor_out, positive_out, negative_out) + + total_loss += loss.item() + #TODO + d_ap = euclidean_distance(anchor_out, positive_out) + d_an = euclidean_distance(anchor_out, negative_out) + correct += (d_ap < d_an).sum().item() + + samples += anchor.size(0) + + mean_loss = total_loss / len(loader) + accuracy = correct / samples + model.train() # restore training mode for caller + return mean_loss, accuracy + + # --------------------------------------------------- + # 4. TENSORBOARD LOGGING (Initial) + # --------------------------------------------------- + + print("Logging model graph and sample images to TensorBoard...") + # Get a single batch from the dataloader to log graph and images + anchor, positibe, negative = next(iter(training_dataloader)) + + print("Done.") + + # --------------------------------------------------- + # 6. MAIN TRAINING LOOP + # --------------------------------------------------- + best_val_loss = float('inf') + patience = 5 + patience_counter = 0 + + for epoch in range(start_epoch, EPOCHS): + start_time = time.time() + + # Train for one epoch + avg_loss, accuracy = training(model, training_dataloader, loss_function, optimizer, device) #tells you if optimizer is doing its job on the data it sees + val_loss, val_acc = evaluate(model, val_loader) #tells you if the network is generalizing; early stopping + + #*NEW* Update the learning rate + scheduler.step() # Without this, the model is unable to converge. 
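+        # Editorial sketch (not part of the original recipe): with StepLR(step_size=10, gamma=0.1)
+        # the schedule works out to lr = LEARNING_RATE * 0.1 ** (epoch // 10),
+        # i.e. 1e-3 for epochs 0-9, 1e-4 for epochs 10-19, 1e-5 for epochs 20-29, and so on.
+        # Logging scheduler.get_last_lr()[0] here is an easy way to confirm the decay is applied.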
+ + + + # ----- save best on *validation* loss ----- + #if avg_loss < best_val_loss: + if val_loss < best_val_loss: + #best_val_loss = avg_loss + best_val_loss = val_loss + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'scheduler_state_dict': scheduler.state_dict(), + 'loss': val_loss + }, 'best_model_checkpoint.pth') + print(f"Epoch {epoch+1}: val_loss improved to {val_loss:.4f}") + patience_counter = 0 + else: + patience_counter += 1 + print(f" Epoch {epoch+1}: Loss did not improve. Patience = {patience_counter}/{patience}") + + # Print epoch summary + elapsed_time = time.time() - start_time + print(f"\nEpoch {epoch+1}/{EPOCHS} | Avg Loss: {avg_loss:.4f} | Acc: {accuracy:.2%} | Time: {elapsed_time:.2f}s") + + # Always save latest weights + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'scheduler_state_dict': scheduler.state_dict(), + 'scaler_state_dict': scaler.state_dict(), + 'loss': avg_loss + }, 'latest_model_checkpoint_epoch.pth') + + # Early stopping check + if patience_counter >= patience: + print(f"\n Early stopping triggered after {epoch+1} epochs.") + break + + # Log weight histograms + + + print("\nTraining Finished!") + + + # --------------------------------------------------- + # 7. CLEANUP + # --------------------------------------------------- + + + + # To view the logs, open a terminal in your project's root directory and run: # tensorboard --logdir=run \ No newline at end of file diff --git a/src/kestrel_vision/kestrel_vision/vision_node.py b/src/kestrel_vision/kestrel_vision/vision_node.py index aab9392..797be19 100644 --- a/src/kestrel_vision/kestrel_vision/vision_node.py +++ b/src/kestrel_vision/kestrel_vision/vision_node.py @@ -1,113 +1,191 @@ -# ROS and system libraries -import rclpy -from rclpy.node import Node -import sys -from pathlib import Path - -# ROS messages -from sensor_msgs.msg import Image -from kestrel_msgs.msg import DetectionWithEmbedding, DetectionWithEmbeddingArray -from vision_msgs.msg import Detection2D, ObjectHypothesisWithPose, BoundingBox2D - -# Computer vision and deep learning libraries -import cv2 -import numpy as np -import torch -from cv_bridge import CvBridge -from ultralytics import YOLO - - -from model import CNNdeepSORT -from . import processing - - -class VisionNode(Node): - """ - This node subscribes to an image topic, runs YOLO and cnn, - and publishes the detections with their feature embeddings. 
- """ - def __init__(self): - super().__init__('vision_node') - - # initialize models - self.yolo_model = YOLO('yolo11m.pt') - self.cnn_model = CNNdeepSORT(embedding_dim=128) - self.get_logger().info("Loaded YOLO and CNNdeepSORT models successfully.") - - self.bridge = CvBridge() - # threshold to stop certain stuff to go into cnn - self.conf_threshold = self.declare_parameter('conf_threshold', 0.5).get_parameter_value().double_value - - # create publishers and subscription - self.pub_det = self.create_publisher(DetectionWithEmbeddingArray, '/kestrel/detections', 10) - self.pub_dbg = self.create_publisher(Image, '/kestrel/debug/frame', 10) - self.image_subscriber = self.create_subscription(Image, '/camera/image_raw', self.image_callback, 10) - - def image_callback(self, msg: Image): - """ - Callback to process every frame - """ - frame = self.bridge.imgmsg_to_cv2(msg, 'bgr8') - yolo_results = self.yolo_model(frame, verbose=False)[0] - - det_array_msg = DetectionWithEmbeddingArray() - det_array_msg.header = msg.header - - for box in yolo_results.boxes: - conf = float(box.conf[0]) - class_name = self.yolo_model.names[int(box.cls[0])] - - # checks if person and confidence is higher than threshold - if class_name == 'person' and conf >= self.conf_threshold: - - appearance_vector = processing.extract_person_embedding( - frame, box, self.cnn_model - ) - - if appearance_vector is None: - continue - - # Populate the custom ROS message - custom_detection = DetectionWithEmbedding() - custom_detection.detection.header = msg.header - - # standard hypothesis msg from ros with info on detection - hypothesis = ObjectHypothesisWithPose() - hypothesis.hypothesis.class_id = class_name - hypothesis.hypothesis.score = conf - custom_detection.detection.results.append(hypothesis) - - x1, y1, x2, y2 = map(int, box.xyxy[0]) # get corners of bbox - custom_detection.detection.bbox = BoundingBox2D( - center=rclpy.geometry.Pose2D(x=float((x1+x2)/2), y=float((y1+y2)/2)), - size_x=float(x2-x1), - size_y=float(y2-y1) - ) - - # add this detection to rest of all detections (the array) - custom_detection.embedding = appearance_vector.tolist() - det_array_msg.detections.append(custom_detection) - - # draws bbox on the frame - cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) - cv2.putText(frame, f'Person {conf:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - - # Publish the results - self.pub_det.publish(det_array_msg) - self.pub_dbg.publish(self.bridge.cv2_to_imgmsg(frame, 'bgr8')) - - -def main(args=None): - """Standard entry point for the ROS node.""" - rclpy.init(args=args) - node = VisionNode() - try: - rclpy.spin(node) - except KeyboardInterrupt: - pass - finally: - node.destroy_node() - rclpy.shutdown() - -if __name__ == '__main__': +# ROS and system libraries +import rclpy +from rclpy.node import Node +from pathlib import Path + +# ROS messages +from sensor_msgs.msg import Image +from kestrel_msgs.msg import Detection, DetectionArray + +# Computer vision and deep learning libraries +import cv2 +import numpy as np +import torch +from cv_bridge import CvBridge +from ultralytics import YOLO + + +from .model import CNNdeepSORT +from . import processing + + +class VisionNode(Node): + """ + This node subscribes to an image topic, runs YOLO and CNN, + and publishes the detections with their feature embeddings. 
+ """ + def __init__(self): + super().__init__('vision_node') + + # initialize models + default_model_path = Path(__file__).resolve().parents[3] / 'models' / 'yolov8n.pt' + + model_path = self.declare_parameter( + 'yolo_model_path', + str(default_model_path) + ).value + + self.yolo_model = YOLO(model_path) + self.yolo_model.to('cpu') #TODO: why CPU? + + # ---- CNN init ---- + self.cnn_model = CNNdeepSORT(embedding_dim=128) + + checkpoint_path = Path(__file__).resolve().parents[3] / 'models' / 'best_model_checkpoint.pth' + if checkpoint_path.is_file(): + try: + # torch.load with weights_only when available (2.1+), else fallback + try: + ckpt = torch.load(checkpoint_path, map_location='cpu', weights_only=True) + except TypeError: + ckpt = torch.load(checkpoint_path, map_location='cpu') + + # Find the actual state_dict inside common wrappers + state = None + if isinstance(ckpt, dict): + for k in ('model_state_dict', 'state_dict', 'net', 'model'): + if k in ckpt and isinstance(ckpt[k], dict): + state = ckpt[k] + break + if state is None: + # maybe it's already a raw state_dict + state = {k: v for k, v in ckpt.items() if hasattr(v, 'dtype')} + if state is None: + raise RuntimeError("Unrecognized checkpoint format") + + # Clean keys: strip 'module.' and drop classifier / batch-tracking + from collections import OrderedDict + cleaned = OrderedDict() + for k, v in state.items(): + if k.startswith('module.'): + k = k[len('module.'):] + if k.startswith('classifier.') or k.endswith('num_batches_tracked'): + continue + cleaned[k] = v + + # Filter to keys that exist in the current model + model_sd = self.cnn_model.state_dict() + intersect = {k: v for k, v in cleaned.items() if k in model_sd and v.shape == model_sd[k].shape} + + # Informative logs + dropped_unexpected = sorted(set(cleaned.keys()) - set(intersect.keys())) + missing_in_ckpt = sorted(set(model_sd.keys()) - set(intersect.keys())) + if dropped_unexpected: + self.get_logger().warn(f"Dropping {len(dropped_unexpected)} checkpoint keys not in the current model (e.g., {dropped_unexpected[:3]})") + if missing_in_ckpt: + self.get_logger().warn(f"{len(missing_in_ckpt)} model keys not found/mismatched in checkpoint (e.g., {missing_in_ckpt[:3]})") + + # Load with strict=False so non-intersecting layers stay at init + self.cnn_model.load_state_dict(intersect, strict=False) + self.get_logger().info("Loaded CNN weights (filtered to matching layers).") + except Exception as e: + self.get_logger().error(f"Failed to load CNN checkpoint: {e}. Using randomly initialized weights.") + else: + self.get_logger().warn(f"Could not find CNN checkpoint at {checkpoint_path}. 
Using untrained model.") + + self.cnn_model.eval() + self.cnn_model.to('cpu') # or to('cuda') later via a device param + + # ----------------------------------------------------------------------- + + self.bridge = CvBridge() + # threshold to stop certain stuff to go into cnn + self.conf_threshold = self.declare_parameter('conf_threshold', 0.5).get_parameter_value().double_value + + # create publishers and subscription + self.pub_det = self.create_publisher(DetectionArray, '/kestrel/detections', 10) + self.pub_dbg = self.create_publisher(Image, '/kestrel/debug/frame', 10) + self.image_subscriber = self.create_subscription(Image, '/camera/image_raw', self.image_callback, 10) + + def image_callback(self, msg: Image): + """ + Callback to process every frame + """ + frame = self.bridge.imgmsg_to_cv2(msg, 'bgr8') + yolo_results = self.yolo_model(frame, verbose=False)[0] + + det_array_msg = DetectionArray() + det_array_msg.header = msg.header + + for box in yolo_results.boxes: + conf = float(box.conf[0]) + class_name = self.yolo_model.names[int(box.cls[0])] + + # checks if person and confidence is higher than threshold + if class_name == 'person' and conf >= self.conf_threshold: + + appearance_vector = processing.extract_person_embedding( + frame, box, self.cnn_model + ) + + if appearance_vector is None: + continue + + # Populate the custom ROS message + detection_msg = Detection() + detection_msg.header = msg.header + + # populate msg with info on detection + detection_msg.class_name = class_name + detection_msg.conf = conf + + x1, y1, x2, y2 = [float(v) for v in box.xyxy[0].tolist()] # get corners of bbox + detection_msg.x1 = float(x1) + detection_msg.y1 = float(y1) + detection_msg.x2 = float(x2) + detection_msg.y2 = float(y2) + + detection_msg.w = float(x2 - x1) + detection_msg.h = float(y2 - y1) + + detection_msg.center_x = float((x1 + x2) / 2.0) + detection_msg.center_y = float((y1 + y2) / 2.0) + + emb = appearance_vector.reshape(-1) + + if emb.shape[0] != 128: + self.get_logger().warn(f"Bad embedding length: {emb.shape[0]}") + continue + + # add this detection to rest of all detections (the array) + detection_msg.embedding = [float(v) for v in emb.tolist()] + det_array_msg.detections.append(detection_msg) + + # draws bbox on the frame + x1i, y1i, x2i, y2i = map(int, [x1, y1, x2, y2]) + cv2.rectangle(frame, (x1i, y1i), (x2i, y2i), (0,255,0), 2) + cv2.putText(frame, f'Person {conf:.2f}', (x1i, y1i - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + + # Publish the results + self.pub_det.publish(det_array_msg) + self.pub_dbg.publish(self.bridge.cv2_to_imgmsg(frame, 'bgr8')) + + +def main(args=None): + """Standard entry point for the ROS node.""" + rclpy.init(args=args) + node = VisionNode() + try: + rclpy.spin(node) + except KeyboardInterrupt: + node.get_logger().info("Shutting down on Ctrl-C") + finally: + node.destroy_node() + try: + if rclpy.ok(): # not already shut down + rclpy.shutdown() + except Exception: + pass + +if __name__ == '__main__': main() \ No newline at end of file diff --git a/src/kestrel_vision/package.xml b/src/kestrel_vision/package.xml index bc3f8d0..0f8fa21 100644 --- a/src/kestrel_vision/package.xml +++ b/src/kestrel_vision/package.xml @@ -1,25 +1,24 @@ - - - kestrel_vision - 0.1.0 - YOLO-based vision detection node for Kestrel Drone using rclpy and OpenCV - David Orjuela - MIT - - ament_python - - rclpy - sensor_msgs - cv_bridge - vision_msgs - ultralytics - - ament_copyright - ament_flake8 - ament_pep257 - python3-pytest - - - ament_python - - + + + 
kestrel_vision + 0.1.0 + YOLO-based vision detection node for Kestrel Drone using rclpy and OpenCV + David Orjuela + MIT + + ament_python + + rclpy + sensor_msgs + cv_bridge + ultralytics + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/src/kestrel_vision/setup.cfg b/src/kestrel_vision/setup.cfg index ff7a8e2..27c1f88 100644 --- a/src/kestrel_vision/setup.cfg +++ b/src/kestrel_vision/setup.cfg @@ -1,4 +1,4 @@ -[develop] -script_dir=$base/lib/kestrel_vision -[install] -install_scripts=$base/lib/kestrel_vision +[develop] +script_dir=$base/lib/kestrel_vision +[install] +install_scripts=$base/lib/kestrel_vision diff --git a/src/kestrel_vision/setup.py b/src/kestrel_vision/setup.py index 6b75bde..aef3709 100644 --- a/src/kestrel_vision/setup.py +++ b/src/kestrel_vision/setup.py @@ -1,26 +1,25 @@ -from setuptools import setup - -package_name = 'kestrel_vision' - -setup( - name=package_name, - version='0.1.0', - packages=[package_name], - data_files=[ - ('share/ament_index/resource_index/packages', - ['resource/' + package_name]), - ('share/' + package_name, ['package.xml']), - ], - install_requires=['setuptools'], - zip_safe=True, - maintainer='David Orjuela', - maintainer_email='da364739@ucf.edu', - description='Vision node for Kestrel drone using YOLO and DeepSORT', - license='MIT', - tests_require=['pytest'], - entry_points={ - 'console_scripts': [ - 'vision_node = kestrel_vision.vision_node:main', - ], - }, -) +from setuptools import setup + +package_name = 'kestrel_vision' + +setup( + name=package_name, + version='0.1.0', + packages=[package_name], + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='David Orjuela', + maintainer_email='da364739@ucf.edu', + description='Vision node for Kestrel drone using YOLO and DeepSORT', + license='MIT', + entry_points={ + 'console_scripts': [ + 'vision_node = kestrel_vision.vision_node:main', + ], + }, +) diff --git a/src/kestrel_vision/test/test_copyright.py b/src/kestrel_vision/test/test_copyright.py index 97a3919..282981a 100644 --- a/src/kestrel_vision/test/test_copyright.py +++ b/src/kestrel_vision/test/test_copyright.py @@ -1,25 +1,25 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_copyright.main import main -import pytest - - -# Remove the `skip` decorator once the source file(s) have a copyright header -@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') -@pytest.mark.copyright -@pytest.mark.linter -def test_copyright(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found errors' +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_copyright.main import main +import pytest + + +# Remove the `skip` decorator once the source file(s) have a copyright header +@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') +@pytest.mark.copyright +@pytest.mark.linter +def test_copyright(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found errors' diff --git a/src/kestrel_vision/test/test_flake8.py b/src/kestrel_vision/test/test_flake8.py index 27ee107..625324e 100644 --- a/src/kestrel_vision/test/test_flake8.py +++ b/src/kestrel_vision/test/test_flake8.py @@ -1,25 +1,25 @@ -# Copyright 2017 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_flake8.main import main_with_errors -import pytest - - -@pytest.mark.flake8 -@pytest.mark.linter -def test_flake8(): - rc, errors = main_with_errors(argv=[]) - assert rc == 0, \ - 'Found %d code style errors / warnings:\n' % len(errors) + \ - '\n'.join(errors) +# Copyright 2017 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_flake8.main import main_with_errors +import pytest + + +@pytest.mark.flake8 +@pytest.mark.linter +def test_flake8(): + rc, errors = main_with_errors(argv=[]) + assert rc == 0, \ + 'Found %d code style errors / warnings:\n' % len(errors) + \ + '\n'.join(errors) diff --git a/src/kestrel_vision/test/test_pep257.py b/src/kestrel_vision/test/test_pep257.py index b234a38..4569717 100644 --- a/src/kestrel_vision/test/test_pep257.py +++ b/src/kestrel_vision/test/test_pep257.py @@ -1,23 +1,23 @@ -# Copyright 2015 Open Source Robotics Foundation, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from ament_pep257.main import main -import pytest - - -@pytest.mark.linter -@pytest.mark.pep257 -def test_pep257(): - rc = main(argv=['.', 'test']) - assert rc == 0, 'Found code style errors / warnings' +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_pep257.main import main +import pytest + + +@pytest.mark.linter +@pytest.mark.pep257 +def test_pep257(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found code style errors / warnings'
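
---

Appendix (editorial sketch, not part of the diff): a minimal standalone subscriber for sanity-checking what the new `vision_node` publishes on `/kestrel/detections`. The node name `detection_echo` is illustrative only; the sketch assumes `kestrel_msgs` and `kestrel_vision` are built and sourced, and it reads the same fields that `vision_node.py` populates (`class_name`, `conf`, `x1`..`y2`, `embedding`).

```python
import numpy as np
import rclpy
from rclpy.node import Node
from kestrel_msgs.msg import DetectionArray


class DetectionEcho(Node):
    """Logs a one-line summary for every DetectionArray published by vision_node."""

    def __init__(self):
        super().__init__('detection_echo')
        self.sub = self.create_subscription(
            DetectionArray, '/kestrel/detections', self.callback, 10)

    def callback(self, msg: DetectionArray):
        self.get_logger().info(f"{len(msg.detections)} detection(s) at stamp {msg.header.stamp.sec}")
        for det in msg.detections:
            emb = np.asarray(det.embedding, dtype=np.float32)
            self.get_logger().info(
                f"  {det.class_name} conf={det.conf:.2f} "
                f"bbox=({det.x1:.0f},{det.y1:.0f})-({det.x2:.0f},{det.y2:.0f}) "
                f"emb_dim={emb.shape[0]} |emb|={np.linalg.norm(emb):.3f}")


def main(args=None):
    rclpy.init(args=args)
    node = DetectionEcho()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        node.destroy_node()
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == '__main__':
    main()
```

This can be run directly with `python3` from a sourced workspace (or wired up as another console_scripts entry point) while `vision_node` and a camera publisher are active, to confirm end to end that detections and 128-dim embeddings are flowing before the tracking node consumes them.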