From a9bb9a8ad9fa8bf9864798efea38d3bbeca69527 Mon Sep 17 00:00:00 2001 From: Rakshitha Ireddi Date: Sun, 11 Jan 2026 22:19:12 +0530 Subject: [PATCH] Add 3 new CV projects: Volume Control, Drowsiness Detection, Invisible Cloak --- DrowsinessDetection/README.md | 19 +++ DrowsinessDetection/drowsiness_detect.py | 127 ++++++++++++++++++ DrowsinessDetection/requirements.txt | 5 + InvisibleCloak/invisible_cloak.py | 71 ++++++++++ InvisibleCloak/requirements.txt | 2 + .../hand_tracking_module.py | 74 ++++++++++ VolumeControlHandGestures/requirements.txt | 5 + VolumeControlHandGestures/volume_control.py | 83 ++++++++++++ 8 files changed, 386 insertions(+) create mode 100644 DrowsinessDetection/README.md create mode 100644 DrowsinessDetection/drowsiness_detect.py create mode 100644 DrowsinessDetection/requirements.txt create mode 100644 InvisibleCloak/invisible_cloak.py create mode 100644 InvisibleCloak/requirements.txt create mode 100644 VolumeControlHandGestures/hand_tracking_module.py create mode 100644 VolumeControlHandGestures/requirements.txt create mode 100644 VolumeControlHandGestures/volume_control.py diff --git a/DrowsinessDetection/README.md b/DrowsinessDetection/README.md new file mode 100644 index 0000000..9d9d977 --- /dev/null +++ b/DrowsinessDetection/README.md @@ -0,0 +1,19 @@ +# Drowsiness Detection Setup + +1. Install requirements: + ```bash + pip install -r requirements.txt + ``` + *Note: Installing `dlib` requires CMake installed on your system.* + +2. Download Facial Landmark Model: + The code relies on `shape_predictor_68_face_landmarks.dat`. + + - Download it from: [https://github.com/davisking/dlib-models/blob/master/shape_predictor_68_face_landmarks.dat.bz2](https://github.com/davisking/dlib-models/blob/master/shape_predictor_68_face_landmarks.dat.bz2) + - Extract the `.bz2` file. + - Place the `.dat` file in this directory (`DrowsinessDetection/`). + +3. Run the script: + ```bash + python drowsiness_detect.py + ``` diff --git a/DrowsinessDetection/drowsiness_detect.py b/DrowsinessDetection/drowsiness_detect.py new file mode 100644 index 0000000..110d2e3 --- /dev/null +++ b/DrowsinessDetection/drowsiness_detect.py @@ -0,0 +1,127 @@ +from scipy.spatial import distance as dist +from imutils import face_utils +import imutils +import dlib +import cv2 +import winsound # For Windows, easier than playsound for simple beep + +def eye_aspect_ratio(eye): + # compute the euclidean distances between the two sets of + # vertical eye landmarks (x, y)-coordinates + A = dist.euclidean(eye[1], eye[5]) + B = dist.euclidean(eye[2], eye[4]) + + # compute the euclidean distance between the horizontal + # eye landmark (x, y)-coordinates + C = dist.euclidean(eye[0], eye[3]) + + # compute the eye aspect ratio + ear = (A + B) / (2.0 * C) + + # return the eye aspect_ratio + return ear + +# Define constants +EYE_AR_THRESH = 0.25 +EYE_AR_CONSEC_FRAMES = 48 + +# Initialize frames counter and alarm status +COUNTER = 0 +ALARM_ON = False + +# Initialize dlib's face detector (HOG-based) and then create +# the facial landmark predictor +print("[INFO] loading facial landmark predictor...") +detector = dlib.get_frontal_face_detector() +# NOTE: The user must have this file. We will add a README to explain where to get it. 
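+# Optional guard: fail early with a clear message if the landmark model has not
+# been downloaded yet, rather than letting dlib raise a generic RuntimeError.
+# The README in this directory explains where to get the .dat file.
+import os
+if not os.path.exists("shape_predictor_68_face_landmarks.dat"):
+    raise SystemExit("[ERROR] shape_predictor_68_face_landmarks.dat not found - "
+                     "see DrowsinessDetection/README.md for the download link.")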
+predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") + +# Grab the indexes of the facial landmarks for the left and +# right eye, respectively +(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"] +(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"] + +# Start video stream +print("[INFO] starting video stream thread...") +cap = cv2.VideoCapture(0) + +while True: + ret, frame = cap.read() + if not ret: + break + + frame = imutils.resize(frame, width=450) + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + + # detect faces in the grayscale frame + rects = detector(gray, 0) + + for rect in rects: + # determine the facial landmarks for the face region, then + # convert the facial landmark (x, y)-coordinates to a NumPy + # array + shape = predictor(gray, rect) + shape = face_utils.shape_to_np(shape) + + # extract the left and right eye coordinates, then use the + # coordinates to compute the eye aspect ratio for both eyes + leftEye = shape[lStart:lEnd] + rightEye = shape[rStart:rEnd] + leftEAR = eye_aspect_ratio(leftEye) + rightEAR = eye_aspect_ratio(rightEye) + + # average the eye aspect ratio together for both eyes + ear = (leftEAR + rightEAR) / 2.0 + + # compute the convex hull for the left and right eye, then + # visualize each of the eyes + leftEyeHull = cv2.convexHull(leftEye) + rightEyeHull = cv2.convexHull(rightEye) + cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1) + cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1) + + # check to see if the eye aspect ratio is below the blink + # threshold, and if so, increment the blink frame counter + if ear < EYE_AR_THRESH: + COUNTER += 1 + + # if the eyes were closed for a sufficient number of + # then sound the alarm + if COUNTER >= EYE_AR_CONSEC_FRAMES: + # if the alarm is not on, turn it on + if not ALARM_ON: + ALARM_ON = True + + # check to see if an alarm file was supplied, + # and if so, utilize it + print("[ALERT] Drowsiness Detected!") + # Using winsound for simple beep on Windows + winsound.Beep(2500, 1000) + + # draw an alarm on the frame + cv2.putText(frame, "DROWSINESS ALERT!", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) + + # otherwise, the eye aspect ratio is not below the blink + # threshold + else: + COUNTER = 0 + ALARM_ON = False + + # draw the computed eye aspect ratio on the frame to help + # with debugging and setting the correct eye aspect ratio + # thresholds and frame counters + cv2.putText(frame, "EAR: {:.2f}".format(ear), (300, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) + + # show the frame + cv2.imshow("Frame", frame) + key = cv2.waitKey(1) & 0xFF + + # if the `q` key was pressed, break from the loop + if key == ord("q"): + break + +# do a bit of cleanup +cv2.destroyAllWindows() +cap.release() diff --git a/DrowsinessDetection/requirements.txt b/DrowsinessDetection/requirements.txt new file mode 100644 index 0000000..e6b464a --- /dev/null +++ b/DrowsinessDetection/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +scipy +imutils +dlib +cmake diff --git a/InvisibleCloak/invisible_cloak.py b/InvisibleCloak/invisible_cloak.py new file mode 100644 index 0000000..664af72 --- /dev/null +++ b/InvisibleCloak/invisible_cloak.py @@ -0,0 +1,71 @@ +import cv2 +import numpy as np +import time + +def main(): + print("Welcome to the Invisible Cloak!") + print("Instructions:") + print("1. The camera will capture the background for 3 seconds.") + print(" Please move out of the frame!") + print("2. 
Once started, anything RED will become invisible.") + print("3. Press 'q' to quit.") + + cap = cv2.VideoCapture(0) + time.sleep(3) # Give time to user to move aside + background = 0 + + # Capturing the background + for i in range(60): + ret, background = cap.read() + + background = np.flip(background, axis=1) + + while(cap.isOpened()): + ret, img = cap.read() + if not ret: + break + + img = np.flip(img, axis=1) + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + + # Generating mask to detect red color + # HSV values for Red can wrap around, so we need two ranges. + + # Range 1 (0-10) + lower_red = np.array([0, 120, 70]) + upper_red = np.array([10, 255, 255]) + mask1 = cv2.inRange(hsv, lower_red, upper_red) + + # Range 2 (170-180) + lower_red = np.array([170, 120, 70]) + upper_red = np.array([180, 255, 255]) + mask2 = cv2.inRange(hsv, lower_red, upper_red) + + mask1 = mask1 + mask2 # OR operation + + # Refining the mask + mask1 = cv2.morphologyEx(mask1, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8)) + mask1 = cv2.morphologyEx(mask1, cv2.MORPH_DILATE, np.ones((3, 3), np.uint8)) + + # Creating an inverted mask to segment out the red color + mask2 = cv2.bitwise_not(mask1) + + # Segmenting the red color part out of the frame using bitwise and with the inverted mask + res1 = cv2.bitwise_and(img, img, mask=mask2) + + # Creating image showing static background frame pixels only for the masked region + res2 = cv2.bitwise_and(background, background, mask=mask1) + + # Generating the final output + final_output = cv2.addWeighted(res1, 1, res2, 1, 0) + + cv2.imshow("Invisible Cloak", final_output) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + cap.release() + cv2.destroyAllWindows() + +if __name__ == "__main__": + main() diff --git a/InvisibleCloak/requirements.txt b/InvisibleCloak/requirements.txt new file mode 100644 index 0000000..b96544b --- /dev/null +++ b/InvisibleCloak/requirements.txt @@ -0,0 +1,2 @@ +opencv-python +numpy diff --git a/VolumeControlHandGestures/hand_tracking_module.py b/VolumeControlHandGestures/hand_tracking_module.py new file mode 100644 index 0000000..5ed1530 --- /dev/null +++ b/VolumeControlHandGestures/hand_tracking_module.py @@ -0,0 +1,74 @@ +import cv2 +import mediapipe as mp +import time +import math + +class HandDetector: + def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5): + self.mode = mode + self.maxHands = maxHands + self.detectionCon = detectionCon + self.trackCon = trackCon + + self.mpHands = mp.solutions.hands + # Note: model_complexity is required in recent mediapipe versions, but defaults are usually fine. + # Strict type checking might complain about float vs int for confidence, ensuring they are valid. 
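+        # Optional defensive casts, following the note above: make sure the hand
+        # count is an int and the confidence thresholds are floats before
+        # constructing Hands(), since some mediapipe releases validate these
+        # argument types strictly.
+        self.maxHands = int(self.maxHands)
+        self.detectionCon = float(self.detectionCon)
+        self.trackCon = float(self.trackCon)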
+ self.hands = self.mpHands.Hands( + static_image_mode=self.mode, + max_num_hands=self.maxHands, + min_detection_confidence=self.detectionCon, + min_tracking_confidence=self.trackCon + ) + self.mpDraw = mp.solutions.drawing_utils + + def findHands(self, img, draw=True): + imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + self.results = self.hands.process(imgRGB) + + if self.results.multi_hand_landmarks: + for handLms in self.results.multi_hand_landmarks: + if draw: + self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS) + return img + + def findPosition(self, img, handNo=0, draw=True): + lmList = [] + if self.results.multi_hand_landmarks: + myHand = self.results.multi_hand_landmarks[handNo] + for id, lm in enumerate(myHand.landmark): + h, w, c = img.shape + cx, cy = int(lm.x * w), int(lm.y * h) + lmList.append([id, cx, cy]) + if draw: + cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED) + return lmList + +def main(): + pTime = 0 + cTime = 0 + cap = cv2.VideoCapture(0) + detector = HandDetector() + + while True: + success, img = cap.read() + if not success: + break + + img = detector.findHands(img) + lmList = detector.findPosition(img) + if len(lmList) != 0: + print(lmList[4]) # Tip of thumb + + cTime = time.time() + fps = 1 / (cTime - pTime) + pTime = cTime + + cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, + (255, 0, 255), 3) + + cv2.imshow("Image", img) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + +if __name__ == "__main__": + main() diff --git a/VolumeControlHandGestures/requirements.txt b/VolumeControlHandGestures/requirements.txt new file mode 100644 index 0000000..1874fdb --- /dev/null +++ b/VolumeControlHandGestures/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +mediapipe +numpy +pycaw +comtypes diff --git a/VolumeControlHandGestures/volume_control.py b/VolumeControlHandGestures/volume_control.py new file mode 100644 index 0000000..a624c57 --- /dev/null +++ b/VolumeControlHandGestures/volume_control.py @@ -0,0 +1,83 @@ +import cv2 +import time +import numpy as np +import hand_tracking_module as htm +import math +from ctypes import cast, POINTER +from comtypes import CLSCTX_ALL +from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume + +################################ +wCam, hCam = 640, 480 +################################ + +cap = cv2.VideoCapture(0) +cap.set(3, wCam) +cap.set(4, hCam) +pTime = 0 + +detector = htm.HandDetector(detectionCon=0.7) + +# PyCaw Initialization for Windows Volume Control +devices = AudioUtilities.GetSpeakers() +interface = devices.Activate( + IAudioEndpointVolume._iid_, CLSCTX_ALL, None) +volume = cast(interface, POINTER(IAudioEndpointVolume)) +volRange = volume.GetVolumeRange() +minVol = volRange[0] +maxVol = volRange[1] + +vol = 0 +volBar = 400 +volPer = 0 + +print(f"Volume Range: {minVol} to {maxVol}") + +while True: + success, img = cap.read() + if not success: + break + + img = detector.findHands(img) + lmList = detector.findPosition(img, draw=False) + + if len(lmList) != 0: + # Landmark 4 is Thumb Tip, 8 is Index Finger Tip + x1, y1 = lmList[4][1], lmList[4][2] + x2, y2 = lmList[8][1], lmList[8][2] + cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 + + cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED) + cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED) + cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3) + cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED) + + length = math.hypot(x2 - x1, y2 - y1) + + # Hand Range usually 50 to 300 + # Volume Range -65 to 0 + vol = np.interp(length, 
[50, 300], [minVol, maxVol]) + volBar = np.interp(length, [50, 300], [400, 150]) + volPer = np.interp(length, [50, 300], [0, 100]) + + print(int(length), vol) + volume.SetMasterVolumeLevel(vol, None) + + if length < 50: + cv2.circle(img, (cx, cy), 15, (0, 255, 0), cv2.FILLED) + + cv2.rectangle(img, (50, 150), (85, 400), (255, 0, 0), 3) + cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED) + cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX, + 1, (255, 0, 0), 3) + + cTime = time.time() + fps = 1 / (cTime - pTime) + pTime = cTime + + cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_COMPLEX, + 1, (255, 0, 0), 3) + + cv2.imshow("Img", img) + if cv2.waitKey(1) & 0xFF == ord('q'): + break
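+
+# Release the webcam and close the preview window on exit, mirroring the cleanup
+# done in the other scripts in this patch.
+cap.release()
+cv2.destroyAllWindows()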