From a9bb9a8ad9fa8bf9864798efea38d3bbeca69527 Mon Sep 17 00:00:00 2001 From: Rakshitha Ireddi Date: Sun, 11 Jan 2026 22:19:12 +0530 Subject: [PATCH] Add 3 new CV projects: Volume Control, Drowsiness Detection, Invisible Cloak --- DrowsinessDetection/README.md | 19 +++ DrowsinessDetection/drowsiness_detect.py | 127 ++++++++++++++++++ DrowsinessDetection/requirements.txt | 5 + InvisibleCloak/invisible_cloak.py | 71 ++++++++++ InvisibleCloak/requirements.txt | 2 + .../hand_tracking_module.py | 74 ++++++++++ VolumeControlHandGestures/requirements.txt | 5 + VolumeControlHandGestures/volume_control.py | 83 ++++++++++++ 8 files changed, 386 insertions(+) create mode 100644 DrowsinessDetection/README.md create mode 100644 DrowsinessDetection/drowsiness_detect.py create mode 100644 DrowsinessDetection/requirements.txt create mode 100644 InvisibleCloak/invisible_cloak.py create mode 100644 InvisibleCloak/requirements.txt create mode 100644 VolumeControlHandGestures/hand_tracking_module.py create mode 100644 VolumeControlHandGestures/requirements.txt create mode 100644 VolumeControlHandGestures/volume_control.py diff --git a/DrowsinessDetection/README.md b/DrowsinessDetection/README.md new file mode 100644 index 0000000..9d9d977 --- /dev/null +++ b/DrowsinessDetection/README.md @@ -0,0 +1,19 @@ +# Drowsiness Detection Setup + +1. Install requirements: + ```bash + pip install -r requirements.txt + ``` + *Note: Installing `dlib` requires CMake installed on your system.* + +2. Download Facial Landmark Model: + The code relies on `shape_predictor_68_face_landmarks.dat`. + + - Download it from: [https://github.com/davisking/dlib-models/blob/master/shape_predictor_68_face_landmarks.dat.bz2](https://github.com/davisking/dlib-models/blob/master/shape_predictor_68_face_landmarks.dat.bz2) + - Extract the `.bz2` file. + - Place the `.dat` file in this directory (`DrowsinessDetection/`). + +3. Run the script: + ```bash + python drowsiness_detect.py + ``` diff --git a/DrowsinessDetection/drowsiness_detect.py b/DrowsinessDetection/drowsiness_detect.py new file mode 100644 index 0000000..110d2e3 --- /dev/null +++ b/DrowsinessDetection/drowsiness_detect.py @@ -0,0 +1,127 @@ +from scipy.spatial import distance as dist +from imutils import face_utils +import imutils +import dlib +import cv2 +import winsound # For Windows, easier than playsound for simple beep + +def eye_aspect_ratio(eye): + # compute the euclidean distances between the two sets of + # vertical eye landmarks (x, y)-coordinates + A = dist.euclidean(eye[1], eye[5]) + B = dist.euclidean(eye[2], eye[4]) + + # compute the euclidean distance between the horizontal + # eye landmark (x, y)-coordinates + C = dist.euclidean(eye[0], eye[3]) + + # compute the eye aspect ratio + ear = (A + B) / (2.0 * C) + + # return the eye aspect_ratio + return ear + +# Define constants +EYE_AR_THRESH = 0.25 +EYE_AR_CONSEC_FRAMES = 48 + +# Initialize frames counter and alarm status +COUNTER = 0 +ALARM_ON = False + +# Initialize dlib's face detector (HOG-based) and then create +# the facial landmark predictor +print("[INFO] loading facial landmark predictor...") +detector = dlib.get_frontal_face_detector() +# NOTE: The user must have this file. We will add a README to explain where to get it. 
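+# Optional guard: fail early with a clear message if the landmark model has not
+# been downloaded yet, rather than letting dlib raise a generic RuntimeError.
+# The README in this directory explains where to get the .dat file.
+import os
+if not os.path.exists("shape_predictor_68_face_landmarks.dat"):
+    raise SystemExit("[ERROR] shape_predictor_68_face_landmarks.dat not found - "
+                     "see DrowsinessDetection/README.md for the download link.")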
+predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") + +# Grab the indexes of the facial landmarks for the left and +# right eye, respectively +(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"] +(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"] + +# Start video stream +print("[INFO] starting video stream thread...") +cap = cv2.VideoCapture(0) + +while True: + ret, frame = cap.read() + if not ret: + break + + frame = imutils.resize(frame, width=450) + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + + # detect faces in the grayscale frame + rects = detector(gray, 0) + + for rect in rects: + # determine the facial landmarks for the face region, then + # convert the facial landmark (x, y)-coordinates to a NumPy + # array + shape = predictor(gray, rect) + shape = face_utils.shape_to_np(shape) + + # extract the left and right eye coordinates, then use the + # coordinates to compute the eye aspect ratio for both eyes + leftEye = shape[lStart:lEnd] + rightEye = shape[rStart:rEnd] + leftEAR = eye_aspect_ratio(leftEye) + rightEAR = eye_aspect_ratio(rightEye) + + # average the eye aspect ratio together for both eyes + ear = (leftEAR + rightEAR) / 2.0 + + # compute the convex hull for the left and right eye, then + # visualize each of the eyes + leftEyeHull = cv2.convexHull(leftEye) + rightEyeHull = cv2.convexHull(rightEye) + cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1) + cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1) + + # check to see if the eye aspect ratio is below the blink + # threshold, and if so, increment the blink frame counter + if ear < EYE_AR_THRESH: + COUNTER += 1 + + # if the eyes were closed for a sufficient number of + # then sound the alarm + if COUNTER >= EYE_AR_CONSEC_FRAMES: + # if the alarm is not on, turn it on + if not ALARM_ON: + ALARM_ON = True + + # check to see if an alarm file was supplied, + # and if so, utilize it + print("[ALERT] Drowsiness Detected!") + # Using winsound for simple beep on Windows + winsound.Beep(2500, 1000) + + # draw an alarm on the frame + cv2.putText(frame, "DROWSINESS ALERT!", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) + + # otherwise, the eye aspect ratio is not below the blink + # threshold + else: + COUNTER = 0 + ALARM_ON = False + + # draw the computed eye aspect ratio on the frame to help + # with debugging and setting the correct eye aspect ratio + # thresholds and frame counters + cv2.putText(frame, "EAR: {:.2f}".format(ear), (300, 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) + + # show the frame + cv2.imshow("Frame", frame) + key = cv2.waitKey(1) & 0xFF + + # if the `q` key was pressed, break from the loop + if key == ord("q"): + break + +# do a bit of cleanup +cv2.destroyAllWindows() +cap.release() diff --git a/DrowsinessDetection/requirements.txt b/DrowsinessDetection/requirements.txt new file mode 100644 index 0000000..e6b464a --- /dev/null +++ b/DrowsinessDetection/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +scipy +imutils +dlib +cmake diff --git a/InvisibleCloak/invisible_cloak.py b/InvisibleCloak/invisible_cloak.py new file mode 100644 index 0000000..664af72 --- /dev/null +++ b/InvisibleCloak/invisible_cloak.py @@ -0,0 +1,71 @@ +import cv2 +import numpy as np +import time + +def main(): + print("Welcome to the Invisible Cloak!") + print("Instructions:") + print("1. The camera will capture the background for 3 seconds.") + print(" Please move out of the frame!") + print("2. 
Once started, anything RED will become invisible.") + print("3. Press 'q' to quit.") + + cap = cv2.VideoCapture(0) + time.sleep(3) # Give time to user to move aside + background = 0 + + # Capturing the background + for i in range(60): + ret, background = cap.read() + + background = np.flip(background, axis=1) + + while(cap.isOpened()): + ret, img = cap.read() + if not ret: + break + + img = np.flip(img, axis=1) + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + + # Generating mask to detect red color + # HSV values for Red can wrap around, so we need two ranges. + + # Range 1 (0-10) + lower_red = np.array([0, 120, 70]) + upper_red = np.array([10, 255, 255]) + mask1 = cv2.inRange(hsv, lower_red, upper_red) + + # Range 2 (170-180) + lower_red = np.array([170, 120, 70]) + upper_red = np.array([180, 255, 255]) + mask2 = cv2.inRange(hsv, lower_red, upper_red) + + mask1 = mask1 + mask2 # OR operation + + # Refining the mask + mask1 = cv2.morphologyEx(mask1, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8)) + mask1 = cv2.morphologyEx(mask1, cv2.MORPH_DILATE, np.ones((3, 3), np.uint8)) + + # Creating an inverted mask to segment out the red color + mask2 = cv2.bitwise_not(mask1) + + # Segmenting the red color part out of the frame using bitwise and with the inverted mask + res1 = cv2.bitwise_and(img, img, mask=mask2) + + # Creating image showing static background frame pixels only for the masked region + res2 = cv2.bitwise_and(background, background, mask=mask1) + + # Generating the final output + final_output = cv2.addWeighted(res1, 1, res2, 1, 0) + + cv2.imshow("Invisible Cloak", final_output) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + cap.release() + cv2.destroyAllWindows() + +if __name__ == "__main__": + main() diff --git a/InvisibleCloak/requirements.txt b/InvisibleCloak/requirements.txt new file mode 100644 index 0000000..b96544b --- /dev/null +++ b/InvisibleCloak/requirements.txt @@ -0,0 +1,2 @@ +opencv-python +numpy diff --git a/VolumeControlHandGestures/hand_tracking_module.py b/VolumeControlHandGestures/hand_tracking_module.py new file mode 100644 index 0000000..5ed1530 --- /dev/null +++ b/VolumeControlHandGestures/hand_tracking_module.py @@ -0,0 +1,74 @@ +import cv2 +import mediapipe as mp +import time +import math + +class HandDetector: + def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5): + self.mode = mode + self.maxHands = maxHands + self.detectionCon = detectionCon + self.trackCon = trackCon + + self.mpHands = mp.solutions.hands + # Note: model_complexity is required in recent mediapipe versions, but defaults are usually fine. + # Strict type checking might complain about float vs int for confidence, ensuring they are valid. 
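+        # Optional defensive casts, following the note above: make sure the hand
+        # count is an int and the confidence thresholds are floats before
+        # constructing Hands(), since some mediapipe releases validate these
+        # argument types strictly.
+        self.maxHands = int(self.maxHands)
+        self.detectionCon = float(self.detectionCon)
+        self.trackCon = float(self.trackCon)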
+ self.hands = self.mpHands.Hands( + static_image_mode=self.mode, + max_num_hands=self.maxHands, + min_detection_confidence=self.detectionCon, + min_tracking_confidence=self.trackCon + ) + self.mpDraw = mp.solutions.drawing_utils + + def findHands(self, img, draw=True): + imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + self.results = self.hands.process(imgRGB) + + if self.results.multi_hand_landmarks: + for handLms in self.results.multi_hand_landmarks: + if draw: + self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS) + return img + + def findPosition(self, img, handNo=0, draw=True): + lmList = [] + if self.results.multi_hand_landmarks: + myHand = self.results.multi_hand_landmarks[handNo] + for id, lm in enumerate(myHand.landmark): + h, w, c = img.shape + cx, cy = int(lm.x * w), int(lm.y * h) + lmList.append([id, cx, cy]) + if draw: + cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED) + return lmList + +def main(): + pTime = 0 + cTime = 0 + cap = cv2.VideoCapture(0) + detector = HandDetector() + + while True: + success, img = cap.read() + if not success: + break + + img = detector.findHands(img) + lmList = detector.findPosition(img) + if len(lmList) != 0: + print(lmList[4]) # Tip of thumb + + cTime = time.time() + fps = 1 / (cTime - pTime) + pTime = cTime + + cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, + (255, 0, 255), 3) + + cv2.imshow("Image", img) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + +if __name__ == "__main__": + main() diff --git a/VolumeControlHandGestures/requirements.txt b/VolumeControlHandGestures/requirements.txt new file mode 100644 index 0000000..1874fdb --- /dev/null +++ b/VolumeControlHandGestures/requirements.txt @@ -0,0 +1,5 @@ +opencv-python +mediapipe +numpy +pycaw +comtypes diff --git a/VolumeControlHandGestures/volume_control.py b/VolumeControlHandGestures/volume_control.py new file mode 100644 index 0000000..a624c57 --- /dev/null +++ b/VolumeControlHandGestures/volume_control.py @@ -0,0 +1,83 @@ +import cv2 +import time +import numpy as np +import hand_tracking_module as htm +import math +from ctypes import cast, POINTER +from comtypes import CLSCTX_ALL +from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume + +################################ +wCam, hCam = 640, 480 +################################ + +cap = cv2.VideoCapture(0) +cap.set(3, wCam) +cap.set(4, hCam) +pTime = 0 + +detector = htm.HandDetector(detectionCon=0.7) + +# PyCaw Initialization for Windows Volume Control +devices = AudioUtilities.GetSpeakers() +interface = devices.Activate( + IAudioEndpointVolume._iid_, CLSCTX_ALL, None) +volume = cast(interface, POINTER(IAudioEndpointVolume)) +volRange = volume.GetVolumeRange() +minVol = volRange[0] +maxVol = volRange[1] + +vol = 0 +volBar = 400 +volPer = 0 + +print(f"Volume Range: {minVol} to {maxVol}") + +while True: + success, img = cap.read() + if not success: + break + + img = detector.findHands(img) + lmList = detector.findPosition(img, draw=False) + + if len(lmList) != 0: + # Landmark 4 is Thumb Tip, 8 is Index Finger Tip + x1, y1 = lmList[4][1], lmList[4][2] + x2, y2 = lmList[8][1], lmList[8][2] + cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 + + cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED) + cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED) + cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3) + cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED) + + length = math.hypot(x2 - x1, y2 - y1) + + # Hand Range usually 50 to 300 + # Volume Range -65 to 0 + vol = np.interp(length, 
[50, 300], [minVol, maxVol]) + volBar = np.interp(length, [50, 300], [400, 150]) + volPer = np.interp(length, [50, 300], [0, 100]) + + print(int(length), vol) + volume.SetMasterVolumeLevel(vol, None) + + if length < 50: + cv2.circle(img, (cx, cy), 15, (0, 255, 0), cv2.FILLED) + + cv2.rectangle(img, (50, 150), (85, 400), (255, 0, 0), 3) + cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED) + cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX, + 1, (255, 0, 0), 3) + + cTime = time.time() + fps = 1 / (cTime - pTime) + pTime = cTime + + cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_COMPLEX, + 1, (255, 0, 0), 3) + + cv2.imshow("Img", img) + if cv2.waitKey(1) & 0xFF == ord('q'): + break
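+
+# Release the webcam and close the preview window on exit, mirroring the cleanup
+# done in the other scripts in this patch.
+cap.release()
+cv2.destroyAllWindows()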