From 9bcbd10e36c3c0c6438232c717a7d57ce46cefb2 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sat, 11 Oct 2025 21:30:38 -0400 Subject: [PATCH 01/15] Update web scraping for printers --- .DS_Store | Bin 6148 -> 0 bytes src/data/scrapers/printers.py | 234 ++++++++++++++++++++++++++++++---- 2 files changed, 211 insertions(+), 23 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 5b928bb719a93f6dc27edc3b82270d14a3109785..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK!AiqG5S?wSO({wb3LY1{R*X?v#7n6914i_qQky1dFwK^vwTDv3S%1hc@q3)v z-Ac9U!Gnt3f!Q}ZJF{V4!fpltSZf&U0Mr1$LM1FzaQHxIopeqL+EYSga*q&F*n%zy z=*>kN$6sWC-rXt$_{O?$2ea?5kG>2-f0Xt8Soj$Ci8cYa8RSv)`!mJ=>Yo`FP*4(cW!LCKYRabNk?=eHZr<@t}xh_zg-~(Kv-iG`_6y z-b<5MBsUmGdla$m6uQ4%dDGdg^PT2Q<&C{>RIaOeSL2i$gN6aaz+y6>&r7SinB|+Q z4FiUOFBqWnfuj<-8gqs6=)gg?0EqM(DFtonB`Ak$bT#G*aRr5`R791^bc?}MI{LYe zb2a7)RXQ-;d@%iFraKfSpN{);84k=]+>") +BRACKET_CONTENT_RE = re.compile(r"[\(\[\{].*?[\)\]\}]") +MULTI_SPACE_RE = re.compile(r"\s+") +DELIMS_RE = re.compile(r"\s*[-–—:/|]\s*") +COORD_SPLIT_RE = re.compile(r"\s*,\s*") +ALL_CAPS_PHRASE_RE = re.compile(r"\b[A-Z]{2,}(?:\s+[A-Z]{2,})*\b") +TRAILING_CAPS_RE = re.compile(r"\b[A-Z]{2,}(?:\s+[A-Z]{2,})*\s*$") +LABEL_PHRASES_RE = re.compile( + r""" + \bresidents?\s*only\b | + \bstudents?\s*only\b | + \baa\s*&\s*p\b | + \baap\b + """, re.IGNORECASE | re.VERBOSE +) +RESIDUAL_TRAILING_LABEL_RE = re.compile( + r"\b(?:resident|residents|student|students|staff|public)\b\s*$", + re.IGNORECASE +) + +def _norm(s): + """ + Unicode/HTML/whitespace normalization. + """ + if s is None: + return "" + s = unicodedata.normalize('NFKC', s) # Normalizes unicode text + s = HTML_TAG_RE.sub(" ", s) + s = s.replace("*", " ") + s = BRACKET_CONTENT_RE.sub(" ", s) + s = MULTI_SPACE_RE.sub(" ", s).strip() + return s + +def _strip_trailing_allcaps(s): + """ + Remove trailing ALL-CAPS qualifiers (e.g., RESIDENTS ONLY). 
+ """ + return TRAILING_CAPS_RE.sub("", s).strip() + +# def _title_clean(s: str) -> str: +# """ +# Nice display casing: keep acronyms as-is, titlecase other words. +# """ +# words = s.split() +# fixed = [w if w.isupper() else w.title() for w in words] +# return " ".join(fixed) + +def _pre_clean_for_match(s: str) -> str: + s = _norm(s) + s = LABEL_PHRASES_RE.sub(" ", s) # <— removes "Resident(s) only", "AA&P", etc. + s = _strip_trailing_allcaps(s) + s = RESIDUAL_TRAILING_LABEL_RE.sub(" ", s) # <— removes "Resident", "Students", etc. + + s = re.sub(r"[^\w\s\-’']", " ", s) # punctuation noise + s = re.sub(r"\s+", " ", s).strip() + return s + +def _token_sort(s): + tokens = s.lower().split() + tokens.sort() + return " ".join(tokens) + +def map_building(name, threshold=87): + if not name: + return None, 0 + + query = _token_sort(_pre_clean_for_match(name)) + canon_token_list = [_token_sort(_pre_clean_for_match(c)) for c in CANONICAL_BUILDINGS] + + best = get_close_matches(query, canon_token_list, n=1) # Returns a list of the (top-1) closest match to the cleaned name - # Locate the table - table = soup.find("table", {"id": "directoryTable"}) - rows = table.find("tbody").find_all("tr") + # If no matches (empty list), return the original name and 0 + if not best: + return name, 0 - # Extract data + # Return the closest match and its similarity score + match = best[0] + + # Calculate the similarity score of the match to the original name (for internal use, potential debugging purposes) + index = canon_token_list.index(match) + canon_raw = CANONICAL_BUILDINGS[index] + score = int(SequenceMatcher(None, query, match).ratio() * 100) + + # If the score is below the threshold, return the original name instead of the canonical name + return (canon_raw, score) if score >= threshold else (name, score) + +def map_labels(description): + """ + Extract label tokens from the description. 
+ """ + if not description: + return [], description + + labels = LABEL_PHRASES_RE.findall(description) + labels = [label.title().replace("Aa&P", "AA&P").replace("Aap", "AA&P") for label in labels] + description = LABEL_PHRASES_RE.sub("", description).strip() + description = RESIDUAL_TRAILING_LABEL_RE.sub("", description).strip() + description = MULTI_SPACE_RE.sub(" ", description) + + return labels, description + +def fetch_printers_json(): + """ + Fetch printer data in JSON format from the CU Print directory endpoint. + """ + resp = requests.get(URL, headers=HEADERS, timeout=20) + resp.raise_for_status() + return resp.json() + +def scrape_printers(): + """ + Scrape CU Print printer locations from the Cornell directory page. + """ + payload = fetch_printers_json() data = [] - for row in rows: - cols = row.find_all("td") - if len(cols) < 3: # Ensure row has enough columns - continue - - location_name = cols[0].text.strip() - description = cols[1].text.strip() - - # Extract coordinates from the hyperlink tag inside - coordinates_link = cols[2].find("a") - coordinates_string = coordinates_link.text.strip() if coordinates_link else "" - coordinates = [float(x) for x in coordinates_string.split(', ')] + # payload['rows'] is a list of lists, where each inner list represents a row of data + for row in payload['rows']: + if len(row) < 3: # Ensure row has enough columns + continue # Skipping row with insufficient columns + + # Each row is of the structure ["Building", "Equipment & Location", "Coordinates (Lat, Lng)"] + [raw_building, raw_location, raw_coordinates] = row + + # Map raw building name to canonical building name + building, score = map_building(raw_building) + + # Map labels from description to canonical labels + # TODO: Handle description (parse for room number, etc.) 
+ description = raw_location + + # Splits coordinates string into a list of floats + coordinates = [float(x) for x in raw_coordinates.split(', ')] data.append({ - "Location": location_name, + "Location": building, "Description": description, "Coordinates": coordinates }) - return data \ No newline at end of file + + return data \ No newline at end of file From 772a893f085b70a7c86991fd0d7e0813461fa616 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sat, 11 Oct 2025 23:21:54 -0400 Subject: [PATCH 02/15] Implement baseplate labeling for scraped data --- src/data/scrapers/printers.py | 63 +++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/src/data/scrapers/printers.py b/src/data/scrapers/printers.py index df0161b..acddffe 100644 --- a/src/data/scrapers/printers.py +++ b/src/data/scrapers/printers.py @@ -89,6 +89,8 @@ COORD_SPLIT_RE = re.compile(r"\s*,\s*") ALL_CAPS_PHRASE_RE = re.compile(r"\b[A-Z]{2,}(?:\s+[A-Z]{2,})*\b") TRAILING_CAPS_RE = re.compile(r"\b[A-Z]{2,}(?:\s+[A-Z]{2,})*\s*$") + +# Used for stripping common label phrases from building names LABEL_PHRASES_RE = re.compile( r""" \bresidents?\s*only\b | @@ -97,6 +99,25 @@ \baap\b """, re.IGNORECASE | re.VERBOSE ) + +# Used to identify common variants of labels +LABEL_PATTERNS = { + # Residents Only (singular/plural + optional hyphen + any case) + "Residents Only": re.compile(r"\bresident[s]?[-\s]*only\b", re.IGNORECASE), + + # AA&P Students Only (accept AA&P or AAP; allow any junk in-between; optional hyphen) + "AA&P Students Only": re.compile( + r"\b(?:aa\s*&\s*p|aap)\b.*\bstudent[s]?[-\s]*only\b", + re.IGNORECASE + ), + + # Landscape Architecture Students Only (allow arbitrary whitespace; optional hyphen) + "Landscape Architecture Students Only": re.compile( + r"\blandscape\s+architecture\b.*\bstudent[s]?[-\s]*only\b", + re.IGNORECASE + ), +} + RESIDUAL_TRAILING_LABEL_RE = re.compile( r"\b(?:resident|residents|student|students|staff|public)\b\s*$", 
re.IGNORECASE @@ -121,14 +142,6 @@ def _strip_trailing_allcaps(s): """ return TRAILING_CAPS_RE.sub("", s).strip() -# def _title_clean(s: str) -> str: -# """ -# Nice display casing: keep acronyms as-is, titlecase other words. -# """ -# words = s.split() -# fixed = [w if w.isupper() else w.title() for w in words] -# return " ".join(fixed) - def _pre_clean_for_match(s: str) -> str: s = _norm(s) s = LABEL_PHRASES_RE.sub(" ", s) # <— removes "Resident(s) only", "AA&P", etc. @@ -168,20 +181,25 @@ def map_building(name, threshold=87): # If the score is below the threshold, return the original name instead of the canonical name return (canon_raw, score) if score >= threshold else (name, score) -def map_labels(description): +def map_labels(text): """ Extract label tokens from the description. """ - if not description: - return [], description + if not text: + return [] + + cleaned = _norm(text) + found_labels = [] - labels = LABEL_PHRASES_RE.findall(description) - labels = [label.title().replace("Aa&P", "AA&P").replace("Aap", "AA&P") for label in labels] - description = LABEL_PHRASES_RE.sub("", description).strip() - description = RESIDUAL_TRAILING_LABEL_RE.sub("", description).strip() - description = MULTI_SPACE_RE.sub(" ", description) + for canon, pattern in LABEL_PATTERNS.items(): + # Search for the pattern in the cleaned text + if pattern.search(cleaned): + found_labels.append(canon) - return labels, description + # Remove the found label from the text to avoid duplicates + cleaned = pattern.sub("", cleaned).strip() + + return sorted(set(found_labels)) def fetch_printers_json(): """ @@ -210,6 +228,14 @@ def scrape_printers(): building, score = map_building(raw_building) # Map labels from description to canonical labels + labels = [] + + labels.extend(map_labels(raw_building)) # Get labels from the building name (e.g., "Residents Only") + labels.extend(map_labels(raw_location)) # Get labels from the location description (e.g., "Landscape Architecture Student ONLY") + + 
# Deduplicate and sort labels + labels = sorted(set(labels)) + # TODO: Handle description (parse for room number, etc.) description = raw_location @@ -219,7 +245,8 @@ def scrape_printers(): data.append({ "Location": building, "Description": description, - "Coordinates": coordinates + "Coordinates": coordinates, + "Labels": labels }) return data \ No newline at end of file From 4b008e458302e3cb74a646ed71c3b53617b25730 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sat, 11 Oct 2025 23:28:45 -0400 Subject: [PATCH 03/15] Add labels for printer colors --- src/data/scrapers/printers.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/data/scrapers/printers.py b/src/data/scrapers/printers.py index acddffe..5a568aa 100644 --- a/src/data/scrapers/printers.py +++ b/src/data/scrapers/printers.py @@ -74,12 +74,7 @@ "White Hall", "Willard Student Center" ] - -CANONICAL_LABELS = [ - "Residents Only", - "AA&P Students Only", - "Landscape Architecture Students Only" -] +# Add more buildings as needed... 
# Regex helpers HTML_TAG_RE = re.compile(r"<[^>]+>") @@ -102,6 +97,7 @@ # Used to identify common variants of labels LABEL_PATTERNS = { + # --- Access restrictions --- # Residents Only (singular/plural + optional hyphen + any case) "Residents Only": re.compile(r"\bresident[s]?[-\s]*only\b", re.IGNORECASE), @@ -116,6 +112,15 @@ r"\blandscape\s+architecture\b.*\bstudent[s]?[-\s]*only\b", re.IGNORECASE ), + + # --- Printer capabilities --- + "Color": re.compile(r"\bcolor\b", re.IGNORECASE), + "Black & White": re.compile( + r"\b(?:black\s*(?:and|&)\s*white|b\s*&\s*w)\b", re.IGNORECASE + ), + "Color, Scan, & Copy": re.compile( + r"\bcolor[,/ &]*(scan|copy|print|copying)+\b", re.IGNORECASE + ), } RESIDUAL_TRAILING_LABEL_RE = re.compile( @@ -198,7 +203,8 @@ def map_labels(text): # Remove the found label from the text to avoid duplicates cleaned = pattern.sub("", cleaned).strip() - + + return sorted(set(found_labels)) def fetch_printers_json(): @@ -234,7 +240,7 @@ def scrape_printers(): labels.extend(map_labels(raw_location)) # Get labels from the location description (e.g., "Landscape Architecture Student ONLY") # Deduplicate and sort labels - labels = sorted(set(labels)) + labels = sorted(set(labels)) # TODO: Handle description (parse for room number, etc.) description = raw_location From 7f87078d85266b2a62dbbb8ea329558002b6dd25 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sat, 11 Oct 2025 23:38:29 -0400 Subject: [PATCH 04/15] Update description to exclude labels --- src/data/scrapers/printers.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/data/scrapers/printers.py b/src/data/scrapers/printers.py index 5a568aa..8c76c21 100644 --- a/src/data/scrapers/printers.py +++ b/src/data/scrapers/printers.py @@ -191,7 +191,7 @@ def map_labels(text): Extract label tokens from the description. 
""" if not text: - return [] + return text, [] cleaned = _norm(text) found_labels = [] @@ -204,8 +204,8 @@ def map_labels(text): # Remove the found label from the text to avoid duplicates cleaned = pattern.sub("", cleaned).strip() - - return sorted(set(found_labels)) + cleaned = re.sub(r"\s+", " ", cleaned).strip() + return cleaned, sorted(set(found_labels)) def fetch_printers_json(): """ @@ -236,14 +236,16 @@ def scrape_printers(): # Map labels from description to canonical labels labels = [] - labels.extend(map_labels(raw_building)) # Get labels from the building name (e.g., "Residents Only") - labels.extend(map_labels(raw_location)) # Get labels from the location description (e.g., "Landscape Architecture Student ONLY") + _, building_labels = map_labels(raw_building) # Get labels from the building name (e.g., "Residents Only") + remainder, location_labels = map_labels(raw_location) # Get labels from the location description (e.g., "Landscape Architecture Student ONLY") # Deduplicate and sort labels + labels += building_labels + labels += location_labels labels = sorted(set(labels)) - - # TODO: Handle description (parse for room number, etc.) - description = raw_location + + cleaned = re.sub(r"^[\s\-–—:/|]+", "", remainder).strip() # Remove leftover delimiters at the start (like " - ", " / ", ": ", etc.) 
+ description = cleaned # Final cleaned description text (with labels removed) — essentially, remainder of the location description # Splits coordinates string into a list of floats coordinates = [float(x) for x in raw_coordinates.split(', ')] From ff5d61be6d0d815a736547582ac545cd72213b93 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sat, 11 Oct 2025 23:42:50 -0400 Subject: [PATCH 05/15] Add comments/documentation and clean up code --- src/data/scrapers/printers.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/data/scrapers/printers.py b/src/data/scrapers/printers.py index 8c76c21..a71e1f7 100644 --- a/src/data/scrapers/printers.py +++ b/src/data/scrapers/printers.py @@ -10,6 +10,7 @@ URL = 'https://www.cornell.edu/about/maps/directory/text-data.cfm?layer=CUPrint&caption=%20CU%20Print%20Printers' +# HTTP headers to mimic a real browser request HEADERS = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36", "Referer": 'https://www.cornell.edu/about/maps/directory/', @@ -17,7 +18,8 @@ "Accept": 'application/json, text/javascript, */*', } -# Canonical list of Cornell buildings; NOTE: This list is not exhaustive. +# Canonical list of Cornell buildings +# NOTE: This list is not exhaustive. Add more buildings as needed... CANONICAL_BUILDINGS = [ "Akwe:kon", "Alice Cook House", @@ -74,15 +76,11 @@ "White Hall", "Willard Student Center" ] -# Add more buildings as needed... 
# Regex helpers HTML_TAG_RE = re.compile(r"<[^>]+>") BRACKET_CONTENT_RE = re.compile(r"[\(\[\{].*?[\)\]\}]") MULTI_SPACE_RE = re.compile(r"\s+") -DELIMS_RE = re.compile(r"\s*[-–—:/|]\s*") -COORD_SPLIT_RE = re.compile(r"\s*,\s*") -ALL_CAPS_PHRASE_RE = re.compile(r"\b[A-Z]{2,}(?:\s+[A-Z]{2,})*\b") TRAILING_CAPS_RE = re.compile(r"\b[A-Z]{2,}(?:\s+[A-Z]{2,})*\s*$") # Used for stripping common label phrases from building names @@ -123,6 +121,7 @@ ), } +# Used for stripping residual trailing labels from descriptions RESIDUAL_TRAILING_LABEL_RE = re.compile( r"\b(?:resident|residents|student|students|staff|public)\b\s*$", re.IGNORECASE @@ -148,6 +147,9 @@ def _strip_trailing_allcaps(s): return TRAILING_CAPS_RE.sub("", s).strip() def _pre_clean_for_match(s: str) -> str: + """ + Pre-clean a building name for matching against the canonical list. + """ s = _norm(s) s = LABEL_PHRASES_RE.sub(" ", s) # <— removes "Resident(s) only", "AA&P", etc. s = _strip_trailing_allcaps(s) @@ -158,18 +160,25 @@ def _pre_clean_for_match(s: str) -> str: return s def _token_sort(s): + """ + Tokenize a string, sort the tokens, and re-join them. + """ tokens = s.lower().split() tokens.sort() return " ".join(tokens) def map_building(name, threshold=87): + """ + Map a building name to a canonical building name using fuzzy matching. 
+ """ if not name: return None, 0 query = _token_sort(_pre_clean_for_match(name)) canon_token_list = [_token_sort(_pre_clean_for_match(c)) for c in CANONICAL_BUILDINGS] - best = get_close_matches(query, canon_token_list, n=1) # Returns a list of the (top-1) closest match to the cleaned name + # Returns a list of the (top-1) closest match to the cleaned name + best = get_close_matches(query, canon_token_list, n=1) # If no matches (empty list), return the original name and 0 if not best: @@ -231,7 +240,7 @@ def scrape_printers(): [raw_building, raw_location, raw_coordinates] = row # Map raw building name to canonical building name - building, score = map_building(raw_building) + building, _ = map_building(raw_building) # Map labels from description to canonical labels labels = [] From 906c6331f54a5726b01a037b5a7d7b150a8b1e3a Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sun, 12 Oct 2025 00:30:16 -0400 Subject: [PATCH 06/15] Include labels in database creation and population --- src/data/db/database.py | 40 ++++++++++++++++++++++++++++++++- src/data/db/models.py | 24 ++++++++++++++++++++ src/data/scripts/populate_db.py | 2 +- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/data/db/database.py b/src/data/db/database.py index efdf541..7aba210 100644 --- a/src/data/db/database.py +++ b/src/data/db/database.py @@ -32,7 +32,7 @@ def insert_library(location, address, latitude, longitude): conn.close() -def insert_printer(location, description, latitude, longitude): +def insert_printer(location, description, labels, latitude, longitude): """Insert a printer into the database.""" conn = get_db_connection() cursor = conn.cursor() @@ -44,6 +44,44 @@ def insert_printer(location, description, latitude, longitude): """, (location, description, latitude, longitude), ) + + # Insert labels into the labels table and get their IDs + label_ids = [] + for label in labels: + cursor.execute( + """ + INSERT OR IGNORE INTO labels (label) + VALUES (?) 
+ """, + (label,), + ) + cursor.execute( + """ + SELECT id FROM labels WHERE label = ? + """, + (label,), + ) + label_id = cursor.fetchone()[0] + label_ids.append(label_id) + + # Create entries in the junction table for printer-label relationships + cursor.execute( + """ + SELECT id FROM printers WHERE location = ? AND description = ? AND latitude = ? AND longitude = ? + """, + (location, description, latitude, longitude), + ) + printer_id = cursor.fetchone()[0] + + # Insert into junction table + for label_id in label_ids: + cursor.execute( + """ + INSERT OR IGNORE INTO printer_labels (printer_id, label_id) + VALUES (?, ?) + """, + (printer_id, label_id), + ) conn.commit() conn.close() diff --git a/src/data/db/models.py b/src/data/db/models.py index 7634fd0..5499c30 100644 --- a/src/data/db/models.py +++ b/src/data/db/models.py @@ -15,6 +15,7 @@ def create_tables(): conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() + #TODO: Remove UNIQUE constraint from location cursor.execute( """ CREATE TABLE IF NOT EXISTS libraries ( @@ -50,6 +51,29 @@ def create_tables(): ) """ ) + + # Table for storing unique labels + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS labels ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL + ) + """ + ) + + # Junction table for many-to-many relationship between printers and labels + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS printer_labels ( + printer_id INTEGER NOT NULL, + label_id INTEGER NOT NULL, + PRIMARY KEY (printer_id, label_id), + FOREIGN KEY (printer_id) REFERENCES printers(id) ON DELETE CASCADE, + FOREIGN KEY (label_id) REFERENCES labels(id) ON DELETE CASCADE + ) + """ + ) conn.commit() conn.close() diff --git a/src/data/scripts/populate_db.py b/src/data/scripts/populate_db.py index fa6a23f..c84cd1b 100644 --- a/src/data/scripts/populate_db.py +++ b/src/data/scripts/populate_db.py @@ -18,7 +18,7 @@ def populate_db(): # Insert printers printers = scrape_printers() for printer in printers: - 
insert_printer(printer['Location'], printer['Description'], printer['Coordinates'][0], printer['Coordinates'][1]) + insert_printer(printer['Location'], printer['Description'], printers['Labels'], printer['Coordinates'][0], printer['Coordinates'][1]) if __name__ == "__main__": populate_db() \ No newline at end of file From de836bce1011263ea3b3082ba7334da688a518ac Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Tue, 14 Oct 2025 03:28:20 -0400 Subject: [PATCH 07/15] Update endpoint for fetching printer information and corresponding swagger documentation --- src/swagger.json | 2 +- src/utils/EcosystemUtils.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/swagger.json b/src/swagger.json index ff9b0af..fc7f734 100644 --- a/src/swagger.json +++ b/src/swagger.json @@ -66,7 +66,7 @@ ], "responses": { "200": { - "description": "{\"success\": true, \"data\": [{\"id\": 1, \"location\": \"Akwe:kon\", \"description\": \"Color - Room 115\", \"latitude\": 42.4563, \"longitude\": -76.4806}]}", + "description": "{\"success\": true, \"data\": [{\"id\": 1, \"location\": \"Akwe:kon\", \"description\": \"Room 115\", \"latitude\": 42.4563, \"longitude\": -76.4806, \"labels\": [\"Color\"]}]}", "schema": { "$ref": "#/components/schemas/BusStop" } diff --git a/src/utils/EcosystemUtils.js b/src/utils/EcosystemUtils.js index 5aadd2b..a5e979a 100644 --- a/src/utils/EcosystemUtils.js +++ b/src/utils/EcosystemUtils.js @@ -45,7 +45,7 @@ function fetchAllPrinters() { }); // Fetch printers - db.all("SELECT * FROM printers", (err, rows) => { + db.all("SELECT p.id, p.location, p.description, p.latitude, p.longitude, COALESCE(GROUP_CONCAT(DISTINCT l.label, ', '), '') AS labels FROM printers p LEFT JOIN printer_labels pl ON p.id = pl.printer_id LEFT JOIN labels l ON pl.label_id = l.id GROUP BY p.id", (err, rows) => { if (err) { console.error(err.message); return reject(err); From f23bcc5726b88691265d533aabe89d6af0950b42 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: 
Fri, 7 Nov 2025 19:30:35 -0500 Subject: [PATCH 08/15] Add script to run migrations on database --- package-lock.json | 15 +++++ package.json | 1 + src/.DS_Store | Bin 10244 -> 8196 bytes src/data/scripts/run-migrations.js | 100 +++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+) create mode 100644 src/data/scripts/run-migrations.js diff --git a/package-lock.json b/package-lock.json index 826744f..f6b0842 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "1.0.0", "license": "ISC", "dependencies": { + "better-sqlite3": "^12.4.1", "dotenv": "^16.4.7", "express": "^4.21.2", "firebase-admin": "^13.1.0", @@ -791,6 +792,20 @@ "tweetnacl": "^0.14.3" } }, + "node_modules/better-sqlite3": { + "version": "12.4.1", + "resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-12.4.1.tgz", + "integrity": "sha512-3yVdyZhklTiNrtg+4WqHpJpFDd+WHTg2oM7UcR80GqL05AOV0xEJzc6qNvFYoEtE+hRp1n9MpN6/+4yhlGkDXQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "bindings": "^1.5.0", + "prebuild-install": "^7.1.1" + }, + "engines": { + "node": "20.x || 22.x || 23.x || 24.x" + } + }, "node_modules/bignumber.js": { "version": "9.1.2", "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.1.2.tgz", diff --git a/package.json b/package.json index 057904a..74e05ef 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "author": "", "license": "ISC", "dependencies": { + "better-sqlite3": "^12.4.1", "dotenv": "^16.4.7", "express": "^4.21.2", "firebase-admin": "^13.1.0", diff --git a/src/.DS_Store b/src/.DS_Store index a39c96c47949260ec6c8cd9be8d9394560f28cf6..807e94451ef138fbec80fa0ae23bfb042a5ce57f 100644 GIT binary patch delta 96 zcmZn(XmOBWU|?W$DortDU;r^WfEYvza8E20o2aMA$gweCH$NlCWFCRdo6ie+unRH+ kWr09~8%Vf<6mBg1&ODi4C6I#=qMc!KJkPYvd}3_O03^Z?xc~qF delta 201 zcmZp1XbF&DU|?W$DortDU{C-uIe-{M3-C-V6q~50$jG%ZU^hP_*JK`n&9Zz9DGd1x z$qd;HsSHI7xM|c{%xc=|H;} 
zH}4VfX5ZNGflZJZ$OQrgZXn?ba>d5N@640=WdcQ*Aa2tDNi#Aq7=Y->1v0&xKMFH5 F0|3-|C*lAA diff --git a/src/data/scripts/run-migrations.js b/src/data/scripts/run-migrations.js new file mode 100644 index 0000000..25e2b1d --- /dev/null +++ b/src/data/scripts/run-migrations.js @@ -0,0 +1,100 @@ +// Imports necessary for data migrations +const fs = require('fs'); // Node's built-in file system module, which lets us read from disk +const path = require('path'); // Safer way to express file paths/path joining +const crypto = require('crypto'); +const Database = require('better-sqlite3'); + +const DB_PATH = path.join(__dirname, "../transit.db"); // Finds db file from current file's directory +const MIGRATIONS_DIR = path.join(__dirname, "../migrations"); + +/** + * Hashes a string using SHA-256 + * + * We use this to store the checksum of the migration file in the database. + * This allows us to track which migrations have been applied, as well as if a migration file has been modified since it was last applied. + * + * @param {string} s - The string to hash + * @returns {string} - The SHA-256 hash of the string + */ +function sha256(s) { + return crypto.createHash('sha256').update(s, 'utf8').digest('hex'); +} + +/** + * Runs the migrations + * + * This function reads all the migration files in the migrations directory, hashes them, and stores the checksum in the database. + * It then executes the migrations in the order of the files. 
+ * + * @returns {void} + * @throws {Error} - If the migrations fail + */ +function runMigration() { + // Open the database using the better-sqlite3 library + const db = new Database(DB_PATH); + + // Set defaults for migrations + db.pragma('journal_mode = WAL'); + db.pragma('synchronous = NORMAL'); + db.pragma('foreign_keys = ON'); + + // Create the schema_migrations table if it doesn't exist for tracking migrations applied to the database + db.exec(` + CREATE TABLE IF NOT EXISTS schema_migrations ( + id INTEGER PRIMARY KEY, + filename TEXT NOT NULL UNIQUE, + checksum TEXT NOT NULL, + applied_at TEXT NOT NULL DEFAULT (datetime('now')) + ) + `); + + // Get the list of migrations that have already been applied to the database + const applied = new Set( + db.prepare('SELECT filename FROM schema_migrations').all().map(record => record.filename) + ); + + // Get the list of migration files in the migrations directory (keeping only .sql files and sorting them chronologically) + const files = fs.readdirSync(MIGRATIONS_DIR).filter(f => f.endsWith('.sql')).sort(); + + // Prepare the statement to insert a new migration into the schema_migrations table + const insertMig = db.prepare(` + INSERT INTO schema_migrations (filename, checksum) VALUES (?,?) 
+ `); + + // Define a transaction to execute the migrations + const transaction = db.transaction(() => { + for (const file of files) { + // Skip if the migration has already been applied + if (applied.has(file)) { + continue; + } + + const full = path.join(MIGRATIONS_DIR, file); + const sql = fs.readFileSync(full, 'utf8').trim(); + if (!sql) { + continue; + } + + // Defensive: re-enable FKs inside each run (is already done in the migrations, but just in case) + db.exec('PRAGMA foreign_keys = ON;'); + + // Execute SQL commands in the migration file + db.exec(sql); + + // Records migration as applied to the database via its check + insertMig.run(file, sha256(sql)); + console.log(`Applied ${file}`); + } + }); + + try { + transaction(); + console.log('All migrations applied'); + } catch (e) { + console.error("Migration failed", e); + } finally { + db.close(); + } +} + +runMigration(); \ No newline at end of file From f8d95d5b4aac621931641cf5e4842da8bc22583a Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Fri, 7 Nov 2025 19:31:07 -0500 Subject: [PATCH 09/15] Add migration files to create labels and printer_label tables --- src/data/migrations/2025117_1854_create_labels.sql | 6 ++++++ .../migrations/2025117_1859_create_printer_labels.sql | 9 +++++++++ 2 files changed, 15 insertions(+) create mode 100644 src/data/migrations/2025117_1854_create_labels.sql create mode 100644 src/data/migrations/2025117_1859_create_printer_labels.sql diff --git a/src/data/migrations/2025117_1854_create_labels.sql b/src/data/migrations/2025117_1854_create_labels.sql new file mode 100644 index 0000000..3884e98 --- /dev/null +++ b/src/data/migrations/2025117_1854_create_labels.sql @@ -0,0 +1,6 @@ +PRAGMA foreign_keys = ON; + +CREATE TABLE IF NOT EXISTS labels ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + label TEXT UNIQUE NOT NULL +); \ No newline at end of file diff --git a/src/data/migrations/2025117_1859_create_printer_labels.sql b/src/data/migrations/2025117_1859_create_printer_labels.sql 
new file mode 100644 index 0000000..73fd9c0 --- /dev/null +++ b/src/data/migrations/2025117_1859_create_printer_labels.sql @@ -0,0 +1,9 @@ +PRAGMA foreign_keys = ON; + +CREATE TABLE IF NOT EXISTS printer_labels ( + printer_id INTEGER NOT NULL, + label_id INTEGER NOT NULL, + PRIMARY KEY (printer_id, label_id), + FOREIGN KEY (printer_id) REFERENCES printers(id) ON DELETE CASCADE, + FOREIGN KEY (label_id) REFERENCES labels(id) ON DELETE CASCADE +); \ No newline at end of file From 9fe37ff164f96a6c702f97c139d0445eb9b59656 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Fri, 7 Nov 2025 19:31:52 -0500 Subject: [PATCH 10/15] Remove labels and printer_labels table from database initialization for migration --- src/data/db/models.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/data/db/models.py b/src/data/db/models.py index 5499c30..17db360 100644 --- a/src/data/db/models.py +++ b/src/data/db/models.py @@ -51,29 +51,6 @@ def create_tables(): ) """ ) - - # Table for storing unique labels - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS labels ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - label TEXT UNIQUE NOT NULL - ) - """ - ) - - # Junction table for many-to-many relationship between printers and labels - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS printer_labels ( - printer_id INTEGER NOT NULL, - label_id INTEGER NOT NULL, - PRIMARY KEY (printer_id, label_id), - FOREIGN KEY (printer_id) REFERENCES printers(id) ON DELETE CASCADE, - FOREIGN KEY (label_id) REFERENCES labels(id) ON DELETE CASCADE - ) - """ - ) conn.commit() conn.close() From 919125c48d5c4d3d4b6fdedcd960aa0752f4403e Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Fri, 7 Nov 2025 20:15:22 -0500 Subject: [PATCH 11/15] Minor bug fix --- src/data/scripts/populate_db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/scripts/populate_db.py b/src/data/scripts/populate_db.py index c84cd1b..30ddc62 100644 --- a/src/data/scripts/populate_db.py 
+++ b/src/data/scripts/populate_db.py @@ -18,7 +18,7 @@ def populate_db(): # Insert printers printers = scrape_printers() for printer in printers: - insert_printer(printer['Location'], printer['Description'], printers['Labels'], printer['Coordinates'][0], printer['Coordinates'][1]) + insert_printer(printer['Location'], printer['Description'], printer['Labels'], printer['Coordinates'][0], printer['Coordinates'][1]) if __name__ == "__main__": populate_db() \ No newline at end of file From aa664b75f5a1ac4cc1560456d096fcd9c1ab7b16 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Fri, 7 Nov 2025 20:29:09 -0500 Subject: [PATCH 12/15] Export script to run migrations (and populate db) --- package.json | 4 +++- src/data/scripts/run-migrations.js | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 74e05ef..b4d1f5c 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,9 @@ "type": "module", "scripts": { "start:dev": "nodemon --ignore src/data/notifRequests.json src/index.js", - "start": "node src/index.js" + "start": "node src/index.js", + "migrate": "node src/data/scripts/run-migrations.js", + "populate:db": "npm run migrate && python3 src/data/scripts/populate_db.py" }, "keywords": [], "author": "", diff --git a/src/data/scripts/run-migrations.js b/src/data/scripts/run-migrations.js index 25e2b1d..4748193 100644 --- a/src/data/scripts/run-migrations.js +++ b/src/data/scripts/run-migrations.js @@ -97,4 +97,10 @@ function runMigration() { } } -runMigration(); \ No newline at end of file +module.exports = { + runMigration +}; + +if (require.main === module) { + runMigration(); +} \ No newline at end of file From 58917a8995e6d27341c1a0209e0719e299532685 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sat, 8 Nov 2025 10:07:46 -0500 Subject: [PATCH 13/15] Fix scraping and database bugs --- src/data/db/database.py | 22 +++++++++---------- src/data/db/models.py | 3 +-- src/data/scrapers/printers.py | 41 
+++++++++++++++++++++++++++++++++-------- 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/src/data/db/database.py b/src/data/db/database.py index 7aba210..9b857b2 100644 --- a/src/data/db/database.py +++ b/src/data/db/database.py @@ -37,14 +37,18 @@ def insert_printer(location, description, labels, latitude, longitude): conn = get_db_connection() cursor = conn.cursor() + # We remove the "OR IGNORE" because we acknowledge that several printers may have the same location and description (i.e., same building and room), so we rely on the unique printer_id to identify the printer cursor.execute( """ - INSERT OR IGNORE INTO printers (location, description, latitude, longitude) + INSERT INTO printers (location, description, latitude, longitude) VALUES (?, ?, ?, ?) """, (location, description, latitude, longitude), ) - + + # To get the printer_id, we do NOT rely on the location/description/coordinates, but rather on the printer_id that was just inserted (lastrowid), as several printers may have the same location and description (i.e., same building and room) + printer_id = cursor.lastrowid + # Insert labels into the labels table and get their IDs label_ids = [] for label in labels: @@ -61,17 +65,11 @@ def insert_printer(location, description, labels, latitude, longitude): """, (label,), ) - label_id = cursor.fetchone()[0] + result = cursor.fetchone() + if result is None: + raise ValueError(f"Failed to find label: {label}") + label_id = result[0] label_ids.append(label_id) - - # Create entries in the junction table for printer-label relationships - cursor.execute( - """ - SELECT id FROM printers WHERE location = ? AND description = ? AND latitude = ? AND longitude = ? 
- """, - (location, description, latitude, longitude), - ) - printer_id = cursor.fetchone()[0] # Insert into junction table for label_id in label_ids: diff --git a/src/data/db/models.py b/src/data/db/models.py index 17db360..8183be9 100644 --- a/src/data/db/models.py +++ b/src/data/db/models.py @@ -15,7 +15,6 @@ def create_tables(): conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - #TODO: Remove UNIQUE constraint from location cursor.execute( """ CREATE TABLE IF NOT EXISTS libraries ( @@ -32,7 +31,7 @@ def create_tables(): """ CREATE TABLE IF NOT EXISTS printers ( id INTEGER PRIMARY KEY AUTOINCREMENT, - location TEXT UNIQUE, + location TEXT, description TEXT, latitude REAL, longitude REAL diff --git a/src/data/scrapers/printers.py b/src/data/scrapers/printers.py index a71e1f7..ea40cd6 100644 --- a/src/data/scrapers/printers.py +++ b/src/data/scrapers/printers.py @@ -1,5 +1,4 @@ import requests -from bs4 import BeautifulSoup from difflib import get_close_matches # For data scraping from difflib import SequenceMatcher import re # For using regex @@ -112,13 +111,14 @@ ), # --- Printer capabilities --- - "Color": re.compile(r"\bcolor\b", re.IGNORECASE), + "Color, Scan, & Copy": re.compile( + r"\bcolor\s*[,/&]?\s*(?:scan\s*[,/&]?\s*)?(?:and\s*)?\s*&?\s*(?:copy|print|copying)\b", re.IGNORECASE + ), "Black & White": re.compile( r"\b(?:black\s*(?:and|&)\s*white|b\s*&\s*w)\b", re.IGNORECASE ), - "Color, Scan, & Copy": re.compile( - r"\bcolor[,/ &]*(scan|copy|print|copying)+\b", re.IGNORECASE - ), + "Color": re.compile(r"\bcolor\b", re.IGNORECASE), + } # Used for stripping residual trailing labels from descriptions @@ -209,11 +209,20 @@ def map_labels(text): # Search for the pattern in the cleaned text if pattern.search(cleaned): found_labels.append(canon) + cleaned = pattern.sub("", cleaned, count=1).strip() + + # Collapse runs of punctuation-delimiters to a single space + cleaned = re.sub(r"\s*[,;/|&\-–—:]+\s*", " ", cleaned) - # Remove the found label from the text 
to avoid duplicates - cleaned = pattern.sub("", cleaned).strip() + # Remove any leftover leading delimiters/spaces (e.g., ", ", "- ") + cleaned = re.sub(r"^[\s,;/|&\-–—:]+", "", cleaned) + # Remove standalone "Copy", "Print", or "Scan" at the start (leftover from partial label removal) + cleaned = re.sub(r"^(?:copy|print|scan)\s+", "", cleaned, flags=re.IGNORECASE) + + # Final whitespace cleanup cleaned = re.sub(r"\s+", " ", cleaned).strip() + return cleaned, sorted(set(found_labels)) def fetch_printers_json(): @@ -242,12 +251,17 @@ def scrape_printers(): # Map raw building name to canonical building name building, _ = map_building(raw_building) + # If we weren't able to map the building to a canonical building, skip this row + # NOTE: This should prevent us from getting "None" as the location, which was happening earlier + if building not in CANONICAL_BUILDINGS: + continue + # Map labels from description to canonical labels labels = [] _, building_labels = map_labels(raw_building) # Get labels from the building name (e.g., "Residents Only") remainder, location_labels = map_labels(raw_location) # Get labels from the location description (e.g., "Landscape Architecture Student ONLY") - + # Deduplicate and sort labels labels += building_labels labels += location_labels @@ -266,4 +280,13 @@ def scrape_printers(): "Labels": labels }) - return data \ No newline at end of file + return data + +if __name__ == "__main__": + results = scrape_printers() + print(f"Scraped {len(results)} printers.\n") + + # Print a sample of the data + for row in results: + if row['Location'] == 'Vet Library': + print(row['Description'], row['Labels']) \ No newline at end of file From 8f8486c9c3e4b510a945609d1368244f63d7fe4e Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sat, 8 Nov 2025 10:12:17 -0500 Subject: [PATCH 14/15] Fix imports --- src/data/scripts/run-migrations.js | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git 
a/src/data/scripts/run-migrations.js b/src/data/scripts/run-migrations.js index 4748193..5e499fc 100644 --- a/src/data/scripts/run-migrations.js +++ b/src/data/scripts/run-migrations.js @@ -1,10 +1,15 @@ // Imports necessary for data migrations -const fs = require('fs'); // Node's built-in file system module, which lets us read from disk -const path = require('path'); // Safer way to express file paths/path joining -const crypto = require('crypto'); -const Database = require('better-sqlite3'); +import fs from 'fs' // Node's built-in file system module, which lets us read from disk +import path from 'path'; // Safer way to express file paths/path joining +import crypto from 'crypto'; +import Database from 'better-sqlite3'; +import { fileURLToPath } from 'url'; -const DB_PATH = path.join(__dirname, "../transit.db"); // Finds db file from current file's directory +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +// || path.join(__dirname, "../transit.db") +const DB_PATH = process.env.DB_PATH; // Finds db file from current file's directory const MIGRATIONS_DIR = path.join(__dirname, "../migrations"); /** @@ -97,10 +102,11 @@ function runMigration() { } } -module.exports = { - runMigration -}; - -if (require.main === module) { +export function runMigrations() { runMigration(); -} \ No newline at end of file +} + +import { pathToFileURL } from 'url'; +if (import.meta.url === pathToFileURL(process.argv[1]).href) { + runMigrations(); + } \ No newline at end of file From 7e062ccee33396f7cd89ab36862438af7bafb692 Mon Sep 17 00:00:00 2001 From: Chimdi Ejiogu Date: Sat, 8 Nov 2025 10:12:34 -0500 Subject: [PATCH 15/15] Add pycache --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4163587..a8fbbdc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ build/ logs/ node_modules/ +__pycache__/ # Specific Files config.json