From e77d6bc57926faf12043aeb6095a890b241d70fd Mon Sep 17 00:00:00 2001
From: fbraza <fbraza@tutanota.com>
Date: Sat, 19 Jul 2025 13:56:03 +0200
Subject: [PATCH] refactor: add comprehensive type hints (issue #8)

---
 vitals/biomarkers/helpers.py | 15 +++++-----
 vitals/biomarkers/io.py      |  5 ++--
 vitals/phenoage/compute.py   |  4 ++-
 vitals/score2/compute.py     | 55 ++++++++++++++++++++----------------
 4 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/vitals/biomarkers/helpers.py b/vitals/biomarkers/helpers.py
index 6c0fd9c..3580e07 100755
--- a/vitals/biomarkers/helpers.py
+++ b/vitals/biomarkers/helpers.py
@@ -1,4 +1,5 @@
-from typing import TypeVar
+from pathlib import Path
+from typing import Any, Callable, TypeVar
 
 from pydantic import BaseModel
 
@@ -25,7 +26,7 @@ def format_unit_suffix(unit: str) -> str:
     return suffix
 
 
-def update_biomarker_names(biomarkers: dict) -> dict:
+def update_biomarker_names(biomarkers: dict[str, Any]) -> dict[str, Any]:
     """Update biomarker names to include unit suffixes.
 
     Args:
@@ -49,7 +50,7 @@ def update_biomarker_names(biomarkers: dict) -> dict:
 
 
 def find_biomarker_value(
-    raw_biomarkers: dict, biomarker_name: str, expected_unit: str
+    raw_biomarkers: dict[str, Any], biomarker_name: str, expected_unit: str
 ) -> float | None:
     """
     Find biomarker value by name prefix and expected unit.
@@ -71,7 +72,7 @@ def find_biomarker_value(
     return None
 
 
-def add_converted_biomarkers(biomarkers: dict) -> dict:
+def add_converted_biomarkers(biomarkers: dict[str, Any]) -> dict[str, Any]:
     """Add converted biomarker entries for glucose, creatinine, albumin, and CRP.
 
     Args:
@@ -84,7 +85,7 @@ def add_converted_biomarkers(biomarkers: dict) -> dict:
     result = biomarkers.copy()
 
     # Conversion mappings
-    conversions = {
+    conversions: dict[str, dict[str, str | Callable[[float], float]]] = {
         "glucose_mg_dl": {
             "target_name": "glucose_mmol_l",
             "target_unit": "mmol/L",
@@ -136,7 +137,7 @@ def add_converted_biomarkers(biomarkers: dict) -> dict:
             # Skip if target already exists
             if target_name not in result:
                 converted_value = conversion_info["conversion"](source_value)  # type: ignore
-                result[target_name] = {
+                result[target_name] = { # type: ignore
                     "value": round(converted_value, 4),
                     "unit": conversion_info["target_unit"],
                 }
@@ -145,7 +146,7 @@ def add_converted_biomarkers(biomarkers: dict) -> dict:
 
 
 def extract_biomarkers_from_json(
-    filepath: str,
+    filepath: str | Path,
     biomarker_class: type[Biomarkers],
     biomarker_units: Units,
 ) -> Biomarkers:
diff --git a/vitals/biomarkers/io.py b/vitals/biomarkers/io.py
index f1b3ff2..8a22037 100755
--- a/vitals/biomarkers/io.py
+++ b/vitals/biomarkers/io.py
@@ -1,10 +1,11 @@
 import json
 from pathlib import Path
+from typing import Any
 
 from vitals.biomarkers import helpers
 
 
-def update(input_file: Path) -> dict:
+def update(input_file: Path) -> dict[str, Any]:
     """Process a single JSON file and create output file with converted biomarkers.
 
     Args:
@@ -26,7 +27,7 @@ def update(input_file: Path) -> dict:
     return data
 
 
-def write(data: dict, output_file: Path) -> None:
+def write(data: dict[str, Any], output_file: Path) -> None:
     """Write biomarker data to a JSON file.
 
     Args:
diff --git a/vitals/phenoage/compute.py b/vitals/phenoage/compute.py
index 5397876..faf597d 100755
--- a/vitals/phenoage/compute.py
+++ b/vitals/phenoage/compute.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import numpy as np
 from pydantic import BaseModel
 
@@ -42,7 +44,7 @@ def __gompertz_mortality_model(weighted_risk_score: float) -> float:
     )
 
 
-def biological_age(filepath: str) -> tuple[float, float, float]:
+def biological_age(filepath: str | Path) -> tuple[float, float, float]:
     """
     The Phenoage score is calculated as a weighted (coefficients available in Levine et al 2018)
     linear combination of these variables, which was then transformed into units of years using 2 parametric
diff --git a/vitals/score2/compute.py b/vitals/score2/compute.py
index bc963aa..b78d8e0 100644
--- a/vitals/score2/compute.py
+++ b/vitals/score2/compute.py
@@ -5,11 +5,16 @@
 in apparently healthy individuals aged 40-69 years in Europe.
 """
 
+from pathlib import Path
+from typing import Literal, TypeAlias
+
 import numpy as np
 from pydantic import BaseModel
 
 from vitals.biomarkers import helpers, schemas
 
+RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"]
+
 
 class ModelCoefficients(BaseModel):
     """
@@ -75,7 +80,7 @@ class CalibrationScales(BaseModel):
     female_scale2: float = 0.7019
 
 
-def cardiovascular_risk(filepath: str) -> tuple[float, float, str]:
+def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]:
     """
     Calculate the 10-year cardiovascular disease risk using the SCORE2 algorithm.
 
@@ -106,28 +111,28 @@ def cardiovascular_risk(filepath: str) -> tuple[float, float, str]:
     if not isinstance(biomarkers, schemas.Score2Markers):
         raise ValueError(f"Invalid biomarker class used: {biomarkers}")
 
-    age = biomarkers.age
-    is_male = biomarkers.is_male  # True for male, False for female
+    age: float = biomarkers.age
+    is_male: bool = biomarkers.is_male  # True for male, False for female
 
     # Apply transformations to biomarkers
-    cage = (age - 60) / 5
-    smoking = float(biomarkers.smoking)  # Convert bool to float (1.0 or 0.0)
-    csbp = (biomarkers.systolic_blood_pressure - 120) / 20
-    ctchol = biomarkers.total_cholesterol - 6
-    chdl = (biomarkers.hdl_cholesterol - 1.3) / 0.5
+    cage: float = (age - 60) / 5
+    smoking: float = float(biomarkers.smoking)  # Convert bool to float (1.0 or 0.0)
+    csbp: float = (biomarkers.systolic_blood_pressure - 120) / 20
+    ctchol: float = biomarkers.total_cholesterol - 6
+    chdl: float = (biomarkers.hdl_cholesterol - 1.3) / 0.5
 
     # Calculate interaction terms
-    smoking_age = smoking * cage
-    sbp_age = csbp * cage
-    tchol_age = ctchol * cage
-    hdl_age = chdl * cage
+    smoking_age: float = smoking * cage
+    sbp_age: float = csbp * cage
+    tchol_age: float = ctchol * cage
+    hdl_age: float = chdl * cage
 
     # Get model coefficients
-    coef = ModelCoefficients()
+    coef: ModelCoefficients = ModelCoefficients()
 
     # Calculate linear predictor (x) based on sex
     if is_male:
-        x = (
+        x: float = (
             coef.male_age * cage +
             coef.male_smoking * smoking +
             coef.male_sbp * csbp +
@@ -138,11 +143,11 @@ def cardiovascular_risk(filepath: str) -> tuple[float, float, str]:
             coef.male_tchol_age * tchol_age +
             coef.male_hdl_age * hdl_age
         )
-        baseline_survival = BaselineSurvival().male
-        scale1 = CalibrationScales().male_scale1
-        scale2 = CalibrationScales().male_scale2
+        baseline_survival: float = BaselineSurvival().male
+        scale1: float = CalibrationScales().male_scale1
+        scale2: float = CalibrationScales().male_scale2
     else:
-        x = (
+        x: float = (
             coef.female_age * cage +
             coef.female_smoking * smoking +
             coef.female_sbp * csbp +
@@ -153,28 +158,28 @@ def cardiovascular_risk(filepath: str) -> tuple[float, float, str]:
             coef.female_tchol_age * tchol_age +
             coef.female_hdl_age * hdl_age
         )
-        baseline_survival = BaselineSurvival().female
-        scale1 = CalibrationScales().female_scale1
-        scale2 = CalibrationScales().female_scale2
+        baseline_survival: float = BaselineSurvival().female
+        scale1: float = CalibrationScales().female_scale1
+        scale2: float = CalibrationScales().female_scale2
 
     # Calculate uncalibrated risk
-    uncalibrated_risk = 1 - np.power(baseline_survival, np.exp(x))
+    uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(x))
 
     # Apply calibration for Belgium (Low Risk region)
     # Calibrated 10-year risk, % = [1 - exp(-exp(scale1 + scale2*ln(-ln(1 - 10-year risk))))] * 100
-    calibrated_risk = float((1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) * 100)
+    calibrated_risk: float = float((1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) * 100)
 
     # Determine risk category based on age
     if age < 50:
         if calibrated_risk < 2.5:
-            risk_category = "Low to moderate"
+            risk_category: RiskCategory = "Low to moderate"
         elif calibrated_risk < 7.5:
             risk_category = "High"
         else:
             risk_category = "Very high"
     else:  # age 50-69
         if calibrated_risk < 5:
-            risk_category = "Low to moderate"
+            risk_category: RiskCategory = "Low to moderate"
         elif calibrated_risk < 10:
             risk_category = "High"
         else: