From 2a9172436062603c5ffa77fec316041479850e7c Mon Sep 17 00:00:00 2001
From: fbraza <fbraza@tutanota.com>
Date: Sat, 19 Jul 2025 21:45:07 +0200
Subject: [PATCH 1/3] refactor: extract common code from SCORE2 modules and
 reorganize schemas
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This refactoring removes code duplication between the SCORE2 and SCORE2-Diabetes modules. Changes made:

1. Created a new vitals/schemas/ package with the following structure:
   - units.py: All Units classes (PhenoageUnits, Score2Units, Score2DiabetesUnits)
   - markers.py: All Markers classes (PhenoageMarkers, Score2Markers, Score2DiabetesMarkers)
   - coefficients.py: Sex-split coefficient classes for better type safety
   - core.py: Shared components (BaselineSurvival, CalibrationScales, utility functions)

2. Replaced duplicate ModelCoefficients with sex-specific classes:
   - Score2MaleCoefficients and Score2FemaleCoefficients
   - Score2DiabetesMaleCoefficients and Score2DiabetesFemaleCoefficients (extending base)

3. Extracted shared utility functions:
   - determine_risk_category(): Common risk categorization logic
   - apply_calibration(): Common calibration formula

4. Updated all imports across the codebase to use new schema structure

5. Moved biomarkers/schemas.py to schemas/units.py, keeping only the Units classes there

This refactoring follows DRY principles while maintaining simplicity and type safety.
All tests pass and code formatting is clean.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 vitals/biomarkers/helpers.py                  |   4 +-
 vitals/phenoage/compute.py                    |  10 +-
 vitals/schemas/__init__.py                    |   0
 vitals/schemas/coefficients.py                | 103 ++++++++
 vitals/schemas/core.py                        |  80 +++++++
 vitals/schemas/markers.py                     |  42 ++++
 .../schemas.py => schemas/units.py}           |  44 ----
 vitals/score2/compute.py                      | 170 ++++----------
 vitals/score2_diabetes/compute.py             | 220 +++++-------------
 9 files changed, 342 insertions(+), 331 deletions(-)
 create mode 100644 vitals/schemas/__init__.py
 create mode 100644 vitals/schemas/coefficients.py
 create mode 100644 vitals/schemas/core.py
 create mode 100644 vitals/schemas/markers.py
 rename vitals/{biomarkers/schemas.py => schemas/units.py} (54%)
 mode change 100755 => 100644

diff --git a/vitals/biomarkers/helpers.py b/vitals/biomarkers/helpers.py
index ca1d8d7..eeed0fa 100755
--- a/vitals/biomarkers/helpers.py
+++ b/vitals/biomarkers/helpers.py
@@ -4,10 +4,10 @@
 
 from pydantic import BaseModel
 
-from vitals.biomarkers import schemas
+from vitals.schemas.units import PhenoageUnits, Score2DiabetesUnits, Score2Units
 
 Biomarkers = TypeVar("Biomarkers", bound=BaseModel)
-Units = schemas.PhenoageUnits | schemas.Score2Units | schemas.Score2DiabetesUnits
+Units = PhenoageUnits | Score2Units | Score2DiabetesUnits
 
 
 class ConversionInfo(TypedDict):
diff --git a/vitals/phenoage/compute.py b/vitals/phenoage/compute.py
index faf597d..ac52596 100755
--- a/vitals/phenoage/compute.py
+++ b/vitals/phenoage/compute.py
@@ -3,7 +3,9 @@
 import numpy as np
 from pydantic import BaseModel
 
-from vitals.biomarkers import helpers, schemas
+from vitals.biomarkers import helpers
+from vitals.schemas.markers import PhenoageMarkers
+from vitals.schemas.units import PhenoageUnits
 
 
 class LinearModel(BaseModel):
@@ -55,14 +57,14 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]:
     # Extract biomarkers from JSON file
     biomarkers = helpers.extract_biomarkers_from_json(
         filepath=filepath,
-        biomarker_class=schemas.PhenoageMarkers,
-        biomarker_units=schemas.PhenoageUnits(),
+        biomarker_class=PhenoageMarkers,
+        biomarker_units=PhenoageUnits(),
     )
 
     age = biomarkers.age
     coef = LinearModel()
 
-    if isinstance(biomarkers, schemas.PhenoageMarkers):
+    if isinstance(biomarkers, PhenoageMarkers):
         weighted_risk_score = (
             coef.intercept
             + (coef.albumin * biomarkers.albumin)
diff --git a/vitals/schemas/__init__.py b/vitals/schemas/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/vitals/schemas/coefficients.py b/vitals/schemas/coefficients.py
new file mode 100644
index 0000000..62a4e9f
--- /dev/null
+++ b/vitals/schemas/coefficients.py
@@ -0,0 +1,103 @@
+from pydantic import BaseModel
+
+
+class Score2MaleCoefficients(BaseModel):
+    """
+    Male-specific coefficients for the SCORE2 Cox proportional hazards model.
+    """
+
+    # Main effects
+    age: float = 0.3742
+    smoking: float = 0.6012
+    sbp: float = 0.2777
+    total_cholesterol: float = 0.1458
+    hdl_cholesterol: float = -0.2698
+
+    # Age interaction terms
+    smoking_age: float = -0.0755
+    sbp_age: float = -0.0255
+    tchol_age: float = -0.0281
+    hdl_age: float = 0.0426
+
+
+class Score2FemaleCoefficients(BaseModel):
+    """
+    Female-specific coefficients for the SCORE2 Cox proportional hazards model.
+    """
+
+    # Main effects
+    age: float = 0.4648
+    smoking: float = 0.7744
+    sbp: float = 0.3131
+    total_cholesterol: float = 0.1002
+    hdl_cholesterol: float = -0.2606
+
+    # Age interaction terms
+    smoking_age: float = -0.1088
+    sbp_age: float = -0.0277
+    tchol_age: float = -0.0226
+    hdl_age: float = 0.0613
+
+
+class Score2DiabetesMaleCoefficients(Score2MaleCoefficients):
+    """
+    Male-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model.
+    Extends the base SCORE2 male coefficients with diabetes-specific parameters.
+    """
+
+    # Override base values with SCORE2-Diabetes specific values
+    age: float = 0.5368
+    smoking: float = 0.4774
+    sbp: float = 0.1322
+    total_cholesterol: float = 0.1102
+    hdl_cholesterol: float = -0.1087
+
+    # Override age interaction terms
+    smoking_age: float = -0.0672
+    sbp_age: float = -0.0268
+    tchol_age: float = -0.0181
+    hdl_age: float = 0.0095
+
+    # Additional diabetes-specific coefficients
+    diabetes: float = 0.6457
+    age_at_diabetes_diagnosis: float = -0.0998
+    hba1c: float = 0.0955
+    egfr: float = -0.0591
+    egfr_squared: float = 0.0058
+
+    # Additional age interaction terms
+    diabetes_age: float = -0.0983
+    hba1c_age: float = -0.0134
+    egfr_age: float = 0.0115
+
+
+class Score2DiabetesFemaleCoefficients(Score2FemaleCoefficients):
+    """
+    Female-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model.
+    Extends the base SCORE2 female coefficients with diabetes-specific parameters.
+    """
+
+    # Override base values with SCORE2-Diabetes specific values
+    age: float = 0.6624
+    smoking: float = 0.6139
+    sbp: float = 0.1421
+    total_cholesterol: float = 0.1127
+    hdl_cholesterol: float = -0.1568
+
+    # Override age interaction terms
+    smoking_age: float = -0.1122
+    sbp_age: float = -0.0167
+    tchol_age: float = -0.0200
+    hdl_age: float = 0.0186
+
+    # Additional diabetes-specific coefficients
+    diabetes: float = 0.8096
+    age_at_diabetes_diagnosis: float = -0.1180
+    hba1c: float = 0.1173
+    egfr: float = -0.0640
+    egfr_squared: float = 0.0062
+
+    # Additional age interaction terms
+    diabetes_age: float = -0.1272
+    hba1c_age: float = -0.0196
+    egfr_age: float = 0.0169
diff --git a/vitals/schemas/core.py b/vitals/schemas/core.py
new file mode 100644
index 0000000..8ae4fdf
--- /dev/null
+++ b/vitals/schemas/core.py
@@ -0,0 +1,80 @@
+from typing import Literal, TypeAlias
+
+import numpy as np
+from pydantic import BaseModel
+
+RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"]
+
+
+class BaselineSurvival(BaseModel):
+    """
+    Sex-specific baseline survival probabilities for the SCORE2 model.
+
+    These values represent the 10-year survival probability for individuals
+    with all risk factors at their reference values.
+    """
+
+    male: float = 0.9605
+    female: float = 0.9776
+
+
+class CalibrationScales(BaseModel):
+    """
+    Region and sex-specific calibration scales for Belgium (Low Risk region).
+
+    These scales are used to calibrate the uncalibrated risk estimate to match
+    the population-specific cardiovascular disease incidence rates.
+    """
+
+    # Male calibration scales
+    male_scale1: float = -0.5699
+    male_scale2: float = 0.7476
+
+    # Female calibration scales
+    female_scale1: float = -0.7380
+    female_scale2: float = 0.7019
+
+
+def determine_risk_category(age: float, calibrated_risk: float) -> RiskCategory:
+    """
+    Determine cardiovascular risk category based on age and calibrated risk percentage.
+
+    Args:
+        age: Patient's age in years
+        calibrated_risk: Calibrated 10-year CVD risk as a percentage
+
+    Returns:
+        Risk stratification category
+    """
+    if age < 50:
+        if calibrated_risk < 2.5:
+            return "Low to moderate"
+        elif calibrated_risk < 7.5:
+            return "High"
+        else:
+            return "Very high"
+    else:  # age 50-69
+        if calibrated_risk < 5:
+            return "Low to moderate"
+        elif calibrated_risk < 10:
+            return "High"
+        else:
+            return "Very high"
+
+
+def apply_calibration(uncalibrated_risk: float, scale1: float, scale2: float) -> float:
+    """
+    Apply regional calibration to uncalibrated risk estimate.
+
+    Args:
+        uncalibrated_risk: Raw risk estimate from the Cox model
+        scale1: First calibration scale parameter
+        scale2: Second calibration scale parameter
+
+    Returns:
+        Calibrated 10-year CVD risk as a percentage
+    """
+    return float(
+        (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk)))))
+        * 100
+    )
diff --git a/vitals/schemas/markers.py b/vitals/schemas/markers.py
new file mode 100644
index 0000000..b3e630c
--- /dev/null
+++ b/vitals/schemas/markers.py
@@ -0,0 +1,42 @@
+from pydantic import BaseModel
+
+
+class PhenoageMarkers(BaseModel):
+    """Processed PhenoAge biomarkers with standardized units."""
+
+    albumin: float
+    creatinine: float
+    glucose: float
+    crp: float
+    lymphocyte_percent: float
+    mean_cell_volume: float
+    red_cell_distribution_width: float
+    alkaline_phosphatase: float
+    white_blood_cell_count: float
+    age: float
+
+
+class Score2Markers(BaseModel):
+    """Processed Score2 biomarkers with standardized units."""
+
+    age: float
+    systolic_blood_pressure: float
+    total_cholesterol: float
+    hdl_cholesterol: float
+    smoking: bool
+    is_male: bool
+
+
+class Score2DiabetesMarkers(BaseModel):
+    """Processed Score2-Diabetes biomarkers with standardized units."""
+
+    age: float
+    systolic_blood_pressure: float
+    total_cholesterol: float
+    hdl_cholesterol: float
+    smoking: bool
+    is_male: bool
+    diabetes: bool
+    age_at_diabetes_diagnosis: float
+    hba1c: float
+    egfr: float
diff --git a/vitals/biomarkers/schemas.py b/vitals/schemas/units.py
old mode 100755
new mode 100644
similarity index 54%
rename from vitals/biomarkers/schemas.py
rename to vitals/schemas/units.py
index e93e7d4..dfa3b18
--- a/vitals/biomarkers/schemas.py
+++ b/vitals/schemas/units.py
@@ -1,7 +1,6 @@
 from pydantic import BaseModel
 
 
-# ------ PHENOAGE Schemas
 class PhenoageUnits(BaseModel):
     """
     The expected unit to be used for phenoage computation
@@ -19,22 +18,6 @@ class PhenoageUnits(BaseModel):
     age: str = "years"
 
 
-class PhenoageMarkers(BaseModel):
-    """Processed PhenoAge biomarkers with standardized units."""
-
-    albumin: float
-    creatinine: float
-    glucose: float
-    crp: float
-    lymphocyte_percent: float
-    mean_cell_volume: float
-    red_cell_distribution_width: float
-    alkaline_phosphatase: float
-    white_blood_cell_count: float
-    age: float
-
-
-# ------ SCORE2 Schemas
 class Score2Units(BaseModel):
     """
     The expected unit to be used for Score2 computation
@@ -48,18 +31,6 @@ class Score2Units(BaseModel):
     is_male: str = "yes/no"
 
 
-class Score2Markers(BaseModel):
-    """Processed Score2 biomarkers with standardized units."""
-
-    age: float
-    systolic_blood_pressure: float
-    total_cholesterol: float
-    hdl_cholesterol: float
-    smoking: bool
-    is_male: bool
-
-
-# ------ SCORE2-Diabetes Schemas
 class Score2DiabetesUnits(BaseModel):
     """
     The expected unit to be used for Score2-Diabetes computation
@@ -75,18 +46,3 @@ class Score2DiabetesUnits(BaseModel):
     age_at_diabetes_diagnosis: str = "years"
     hba1c: str = "mmol/mol"
     egfr: str = "mL/min/1.73m²"
-
-
-class Score2DiabetesMarkers(BaseModel):
-    """Processed Score2-Diabetes biomarkers with standardized units."""
-
-    age: float
-    systolic_blood_pressure: float
-    total_cholesterol: float
-    hdl_cholesterol: float
-    smoking: bool
-    is_male: bool
-    diabetes: bool
-    age_at_diabetes_diagnosis: float
-    hba1c: float
-    egfr: float
diff --git a/vitals/score2/compute.py b/vitals/score2/compute.py
index dc794af..a536d9b 100644
--- a/vitals/score2/compute.py
+++ b/vitals/score2/compute.py
@@ -6,78 +6,20 @@
 """
 
 from pathlib import Path
-from typing import Literal, TypeAlias
 
 import numpy as np
-from pydantic import BaseModel
 
-from vitals.biomarkers import helpers, schemas
-
-RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"]
-
-
-class ModelCoefficients(BaseModel):
-    """
-    Sex-specific coefficients for the SCORE2 Cox proportional hazards model.
-
-    These coefficients are used to calculate the 10-year risk of cardiovascular disease
-    based on transformed risk factors and their age interactions.
-    """
-
-    # Male coefficients
-    male_age: float = 0.3742
-    male_smoking: float = 0.6012
-    male_sbp: float = 0.2777
-    male_total_cholesterol: float = 0.1458
-    male_hdl_cholesterol: float = -0.2698
-
-    # Male interaction term coefficients
-    male_smoking_age: float = -0.0755
-    male_sbp_age: float = -0.0255
-    male_tchol_age: float = -0.0281
-    male_hdl_age: float = 0.0426
-
-    # Female coefficients
-    female_age: float = 0.4648
-    female_smoking: float = 0.7744
-    female_sbp: float = 0.3131
-    female_total_cholesterol: float = 0.1002
-    female_hdl_cholesterol: float = -0.2606
-
-    # Female interaction term coefficients
-    female_smoking_age: float = -0.1088
-    female_sbp_age: float = -0.0277
-    female_tchol_age: float = -0.0226
-    female_hdl_age: float = 0.0613
-
-
-class BaselineSurvival(BaseModel):
-    """
-    Sex-specific baseline survival probabilities for the SCORE2 model.
-
-    These values represent the 10-year survival probability for individuals
-    with all risk factors at their reference values.
-    """
-
-    male: float = 0.9605
-    female: float = 0.9776
-
-
-class CalibrationScales(BaseModel):
-    """
-    Region and sex-specific calibration scales for Belgium (Low Risk region).
-
-    These scales are used to calibrate the uncalibrated risk estimate to match
-    the population-specific cardiovascular disease incidence rates.
-    """
-
-    # Male calibration scales
-    male_scale1: float = -0.5699
-    male_scale2: float = 0.7476
-
-    # Female calibration scales
-    female_scale1: float = -0.7380
-    female_scale2: float = 0.7019
+from vitals.biomarkers import helpers
+from vitals.schemas.coefficients import Score2FemaleCoefficients, Score2MaleCoefficients
+from vitals.schemas.core import (
+    BaselineSurvival,
+    CalibrationScales,
+    RiskCategory,
+    apply_calibration,
+    determine_risk_category,
+)
+from vitals.schemas.markers import Score2Markers
+from vitals.schemas.units import Score2Units
 
 
 def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]:
@@ -104,11 +46,11 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     # Extract biomarkers from JSON file
     biomarkers = helpers.extract_biomarkers_from_json(
         filepath=filepath,
-        biomarker_class=schemas.Score2Markers,
-        biomarker_units=schemas.Score2Units(),
+        biomarker_class=Score2Markers,
+        biomarker_units=Score2Units(),
     )
 
-    if not isinstance(biomarkers, schemas.Score2Markers):
+    if not isinstance(biomarkers, Score2Markers):
         raise ValueError(f"Invalid biomarker class used: {biomarkers}")
 
     age: float = biomarkers.age
@@ -127,72 +69,50 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     tchol_age: float = ctchol * cage
     hdl_age: float = chdl * cage
 
-    # Get model coefficients
-    coef: ModelCoefficients = ModelCoefficients()
-
-    # Calculate linear predictor (x) based on sex
-
-    linear_pred: float
-    baseline_survival: float
-    scale1: float
-    scale2: float
+    # Get sex-specific coefficients and calibration values
+    baseline_survival_model = BaselineSurvival()
+    calibration_scales = CalibrationScales()
 
     if is_male:
+        male_coef = Score2MaleCoefficients()
         linear_pred = (
-            coef.male_age * cage
-            + coef.male_smoking * smoking
-            + coef.male_sbp * csbp
-            + coef.male_total_cholesterol * ctchol
-            + coef.male_hdl_cholesterol * chdl
-            + coef.male_smoking_age * smoking_age
-            + coef.male_sbp_age * sbp_age
-            + coef.male_tchol_age * tchol_age
-            + coef.male_hdl_age * hdl_age
+            male_coef.age * cage
+            + male_coef.smoking * smoking
+            + male_coef.sbp * csbp
+            + male_coef.total_cholesterol * ctchol
+            + male_coef.hdl_cholesterol * chdl
+            + male_coef.smoking_age * smoking_age
+            + male_coef.sbp_age * sbp_age
+            + male_coef.tchol_age * tchol_age
+            + male_coef.hdl_age * hdl_age
         )
-        baseline_survival = BaselineSurvival().male
-        scale1 = CalibrationScales().male_scale1
-        scale2 = CalibrationScales().male_scale2
+        baseline_survival = baseline_survival_model.male
+        scale1 = calibration_scales.male_scale1
+        scale2 = calibration_scales.male_scale2
     else:
+        female_coef = Score2FemaleCoefficients()
         linear_pred = (
-            coef.female_age * cage
-            + coef.female_smoking * smoking
-            + coef.female_sbp * csbp
-            + coef.female_total_cholesterol * ctchol
-            + coef.female_hdl_cholesterol * chdl
-            + coef.female_smoking_age * smoking_age
-            + coef.female_sbp_age * sbp_age
-            + coef.female_tchol_age * tchol_age
-            + coef.female_hdl_age * hdl_age
+            female_coef.age * cage
+            + female_coef.smoking * smoking
+            + female_coef.sbp * csbp
+            + female_coef.total_cholesterol * ctchol
+            + female_coef.hdl_cholesterol * chdl
+            + female_coef.smoking_age * smoking_age
+            + female_coef.sbp_age * sbp_age
+            + female_coef.tchol_age * tchol_age
+            + female_coef.hdl_age * hdl_age
         )
-        baseline_survival = BaselineSurvival().female
-        scale1 = CalibrationScales().female_scale1
-        scale2 = CalibrationScales().female_scale2
+        baseline_survival = baseline_survival_model.female
+        scale1 = calibration_scales.female_scale1
+        scale2 = calibration_scales.female_scale2
 
     # Calculate uncalibrated risk
     uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred))
 
     # Apply calibration for Belgium (Low Risk region)
-    # Calibrated 10-year risk, % = [1 - exp(-exp(scale1 + scale2*ln(-ln(1 - 10-year risk))))] * 100
-    calibrated_risk: float = float(
-        (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk)))))
-        * 100
-    )
+    calibrated_risk: float = apply_calibration(uncalibrated_risk, scale1, scale2)
 
     # Determine risk category based on age
-    risk_category: RiskCategory
-    if age < 50:
-        if calibrated_risk < 2.5:
-            risk_category = "Low to moderate"
-        elif calibrated_risk < 7.5:
-            risk_category = "High"
-        else:
-            risk_category = "Very high"
-    else:  # age 50-69
-        if calibrated_risk < 5:
-            risk_category = "Low to moderate"
-        elif calibrated_risk < 10:
-            risk_category = "High"
-        else:
-            risk_category = "Very high"
+    risk_category: RiskCategory = determine_risk_category(age, calibrated_risk)
 
     return (age, round(calibrated_risk, 2), risk_category)
diff --git a/vitals/score2_diabetes/compute.py b/vitals/score2_diabetes/compute.py
index a628f50..4dbfed7 100644
--- a/vitals/score2_diabetes/compute.py
+++ b/vitals/score2_diabetes/compute.py
@@ -7,94 +7,23 @@
 
 import math
 from pathlib import Path
-from typing import Literal, TypeAlias
 
 import numpy as np
-from pydantic import BaseModel
 
-from vitals.biomarkers import helpers, schemas
-
-RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"]
-
-
-class ModelCoefficients(BaseModel):
-    """
-    Sex-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model.
-
-    These coefficients are used to calculate the 10-year risk of cardiovascular disease
-    based on transformed risk factors and their age interactions.
-    """
-
-    # Male main effect coefficients
-    male_age: float = 0.5368
-    male_smoking: float = 0.4774
-    male_sbp: float = 0.1322
-    male_diabetes: float = 0.6457
-    male_total_cholesterol: float = 0.1102
-    male_hdl_cholesterol: float = -0.1087
-    male_age_at_diabetes_diagnosis: float = -0.0998
-    male_hba1c: float = 0.0955
-    male_egfr: float = -0.0591
-    male_egfr_squared: float = 0.0058
-
-    # Male interaction term coefficients
-    male_smoking_age: float = -0.0672
-    male_sbp_age: float = -0.0268
-    male_diabetes_age: float = -0.0983
-    male_tchol_age: float = -0.0181
-    male_hdl_age: float = 0.0095
-    male_hba1c_age: float = -0.0134
-    male_egfr_age: float = 0.0115
-
-    # Female main effect coefficients
-    female_age: float = 0.6624
-    female_smoking: float = 0.6139
-    female_sbp: float = 0.1421
-    female_diabetes: float = 0.8096
-    female_total_cholesterol: float = 0.1127
-    female_hdl_cholesterol: float = -0.1568
-    female_age_at_diabetes_diagnosis: float = -0.1180
-    female_hba1c: float = 0.1173
-    female_egfr: float = -0.0640
-    female_egfr_squared: float = 0.0062
-
-    # Female interaction term coefficients
-    female_smoking_age: float = -0.1122
-    female_sbp_age: float = -0.0167
-    female_diabetes_age: float = -0.1272
-    female_tchol_age: float = -0.0200
-    female_hdl_age: float = 0.0186
-    female_hba1c_age: float = -0.0196
-    female_egfr_age: float = 0.0169
-
-
-class BaselineSurvival(BaseModel):
-    """
-    Sex-specific baseline survival probabilities for the SCORE2-Diabetes model.
-
-    These values represent the 10-year survival probability for individuals
-    with all risk factors at their reference values.
-    """
-
-    male: float = 0.9605
-    female: float = 0.9776
-
-
-class CalibrationScales(BaseModel):
-    """
-    Region and sex-specific calibration scales for Belgium (Low Risk region).
-
-    These scales are used to calibrate the uncalibrated risk estimate to match
-    the population-specific cardiovascular disease incidence rates.
-    """
-
-    # Male calibration scales
-    male_scale1: float = -0.5699
-    male_scale2: float = 0.7476
-
-    # Female calibration scales
-    female_scale1: float = -0.7380
-    female_scale2: float = 0.7019
+from vitals.biomarkers import helpers
+from vitals.schemas.coefficients import (
+    Score2DiabetesFemaleCoefficients,
+    Score2DiabetesMaleCoefficients,
+)
+from vitals.schemas.core import (
+    BaselineSurvival,
+    CalibrationScales,
+    RiskCategory,
+    apply_calibration,
+    determine_risk_category,
+)
+from vitals.schemas.markers import Score2DiabetesMarkers
+from vitals.schemas.units import Score2DiabetesUnits
 
 
 def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]:
@@ -122,11 +51,11 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     # Extract biomarkers from JSON file
     biomarkers = helpers.extract_biomarkers_from_json(
         filepath=filepath,
-        biomarker_class=schemas.Score2DiabetesMarkers,
-        biomarker_units=schemas.Score2DiabetesUnits(),
+        biomarker_class=Score2DiabetesMarkers,
+        biomarker_units=Score2DiabetesUnits(),
     )
 
-    if not isinstance(biomarkers, schemas.Score2DiabetesMarkers):
+    if not isinstance(biomarkers, Score2DiabetesMarkers):
         raise ValueError(f"Invalid biomarker class used: {biomarkers}")
 
     age: float = biomarkers.age
@@ -153,87 +82,66 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     hba1c_age: float = ca1c * cage
     egfr_age: float = cegfr * cage
 
-    # Get model coefficients
-    coef: ModelCoefficients = ModelCoefficients()
-
-    # Calculate linear predictor (x) based on sex
-    linear_pred: float
-    baseline_survival: float
-    scale1: float
-    scale2: float
+    # Get sex-specific coefficients and calibration values
+    baseline_survival_model = BaselineSurvival()
+    calibration_scales = CalibrationScales()
 
     if is_male:
+        male_coef = Score2DiabetesMaleCoefficients()
         linear_pred = (
-            coef.male_age * cage
-            + coef.male_smoking * smoking
-            + coef.male_sbp * csbp
-            + coef.male_diabetes * diabetes
-            + coef.male_total_cholesterol * ctchol
-            + coef.male_hdl_cholesterol * chdl
-            + coef.male_age_at_diabetes_diagnosis * cagediab
-            + coef.male_hba1c * ca1c
-            + coef.male_egfr * cegfr
-            + coef.male_egfr_squared * cegfr_squared
-            + coef.male_smoking_age * smoking_age
-            + coef.male_sbp_age * sbp_age
-            + coef.male_diabetes_age * diabetes_age
-            + coef.male_tchol_age * tchol_age
-            + coef.male_hdl_age * hdl_age
-            + coef.male_hba1c_age * hba1c_age
-            + coef.male_egfr_age * egfr_age
+            male_coef.age * cage
+            + male_coef.smoking * smoking
+            + male_coef.sbp * csbp
+            + male_coef.diabetes * diabetes
+            + male_coef.total_cholesterol * ctchol
+            + male_coef.hdl_cholesterol * chdl
+            + male_coef.age_at_diabetes_diagnosis * cagediab
+            + male_coef.hba1c * ca1c
+            + male_coef.egfr * cegfr
+            + male_coef.egfr_squared * cegfr_squared
+            + male_coef.smoking_age * smoking_age
+            + male_coef.sbp_age * sbp_age
+            + male_coef.diabetes_age * diabetes_age
+            + male_coef.tchol_age * tchol_age
+            + male_coef.hdl_age * hdl_age
+            + male_coef.hba1c_age * hba1c_age
+            + male_coef.egfr_age * egfr_age
         )
-        baseline_survival = BaselineSurvival().male
-        scale1 = CalibrationScales().male_scale1
-        scale2 = CalibrationScales().male_scale2
+        baseline_survival = baseline_survival_model.male
+        scale1 = calibration_scales.male_scale1
+        scale2 = calibration_scales.male_scale2
     else:
+        female_coef = Score2DiabetesFemaleCoefficients()
         linear_pred = (
-            coef.female_age * cage
-            + coef.female_smoking * smoking
-            + coef.female_sbp * csbp
-            + coef.female_diabetes * diabetes
-            + coef.female_total_cholesterol * ctchol
-            + coef.female_hdl_cholesterol * chdl
-            + coef.female_age_at_diabetes_diagnosis * cagediab
-            + coef.female_hba1c * ca1c
-            + coef.female_egfr * cegfr
-            + coef.female_egfr_squared * cegfr_squared
-            + coef.female_smoking_age * smoking_age
-            + coef.female_sbp_age * sbp_age
-            + coef.female_diabetes_age * diabetes_age
-            + coef.female_tchol_age * tchol_age
-            + coef.female_hdl_age * hdl_age
-            + coef.female_hba1c_age * hba1c_age
-            + coef.female_egfr_age * egfr_age
+            female_coef.age * cage
+            + female_coef.smoking * smoking
+            + female_coef.sbp * csbp
+            + female_coef.diabetes * diabetes
+            + female_coef.total_cholesterol * ctchol
+            + female_coef.hdl_cholesterol * chdl
+            + female_coef.age_at_diabetes_diagnosis * cagediab
+            + female_coef.hba1c * ca1c
+            + female_coef.egfr * cegfr
+            + female_coef.egfr_squared * cegfr_squared
+            + female_coef.smoking_age * smoking_age
+            + female_coef.sbp_age * sbp_age
+            + female_coef.diabetes_age * diabetes_age
+            + female_coef.tchol_age * tchol_age
+            + female_coef.hdl_age * hdl_age
+            + female_coef.hba1c_age * hba1c_age
+            + female_coef.egfr_age * egfr_age
         )
-        baseline_survival = BaselineSurvival().female
-        scale1 = CalibrationScales().female_scale1
-        scale2 = CalibrationScales().female_scale2
+        baseline_survival = baseline_survival_model.female
+        scale1 = calibration_scales.female_scale1
+        scale2 = calibration_scales.female_scale2
 
     # Calculate uncalibrated risk
     uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred))
 
     # Apply calibration for Belgium (Low Risk region)
-    # Calibrated 10-year risk, % = [1 - exp(-exp(scale1 + scale2*ln(-ln(1 - 10-year risk))))] * 100
-    calibrated_risk: float = float(
-        (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk)))))
-        * 100
-    )
+    calibrated_risk: float = apply_calibration(uncalibrated_risk, scale1, scale2)
 
     # Determine risk category based on age
-    risk_category: RiskCategory
-    if age < 50:
-        if calibrated_risk < 2.5:
-            risk_category = "Low to moderate"
-        elif calibrated_risk < 7.5:
-            risk_category = "High"
-        else:
-            risk_category = "Very high"
-    else:  # age 50-69
-        if calibrated_risk < 5:
-            risk_category = "Low to moderate"
-        elif calibrated_risk < 10:
-            risk_category = "High"
-        else:
-            risk_category = "Very high"
+    risk_category: RiskCategory = determine_risk_category(age, calibrated_risk)
 
     return (age, round(calibrated_risk, 2), risk_category)

From 30324a0001bc795a978d39f7284867598528db06 Mon Sep 17 00:00:00 2001
From: fbraza <fbraza@tutanota.com>
Date: Sat, 19 Jul 2025 22:19:21 +0200
Subject: [PATCH 2/3] refactor: simplify the compute functions for score2 and
 score2_diabetes

---
 vitals/score2/compute.py          | 39 ++++++++-----------
 vitals/score2_diabetes/compute.py | 63 +++++++++++--------------------
 2 files changed, 38 insertions(+), 64 deletions(-)

diff --git a/vitals/score2/compute.py b/vitals/score2/compute.py
index a536d9b..72fb196 100644
--- a/vitals/score2/compute.py
+++ b/vitals/score2/compute.py
@@ -73,39 +73,30 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     baseline_survival_model = BaselineSurvival()
     calibration_scales = CalibrationScales()
 
+    coef: Score2MaleCoefficients | Score2FemaleCoefficients
     if is_male:
-        male_coef = Score2MaleCoefficients()
-        linear_pred = (
-            male_coef.age * cage
-            + male_coef.smoking * smoking
-            + male_coef.sbp * csbp
-            + male_coef.total_cholesterol * ctchol
-            + male_coef.hdl_cholesterol * chdl
-            + male_coef.smoking_age * smoking_age
-            + male_coef.sbp_age * sbp_age
-            + male_coef.tchol_age * tchol_age
-            + male_coef.hdl_age * hdl_age
-        )
+        coef = Score2MaleCoefficients()
         baseline_survival = baseline_survival_model.male
         scale1 = calibration_scales.male_scale1
         scale2 = calibration_scales.male_scale2
     else:
-        female_coef = Score2FemaleCoefficients()
-        linear_pred = (
-            female_coef.age * cage
-            + female_coef.smoking * smoking
-            + female_coef.sbp * csbp
-            + female_coef.total_cholesterol * ctchol
-            + female_coef.hdl_cholesterol * chdl
-            + female_coef.smoking_age * smoking_age
-            + female_coef.sbp_age * sbp_age
-            + female_coef.tchol_age * tchol_age
-            + female_coef.hdl_age * hdl_age
-        )
+        coef = Score2FemaleCoefficients()
         baseline_survival = baseline_survival_model.female
         scale1 = calibration_scales.female_scale1
         scale2 = calibration_scales.female_scale2
 
+    linear_pred = (
+        coef.age * cage
+        + coef.smoking * smoking
+        + coef.sbp * csbp
+        + coef.total_cholesterol * ctchol
+        + coef.hdl_cholesterol * chdl
+        + coef.smoking_age * smoking_age
+        + coef.sbp_age * sbp_age
+        + coef.tchol_age * tchol_age
+        + coef.hdl_age * hdl_age
+    )
+
     # Calculate uncalibrated risk
     uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred))
 
diff --git a/vitals/score2_diabetes/compute.py b/vitals/score2_diabetes/compute.py
index 4dbfed7..77e69c4 100644
--- a/vitals/score2_diabetes/compute.py
+++ b/vitals/score2_diabetes/compute.py
@@ -86,55 +86,38 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     baseline_survival_model = BaselineSurvival()
     calibration_scales = CalibrationScales()
 
+    coef: Score2DiabetesMaleCoefficients | Score2DiabetesFemaleCoefficients
     if is_male:
-        male_coef = Score2DiabetesMaleCoefficients()
-        linear_pred = (
-            male_coef.age * cage
-            + male_coef.smoking * smoking
-            + male_coef.sbp * csbp
-            + male_coef.diabetes * diabetes
-            + male_coef.total_cholesterol * ctchol
-            + male_coef.hdl_cholesterol * chdl
-            + male_coef.age_at_diabetes_diagnosis * cagediab
-            + male_coef.hba1c * ca1c
-            + male_coef.egfr * cegfr
-            + male_coef.egfr_squared * cegfr_squared
-            + male_coef.smoking_age * smoking_age
-            + male_coef.sbp_age * sbp_age
-            + male_coef.diabetes_age * diabetes_age
-            + male_coef.tchol_age * tchol_age
-            + male_coef.hdl_age * hdl_age
-            + male_coef.hba1c_age * hba1c_age
-            + male_coef.egfr_age * egfr_age
-        )
+        coef = Score2DiabetesMaleCoefficients()
         baseline_survival = baseline_survival_model.male
         scale1 = calibration_scales.male_scale1
         scale2 = calibration_scales.male_scale2
     else:
-        female_coef = Score2DiabetesFemaleCoefficients()
-        linear_pred = (
-            female_coef.age * cage
-            + female_coef.smoking * smoking
-            + female_coef.sbp * csbp
-            + female_coef.diabetes * diabetes
-            + female_coef.total_cholesterol * ctchol
-            + female_coef.hdl_cholesterol * chdl
-            + female_coef.age_at_diabetes_diagnosis * cagediab
-            + female_coef.hba1c * ca1c
-            + female_coef.egfr * cegfr
-            + female_coef.egfr_squared * cegfr_squared
-            + female_coef.smoking_age * smoking_age
-            + female_coef.sbp_age * sbp_age
-            + female_coef.diabetes_age * diabetes_age
-            + female_coef.tchol_age * tchol_age
-            + female_coef.hdl_age * hdl_age
-            + female_coef.hba1c_age * hba1c_age
-            + female_coef.egfr_age * egfr_age
-        )
+        coef = Score2DiabetesFemaleCoefficients()
         baseline_survival = baseline_survival_model.female
         scale1 = calibration_scales.female_scale1
         scale2 = calibration_scales.female_scale2
 
+    linear_pred = (
+        coef.age * cage
+        + coef.smoking * smoking
+        + coef.sbp * csbp
+        + coef.diabetes * diabetes
+        + coef.total_cholesterol * ctchol
+        + coef.hdl_cholesterol * chdl
+        + coef.age_at_diabetes_diagnosis * cagediab
+        + coef.hba1c * ca1c
+        + coef.egfr * cegfr
+        + coef.egfr_squared * cegfr_squared
+        + coef.smoking_age * smoking_age
+        + coef.sbp_age * sbp_age
+        + coef.diabetes_age * diabetes_age
+        + coef.tchol_age * tchol_age
+        + coef.hdl_age * hdl_age
+        + coef.hba1c_age * hba1c_age
+        + coef.egfr_age * egfr_age
+    )
+
     # Calculate uncalibrated risk
     uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred))
 

From f44b39912adfaac5865cde6601ab5d7c7804f06c Mon Sep 17 00:00:00 2001
From: fbraza <fbraza@tutanota.com>
Date: Sat, 19 Jul 2025 23:42:31 +0200
Subject: [PATCH 3/3] refactor: consolidate shared components and reorganize
 schema modules

- Move common functions (risk categorization, calibration, Gompertz model) to helpers.py
- Consolidate schema modules into algorithm-specific files (phenoage.py, score2.py)
- Move compute modules into vitals/models/ and rename their entry points to compute()
- Rename coefficient classes for clarity and consistency
- Update imports across all compute modules and tests to use the new structure
- Remove redundant schema files (core.py, markers.py, units.py)
---
 tests/test_phenoage.py                        |   4 +-
 tests/test_score2.py                          |   6 +-
 tests/test_score2_diabetes.py                 |   4 +-
 vitals/biomarkers/helpers.py                  |  62 ++++++++++-
 vitals/{phenoage => models}/__init__.py       |   0
 .../compute.py => models/phenoage.py}         |  68 ++----------
 .../{score2/compute.py => models/score2.py}   |  36 ++++---
 .../compute.py => models/score2_diabetes.py}  |  39 +++----
 vitals/schemas/core.py                        |  80 --------------
 vitals/schemas/markers.py                     |  42 --------
 vitals/schemas/phenoage.py                    |  62 +++++++++++
 vitals/schemas/{coefficients.py => score2.py} | 101 +++++++++++++++++-
 vitals/schemas/units.py                       |  48 ---------
 vitals/score2/__init__.py                     |   0
 vitals/score2_diabetes/__init__.py            |   5 -
 15 files changed, 272 insertions(+), 285 deletions(-)
 rename vitals/{phenoage => models}/__init__.py (100%)
 rename vitals/{phenoage/compute.py => models/phenoage.py} (50%)
 rename vitals/{score2/compute.py => models/score2.py} (79%)
 rename vitals/{score2_diabetes/compute.py => models/score2_diabetes.py} (81%)
 delete mode 100644 vitals/schemas/core.py
 delete mode 100644 vitals/schemas/markers.py
 create mode 100644 vitals/schemas/phenoage.py
 rename vitals/schemas/{coefficients.py => score2.py} (52%)
 delete mode 100644 vitals/schemas/units.py
 delete mode 100644 vitals/score2/__init__.py
 delete mode 100644 vitals/score2_diabetes/__init__.py

diff --git a/tests/test_phenoage.py b/tests/test_phenoage.py
index 3da32da..60cf7f2 100755
--- a/tests/test_phenoage.py
+++ b/tests/test_phenoage.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from vitals.phenoage import compute
+from vitals.models import phenoage
 
 OUT_FILEPATH = Path(__file__).parent / "inputs" / "phenoage"
 
@@ -25,7 +25,7 @@
 )
 def test_phenoage(filename, expected):
     # Get the actual fixture value using request.getfixturevalue
-    age, pred_age, accl_age = compute.biological_age(OUT_FILEPATH / filename)
+    age, pred_age, accl_age = phenoage.compute(OUT_FILEPATH / filename)
     expected_age, expected_pred_age, expected_accl_age = expected
 
     assert age == expected_age
diff --git a/tests/test_score2.py b/tests/test_score2.py
index 98e76ff..32adee3 100644
--- a/tests/test_score2.py
+++ b/tests/test_score2.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from vitals.score2 import compute
+from vitals.models import score2
 
 OUT_FILEPATH = Path(__file__).parent / "inputs" / "score2"
 
@@ -26,9 +26,7 @@
 )
 def test_score2(filename, expected):
     # Get the actual fixture value using request.getfixturevalue
-    age, pred_risk, pred_risk_category = compute.cardiovascular_risk(
-        OUT_FILEPATH / filename
-    )
+    age, pred_risk, pred_risk_category = score2.compute(OUT_FILEPATH / filename)
     expected_age, expected_risk, expected_category = expected
 
     assert age == expected_age
diff --git a/tests/test_score2_diabetes.py b/tests/test_score2_diabetes.py
index 78401f5..1a57311 100644
--- a/tests/test_score2_diabetes.py
+++ b/tests/test_score2_diabetes.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from vitals.score2_diabetes import compute
+from vitals.models import score2_diabetes
 
 OUT_FILEPATH = Path(__file__).parent / "inputs" / "score2_diabetes"
 
@@ -26,7 +26,7 @@ def test_score2_diabetes(filename, expected):
     They need to be calculated using MDCalc and updated before running tests.
     """
     # Get the actual fixture value
-    age, pred_risk, pred_risk_category = compute.cardiovascular_risk(
+    age, pred_risk, pred_risk_category = score2_diabetes.compute(
         OUT_FILEPATH / filename
     )
     expected_age, expected_risk, expected_category = expected
diff --git a/vitals/biomarkers/helpers.py b/vitals/biomarkers/helpers.py
index eeed0fa..15657f5 100755
--- a/vitals/biomarkers/helpers.py
+++ b/vitals/biomarkers/helpers.py
@@ -1,13 +1,15 @@
 from collections.abc import Callable
 from pathlib import Path
-from typing import Any, TypedDict, TypeVar
+from typing import Any, Literal, TypeAlias, TypedDict, TypeVar
 
+import numpy as np
 from pydantic import BaseModel
 
-from vitals.schemas.units import PhenoageUnits, Score2DiabetesUnits, Score2Units
+from vitals.schemas import phenoage, score2
 
+RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"]
 Biomarkers = TypeVar("Biomarkers", bound=BaseModel)
-Units = PhenoageUnits | Score2Units | Score2DiabetesUnits
+Units = phenoage.Units | score2.Units | score2.UnitsDiabetes
 
 
 class ConversionInfo(TypedDict):
@@ -198,3 +200,57 @@ def extract_biomarkers_from_json(
         extracted_values[field_name] = value
 
     return biomarker_class(**extracted_values)
+
+
+def determine_risk_category(age: float, calibrated_risk: float) -> RiskCategory:
+    """
+    Determine cardiovascular risk category based on age and calibrated risk percentage.
+
+    Args:
+        age: Patient's age in years
+        calibrated_risk: Calibrated 10-year CVD risk as a percentage
+
+    Returns:
+        Risk stratification category
+    """
+    if age < 50:
+        if calibrated_risk < 2.5:
+            return "Low to moderate"
+        elif calibrated_risk < 7.5:
+            return "High"
+        else:
+            return "Very high"
+    else:  # age 50-69
+        if calibrated_risk < 5:
+            return "Low to moderate"
+        elif calibrated_risk < 10:
+            return "High"
+        else:
+            return "Very high"
+
+
+def apply_calibration(uncalibrated_risk: float, scale1: float, scale2: float) -> float:
+    """
+    Apply regional calibration to uncalibrated risk estimate.
+
+    Args:
+        uncalibrated_risk: Raw risk estimate from the Cox model
+        scale1: First calibration scale parameter
+        scale2: Second calibration scale parameter
+
+    Returns:
+        Calibrated 10-year CVD risk as a percentage
+    """
+    return float(
+        (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk)))))
+        * 100
+    )
+
+
+def gompertz_mortality_model(weighted_risk_score: float) -> float:
+    params = phenoage.Gompertz()
+    return 1 - np.exp(
+        -np.exp(weighted_risk_score)
+        * (np.exp(120 * params.lambda_) - 1)
+        / params.lambda_
+    )
diff --git a/vitals/phenoage/__init__.py b/vitals/models/__init__.py
similarity index 100%
rename from vitals/phenoage/__init__.py
rename to vitals/models/__init__.py
diff --git a/vitals/phenoage/compute.py b/vitals/models/phenoage.py
similarity index 50%
rename from vitals/phenoage/compute.py
rename to vitals/models/phenoage.py
index ac52596..7454608 100755
--- a/vitals/phenoage/compute.py
+++ b/vitals/models/phenoage.py
@@ -1,52 +1,12 @@
 from pathlib import Path
 
 import numpy as np
-from pydantic import BaseModel
 
 from vitals.biomarkers import helpers
-from vitals.schemas.markers import PhenoageMarkers
-from vitals.schemas.units import PhenoageUnits
+from vitals.schemas.phenoage import Gompertz, LinearModel, Markers, Units
 
 
-class LinearModel(BaseModel):
-    """
-    Coefficients used to calculate the PhenoAge from Levine et al 2018
-    """
-
-    intercept: float = -19.9067
-    albumin: float = -0.0336
-    creatinine: float = 0.0095
-    glucose: float = 0.1953
-    log_crp: float = 0.0954
-    lymphocyte_percent: float = -0.0120
-    mean_cell_volume: float = 0.0268
-    red_cell_distribution_width: float = 0.3306
-    alkaline_phosphatase: float = 0.00188
-    white_blood_cell_count: float = 0.0554
-    age: float = 0.0804
-
-
-class Gompertz(BaseModel):
-    """
-    Parameters of the Gompertz distribution for PhenoAge computation
-    """
-
-    lambda_: float = 0.0192
-    coef1: float = 141.50225
-    coef2: float = -0.00553
-    coef3: float = 0.090165
-
-
-def __gompertz_mortality_model(weighted_risk_score: float) -> float:
-    __params = Gompertz()
-    return 1 - np.exp(
-        -np.exp(weighted_risk_score)
-        * (np.exp(120 * __params.lambda_) - 1)
-        / __params.lambda_
-    )
-
-
-def biological_age(filepath: str | Path) -> tuple[float, float, float]:
+def compute(filepath: str | Path) -> tuple[float, float, float]:
     """
     The Phenoage score is calculated as a weighted (coefficients available in Levine et al 2018)
     linear combination of these variables, which was then transformed into units of years using 2 parametric
@@ -57,14 +17,14 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]:
     # Extract biomarkers from JSON file
     biomarkers = helpers.extract_biomarkers_from_json(
         filepath=filepath,
-        biomarker_class=PhenoageMarkers,
-        biomarker_units=PhenoageUnits(),
+        biomarker_class=Markers,
+        biomarker_units=Units(),
     )
 
     age = biomarkers.age
     coef = LinearModel()
 
-    if isinstance(biomarkers, PhenoageMarkers):
+    if isinstance(biomarkers, Markers):
         weighted_risk_score = (
             coef.intercept
             + (coef.albumin * biomarkers.albumin)
@@ -81,7 +41,9 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]:
             + (coef.white_blood_cell_count * biomarkers.white_blood_cell_count)
             + (coef.age * biomarkers.age)
         )
-        gompertz = __gompertz_mortality_model(weighted_risk_score=weighted_risk_score)
+        gompertz = helpers.gompertz_mortality_model(
+            weighted_risk_score=weighted_risk_score
+        )
         model = Gompertz()
         pred_age = (
             model.coef1 + np.log(model.coef2 * np.log(1 - gompertz)) / model.coef3
@@ -90,17 +52,3 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]:
         return (age, pred_age, accl_age)
     else:
         raise ValueError(f"Invalid biomarker class used: {biomarkers}")
-
-
-# if __name__ == "__main__":
-#     from pathlib import Path
-#     input_dir = Path("tests/outputs")
-#     output_dir = Path("tests/outputs")
-
-#     for input_file in input_dir.glob("*.json"):
-#         if "patient" not in str(input_file):
-#             continue
-
-#         # Update biomarker data
-#         age, pred_age, accl_age = biological_age(str(input_file))
-#         print(f"Chrono Age: {age} ::: Predicted Age: {pred_age} ::: Accel {accl_age}")
diff --git a/vitals/score2/compute.py b/vitals/models/score2.py
similarity index 79%
rename from vitals/score2/compute.py
rename to vitals/models/score2.py
index 72fb196..51ca1e3 100644
--- a/vitals/score2/compute.py
+++ b/vitals/models/score2.py
@@ -10,19 +10,19 @@
 import numpy as np
 
 from vitals.biomarkers import helpers
-from vitals.schemas.coefficients import Score2FemaleCoefficients, Score2MaleCoefficients
-from vitals.schemas.core import (
+from vitals.schemas.score2 import (
     BaselineSurvival,
     CalibrationScales,
-    RiskCategory,
-    apply_calibration,
-    determine_risk_category,
+    FemaleCoefficientsBaseModel,
+    MaleCoefficientsBaseModel,
+    Markers,
+    Units,
 )
-from vitals.schemas.markers import Score2Markers
-from vitals.schemas.units import Score2Units
 
 
-def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]:
+def compute(
+    filepath: str | Path,
+) -> tuple[float, float, helpers.RiskCategory]:
     """
     Calculate the 10-year cardiovascular disease risk using the SCORE2 algorithm.
 
@@ -46,11 +46,11 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     # Extract biomarkers from JSON file
     biomarkers = helpers.extract_biomarkers_from_json(
         filepath=filepath,
-        biomarker_class=Score2Markers,
-        biomarker_units=Score2Units(),
+        biomarker_class=Markers,
+        biomarker_units=Units(),
     )
 
-    if not isinstance(biomarkers, Score2Markers):
+    if not isinstance(biomarkers, Markers):
         raise ValueError(f"Invalid biomarker class used: {biomarkers}")
 
     age: float = biomarkers.age
@@ -73,14 +73,14 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     baseline_survival_model = BaselineSurvival()
     calibration_scales = CalibrationScales()
 
-    coef: Score2MaleCoefficients | Score2FemaleCoefficients
+    coef: MaleCoefficientsBaseModel | FemaleCoefficientsBaseModel
     if is_male:
-        coef = Score2MaleCoefficients()
+        coef = MaleCoefficientsBaseModel()
         baseline_survival = baseline_survival_model.male
         scale1 = calibration_scales.male_scale1
         scale2 = calibration_scales.male_scale2
     else:
-        coef = Score2FemaleCoefficients()
+        coef = FemaleCoefficientsBaseModel()
         baseline_survival = baseline_survival_model.female
         scale1 = calibration_scales.female_scale1
         scale2 = calibration_scales.female_scale2
@@ -101,9 +101,13 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred))
 
     # Apply calibration for Belgium (Low Risk region)
-    calibrated_risk: float = apply_calibration(uncalibrated_risk, scale1, scale2)
+    calibrated_risk: float = helpers.apply_calibration(
+        uncalibrated_risk, scale1, scale2
+    )
 
     # Determine risk category based on age
-    risk_category: RiskCategory = determine_risk_category(age, calibrated_risk)
+    risk_category: helpers.RiskCategory = helpers.determine_risk_category(
+        age, calibrated_risk
+    )
 
     return (age, round(calibrated_risk, 2), risk_category)
diff --git a/vitals/score2_diabetes/compute.py b/vitals/models/score2_diabetes.py
similarity index 81%
rename from vitals/score2_diabetes/compute.py
rename to vitals/models/score2_diabetes.py
index 77e69c4..47aa158 100644
--- a/vitals/score2_diabetes/compute.py
+++ b/vitals/models/score2_diabetes.py
@@ -11,22 +11,19 @@
 import numpy as np
 
 from vitals.biomarkers import helpers
-from vitals.schemas.coefficients import (
-    Score2DiabetesFemaleCoefficients,
-    Score2DiabetesMaleCoefficients,
-)
-from vitals.schemas.core import (
+from vitals.schemas.score2 import (
     BaselineSurvival,
     CalibrationScales,
-    RiskCategory,
-    apply_calibration,
-    determine_risk_category,
+    FemaleCoefficientsDiabeticModel,
+    MaleCoefficientsDiabeticModel,
+    MarkersDiabetes,
+    UnitsDiabetes,
 )
-from vitals.schemas.markers import Score2DiabetesMarkers
-from vitals.schemas.units import Score2DiabetesUnits
 
 
-def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]:
+def compute(
+    filepath: str | Path,
+) -> tuple[float, float, helpers.RiskCategory]:
     """
     Calculate the 10-year cardiovascular disease risk using the SCORE2-Diabetes algorithm.
 
@@ -51,11 +48,11 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     # Extract biomarkers from JSON file
     biomarkers = helpers.extract_biomarkers_from_json(
         filepath=filepath,
-        biomarker_class=Score2DiabetesMarkers,
-        biomarker_units=Score2DiabetesUnits(),
+        biomarker_class=MarkersDiabetes,
+        biomarker_units=UnitsDiabetes(),
     )
 
-    if not isinstance(biomarkers, Score2DiabetesMarkers):
+    if not isinstance(biomarkers, MarkersDiabetes):
         raise ValueError(f"Invalid biomarker class used: {biomarkers}")
 
     age: float = biomarkers.age
@@ -86,14 +83,14 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     baseline_survival_model = BaselineSurvival()
     calibration_scales = CalibrationScales()
 
-    coef: Score2DiabetesMaleCoefficients | Score2DiabetesFemaleCoefficients
+    coef: MaleCoefficientsDiabeticModel | FemaleCoefficientsDiabeticModel
     if is_male:
-        coef = Score2DiabetesMaleCoefficients()
+        coef = MaleCoefficientsDiabeticModel()
         baseline_survival = baseline_survival_model.male
         scale1 = calibration_scales.male_scale1
         scale2 = calibration_scales.male_scale2
     else:
-        coef = Score2DiabetesFemaleCoefficients()
+        coef = FemaleCoefficientsDiabeticModel()
         baseline_survival = baseline_survival_model.female
         scale1 = calibration_scales.female_scale1
         scale2 = calibration_scales.female_scale2
@@ -122,9 +119,13 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor
     uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred))
 
     # Apply calibration for Belgium (Low Risk region)
-    calibrated_risk: float = apply_calibration(uncalibrated_risk, scale1, scale2)
+    calibrated_risk: float = helpers.apply_calibration(
+        uncalibrated_risk, scale1, scale2
+    )
 
     # Determine risk category based on age
-    risk_category: RiskCategory = determine_risk_category(age, calibrated_risk)
+    risk_category: helpers.RiskCategory = helpers.determine_risk_category(
+        age, calibrated_risk
+    )
 
     return (age, round(calibrated_risk, 2), risk_category)
diff --git a/vitals/schemas/core.py b/vitals/schemas/core.py
deleted file mode 100644
index 8ae4fdf..0000000
--- a/vitals/schemas/core.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from typing import Literal, TypeAlias
-
-import numpy as np
-from pydantic import BaseModel
-
-RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"]
-
-
-class BaselineSurvival(BaseModel):
-    """
-    Sex-specific baseline survival probabilities for the SCORE2 model.
-
-    These values represent the 10-year survival probability for individuals
-    with all risk factors at their reference values.
-    """
-
-    male: float = 0.9605
-    female: float = 0.9776
-
-
-class CalibrationScales(BaseModel):
-    """
-    Region and sex-specific calibration scales for Belgium (Low Risk region).
-
-    These scales are used to calibrate the uncalibrated risk estimate to match
-    the population-specific cardiovascular disease incidence rates.
-    """
-
-    # Male calibration scales
-    male_scale1: float = -0.5699
-    male_scale2: float = 0.7476
-
-    # Female calibration scales
-    female_scale1: float = -0.7380
-    female_scale2: float = 0.7019
-
-
-def determine_risk_category(age: float, calibrated_risk: float) -> RiskCategory:
-    """
-    Determine cardiovascular risk category based on age and calibrated risk percentage.
-
-    Args:
-        age: Patient's age in years
-        calibrated_risk: Calibrated 10-year CVD risk as a percentage
-
-    Returns:
-        Risk stratification category
-    """
-    if age < 50:
-        if calibrated_risk < 2.5:
-            return "Low to moderate"
-        elif calibrated_risk < 7.5:
-            return "High"
-        else:
-            return "Very high"
-    else:  # age 50-69
-        if calibrated_risk < 5:
-            return "Low to moderate"
-        elif calibrated_risk < 10:
-            return "High"
-        else:
-            return "Very high"
-
-
-def apply_calibration(uncalibrated_risk: float, scale1: float, scale2: float) -> float:
-    """
-    Apply regional calibration to uncalibrated risk estimate.
-
-    Args:
-        uncalibrated_risk: Raw risk estimate from the Cox model
-        scale1: First calibration scale parameter
-        scale2: Second calibration scale parameter
-
-    Returns:
-        Calibrated 10-year CVD risk as a percentage
-    """
-    return float(
-        (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk)))))
-        * 100
-    )
diff --git a/vitals/schemas/markers.py b/vitals/schemas/markers.py
deleted file mode 100644
index b3e630c..0000000
--- a/vitals/schemas/markers.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from pydantic import BaseModel
-
-
-class PhenoageMarkers(BaseModel):
-    """Processed PhenoAge biomarkers with standardized units."""
-
-    albumin: float
-    creatinine: float
-    glucose: float
-    crp: float
-    lymphocyte_percent: float
-    mean_cell_volume: float
-    red_cell_distribution_width: float
-    alkaline_phosphatase: float
-    white_blood_cell_count: float
-    age: float
-
-
-class Score2Markers(BaseModel):
-    """Processed Score2 biomarkers with standardized units."""
-
-    age: float
-    systolic_blood_pressure: float
-    total_cholesterol: float
-    hdl_cholesterol: float
-    smoking: bool
-    is_male: bool
-
-
-class Score2DiabetesMarkers(BaseModel):
-    """Processed Score2-Diabetes biomarkers with standardized units."""
-
-    age: float
-    systolic_blood_pressure: float
-    total_cholesterol: float
-    hdl_cholesterol: float
-    smoking: bool
-    is_male: bool
-    diabetes: bool
-    age_at_diabetes_diagnosis: float
-    hba1c: float
-    egfr: float
diff --git a/vitals/schemas/phenoage.py b/vitals/schemas/phenoage.py
new file mode 100644
index 0000000..fc1b281
--- /dev/null
+++ b/vitals/schemas/phenoage.py
@@ -0,0 +1,62 @@
+from pydantic import BaseModel
+
+
+class Markers(BaseModel):
+    """Processed PhenoAge biomarkers with standardized units."""
+
+    albumin: float
+    creatinine: float
+    glucose: float
+    crp: float
+    lymphocyte_percent: float
+    mean_cell_volume: float
+    red_cell_distribution_width: float
+    alkaline_phosphatase: float
+    white_blood_cell_count: float
+    age: float
+
+
+class Units(BaseModel):
+    """
+    The expected unit to be used for phenoage computation
+    """
+
+    albumin: str = "g/L"
+    creatinine: str = "umol/L"
+    glucose: str = "mmol/L"
+    crp: str = "mg/dL"
+    lymphocyte_percent: str = "%"
+    mean_cell_volume: str = "fL"
+    red_cell_distribution_width: str = "%"
+    alkaline_phosphatase: str = "U/L"
+    white_blood_cell_count: str = "1000 cells/uL"
+    age: str = "years"
+
+
+class LinearModel(BaseModel):
+    """
+    Coefficients used to calculate the PhenoAge from Levine et al 2018
+    """
+
+    intercept: float = -19.9067
+    albumin: float = -0.0336
+    creatinine: float = 0.0095
+    glucose: float = 0.1953
+    log_crp: float = 0.0954
+    lymphocyte_percent: float = -0.0120
+    mean_cell_volume: float = 0.0268
+    red_cell_distribution_width: float = 0.3306
+    alkaline_phosphatase: float = 0.00188
+    white_blood_cell_count: float = 0.0554
+    age: float = 0.0804
+
+
+class Gompertz(BaseModel):
+    """
+    Parameters of the Gompertz distribution for PhenoAge computation
+    """
+
+    lambda_: float = 0.0192
+    coef1: float = 141.50225
+    coef2: float = -0.00553
+    coef3: float = 0.090165
diff --git a/vitals/schemas/coefficients.py b/vitals/schemas/score2.py
similarity index 52%
rename from vitals/schemas/coefficients.py
rename to vitals/schemas/score2.py
index 62a4e9f..336d8e0 100644
--- a/vitals/schemas/coefficients.py
+++ b/vitals/schemas/score2.py
@@ -1,7 +1,65 @@
 from pydantic import BaseModel
 
+# ----- Common to all SCORE2 models
 
-class Score2MaleCoefficients(BaseModel):
+
+class BaselineSurvival(BaseModel):
+    """
+    Sex-specific baseline survival probabilities for the SCORE2 model.
+
+    These values represent the 10-year survival probability for individuals
+    with all risk factors at their reference values.
+    """
+
+    male: float = 0.9605
+    female: float = 0.9776
+
+
+class CalibrationScales(BaseModel):
+    """
+    Region and sex-specific calibration scales for Belgium (Low Risk region).
+
+    These scales are used to calibrate the uncalibrated risk estimate to match
+    the population-specific cardiovascular disease incidence rates.
+    """
+
+    # Male calibration scales
+    male_scale1: float = -0.5699
+    male_scale2: float = 0.7476
+
+    # Female calibration scales
+    female_scale1: float = -0.7380
+    female_scale2: float = 0.7019
+
+
+# ----- For the base SCORE2 model
+
+
+class Markers(BaseModel):
+    """Processed Score2 biomarkers with standardized units."""
+
+    age: float
+    systolic_blood_pressure: float
+    total_cholesterol: float
+    hdl_cholesterol: float
+    smoking: bool
+    is_male: bool
+
+
+class Units(BaseModel):
+    """
+    The expected unit to be used for Score2 computation
+    """
+
+    age: str = "years"
+    systolic_blood_pressure: str = "mmHg"
+    total_cholesterol: str = "mmol/L"
+    hdl_cholesterol: str = "mmol/L"
+    smoking: str = "yes/no"
+    is_male: str = "yes/no"
+
+
+class MaleCoefficientsBaseModel(BaseModel):
     """
     Male-specific coefficients for the SCORE2 Cox proportional hazards model.
     """
@@ -20,7 +78,7 @@ class Score2MaleCoefficients(BaseModel):
     hdl_age: float = 0.0426
 
 
-class Score2FemaleCoefficients(BaseModel):
+class FemaleCoefficientsBaseModel(BaseModel):
     """
     Female-specific coefficients for the SCORE2 Cox proportional hazards model.
     """
@@ -39,7 +97,42 @@ class Score2FemaleCoefficients(BaseModel):
     hdl_age: float = 0.0613
 
 
-class Score2DiabetesMaleCoefficients(Score2MaleCoefficients):
+# ----- For the SCORE2-Diabetes model
+
+
+class MarkersDiabetes(BaseModel):
+    """Processed Score2-Diabetes biomarkers with standardized units."""
+
+    age: float
+    systolic_blood_pressure: float
+    total_cholesterol: float
+    hdl_cholesterol: float
+    smoking: bool
+    is_male: bool
+    diabetes: bool
+    age_at_diabetes_diagnosis: float
+    hba1c: float
+    egfr: float
+
+
+class UnitsDiabetes(BaseModel):
+    """
+    The expected unit to be used for Score2-Diabetes computation
+    """
+
+    age: str = "years"
+    systolic_blood_pressure: str = "mmHg"
+    total_cholesterol: str = "mmol/L"
+    hdl_cholesterol: str = "mmol/L"
+    smoking: str = "yes/no"
+    is_male: str = "yes/no"
+    diabetes: str = "yes/no"
+    age_at_diabetes_diagnosis: str = "years"
+    hba1c: str = "mmol/mol"
+    egfr: str = "mL/min/1.73m²"
+
+
+class MaleCoefficientsDiabeticModel(MaleCoefficientsBaseModel):
     """
     Male-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model.
     Extends the base SCORE2 male coefficients with diabetes-specific parameters.
@@ -71,7 +164,7 @@ class Score2DiabetesMaleCoefficients(Score2MaleCoefficients):
     egfr_age: float = 0.0115
 
 
-class Score2DiabetesFemaleCoefficients(Score2FemaleCoefficients):
+class FemaleCoefficientsDiabeticModel(FemaleCoefficientsBaseModel):
     """
     Female-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model.
     Extends the base SCORE2 female coefficients with diabetes-specific parameters.
diff --git a/vitals/schemas/units.py b/vitals/schemas/units.py
deleted file mode 100644
index dfa3b18..0000000
--- a/vitals/schemas/units.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from pydantic import BaseModel
-
-
-class PhenoageUnits(BaseModel):
-    """
-    The expected unit to be used for phenoage computation
-    """
-
-    albumin: str = "g/L"
-    creatinine: str = "umol/L"
-    glucose: str = "mmol/L"
-    crp: str = "mg/dL"
-    lymphocyte_percent: str = "%"
-    mean_cell_volume: str = "fL"
-    red_cell_distribution_width: str = "%"
-    alkaline_phosphatase: str = "U/L"
-    white_blood_cell_count: str = "1000 cells/uL"
-    age: str = "years"
-
-
-class Score2Units(BaseModel):
-    """
-    The expected unit to be used for Score2 computation
-    """
-
-    age: str = "years"
-    systolic_blood_pressure: str = "mmHg"
-    total_cholesterol: str = "mmol/L"
-    hdl_cholesterol: str = "mmol/L"
-    smoking: str = "yes/no"
-    is_male: str = "yes/no"
-
-
-class Score2DiabetesUnits(BaseModel):
-    """
-    The expected unit to be used for Score2-Diabetes computation
-    """
-
-    age: str = "years"
-    systolic_blood_pressure: str = "mmHg"
-    total_cholesterol: str = "mmol/L"
-    hdl_cholesterol: str = "mmol/L"
-    smoking: str = "yes/no"
-    is_male: str = "yes/no"
-    diabetes: str = "yes/no"
-    age_at_diabetes_diagnosis: str = "years"
-    hba1c: str = "mmol/mol"
-    egfr: str = "mL/min/1.73m²"
diff --git a/vitals/score2/__init__.py b/vitals/score2/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/vitals/score2_diabetes/__init__.py b/vitals/score2_diabetes/__init__.py
deleted file mode 100644
index c1e1944..0000000
--- a/vitals/score2_diabetes/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""SCORE2-Diabetes cardiovascular risk assessment algorithm."""
-
-from .compute import cardiovascular_risk
-
-__all__ = ["cardiovascular_risk"]