From 2a9172436062603c5ffa77fec316041479850e7c Mon Sep 17 00:00:00 2001 From: fbraza Date: Sat, 19 Jul 2025 21:45:07 +0200 Subject: [PATCH 1/3] refactor: extract common code from SCORE2 modules and reorganize schemas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This refactoring addresses code duplication between the SCORE2 and SCORE2-Diabetes modules by: 1. Creating a new vitals/schemas/ module with an organized structure: - units.py: All Units classes (PhenoageUnits, Score2Units, Score2DiabetesUnits) - markers.py: All Markers classes (PhenoageMarkers, Score2Markers, Score2DiabetesMarkers) - coefficients.py: Sex-split coefficient classes for better type safety - core.py: Shared components (BaselineSurvival, CalibrationScales, utility functions) 2. Replacing the duplicated ModelCoefficients classes with sex-specific classes: - Score2MaleCoefficients and Score2FemaleCoefficients - Score2DiabetesMaleCoefficients and Score2DiabetesFemaleCoefficients (extending the SCORE2 base classes) 3. Extracting shared utility functions: - determine_risk_category(): Common risk categorization logic - apply_calibration(): Common calibration formula 4. Updating all imports across the codebase to use the new schema structure 5. Moving the original biomarkers/schemas.py file to schemas/units.py, with its Markers classes split out into schemas/markers.py This refactoring follows DRY principles while maintaining simplicity and type safety. All tests pass and code formatting is clean. 
🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- vitals/biomarkers/helpers.py | 4 +- vitals/phenoage/compute.py | 10 +- vitals/schemas/__init__.py | 0 vitals/schemas/coefficients.py | 103 ++++++++ vitals/schemas/core.py | 80 +++++++ vitals/schemas/markers.py | 42 ++++ .../schemas.py => schemas/units.py} | 44 ---- vitals/score2/compute.py | 170 ++++---------- vitals/score2_diabetes/compute.py | 220 +++++------------- 9 files changed, 342 insertions(+), 331 deletions(-) create mode 100644 vitals/schemas/__init__.py create mode 100644 vitals/schemas/coefficients.py create mode 100644 vitals/schemas/core.py create mode 100644 vitals/schemas/markers.py rename vitals/{biomarkers/schemas.py => schemas/units.py} (54%) mode change 100755 => 100644 diff --git a/vitals/biomarkers/helpers.py b/vitals/biomarkers/helpers.py index ca1d8d7..eeed0fa 100755 --- a/vitals/biomarkers/helpers.py +++ b/vitals/biomarkers/helpers.py @@ -4,10 +4,10 @@ from pydantic import BaseModel -from vitals.biomarkers import schemas +from vitals.schemas.units import PhenoageUnits, Score2DiabetesUnits, Score2Units Biomarkers = TypeVar("Biomarkers", bound=BaseModel) -Units = schemas.PhenoageUnits | schemas.Score2Units | schemas.Score2DiabetesUnits +Units = PhenoageUnits | Score2Units | Score2DiabetesUnits class ConversionInfo(TypedDict): diff --git a/vitals/phenoage/compute.py b/vitals/phenoage/compute.py index faf597d..ac52596 100755 --- a/vitals/phenoage/compute.py +++ b/vitals/phenoage/compute.py @@ -3,7 +3,9 @@ import numpy as np from pydantic import BaseModel -from vitals.biomarkers import helpers, schemas +from vitals.biomarkers import helpers +from vitals.schemas.markers import PhenoageMarkers +from vitals.schemas.units import PhenoageUnits class LinearModel(BaseModel): @@ -55,14 +57,14 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]: # Extract biomarkers from JSON file biomarkers = helpers.extract_biomarkers_from_json( filepath=filepath, - 
biomarker_class=schemas.PhenoageMarkers, - biomarker_units=schemas.PhenoageUnits(), + biomarker_class=PhenoageMarkers, + biomarker_units=PhenoageUnits(), ) age = biomarkers.age coef = LinearModel() - if isinstance(biomarkers, schemas.PhenoageMarkers): + if isinstance(biomarkers, PhenoageMarkers): weighted_risk_score = ( coef.intercept + (coef.albumin * biomarkers.albumin) diff --git a/vitals/schemas/__init__.py b/vitals/schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vitals/schemas/coefficients.py b/vitals/schemas/coefficients.py new file mode 100644 index 0000000..62a4e9f --- /dev/null +++ b/vitals/schemas/coefficients.py @@ -0,0 +1,103 @@ +from pydantic import BaseModel + + +class Score2MaleCoefficients(BaseModel): + """ + Male-specific coefficients for the SCORE2 Cox proportional hazards model. + """ + + # Main effects + age: float = 0.3742 + smoking: float = 0.6012 + sbp: float = 0.2777 + total_cholesterol: float = 0.1458 + hdl_cholesterol: float = -0.2698 + + # Age interaction terms + smoking_age: float = -0.0755 + sbp_age: float = -0.0255 + tchol_age: float = -0.0281 + hdl_age: float = 0.0426 + + +class Score2FemaleCoefficients(BaseModel): + """ + Female-specific coefficients for the SCORE2 Cox proportional hazards model. + """ + + # Main effects + age: float = 0.4648 + smoking: float = 0.7744 + sbp: float = 0.3131 + total_cholesterol: float = 0.1002 + hdl_cholesterol: float = -0.2606 + + # Age interaction terms + smoking_age: float = -0.1088 + sbp_age: float = -0.0277 + tchol_age: float = -0.0226 + hdl_age: float = 0.0613 + + +class Score2DiabetesMaleCoefficients(Score2MaleCoefficients): + """ + Male-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model. + Extends the base SCORE2 male coefficients with diabetes-specific parameters. 
+ """ + + # Override base values with SCORE2-Diabetes specific values + age: float = 0.5368 + smoking: float = 0.4774 + sbp: float = 0.1322 + total_cholesterol: float = 0.1102 + hdl_cholesterol: float = -0.1087 + + # Override age interaction terms + smoking_age: float = -0.0672 + sbp_age: float = -0.0268 + tchol_age: float = -0.0181 + hdl_age: float = 0.0095 + + # Additional diabetes-specific coefficients + diabetes: float = 0.6457 + age_at_diabetes_diagnosis: float = -0.0998 + hba1c: float = 0.0955 + egfr: float = -0.0591 + egfr_squared: float = 0.0058 + + # Additional age interaction terms + diabetes_age: float = -0.0983 + hba1c_age: float = -0.0134 + egfr_age: float = 0.0115 + + +class Score2DiabetesFemaleCoefficients(Score2FemaleCoefficients): + """ + Female-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model. + Extends the base SCORE2 female coefficients with diabetes-specific parameters. + """ + + # Override base values with SCORE2-Diabetes specific values + age: float = 0.6624 + smoking: float = 0.6139 + sbp: float = 0.1421 + total_cholesterol: float = 0.1127 + hdl_cholesterol: float = -0.1568 + + # Override age interaction terms + smoking_age: float = -0.1122 + sbp_age: float = -0.0167 + tchol_age: float = -0.0200 + hdl_age: float = 0.0186 + + # Additional diabetes-specific coefficients + diabetes: float = 0.8096 + age_at_diabetes_diagnosis: float = -0.1180 + hba1c: float = 0.1173 + egfr: float = -0.0640 + egfr_squared: float = 0.0062 + + # Additional age interaction terms + diabetes_age: float = -0.1272 + hba1c_age: float = -0.0196 + egfr_age: float = 0.0169 diff --git a/vitals/schemas/core.py b/vitals/schemas/core.py new file mode 100644 index 0000000..8ae4fdf --- /dev/null +++ b/vitals/schemas/core.py @@ -0,0 +1,80 @@ +from typing import Literal, TypeAlias + +import numpy as np +from pydantic import BaseModel + +RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] + + +class BaselineSurvival(BaseModel): + """ 
+ Sex-specific baseline survival probabilities for the SCORE2 model. + + These values represent the 10-year survival probability for individuals + with all risk factors at their reference values. + """ + + male: float = 0.9605 + female: float = 0.9776 + + +class CalibrationScales(BaseModel): + """ + Region and sex-specific calibration scales for Belgium (Low Risk region). + + These scales are used to calibrate the uncalibrated risk estimate to match + the population-specific cardiovascular disease incidence rates. + """ + + # Male calibration scales + male_scale1: float = -0.5699 + male_scale2: float = 0.7476 + + # Female calibration scales + female_scale1: float = -0.7380 + female_scale2: float = 0.7019 + + +def determine_risk_category(age: float, calibrated_risk: float) -> RiskCategory: + """ + Determine cardiovascular risk category based on age and calibrated risk percentage. + + Args: + age: Patient's age in years + calibrated_risk: Calibrated 10-year CVD risk as a percentage + + Returns: + Risk stratification category + """ + if age < 50: + if calibrated_risk < 2.5: + return "Low to moderate" + elif calibrated_risk < 7.5: + return "High" + else: + return "Very high" + else: # age 50-69 + if calibrated_risk < 5: + return "Low to moderate" + elif calibrated_risk < 10: + return "High" + else: + return "Very high" + + +def apply_calibration(uncalibrated_risk: float, scale1: float, scale2: float) -> float: + """ + Apply regional calibration to uncalibrated risk estimate. 
+ + Args: + uncalibrated_risk: Raw risk estimate from the Cox model + scale1: First calibration scale parameter + scale2: Second calibration scale parameter + + Returns: + Calibrated 10-year CVD risk as a percentage + """ + return float( + (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) + * 100 + ) diff --git a/vitals/schemas/markers.py b/vitals/schemas/markers.py new file mode 100644 index 0000000..b3e630c --- /dev/null +++ b/vitals/schemas/markers.py @@ -0,0 +1,42 @@ +from pydantic import BaseModel + + +class PhenoageMarkers(BaseModel): + """Processed PhenoAge biomarkers with standardized units.""" + + albumin: float + creatinine: float + glucose: float + crp: float + lymphocyte_percent: float + mean_cell_volume: float + red_cell_distribution_width: float + alkaline_phosphatase: float + white_blood_cell_count: float + age: float + + +class Score2Markers(BaseModel): + """Processed Score2 biomarkers with standardized units.""" + + age: float + systolic_blood_pressure: float + total_cholesterol: float + hdl_cholesterol: float + smoking: bool + is_male: bool + + +class Score2DiabetesMarkers(BaseModel): + """Processed Score2-Diabetes biomarkers with standardized units.""" + + age: float + systolic_blood_pressure: float + total_cholesterol: float + hdl_cholesterol: float + smoking: bool + is_male: bool + diabetes: bool + age_at_diabetes_diagnosis: float + hba1c: float + egfr: float diff --git a/vitals/biomarkers/schemas.py b/vitals/schemas/units.py old mode 100755 new mode 100644 similarity index 54% rename from vitals/biomarkers/schemas.py rename to vitals/schemas/units.py index e93e7d4..dfa3b18 --- a/vitals/biomarkers/schemas.py +++ b/vitals/schemas/units.py @@ -1,7 +1,6 @@ from pydantic import BaseModel -# ------ PHENOAGE Schemas class PhenoageUnits(BaseModel): """ The expected unit to be used for phenoage computation @@ -19,22 +18,6 @@ class PhenoageUnits(BaseModel): age: str = "years" -class PhenoageMarkers(BaseModel): - 
"""Processed PhenoAge biomarkers with standardized units.""" - - albumin: float - creatinine: float - glucose: float - crp: float - lymphocyte_percent: float - mean_cell_volume: float - red_cell_distribution_width: float - alkaline_phosphatase: float - white_blood_cell_count: float - age: float - - -# ------ SCORE2 Schemas class Score2Units(BaseModel): """ The expected unit to be used for Score2 computation @@ -48,18 +31,6 @@ class Score2Units(BaseModel): is_male: str = "yes/no" -class Score2Markers(BaseModel): - """Processed Score2 biomarkers with standardized units.""" - - age: float - systolic_blood_pressure: float - total_cholesterol: float - hdl_cholesterol: float - smoking: bool - is_male: bool - - -# ------ SCORE2-Diabetes Schemas class Score2DiabetesUnits(BaseModel): """ The expected unit to be used for Score2-Diabetes computation @@ -75,18 +46,3 @@ class Score2DiabetesUnits(BaseModel): age_at_diabetes_diagnosis: str = "years" hba1c: str = "mmol/mol" egfr: str = "mL/min/1.73m²" - - -class Score2DiabetesMarkers(BaseModel): - """Processed Score2-Diabetes biomarkers with standardized units.""" - - age: float - systolic_blood_pressure: float - total_cholesterol: float - hdl_cholesterol: float - smoking: bool - is_male: bool - diabetes: bool - age_at_diabetes_diagnosis: float - hba1c: float - egfr: float diff --git a/vitals/score2/compute.py b/vitals/score2/compute.py index dc794af..a536d9b 100644 --- a/vitals/score2/compute.py +++ b/vitals/score2/compute.py @@ -6,78 +6,20 @@ """ from pathlib import Path -from typing import Literal, TypeAlias import numpy as np -from pydantic import BaseModel -from vitals.biomarkers import helpers, schemas - -RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] - - -class ModelCoefficients(BaseModel): - """ - Sex-specific coefficients for the SCORE2 Cox proportional hazards model. 
- - These coefficients are used to calculate the 10-year risk of cardiovascular disease - based on transformed risk factors and their age interactions. - """ - - # Male coefficients - male_age: float = 0.3742 - male_smoking: float = 0.6012 - male_sbp: float = 0.2777 - male_total_cholesterol: float = 0.1458 - male_hdl_cholesterol: float = -0.2698 - - # Male interaction term coefficients - male_smoking_age: float = -0.0755 - male_sbp_age: float = -0.0255 - male_tchol_age: float = -0.0281 - male_hdl_age: float = 0.0426 - - # Female coefficients - female_age: float = 0.4648 - female_smoking: float = 0.7744 - female_sbp: float = 0.3131 - female_total_cholesterol: float = 0.1002 - female_hdl_cholesterol: float = -0.2606 - - # Female interaction term coefficients - female_smoking_age: float = -0.1088 - female_sbp_age: float = -0.0277 - female_tchol_age: float = -0.0226 - female_hdl_age: float = 0.0613 - - -class BaselineSurvival(BaseModel): - """ - Sex-specific baseline survival probabilities for the SCORE2 model. - - These values represent the 10-year survival probability for individuals - with all risk factors at their reference values. - """ - - male: float = 0.9605 - female: float = 0.9776 - - -class CalibrationScales(BaseModel): - """ - Region and sex-specific calibration scales for Belgium (Low Risk region). - - These scales are used to calibrate the uncalibrated risk estimate to match - the population-specific cardiovascular disease incidence rates. 
- """ - - # Male calibration scales - male_scale1: float = -0.5699 - male_scale2: float = 0.7476 - - # Female calibration scales - female_scale1: float = -0.7380 - female_scale2: float = 0.7019 +from vitals.biomarkers import helpers +from vitals.schemas.coefficients import Score2FemaleCoefficients, Score2MaleCoefficients +from vitals.schemas.core import ( + BaselineSurvival, + CalibrationScales, + RiskCategory, + apply_calibration, + determine_risk_category, +) +from vitals.schemas.markers import Score2Markers +from vitals.schemas.units import Score2Units def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]: @@ -104,11 +46,11 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor # Extract biomarkers from JSON file biomarkers = helpers.extract_biomarkers_from_json( filepath=filepath, - biomarker_class=schemas.Score2Markers, - biomarker_units=schemas.Score2Units(), + biomarker_class=Score2Markers, + biomarker_units=Score2Units(), ) - if not isinstance(biomarkers, schemas.Score2Markers): + if not isinstance(biomarkers, Score2Markers): raise ValueError(f"Invalid biomarker class used: {biomarkers}") age: float = biomarkers.age @@ -127,72 +69,50 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor tchol_age: float = ctchol * cage hdl_age: float = chdl * cage - # Get model coefficients - coef: ModelCoefficients = ModelCoefficients() - - # Calculate linear predictor (x) based on sex - - linear_pred: float - baseline_survival: float - scale1: float - scale2: float + # Get sex-specific coefficients and calibration values + baseline_survival_model = BaselineSurvival() + calibration_scales = CalibrationScales() if is_male: + male_coef = Score2MaleCoefficients() linear_pred = ( - coef.male_age * cage - + coef.male_smoking * smoking - + coef.male_sbp * csbp - + coef.male_total_cholesterol * ctchol - + coef.male_hdl_cholesterol * chdl - + coef.male_smoking_age * smoking_age - + 
coef.male_sbp_age * sbp_age - + coef.male_tchol_age * tchol_age - + coef.male_hdl_age * hdl_age + male_coef.age * cage + + male_coef.smoking * smoking + + male_coef.sbp * csbp + + male_coef.total_cholesterol * ctchol + + male_coef.hdl_cholesterol * chdl + + male_coef.smoking_age * smoking_age + + male_coef.sbp_age * sbp_age + + male_coef.tchol_age * tchol_age + + male_coef.hdl_age * hdl_age ) - baseline_survival = BaselineSurvival().male - scale1 = CalibrationScales().male_scale1 - scale2 = CalibrationScales().male_scale2 + baseline_survival = baseline_survival_model.male + scale1 = calibration_scales.male_scale1 + scale2 = calibration_scales.male_scale2 else: + female_coef = Score2FemaleCoefficients() linear_pred = ( - coef.female_age * cage - + coef.female_smoking * smoking - + coef.female_sbp * csbp - + coef.female_total_cholesterol * ctchol - + coef.female_hdl_cholesterol * chdl - + coef.female_smoking_age * smoking_age - + coef.female_sbp_age * sbp_age - + coef.female_tchol_age * tchol_age - + coef.female_hdl_age * hdl_age + female_coef.age * cage + + female_coef.smoking * smoking + + female_coef.sbp * csbp + + female_coef.total_cholesterol * ctchol + + female_coef.hdl_cholesterol * chdl + + female_coef.smoking_age * smoking_age + + female_coef.sbp_age * sbp_age + + female_coef.tchol_age * tchol_age + + female_coef.hdl_age * hdl_age ) - baseline_survival = BaselineSurvival().female - scale1 = CalibrationScales().female_scale1 - scale2 = CalibrationScales().female_scale2 + baseline_survival = baseline_survival_model.female + scale1 = calibration_scales.female_scale1 + scale2 = calibration_scales.female_scale2 # Calculate uncalibrated risk uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) # Apply calibration for Belgium (Low Risk region) - # Calibrated 10-year risk, % = [1 - exp(-exp(scale1 + scale2*ln(-ln(1 - 10-year risk))))] * 100 - calibrated_risk: float = float( - (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - 
uncalibrated_risk))))) - * 100 - ) + calibrated_risk: float = apply_calibration(uncalibrated_risk, scale1, scale2) # Determine risk category based on age - risk_category: RiskCategory - if age < 50: - if calibrated_risk < 2.5: - risk_category = "Low to moderate" - elif calibrated_risk < 7.5: - risk_category = "High" - else: - risk_category = "Very high" - else: # age 50-69 - if calibrated_risk < 5: - risk_category = "Low to moderate" - elif calibrated_risk < 10: - risk_category = "High" - else: - risk_category = "Very high" + risk_category: RiskCategory = determine_risk_category(age, calibrated_risk) return (age, round(calibrated_risk, 2), risk_category) diff --git a/vitals/score2_diabetes/compute.py b/vitals/score2_diabetes/compute.py index a628f50..4dbfed7 100644 --- a/vitals/score2_diabetes/compute.py +++ b/vitals/score2_diabetes/compute.py @@ -7,94 +7,23 @@ import math from pathlib import Path -from typing import Literal, TypeAlias import numpy as np -from pydantic import BaseModel -from vitals.biomarkers import helpers, schemas - -RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] - - -class ModelCoefficients(BaseModel): - """ - Sex-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model. - - These coefficients are used to calculate the 10-year risk of cardiovascular disease - based on transformed risk factors and their age interactions. 
- """ - - # Male main effect coefficients - male_age: float = 0.5368 - male_smoking: float = 0.4774 - male_sbp: float = 0.1322 - male_diabetes: float = 0.6457 - male_total_cholesterol: float = 0.1102 - male_hdl_cholesterol: float = -0.1087 - male_age_at_diabetes_diagnosis: float = -0.0998 - male_hba1c: float = 0.0955 - male_egfr: float = -0.0591 - male_egfr_squared: float = 0.0058 - - # Male interaction term coefficients - male_smoking_age: float = -0.0672 - male_sbp_age: float = -0.0268 - male_diabetes_age: float = -0.0983 - male_tchol_age: float = -0.0181 - male_hdl_age: float = 0.0095 - male_hba1c_age: float = -0.0134 - male_egfr_age: float = 0.0115 - - # Female main effect coefficients - female_age: float = 0.6624 - female_smoking: float = 0.6139 - female_sbp: float = 0.1421 - female_diabetes: float = 0.8096 - female_total_cholesterol: float = 0.1127 - female_hdl_cholesterol: float = -0.1568 - female_age_at_diabetes_diagnosis: float = -0.1180 - female_hba1c: float = 0.1173 - female_egfr: float = -0.0640 - female_egfr_squared: float = 0.0062 - - # Female interaction term coefficients - female_smoking_age: float = -0.1122 - female_sbp_age: float = -0.0167 - female_diabetes_age: float = -0.1272 - female_tchol_age: float = -0.0200 - female_hdl_age: float = 0.0186 - female_hba1c_age: float = -0.0196 - female_egfr_age: float = 0.0169 - - -class BaselineSurvival(BaseModel): - """ - Sex-specific baseline survival probabilities for the SCORE2-Diabetes model. - - These values represent the 10-year survival probability for individuals - with all risk factors at their reference values. - """ - - male: float = 0.9605 - female: float = 0.9776 - - -class CalibrationScales(BaseModel): - """ - Region and sex-specific calibration scales for Belgium (Low Risk region). - - These scales are used to calibrate the uncalibrated risk estimate to match - the population-specific cardiovascular disease incidence rates. 
- """ - - # Male calibration scales - male_scale1: float = -0.5699 - male_scale2: float = 0.7476 - - # Female calibration scales - female_scale1: float = -0.7380 - female_scale2: float = 0.7019 +from vitals.biomarkers import helpers +from vitals.schemas.coefficients import ( + Score2DiabetesFemaleCoefficients, + Score2DiabetesMaleCoefficients, +) +from vitals.schemas.core import ( + BaselineSurvival, + CalibrationScales, + RiskCategory, + apply_calibration, + determine_risk_category, +) +from vitals.schemas.markers import Score2DiabetesMarkers +from vitals.schemas.units import Score2DiabetesUnits def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]: @@ -122,11 +51,11 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor # Extract biomarkers from JSON file biomarkers = helpers.extract_biomarkers_from_json( filepath=filepath, - biomarker_class=schemas.Score2DiabetesMarkers, - biomarker_units=schemas.Score2DiabetesUnits(), + biomarker_class=Score2DiabetesMarkers, + biomarker_units=Score2DiabetesUnits(), ) - if not isinstance(biomarkers, schemas.Score2DiabetesMarkers): + if not isinstance(biomarkers, Score2DiabetesMarkers): raise ValueError(f"Invalid biomarker class used: {biomarkers}") age: float = biomarkers.age @@ -153,87 +82,66 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor hba1c_age: float = ca1c * cage egfr_age: float = cegfr * cage - # Get model coefficients - coef: ModelCoefficients = ModelCoefficients() - - # Calculate linear predictor (x) based on sex - linear_pred: float - baseline_survival: float - scale1: float - scale2: float + # Get sex-specific coefficients and calibration values + baseline_survival_model = BaselineSurvival() + calibration_scales = CalibrationScales() if is_male: + male_coef = Score2DiabetesMaleCoefficients() linear_pred = ( - coef.male_age * cage - + coef.male_smoking * smoking - + coef.male_sbp * csbp - + coef.male_diabetes * diabetes - + 
coef.male_total_cholesterol * ctchol - + coef.male_hdl_cholesterol * chdl - + coef.male_age_at_diabetes_diagnosis * cagediab - + coef.male_hba1c * ca1c - + coef.male_egfr * cegfr - + coef.male_egfr_squared * cegfr_squared - + coef.male_smoking_age * smoking_age - + coef.male_sbp_age * sbp_age - + coef.male_diabetes_age * diabetes_age - + coef.male_tchol_age * tchol_age - + coef.male_hdl_age * hdl_age - + coef.male_hba1c_age * hba1c_age - + coef.male_egfr_age * egfr_age + male_coef.age * cage + + male_coef.smoking * smoking + + male_coef.sbp * csbp + + male_coef.diabetes * diabetes + + male_coef.total_cholesterol * ctchol + + male_coef.hdl_cholesterol * chdl + + male_coef.age_at_diabetes_diagnosis * cagediab + + male_coef.hba1c * ca1c + + male_coef.egfr * cegfr + + male_coef.egfr_squared * cegfr_squared + + male_coef.smoking_age * smoking_age + + male_coef.sbp_age * sbp_age + + male_coef.diabetes_age * diabetes_age + + male_coef.tchol_age * tchol_age + + male_coef.hdl_age * hdl_age + + male_coef.hba1c_age * hba1c_age + + male_coef.egfr_age * egfr_age ) - baseline_survival = BaselineSurvival().male - scale1 = CalibrationScales().male_scale1 - scale2 = CalibrationScales().male_scale2 + baseline_survival = baseline_survival_model.male + scale1 = calibration_scales.male_scale1 + scale2 = calibration_scales.male_scale2 else: + female_coef = Score2DiabetesFemaleCoefficients() linear_pred = ( - coef.female_age * cage - + coef.female_smoking * smoking - + coef.female_sbp * csbp - + coef.female_diabetes * diabetes - + coef.female_total_cholesterol * ctchol - + coef.female_hdl_cholesterol * chdl - + coef.female_age_at_diabetes_diagnosis * cagediab - + coef.female_hba1c * ca1c - + coef.female_egfr * cegfr - + coef.female_egfr_squared * cegfr_squared - + coef.female_smoking_age * smoking_age - + coef.female_sbp_age * sbp_age - + coef.female_diabetes_age * diabetes_age - + coef.female_tchol_age * tchol_age - + coef.female_hdl_age * hdl_age - + coef.female_hba1c_age * hba1c_age - 
+ coef.female_egfr_age * egfr_age + female_coef.age * cage + + female_coef.smoking * smoking + + female_coef.sbp * csbp + + female_coef.diabetes * diabetes + + female_coef.total_cholesterol * ctchol + + female_coef.hdl_cholesterol * chdl + + female_coef.age_at_diabetes_diagnosis * cagediab + + female_coef.hba1c * ca1c + + female_coef.egfr * cegfr + + female_coef.egfr_squared * cegfr_squared + + female_coef.smoking_age * smoking_age + + female_coef.sbp_age * sbp_age + + female_coef.diabetes_age * diabetes_age + + female_coef.tchol_age * tchol_age + + female_coef.hdl_age * hdl_age + + female_coef.hba1c_age * hba1c_age + + female_coef.egfr_age * egfr_age ) - baseline_survival = BaselineSurvival().female - scale1 = CalibrationScales().female_scale1 - scale2 = CalibrationScales().female_scale2 + baseline_survival = baseline_survival_model.female + scale1 = calibration_scales.female_scale1 + scale2 = calibration_scales.female_scale2 # Calculate uncalibrated risk uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) # Apply calibration for Belgium (Low Risk region) - # Calibrated 10-year risk, % = [1 - exp(-exp(scale1 + scale2*ln(-ln(1 - 10-year risk))))] * 100 - calibrated_risk: float = float( - (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) - * 100 - ) + calibrated_risk: float = apply_calibration(uncalibrated_risk, scale1, scale2) # Determine risk category based on age - risk_category: RiskCategory - if age < 50: - if calibrated_risk < 2.5: - risk_category = "Low to moderate" - elif calibrated_risk < 7.5: - risk_category = "High" - else: - risk_category = "Very high" - else: # age 50-69 - if calibrated_risk < 5: - risk_category = "Low to moderate" - elif calibrated_risk < 10: - risk_category = "High" - else: - risk_category = "Very high" + risk_category: RiskCategory = determine_risk_category(age, calibrated_risk) return (age, round(calibrated_risk, 2), risk_category) From 
30324a0001bc795a978d39f7284867598528db06 Mon Sep 17 00:00:00 2001 From: fbraza Date: Sat, 19 Jul 2025 22:19:21 +0200 Subject: [PATCH 2/3] refactor: the compute function for score2 and score2_diabetes --- vitals/score2/compute.py | 39 ++++++++----------- vitals/score2_diabetes/compute.py | 63 +++++++++++-------------------- 2 files changed, 38 insertions(+), 64 deletions(-) diff --git a/vitals/score2/compute.py b/vitals/score2/compute.py index a536d9b..72fb196 100644 --- a/vitals/score2/compute.py +++ b/vitals/score2/compute.py @@ -73,39 +73,30 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor baseline_survival_model = BaselineSurvival() calibration_scales = CalibrationScales() + coef: Score2MaleCoefficients | Score2FemaleCoefficients if is_male: - male_coef = Score2MaleCoefficients() - linear_pred = ( - male_coef.age * cage - + male_coef.smoking * smoking - + male_coef.sbp * csbp - + male_coef.total_cholesterol * ctchol - + male_coef.hdl_cholesterol * chdl - + male_coef.smoking_age * smoking_age - + male_coef.sbp_age * sbp_age - + male_coef.tchol_age * tchol_age - + male_coef.hdl_age * hdl_age - ) + coef = Score2MaleCoefficients() baseline_survival = baseline_survival_model.male scale1 = calibration_scales.male_scale1 scale2 = calibration_scales.male_scale2 else: - female_coef = Score2FemaleCoefficients() - linear_pred = ( - female_coef.age * cage - + female_coef.smoking * smoking - + female_coef.sbp * csbp - + female_coef.total_cholesterol * ctchol - + female_coef.hdl_cholesterol * chdl - + female_coef.smoking_age * smoking_age - + female_coef.sbp_age * sbp_age - + female_coef.tchol_age * tchol_age - + female_coef.hdl_age * hdl_age - ) + coef = Score2FemaleCoefficients() baseline_survival = baseline_survival_model.female scale1 = calibration_scales.female_scale1 scale2 = calibration_scales.female_scale2 + linear_pred = ( + coef.age * cage + + coef.smoking * smoking + + coef.sbp * csbp + + coef.total_cholesterol * ctchol + + 
coef.hdl_cholesterol * chdl + + coef.smoking_age * smoking_age + + coef.sbp_age * sbp_age + + coef.tchol_age * tchol_age + + coef.hdl_age * hdl_age + ) + # Calculate uncalibrated risk uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) diff --git a/vitals/score2_diabetes/compute.py b/vitals/score2_diabetes/compute.py index 4dbfed7..77e69c4 100644 --- a/vitals/score2_diabetes/compute.py +++ b/vitals/score2_diabetes/compute.py @@ -86,55 +86,38 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor baseline_survival_model = BaselineSurvival() calibration_scales = CalibrationScales() + coef: Score2DiabetesMaleCoefficients | Score2DiabetesFemaleCoefficients if is_male: - male_coef = Score2DiabetesMaleCoefficients() - linear_pred = ( - male_coef.age * cage - + male_coef.smoking * smoking - + male_coef.sbp * csbp - + male_coef.diabetes * diabetes - + male_coef.total_cholesterol * ctchol - + male_coef.hdl_cholesterol * chdl - + male_coef.age_at_diabetes_diagnosis * cagediab - + male_coef.hba1c * ca1c - + male_coef.egfr * cegfr - + male_coef.egfr_squared * cegfr_squared - + male_coef.smoking_age * smoking_age - + male_coef.sbp_age * sbp_age - + male_coef.diabetes_age * diabetes_age - + male_coef.tchol_age * tchol_age - + male_coef.hdl_age * hdl_age - + male_coef.hba1c_age * hba1c_age - + male_coef.egfr_age * egfr_age - ) + coef = Score2DiabetesMaleCoefficients() baseline_survival = baseline_survival_model.male scale1 = calibration_scales.male_scale1 scale2 = calibration_scales.male_scale2 else: - female_coef = Score2DiabetesFemaleCoefficients() - linear_pred = ( - female_coef.age * cage - + female_coef.smoking * smoking - + female_coef.sbp * csbp - + female_coef.diabetes * diabetes - + female_coef.total_cholesterol * ctchol - + female_coef.hdl_cholesterol * chdl - + female_coef.age_at_diabetes_diagnosis * cagediab - + female_coef.hba1c * ca1c - + female_coef.egfr * cegfr - + female_coef.egfr_squared * cegfr_squared - 
+ female_coef.smoking_age * smoking_age - + female_coef.sbp_age * sbp_age - + female_coef.diabetes_age * diabetes_age - + female_coef.tchol_age * tchol_age - + female_coef.hdl_age * hdl_age - + female_coef.hba1c_age * hba1c_age - + female_coef.egfr_age * egfr_age - ) + coef = Score2DiabetesFemaleCoefficients() baseline_survival = baseline_survival_model.female scale1 = calibration_scales.female_scale1 scale2 = calibration_scales.female_scale2 + linear_pred = ( + coef.age * cage + + coef.smoking * smoking + + coef.sbp * csbp + + coef.diabetes * diabetes + + coef.total_cholesterol * ctchol + + coef.hdl_cholesterol * chdl + + coef.age_at_diabetes_diagnosis * cagediab + + coef.hba1c * ca1c + + coef.egfr * cegfr + + coef.egfr_squared * cegfr_squared + + coef.smoking_age * smoking_age + + coef.sbp_age * sbp_age + + coef.diabetes_age * diabetes_age + + coef.tchol_age * tchol_age + + coef.hdl_age * hdl_age + + coef.hba1c_age * hba1c_age + + coef.egfr_age * egfr_age + ) + # Calculate uncalibrated risk uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) From f44b39912adfaac5865cde6601ab5d7c7804f06c Mon Sep 17 00:00:00 2001 From: fbraza Date: Sat, 19 Jul 2025 23:42:31 +0200 Subject: [PATCH 3/3] refactor: consolidate shared components and reorganize schema modules - Move common functions (risk categorization, calibration, gompertz model) to helpers.py - Consolidate schema modules into algorithm-specific files (phenoage.py, score2.py) - Rename coefficient classes for clarity and consistency - Update imports across all compute modules to use new schema structure - Remove redundant schema files (core.py, markers.py, units.py) --- tests/test_phenoage.py | 4 +- tests/test_score2.py | 6 +- tests/test_score2_diabetes.py | 4 +- vitals/biomarkers/helpers.py | 62 ++++++++++- vitals/{phenoage => models}/__init__.py | 0 .../compute.py => models/phenoage.py} | 68 ++---------- .../{score2/compute.py => models/score2.py} | 36 ++++--- .../compute.py => 
models/score2_diabetes.py} | 39 +++---- vitals/schemas/core.py | 80 -------------- vitals/schemas/markers.py | 42 -------- vitals/schemas/phenoage.py | 62 +++++++++++ vitals/schemas/{coefficients.py => score2.py} | 101 +++++++++++++++++- vitals/schemas/units.py | 48 --------- vitals/score2/__init__.py | 0 vitals/score2_diabetes/__init__.py | 5 - 15 files changed, 272 insertions(+), 285 deletions(-) rename vitals/{phenoage => models}/__init__.py (100%) rename vitals/{phenoage/compute.py => models/phenoage.py} (50%) rename vitals/{score2/compute.py => models/score2.py} (79%) rename vitals/{score2_diabetes/compute.py => models/score2_diabetes.py} (81%) delete mode 100644 vitals/schemas/core.py delete mode 100644 vitals/schemas/markers.py create mode 100644 vitals/schemas/phenoage.py rename vitals/schemas/{coefficients.py => score2.py} (52%) delete mode 100644 vitals/schemas/units.py delete mode 100644 vitals/score2/__init__.py delete mode 100644 vitals/score2_diabetes/__init__.py diff --git a/tests/test_phenoage.py b/tests/test_phenoage.py index 3da32da..60cf7f2 100755 --- a/tests/test_phenoage.py +++ b/tests/test_phenoage.py @@ -2,7 +2,7 @@ import pytest -from vitals.phenoage import compute +from vitals.models import phenoage OUT_FILEPATH = Path(__file__).parent / "inputs" / "phenoage" @@ -25,7 +25,7 @@ ) def test_phenoage(filename, expected): # Get the actual fixture value using request.getfixturevalue - age, pred_age, accl_age = compute.biological_age(OUT_FILEPATH / filename) + age, pred_age, accl_age = phenoage.compute(OUT_FILEPATH / filename) expected_age, expected_pred_age, expected_accl_age = expected assert age == expected_age diff --git a/tests/test_score2.py b/tests/test_score2.py index 98e76ff..32adee3 100644 --- a/tests/test_score2.py +++ b/tests/test_score2.py @@ -2,7 +2,7 @@ import pytest -from vitals.score2 import compute +from vitals.models import score2 OUT_FILEPATH = Path(__file__).parent / "inputs" / "score2" @@ -26,9 +26,7 @@ ) def 
test_score2(filename, expected): # Get the actual fixture value using request.getfixturevalue - age, pred_risk, pred_risk_category = compute.cardiovascular_risk( - OUT_FILEPATH / filename - ) + age, pred_risk, pred_risk_category = score2.compute(OUT_FILEPATH / filename) expected_age, expected_risk, expected_category = expected assert age == expected_age diff --git a/tests/test_score2_diabetes.py b/tests/test_score2_diabetes.py index 78401f5..1a57311 100644 --- a/tests/test_score2_diabetes.py +++ b/tests/test_score2_diabetes.py @@ -2,7 +2,7 @@ import pytest -from vitals.score2_diabetes import compute +from vitals.models import score2_diabetes OUT_FILEPATH = Path(__file__).parent / "inputs" / "score2_diabetes" @@ -26,7 +26,7 @@ def test_score2_diabetes(filename, expected): They need to be calculated using MDCalc and updated before running tests. """ # Get the actual fixture value - age, pred_risk, pred_risk_category = compute.cardiovascular_risk( + age, pred_risk, pred_risk_category = score2_diabetes.compute( OUT_FILEPATH / filename ) expected_age, expected_risk, expected_category = expected diff --git a/vitals/biomarkers/helpers.py b/vitals/biomarkers/helpers.py index eeed0fa..15657f5 100755 --- a/vitals/biomarkers/helpers.py +++ b/vitals/biomarkers/helpers.py @@ -1,13 +1,15 @@ from collections.abc import Callable from pathlib import Path -from typing import Any, TypedDict, TypeVar +from typing import Any, Literal, TypeAlias, TypedDict, TypeVar +import numpy as np from pydantic import BaseModel -from vitals.schemas.units import PhenoageUnits, Score2DiabetesUnits, Score2Units +from vitals.schemas import phenoage, score2 +RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] Biomarkers = TypeVar("Biomarkers", bound=BaseModel) -Units = PhenoageUnits | Score2Units | Score2DiabetesUnits +Units = phenoage.Units | score2.Units | score2.UnitsDiabetes class ConversionInfo(TypedDict): @@ -198,3 +200,57 @@ def extract_biomarkers_from_json( 
extracted_values[field_name] = value return biomarker_class(**extracted_values) + + +def determine_risk_category(age: float, calibrated_risk: float) -> RiskCategory: + """ + Determine cardiovascular risk category based on age and calibrated risk percentage. + + Args: + age: Patient's age in years + calibrated_risk: Calibrated 10-year CVD risk as a percentage + + Returns: + Risk stratification category + """ + if age < 50: + if calibrated_risk < 2.5: + return "Low to moderate" + elif calibrated_risk < 7.5: + return "High" + else: + return "Very high" + else: # age 50-69 + if calibrated_risk < 5: + return "Low to moderate" + elif calibrated_risk < 10: + return "High" + else: + return "Very high" + + +def apply_calibration(uncalibrated_risk: float, scale1: float, scale2: float) -> float: + """ + Apply regional calibration to uncalibrated risk estimate. + + Args: + uncalibrated_risk: Raw risk estimate from the Cox model + scale1: First calibration scale parameter + scale2: Second calibration scale parameter + + Returns: + Calibrated 10-year CVD risk as a percentage + """ + return float( + (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) + * 100 + ) + + +def gompertz_mortality_model(weighted_risk_score: float) -> float: + params = phenoage.Gompertz() + return 1 - np.exp( + -np.exp(weighted_risk_score) + * (np.exp(120 * params.lambda_) - 1) + / params.lambda_ + ) diff --git a/vitals/phenoage/__init__.py b/vitals/models/__init__.py similarity index 100% rename from vitals/phenoage/__init__.py rename to vitals/models/__init__.py diff --git a/vitals/phenoage/compute.py b/vitals/models/phenoage.py similarity index 50% rename from vitals/phenoage/compute.py rename to vitals/models/phenoage.py index ac52596..7454608 100755 --- a/vitals/phenoage/compute.py +++ b/vitals/models/phenoage.py @@ -1,52 +1,12 @@ from pathlib import Path import numpy as np -from pydantic import BaseModel from vitals.biomarkers import helpers -from vitals.schemas.markers 
import PhenoageMarkers -from vitals.schemas.units import PhenoageUnits +from vitals.schemas.phenoage import Gompertz, LinearModel, Markers, Units -class LinearModel(BaseModel): - """ - Coefficients used to calculate the PhenoAge from Levine et al 2018 - """ - - intercept: float = -19.9067 - albumin: float = -0.0336 - creatinine: float = 0.0095 - glucose: float = 0.1953 - log_crp: float = 0.0954 - lymphocyte_percent: float = -0.0120 - mean_cell_volume: float = 0.0268 - red_cell_distribution_width: float = 0.3306 - alkaline_phosphatase: float = 0.00188 - white_blood_cell_count: float = 0.0554 - age: float = 0.0804 - - -class Gompertz(BaseModel): - """ - Parameters of the Gompertz distribution for PhenoAge computation - """ - - lambda_: float = 0.0192 - coef1: float = 141.50225 - coef2: float = -0.00553 - coef3: float = 0.090165 - - -def __gompertz_mortality_model(weighted_risk_score: float) -> float: - __params = Gompertz() - return 1 - np.exp( - -np.exp(weighted_risk_score) - * (np.exp(120 * __params.lambda_) - 1) - / __params.lambda_ - ) - - -def biological_age(filepath: str | Path) -> tuple[float, float, float]: +def compute(filepath: str | Path) -> tuple[float, float, float]: """ The Phenoage score is calculated as a weighted (coefficients available in Levine et al 2018) linear combination of these variables, which was then transformed into units of years using 2 parametric @@ -57,14 +17,14 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]: # Extract biomarkers from JSON file biomarkers = helpers.extract_biomarkers_from_json( filepath=filepath, - biomarker_class=PhenoageMarkers, - biomarker_units=PhenoageUnits(), + biomarker_class=Markers, + biomarker_units=Units(), ) age = biomarkers.age coef = LinearModel() - if isinstance(biomarkers, PhenoageMarkers): + if isinstance(biomarkers, Markers): weighted_risk_score = ( coef.intercept + (coef.albumin * biomarkers.albumin) @@ -81,7 +41,9 @@ def biological_age(filepath: str | Path) -> 
tuple[float, float, float]: + (coef.white_blood_cell_count * biomarkers.white_blood_cell_count) + (coef.age * biomarkers.age) ) - gompertz = __gompertz_mortality_model(weighted_risk_score=weighted_risk_score) + gompertz = helpers.gompertz_mortality_model( + weighted_risk_score=weighted_risk_score + ) model = Gompertz() pred_age = ( model.coef1 + np.log(model.coef2 * np.log(1 - gompertz)) / model.coef3 @@ -90,17 +52,3 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]: return (age, pred_age, accl_age) else: raise ValueError(f"Invalid biomarker class used: {biomarkers}") - - -# if __name__ == "__main__": -# from pathlib import Path -# input_dir = Path("tests/outputs") -# output_dir = Path("tests/outputs") - -# for input_file in input_dir.glob("*.json"): -# if "patient" not in str(input_file): -# continue - -# # Update biomarker data -# age, pred_age, accl_age = biological_age(str(input_file)) -# print(f"Chrono Age: {age} ::: Predicted Age: {pred_age} ::: Accel {accl_age}") diff --git a/vitals/score2/compute.py b/vitals/models/score2.py similarity index 79% rename from vitals/score2/compute.py rename to vitals/models/score2.py index 72fb196..51ca1e3 100644 --- a/vitals/score2/compute.py +++ b/vitals/models/score2.py @@ -10,19 +10,19 @@ import numpy as np from vitals.biomarkers import helpers -from vitals.schemas.coefficients import Score2FemaleCoefficients, Score2MaleCoefficients -from vitals.schemas.core import ( +from vitals.schemas.score2 import ( BaselineSurvival, CalibrationScales, - RiskCategory, - apply_calibration, - determine_risk_category, + FemaleCoefficientsBaseModel, + MaleCoefficientsBaseModel, + Markers, + Units, ) -from vitals.schemas.markers import Score2Markers -from vitals.schemas.units import Score2Units -def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]: +def compute( + filepath: str | Path, +) -> tuple[float, float, helpers.RiskCategory]: """ Calculate the 10-year cardiovascular disease risk 
using the SCORE2 algorithm. @@ -46,11 +46,11 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor # Extract biomarkers from JSON file biomarkers = helpers.extract_biomarkers_from_json( filepath=filepath, - biomarker_class=Score2Markers, - biomarker_units=Score2Units(), + biomarker_class=Markers, + biomarker_units=Units(), ) - if not isinstance(biomarkers, Score2Markers): + if not isinstance(biomarkers, Markers): raise ValueError(f"Invalid biomarker class used: {biomarkers}") age: float = biomarkers.age @@ -73,14 +73,14 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor baseline_survival_model = BaselineSurvival() calibration_scales = CalibrationScales() - coef: Score2MaleCoefficients | Score2FemaleCoefficients + coef: MaleCoefficientsBaseModel | FemaleCoefficientsBaseModel if is_male: - coef = Score2MaleCoefficients() + coef = MaleCoefficientsBaseModel() baseline_survival = baseline_survival_model.male scale1 = calibration_scales.male_scale1 scale2 = calibration_scales.male_scale2 else: - coef = Score2FemaleCoefficients() + coef = FemaleCoefficientsBaseModel() baseline_survival = baseline_survival_model.female scale1 = calibration_scales.female_scale1 scale2 = calibration_scales.female_scale2 @@ -101,9 +101,13 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) # Apply calibration for Belgium (Low Risk region) - calibrated_risk: float = apply_calibration(uncalibrated_risk, scale1, scale2) + calibrated_risk: float = helpers.apply_calibration( + uncalibrated_risk, scale1, scale2 + ) # Determine risk category based on age - risk_category: RiskCategory = determine_risk_category(age, calibrated_risk) + risk_category: helpers.RiskCategory = helpers.determine_risk_category( + age, calibrated_risk + ) return (age, round(calibrated_risk, 2), risk_category) diff --git 
a/vitals/score2_diabetes/compute.py b/vitals/models/score2_diabetes.py similarity index 81% rename from vitals/score2_diabetes/compute.py rename to vitals/models/score2_diabetes.py index 77e69c4..47aa158 100644 --- a/vitals/score2_diabetes/compute.py +++ b/vitals/models/score2_diabetes.py @@ -11,22 +11,19 @@ import numpy as np from vitals.biomarkers import helpers -from vitals.schemas.coefficients import ( - Score2DiabetesFemaleCoefficients, - Score2DiabetesMaleCoefficients, -) -from vitals.schemas.core import ( +from vitals.schemas.score2 import ( BaselineSurvival, CalibrationScales, - RiskCategory, - apply_calibration, - determine_risk_category, + FemaleCoefficientsDiabeticModel, + MaleCoefficientsDiabeticModel, + MarkersDiabetes, + UnitsDiabetes, ) -from vitals.schemas.markers import Score2DiabetesMarkers -from vitals.schemas.units import Score2DiabetesUnits -def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]: +def compute( + filepath: str | Path, +) -> tuple[float, float, helpers.RiskCategory]: """ Calculate the 10-year cardiovascular disease risk using the SCORE2-Diabetes algorithm. 
@@ -51,11 +48,11 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor # Extract biomarkers from JSON file biomarkers = helpers.extract_biomarkers_from_json( filepath=filepath, - biomarker_class=Score2DiabetesMarkers, - biomarker_units=Score2DiabetesUnits(), + biomarker_class=MarkersDiabetes, + biomarker_units=UnitsDiabetes(), ) - if not isinstance(biomarkers, Score2DiabetesMarkers): + if not isinstance(biomarkers, MarkersDiabetes): raise ValueError(f"Invalid biomarker class used: {biomarkers}") age: float = biomarkers.age @@ -86,14 +83,14 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor baseline_survival_model = BaselineSurvival() calibration_scales = CalibrationScales() - coef: Score2DiabetesMaleCoefficients | Score2DiabetesFemaleCoefficients + coef: MaleCoefficientsDiabeticModel | FemaleCoefficientsDiabeticModel if is_male: - coef = Score2DiabetesMaleCoefficients() + coef = MaleCoefficientsDiabeticModel() baseline_survival = baseline_survival_model.male scale1 = calibration_scales.male_scale1 scale2 = calibration_scales.male_scale2 else: - coef = Score2DiabetesFemaleCoefficients() + coef = FemaleCoefficientsDiabeticModel() baseline_survival = baseline_survival_model.female scale1 = calibration_scales.female_scale1 scale2 = calibration_scales.female_scale2 @@ -122,9 +119,13 @@ def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategor uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) # Apply calibration for Belgium (Low Risk region) - calibrated_risk: float = apply_calibration(uncalibrated_risk, scale1, scale2) + calibrated_risk: float = helpers.apply_calibration( + uncalibrated_risk, scale1, scale2 + ) # Determine risk category based on age - risk_category: RiskCategory = determine_risk_category(age, calibrated_risk) + risk_category: helpers.RiskCategory = helpers.determine_risk_category( + age, calibrated_risk + ) return (age, 
round(calibrated_risk, 2), risk_category) diff --git a/vitals/schemas/core.py b/vitals/schemas/core.py deleted file mode 100644 index 8ae4fdf..0000000 --- a/vitals/schemas/core.py +++ /dev/null @@ -1,80 +0,0 @@ -from typing import Literal, TypeAlias - -import numpy as np -from pydantic import BaseModel - -RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] - - -class BaselineSurvival(BaseModel): - """ - Sex-specific baseline survival probabilities for the SCORE2 model. - - These values represent the 10-year survival probability for individuals - with all risk factors at their reference values. - """ - - male: float = 0.9605 - female: float = 0.9776 - - -class CalibrationScales(BaseModel): - """ - Region and sex-specific calibration scales for Belgium (Low Risk region). - - These scales are used to calibrate the uncalibrated risk estimate to match - the population-specific cardiovascular disease incidence rates. - """ - - # Male calibration scales - male_scale1: float = -0.5699 - male_scale2: float = 0.7476 - - # Female calibration scales - female_scale1: float = -0.7380 - female_scale2: float = 0.7019 - - -def determine_risk_category(age: float, calibrated_risk: float) -> RiskCategory: - """ - Determine cardiovascular risk category based on age and calibrated risk percentage. - - Args: - age: Patient's age in years - calibrated_risk: Calibrated 10-year CVD risk as a percentage - - Returns: - Risk stratification category - """ - if age < 50: - if calibrated_risk < 2.5: - return "Low to moderate" - elif calibrated_risk < 7.5: - return "High" - else: - return "Very high" - else: # age 50-69 - if calibrated_risk < 5: - return "Low to moderate" - elif calibrated_risk < 10: - return "High" - else: - return "Very high" - - -def apply_calibration(uncalibrated_risk: float, scale1: float, scale2: float) -> float: - """ - Apply regional calibration to uncalibrated risk estimate. 
- - Args: - uncalibrated_risk: Raw risk estimate from the Cox model - scale1: First calibration scale parameter - scale2: Second calibration scale parameter - - Returns: - Calibrated 10-year CVD risk as a percentage - """ - return float( - (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) - * 100 - ) diff --git a/vitals/schemas/markers.py b/vitals/schemas/markers.py deleted file mode 100644 index b3e630c..0000000 --- a/vitals/schemas/markers.py +++ /dev/null @@ -1,42 +0,0 @@ -from pydantic import BaseModel - - -class PhenoageMarkers(BaseModel): - """Processed PhenoAge biomarkers with standardized units.""" - - albumin: float - creatinine: float - glucose: float - crp: float - lymphocyte_percent: float - mean_cell_volume: float - red_cell_distribution_width: float - alkaline_phosphatase: float - white_blood_cell_count: float - age: float - - -class Score2Markers(BaseModel): - """Processed Score2 biomarkers with standardized units.""" - - age: float - systolic_blood_pressure: float - total_cholesterol: float - hdl_cholesterol: float - smoking: bool - is_male: bool - - -class Score2DiabetesMarkers(BaseModel): - """Processed Score2-Diabetes biomarkers with standardized units.""" - - age: float - systolic_blood_pressure: float - total_cholesterol: float - hdl_cholesterol: float - smoking: bool - is_male: bool - diabetes: bool - age_at_diabetes_diagnosis: float - hba1c: float - egfr: float diff --git a/vitals/schemas/phenoage.py b/vitals/schemas/phenoage.py new file mode 100644 index 0000000..fc1b281 --- /dev/null +++ b/vitals/schemas/phenoage.py @@ -0,0 +1,62 @@ +from pydantic import BaseModel + + +class Markers(BaseModel): + """Processed PhenoAge biomarkers with standardized units.""" + + albumin: float + creatinine: float + glucose: float + crp: float + lymphocyte_percent: float + mean_cell_volume: float + red_cell_distribution_width: float + alkaline_phosphatase: float + white_blood_cell_count: float + age: float + + +class 
Units(BaseModel): + """ + The expected unit to be used for phenoage computation + """ + + albumin: str = "g/L" + creatinine: str = "umol/L" + glucose: str = "mmol/L" + crp: str = "mg/dL" + lymphocyte_percent: str = "%" + mean_cell_volume: str = "fL" + red_cell_distribution_width: str = "%" + alkaline_phosphatase: str = "U/L" + white_blood_cell_count: str = "1000 cells/uL" + age: str = "years" + + +class LinearModel(BaseModel): + """ + Coefficients used to calculate the PhenoAge from Levine et al 2018 + """ + + intercept: float = -19.9067 + albumin: float = -0.0336 + creatinine: float = 0.0095 + glucose: float = 0.1953 + log_crp: float = 0.0954 + lymphocyte_percent: float = -0.0120 + mean_cell_volume: float = 0.0268 + red_cell_distribution_width: float = 0.3306 + alkaline_phosphatase: float = 0.00188 + white_blood_cell_count: float = 0.0554 + age: float = 0.0804 + + +class Gompertz(BaseModel): + """ + Parameters of the Gompertz distribution for PhenoAge computation + """ + + lambda_: float = 0.0192 + coef1: float = 141.50225 + coef2: float = -0.00553 + coef3: float = 0.090165 diff --git a/vitals/schemas/coefficients.py b/vitals/schemas/score2.py similarity index 52% rename from vitals/schemas/coefficients.py rename to vitals/schemas/score2.py index 62a4e9f..336d8e0 100644 --- a/vitals/schemas/coefficients.py +++ b/vitals/schemas/score2.py @@ -1,7 +1,65 @@ from pydantic import BaseModel +# Common for all models -class Score2MaleCoefficients(BaseModel): + +class BaselineSurvival(BaseModel): + """ + Sex-specific baseline survival probabilities for the SCORE2 model. + + These values represent the 10-year survival probability for individuals + with all risk factors at their reference values. + """ + + male: float = 0.9605 + female: float = 0.9776 + + +class CalibrationScales(BaseModel): + """ + Region and sex-specific calibration scales for Belgium (Low Risk region). 
+ + These scales are used to calibrate the uncalibrated risk estimate to match + the population-specific cardiovascular disease incidence rates. + """ + + # Male calibration scales + male_scale1: float = -0.5699 + male_scale2: float = 0.7476 + + # Female calibration scales + female_scale1: float = -0.7380 + female_scale2: float = 0.7019 + + +# ----- For Basal score2 model + + +class Markers(BaseModel): + """Processed Score2 biomarkers with standardized units.""" + + age: float + systolic_blood_pressure: float + total_cholesterol: float + hdl_cholesterol: float + smoking: bool + is_male: bool + + +class Units(BaseModel): + """ + The expected unit to be used for Score2 computation + """ + + age: str = "years" + systolic_blood_pressure: str = "mmHg" + total_cholesterol: str = "mmol/L" + hdl_cholesterol: str = "mmol/L" + smoking: str = "yes/no" + is_male: str = "yes/no" + + +class MaleCoefficientsBaseModel(BaseModel): """ Male-specific coefficients for the SCORE2 Cox proportional hazards model. """ @@ -20,7 +78,7 @@ class Score2MaleCoefficients(BaseModel): hdl_age: float = 0.0426 -class Score2FemaleCoefficients(BaseModel): +class FemaleCoefficientsBaseModel(BaseModel): """ Female-specific coefficients for the SCORE2 Cox proportional hazards model. 
""" @@ -39,7 +97,42 @@ class Score2FemaleCoefficients(BaseModel): hdl_age: float = 0.0613 -class Score2DiabetesMaleCoefficients(Score2MaleCoefficients): +# ----- For Diabetic score2 model + + +class MarkersDiabetes(BaseModel): + """Processed Score2-Diabetes biomarkers with standardized units.""" + + age: float + systolic_blood_pressure: float + total_cholesterol: float + hdl_cholesterol: float + smoking: bool + is_male: bool + diabetes: bool + age_at_diabetes_diagnosis: float + hba1c: float + egfr: float + + +class UnitsDiabetes(BaseModel): + """ + The expected unit to be used for Score2-Diabetes computation + """ + + age: str = "years" + systolic_blood_pressure: str = "mmHg" + total_cholesterol: str = "mmol/L" + hdl_cholesterol: str = "mmol/L" + smoking: str = "yes/no" + is_male: str = "yes/no" + diabetes: str = "yes/no" + age_at_diabetes_diagnosis: str = "years" + hba1c: str = "mmol/mol" + egfr: str = "mL/min/1.73m²" + + +class MaleCoefficientsDiabeticModel(MaleCoefficientsBaseModel): """ Male-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model. Extends the base SCORE2 male coefficients with diabetes-specific parameters. @@ -71,7 +164,7 @@ class Score2DiabetesMaleCoefficients(Score2MaleCoefficients): egfr_age: float = 0.0115 -class Score2DiabetesFemaleCoefficients(Score2FemaleCoefficients): +class FemaleCoefficientsDiabeticModel(FemaleCoefficientsBaseModel): """ Female-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model. Extends the base SCORE2 female coefficients with diabetes-specific parameters. 
diff --git a/vitals/schemas/units.py b/vitals/schemas/units.py deleted file mode 100644 index dfa3b18..0000000 --- a/vitals/schemas/units.py +++ /dev/null @@ -1,48 +0,0 @@ -from pydantic import BaseModel - - -class PhenoageUnits(BaseModel): - """ - The expected unit to be used for phenoage computation - """ - - albumin: str = "g/L" - creatinine: str = "umol/L" - glucose: str = "mmol/L" - crp: str = "mg/dL" - lymphocyte_percent: str = "%" - mean_cell_volume: str = "fL" - red_cell_distribution_width: str = "%" - alkaline_phosphatase: str = "U/L" - white_blood_cell_count: str = "1000 cells/uL" - age: str = "years" - - -class Score2Units(BaseModel): - """ - The expected unit to be used for Score2 computation - """ - - age: str = "years" - systolic_blood_pressure: str = "mmHg" - total_cholesterol: str = "mmol/L" - hdl_cholesterol: str = "mmol/L" - smoking: str = "yes/no" - is_male: str = "yes/no" - - -class Score2DiabetesUnits(BaseModel): - """ - The expected unit to be used for Score2-Diabetes computation - """ - - age: str = "years" - systolic_blood_pressure: str = "mmHg" - total_cholesterol: str = "mmol/L" - hdl_cholesterol: str = "mmol/L" - smoking: str = "yes/no" - is_male: str = "yes/no" - diabetes: str = "yes/no" - age_at_diabetes_diagnosis: str = "years" - hba1c: str = "mmol/mol" - egfr: str = "mL/min/1.73m²" diff --git a/vitals/score2/__init__.py b/vitals/score2/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/vitals/score2_diabetes/__init__.py b/vitals/score2_diabetes/__init__.py deleted file mode 100644 index c1e1944..0000000 --- a/vitals/score2_diabetes/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""SCORE2-Diabetes cardiovascular risk assessment algorithm.""" - -from .compute import cardiovascular_risk - -__all__ = ["cardiovascular_risk"]