diff --git a/tests/test_phenoage.py b/tests/test_phenoage.py index 3da32da..60cf7f2 100755 --- a/tests/test_phenoage.py +++ b/tests/test_phenoage.py @@ -2,7 +2,7 @@ import pytest -from vitals.phenoage import compute +from vitals.models import phenoage OUT_FILEPATH = Path(__file__).parent / "inputs" / "phenoage" @@ -25,7 +25,7 @@ ) def test_phenoage(filename, expected): # Get the actual fixture value using request.getfixturevalue - age, pred_age, accl_age = compute.biological_age(OUT_FILEPATH / filename) + age, pred_age, accl_age = phenoage.compute(OUT_FILEPATH / filename) expected_age, expected_pred_age, expected_accl_age = expected assert age == expected_age diff --git a/tests/test_score2.py b/tests/test_score2.py index 98e76ff..32adee3 100644 --- a/tests/test_score2.py +++ b/tests/test_score2.py @@ -2,7 +2,7 @@ import pytest -from vitals.score2 import compute +from vitals.models import score2 OUT_FILEPATH = Path(__file__).parent / "inputs" / "score2" @@ -26,9 +26,7 @@ ) def test_score2(filename, expected): # Get the actual fixture value using request.getfixturevalue - age, pred_risk, pred_risk_category = compute.cardiovascular_risk( - OUT_FILEPATH / filename - ) + age, pred_risk, pred_risk_category = score2.compute(OUT_FILEPATH / filename) expected_age, expected_risk, expected_category = expected assert age == expected_age diff --git a/tests/test_score2_diabetes.py b/tests/test_score2_diabetes.py index 78401f5..1a57311 100644 --- a/tests/test_score2_diabetes.py +++ b/tests/test_score2_diabetes.py @@ -2,7 +2,7 @@ import pytest -from vitals.score2_diabetes import compute +from vitals.models import score2_diabetes OUT_FILEPATH = Path(__file__).parent / "inputs" / "score2_diabetes" @@ -26,7 +26,7 @@ def test_score2_diabetes(filename, expected): They need to be calculated using MDCalc and updated before running tests. """ # Get the actual fixture value - age, pred_risk, pred_risk_category = compute.cardiovascular_risk( + age, pred_risk, pred_risk_category = score2_diabetes.compute( OUT_FILEPATH / filename ) expected_age, expected_risk, expected_category = expected diff --git a/vitals/biomarkers/helpers.py b/vitals/biomarkers/helpers.py index ca1d8d7..15657f5 100755 --- a/vitals/biomarkers/helpers.py +++ b/vitals/biomarkers/helpers.py @@ -1,13 +1,15 @@ from collections.abc import Callable from pathlib import Path -from typing import Any, TypedDict, TypeVar +from typing import Any, Literal, TypeAlias, TypedDict, TypeVar +import numpy as np from pydantic import BaseModel -from vitals.biomarkers import schemas +from vitals.schemas import phenoage, score2 +RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] Biomarkers = TypeVar("Biomarkers", bound=BaseModel) -Units = schemas.PhenoageUnits | schemas.Score2Units | schemas.Score2DiabetesUnits +Units = phenoage.Units | score2.Units | score2.UnitsDiabetes class ConversionInfo(TypedDict): @@ -198,3 +200,57 @@ def extract_biomarkers_from_json( extracted_values[field_name] = value return biomarker_class(**extracted_values) + + +def determine_risk_category(age: float, calibrated_risk: float) -> RiskCategory: + """ + Determine cardiovascular risk category based on age and calibrated risk percentage. + + Args: + age: Patient's age in years + calibrated_risk: Calibrated 10-year CVD risk as a percentage + + Returns: + Risk stratification category + """ + if age < 50: + if calibrated_risk < 2.5: + return "Low to moderate" + elif calibrated_risk < 7.5: + return "High" + else: + return "Very high" + else: # age 50-69 + if calibrated_risk < 5: + return "Low to moderate" + elif calibrated_risk < 10: + return "High" + else: + return "Very high" + + +def apply_calibration(uncalibrated_risk: float, scale1: float, scale2: float) -> float: + """ + Apply regional calibration to uncalibrated risk estimate. + + Args: + uncalibrated_risk: Raw risk estimate from the Cox model + scale1: First calibration scale parameter + scale2: Second calibration scale parameter + + Returns: + Calibrated 10-year CVD risk as a percentage + """ + return float( + (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) + * 100 + ) + + +def gompertz_mortality_model(weighted_risk_score: float) -> float: + params = phenoage.Gompertz() + return 1 - np.exp( + -np.exp(weighted_risk_score) + * (np.exp(120 * params.lambda_) - 1) + / params.lambda_ + ) diff --git a/vitals/biomarkers/schemas.py b/vitals/biomarkers/schemas.py deleted file mode 100755 index e93e7d4..0000000 --- a/vitals/biomarkers/schemas.py +++ /dev/null @@ -1,92 +0,0 @@ -from pydantic import BaseModel - - -# ------ PHENOAGE Schemas -class PhenoageUnits(BaseModel): - """ - The expected unit to be used for phenoage computation - """ - - albumin: str = "g/L" - creatinine: str = "umol/L" - glucose: str = "mmol/L" - crp: str = "mg/dL" - lymphocyte_percent: str = "%" - mean_cell_volume: str = "fL" - red_cell_distribution_width: str = "%" - alkaline_phosphatase: str = "U/L" - white_blood_cell_count: str = "1000 cells/uL" - age: str = "years" - - -class PhenoageMarkers(BaseModel): - """Processed PhenoAge biomarkers with standardized units.""" - - albumin: float - creatinine: float - glucose: float - crp: float - lymphocyte_percent: float - mean_cell_volume: float - red_cell_distribution_width: float - alkaline_phosphatase: float - white_blood_cell_count: float - age: float - - -# ------ SCORE2 Schemas -class Score2Units(BaseModel): - """ - The expected unit to be used for Score2 computation - """ - - age: str = "years" - systolic_blood_pressure: str = "mmHg" - total_cholesterol: str = "mmol/L" - hdl_cholesterol: str = "mmol/L" - smoking: str = "yes/no" - is_male: str = "yes/no" - - -class Score2Markers(BaseModel): - """Processed Score2 biomarkers with standardized units.""" - - age: float - systolic_blood_pressure: float - total_cholesterol: float - hdl_cholesterol: float - smoking: bool - is_male: bool - - -# ------ SCORE2-Diabetes Schemas -class Score2DiabetesUnits(BaseModel): - """ - The expected unit to be used for Score2-Diabetes computation - """ - - age: str = "years" - systolic_blood_pressure: str = "mmHg" - total_cholesterol: str = "mmol/L" - hdl_cholesterol: str = "mmol/L" - smoking: str = "yes/no" - is_male: str = "yes/no" - diabetes: str = "yes/no" - age_at_diabetes_diagnosis: str = "years" - hba1c: str = "mmol/mol" - egfr: str = "mL/min/1.73m²" - - -class Score2DiabetesMarkers(BaseModel): - """Processed Score2-Diabetes biomarkers with standardized units.""" - - age: float - systolic_blood_pressure: float - total_cholesterol: float - hdl_cholesterol: float - smoking: bool - is_male: bool - diabetes: bool - age_at_diabetes_diagnosis: float - hba1c: float - egfr: float diff --git a/vitals/phenoage/__init__.py b/vitals/models/__init__.py similarity index 100% rename from vitals/phenoage/__init__.py rename to vitals/models/__init__.py diff --git a/vitals/phenoage/compute.py b/vitals/models/phenoage.py similarity index 50% rename from vitals/phenoage/compute.py rename to vitals/models/phenoage.py index faf597d..7454608 100755 --- a/vitals/phenoage/compute.py +++ b/vitals/models/phenoage.py @@ -1,50 +1,12 @@ from pathlib import Path import numpy as np -from pydantic import BaseModel -from vitals.biomarkers import helpers, schemas +from vitals.biomarkers import helpers +from vitals.schemas.phenoage import Gompertz, LinearModel, Markers, Units -class LinearModel(BaseModel): - """ - Coefficients used to calculate the PhenoAge from Levine et al 2018 - """ - - intercept: float = -19.9067 - albumin: float = -0.0336 - creatinine: float = 0.0095 - glucose: float = 0.1953 - log_crp: float = 0.0954 - lymphocyte_percent: float = -0.0120 - mean_cell_volume: float = 0.0268 - red_cell_distribution_width: float = 0.3306 - alkaline_phosphatase: float = 0.00188 - white_blood_cell_count: float = 0.0554 - age: float = 0.0804 - - -class Gompertz(BaseModel): - """ - Parameters of the Gompertz distribution for PhenoAge computation - """ - - lambda_: float = 0.0192 - coef1: float = 141.50225 - coef2: float = -0.00553 - coef3: float = 0.090165 - - -def __gompertz_mortality_model(weighted_risk_score: float) -> float: - __params = Gompertz() - return 1 - np.exp( - -np.exp(weighted_risk_score) - * (np.exp(120 * __params.lambda_) - 1) - / __params.lambda_ - ) - - -def biological_age(filepath: str | Path) -> tuple[float, float, float]: +def compute(filepath: str | Path) -> tuple[float, float, float]: """ The Phenoage score is calculated as a weighted (coefficients available in Levine et al 2018) linear combination of these variables, which was then transformed into units of years using 2 parametric @@ -55,14 +17,14 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]: # Extract biomarkers from JSON file biomarkers = helpers.extract_biomarkers_from_json( filepath=filepath, - biomarker_class=schemas.PhenoageMarkers, - biomarker_units=schemas.PhenoageUnits(), + biomarker_class=Markers, + biomarker_units=Units(), ) age = biomarkers.age coef = LinearModel() - if isinstance(biomarkers, schemas.PhenoageMarkers): + if isinstance(biomarkers, Markers): weighted_risk_score = ( coef.intercept + (coef.albumin * biomarkers.albumin) @@ -79,7 +41,9 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]: + (coef.white_blood_cell_count * biomarkers.white_blood_cell_count) + (coef.age * biomarkers.age) ) - gompertz = __gompertz_mortality_model(weighted_risk_score=weighted_risk_score) + gompertz = helpers.gompertz_mortality_model( + weighted_risk_score=weighted_risk_score + ) model = Gompertz() pred_age = ( model.coef1 + np.log(model.coef2 * np.log(1 - gompertz)) / model.coef3 @@ -88,17 +52,3 @@ def biological_age(filepath: str | Path) -> tuple[float, float, float]: return (age, pred_age, accl_age) else: raise ValueError(f"Invalid biomarker class used: {biomarkers}") - - -# if __name__ == "__main__": -# from pathlib import Path -# input_dir = Path("tests/outputs") -# output_dir = Path("tests/outputs") - -# for input_file in input_dir.glob("*.json"): -# if "patient" not in str(input_file): -# continue - -# # Update biomarker data -# age, pred_age, accl_age = biological_age(str(input_file)) -# print(f"Chrono Age: {age} ::: Predicted Age: {pred_age} ::: Accel {accl_age}") diff --git a/vitals/models/score2.py b/vitals/models/score2.py new file mode 100644 index 0000000..51ca1e3 --- /dev/null +++ b/vitals/models/score2.py @@ -0,0 +1,113 @@ +""" +Module for computing the SCORE2 cardiovascular risk assessment. + +This module implements the SCORE2 algorithm for 10-year cardiovascular disease risk estimation +in apparently healthy individuals aged 40-69 years in Europe. +""" + +from pathlib import Path + +import numpy as np + +from vitals.biomarkers import helpers +from vitals.schemas.score2 import ( + BaselineSurvival, + CalibrationScales, + FemaleCoefficientsBaseModel, + MaleCoefficientsBaseModel, + Markers, + Units, +) + + +def compute( + filepath: str | Path, +) -> tuple[float, float, helpers.RiskCategory]: + """ + Calculate the 10-year cardiovascular disease risk using the SCORE2 algorithm. + + This function implements the SCORE2 risk assessment for apparently healthy individuals + aged 40-69 years in Europe. It uses sex-specific Cox proportional hazards model + coefficients and applies regional calibration for Belgium (Low Risk region). + + Args: + filepath: Path to JSON file containing biomarker data including age, sex, + systolic blood pressure, total cholesterol, HDL cholesterol, and smoking status. + + Returns: + A tuple containing: + - age: The patient's chronological age + - risk_percentage: The calibrated 10-year CVD risk as a percentage + - risk_category: Risk stratification category ("Low to moderate", "High", or "Very high") + + Raises: + ValueError: If invalid biomarker class is used + """ + # Extract biomarkers from JSON file + biomarkers = helpers.extract_biomarkers_from_json( + filepath=filepath, + biomarker_class=Markers, + biomarker_units=Units(), + ) + + if not isinstance(biomarkers, Markers): + raise ValueError(f"Invalid biomarker class used: {biomarkers}") + + age: float = biomarkers.age + is_male: bool = biomarkers.is_male # True for male, False for female + + # Apply transformations to biomarkers + cage: float = (age - 60) / 5 + smoking: float = float(biomarkers.smoking) # Convert bool to float (1.0 or 0.0) + csbp: float = (biomarkers.systolic_blood_pressure - 120) / 20 + ctchol: float = biomarkers.total_cholesterol - 6 + chdl: float = (biomarkers.hdl_cholesterol - 1.3) / 0.5 + + # Calculate interaction terms + smoking_age: float = smoking * cage + sbp_age: float = csbp * cage + tchol_age: float = ctchol * cage + hdl_age: float = chdl * cage + + # Get sex-specific coefficients and calibration values + baseline_survival_model = BaselineSurvival() + calibration_scales = CalibrationScales() + + coef: MaleCoefficientsBaseModel | FemaleCoefficientsBaseModel + if is_male: + coef = MaleCoefficientsBaseModel() + baseline_survival = baseline_survival_model.male + scale1 = calibration_scales.male_scale1 + scale2 = calibration_scales.male_scale2 + else: + coef = FemaleCoefficientsBaseModel() + baseline_survival = baseline_survival_model.female + scale1 = calibration_scales.female_scale1 + scale2 = calibration_scales.female_scale2 + + linear_pred = ( + coef.age * cage + + coef.smoking * smoking + + coef.sbp * csbp + + coef.total_cholesterol * ctchol + + coef.hdl_cholesterol * chdl + + coef.smoking_age * smoking_age + + coef.sbp_age * sbp_age + + coef.tchol_age * tchol_age + + coef.hdl_age * hdl_age + ) + + # Calculate uncalibrated risk + uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) + + # Apply calibration for Belgium (Low Risk region) + calibrated_risk: float = helpers.apply_calibration( + uncalibrated_risk, scale1, scale2 + ) + + # Determine risk category based on age + risk_category: helpers.RiskCategory = helpers.determine_risk_category( + age, calibrated_risk + ) + + return (age, round(calibrated_risk, 2), risk_category) diff --git a/vitals/models/score2_diabetes.py b/vitals/models/score2_diabetes.py new file mode 100644 index 0000000..47aa158 --- /dev/null +++ b/vitals/models/score2_diabetes.py @@ -0,0 +1,131 @@ +""" +Module for computing the SCORE2-Diabetes cardiovascular risk assessment. + +This module implements the SCORE2-Diabetes algorithm for 10-year cardiovascular disease +risk estimation in patients with diabetes. +""" + +import math +from pathlib import Path + +import numpy as np + +from vitals.biomarkers import helpers +from vitals.schemas.score2 import ( + BaselineSurvival, + CalibrationScales, + FemaleCoefficientsDiabeticModel, + MaleCoefficientsDiabeticModel, + MarkersDiabetes, + UnitsDiabetes, +) + + +def compute( + filepath: str | Path, +) -> tuple[float, float, helpers.RiskCategory]: + """ + Calculate the 10-year cardiovascular disease risk using the SCORE2-Diabetes algorithm. + + This function implements the SCORE2-Diabetes risk assessment for patients with diabetes. + It uses sex-specific Cox proportional hazards model coefficients and applies regional + calibration for Belgium (Low Risk region). + + Args: + filepath: Path to JSON file containing biomarker data including age, sex, + systolic blood pressure, total cholesterol, HDL cholesterol, smoking status, + diabetes status, age at diabetes diagnosis, HbA1c, and eGFR. + + Returns: + A tuple containing: + - age: The patient's chronological age + - risk_percentage: The calibrated 10-year CVD risk as a percentage + - risk_category: Risk stratification category ("Low to moderate", "High", or "Very high") + + Raises: + ValueError: If invalid biomarker class is used + """ + # Extract biomarkers from JSON file + biomarkers = helpers.extract_biomarkers_from_json( + filepath=filepath, + biomarker_class=MarkersDiabetes, + biomarker_units=UnitsDiabetes(), + ) + + if not isinstance(biomarkers, MarkersDiabetes): + raise ValueError(f"Invalid biomarker class used: {biomarkers}") + + age: float = biomarkers.age + is_male: bool = biomarkers.is_male # True for male, False for female + + # Apply transformations to biomarkers + cage: float = (age - 60) / 5 + smoking: float = float(biomarkers.smoking) # Convert bool to float (1.0 or 0.0) + csbp: float = (biomarkers.systolic_blood_pressure - 120) / 20 + diabetes: float = float(biomarkers.diabetes) # Convert bool to float (1.0 or 0.0) + ctchol: float = biomarkers.total_cholesterol - 6 + chdl: float = (biomarkers.hdl_cholesterol - 1.3) / 0.5 + cagediab: float = diabetes * (biomarkers.age_at_diabetes_diagnosis - 50) / 5 + ca1c: float = (biomarkers.hba1c - 31) / 9.34 + cegfr: float = (math.log(biomarkers.egfr) - 4.5) / 0.15 + cegfr_squared: float = cegfr * cegfr + + # Calculate interaction terms + smoking_age: float = smoking * cage + sbp_age: float = csbp * cage + diabetes_age: float = diabetes * cage + tchol_age: float = ctchol * cage + hdl_age: float = chdl * cage + hba1c_age: float = ca1c * cage + egfr_age: float = cegfr * cage + + # Get sex-specific coefficients and calibration values + baseline_survival_model = BaselineSurvival() + calibration_scales = CalibrationScales() + + coef: MaleCoefficientsDiabeticModel | FemaleCoefficientsDiabeticModel + if is_male: + coef = MaleCoefficientsDiabeticModel() + baseline_survival = baseline_survival_model.male + scale1 = calibration_scales.male_scale1 + scale2 = calibration_scales.male_scale2 + else: + coef = FemaleCoefficientsDiabeticModel() + baseline_survival = baseline_survival_model.female + scale1 = calibration_scales.female_scale1 + scale2 = calibration_scales.female_scale2 + + linear_pred = ( + coef.age * cage + + coef.smoking * smoking + + coef.sbp * csbp + + coef.diabetes * diabetes + + coef.total_cholesterol * ctchol + + coef.hdl_cholesterol * chdl + + coef.age_at_diabetes_diagnosis * cagediab + + coef.hba1c * ca1c + + coef.egfr * cegfr + + coef.egfr_squared * cegfr_squared + + coef.smoking_age * smoking_age + + coef.sbp_age * sbp_age + + coef.diabetes_age * diabetes_age + + coef.tchol_age * tchol_age + + coef.hdl_age * hdl_age + + coef.hba1c_age * hba1c_age + + coef.egfr_age * egfr_age + ) + + # Calculate uncalibrated risk + uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) + + # Apply calibration for Belgium (Low Risk region) + calibrated_risk: float = helpers.apply_calibration( + uncalibrated_risk, scale1, scale2 + ) + + # Determine risk category based on age + risk_category: helpers.RiskCategory = helpers.determine_risk_category( + age, calibrated_risk + ) + + return (age, round(calibrated_risk, 2), risk_category) diff --git a/vitals/score2/__init__.py b/vitals/schemas/__init__.py similarity index 100% rename from vitals/score2/__init__.py rename to vitals/schemas/__init__.py diff --git a/vitals/schemas/phenoage.py b/vitals/schemas/phenoage.py new file mode 100644 index 0000000..fc1b281 --- /dev/null +++ b/vitals/schemas/phenoage.py @@ -0,0 +1,62 @@ +from pydantic import BaseModel + + +class Markers(BaseModel): + """Processed PhenoAge biomarkers with standardized units.""" + + albumin: float + creatinine: float + glucose: float + crp: float + lymphocyte_percent: float + mean_cell_volume: float + red_cell_distribution_width: float + alkaline_phosphatase: float + white_blood_cell_count: float + age: float + + +class Units(BaseModel): + """ + The expected unit to be used for phenoage computation + """ + + albumin: str = "g/L" + creatinine: str = "umol/L" + glucose: str = "mmol/L" + crp: str = "mg/dL" + lymphocyte_percent: str = "%" + mean_cell_volume: str = "fL" + red_cell_distribution_width: str = "%" + alkaline_phosphatase: str = "U/L" + white_blood_cell_count: str = "1000 cells/uL" + age: str = "years" + + +class LinearModel(BaseModel): + """ + Coefficients used to calculate the PhenoAge from Levine et al 2018 + """ + + intercept: float = -19.9067 + albumin: float = -0.0336 + creatinine: float = 0.0095 + glucose: float = 0.1953 + log_crp: float = 0.0954 + lymphocyte_percent: float = -0.0120 + mean_cell_volume: float = 0.0268 + red_cell_distribution_width: float = 0.3306 + alkaline_phosphatase: float = 0.00188 + white_blood_cell_count: float = 0.0554 + age: float = 0.0804 + + +class Gompertz(BaseModel): + """ + Parameters of the Gompertz distribution for PhenoAge computation + """ + + lambda_: float = 0.0192 + coef1: float = 141.50225 + coef2: float = -0.00553 + coef3: float = 0.090165 diff --git a/vitals/schemas/score2.py b/vitals/schemas/score2.py new file mode 100644 index 0000000..336d8e0 --- /dev/null +++ b/vitals/schemas/score2.py @@ -0,0 +1,196 @@ +from pydantic import BaseModel + +# Common for all models + + +class BaselineSurvival(BaseModel): + """ + Sex-specific baseline survival probabilities for the SCORE2 model. + + These values represent the 10-year survival probability for individuals + with all risk factors at their reference values. + """ + + male: float = 0.9605 + female: float = 0.9776 + + +class CalibrationScales(BaseModel): + """ + Region and sex-specific calibration scales for Belgium (Low Risk region). + + These scales are used to calibrate the uncalibrated risk estimate to match + the population-specific cardiovascular disease incidence rates. + """ + + # Male calibration scales + male_scale1: float = -0.5699 + male_scale2: float = 0.7476 + + # Female calibration scales + female_scale1: float = -0.7380 + female_scale2: float = 0.7019 + + +# ----- For Basal score2 model + + +class Markers(BaseModel): + """Processed Score2 biomarkers with standardized units.""" + + age: float + systolic_blood_pressure: float + total_cholesterol: float + hdl_cholesterol: float + smoking: bool + is_male: bool + + +class Units(BaseModel): + """ + The expected unit to be used for Score2 computation + """ + + age: str = "years" + systolic_blood_pressure: str = "mmHg" + total_cholesterol: str = "mmol/L" + hdl_cholesterol: str = "mmol/L" + smoking: str = "yes/no" + is_male: str = "yes/no" + + +class MaleCoefficientsBaseModel(BaseModel): + """ + Male-specific coefficients for the SCORE2 Cox proportional hazards model. + """ + + # Main effects + age: float = 0.3742 + smoking: float = 0.6012 + sbp: float = 0.2777 + total_cholesterol: float = 0.1458 + hdl_cholesterol: float = -0.2698 + + # Age interaction terms + smoking_age: float = -0.0755 + sbp_age: float = -0.0255 + tchol_age: float = -0.0281 + hdl_age: float = 0.0426 + + +class FemaleCoefficientsBaseModel(BaseModel): + """ + Female-specific coefficients for the SCORE2 Cox proportional hazards model. + """ + + # Main effects + age: float = 0.4648 + smoking: float = 0.7744 + sbp: float = 0.3131 + total_cholesterol: float = 0.1002 + hdl_cholesterol: float = -0.2606 + + # Age interaction terms + smoking_age: float = -0.1088 + sbp_age: float = -0.0277 + tchol_age: float = -0.0226 + hdl_age: float = 0.0613 + + +# ----- For Diabetic score2 model + + +class MarkersDiabetes(BaseModel): + """Processed Score2-Diabetes biomarkers with standardized units.""" + + age: float + systolic_blood_pressure: float + total_cholesterol: float + hdl_cholesterol: float + smoking: bool + is_male: bool + diabetes: bool + age_at_diabetes_diagnosis: float + hba1c: float + egfr: float + + +class UnitsDiabetes(BaseModel): + """ + The expected unit to be used for Score2-Diabetes computation + """ + + age: str = "years" + systolic_blood_pressure: str = "mmHg" + total_cholesterol: str = "mmol/L" + hdl_cholesterol: str = "mmol/L" + smoking: str = "yes/no" + is_male: str = "yes/no" + diabetes: str = "yes/no" + age_at_diabetes_diagnosis: str = "years" + hba1c: str = "mmol/mol" + egfr: str = "mL/min/1.73m²" + + +class MaleCoefficientsDiabeticModel(MaleCoefficientsBaseModel): + """ + Male-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model. + Extends the base SCORE2 male coefficients with diabetes-specific parameters. + """ + + # Override base values with SCORE2-Diabetes specific values + age: float = 0.5368 + smoking: float = 0.4774 + sbp: float = 0.1322 + total_cholesterol: float = 0.1102 + hdl_cholesterol: float = -0.1087 + + # Override age interaction terms + smoking_age: float = -0.0672 + sbp_age: float = -0.0268 + tchol_age: float = -0.0181 + hdl_age: float = 0.0095 + + # Additional diabetes-specific coefficients + diabetes: float = 0.6457 + age_at_diabetes_diagnosis: float = -0.0998 + hba1c: float = 0.0955 + egfr: float = -0.0591 + egfr_squared: float = 0.0058 + + # Additional age interaction terms + diabetes_age: float = -0.0983 + hba1c_age: float = -0.0134 + egfr_age: float = 0.0115 + + +class FemaleCoefficientsDiabeticModel(FemaleCoefficientsBaseModel): + """ + Female-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model. + Extends the base SCORE2 female coefficients with diabetes-specific parameters. + """ + + # Override base values with SCORE2-Diabetes specific values + age: float = 0.6624 + smoking: float = 0.6139 + sbp: float = 0.1421 + total_cholesterol: float = 0.1127 + hdl_cholesterol: float = -0.1568 + + # Override age interaction terms + smoking_age: float = -0.1122 + sbp_age: float = -0.0167 + tchol_age: float = -0.0200 + hdl_age: float = 0.0186 + + # Additional diabetes-specific coefficients + diabetes: float = 0.8096 + age_at_diabetes_diagnosis: float = -0.1180 + hba1c: float = 0.1173 + egfr: float = -0.0640 + egfr_squared: float = 0.0062 + + # Additional age interaction terms + diabetes_age: float = -0.1272 + hba1c_age: float = -0.0196 + egfr_age: float = 0.0169 diff --git a/vitals/score2/compute.py b/vitals/score2/compute.py deleted file mode 100644 index dc794af..0000000 --- a/vitals/score2/compute.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Module for computing the SCORE2 cardiovascular risk assessment. - -This module implements the SCORE2 algorithm for 10-year cardiovascular disease risk estimation -in apparently healthy individuals aged 40-69 years in Europe. -""" - -from pathlib import Path -from typing import Literal, TypeAlias - -import numpy as np -from pydantic import BaseModel - -from vitals.biomarkers import helpers, schemas - -RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] - - -class ModelCoefficients(BaseModel): - """ - Sex-specific coefficients for the SCORE2 Cox proportional hazards model. - - These coefficients are used to calculate the 10-year risk of cardiovascular disease - based on transformed risk factors and their age interactions. - """ - - # Male coefficients - male_age: float = 0.3742 - male_smoking: float = 0.6012 - male_sbp: float = 0.2777 - male_total_cholesterol: float = 0.1458 - male_hdl_cholesterol: float = -0.2698 - - # Male interaction term coefficients - male_smoking_age: float = -0.0755 - male_sbp_age: float = -0.0255 - male_tchol_age: float = -0.0281 - male_hdl_age: float = 0.0426 - - # Female coefficients - female_age: float = 0.4648 - female_smoking: float = 0.7744 - female_sbp: float = 0.3131 - female_total_cholesterol: float = 0.1002 - female_hdl_cholesterol: float = -0.2606 - - # Female interaction term coefficients - female_smoking_age: float = -0.1088 - female_sbp_age: float = -0.0277 - female_tchol_age: float = -0.0226 - female_hdl_age: float = 0.0613 - - -class BaselineSurvival(BaseModel): - """ - Sex-specific baseline survival probabilities for the SCORE2 model. - - These values represent the 10-year survival probability for individuals - with all risk factors at their reference values. - """ - - male: float = 0.9605 - female: float = 0.9776 - - -class CalibrationScales(BaseModel): - """ - Region and sex-specific calibration scales for Belgium (Low Risk region). - - These scales are used to calibrate the uncalibrated risk estimate to match - the population-specific cardiovascular disease incidence rates. - """ - - # Male calibration scales - male_scale1: float = -0.5699 - male_scale2: float = 0.7476 - - # Female calibration scales - female_scale1: float = -0.7380 - female_scale2: float = 0.7019 - - -def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]: - """ - Calculate the 10-year cardiovascular disease risk using the SCORE2 algorithm. - - This function implements the SCORE2 risk assessment for apparently healthy individuals - aged 40-69 years in Europe. It uses sex-specific Cox proportional hazards model - coefficients and applies regional calibration for Belgium (Low Risk region). - - Args: - filepath: Path to JSON file containing biomarker data including age, sex, - systolic blood pressure, total cholesterol, HDL cholesterol, and smoking status. - - Returns: - A tuple containing: - - age: The patient's chronological age - - risk_percentage: The calibrated 10-year CVD risk as a percentage - - risk_category: Risk stratification category ("Low to moderate", "High", or "Very high") - - Raises: - ValueError: If invalid biomarker class is used - """ - # Extract biomarkers from JSON file - biomarkers = helpers.extract_biomarkers_from_json( - filepath=filepath, - biomarker_class=schemas.Score2Markers, - biomarker_units=schemas.Score2Units(), - ) - - if not isinstance(biomarkers, schemas.Score2Markers): - raise ValueError(f"Invalid biomarker class used: {biomarkers}") - - age: float = biomarkers.age - is_male: bool = biomarkers.is_male # True for male, False for female - - # Apply transformations to biomarkers - cage: float = (age - 60) / 5 - smoking: float = float(biomarkers.smoking) # Convert bool to float (1.0 or 0.0) - csbp: float = (biomarkers.systolic_blood_pressure - 120) / 20 - ctchol: float = biomarkers.total_cholesterol - 6 - chdl: float = (biomarkers.hdl_cholesterol - 1.3) / 0.5 - - # Calculate interaction terms - smoking_age: float = smoking * cage - sbp_age: float = csbp * cage - tchol_age: float = ctchol * cage - hdl_age: float = chdl * cage - - # Get model coefficients - coef: ModelCoefficients = ModelCoefficients() - - # Calculate linear predictor (x) based on sex - - linear_pred: float - baseline_survival: float - scale1: float - scale2: float - - if is_male: - linear_pred = ( - coef.male_age * cage - + coef.male_smoking * smoking - + coef.male_sbp * csbp - + coef.male_total_cholesterol * ctchol - + coef.male_hdl_cholesterol * chdl - + coef.male_smoking_age * smoking_age - + coef.male_sbp_age * sbp_age - + coef.male_tchol_age * tchol_age - + coef.male_hdl_age * hdl_age - ) - baseline_survival = BaselineSurvival().male - scale1 = CalibrationScales().male_scale1 - scale2 = CalibrationScales().male_scale2 - else: - linear_pred = ( - coef.female_age * cage - + coef.female_smoking * smoking - + coef.female_sbp * csbp - + coef.female_total_cholesterol * ctchol - + coef.female_hdl_cholesterol * chdl - + coef.female_smoking_age * smoking_age - + coef.female_sbp_age * sbp_age - + coef.female_tchol_age * tchol_age - + coef.female_hdl_age * hdl_age - ) - baseline_survival = BaselineSurvival().female - scale1 = CalibrationScales().female_scale1 - scale2 = CalibrationScales().female_scale2 - - # Calculate uncalibrated risk - uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) - - # Apply calibration for Belgium (Low Risk region) - # Calibrated 10-year risk, % = [1 - exp(-exp(scale1 + scale2*ln(-ln(1 - 10-year risk))))] * 100 - calibrated_risk: float = float( - (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) - * 100 - ) - - # Determine risk category based on age - risk_category: RiskCategory - if age < 50: - if calibrated_risk < 2.5: - risk_category = "Low to moderate" - elif calibrated_risk < 7.5: - risk_category = "High" - else: - risk_category = "Very high" - else: # age 50-69 - if calibrated_risk < 5: - risk_category = "Low to moderate" - elif calibrated_risk < 10: - risk_category = "High" - else: - risk_category = "Very high" - - return (age, round(calibrated_risk, 2), risk_category) diff --git a/vitals/score2_diabetes/__init__.py b/vitals/score2_diabetes/__init__.py deleted file mode 100644 index c1e1944..0000000 --- a/vitals/score2_diabetes/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""SCORE2-Diabetes cardiovascular risk assessment algorithm.""" - -from .compute import cardiovascular_risk - -__all__ = ["cardiovascular_risk"] diff --git a/vitals/score2_diabetes/compute.py b/vitals/score2_diabetes/compute.py deleted file mode 100644 index a628f50..0000000 --- a/vitals/score2_diabetes/compute.py +++ /dev/null @@ -1,239 +0,0 @@ -""" -Module for computing the SCORE2-Diabetes cardiovascular risk assessment. - -This module implements the SCORE2-Diabetes algorithm for 10-year cardiovascular disease -risk estimation in patients with diabetes. -""" - -import math -from pathlib import Path -from typing import Literal, TypeAlias - -import numpy as np -from pydantic import BaseModel - -from vitals.biomarkers import helpers, schemas - -RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] - - -class ModelCoefficients(BaseModel): - """ - Sex-specific coefficients for the SCORE2-Diabetes Cox proportional hazards model. - - These coefficients are used to calculate the 10-year risk of cardiovascular disease - based on transformed risk factors and their age interactions. - """ - - # Male main effect coefficients - male_age: float = 0.5368 - male_smoking: float = 0.4774 - male_sbp: float = 0.1322 - male_diabetes: float = 0.6457 - male_total_cholesterol: float = 0.1102 - male_hdl_cholesterol: float = -0.1087 - male_age_at_diabetes_diagnosis: float = -0.0998 - male_hba1c: float = 0.0955 - male_egfr: float = -0.0591 - male_egfr_squared: float = 0.0058 - - # Male interaction term coefficients - male_smoking_age: float = -0.0672 - male_sbp_age: float = -0.0268 - male_diabetes_age: float = -0.0983 - male_tchol_age: float = -0.0181 - male_hdl_age: float = 0.0095 - male_hba1c_age: float = -0.0134 - male_egfr_age: float = 0.0115 - - # Female main effect coefficients - female_age: float = 0.6624 - female_smoking: float = 0.6139 - female_sbp: float = 0.1421 - female_diabetes: float = 0.8096 - female_total_cholesterol: float = 0.1127 - female_hdl_cholesterol: float = -0.1568 - female_age_at_diabetes_diagnosis: float = -0.1180 - female_hba1c: float = 0.1173 - female_egfr: float = -0.0640 - female_egfr_squared: float = 0.0062 - - # Female interaction term coefficients - female_smoking_age: float = -0.1122 - female_sbp_age: float = -0.0167 - female_diabetes_age: float = -0.1272 - female_tchol_age: float = -0.0200 - female_hdl_age: float = 0.0186 - female_hba1c_age: float = -0.0196 - female_egfr_age: float = 0.0169 - - -class BaselineSurvival(BaseModel): - """ - Sex-specific baseline survival probabilities for the SCORE2-Diabetes model. - - These values represent the 10-year survival probability for individuals - with all risk factors at their reference values. - """ - - male: float = 0.9605 - female: float = 0.9776 - - -class CalibrationScales(BaseModel): - """ - Region and sex-specific calibration scales for Belgium (Low Risk region). - - These scales are used to calibrate the uncalibrated risk estimate to match - the population-specific cardiovascular disease incidence rates. - """ - - # Male calibration scales - male_scale1: float = -0.5699 - male_scale2: float = 0.7476 - - # Female calibration scales - female_scale1: float = -0.7380 - female_scale2: float = 0.7019 - - -def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]: - """ - Calculate the 10-year cardiovascular disease risk using the SCORE2-Diabetes algorithm. - - This function implements the SCORE2-Diabetes risk assessment for patients with diabetes. - It uses sex-specific Cox proportional hazards model coefficients and applies regional - calibration for Belgium (Low Risk region). - - Args: - filepath: Path to JSON file containing biomarker data including age, sex, - systolic blood pressure, total cholesterol, HDL cholesterol, smoking status, - diabetes status, age at diabetes diagnosis, HbA1c, and eGFR. - - Returns: - A tuple containing: - - age: The patient's chronological age - - risk_percentage: The calibrated 10-year CVD risk as a percentage - - risk_category: Risk stratification category ("Low to moderate", "High", or "Very high") - - Raises: - ValueError: If invalid biomarker class is used - """ - # Extract biomarkers from JSON file - biomarkers = helpers.extract_biomarkers_from_json( - filepath=filepath, - biomarker_class=schemas.Score2DiabetesMarkers, - biomarker_units=schemas.Score2DiabetesUnits(), - ) - - if not isinstance(biomarkers, schemas.Score2DiabetesMarkers): - raise ValueError(f"Invalid biomarker class used: {biomarkers}") - - age: float = biomarkers.age - is_male: bool = biomarkers.is_male # True for male, False for female - - # Apply transformations to biomarkers - cage: float = (age - 60) / 5 - smoking: float = float(biomarkers.smoking) # Convert bool to float (1.0 or 0.0) - csbp: float = (biomarkers.systolic_blood_pressure - 120) / 20 - diabetes: float = float(biomarkers.diabetes) # Convert bool to float (1.0 or 0.0) - ctchol: float = biomarkers.total_cholesterol - 6 - chdl: float = (biomarkers.hdl_cholesterol - 1.3) / 0.5 - cagediab: float = diabetes * (biomarkers.age_at_diabetes_diagnosis - 50) / 5 - ca1c: float = (biomarkers.hba1c - 31) / 9.34 - cegfr: float = (math.log(biomarkers.egfr) - 4.5) / 0.15 - cegfr_squared: float = cegfr * cegfr - - # Calculate interaction terms - smoking_age: float = smoking * cage - sbp_age: float = csbp * cage - diabetes_age: float = diabetes * cage - tchol_age: float = ctchol * cage - hdl_age: float = chdl * cage - hba1c_age: float = ca1c * cage - egfr_age: float = cegfr * cage - - # Get model coefficients - coef: ModelCoefficients = ModelCoefficients() - - # Calculate linear predictor (x) based on sex - linear_pred: float - baseline_survival: float - scale1: float - scale2: float - - if is_male: - linear_pred = ( - coef.male_age * cage - + coef.male_smoking * smoking - + coef.male_sbp * csbp - + coef.male_diabetes * diabetes - + coef.male_total_cholesterol * ctchol - + coef.male_hdl_cholesterol * chdl - + coef.male_age_at_diabetes_diagnosis * cagediab - + coef.male_hba1c * ca1c - + coef.male_egfr * cegfr - + coef.male_egfr_squared * cegfr_squared - + coef.male_smoking_age * smoking_age - + coef.male_sbp_age * sbp_age - + coef.male_diabetes_age * diabetes_age - + coef.male_tchol_age * tchol_age - + coef.male_hdl_age * hdl_age - + coef.male_hba1c_age * hba1c_age - + coef.male_egfr_age * egfr_age - ) - baseline_survival = BaselineSurvival().male - scale1 = CalibrationScales().male_scale1 - scale2 = CalibrationScales().male_scale2 - else: - linear_pred = ( - coef.female_age * cage - + coef.female_smoking * smoking - + coef.female_sbp * csbp - + coef.female_diabetes * diabetes - + coef.female_total_cholesterol * ctchol - + coef.female_hdl_cholesterol * chdl - + coef.female_age_at_diabetes_diagnosis * cagediab - + coef.female_hba1c * ca1c - + coef.female_egfr * cegfr - + coef.female_egfr_squared * cegfr_squared - + coef.female_smoking_age * smoking_age - + coef.female_sbp_age * sbp_age - + coef.female_diabetes_age * diabetes_age - + coef.female_tchol_age * tchol_age - + coef.female_hdl_age * hdl_age - + coef.female_hba1c_age * hba1c_age - + coef.female_egfr_age * egfr_age - ) - baseline_survival = BaselineSurvival().female - scale1 = CalibrationScales().female_scale1 - scale2 = CalibrationScales().female_scale2 - - # Calculate uncalibrated risk - uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(linear_pred)) - - # Apply calibration for Belgium (Low Risk region) - # Calibrated 10-year risk, % = [1 - exp(-exp(scale1 + scale2*ln(-ln(1 - 10-year risk))))] * 100 - calibrated_risk: float = float( - (1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) - * 100 - ) - - # Determine risk category based on age - risk_category: RiskCategory - if age < 50: - if calibrated_risk < 2.5: - risk_category = "Low to moderate" - elif calibrated_risk < 7.5: - risk_category = "High" - else: - risk_category = "Very high" - else: # age 50-69 - if calibrated_risk < 5: - risk_category = "Low to moderate" - elif calibrated_risk < 10: - risk_category = "High" - else: - risk_category = "Very high" - - return (age, round(calibrated_risk, 2), risk_category)