From e77d6bc57926faf12043aeb6095a890b241d70fd Mon Sep 17 00:00:00 2001 From: fbraza Date: Sat, 19 Jul 2025 13:56:03 +0200 Subject: [PATCH] refactor: add comprehensive hints issue #8 --- vitals/biomarkers/helpers.py | 15 +++++----- vitals/biomarkers/io.py | 5 ++-- vitals/phenoage/compute.py | 4 ++- vitals/score2/compute.py | 55 ++++++++++++++++++++---------------- 4 files changed, 44 insertions(+), 35 deletions(-) diff --git a/vitals/biomarkers/helpers.py b/vitals/biomarkers/helpers.py index 6c0fd9c..3580e07 100755 --- a/vitals/biomarkers/helpers.py +++ b/vitals/biomarkers/helpers.py @@ -1,4 +1,5 @@ -from typing import TypeVar +from pathlib import Path +from typing import Any, Callable, TypeVar from pydantic import BaseModel @@ -25,7 +26,7 @@ def format_unit_suffix(unit: str) -> str: return suffix -def update_biomarker_names(biomarkers: dict) -> dict: +def update_biomarker_names(biomarkers: dict[str, Any]) -> dict[str, Any]: """Update biomarker names to include unit suffixes. Args: @@ -49,7 +50,7 @@ def update_biomarker_names(biomarkers: dict) -> dict: def find_biomarker_value( - raw_biomarkers: dict, biomarker_name: str, expected_unit: str + raw_biomarkers: dict[str, Any], biomarker_name: str, expected_unit: str ) -> float | None: """ Find biomarker value by name prefix and expected unit. @@ -71,7 +72,7 @@ def find_biomarker_value( return None -def add_converted_biomarkers(biomarkers: dict) -> dict: +def add_converted_biomarkers(biomarkers: dict[str, Any]) -> dict[str, Any]: """Add converted biomarker entries for glucose, creatinine, albumin, and CRP. Args: @@ -84,7 +85,7 @@ def add_converted_biomarkers(biomarkers: dict) -> dict: result = biomarkers.copy() # Conversion mappings - conversions = { + conversions: dict[str, dict[str, str | Callable[[float], float]]] = { "glucose_mg_dl": { "target_name": "glucose_mmol_l", "target_unit": "mmol/L", @@ -136,7 +137,7 @@ def add_converted_biomarkers(biomarkers: dict) -> dict: # Skip if target already exists if target_name not in result: converted_value = conversion_info["conversion"](source_value) # type: ignore - result[target_name] = { + result[target_name] = { # type: ignore "value": round(converted_value, 4), "unit": conversion_info["target_unit"], } @@ -145,7 +146,7 @@ def add_converted_biomarkers(biomarkers: dict) -> dict: def extract_biomarkers_from_json( - filepath: str, + filepath: str | Path, biomarker_class: type[Biomarkers], biomarker_units: Units, ) -> Biomarkers: diff --git a/vitals/biomarkers/io.py b/vitals/biomarkers/io.py index f1b3ff2..8a22037 100755 --- a/vitals/biomarkers/io.py +++ b/vitals/biomarkers/io.py @@ -1,10 +1,11 @@ import json from pathlib import Path +from typing import Any from vitals.biomarkers import helpers -def update(input_file: Path) -> dict: +def update(input_file: Path) -> dict[str, Any]: """Process a single JSON file and create output file with converted biomarkers. Args: @@ -26,7 +27,7 @@ def update(input_file: Path) -> dict: return data -def write(data: dict, output_file: Path) -> None: +def write(data: dict[str, Any], output_file: Path) -> None: """Write biomarker data to a JSON file. Args: diff --git a/vitals/phenoage/compute.py b/vitals/phenoage/compute.py index 5397876..faf597d 100755 --- a/vitals/phenoage/compute.py +++ b/vitals/phenoage/compute.py @@ -1,3 +1,5 @@ +from pathlib import Path + import numpy as np from pydantic import BaseModel @@ -42,7 +44,7 @@ def __gompertz_mortality_model(weighted_risk_score: float) -> float: ) -def biological_age(filepath: str) -> tuple[float, float, float]: +def biological_age(filepath: str | Path) -> tuple[float, float, float]: """ The Phenoage score is calculated as a weighted (coefficients available in Levine et al 2018) linear combination of these variables, which was then transformed into units of years using 2 parametric diff --git a/vitals/score2/compute.py b/vitals/score2/compute.py index bc963aa..b78d8e0 100644 --- a/vitals/score2/compute.py +++ b/vitals/score2/compute.py @@ -5,11 +5,16 @@ in apparently healthy individuals aged 40-69 years in Europe. """ +from pathlib import Path +from typing import Literal, TypeAlias + import numpy as np from pydantic import BaseModel from vitals.biomarkers import helpers, schemas +RiskCategory: TypeAlias = Literal["Low to moderate", "High", "Very high"] + class ModelCoefficients(BaseModel): """ @@ -75,7 +80,7 @@ class CalibrationScales(BaseModel): female_scale2: float = 0.7019 -def cardiovascular_risk(filepath: str) -> tuple[float, float, str]: +def cardiovascular_risk(filepath: str | Path) -> tuple[float, float, RiskCategory]: """ Calculate the 10-year cardiovascular disease risk using the SCORE2 algorithm. @@ -106,28 +111,28 @@ def cardiovascular_risk(filepath: str) -> tuple[float, float, str]: if not isinstance(biomarkers, schemas.Score2Markers): raise ValueError(f"Invalid biomarker class used: {biomarkers}") - age = biomarkers.age - is_male = biomarkers.is_male # True for male, False for female + age: float = biomarkers.age + is_male: bool = biomarkers.is_male # True for male, False for female # Apply transformations to biomarkers - cage = (age - 60) / 5 - smoking = float(biomarkers.smoking) # Convert bool to float (1.0 or 0.0) - csbp = (biomarkers.systolic_blood_pressure - 120) / 20 - ctchol = biomarkers.total_cholesterol - 6 - chdl = (biomarkers.hdl_cholesterol - 1.3) / 0.5 + cage: float = (age - 60) / 5 + smoking: float = float(biomarkers.smoking) # Convert bool to float (1.0 or 0.0) + csbp: float = (biomarkers.systolic_blood_pressure - 120) / 20 + ctchol: float = biomarkers.total_cholesterol - 6 + chdl: float = (biomarkers.hdl_cholesterol - 1.3) / 0.5 # Calculate interaction terms - smoking_age = smoking * cage - sbp_age = csbp * cage - tchol_age = ctchol * cage - hdl_age = chdl * cage + smoking_age: float = smoking * cage + sbp_age: float = csbp * cage + tchol_age: float = ctchol * cage + hdl_age: float = chdl * cage # Get model coefficients - coef = ModelCoefficients() + coef: ModelCoefficients = ModelCoefficients() # Calculate linear predictor (x) based on sex if is_male: - x = ( + x: float = ( coef.male_age * cage + coef.male_smoking * smoking + coef.male_sbp * csbp + @@ -138,11 +143,11 @@ def cardiovascular_risk(filepath: str) -> tuple[float, float, str]: coef.male_tchol_age * tchol_age + coef.male_hdl_age * hdl_age ) - baseline_survival = BaselineSurvival().male - scale1 = CalibrationScales().male_scale1 - scale2 = CalibrationScales().male_scale2 + baseline_survival: float = BaselineSurvival().male + scale1: float = CalibrationScales().male_scale1 + scale2: float = CalibrationScales().male_scale2 else: - x = ( + x: float = ( coef.female_age * cage + coef.female_smoking * smoking + coef.female_sbp * csbp + @@ -153,28 +158,28 @@ def cardiovascular_risk(filepath: str) -> tuple[float, float, str]: coef.female_tchol_age * tchol_age + coef.female_hdl_age * hdl_age ) - baseline_survival = BaselineSurvival().female - scale1 = CalibrationScales().female_scale1 - scale2 = CalibrationScales().female_scale2 + baseline_survival: float = BaselineSurvival().female + scale1: float = CalibrationScales().female_scale1 + scale2: float = CalibrationScales().female_scale2 # Calculate uncalibrated risk - uncalibrated_risk = 1 - np.power(baseline_survival, np.exp(x)) + uncalibrated_risk: float = 1 - np.power(baseline_survival, np.exp(x)) # Apply calibration for Belgium (Low Risk region) # Calibrated 10-year risk, % = [1 - exp(-exp(scale1 + scale2*ln(-ln(1 - 10-year risk))))] * 100 - calibrated_risk = float((1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) * 100) + calibrated_risk: float = float((1 - np.exp(-np.exp(scale1 + scale2 * np.log(-np.log(1 - uncalibrated_risk))))) * 100) # Determine risk category based on age if age < 50: if calibrated_risk < 2.5: - risk_category = "Low to moderate" + risk_category: RiskCategory = "Low to moderate" elif calibrated_risk < 7.5: risk_category = "High" else: risk_category = "Very high" else: # age 50-69 if calibrated_risk < 5: - risk_category = "Low to moderate" + risk_category: RiskCategory = "Low to moderate" elif calibrated_risk < 10: risk_category = "High" else: