From 9e244fce3a98cdbbddecf136eeacb1f725d9f447 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Sat, 14 Jun 2025 13:35:01 +0300 Subject: [PATCH 1/6] updating README with MAGE algorithm/article --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 75b6ef6..23a3fb3 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Unless noted, iglu-r test is considered successful if it achieves precision of 0 | cv_glu | ✅ | | cv_measures | ✅ | | ea1c | ✅ | -| episode_calculation | ✅| || no match in lv1_hypo_excl and lv1_hyper_excl| +| episode_calculation | ✅| || | | gmi | ✅ | | grade_eugly | ✅ | | grade_hyper | ✅ | @@ -49,7 +49,7 @@ Unless noted, iglu-r test is considered successful if it achieves precision of 0 | lbgi | ✅ | | mad_glu | ✅ | | mag | ✅ | || IMHO, Original R implementation has an error | -| mage | ✅ | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) | +| mage | ✅ | || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) | | mean_glu | ✅ | | median_glu | ✅ | | modd | ✅ | From e99eabaa985dee04b897c7ce7517e05354dc51b8 Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Sat, 14 Jun 2025 13:35:17 +0300 Subject: [PATCH 2/6] AUC on simulated data --- notebooks/auc_evaluation.ipynb | 207 ++++++++++++++++++++++++++++----- tests/test_auc.py | 4 +- 2 files changed, 176 insertions(+), 35 deletions(-) diff --git a/notebooks/auc_evaluation.ipynb b/notebooks/auc_evaluation.ipynb index a4541bf..dbd8430 100644 --- a/notebooks/auc_evaluation.ipynb +++ b/notebooks/auc_evaluation.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, 
"outputs": [], "source": [ @@ -167,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -196,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -294,6 +294,80 @@ "display(iglu_auc_results)\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lets try to run AUC on simulated data with easily calculatable AUC" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhourly_auc
1subject1102.222222
\n", + "
" + ], + "text/plain": [ + " id hourly_auc\n", + "1 subject1 102.222222" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hours = 1\n", + "dt0 = 5\n", + "samples = int(hours*60/dt0)\n", + "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", + "glucose_values = [80,120]* int(samples/2)\n", + "\n", + "data = pd.DataFrame({\n", + " 'id': ['subject1'] * samples,\n", + " 'time': times,\n", + " 'gl': glucose_values\n", + "})\n", + "\n", + "iglu_auc_results = iglu_py.auc(data)\n", + "iglu_auc_results" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -311,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -324,7 +398,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -360,47 +434,47 @@ " \n", " 0\n", " Subject 1\n", - " 117.0\n", + " 123.0\n", " 123.0\n", " 104.0\n", - " -4.9\n", - " 12.5\n", + " 0.0\n", + " 18.3\n", " \n", " \n", " 1\n", " Subject 2\n", - " 215.0\n", + " 219.0\n", " 218.0\n", " 197.0\n", - " -1.4\n", - " 9.1\n", + " 0.5\n", + " 11.2\n", " \n", " \n", " 2\n", " Subject 3\n", - " 147.0\n", + " 154.0\n", " 154.0\n", " 134.0\n", - " -4.5\n", - " 9.7\n", + " 0.0\n", + " 14.9\n", " \n", " \n", " 3\n", " Subject 4\n", - " 129.0\n", + " 130.0\n", " 130.0\n", " 117.0\n", - " -0.8\n", - " 10.3\n", + " 0.0\n", + " 11.1\n", " \n", " \n", " 4\n", " Subject 5\n", - " 171.0\n", + " 175.0\n", " 174.0\n", " 155.0\n", - " -1.7\n", - " 10.3\n", + " 0.6\n", + " 12.9\n", " \n", " \n", "\n", @@ -408,18 +482,18 @@ ], "text/plain": [ " id IGLU PYTHON AUC (mg*h/dL) IGLU AUC (mg*h/dL) \\\n", - "0 Subject 1 117.0 123.0 \n", - "1 Subject 2 215.0 218.0 \n", - "2 Subject 3 147.0 154.0 \n", - "3 Subject 4 129.0 130.0 \n", - "4 Subject 5 171.0 174.0 \n", + "0 Subject 1 123.0 123.0 \n", + "1 Subject 2 219.0 218.0 \n", + "2 Subject 3 154.0 154.0 \n", + 
"3 Subject 4 130.0 130.0 \n", + "4 Subject 5 175.0 174.0 \n", "\n", " ChatGPT AUC (mg*h/dL) Difference to IGLU(%) Difference to ChatGPt(%) \n", - "0 104.0 -4.9 12.5 \n", - "1 197.0 -1.4 9.1 \n", - "2 134.0 -4.5 9.7 \n", - "3 117.0 -0.8 10.3 \n", - "4 155.0 -1.7 10.3 " + "0 104.0 0.0 18.3 \n", + "1 197.0 0.5 11.2 \n", + "2 134.0 0.0 14.9 \n", + "3 117.0 0.0 11.1 \n", + "4 155.0 0.6 12.9 " ] }, "metadata": {}, @@ -434,6 +508,7 @@ "# load test data into DF\n", "df = pd.read_csv(test_data, index_col=0)\n", "\n", + "iglu_python.IGLU_R_COMPATIBLE = False\n", "iglu_python_auc_results = iglu_python.auc(df)\n", "iglu_python_auc_results = iglu_python_auc_results.round(0)\n", "\n", @@ -451,6 +526,74 @@ "\n" ] }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhourly_auc
0subject1100.0
\n", + "
" + ], + "text/plain": [ + " id hourly_auc\n", + "0 subject1 100.0" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hours = 1\n", + "dt0 = 5\n", + "samples = int(hours*60/dt0)\n", + "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", + "glucose_values = [80,120]* int(samples/2)\n", + "\n", + "data = pd.DataFrame({\n", + " 'id': ['subject1'] * samples,\n", + " 'time': times,\n", + " 'gl': glucose_values\n", + "})\n", + "\n", + "iglu_python.IGLU_R_COMPATIBLE = True\n", + "iglu_python_auc_results = iglu_python.auc(data)\n", + "iglu_python_auc_results" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/tests/test_auc.py b/tests/test_auc.py index d438c08..1031d8e 100644 --- a/tests/test_auc.py +++ b/tests/test_auc.py @@ -44,8 +44,6 @@ def test_auc_iglu_r_compatible(scenario): pd.set_option('future.no_silent_downcasting', True) expected_df = expected_df.replace({None: np.nan}) - - result_df = iglu.auc(df, **kwargs) assert result_df is not None @@ -65,7 +63,7 @@ def test_auc_iglu_r_compatible(scenario): check_freq=True, check_flags=True, check_exact=False, - rtol=0.01, + rtol=0.00001, ) From 9da9859d65b0a046ea108009c60bad994a6ca42a Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Sun, 15 Jun 2025 12:00:46 +0300 Subject: [PATCH 3/6] introduce is_iglu_r_compatible() --- iglu_python/__init__.py | 5 +++-- iglu_python/auc.py | 5 +++-- iglu_python/episode_calculation.py | 4 ++-- iglu_python/mag.py | 5 +++-- iglu_python/utils.py | 16 ++++++++++++---- 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/iglu_python/__init__.py b/iglu_python/__init__.py index 3ed8257..0ff53fc 100644 --- a/iglu_python/__init__.py +++ b/iglu_python/__init__.py @@ -37,7 +37,7 @@ from .sd_measures import sd_measures from .sd_roc import sd_roc from .summary_glu import summary_glu -from .utils import IGLU_R_COMPATIBLE, CGMS2DayByDay, check_data_columns, gd2d_to_df +from .utils import 
set_iglu_r_compatible, is_iglu_r_compatible, CGMS2DayByDay, check_data_columns, gd2d_to_df __all__ = [ "above_percent", @@ -62,7 +62,8 @@ "hyper_index", "hypo_index", "igc", - "IGLU_R_COMPATIBLE", + "set_iglu_r_compatible", + "is_iglu_r_compatible", "in_range_percent", "iqr_glu", "j_index", diff --git a/iglu_python/auc.py b/iglu_python/auc.py index 7ca8f62..806f8fe 100644 --- a/iglu_python/auc.py +++ b/iglu_python/auc.py @@ -2,7 +2,8 @@ import numpy as np import pandas as pd -from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, IGLU_R_COMPATIBLE +from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, is_iglu_r_compatible + def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame: @@ -65,7 +66,7 @@ def auc_single(subject_data: pd.DataFrame) -> float: # Convert gd2d to DataFrame input_data = gd2d_to_df(gd2d, actual_dates, dt0) - if IGLU_R_COMPATIBLE: + if is_iglu_r_compatible(): input_data['day'] = input_data['time'].dt.floor('d') input_data['gl_next'] = input_data['gl'].shift(-1) each_day_area = input_data.groupby("day").apply( diff --git a/iglu_python/episode_calculation.py b/iglu_python/episode_calculation.py index 3df3989..c5b759c 100644 --- a/iglu_python/episode_calculation.py +++ b/iglu_python/episode_calculation.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from .utils import IGLU_R_COMPATIBLE, CGMS2DayByDay, check_data_columns, gd2d_to_df, get_local_tz +from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, get_local_tz, is_iglu_r_compatible def episode_calculation( @@ -235,7 +235,7 @@ def episode_single( if dt0 is None: dt0 = gd2d_tuple[2] - if IGLU_R_COMPATIBLE: + if is_iglu_r_compatible(): day_one = pd.to_datetime(gd2d_tuple[1][0]).tz_localize(None) # make in naive-timezone day_one = day_one.tz_localize('UTC') # this is how IGLU_R works if tz and tz!="": diff --git a/iglu_python/mag.py b/iglu_python/mag.py index 6751054..70fe644 100644 --- a/iglu_python/mag.py +++ b/iglu_python/mag.py @@ -3,7 +3,8 @@ import 
numpy as np import pandas as pd -from .utils import CGMS2DayByDay, check_data_columns, IGLU_R_COMPATIBLE +from .utils import CGMS2DayByDay, check_data_columns, is_iglu_r_compatible + def mag( @@ -93,7 +94,7 @@ def mag_single(data: pd.DataFrame, n: int) -> float: # Calculate absolute differences between readings n minutes apart lag = readings_per_interval - if IGLU_R_COMPATIBLE: + if is_iglu_r_compatible(): idx = np.arange(0,len(gl_values),lag) gl_values_idx = gl_values[idx] diffs = gl_values_idx[1:] - gl_values_idx[:-1] diff --git a/iglu_python/utils.py b/iglu_python/utils.py index 1572659..a7f722f 100644 --- a/iglu_python/utils.py +++ b/iglu_python/utils.py @@ -9,7 +9,15 @@ local_tz = get_localzone() # get the local timezone -IGLU_R_COMPATIBLE = True +_IGLU_R_COMPATIBLE = True + +def set_iglu_r_compatible(value: bool) -> None: + global _IGLU_R_COMPATIBLE + _IGLU_R_COMPATIBLE = value + +def is_iglu_r_compatible() -> bool: + global _IGLU_R_COMPATIBLE + return _IGLU_R_COMPATIBLE def localize_naive_timestamp(timestamp: datetime) -> datetime: """ @@ -180,7 +188,7 @@ def CGMS2DayByDay( time_grid = pd.date_range( start=start_time, end=end_time, freq=f"{dt0}min" ) - if IGLU_R_COMPATIBLE: + if is_iglu_r_compatible(): # remove the first time point time_grid = time_grid[1:] else: @@ -234,7 +242,7 @@ def CGMS2DayByDay( interp_data = interp_data.reshape(n_days, n_points_per_day) # Get actual dates - if IGLU_R_COMPATIBLE: + if is_iglu_r_compatible(): # convert start_time into naive datetime start_time = start_time.tz_localize(None) @@ -254,7 +262,7 @@ def gd2d_to_df(gd2d, actual_dates, dt0): time.extend(day_time) df = pd.DataFrame({ - "time": pd.Series(time, dtype='datetime64[ns]'), + "time": pd.Series(time), "gl": pd.Series(gl, dtype='float64') }) From a196cc5ed155f7527098d2c52631a5ecee2d2c6a Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Sun, 15 Jun 2025 13:42:12 +0300 Subject: [PATCH 4/6] auc evaluation --- R_REVIEW.md | 7 + notebooks/auc_evaluation.ipynb | 266 
++++++++++++++++++--------------- 2 files changed, 152 insertions(+), 121 deletions(-) diff --git a/R_REVIEW.md b/R_REVIEW.md index 9dc7b4f..da41283 100644 --- a/R_REVIEW.md +++ b/R_REVIEW.md @@ -7,6 +7,13 @@ (length(na.omit(diffs))*n/60) ``` +## AUC + +``` + day = rep(data_ip[[2]], 1440/dt0), +``` +This repeats the whole sequence of days 1440/dt0 times (day1, day2, ..., day1, day2, ...), whereas each day has to be repeated 1440/dt0 times before moving on to the next one (day1, day1, ..., day2, day2, ...), e.g. via `rep(data_ip[[2]], each = 1440/dt0)` + ## CGMS2DayByDay [ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")) + 1)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/utils.R#L208) has to be ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")))` diff --git a/notebooks/auc_evaluation.ipynb b/notebooks/auc_evaluation.ipynb index dbd8430..3c2f8b3 100644 --- a/notebooks/auc_evaluation.ipynb +++ b/notebooks/auc_evaluation.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -167,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -194,6 +194,104 @@ "print(f\"rpy2 version: {version('rpy2')}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test on synthetic data\n", + "\n", + "- Samples - every 5 min\n", + "- duration - 1h\n", + "- values [80,120] repeated for sampling duration\n", + "\n", + "Expected hourly AUC = 100 mg.h/dL" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhourly_auc
1subject1102.222222
\n", + "
" + ], + "text/plain": [ + " id hourly_auc\n", + "1 subject1 102.222222" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hours = 1\n", + "dt0 = 5\n", + "samples = int(hours*60/dt0)\n", + "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", + "glucose_values = [80,120]* int(samples/2)\n", + "\n", + "syntheticdata = pd.DataFrame({\n", + " 'id': ['subject1'] * samples,\n", + " 'time': times,\n", + " 'gl': glucose_values\n", + "})\n", + "\n", + "synthetic_iglu_auc_results = iglu_py.auc(syntheticdata)\n", + "synthetic_iglu_auc_results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note:** Incorrect AUC calculation is a result of CGMS2DayByDay function bugs:\n", + "- one sample shift in interpolation - results in 11 samples instead of 12\n", + "- actual_dates returns 2 dates instead of one\n", + "\n", + "Additional suspicious code is in AUC itself: `day = rep(data_ip[[2]], 1440/dt0),` - IMHO it resample sequential gl to different days, instead of sequential sampling for each day before sampling for the next \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test on example data " + ] + }, { "cell_type": "code", "execution_count": 6, @@ -280,6 +378,7 @@ } ], "source": [ + "test_data = \"../tests/data/example_data_5_subject.csv\"\n", "# load test data into DF\n", "df = pd.read_csv(test_data, index_col=0)\n", "\n", @@ -298,12 +397,41 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Lets try to run AUC on simulated data with easily calculatable AUC" + "## Conclusions \n", + "IGLU AUC calculations are substantially differ from expected ranges suggested by ChatGPT\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# IGLU_PYTHON results" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Add project directory to PYTHONPATH\n", + 
"import os\n", + "import sys\n", + "import pandas as pd\n", + "sys.path.append(os.path.abspath('..'))\n", + "import iglu_python\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test on synthetic data" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -333,9 +461,9 @@ " \n", " \n", " \n", - " 1\n", + " 0\n", " subject1\n", - " 102.222222\n", + " 100.0\n", " \n", " \n", "\n", @@ -343,62 +471,36 @@ ], "text/plain": [ " id hourly_auc\n", - "1 subject1 102.222222" + "0 subject1 100.0" ] }, - "execution_count": 18, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "hours = 1\n", - "dt0 = 5\n", - "samples = int(hours*60/dt0)\n", - "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", - "glucose_values = [80,120]* int(samples/2)\n", - "\n", - "data = pd.DataFrame({\n", - " 'id': ['subject1'] * samples,\n", - " 'time': times,\n", - " 'gl': glucose_values\n", - "})\n", - "\n", - "iglu_auc_results = iglu_py.auc(data)\n", - "iglu_auc_results" + "synthetic_iglu_auc_results = iglu_python.auc(syntheticdata)\n", + "synthetic_iglu_auc_results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Conclusions \n", - "IGLU AUC calculations are substantially differ from expected ranges suggested by ChatGPT\n" + "**Note:** Result match expected" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# IGLU_PYTHON results" + "## Test on Example data" ] }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Add project directory to PYTHONPATH\n", - "import os\n", - "import sys\n", - "\n", - "sys.path.append(os.path.abspath('..'))" - ] - }, - { - "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -501,14 +603,9 @@ } ], "source": [ - "import pandas as pd\n", - "\n", - "import iglu_python\n", - "\n", "# load test 
data into DF\n", "df = pd.read_csv(test_data, index_col=0)\n", "\n", - "iglu_python.IGLU_R_COMPATIBLE = False\n", "iglu_python_auc_results = iglu_python.auc(df)\n", "iglu_python_auc_results = iglu_python_auc_results.round(0)\n", "\n", @@ -518,80 +615,7 @@ "iglu_python_auc_results['Difference to IGLU(%)'] = ((iglu_python_auc_results['IGLU PYTHON AUC (mg*h/dL)'] - iglu_python_auc_results['IGLU AUC (mg*h/dL)']) / iglu_python_auc_results['IGLU AUC (mg*h/dL)'] * 100).round(1)\n", "iglu_python_auc_results['Difference to ChatGPt(%)'] = ((iglu_python_auc_results['IGLU PYTHON AUC (mg*h/dL)'] - iglu_python_auc_results['ChatGPT AUC (mg*h/dL)']) / iglu_python_auc_results['ChatGPT AUC (mg*h/dL)'] * 100).round(1)\n", "\n", - "\n", - "\n", - "display(iglu_python_auc_results)\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idhourly_auc
0subject1100.0
\n", - "
" - ], - "text/plain": [ - " id hourly_auc\n", - "0 subject1 100.0" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hours = 1\n", - "dt0 = 5\n", - "samples = int(hours*60/dt0)\n", - "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", - "glucose_values = [80,120]* int(samples/2)\n", - "\n", - "data = pd.DataFrame({\n", - " 'id': ['subject1'] * samples,\n", - " 'time': times,\n", - " 'gl': glucose_values\n", - "})\n", - "\n", - "iglu_python.IGLU_R_COMPATIBLE = True\n", - "iglu_python_auc_results = iglu_python.auc(data)\n", - "iglu_python_auc_results" + "display(iglu_python_auc_results)\n" ] }, { @@ -599,7 +623,7 @@ "metadata": {}, "source": [ "## Conclusions \n", - "IGLU_PYTHON AUC calculations are close to IGLU calculations (-5%), and closer to suggested by ChatGPT\n", + "IGLU_PYTHON AUC calculations are close to IGLU calculations (-0.5%)\n", "\n" ] } From 5bbe7958ea02caac025b221120d0cf902e189aaa Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Sun, 15 Jun 2025 13:57:01 +0300 Subject: [PATCH 5/6] re-commit original test conditions --- tests/test_auc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_auc.py b/tests/test_auc.py index 1031d8e..2f30e5c 100644 --- a/tests/test_auc.py +++ b/tests/test_auc.py @@ -63,7 +63,7 @@ def test_auc_iglu_r_compatible(scenario): check_freq=True, check_flags=True, check_exact=False, - rtol=0.00001, + rtol=0.01, ) From 96f9c5efc34d9473779a1114908aeb364d5fb01a Mon Sep 17 00:00:00 2001 From: Stas Khirman Date: Sun, 15 Jun 2025 15:01:00 +0300 Subject: [PATCH 6/6] more details in Discrepancies notebook --- iglu_r_discrepancies.ipynb | 152 +++++++++++++++++++++++++++++++------ pyproject.toml | 2 +- 2 files changed, 129 insertions(+), 25 deletions(-) diff --git a/iglu_r_discrepancies.ipynb b/iglu_r_discrepancies.ipynb index 157f079..a873ca2 100644 --- a/iglu_r_discrepancies.ipynb +++ b/iglu_r_discrepancies.ipynb @@ -19,6 
+19,7 @@ "\n", "import pandas as pd\n", "import rpy2.robjects as ro\n", + "import iglu_py\n", "from iglu_py import bridge" ] }, @@ -80,6 +81,13 @@ " return result\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simple test " + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -182,18 +190,19 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(2, 288)\n", - "[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00')]\n", - "5.0\n", + "gd2d.shape=(2, 288) \t/ expected (1,288)\n", + "actual_dates=[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00')] \t/ expected [Timestamp('2020-01-01 00:00:00')]\n", + "dt0=5.0\n", + "gd2d[:,0:5]=\n", "[[155. 160. 165. nan nan]\n", - " [ nan nan nan nan nan]]\n" + " [ nan nan nan nan nan]] \t/ expected [[150. 155. 160. 165. nan]]\n" ] } ], @@ -204,11 +213,10 @@ "actual_dates = r_result['actual_dates']\n", "dt0 = r_result['dt0']\n", "\n", - "print(gd2d.shape) # expected (1,288)\n", - "print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n", - "print(dt0) # expected 5\n", - "\n", - "print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]\n", + "print(f\"gd2d.shape={gd2d.shape} \\t/ expected (1,288)\") # expected (1,288)\n", + "print(f\"actual_dates={actual_dates} \\t/ expected [Timestamp('2020-01-01 00:00:00')]\") # expected [datetime.date(2020, 1, 1)]\n", + "print(f\"dt0={dt0}\") # expected 5\n", + "print(f\"gd2d[:,0:5]=\\n{gd2d[:,0:5]} \\t/ expected [[150. 155. 160. 165. nan]]\") # expected [[150. 155. 160. 165. 
nan]]\n", "\n", "\n", "\n" @@ -218,14 +226,99 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Note:** gd2d.shape is (2, 288) instead of (1, 288) and gd2d[0,:] has only 3 non-nan values instead of expected 4\n", + "**Note:** gd2d.shape is (2, 288) instead of (1, 288) and gd2d[0,:] has only 3 non-nan values instead of expected 4" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Impact \n", + "\n", + "While these discrepancies may appear minor, they can significantly impact certain metric calculations.\n", + "\n", + "For example, when calculating AUC on synthetic data (shown below), we expect a result of 100, \n", + "but the AUC metric returns 102.2222 due to these interpolation differences." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idhourly_auc
1subject1102.222222
\n", + "
" + ], + "text/plain": [ + " id hourly_auc\n", + "1 subject1 102.222222" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hours = 1\n", + "dt0 = 5\n", + "samples = int(hours*60/dt0)\n", + "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", + "glucose_values = [80,120]* int(samples/2)\n", + "\n", + "syntheticdata = pd.DataFrame({\n", + " 'id': ['subject1'] * samples,\n", + " 'time': times,\n", + " 'gl': glucose_values\n", + "})\n", "\n", + "synthetic_iglu_auc_results = iglu_py.auc(syntheticdata)\n", + "synthetic_iglu_auc_results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## UTC timezone \n", "Now, lets try to localize to UTC timezone. " ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -265,12 +358,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "## Midday test\n", "Lets try with a 4 measurement at 10am. On 5 min grid, 10am measurement has to be 10*(60/5)=120 position. " ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -356,7 +450,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -402,12 +496,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "## Midnight test with UTC\n", + "\n", "Lets look now on data that spans two consecutive days" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -525,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -537,6 +633,9 @@ "5.0\n", "[[155. 160. 165. nan nan]\n", " [155. 160. 165. 
nan nan]\n", + " [ nan nan nan nan nan]]\n", + "[[ nan nan nan nan 150.]\n", + " [ nan nan nan nan nan]\n", + " [ nan nan nan nan nan]]\n" ] } @@ -552,26 +651,28 @@ "print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n", "print(dt0) # expected 5\n", "\n", - "print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]" + "print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]\n", + "print(gd2d[:,283:])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Note:** gd2d.shape is (3,288) instead of expected (2,288) and start date shifted to 2019-12-31" + "**Note:** gd2d.shape is (3,288) instead of the expected (2,288), the second-day sample is shifted to the first day, and the start date is shifted to 2019-12-31" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "## Cross over midnight with UTC\n", "Lets test two-days records that cross over midnight " ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -689,7 +790,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -699,8 +800,10 @@ "(2, 288)\n", "[Timestamp('2019-12-31 00:00:00'), Timestamp('2020-01-01 00:00:00')]\n", "5.0\n", - "[[ nan nan nan nan nan]\n", - " [175. 180. 185. nan nan]]\n" + "[[ nan nan nan 150. 155. 160. 165. 170.]\n", + " [ nan nan nan nan nan nan nan nan]]\n", + "[[ nan nan nan nan nan nan nan nan]\n", + " [175. 180. 185. nan nan nan nan nan]]\n" ] } ], @@ -715,14 +818,15 @@ "print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n", "print(dt0) # expected 5\n", "\n", - "print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]" + "print(gd2d[:,280:]) \n", + "print(gd2d[:,:8]) # expected [[150. 155. 160. 165. nan]]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**Note:** Now we have (as expected) gd2d.shape==(2, 288), but midnight measurement shifted to a previous day."
+ "**Note:** Now we have (as expected) gd2d.shape==(2, 288), but midnight measurement shifted to a previous day and 2020-01-02 disappeared from actual dates" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 78e7562..8c5b951 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "iglu_python" -version = "0.1.5" +version = "0.1.6" description = "Python implementation of the iglu package for continuous glucose monitoring data analysis" readme = "README.md" requires-python = ">=3.11"