From 9e244fce3a98cdbbddecf136eeacb1f725d9f447 Mon Sep 17 00:00:00 2001
From: Stas Khirman <stas@pheno.ai>
Date: Sat, 14 Jun 2025 13:35:01 +0300
Subject: [PATCH 1/6] updating README with MAGE algorithm/article

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 75b6ef6..23a3fb3 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ Unless noted, iglu-r test is considered successful if it achieves precision of 0
 | cv_glu | ✅ |
 | cv_measures | ✅ |
 | ea1c | ✅ |
-| episode_calculation |  ✅| || no match in lv1_hypo_excl and lv1_hyper_excl|
+| episode_calculation |  ✅| || |
 | gmi | ✅ |
 | grade_eugly | ✅ |
 | grade_hyper | ✅ |
@@ -49,7 +49,7 @@ Unless noted, iglu-r test is considered successful if it achieves precision of 0
 | lbgi | ✅ |
 | mad_glu | ✅ |
 | mag |  ✅ | || IMHO, Original R implementation has an error |
-| mage | ✅ | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
+| mage | ✅ | || See algorithm at [MAGE](https://irinagain.github.io/iglu/articles/MAGE.html) |
 | mean_glu | ✅ |
 | median_glu | ✅ |
 | modd | ✅ |

From e99eabaa985dee04b897c7ce7517e05354dc51b8 Mon Sep 17 00:00:00 2001
From: Stas Khirman <stas@pheno.ai>
Date: Sat, 14 Jun 2025 13:35:17 +0300
Subject: [PATCH 2/6] AUC on simulated data

---
 notebooks/auc_evaluation.ipynb | 207 ++++++++++++++++++++++++++++-----
 tests/test_auc.py              |   4 +-
 2 files changed, 176 insertions(+), 35 deletions(-)

diff --git a/notebooks/auc_evaluation.ipynb b/notebooks/auc_evaluation.ipynb
index a4541bf..dbd8430 100644
--- a/notebooks/auc_evaluation.ipynb
+++ b/notebooks/auc_evaluation.ipynb
@@ -18,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -153,7 +153,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -167,7 +167,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -196,7 +196,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -294,6 +294,80 @@
     "display(iglu_auc_results)\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Lets try to run AUC on simulated data with easily calculatable AUC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>hourly_auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>subject1</td>\n",
+       "      <td>102.222222</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         id  hourly_auc\n",
+       "1  subject1  102.222222"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hours = 1\n",
+    "dt0 = 5\n",
+    "samples = int(hours*60/dt0)\n",
+    "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n",
+    "glucose_values = [80,120]* int(samples/2)\n",
+    "\n",
+    "data = pd.DataFrame({\n",
+    "    'id': ['subject1'] * samples,\n",
+    "    'time': times,\n",
+    "    'gl': glucose_values\n",
+    "})\n",
+    "\n",
+    "iglu_auc_results  = iglu_py.auc(data)\n",
+    "iglu_auc_results"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -311,7 +385,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -324,7 +398,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -360,47 +434,47 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>Subject 1</td>\n",
-       "      <td>117.0</td>\n",
+       "      <td>123.0</td>\n",
        "      <td>123.0</td>\n",
        "      <td>104.0</td>\n",
-       "      <td>-4.9</td>\n",
-       "      <td>12.5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>18.3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>Subject 2</td>\n",
-       "      <td>215.0</td>\n",
+       "      <td>219.0</td>\n",
        "      <td>218.0</td>\n",
        "      <td>197.0</td>\n",
-       "      <td>-1.4</td>\n",
-       "      <td>9.1</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>11.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>Subject 3</td>\n",
-       "      <td>147.0</td>\n",
+       "      <td>154.0</td>\n",
        "      <td>154.0</td>\n",
        "      <td>134.0</td>\n",
-       "      <td>-4.5</td>\n",
-       "      <td>9.7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>14.9</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>Subject 4</td>\n",
-       "      <td>129.0</td>\n",
+       "      <td>130.0</td>\n",
        "      <td>130.0</td>\n",
        "      <td>117.0</td>\n",
-       "      <td>-0.8</td>\n",
-       "      <td>10.3</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>11.1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>Subject 5</td>\n",
-       "      <td>171.0</td>\n",
+       "      <td>175.0</td>\n",
        "      <td>174.0</td>\n",
        "      <td>155.0</td>\n",
-       "      <td>-1.7</td>\n",
-       "      <td>10.3</td>\n",
+       "      <td>0.6</td>\n",
+       "      <td>12.9</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -408,18 +482,18 @@
       ],
       "text/plain": [
        "          id  IGLU PYTHON AUC (mg*h/dL)  IGLU AUC (mg*h/dL)  \\\n",
-       "0  Subject 1                      117.0               123.0   \n",
-       "1  Subject 2                      215.0               218.0   \n",
-       "2  Subject 3                      147.0               154.0   \n",
-       "3  Subject 4                      129.0               130.0   \n",
-       "4  Subject 5                      171.0               174.0   \n",
+       "0  Subject 1                      123.0               123.0   \n",
+       "1  Subject 2                      219.0               218.0   \n",
+       "2  Subject 3                      154.0               154.0   \n",
+       "3  Subject 4                      130.0               130.0   \n",
+       "4  Subject 5                      175.0               174.0   \n",
        "\n",
        "   ChatGPT AUC (mg*h/dL)  Difference to IGLU(%)  Difference to ChatGPt(%)  \n",
-       "0                  104.0                   -4.9                      12.5  \n",
-       "1                  197.0                   -1.4                       9.1  \n",
-       "2                  134.0                   -4.5                       9.7  \n",
-       "3                  117.0                   -0.8                      10.3  \n",
-       "4                  155.0                   -1.7                      10.3  "
+       "0                  104.0                    0.0                      18.3  \n",
+       "1                  197.0                    0.5                      11.2  \n",
+       "2                  134.0                    0.0                      14.9  \n",
+       "3                  117.0                    0.0                      11.1  \n",
+       "4                  155.0                    0.6                      12.9  "
       ]
      },
      "metadata": {},
@@ -434,6 +508,7 @@
     "# load test data into DF\n",
     "df = pd.read_csv(test_data, index_col=0)\n",
     "\n",
+    "iglu_python.IGLU_R_COMPATIBLE = False\n",
     "iglu_python_auc_results = iglu_python.auc(df)\n",
     "iglu_python_auc_results = iglu_python_auc_results.round(0)\n",
     "\n",
@@ -451,6 +526,74 @@
     "\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>hourly_auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>subject1</td>\n",
+       "      <td>100.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         id  hourly_auc\n",
+       "0  subject1       100.0"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hours = 1\n",
+    "dt0 = 5\n",
+    "samples = int(hours*60/dt0)\n",
+    "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n",
+    "glucose_values = [80,120]* int(samples/2)\n",
+    "\n",
+    "data = pd.DataFrame({\n",
+    "    'id': ['subject1'] * samples,\n",
+    "    'time': times,\n",
+    "    'gl': glucose_values\n",
+    "})\n",
+    "\n",
+    "iglu_python.IGLU_R_COMPATIBLE = True\n",
+    "iglu_python_auc_results = iglu_python.auc(data)\n",
+    "iglu_python_auc_results"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/tests/test_auc.py b/tests/test_auc.py
index d438c08..1031d8e 100644
--- a/tests/test_auc.py
+++ b/tests/test_auc.py
@@ -44,8 +44,6 @@ def test_auc_iglu_r_compatible(scenario):
     pd.set_option('future.no_silent_downcasting', True)
     expected_df = expected_df.replace({None: np.nan})
 
-
-
     result_df = iglu.auc(df, **kwargs)
 
     assert result_df is not None
@@ -65,7 +63,7 @@ def test_auc_iglu_r_compatible(scenario):
         check_freq=True,
         check_flags=True,
         check_exact=False,
-        rtol=0.01,
+        rtol=0.00001,
     )
 
 

From 9da9859d65b0a046ea108009c60bad994a6ca42a Mon Sep 17 00:00:00 2001
From: Stas Khirman <stas@pheno.ai>
Date: Sun, 15 Jun 2025 12:00:46 +0300
Subject: [PATCH 3/6] introduce is_iglu_r_compatible()

---
 iglu_python/__init__.py            |  5 +++--
 iglu_python/auc.py                 |  5 +++--
 iglu_python/episode_calculation.py |  4 ++--
 iglu_python/mag.py                 |  5 +++--
 iglu_python/utils.py               | 16 ++++++++++++----
 5 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/iglu_python/__init__.py b/iglu_python/__init__.py
index 3ed8257..0ff53fc 100644
--- a/iglu_python/__init__.py
+++ b/iglu_python/__init__.py
@@ -37,7 +37,7 @@
 from .sd_measures import sd_measures
 from .sd_roc import sd_roc
 from .summary_glu import summary_glu
-from .utils import IGLU_R_COMPATIBLE, CGMS2DayByDay, check_data_columns, gd2d_to_df
+from .utils import set_iglu_r_compatible, is_iglu_r_compatible, CGMS2DayByDay, check_data_columns, gd2d_to_df
 
 __all__ = [
     "above_percent",
@@ -62,7 +62,8 @@
     "hyper_index",
     "hypo_index",
     "igc",
-    "IGLU_R_COMPATIBLE",
+    "set_iglu_r_compatible",
+    "is_iglu_r_compatible",
     "in_range_percent",
     "iqr_glu",
     "j_index",
diff --git a/iglu_python/auc.py b/iglu_python/auc.py
index 7ca8f62..806f8fe 100644
--- a/iglu_python/auc.py
+++ b/iglu_python/auc.py
@@ -2,7 +2,8 @@
 import numpy as np
 import pandas as pd
 
-from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, IGLU_R_COMPATIBLE
+from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, is_iglu_r_compatible
+
 
 
 def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame:
@@ -65,7 +66,7 @@ def auc_single(subject_data: pd.DataFrame) -> float:
 
         # Convert gd2d to DataFrame
         input_data = gd2d_to_df(gd2d, actual_dates, dt0)
-        if IGLU_R_COMPATIBLE:
+        if is_iglu_r_compatible():
             input_data['day'] = input_data['time'].dt.floor('d')
             input_data['gl_next'] = input_data['gl'].shift(-1)
             each_day_area = input_data.groupby("day").apply(
diff --git a/iglu_python/episode_calculation.py b/iglu_python/episode_calculation.py
index 3df3989..c5b759c 100644
--- a/iglu_python/episode_calculation.py
+++ b/iglu_python/episode_calculation.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pandas as pd
 
-from .utils import IGLU_R_COMPATIBLE, CGMS2DayByDay, check_data_columns, gd2d_to_df, get_local_tz
+from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, get_local_tz, is_iglu_r_compatible
 
 
 def episode_calculation(
@@ -235,7 +235,7 @@ def episode_single(
     if dt0 is None:
         dt0 = gd2d_tuple[2]
 
-    if IGLU_R_COMPATIBLE:
+    if is_iglu_r_compatible():
         day_one = pd.to_datetime(gd2d_tuple[1][0]).tz_localize(None) # make in naive-timezone
         day_one = day_one.tz_localize('UTC') # this is how IGLU_R works
         if tz and tz!="":
diff --git a/iglu_python/mag.py b/iglu_python/mag.py
index 6751054..70fe644 100644
--- a/iglu_python/mag.py
+++ b/iglu_python/mag.py
@@ -3,7 +3,8 @@
 import numpy as np
 import pandas as pd
 
-from .utils import CGMS2DayByDay, check_data_columns, IGLU_R_COMPATIBLE
+from .utils import CGMS2DayByDay, check_data_columns, is_iglu_r_compatible
+
 
 
 def mag(
@@ -93,7 +94,7 @@ def mag_single(data: pd.DataFrame, n: int) -> float:
         # Calculate absolute differences between readings n minutes apart
         lag = readings_per_interval
 
-        if IGLU_R_COMPATIBLE:
+        if is_iglu_r_compatible():
             idx = np.arange(0,len(gl_values),lag)
             gl_values_idx = gl_values[idx]
             diffs = gl_values_idx[1:] - gl_values_idx[:-1]
diff --git a/iglu_python/utils.py b/iglu_python/utils.py
index 1572659..a7f722f 100644
--- a/iglu_python/utils.py
+++ b/iglu_python/utils.py
@@ -9,7 +9,15 @@
 
 local_tz = get_localzone()  # get the local timezone
 
-IGLU_R_COMPATIBLE = True
+_IGLU_R_COMPATIBLE = True
+
+def set_iglu_r_compatible(value: bool) -> None:  # setter for the flag read by is_iglu_r_compatible()
+    global _IGLU_R_COMPATIBLE  # rebind the module-level flag instead of creating a local
+    _IGLU_R_COMPATIBLE = value
+
+def is_iglu_r_compatible() -> bool:
+    """Return the current value of the module-level IGLU-R compatibility flag."""
+    return _IGLU_R_COMPATIBLE
 
 def localize_naive_timestamp(timestamp: datetime) -> datetime:
     """
@@ -180,7 +188,7 @@ def CGMS2DayByDay(
     time_grid = pd.date_range(
         start=start_time, end=end_time, freq=f"{dt0}min"
     )
-    if IGLU_R_COMPATIBLE:
+    if is_iglu_r_compatible():
         # remove the first time point
         time_grid = time_grid[1:]
     else:
@@ -234,7 +242,7 @@ def CGMS2DayByDay(
     interp_data = interp_data.reshape(n_days, n_points_per_day)
 
     # Get actual dates
-    if IGLU_R_COMPATIBLE:
+    if is_iglu_r_compatible():
         # convert start_time into naive datetime
         start_time = start_time.tz_localize(None)
         
@@ -254,7 +262,7 @@ def gd2d_to_df(gd2d, actual_dates, dt0):
         time.extend(day_time)
 
     df = pd.DataFrame({
-            "time": pd.Series(time, dtype='datetime64[ns]'),
+            "time": pd.Series(time),
             "gl": pd.Series(gl, dtype='float64')
         })
 

From a196cc5ed155f7527098d2c52631a5ecee2d2c6a Mon Sep 17 00:00:00 2001
From: Stas Khirman <stas@pheno.ai>
Date: Sun, 15 Jun 2025 13:42:12 +0300
Subject: [PATCH 4/6] auc evaluation

---
 R_REVIEW.md                    |   7 +
 notebooks/auc_evaluation.ipynb | 266 ++++++++++++++++++---------------
 2 files changed, 152 insertions(+), 121 deletions(-)

diff --git a/R_REVIEW.md b/R_REVIEW.md
index 9dc7b4f..da41283 100644
--- a/R_REVIEW.md
+++ b/R_REVIEW.md
@@ -7,6 +7,13 @@
       (length(na.omit(diffs))*n/60)
 ```
 
+## AUC
+
+```
+        day = rep(data_ip[[2]], 1440/dt0),
+```
+This repeats the whole sequence of days 1440/dt0 times, whereas each day should be repeated 1440/dt0 times consecutively before moving on to the next day.
+
 ## CGMS2DayByDay
 
 [ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")) + 1)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/utils.R#L208) has to be ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")))`
diff --git a/notebooks/auc_evaluation.ipynb b/notebooks/auc_evaluation.ipynb
index dbd8430..3c2f8b3 100644
--- a/notebooks/auc_evaluation.ipynb
+++ b/notebooks/auc_evaluation.ipynb
@@ -18,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -153,7 +153,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -167,7 +167,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -194,6 +194,104 @@
     "print(f\"rpy2 version: {version('rpy2')}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test on synthetic data\n",
+    "\n",
+    "- Samples - every 5 min\n",
+    "- duration - 1h\n",
+    "- values [80,120] repeated for sampling duration\n",
+    "\n",
+    "Expected hourly AUC = 100 mg.h/dL"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>hourly_auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>subject1</td>\n",
+       "      <td>102.222222</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         id  hourly_auc\n",
+       "1  subject1  102.222222"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hours = 1\n",
+    "dt0 = 5\n",
+    "samples = int(hours*60/dt0)\n",
+    "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n",
+    "glucose_values = [80,120]* int(samples/2)\n",
+    "\n",
+    "syntheticdata = pd.DataFrame({\n",
+    "    'id': ['subject1'] * samples,\n",
+    "    'time': times,\n",
+    "    'gl': glucose_values\n",
+    "})\n",
+    "\n",
+    "synthetic_iglu_auc_results  = iglu_py.auc(syntheticdata)\n",
+    "synthetic_iglu_auc_results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Note:** Incorrect AUC calculation is a result of CGMS2DayByDay function bugs:\n",
+    "- one sample shift in interpolation - results in 11 samples instead of 12\n",
+    "- actual_dates returns 2 dates instead of one\n",
+    "\n",
+    "Another suspicious line is in AUC itself: `day = rep(data_ip[[2]], 1440/dt0),` - IMHO it spreads consecutive gl samples across different days, instead of labelling all samples of one day before moving on to the next.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test on example data  "
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 6,
@@ -280,6 +378,7 @@
     }
    ],
    "source": [
+    "test_data = \"../tests/data/example_data_5_subject.csv\"\n",
     "# load test data into DF\n",
     "df = pd.read_csv(test_data, index_col=0)\n",
     "\n",
@@ -298,12 +397,41 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Lets try to run AUC on simulated data with easily calculatable AUC"
+    "## Conclusions \n",
+    "IGLU AUC calculations differ substantially from the expected ranges suggested by ChatGPT\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# IGLU_PYTHON results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add project directory to PYTHONPATH\n",
+    "import os\n",
+    "import sys\n",
+    "import pandas as pd\n",
+    "sys.path.append(os.path.abspath('..'))\n",
+    "import iglu_python\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test on synthetic data"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -333,9 +461,9 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>1</th>\n",
+       "      <th>0</th>\n",
        "      <td>subject1</td>\n",
-       "      <td>102.222222</td>\n",
+       "      <td>100.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -343,62 +471,36 @@
       ],
       "text/plain": [
        "         id  hourly_auc\n",
-       "1  subject1  102.222222"
+       "0  subject1       100.0"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "hours = 1\n",
-    "dt0 = 5\n",
-    "samples = int(hours*60/dt0)\n",
-    "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n",
-    "glucose_values = [80,120]* int(samples/2)\n",
-    "\n",
-    "data = pd.DataFrame({\n",
-    "    'id': ['subject1'] * samples,\n",
-    "    'time': times,\n",
-    "    'gl': glucose_values\n",
-    "})\n",
-    "\n",
-    "iglu_auc_results  = iglu_py.auc(data)\n",
-    "iglu_auc_results"
+    "synthetic_iglu_auc_results  = iglu_python.auc(syntheticdata)\n",
+    "synthetic_iglu_auc_results"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Conclusions \n",
-    "IGLU AUC calculations are substantially differ from expected ranges suggested by ChatGPT\n"
+    "**Note:** The result matches the expected value"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# IGLU_PYTHON results"
+    "## Test on Example data"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Add project directory to PYTHONPATH\n",
-    "import os\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(os.path.abspath('..'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -501,14 +603,9 @@
     }
    ],
    "source": [
-    "import pandas as pd\n",
-    "\n",
-    "import iglu_python\n",
-    "\n",
     "# load test data into DF\n",
     "df = pd.read_csv(test_data, index_col=0)\n",
     "\n",
-    "iglu_python.IGLU_R_COMPATIBLE = False\n",
     "iglu_python_auc_results = iglu_python.auc(df)\n",
     "iglu_python_auc_results = iglu_python_auc_results.round(0)\n",
     "\n",
@@ -518,80 +615,7 @@
     "iglu_python_auc_results['Difference to IGLU(%)'] = ((iglu_python_auc_results['IGLU PYTHON AUC (mg*h/dL)'] - iglu_python_auc_results['IGLU AUC (mg*h/dL)']) / iglu_python_auc_results['IGLU AUC (mg*h/dL)'] * 100).round(1)\n",
     "iglu_python_auc_results['Difference to ChatGPt(%)'] = ((iglu_python_auc_results['IGLU PYTHON AUC (mg*h/dL)'] - iglu_python_auc_results['ChatGPT AUC (mg*h/dL)']) / iglu_python_auc_results['ChatGPT AUC (mg*h/dL)'] * 100).round(1)\n",
     "\n",
-    "\n",
-    "\n",
-    "display(iglu_python_auc_results)\n",
-    "\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>hourly_auc</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>subject1</td>\n",
-       "      <td>100.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "         id  hourly_auc\n",
-       "0  subject1       100.0"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "hours = 1\n",
-    "dt0 = 5\n",
-    "samples = int(hours*60/dt0)\n",
-    "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n",
-    "glucose_values = [80,120]* int(samples/2)\n",
-    "\n",
-    "data = pd.DataFrame({\n",
-    "    'id': ['subject1'] * samples,\n",
-    "    'time': times,\n",
-    "    'gl': glucose_values\n",
-    "})\n",
-    "\n",
-    "iglu_python.IGLU_R_COMPATIBLE = True\n",
-    "iglu_python_auc_results = iglu_python.auc(data)\n",
-    "iglu_python_auc_results"
+    "display(iglu_python_auc_results)\n"
    ]
   },
   {
@@ -599,7 +623,7 @@
    "metadata": {},
    "source": [
     "## Conclusions  \n",
-    "IGLU_PYTHON AUC calculations are close to IGLU calculations (-5%), and closer to  suggested by ChatGPT\n",
+    "IGLU_PYTHON AUC calculations are close to IGLU calculations (within 0.6%)\n",
     "\n"
    ]
   }

From 5bbe7958ea02caac025b221120d0cf902e189aaa Mon Sep 17 00:00:00 2001
From: Stas Khirman <stas@pheno.ai>
Date: Sun, 15 Jun 2025 13:57:01 +0300
Subject: [PATCH 5/6] re-commit original test conditions

---
 tests/test_auc.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_auc.py b/tests/test_auc.py
index 1031d8e..2f30e5c 100644
--- a/tests/test_auc.py
+++ b/tests/test_auc.py
@@ -63,7 +63,7 @@ def test_auc_iglu_r_compatible(scenario):
         check_freq=True,
         check_flags=True,
         check_exact=False,
-        rtol=0.00001,
+        rtol=0.01,
     )
 
 

From 96f9c5efc34d9473779a1114908aeb364d5fb01a Mon Sep 17 00:00:00 2001
From: Stas Khirman <stas@pheno.ai>
Date: Sun, 15 Jun 2025 15:01:00 +0300
Subject: [PATCH 6/6] more details in Discrepancies notebook

---
 iglu_r_discrepancies.ipynb | 152 +++++++++++++++++++++++++++++++------
 pyproject.toml             |   2 +-
 2 files changed, 129 insertions(+), 25 deletions(-)

diff --git a/iglu_r_discrepancies.ipynb b/iglu_r_discrepancies.ipynb
index 157f079..a873ca2 100644
--- a/iglu_r_discrepancies.ipynb
+++ b/iglu_r_discrepancies.ipynb
@@ -19,6 +19,7 @@
     "\n",
     "import pandas as pd\n",
     "import rpy2.robjects as ro\n",
+    "import iglu_py\n",
     "from iglu_py import bridge"
    ]
   },
@@ -80,6 +81,13 @@
     "    return result\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Simple test "
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -182,18 +190,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "(2, 288)\n",
-      "[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00')]\n",
-      "5.0\n",
+      "gd2d.shape=(2, 288)         \t/ expected (1,288)\n",
+      "actual_dates=[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00')]     \t/ expected [Timestamp('2020-01-01 00:00:00')]\n",
+      "dt0=5.0\n",
+      "gd2d[:,0:5]=\n",
       "[[155. 160. 165.  nan  nan]\n",
-      " [ nan  nan  nan  nan  nan]]\n"
+      " [ nan  nan  nan  nan  nan]]      \t/ expected [[150. 155. 160. 165. nan]]\n"
      ]
     }
    ],
@@ -204,11 +213,10 @@
     "actual_dates = r_result['actual_dates']\n",
     "dt0 = r_result['dt0']\n",
     "\n",
-    "print(gd2d.shape)       # expected (1,288)\n",
-    "print(actual_dates)     # expected [datetime.date(2020, 1, 1)]\n",
-    "print(dt0)              # expected 5\n",
-    "\n",
-    "print(gd2d[:,0:5])      # expected [[150. 155. 160. 165. nan]]\n",
+    "print(f\"gd2d.shape={gd2d.shape}         \\t/ expected (1,288)\")       # expected (1,288)\n",
+    "print(f\"actual_dates={actual_dates}     \\t/ expected [Timestamp('2020-01-01 00:00:00')]\")     # expected [datetime.date(2020, 1, 1)]\n",
+    "print(f\"dt0={dt0}\")              # expected 5\n",
+    "print(f\"gd2d[:,0:5]=\\n{gd2d[:,0:5]}      \\t/ expected [[150. 155. 160. 165. nan]]\")      # expected [[150. 155. 160. 165. nan]]\n",
     "\n",
     "\n",
     "\n"
@@ -218,14 +226,99 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Note:** gd2d.shape is (2, 288) instead of (1, 288) and gd2d[0,:] has only 3 non-nan values instead of expected 4\n",
+    "**Note:** gd2d.shape is (2, 288) instead of (1, 288) and gd2d[0,:] has only 3 non-nan values instead of expected 4"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Impact  \n",
+    "\n",
+    "While these discrepancies may appear minor, they can significantly impact certain metric calculations.\n",
+    "\n",
+    "For example, when calculating AUC on synthetic data (shown below), we expect a result of 100, \n",
+    "but the AUC metric returns 102.2222 due to these interpolation differences."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>id</th>\n",
+       "      <th>hourly_auc</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>subject1</td>\n",
+       "      <td>102.222222</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         id  hourly_auc\n",
+       "1  subject1  102.222222"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hours = 1\n",
+    "dt0 = 5\n",
+    "samples = int(hours*60/dt0)\n",
+    "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n",
+    "glucose_values = [80,120]* int(samples/2)\n",
+    "\n",
+    "syntheticdata = pd.DataFrame({\n",
+    "    'id': ['subject1'] * samples,\n",
+    "    'time': times,\n",
+    "    'gl': glucose_values\n",
+    "})\n",
     "\n",
+    "synthetic_iglu_auc_results  = iglu_py.auc(syntheticdata)\n",
+    "synthetic_iglu_auc_results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## UTC timezone \n",
     "Now, lets try to localize to UTC timezone. "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -265,12 +358,13 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "## Midday test\n",
     "Lets try with a 4 measurement at 10am. On 5 min grid, 10am measurement has to be 10*(60/5)=120 position. "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -356,7 +450,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -402,12 +496,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "## Midnight test with UTC\n",
+    "\n",
     "Lets look now on data that spans two consecutive days"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -525,7 +621,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
@@ -537,6 +633,9 @@
       "5.0\n",
       "[[155. 160. 165.  nan  nan]\n",
       " [155. 160. 165.  nan  nan]\n",
+      " [ nan  nan  nan  nan  nan]]\n",
+      "[[ nan  nan  nan  nan 150.]\n",
+      " [ nan  nan  nan  nan  nan]\n",
       " [ nan  nan  nan  nan  nan]]\n"
      ]
     }
@@ -552,26 +651,28 @@
     "print(actual_dates)     # expected [datetime.date(2020, 1, 1)]\n",
     "print(dt0)              # expected 5\n",
     "\n",
-    "print(gd2d[:,0:5])      # expected [[150. 155. 160. 165. nan]]"
+    "print(gd2d[:,0:5])      # expected [[150. 155. 160. 165. nan]]\n",
+    "print(gd2d[:,283:])"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Note:** gd2d.shape is (3,288) instead of expected (2,288) and start date shifted to 2019-12-31"
+    "**Note:** gd2d.shape is (3,288) instead of the expected (2,288), the second-day sample is shifted to the first day, and the start date is shifted to 2019-12-31"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "## Cross over midnight with UTC\n",
     "Lets test two-days records that cross over midnight  "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -689,7 +790,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -699,8 +800,10 @@
       "(2, 288)\n",
       "[Timestamp('2019-12-31 00:00:00'), Timestamp('2020-01-01 00:00:00')]\n",
       "5.0\n",
-      "[[ nan  nan  nan  nan  nan]\n",
-      " [175. 180. 185.  nan  nan]]\n"
+      "[[ nan  nan  nan 150. 155. 160. 165. 170.]\n",
+      " [ nan  nan  nan  nan  nan  nan  nan  nan]]\n",
+      "[[ nan  nan  nan  nan  nan  nan  nan  nan]\n",
+      " [175. 180. 185.  nan  nan  nan  nan  nan]]\n"
      ]
     }
    ],
@@ -715,14 +818,15 @@
     "print(actual_dates)     # expected [datetime.date(2020, 1, 1)]\n",
     "print(dt0)              # expected 5\n",
     "\n",
-    "print(gd2d[:,0:5])      # expected [[150. 155. 160. 165. nan]]"
+    "print(gd2d[:,280:]) \n",
+    "print(gd2d[:,:8])      # expected [[150. 155. 160. 165. nan]]"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**Note:** Now we have (as expected) gd2d.shape==(2, 288), but midnight measurement shifted to a previous day."
+    "**Note:** Now we have (as expected) gd2d.shape==(2, 288), but the midnight measurement is shifted to the previous day and 2020-01-02 has disappeared from the actual dates"
    ]
   },
   {
diff --git a/pyproject.toml b/pyproject.toml
index 78e7562..8c5b951 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "iglu_python"
-version = "0.1.5"
+version = "0.1.6"
 description = "Python implementation of the iglu package for continuous glucose monitoring data analysis"
 readme = "README.md"
 requires-python = ">=3.11"