From af0bb116ad82c664adf91e07789bcbafe1f48f99 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Fri, 20 Jun 2025 17:33:53 +0200
Subject: [PATCH 1/8] add new coverage badge

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 8b4ba35..a539801 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@
 [![Build](https://github.com/lycosystem/lydata/actions/workflows/build.yml/badge.svg)](https://github.com/lycosystem/lydata/actions/workflows/build.yml)
 [![Tests](https://github.com/lycosystem/lydata/actions/workflows/tests.yml/badge.svg)](https://github.com/lycosystem/lydata/actions/workflows/tests.yml)
 [![Documentation Status](https://readthedocs.org/projects/lydata/badge/?version=stable)](https://lydata.readthedocs.io/en/stable/?badge=stable)
+[![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/lycosystem/lydata-package/python-coverage-comment-action-data/endpoint.json)](https://htmlpreview.github.io/?https://github.com/lycosystem/lydata-package/blob/python-coverage-comment-action-data/htmlcov/index.html)
 
 This repository provides a Python library for loading, manipulating, and validating the datasets available on [lyDATA](https://github.com/lycosystem/lydata).
 

From 935c27279a8cf49ca409a07622336cdf289479c1 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 26 Jun 2025 10:44:35 +0200
Subject: [PATCH 2/8] test: convert examples to valid doctests

---
 README.md | 106 +++++++++++++++++++++++++-----------------------------
 1 file changed, 49 insertions(+), 57 deletions(-)

diff --git a/README.md b/README.md
index a539801..1b26aa4 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 # Python Library for Loading and Manipulating lyDATA Tables
 
-[![Build](https://github.com/lycosystem/lydata/actions/workflows/build.yml/badge.svg)](https://github.com/lycosystem/lydata/actions/workflows/build.yml)
-[![Tests](https://github.com/lycosystem/lydata/actions/workflows/tests.yml/badge.svg)](https://github.com/lycosystem/lydata/actions/workflows/tests.yml)
-[![Documentation Status](https://readthedocs.org/projects/lydata/badge/?version=stable)](https://lydata.readthedocs.io/en/stable/?badge=stable)
+[![Build](https://github.com/lycosystem/lydata-package/actions/workflows/release.yml/badge.svg)](https://github.com/lycosystem/lydata-package/actions/workflows/release.yml)
+[![Tests](https://github.com/lycosystem/lydata-package/actions/workflows/tests.yml/badge.svg)](https://github.com/lycosystem/lydata-package/actions/workflows/tests.yml)
+[![Documentation Status](https://readthedocs.org/projects/lydata/badge/?version=stable)](https://lydata.readthedocs.io/stable/?badge=stable)
 [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/lycosystem/lydata-package/python-coverage-comment-action-data/endpoint.json)](https://htmlpreview.github.io/?https://github.com/lycosystem/lydata-package/blob/python-coverage-comment-action-data/htmlcov/index.html)
 
 This repository provides a Python library for loading, manipulating, and validating the datasets available on [lyDATA](https://github.com/lycosystem/lydata).
@@ -37,72 +37,64 @@ pip install -e .
 The first and most common use case would probably listing and loading the published datasets:
 
 ```python
-import lydata
-
-for dataset_spec in lydata.available_datasets(
-    year=2023,              # show all datasets added in 2023
-    use_github=True,        # do not search on disk, but rather on GitHub
-    ref="61a17e",           # may be some specific hash/tag/branch
-):
-    print(dataset_spec.name)
-
-# output:
-# 2023-clb-multisite
-# 2023-isb-multisite
-
-first_dataset = next(lydata.load_datasets(
-    subsite="oropharynx",   # merge data that include oropharyngeal tumor patients
-    use_github=True,        # again, search GitHub, not on disk (which is the default)
-))
-print(first_dataset.head())
-
-# output:
-#   patient                              ... positive_dissected
-#         #                              ...             contra
-#        id         institution     sex  ...                III   IV    V
-# 0    P011  Centre Léon Bérard    male  ...                0.0  0.0  0.0
-# 1    P012  Centre Léon Bérard  female  ...                0.0  0.0  0.0
-# 2    P014  Centre Léon Bérard    male  ...                0.0  0.0  NaN
-# 3    P015  Centre Léon Bérard    male  ...                0.0  0.0  NaN
-# 4    P018  Centre Léon Bérard    male  ...                NaN  NaN  NaN
-#
-# [5 rows x 82 columns]
+>>> import lydata
+>>> for dataset_spec in lydata.available_datasets(
+...     year=2023,              # show all datasets added in 2023
+...     ref="61a17e",           # may be some specific hash/tag/branch
+... ):
+...     print(dataset_spec.name)
+2023-clb-multisite
+2023-isb-multisite
+
+# return generator of datasets that include oropharyngeal tumor patients
+>>> first_dataset = next(lydata.load_datasets(subsite="oropharynx"))
+>>> print(first_dataset.head())
+... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+  patient                              ... positive_dissected
+        #                              ...             contra
+       id         institution     sex  ...                III   IV    V
+0    P011  Centre Léon Bérard    male  ...                0.0  0.0  0.0
+1    P012  Centre Léon Bérard  female  ...                0.0  0.0  0.0
+2    P014  Centre Léon Bérard    male  ...                0.0  0.0  NaN
+3    P015  Centre Léon Bérard    male  ...                0.0  0.0  NaN
+4    P018  Centre Léon Bérard    male  ...                NaN  NaN  NaN
+[5 rows x 82 columns]
+
 ```
 
 And since the three-level header of the tables is a little unwieldy at times, we also provide some shortcodes via a custom pandas accessor. As soon as `lydata` is imported it can be used like this:
 
 ```python
-print(first_dataset.ly.age)
-
-# output:
-# 0      67
-# 1      62
-#        ..
-# 261    60
-# 262    60
-# Name: (patient, #, age), Length: 263, dtype: int64
+>>> print(first_dataset.ly.age)
+... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+0      67
+1      62
+      ...
+261    60
+262    60
+Name: (patient, #, age), Length: 263, dtype: int64
+
 ```
 
 And we have implemented `Q` and `C` objects inspired by Django that allow easier querying of the tables:
 
 ```python
-from lydata import C
+>>> from lydata import C
 
 # select patients younger than 50 that are not HPV positive (includes NaNs)
-query_result = first_dataset.ly.query((C("age") < 50) & ~(C("hpv") == True))
-print(query_result)
-
-# output:
-#     patient                                  ... positive_dissected
-#           #                                  ...             contra
-#          id         institution     sex age  ...                 II  III   IV    V
-# 2      P014  Centre Léon Bérard    male  43  ...                1.0  0.0  0.0  NaN
-# 7      P024  Centre Léon Bérard    male  45  ...                NaN  NaN  NaN  NaN
-# ..      ...                 ...     ...  ..  ...                ...  ...  ...  ...
-# 212    P270  Centre Léon Bérard    male  47  ...                0.0  0.0  0.0  NaN
-# 217    P275  Centre Léon Bérard    male  49  ...                0.0  0.0  0.0  NaN
-#
-# [13 rows x 82 columns]
+>>> query_result = first_dataset.ly.query((C("age") < 50) & ~(C("hpv") == True))
+>>> print(query_result)
+... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    patient                                                ... positive_dissected
+          #                                                ...             contra
+         id         institution     sex age ... hpv_status ...       III   IV   V
+11     P030  Centre Léon Bérard    male  49 ...      False ...       NaN  NaN NaN
+12     P031  Centre Léon Bérard    male  46 ...      False ...       NaN  NaN NaN
+                                                           ...
+249    P307  Centre Léon Bérard    male  49 ...      False ...       NaN  NaN NaN
+257    P315  Centre Léon Bérard  female  43 ...      False ...       NaN  NaN NaN
+[13 rows x 82 columns]
+
 ```
 
 For more details and further examples or use-cases, have a look at the [official documentation](https://lydata.readthedocs.org/)

From afa553f8d00fbd7d1578b23ec878a31f4d6de5eb Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 26 Jun 2025 10:45:18 +0200
Subject: [PATCH 3/8] ci: run doctest over README.md examples

---
 .github/workflows/{tests.yaml => tests.yml} | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
 rename .github/workflows/{tests.yaml => tests.yml} (81%)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yml
similarity index 81%
rename from .github/workflows/tests.yaml
rename to .github/workflows/tests.yml
index 3ac16c8..2bc5af8 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yml
@@ -42,10 +42,19 @@ jobs:
         env:
           COVERAGE_FILE: .coverage.doctests
 
+      # Lastly, we can make sure the examples in the README.md are up to date.
+      # Because doctests can occur in any text file, we can run it over those examples
+      # as well:
+      - name: Test README.md examples
+        if: success() || failure()
+        run: coverage run -m doctest README.md
+        env:
+          COVERAGE_FILE: .coverage.readme
+
       # Lastly, we collect all files that start with `.coverage` into one file and
       # create a report either as a comment on the PR or in a separate branch if its
       # a commit to the main branch. From that branch we can put badges and coverage
-      # reports into e.g. our main README.md 
+      # reports into e.g. our main README.md
       - name: Add coverage comment
         if: success() || failure()   # run these even if previous step fails
         uses: py-cov-action/python-coverage-comment-action@v3

From ba0939de2749d70fef6a7cbefca8dd4efd5b22c4 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 26 Jun 2025 10:45:48 +0200
Subject: [PATCH 4/8] ci: add (test)build & publish workflows

---
 .github/workflows/release.yml  | 56 +++++++++++++++++++++++++++++++++
 .github/workflows/testpypi.yml | 57 ++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 .github/workflows/release.yml
 create mode 100644 .github/workflows/testpypi.yml

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..3b31531
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,56 @@
+name: Build
+
+on:
+  release:
+    types: [ created ]
+
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build package from source
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+          fetch-depth: 0
+      - name: Install Python 3
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Install build tools
+        run: |
+          python3 -m pip install build --user
+      - name: Build package
+        run: |
+          python3 -m build
+      - name: Upload to CI runner
+        uses: actions/upload-artifact@v4
+        with:
+          name: built-package
+          path: dist/
+
+  pypi-publish:
+    name: Publish built package on PyPI
+    runs-on: ubuntu-latest
+    needs:
+      - build
+
+    # Specifying a GitHub environment is optional, but strongly encouraged
+    environment:
+      name: pypi
+      url: https://pypi.org/p/lydata
+    permissions:
+      # IMPORTANT: this permission is mandatory for Trusted Publishing
+      id-token: write
+    steps:
+      # retrieve your distributions here
+      - name: Download from CI runner
+        uses: actions/download-artifact@v4
+        with:
+          name: built-package
+          path: dist/
+      - name: Publish on PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/testpypi.yml b/.github/workflows/testpypi.yml
new file mode 100644
index 0000000..e3b6ca0
--- /dev/null
+++ b/.github/workflows/testpypi.yml
@@ -0,0 +1,57 @@
+name: Test Build
+
+on:
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build package from source
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+          fetch-depth: 0
+      - name: Install Python 3
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - name: Install build tools
+        run: |
+          python3 -m pip install build --user
+      - name: Build package
+        run: |
+          python3 -m build
+      - name: Upload to CI runner
+        uses: actions/upload-artifact@v4
+        with:
+          name: built-package
+          path: dist/
+
+  testpypi-publish:
+    name: Publish built package on TestPyPI
+    runs-on: ubuntu-latest
+    needs:
+      - build
+
+    # Specifying a GitHub environment is optional, but strongly encouraged
+    environment:
+      name: testpypi
+      url: https://test.pypi.org/p/lydata
+    permissions:
+      # IMPORTANT: this permission is mandatory for Trusted Publishing
+      id-token: write
+    steps:
+      # retrieve your distributions here
+      - name: Download from CI runner
+        uses: actions/download-artifact@v4
+        with:
+          name: built-package
+          path: dist/
+      - name: Publish on TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/

From f8ff84111689eda64217b03b90cb58a1360ed92a Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 26 Jun 2025 10:54:57 +0200
Subject: [PATCH 5/8] chore: update changelog

---
 CHANGELOG.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 16eae55..440dbee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,26 @@
 
 All notable changes to this project will be documented in this file.
 
+## [0.3.0] - 2025-06-26
+
+The Python package `lydata` is now pulled out of the data repository [lyDATA](https://github.com/lycosystem/lydata) and will be maintained in the repository [lydata-package](https://github.com/lycosystem/lydata-package).
+
+This required some changes, as the data now cannot be easily shipped with the package. Instead, all data must always be fetched from the remote repository.
+
+### 💼 Other
+
+- [**breaking**] Use github by default to fetch datasets
+- Fetching from GitHub now works without authentication/token, too
+
+### 🧪 Testing
+
+- Convert examples in `README.md` to valid doctests
+
+### ⚙️ Miscellaneous Tasks
+
+- Run doctest over `README.md` examples during automated tests
+- Add build & publish workflows (both PyPI and TestPyPI)
+
 ## [0.2.5] - 2025-02-05
 
 ### 🐛 Bug Fixes
@@ -253,6 +273,7 @@ Initial implementation of the lyDATA library.
 <!-- generated by git-cliff -->
 <!-- markdownlint-disable-file MD024 -->
 
+[0.3.0]: https://github.com/lycosystem/lydata/compare/8ae13..0.3.0
 [0.2.5]: https://github.com/lycosystem/lydata/compare/0.2.4..0.2.5
 [0.2.4]: https://github.com/lycosystem/lydata/compare/0.2.3..0.2.4
 [0.2.3]: https://github.com/lycosystem/lydata/compare/0.2.2..0.2.3

From 6af643b3b33b5c76c17977003b6125f184907512 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 26 Jun 2025 11:05:50 +0200
Subject: [PATCH 6/8] test: make example doctest harder to fail

---
 README.md | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 1b26aa4..0decae5 100644
--- a/README.md
+++ b/README.md
@@ -83,17 +83,10 @@ And we have implemented `Q` and `C` objects inspired by Django that allow easier
 
 # select patients younger than 50 that are not HPV positive (includes NaNs)
 >>> query_result = first_dataset.ly.query((C("age") < 50) & ~(C("hpv") == True))
->>> print(query_result)
-... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-    patient                                                ... positive_dissected
-          #                                                ...             contra
-         id         institution     sex age ... hpv_status ...       III   IV   V
-11     P030  Centre Léon Bérard    male  49 ...      False ...       NaN  NaN NaN
-12     P031  Centre Léon Bérard    male  46 ...      False ...       NaN  NaN NaN
-                                                           ...
-249    P307  Centre Léon Bérard    male  49 ...      False ...       NaN  NaN NaN
-257    P315  Centre Léon Bérard  female  43 ...      False ...       NaN  NaN NaN
-[13 rows x 82 columns]
+>>> (query_result.ly.age < 50).all()
+np.True_
+>>> (query_result.ly.hpv == False).all()
+np.True_
 
 ```
 

From 22f293b4a22333e4be19486e6f058d8176b28ce0 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 26 Jun 2025 11:06:06 +0200
Subject: [PATCH 7/8] ci: give test runner descriptive name

---
 .github/workflows/tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2bc5af8..f50b909 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -10,6 +10,7 @@ on:
 
 jobs:
   test:
+    name: Run tests & report coverage
     runs-on: ubuntu-latest
     permissions:
       pull-requests: write

From 07207ef5e3012a480176eb24f9b96e7fda472820 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 26 Jun 2025 11:16:39 +0200
Subject: [PATCH 8/8] ci: use github token as secret in actions

---
 .github/workflows/tests.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f50b909..fbfaedf 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -42,6 +42,7 @@ jobs:
         run: pytest --cov=lydata --cov-config=pyproject.toml --doctest-modules src
         env:
           COVERAGE_FILE: .coverage.doctests
+          GITHUB_TOKEN: ${{ secrets.LYCOSYSTEM_READALL }}
 
       # Lastly, we can make sure the examples in the README.md are up to date.
       # Because doctests can occur in any text file, we can run it over those examples
@@ -51,6 +52,7 @@ jobs:
         run: coverage run -m doctest README.md
         env:
           COVERAGE_FILE: .coverage.readme
+          GITHUB_TOKEN: ${{ secrets.LYCOSYSTEM_READALL }}
 
       # Lastly, we collect all files that start with `.coverage` into one file and
       # create a report either as a comment on the PR or in a separate branch if its