lycosystem · rmnldwg · Jun 26, 2025 · Jun 20, 2025 · Jun 20, 2025 · Jun 26, 2025
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -0,0 +1,56 @@
+# Build the package from source and publish the result on PyPI.
+name: Build
+
+# Triggers: the creation of a GitHub release, or a manual run via
+# `workflow_dispatch`.
+on:
+  release:
+    types: [ created ]
+
+  workflow_dispatch:
+
+jobs:
+  # Build the distribution files with `python3 -m build` and hand them to the
+  # publishing job as a workflow artifact.
+  build:
+    name: Build package from source
+    runs-on: ubuntu-latest
+
+    steps:
+      # Check out the complete git history (fetch-depth: 0) without keeping the
+      # checkout token in the local git config (persist-credentials: false).
+      # NOTE(review): the full history is presumably needed to derive the
+      # package version from git tags -- confirm against pyproject.toml.
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+          fetch-depth: 0
+      - name: Install Python 3
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      # `build` is the module invoked by the next step.
+      - name: Install build tools
+        run: |
+          python3 -m pip install build --user
+      - name: Build package
+        run: |
+          python3 -m build
+      # Store the contents of dist/ as the artifact "built-package"; the
+      # `pypi-publish` job downloads it under the same name.
+      - name: Upload to CI runner
+        uses: actions/upload-artifact@v4
+        with:
+          name: built-package
+          path: dist/
+
+  # Upload the files produced by the `build` job to PyPI.
+  pypi-publish:
+    name: Publish built package on PyPI
+    runs-on: ubuntu-latest
+    # Only start once the `build` job has completed successfully.
+    needs:
+      - build
+
+    # Specifying a GitHub environment is optional, but strongly encouraged
+    environment:
+      name: pypi
+      url: https://pypi.org/p/lydata
+    permissions:
+      # IMPORTANT: this permission is mandatory for Trusted Publishing
+      id-token: write
+    steps:
+      # Fetch the "built-package" artifact uploaded by the `build` job and
+      # place its files in dist/.
+      - name: Download from CI runner
+        uses: actions/download-artifact@v4
+        with:
+          name: built-package
+          path: dist/
+      # No `with:` inputs are given, so the action runs with its defaults.
+      - name: Publish on PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/testpypi.yml b/.github/workflows/testpypi.yml
@@ -0,0 +1,57 @@
+# Build the package from source and publish the result on TestPyPI.
+name: Test Build
+
+# Triggers: pull requests that target `main`, or a manual run via
+# `workflow_dispatch`.
+on:
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+jobs:
+  # Build the distribution files with `python3 -m build` and hand them to the
+  # publishing job as a workflow artifact.
+  build:
+    name: Build package from source
+    runs-on: ubuntu-latest
+
+    steps:
+      # Check out the complete git history (fetch-depth: 0) without keeping the
+      # checkout token in the local git config (persist-credentials: false).
+      # NOTE(review): the full history is presumably needed to derive the
+      # package version from git tags -- confirm against pyproject.toml.
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+          fetch-depth: 0
+      - name: Install Python 3
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      # `build` is the module invoked by the next step.
+      - name: Install build tools
+        run: |
+          python3 -m pip install build --user
+      - name: Build package
+        run: |
+          python3 -m build
+      # Store the contents of dist/ as the artifact "built-package"; the
+      # `testpypi-publish` job downloads it under the same name.
+      - name: Upload to CI runner
+        uses: actions/upload-artifact@v4
+        with:
+          name: built-package
+          path: dist/
+
+  # Upload the files produced by the `build` job to TestPyPI.
+  testpypi-publish:
+    name: Publish built package on TestPyPI
+    runs-on: ubuntu-latest
+    # Only start once the `build` job has completed successfully.
+    needs:
+      - build
+
+    # Specifying a GitHub environment is optional, but strongly encouraged
+    environment:
+      name: testpypi
+      url: https://test.pypi.org/p/lydata
+    permissions:
+      # IMPORTANT: this permission is mandatory for Trusted Publishing
+      id-token: write
+    steps:
+      # Fetch the "built-package" artifact uploaded by the `build` job and
+      # place its files in dist/.
+      - name: Download from CI runner
+        uses: actions/download-artifact@v4
+        with:
+          name: built-package
+          path: dist/
+      # `repository-url` redirects the upload to the TestPyPI index; the step
+      # name says so explicitly to avoid confusion with the real PyPI release.
+      - name: Publish on TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
diff --git a/.github/workflows/tests.yaml → .github/workflows/tests.yml b/.github/workflows/tests.yaml → .github/workflows/tests.yml
@@ -10,6 +10,7 @@ on:
 
 jobs:
   test:
+    name: Run tests & report coverage
     runs-on: ubuntu-latest
     permissions:
       pull-requests: write
@@ -41,11 +42,22 @@ jobs:
         run: pytest --cov=lydata --cov-config=pyproject.toml --doctest-modules src
         env:
           COVERAGE_FILE: .coverage.doctests
+          GITHUB_TOKEN: ${{ secrets.LYCOSYSTEM_READALL }}
+
+      # Next, we make sure the examples in the README.md are up to date. Because
+      # doctests can occur in any text file, we can run the doctest module over
+      # those examples as well:
+      - name: Test README.md examples
+        if: success() || failure()
+        run: coverage run -m doctest README.md
+        env:
+          COVERAGE_FILE: .coverage.readme
+          GITHUB_TOKEN: ${{ secrets.LYCOSYSTEM_READALL }}
 
       # Lastly, we collect all files that start with `.coverage` into one file and
       # create a report either as a comment on the PR or in a separate branch if its
       # a commit to the main branch. From that branch we can put badges and coverage
-      # reports into e.g. our main README.md 
+      # reports into e.g. our main README.md
       - name: Add coverage comment
         if: success() || failure()   # run these even if previous step fails
         uses: py-cov-action/python-coverage-comment-action@v3

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,26 @@
 
 All notable changes to this project will be documented in this file.
 
+## [0.3.0] - 2025-06-26
+
+The Python package `lydata` is now pulled out of the data repository [lyDATA](https://github.com/lycosystem/lydata) and will be maintained in the repository [lydata-package](https://github.com/lycosystem/lydata-package).
+
+This required some changes, as the data now cannot be easily shipped with the package. Instead, all data must always be fetched from the remote repository.
+
+### 💼 Other
+
+- [**breaking**] Use github by default to fetch datasets
+- Fetching from GitHub now works without authentication/token, too
+
+### 🧪 Testing
+
+- Convert examples in `README.md` to valid doctests
+
+### ⚙️ Miscellaneous Tasks
+
+- Run doctest over `README.md` examples during automated tests
+- Add build & publish workflows (both PyPI and TestPyPI)
+
 ## [0.2.5] - 2025-02-05
 
 ### 🐛 Bug Fixes
@@ -253,6 +273,7 @@ Initial implementation of the lyDATA library.
 <!-- generated by git-cliff -->
 <!-- markdownlint-disable-file MD024 -->
 
+[0.3.0]: https://github.com/lycosystem/lydata/compare/8ae13..0.3.0
 [0.2.5]: https://github.com/lycosystem/lydata/compare/0.2.4..0.2.5
 [0.2.4]: https://github.com/lycosystem/lydata/compare/0.2.3..0.2.4
 [0.2.3]: https://github.com/lycosystem/lydata/compare/0.2.2..0.2.3

diff --git a/README.md b/README.md
@@ -1,8 +1,9 @@
 # Python Library for Loading and Manipulating lyDATA Tables
 
-[![Build](https://github.com/lycosystem/lydata/actions/workflows/build.yml/badge.svg)](https://github.com/lycosystem/lydata/actions/workflows/build.yml)
-[![Tests](https://github.com/lycosystem/lydata/actions/workflows/tests.yml/badge.svg)](https://github.com/lycosystem/lydata/actions/workflows/tests.yml)
-[![Documentation Status](https://readthedocs.org/projects/lydata/badge/?version=stable)](https://lydata.readthedocs.io/en/stable/?badge=stable)
+[![Build](https://github.com/lycosystem/lydata-package/actions/workflows/release.yml/badge.svg)](https://github.com/lycosystem/lydata-package/actions/workflows/release.yml)
+[![Tests](https://github.com/lycosystem/lydata-package/actions/workflows/tests.yml/badge.svg)](https://github.com/lycosystem/lydata-package/actions/workflows/tests.yml)
+[![Documentation Status](https://readthedocs.org/projects/lydata/badge/?version=stable)](https://lydata.readthedocs.io/stable/?badge=stable)
+[![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/lycosystem/lydata-package/python-coverage-comment-action-data/endpoint.json)](https://htmlpreview.github.io/?https://github.com/lycosystem/lydata-package/blob/python-coverage-comment-action-data/htmlcov/index.html)
 
 This repository provides a Python library for loading, manipulating, and validating the datasets available on [lyDATA](https://github.com/lycosystem/lydata).
 
@@ -36,72 +37,57 @@ pip install -e .
 The first and most common use case would probably be listing and loading the published datasets:
 
 ```python
-import lydata
-
-for dataset_spec in lydata.available_datasets(
-    year=2023,              # show all datasets added in 2023
-    use_github=True,        # do not search on disk, but rather on GitHub
-    ref="61a17e",           # may be some specific hash/tag/branch
-):
-    print(dataset_spec.name)
-
-# output:
-# 2023-clb-multisite
-# 2023-isb-multisite
-
-first_dataset = next(lydata.load_datasets(
-    subsite="oropharynx",   # merge data that include oropharyngeal tumor patients
-    use_github=True,        # again, search GitHub, not on disk (which is the default)
-))
-print(first_dataset.head())
-
-# output:
-#   patient                              ... positive_dissected
-#         #                              ...             contra
-#        id         institution     sex  ...                III   IV    V
-# 0    P011  Centre Léon Bérard    male  ...                0.0  0.0  0.0
-# 1    P012  Centre Léon Bérard  female  ...                0.0  0.0  0.0
-# 2    P014  Centre Léon Bérard    male  ...                0.0  0.0  NaN
-# 3    P015  Centre Léon Bérard    male  ...                0.0  0.0  NaN
-# 4    P018  Centre Léon Bérard    male  ...                NaN  NaN  NaN
-#
-# [5 rows x 82 columns]
+>>> import lydata
+>>> for dataset_spec in lydata.available_datasets(
+...     year=2023,              # show all datasets added in 2023
+...     ref="61a17e",           # may be some specific hash/tag/branch
+... ):
+...     print(dataset_spec.name)
+2023-clb-multisite
+2023-isb-multisite
+
+# return generator of datasets that include oropharyngeal tumor patients
+>>> first_dataset = next(lydata.load_datasets(subsite="oropharynx"))
+>>> print(first_dataset.head())
+... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+  patient                              ... positive_dissected
+        #                              ...             contra
+       id         institution     sex  ...                III   IV    V
+0    P011  Centre Léon Bérard    male  ...                0.0  0.0  0.0
+1    P012  Centre Léon Bérard  female  ...                0.0  0.0  0.0
+2    P014  Centre Léon Bérard    male  ...                0.0  0.0  NaN
+3    P015  Centre Léon Bérard    male  ...                0.0  0.0  NaN
+4    P018  Centre Léon Bérard    male  ...                NaN  NaN  NaN
+[5 rows x 82 columns]
+
 ```
 
 And since the three-level header of the tables is a little unwieldy at times, we also provide some shortcodes via a custom pandas accessor. As soon as `lydata` is imported it can be used like this:
 
 ```python
-print(first_dataset.ly.age)
-
-# output:
-# 0      67
-# 1      62
-#        ..
-# 261    60
-# 262    60
-# Name: (patient, #, age), Length: 263, dtype: int64
+>>> print(first_dataset.ly.age)
+... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+0      67
+1      62
+      ...
+261    60
+262    60
+Name: (patient, #, age), Length: 263, dtype: int64
+
 ```
 
 And we have implemented `Q` and `C` objects inspired by Django that allow easier querying of the tables:
 
 ```python
-from lydata import C
+>>> from lydata import C
 
 # select patients younger than 50 that are not HPV positive (includes NaNs)
-query_result = first_dataset.ly.query((C("age") < 50) & ~(C("hpv") == True))
-print(query_result)
-
-# output:
-#     patient                                  ... positive_dissected
-#           #                                  ...             contra
-#          id         institution     sex age  ...                 II  III   IV    V
-# 2      P014  Centre Léon Bérard    male  43  ...                1.0  0.0  0.0  NaN
-# 7      P024  Centre Léon Bérard    male  45  ...                NaN  NaN  NaN  NaN
-# ..      ...                 ...     ...  ..  ...                ...  ...  ...  ...
-# 212    P270  Centre Léon Bérard    male  47  ...                0.0  0.0  0.0  NaN
-# 217    P275  Centre Léon Bérard    male  49  ...                0.0  0.0  0.0  NaN
-#
-# [13 rows x 82 columns]
+>>> query_result = first_dataset.ly.query((C("age") < 50) & ~(C("hpv") == True))
+>>> (query_result.ly.age < 50).all()
+np.True_
+>>> (query_result.ly.hpv == False).all()
+np.True_
+
 ```
 
 For more details and further examples or use-cases, have a look at the [official documentation](https://lydata.readthedocs.org/)