lycosystem · rmnldwg · Sep 4, 2025 · Jul 14, 2025 · Jul 23, 2025 · Jul 23, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -29,10 +29,11 @@ jobs:
 
       # Below, we first run pytest in the `tests/` folder. Because we use a `src`
       # layout, this will fail if the package is not installed correctly.
-      - name: Test package is installable
+      - name: Patient-specific and installation tests
         run: pytest --cov=lydata --cov-config=pyproject.toml tests
         env:
           COVERAGE_FILE: .coverage.is_installable
+          GITHUB_TOKEN: ${{ secrets.LYCOSYSTEM_READALL }}
 
       # Now, we execute all doctests in the `src` tree. This will NOT run with
       # the installed code, but it doesn't matter, because we already know it is

diff --git a/.gitignore b/.gitignore
@@ -176,5 +176,5 @@ pyrightconfig.json
 # End of https://www.toptal.com/developers/gitignore/api/python
 **/_version.py
 
-# VS Code
+## VS Code
 .vscode/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,85 @@
 
 All notable changes to this project will be documented in this file.
 
+## [0.4.0] - 2025-09-04
+
+### 🚀 Features
+
+- Allow custom functions via `.pass_to()` of `C` objects
+- Allow importing `LyDataFrame` type from root
+- Add working sorting functions for `LyDataFrame`
+- Add convenience `.ly.enhance()` method
+- Add pydantic patient/tumor model
+- Add schema for modalities
+- Add working dtype casting function
+- Add `.ly.cast()` to lydata accessor
+- Add function to write JSON schema to file
+- Add pre-/suffixes to T/N stages in schema
+- Casting, validating, & enhancing during load
+- Add a `.get_tnm()` helper method
+- Fail more informatively when loading. Fixes [#10].
+- Add `.ly.location` to short column access
+
+### 🐛 Bug Fixes
+
+- [**breaking**] Combine mods & lvl info using probabilities over likelihoods
+- Use spec/sens < 1 in `augment`
+- Make `LyDataFrame` importable
+- Ensure alignment of columns during combine/augment
+- Change mid-level column from `info` to `core`
+- Don't override superlevel when sublevels unknown
+- Join using "outer" in `.ly.enhance()`
+- Avoid `None`s due to index mismatch etc.
+- Replace instead of update augmented columns
+- Augment during combine for max_llh/rank
+- Use default subdivisions in `.ly.enhance()`
+- Make casting safer and better
+- Avoid pydantic's weird `TypeError` for `pd.NaT`
+- Check central info in schema
+- Call `logger.error` over `exception`
+- Allow MX=-1 in schema
+- Allow `None` in more patient fields
+- Side may be `None` when central=`True`
+- Make some fields robust to uppercase strings
+- Allow loading from disk using custom paths
+- Get github fetch working again
+
+### 💼 Other
+
+- Don't use `or` to check for `None` arg
+- [**breaking**] remove old functions to infer/combine data
+- Move `C` & `Q` to own module
+- [**breaking**] Update schema for new 2nd lvl cols
+- Improve final sorting of tables
+- [**breaking**] Rewrite validation using new schema
+- [**breaking**] Start using only pydantic schema for validation
+- Update mid-level cols to new `core`
+- Remove typer dependency
+
+### 📚 Documentation
+
+- Add more info to augment/combine
+- Update some docstrings
+- Add docstrings to JS code
+- Update schema & validation docstrings
+- Add new modules to sphinx
+
+### 🧪 Testing
+
+- Test new combine/augment with CLB patient 17
+- Add basic `.ly.combine()` test
+- Add scripts to compare augment/combine
+- Check one patient with specific issue
+- Add util doctest (though unnecessary)
+- Add some more patient-specific checks
+- Ensure basic functionality of schemas
+- Cover casting with minimal checks
+- Update schema test to use `core`, too
+- Add another 2025-USZ patient to test cases
+- Fix small issues causing tests to fail
+- Update to new, cast data
+- Ensure .env is loaded during all tests
+
 ## [0.3.3] - 2025-07-22
 
 ### 🚀 Features
@@ -301,6 +380,9 @@ Initial implementation of the lyDATA library.
 <!-- generated by git-cliff -->
 <!-- markdownlint-disable-file MD024 -->
 
+[0.4.0]: https://github.com/lycosystem/lydata-package/compare/0.3.3..0.4.0
+[0.3.3]: https://github.com/lycosystem/lydata-package/compare/0.3.2..0.3.3
+[0.3.2]: https://github.com/lycosystem/lydata-package/compare/0.3.1..0.3.2
 [0.3.1]: https://github.com/lycosystem/lydata-package/compare/0.3.0..0.3.1
 [0.3.0]: https://github.com/lycosystem/lydata-package/compare/8ae13..0.3.0
 [0.2.5]: https://github.com/lycosystem/lydata/compare/0.2.4..0.2.5
@@ -321,3 +403,4 @@ Initial implementation of the lyDATA library.
 [#4]: https://github.com/lycosystem/lydata/issues/4
 [#13]: https://github.com/lycosystem/lydata/issues/13
 [#5]: https://github.com/lycosystem/lydata-package/issues/5
+[#10]: https://github.com/lycosystem/lydata-package/issues/10
diff --git a/conftest.py b/conftest.py
@@ -0,0 +1,5 @@
+"""Pytest configuration and fixtures for lydata tests."""
+
+from dotenv import load_dotenv
+
+load_dotenv()
diff --git a/docs/source/augmentor.rst b/docs/source/augmentor.rst
@@ -0,0 +1,7 @@
+.. currentmodule:: lydata.augmentor
+
+Enhancing and Augmenting Datasets
+=================================
+
+.. automodule:: lydata.augmentor
+    :members:
diff --git a/docs/source/index.md b/docs/source/index.md
@@ -9,7 +9,10 @@
 :maxdepth: 2
 
 accessor
+augmentor
 loader
+querier
+schema
 utils
 validator
 :::
diff --git a/docs/source/querier.rst b/docs/source/querier.rst
@@ -0,0 +1,7 @@
+.. currentmodule:: lydata.querier
+
+Efficient and Reusable DataFrame Queries
+========================================
+
+.. automodule:: lydata.querier
+    :members:
diff --git a/docs/source/schema.rst b/docs/source/schema.rst
@@ -0,0 +1,7 @@
+.. currentmodule:: lydata.schema
+
+Formal Definition of a Patient Record
+=====================================
+
+.. automodule:: lydata.schema
+    :members:
diff --git a/docs/source/validator.rst b/docs/source/validator.rst
@@ -1,7 +1,7 @@
 .. currentmodule:: lydata.validator
 
-Pandera Schemas to Validate Datasets
-====================================
+Type Casting and Validation
+===========================
 
 .. automodule:: lydata.validator
     :members:
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,6 +28,7 @@ dependencies = [
     "pandera",
     "pydantic",
     "loguru",
+    "roman",
 ]
 
 [project.urls]
@@ -44,6 +45,7 @@ docs = [
 tests = [
     "pytest",
     "pytest-cov",
+    "python-dotenv>=1.1.1",
 ]
 dev = [
     "pre-commit",
@@ -67,6 +69,9 @@ exclude = ["docs"]
 select = ["E", "F", "W", "B", "C", "R", "U", "D", "I", "S", "T", "A", "N"]
 ignore = ["B028", "N816", "E712"]
 
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["S101"]
+
 [tool.uv]
 package = true
 

diff --git a/src/lydata/__init__.py b/src/lydata/__init__.py
@@ -3,27 +3,28 @@
 from loguru import logger
 
 import lydata._version as _version
-from lydata.accessor import C, Q
+from lydata.accessor import LyDataFrame
 from lydata.loader import (
     available_datasets,
     load_datasets,
 )
-from lydata.utils import infer_and_combine_levels
-from lydata.validator import validate_datasets
+from lydata.querier import C, Q
+from lydata.validator import is_valid
 
 __author__ = "Roman Ludwig"
 __email__ = "roman.ludwig@usz.ch"
 __uri__ = "https://github.com/lycosystem/lydata"
 __version__ = _version.__version__
 
 __all__ = [
+    "LyDataFrame",
     "accessor",
     "Q",
     "C",
     "available_datasets",
     "load_datasets",
-    "validate_datasets",
-    "infer_and_combine_levels",
+    "is_valid",
 ]
 
 logger.disable("lydata")
+logger.remove()