From e57006bba1f5f6df6baca10c37a7b806af87c4e5 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Tue, 14 Oct 2025 11:26:22 +0200 Subject: [PATCH 01/10] migrate to the scverse cookiecutter template --- .codecov.yml => .codecov.yaml | 4 +- .cruft.json | 43 ++ .editorconfig | 15 +- .github/ISSUE_TEMPLATE/bug_report.md | 28 - .github/ISSUE_TEMPLATE/bug_report.yml | 94 +++ .github/ISSUE_TEMPLATE/config.yml | 5 + .github/ISSUE_TEMPLATE/feature_request.md | 20 - .github/ISSUE_TEMPLATE/feature_request.yml | 11 + .../pull_request_template.md | 7 - .github/workflows/black.yml | 11 - .github/workflows/build.yaml | 33 + .github/workflows/codecov.yml | 27 - .github/workflows/dev.yml | 35 -- .github/workflows/pythonpackage.yml | 34 - .../workflows/{release.yml => release.yaml} | 5 +- .github/workflows/test.yaml | 103 +++ .gitignore | 149 +---- .pre-commit-config.yaml | 40 +- .readthedocs.yaml | 28 +- CHANGELOG.md | 126 ++++ CONTRIBUTING.md | 23 - MANIFEST.in | 1 - README.md | 50 +- biome.jsonc | 17 + docs/.gitignore | 2 - docs/Makefile | 4 +- docs/_static/img/mudata.png | Bin 0 -> 38538 bytes docs/{ => _static}/img/mudata.svg | 0 docs/{ => _static}/img/muon_header.png | Bin docs/{ => _static}/img/muon_logo.png | Bin docs/{ => _static}/img/muon_logo_coloured.png | Bin tests/__init__.py => docs/_templates/.gitkeep | 0 docs/_templates/autosummary/class.rst | 61 ++ docs/api.md | 32 + docs/changelog.md | 3 + docs/conf.py | 139 +++++ docs/contributing.md | 329 ++++++++++ docs/img/mudata.png | Bin 16338 -> 0 bytes docs/index.md | 66 ++ docs/install.md | 32 + docs/io/input.md | 89 +++ docs/io/mudata.md | 181 ++++++ docs/io/output.md | 77 +++ docs/io/spec.md | 61 ++ docs/make.bat | 35 -- .../notebooks/annotations_management.ipynb | 84 +-- docs/{source => }/notebooks/axes.ipynb | 26 +- docs/{source => }/notebooks/nuances.ipynb | 27 +- .../notebooks/quickstart_mudata.ipynb | 31 +- docs/nuances.md | 26 + docs/pylint.rc | 589 ------------------ docs/references.bib | 26 + docs/references.md | 5 + docs/source/_static/styles.css | 7 - .../_templates/autosummary/function.rst | 5 - docs/source/_templates/autosummary/module.rst | 62 -- docs/source/api/index.rst | 34 - docs/source/changelog.rst | 90 --- docs/source/conf.py | 104 ---- docs/source/index.rst | 68 -- docs/source/install.rst | 36 -- docs/source/io/input.rst | 91 --- docs/source/io/mudata.rst | 167 ----- docs/source/io/output.rst | 78 --- docs/source/io/spec.rst | 57 -- docs/source/notebooks/requirements.txt | 4 - docs/source/nuances.rst | 29 - pyproject.toml | 218 ++++--- src/mudata/_core/io.py | 66 +- 69 files changed, 1933 insertions(+), 2017 deletions(-) rename .codecov.yml => .codecov.yaml (78%) create mode 100644 .cruft.json delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml delete mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml delete mode 100644 .github/PULL_REQUEST_TEMPLATE/pull_request_template.md delete mode 100644 .github/workflows/black.yml create mode 100644 .github/workflows/build.yaml delete mode 100644 .github/workflows/codecov.yml delete mode 100644 .github/workflows/dev.yml delete mode 100644 .github/workflows/pythonpackage.yml rename .github/workflows/{release.yml => release.yaml} (91%) create mode 100644 .github/workflows/test.yaml create mode 100644 CHANGELOG.md delete mode 100644 CONTRIBUTING.md delete mode 100644 MANIFEST.in create mode 100644 biome.jsonc delete 
mode 100644 docs/.gitignore create mode 100644 docs/_static/img/mudata.png rename docs/{ => _static}/img/mudata.svg (100%) rename docs/{ => _static}/img/muon_header.png (100%) rename docs/{ => _static}/img/muon_logo.png (100%) rename docs/{ => _static}/img/muon_logo_coloured.png (100%) rename tests/__init__.py => docs/_templates/.gitkeep (100%) create mode 100644 docs/_templates/autosummary/class.rst create mode 100644 docs/api.md create mode 100644 docs/changelog.md create mode 100644 docs/conf.py create mode 100644 docs/contributing.md delete mode 100644 docs/img/mudata.png create mode 100644 docs/index.md create mode 100644 docs/install.md create mode 100644 docs/io/input.md create mode 100644 docs/io/mudata.md create mode 100644 docs/io/output.md create mode 100644 docs/io/spec.md delete mode 100644 docs/make.bat rename docs/{source => }/notebooks/annotations_management.ipynb (97%) rename docs/{source => }/notebooks/axes.ipynb (94%) rename docs/{source => }/notebooks/nuances.ipynb (96%) rename docs/{source => }/notebooks/quickstart_mudata.ipynb (97%) create mode 100644 docs/nuances.md delete mode 100644 docs/pylint.rc create mode 100644 docs/references.bib create mode 100644 docs/references.md delete mode 100644 docs/source/_static/styles.css delete mode 100644 docs/source/_templates/autosummary/function.rst delete mode 100644 docs/source/_templates/autosummary/module.rst delete mode 100644 docs/source/api/index.rst delete mode 100644 docs/source/changelog.rst delete mode 100644 docs/source/conf.py delete mode 100644 docs/source/index.rst delete mode 100644 docs/source/install.rst delete mode 100644 docs/source/io/input.rst delete mode 100644 docs/source/io/mudata.rst delete mode 100644 docs/source/io/output.rst delete mode 100644 docs/source/io/spec.rst delete mode 100644 docs/source/notebooks/requirements.txt delete mode 100644 docs/source/nuances.rst diff --git a/.codecov.yml b/.codecov.yaml similarity index 78% rename from .codecov.yml rename to .codecov.yaml index cb56083..d0c0e29 100644 --- a/.codecov.yml +++ b/.codecov.yaml @@ -1,4 +1,4 @@ -# Based on pydata/xarray, anndata +# Based on pydata/xarray codecov: require_ci_to_pass: no @@ -12,6 +12,6 @@ coverage: changes: false comment: - layout: "diff, flags, files" + layout: diff, flags, files behavior: once require_base: no diff --git a/.cruft.json b/.cruft.json new file mode 100644 index 0000000..6f02e9a --- /dev/null +++ b/.cruft.json @@ -0,0 +1,43 @@ +{ + "template": "https://github.com/scverse/cookiecutter-scverse", + "commit": "d383d94fadff9e4e6fdb59d77c68cb900d7cedec", + "checkout": null, + "context": { + "cookiecutter": { + "project_name": "mudata", + "package_name": "mudata", + "project_description": "Multimodal data", + "author_full_name": "Danila Bredikhin", + "author_email": "danila@stanford.edu", + "github_user": "scverse", + "github_repo": "mudata", + "license": "BSD 3-Clause License", + "ide_integration": false, + "_copy_without_render": [ + ".github/workflows/build.yaml", + ".github/workflows/test.yaml", + "docs/_templates/autosummary/**.rst" + ], + "_exclude_on_template_update": [ + "CHANGELOG.md", + "LICENSE", + "README.md", + "docs/api.md", + "docs/index.md", + "docs/notebooks/example.ipynb", + "docs/references.bib", + "docs/references.md", + "src/**", + "tests/**" + ], + "_render_devdocs": false, + "_jinja2_env_vars": { + "lstrip_blocks": true, + "trim_blocks": true + }, + "_template": "https://github.com/scverse/cookiecutter-scverse", + "_commit": "d383d94fadff9e4e6fdb59d77c68cb900d7cedec" + } + }, + 
"directory": null +} diff --git a/.editorconfig b/.editorconfig index 66e3d33..66678e3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,12 +1,15 @@ root = true [*] -charset = utf-8 +indent_style = space +indent_size = 4 end_of_line = lf -insert_final_newline = true +charset = utf-8 trim_trailing_whitespace = true -max_line_length = 100 +insert_final_newline = true -[*.py] -indent_size = 4 -indent_style = space +[{*.{yml,yaml,toml},.cruft.json}] +indent_size = 2 + +[Makefile] +indent_style = tab diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index c101f33..0000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve mudata -title: '' -labels: bug -assignees: '' - ---- - -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behaviour. - -Please provide exact steps to reproduce the bug in a clean Python environment. In case it's not clear what's causing this bug, please provide the data or the data generation procedure. -Sometimes it is not possible to share the data but usually it is possible to replicate problems on publicly available datasets or to share a subset of your data. - -**Expected behaviour** -A clear and concise description of what you expected to happen. - -**System** - - OS: [e.g. macOS Monterey] - - Python version [e.g. 3.11] - - Versions of libraries involved [e.g. AnnData 0.10.0] - -**Additional context** -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..3ca1ccb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,94 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + **Note**: Please read [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) + detailing how to provide the necessary information for us to reproduce your bug. In brief: + * Please provide exact steps how to reproduce the bug in a clean Python environment. + * In case it's not clear what's causing this bug, please provide the data or the data generation procedure. + * Sometimes it is not possible to share the data, but usually it is possible to replicate problems on publicly + available datasets or to share a subset of your data. + + - type: textarea + id: report + attributes: + label: Report + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: versions + attributes: + label: Versions + description: | + Which version of packages. + + Please install `session-info2`, run the following command in a notebook, + click the “Copy as Markdown” button, then paste the results into the text box below. 
+ + ```python + In[1]: import session_info2; session_info2.session_info(dependencies=True) + ``` + + Alternatively, run this in a console: + + ```python + >>> import session_info2; print(session_info2.session_info(dependencies=True)._repr_mimebundle_()["text/markdown"]) + ``` + render: python + placeholder: | + anndata 0.11.3 + ---- ---- + charset-normalizer 3.4.1 + coverage 7.7.0 + psutil 7.0.0 + dask 2024.7.1 + jaraco.context 5.3.0 + numcodecs 0.15.1 + jaraco.functools 4.0.1 + Jinja2 3.1.6 + sphinxcontrib-jsmath 1.0.1 + sphinxcontrib-htmlhelp 2.1.0 + toolz 1.0.0 + session-info2 0.1.2 + PyYAML 6.0.2 + llvmlite 0.44.0 + scipy 1.15.2 + pandas 2.2.3 + sphinxcontrib-devhelp 2.0.0 + h5py 3.13.0 + tblib 3.0.0 + setuptools-scm 8.2.0 + more-itertools 10.3.0 + msgpack 1.1.0 + sparse 0.15.5 + wrapt 1.17.2 + jaraco.collections 5.1.0 + numba 0.61.0 + pyarrow 19.0.1 + pytz 2025.1 + MarkupSafe 3.0.2 + crc32c 2.7.1 + sphinxcontrib-qthelp 2.0.0 + sphinxcontrib-serializinghtml 2.0.0 + zarr 2.18.4 + asciitree 0.3.3 + six 1.17.0 + sphinxcontrib-applehelp 2.0.0 + numpy 2.1.3 + cloudpickle 3.1.1 + sphinxcontrib-bibtex 2.6.3 + natsort 8.4.0 + jaraco.text 3.12.1 + setuptools 76.1.0 + Deprecated 1.2.18 + packaging 24.2 + python-dateutil 2.9.0.post0 + ---- ---- + Python 3.13.2 | packaged by conda-forge | (main, Feb 17 2025, 14:10:22) [GCC 13.3.0] + OS Linux-6.11.0-109019-tuxedo-x86_64-with-glibc2.39 + Updated 2025-03-18 15:47 diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..5b62547 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: Scverse Community Forum + url: https://discourse.scverse.org/ + about: If you have questions about “How to do X”, please ask them here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 7cd03eb..0000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for mudata -title: '' -labels: enhancement -assignees: '' - ---- - -**Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** -Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..0bec61b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Propose a new feature for mudata +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md deleted file mode 100644 index 814f9f3..0000000 --- a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md +++ /dev/null @@ -1,7 +0,0 @@ -Fixes # . 
- -Changes proposed in this pull request: -- -- -- - diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml deleted file mode 100644 index f58e4c6..0000000 --- a/.github/workflows/black.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: Lint - -on: [push, pull_request] - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: psf/black@stable diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..83e01a1 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,33 @@ +name: Check Build + +on: + push: + branches: [main] + pull_request: + branches: [main] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u). + shell: bash -euo pipefail {0} + +jobs: + package: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + filter: blob:none + fetch-depth: 0 + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + cache-dependency-glob: pyproject.toml + - name: Build package + run: uv build + - name: Check package + run: uvx twine check --strict dist/*.whl diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml deleted file mode 100644 index e708b90..0000000 --- a/.github/workflows/codecov.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Code coverage -on: [push, pull_request] -jobs: - run: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.11 - uses: actions/setup-python@v2 - with: - python-version: 3.11 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install uv - uv venv - source .venv/bin/activate - uv pip install pytest coverage - uv pip install .[dev,docs,test] - - name: Run tests and collect coverage - run: | - source .venv/bin/activate - coverage run -m pytest --cache-clear - coverage xml - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v2 diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml deleted file mode 100644 index 1738e7a..0000000 --- a/.github/workflows/dev.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Package dev versions - -on: [push, pull_request] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.11", "3.12", "3.13"] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install uv - uv venv - source .venv/bin/activate - uv pip install ruff pytest - uv pip install .[dev,docs,test] - - name: Install dev versions - run: | - source .venv/bin/activate - uv pip install -U git+https://github.com/scverse/scanpy - uv pip install -U git+https://github.com/scverse/anndata - - name: Test with pytest - run: | - source .venv/bin/activate - pytest diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml deleted file mode 100644 index 594564a..0000000 --- a/.github/workflows/pythonpackage.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: Python package - -on: [push, pull_request] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.11", "3.12", "3.13"] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ 
matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install uv - uv venv - source .venv/bin/activate - uv pip install ruff pytest - uv pip install .[dev,docs,test] - - name: Ruff check - run: | - source .venv/bin/activate - ruff check src/mudata - - name: Test with pytest - run: | - source .venv/bin/activate - python -m pytest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yaml similarity index 91% rename from .github/workflows/release.yml rename to .github/workflows/release.yaml index 536e83c..91f6bad 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yaml @@ -2,7 +2,10 @@ name: Release on: release: - types: [published] + push: + tags: + - "v?[0-9]+.[0-9]+.[0-9]+**" + workflow_dispatch: defaults: run: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..0bd76e8 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,103 @@ +name: Test + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + - cron: "0 5 1,15 * *" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u). + shell: bash -euo pipefail {0} + +jobs: + # Get the test environment from hatch as defined in pyproject.toml. + # This ensures that the pyproject.toml is the single point of truth for test definitions and the same tests are + # run locally and on continuous integration. + # Check [[tool.hatch.envs.hatch-test.matrix]] in pyproject.toml and https://hatch.pypa.io/latest/environment/ for + # more details. + get-environments: + runs-on: ubuntu-latest + outputs: + envs: ${{ steps.get-envs.outputs.envs }} + steps: + - uses: actions/checkout@v4 + with: + filter: blob:none + fetch-depth: 0 + - name: Install uv + uses: astral-sh/setup-uv@v5 + - name: Get test environments + id: get-envs + run: | + ENVS_JSON=$(uvx hatch env show --json | jq -c 'to_entries + | map( + select(.key | startswith("hatch-test")) + | { + name: .key, + label: (if (.key | contains("pre")) then .key + " (PRE-RELEASE DEPENDENCIES)" else .key end), + python: .value.python + } + )') + echo "envs=${ENVS_JSON}" | tee $GITHUB_OUTPUT + + # Run tests through hatch. Spawns a separate runner for each environment defined in the hatch matrix obtained above. 
+ test: + needs: get-environments + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + env: ${{ fromJSON(needs.get-environments.outputs.envs) }} + + name: ${{ matrix.env.label }} + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + with: + filter: blob:none + fetch-depth: 0 + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + python-version: ${{ matrix.env.python }} + cache-dependency-glob: pyproject.toml + - name: create hatch environment + run: uvx hatch env create ${{ matrix.env.name }} + - name: run tests using hatch + env: + MPLBACKEND: agg + PLATFORM: ${{ matrix.os }} + DISPLAY: :42 + run: uvx hatch run ${{ matrix.env.name }}:run-cov -v --color=yes -n auto + - name: generate coverage report + run: | + # See https://coverage.readthedocs.io/en/latest/config.html#run-patch + test -f .coverage || uvx hatch run ${{ matrix.env.name }}:cov-combine + uvx hatch run ${{ matrix.env.name }}:cov-report # report visibly + uvx hatch run ${{ matrix.env.name }}:coverage xml # create report for upload + - name: Upload coverage + uses: codecov/codecov-action@v5 + + # Check that all tests defined above pass. This makes it easy to set a single "required" test in branch + # protection instead of having to update it frequently. See https://github.com/re-actors/alls-green#why. + check: + name: Tests pass in all hatch environments + if: always() + needs: + - get-environments + - test + runs-on: ubuntu-latest + steps: + - uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} diff --git a/.gitignore b/.gitignore index aaf2cf1..e4b4b33 100644 --- a/.gitignore +++ b/.gitignore @@ -1,134 +1,27 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST -src/mudata/_version.py - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook +# Temp files +.DS_Store +*~ +buck-out/ .ipynb_checkpoints -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid +# IDEs +/.idea/ +/.vscode/ -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# KDevelop project settings -*.kdev4 -.kdev4/ +# Compiled files +.venv/ +__pycache__/ +.*cache/ +/src/mudata/_version.py -# mkdocs documentation -/site +# Distribution / packaging +/dist/ -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json +# Tests and coverage +/data/ +/node_modules/ +/.coverage* -# Pyre type checker -.pyre/ +# docs +/docs/generated/ +/docs/_build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 696c2af..b9de3fe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,38 @@ +fail_fast: false +default_language_version: + python: python3 +default_stages: + - pre-commit + - pre-push +minimum_pre_commit_version: 2.16.0 repos: - - repo: https://github.com/psf/black - rev: 24.10.0 # Replace by any tag/version: https://github.com/psf/black/tags + - repo: https://github.com/biomejs/pre-commit + rev: v2.2.4 hooks: - - id: black - language_version: python3 + - id: biome-format + exclude: ^\.cruft\.json$ # inconsistent indentation with cruft - file never to be modified manually. + - repo: https://github.com/tox-dev/pyproject-fmt + rev: v2.6.0 + hooks: + - id: pyproject-fmt + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.13.2 + hooks: + - id: ruff-check + types_or: [python, pyi, jupyter] + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + types_or: [python, pyi, jupyter] + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: detect-private-key + - id: check-ast + - id: end-of-file-fixer + - id: mixed-line-ending + args: [--fix=lf] + - id: trailing-whitespace + - id: check-case-conflict + # Check that there are no merge conflicts (could be generated by template sync) + - id: check-merge-conflict + args: [--assume-in-merge] diff --git a/.readthedocs.yaml b/.readthedocs.yaml index e9c1c78..c3f3f96 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,19 +1,15 @@ +# https://docs.readthedocs.io/en/stable/config-file/v2.html version: 2 - -# Set the OS, Python version and other tools you might need build: - os: ubuntu-22.04 + os: ubuntu-24.04 tools: - python: "3.11" - -# Build documentation in the "docs/" directory with Sphinx -sphinx: - configuration: docs/source/conf.py - -# Explicitly set the version of Python and its requirements -python: - install: - - method: pip - path: . - extra_requirements: - - docs + python: "3.12" + jobs: + create_environment: + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + build: + html: + - uvx hatch run docs:build + - mv docs/_build $READTHEDOCS_OUTPUT diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f2e1bf8 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,126 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog][], +and this project adheres to [Semantic Versioning][]. 
+
+[keep a changelog]: https://keepachangelog.com/en/1.0.0/
+[semantic versioning]: https://semver.org/spec/v2.0.0.html
+
+## [0.3.2]
+
+### Fixed
+
+- Fixed an [issue](https://github.com/scverse/mudata/issues/99) in [`update()`](#mudata.MuData.update)
+
+## [0.3.1]
+
+### Fixed
+
+- Compatibility with anndata 0.10.9
+
+## [0.3.0]
+
+### Added
+
+- Pull/push interface for annotations: [`pull_obs()`](#mudata.MuData.pull_obs), [`pull_var()`](#mudata.MuData.pull_var), [`push_obs()`](#mudata.MuData.push_obs), [`push_var()`](#mudata.MuData.push_var)
+- Conversion functions: [`to_anndata()`](#mudata.MuData.to_anndata), [`to_mudata()`](#mudata.to_mudata)
+- [Concatenation](#mudata.concat) of MuData objects
+- `MuData.mod_names` attribute
+- Pretty-printing for `MuData.mod`
+- `fsspec` support for readers
+
+### Fixed
+
+- Improved performance and behaviour of [`update()`](#mudata.MuData.update).
+  For compatibility reasons, this release keeps the old behaviour of pulling annotations on read/update as the default.
+- [`read_zarr()`](#mudata.read_zarr) now supports `mod-order`
+- Correct handling of the `uns` attribute by views
+
+### Note
+
+If you want to adopt the new update behaviour, set `mudata.set_options(pull_on_update=False)`.
+This will be the default behaviour in the next release.
+With it, annotations will no longer be copied from the modalities implicitly on [`update()`](#mudata.MuData.update).
+
+To copy the annotations explicitly, use [`pull_obs()`](#mudata.MuData.pull_obs) and/or [`pull_var()`](#mudata.MuData.pull_var).
+
+## [0.2.4]
+
+### Changed
+
+- Requires anndata 0.10.8 or newer
+
+### Fixed
+
+- Compatibility with numpy 2.0
+- Compatibility with anndata 0.11
+
+## [0.2.3]
+
+### Fixed
+
+- Fixes and improvements for backed objects, views, nested MuData objects, I/O and HTML representation
+- Pandas 2.0 compatibility
+
+## [0.2.2]
+
+### Fixed
+
+- [`Path`](#pathlib.Path) objects now work in [](#mudata.read)
+
+## [0.2.1]
+
+### Added
+
+- [`MuData.__len__`](#mudata.MuData.__len__).
+  This should make it easier to build MuData into workflows that operate on data containers with length.
+  In practice, using [`n_obs`](#mudata.MuData.n_obs) should be preferred.
+
+### Changed
+
+- Default `dict` has replaced `OrderedDict`, e.g. in the `uns` slot, to improve compatibility with new serialisation versions.
+  As of Python 3.6, dictionaries are insertion-ordered.
+
+### Fixed
+
+- Improvements and optimizations to [`update()`](#mudata.MuData.update)
+
+## [0.2.0]
+
+This version uses the new I/O serialisation of AnnData v0.8.
+
+Updating a MuData object with [`update()`](#mudata.MuData.update) is even faster in many use cases.
+
+There is a new axes interface that makes it possible to use MuData objects as containers with different shared dimensions.
+
+## [0.1.2]
+
+### Changed
+
+- Improved documentation, including [a new page describing the sharp bits](notebooks/nuances.ipynb)
+
+### Fixed
+
+- Updating a MuData object with [`update()`](#mudata.MuData.update) is now much faster
+
+## [0.1.1]
+
+- Various stability and bug fixes
+
+## [0.1.0]
+
+Initial `mudata` release with [`MuData`](#mudata.MuData), previously a part of the `muon` framework.
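The 0.3.0 note above describes the opt-in change to `update()` behaviour. A minimal sketch of the workflow it implies, using only the calls named in that changelog entry (the file name is hypothetical):

```py
import mudata as md

# Opt in to the new behaviour: update() will no longer pull
# per-modality annotations into the global .obs/.var implicitly.
md.set_options(pull_on_update=False)

mdata = md.read_h5mu("pbmc_10k.h5mu")  # hypothetical file
mdata.update()

# Copy modality-level annotations up explicitly when they are needed.
mdata.pull_obs()
mdata.pull_var()
```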
+ +[0.3.2]: https://github.com/scverse/mudata/compare/v0.3.1...v0.3.2 +[0.3.1]: https://github.com/scverse/mudata/compare/v0.3.0...v0.3.1 +[0.3.0]: https://github.com/scverse/mudata/compare/v0.2.4...v0.3.0 +[0.2.4]: https://github.com/scverse/mudata/compare/v0.2.3...v0.2.4 +[0.2.3]: https://github.com/scverse/mudata/compare/v0.2.2...v0.2.3 +[0.2.2]: https://github.com/scverse/mudata/compare/v0.2.1...v0.2.2 +[0.2.1]: https://github.com/scverse/mudata/compare/v0.2.0...v0.2.1 +[0.2.0]: https://github.com/scverse/mudata/compare/v0.1.2...v0.2.0 +[0.1.2]: https://github.com/scverse/mudata/compare/v0.1.1...v0.1.2 +[0.1.1]: https://github.com/scverse/mudata/compare/v0.1.0...v0.1.1 +[0.1.0]: https://github.com/scverse/mudata/releases/tag/v0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index f57b2ac..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,23 +0,0 @@ -# Contributing - -This document describes details about contributing to `MuData`. - -The main entry point for a contribution is an issue. Please use issues to discuss the change you wish to make or the funcionality you want to add to `MuData`. For a more in-depth discussion you can also use [discussions](https://github.com/scverse/mudata/discussions) or contact `MuData` authors or maintainers via other communication methods such as email. - -## Issues - -Please consider opening an issue if you've encountered a bug, a performance issue, a documentation issue or have a feature request in mind. For convenience, we provide issue templates that you are very welcome to use. - -When creating an issue about a problem that you've encountered (e.g. an error), please include the minimal amount of source code to reproduce it. When including tracebacks, please paste the full traceback text. - -## Pull requests - -The code that is suggested to be merged into `MuData` is expected to follow reasonable Python code styleguides such as the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html). Below there are a few ideas that may help to improve the code quality. - -- Format the code with [black](https://github.com/psf/black). -- Make sure debugging code (e.g. `pdb.set_trace()`) is removed as well as respective dependencies (`import pdb`). -- Use tools like `pylint` and `flake8` to check proposed code changes. -- Make sure documentation is changed to reflect the changes. That includes docstrings as well as external files such as the ones in `docs/` or respective `README.md` files. -- Consider increasing the version number in `setup.py`. Please stick to [semantic versioning](https://semver.org/). -- Pull requests can be merged when the LGTM (_looks good to me_) has been received from reviewers, probably after a few rounds of reviews. 
- diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index aae9579..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -recursive-exclude tests * diff --git a/README.md b/README.md index 17ffe4a..604a062 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,36 @@ -![mudata header](./docs/img/mudata.svg) +![mudata header](./docs/_static/img/mudata.svg) -[![Documentation Status](https://readthedocs.org/projects/mudata/badge/?version=latest)](http://mudata.readthedocs.io/) -[![PyPi version](https://img.shields.io/pypi/v/mudata)](https://pypi.org/project/mudata) -[![](https://img.shields.io/badge/scverse-core-black.svg?labelColor=white&logo=data:image/svg%2bxml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+PCFET0NUWVBFIHN2ZyBQVUJMSUMgIi0vL1czQy8vRFREIFNWRyAxLjEvL0VOIiAiaHR0cDovL3d3dy53My5vcmcvR3JhcGhpY3MvU1ZHLzEuMS9EVEQvc3ZnMTEuZHRkIj4KPHN2ZyB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiB2aWV3Qm94PSIwIDAgOTEgOTEiIHZlcnNpb249IjEuMSIKICAgIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIKICAgIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4bWw6c3BhY2U9InByZXNlcnZlIgogICAgeG1sbnM6c2VyaWY9Imh0dHA6Ly93d3cuc2VyaWYuY29tLyIgc3R5bGU9ImZpbGwtcnVsZTpldmVub2RkO2NsaXAtcnVsZTpldmVub2RkO3N0cm9rZS1saW5lam9pbjpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoyOyI+CiAgICA8ZyBpZD0iRWJlbmVfMyI+CiAgICAgICAgPGc+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNSw4OS42Yy0yMi4zLC0zLjQgLTMwLjYsLTE5LjggLTMwLjYsLTE5LjhjMTAuOCwxNi45IDQzLDkuMSA1Mi45LDIuNWMxMi40LC04LjMgOCwtMTUuMyA2LjgsLTE4LjFjNS40LDcuMiA1LjMsMjMuNSAtMS4xLDI5LjRjLTUuNiw1LjEgLTE1LjMsNy45IC0yOCw2WiIgc3R5bGU9ImZpbGw6I2ZmZjtmaWxsLXJ1bGU6bm9uemVybztzdHJva2U6IzAwMDtzdHJva2Utd2lkdGg6MXB4OyIvPgogICAgICAgICAgICA8cGF0aCBkPSJNODMuOSw0My41YzIuOSwtNy4xIDAuOCwtMTIuNSAwLjUsLTEzLjNjLTAuNywtMS4zIC0xLjUsLTIuMyAtMi40LC0zLjFjLTE2LjEsLTEyLjYgLTU1LjksMSAtNzAuOSwxNi44Yy0xMC45LDExLjUgLTEwLjEsMjAgLTYuNywyNS44YzMuMSw0LjggNy45LDcuNiAxMy40LDljLTExLjUsLTEyLjQgOS44LC0zMS4xIDI5LC0zOGMyMSwtNy41IDMyLjUsLTMgMzcuMSwyLjhaIiBzdHlsZT0iZmlsbDojMzQzNDM0O2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik03OS42LDUwLjRjOSwtMTAuNSA1LC0xOS43IDQuOCwtMjAuNGMtMCwwIDQuNCw3LjEgMi4yLDIyLjZjLTEuMiw4LjUgLTUuNCwxNiAtMTAuMSwxMS44Yy0yLjEsLTEuOCAtMywtNi45IDMuMSwtMTRaIiBzdHlsZT0iZmlsbDojZmZmO2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik02NCw1NC4yYy0zLjMsLTQuOCAtOC4xLC03LjQgLTEyLjMsLTEwLjhjLTIuMiwtMS43IC0xNi40LC0xMS4yIC0xOS4yLC0xNS4xYy02LjQsLTYuNCAtOS41LC0xNi45IC0zLjQsLTIzLjFjLTQuNCwtMC44IC04LjIsMC4yIC0xMC42LDEuNWMtMS4xLDAuNiAtMi4xLDEuMiAtMi44LDJjLTYuNyw2LjIgLTUuOCwxNyAtMS42LDI0LjNjNC41LDcuOCAxMy4yLDE1LjQgMjQuMywyMi44YzUuMSwzLjQgMTUuNiw4LjQgMTkuMywxNmMxMS43LC04LjEgNy42LC0xNC45IDYuMywtMTcuNloiIHN0eWxlPSJmaWxsOiNiNGI0YjQ7ZmlsbC1ydWxlOm5vbnplcm87c3Ryb2tlOiMwMDA7c3Ryb2tlLXdpZHRoOjFweDsiLz4KICAgICAgICAgICAgPHBhdGggZD0iTTM4LjcsOS44YzcuOSw2LjMgMTIuNCw5LjggMjAsOC41YzUuNywtMSA0LjksLTcuOSAtNCwtMTMuNmMtNC40LC0yLjggLTkuNCwtNC4yIC0xNS43LC00LjJjLTcuNSwtMCAtMTYuMywzLjkgLTIwLjYsNi40YzQsLTIuMyAxMS45LC0zLjggMjAuMywyLjlaIiBzdHlsZT0iZmlsbDojZmZmO2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgPC9nPgogICAgPC9nPgo8L3N2Zz4=)](https://scverse.org) -[![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org) +[![PyPI][badge-pypi][pypi]] +[![Tests][badge-tests]][tests] +[![Documentation][badge-docs]][documentation] +[![Powered by scverse][badge-scverse]][scverse] +[![Powered by NumFOCUS][badge-numfocus]][numfocus] + 
+[badge-tests]: https://img.shields.io/github/actions/workflow/status/scverse/mudata/test.yaml?branch=main +[badge-docs]: https://img.shields.io/readthedocs/mudata +[badge-pypi]: https://img.shields.io/pypi/v/mudata +[badge-numfocus]: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A +[badge-scverse]: https://img.shields.io/badge/scverse-core-black.svg?labelColor=white&logo=data:image/svg%2bxml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+PCFET0NUWVBFIHN2ZyBQVUJMSUMgIi0vL1czQy8vRFREIFNWRyAxLjEvL0VOIiAiaHR0cDovL3d3dy53My5vcmcvR3JhcGhpY3MvU1ZHLzEuMS9EVEQvc3ZnMTEuZHRkIj4KPHN2ZyB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiB2aWV3Qm94PSIwIDAgOTEgOTEiIHZlcnNpb249IjEuMSIKICAgIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIKICAgIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4bWw6c3BhY2U9InByZXNlcnZlIgogICAgeG1sbnM6c2VyaWY9Imh0dHA6Ly93d3cuc2VyaWYuY29tLyIgc3R5bGU9ImZpbGwtcnVsZTpldmVub2RkO2NsaXAtcnVsZTpldmVub2RkO3N0cm9rZS1saW5lam9pbjpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoyOyI+CiAgICA8ZyBpZD0iRWJlbmVfMyI+CiAgICAgICAgPGc+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNSw4OS42Yy0yMi4zLC0zLjQgLTMwLjYsLTE5LjggLTMwLjYsLTE5LjhjMTAuOCwxNi45IDQzLDkuMSA1Mi45LDIuNWMxMi40LC04LjMgOCwtMTUuMyA2LjgsLTE4LjFjNS40LDcuMiA1LjMsMjMuNSAtMS4xLDI5LjRjLTUuNiw1LjEgLTE1LjMsNy45IC0yOCw2WiIgc3R5bGU9ImZpbGw6I2ZmZjtmaWxsLXJ1bGU6bm9uemVybztzdHJva2U6IzAwMDtzdHJva2Utd2lkdGg6MXB4OyIvPgogICAgICAgICAgICA8cGF0aCBkPSJNODMuOSw0My41YzIuOSwtNy4xIDAuOCwtMTIuNSAwLjUsLTEzLjNjLTAuNywtMS4zIC0xLjUsLTIuMyAtMi40LC0zLjFjLTE2LjEsLTEyLjYgLTU1LjksMSAtNzAuOSwxNi44Yy0xMC45LDExLjUgLTEwLjEsMjAgLTYuNywyNS44YzMuMSw0LjggNy45LDcuNiAxMy40LDljLTExLjUsLTEyLjQgOS44LC0zMS4xIDI5LC0zOGMyMSwtNy41IDMyLjUsLTMgMzcuMSwyLjhaIiBzdHlsZT0iZmlsbDojMzQzNDM0O2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik03OS42LDUwLjRjOSwtMTAuNSA1LC0xOS43IDQuOCwtMjAuNGMtMCwwIDQuNCw3LjEgMi4yLDIyLjZjLTEuMiw4LjUgLTUuNCwxNiAtMTAuMSwxMS44Yy0yLjEsLTEuOCAtMywtNi45IDMuMSwtMTRaIiBzdHlsZT0iZmlsbDojZmZmO2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik02NCw1NC4yYy0zLjMsLTQuOCAtOC4xLC03LjQgLTEyLjMsLTEwLjhjLTIuMiwtMS43IC0xNi40LC0xMS4yIC0xOS4yLC0xNS4xYy02LjQsLTYuNCAtOS41LC0xNi45IC0zLjQsLTIzLjFjLTQuNCwtMC44IC04LjIsMC4yIC0xMC42LDEuNWMtMS4xLDAuNiAtMi4xLDEuMiAtMi44LDJjLTYuNyw2LjIgLTUuOCwxNyAtMS42LDI0LjNjNC41LDcuOCAxMy4yLDE1LjQgMjQuMywyMi44YzUuMSwzLjQgMTUuNiw4LjQgMTkuMywxNmMxMS43LC04LjEgNy42LC0xNC45IDYuMywtMTcuNloiIHN0eWxlPSJmaWxsOiNiNGI0YjQ7ZmlsbC1ydWxlOm5vbnplcm87c3Ryb2tlOiMwMDA7c3Ryb2tlLXdpZHRoOjFweDsiLz4KICAgICAgICAgICAgPHBhdGggZD0iTTM4LjcsOS44YzcuOSw2LjMgMTIuNCw5LjggMjAsOC41YzUuNywtMSA0LjksLTcuOSAtNCwtMTMuNmMtNC40LC0yLjggLTkuNCwtNC4yIC0xNS43LC00LjJjLTcuNSwtMCAtMTYuMywzLjkgLTIwLjYsNi40YzQsLTIuMyAxMS45LC0zLjggMjAuMywyLjlaIiBzdHlsZT0iZmlsbDojZmZmO2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgPC9nPgogICAgPC9nPgo8L3N2Zz4= # MuData – multimodal data -[Documentation](https://mudata.readthedocs.io/) | [Publication](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02577-8) +[Documentation][] | [Publication][muon paper] | [Changelog][] -For using `MuData` in multimodal omics applications see [`muon`](https://github.com/scverse/muon). +For using `MuData` in multimodal omics applications see [muon][]. 
## Data structure -In the same vein as [AnnData](https://github.com/theislab/anndata) is designed to represent unimodal annotated datasets in Python, `MuData` is designed to provide functionality to load, process, and store multimodal omics data. +In the same vein as [AnnData][] is designed to represent unimodal annotated datasets in Python, `MuData` is designed to provide functionality to load, process, and store multimodal omics data. ``` MuData .obs -- annotation of observations (cells, samples) .var -- annotation of features (genes, genomic loci, etc.) - .obsm -- multidimensional cell annotation, + .obsm -- multidimensional cell annotation, incl. a boolean for each modality that links .obs to the cells of that modality - .varm -- multidimensional feature annotation, + .varm -- multidimensional feature annotation, incl. a boolean vector for each modality that links .var to the features of that modality .mod @@ -49,13 +56,13 @@ from mudata import MuData mdata = MuData({'rna': adata_rna, 'atac': adata_atac}) ``` -If multimodal data from 10X Genomics is to be read, convenient readers are provided by [`muon`](https://github.com/scverse/muon) that return a `MuData` object with AnnData objects inside, each corresponding to its own modality: +If multimodal data from 10X Genomics is to be read, convenient readers are provided by [muon][] that return a `MuData` object with AnnData objects inside, each corresponding to its own modality: ```py import muon as mu mu.read_10x_h5("filtered_feature_bc_matrix.h5") -# MuData object with n_obs × n_vars = 10000 × 80000 +# MuData object with n_obs × n_vars = 10000 × 80000 # 2 modalities # rna: 10000 x 30000 # var: 'gene_ids', 'feature_types', 'genome', 'interval' @@ -87,10 +94,10 @@ md.write("pbmc_10k.h5mu/rna", adata) If you use `mudata` in your work, please cite the publication as follows: > **MUON: multimodal omics analysis framework** -> +> > Danila Bredikhin, Ilia Kats, Oliver Stegle > -> _Genome Biology_ 2022 Feb 01. doi: [10.1186/s13059-021-02577-8](https://doi.org/10.1186/s13059-021-02577-8). +> _Genome Biology_ 2022 Feb 01. doi: [10.1186/s13059-021-02577-8][muon paper]. You can cite the scverse publication as follows: @@ -98,11 +105,11 @@ You can cite the scverse publication as follows: > > Isaac Virshup, Danila Bredikhin, Lukas Heumos, Giovanni Palla, Gregor Sturm, Adam Gayoso, Ilia Kats, Mikaela Koutrouli, Scverse Community, Bonnie Berger, Dana Pe’er, Aviv Regev, Sarah A. Teichmann, Francesca Finotello, F. Alexander Wolf, Nir Yosef, Oliver Stegle & Fabian J. Theis > -> _Nat Biotechnol._ 2023 Apr 10. doi: [10.1038/s41587-023-01733-8](https://doi.org/10.1038/s41587-023-01733-8). +> _Nat Biotechnol._ 2023 Apr 10. doi: [10.1038/s41587-023-01733-8][scverse paper]. [//]: # (numfocus-fiscal-sponsor-attribution) -mudata is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). +mudata is part of the scverse® project ([website][scverse], [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS][]. If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs.
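As a counterpart to the `md.write("pbmc_10k.h5mu/rna", adata)` call shown in the hunk above, a minimal sketch of reading the container back; the file name is hypothetical, and the sub-path form assumes `read()` accepts the same modality addressing as `write()`:

```py
import mudata as md

# Read the whole container back from disk.
mdata = md.read_h5mu("pbmc_10k.h5mu")  # hypothetical file

# Individual modalities are plain AnnData objects.
adata_rna = mdata["rna"]

# Sub-path addressing, mirroring the write example above.
adata_rna = md.read("pbmc_10k.h5mu/rna")
```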
@@ -113,3 +120,14 @@ If you like scverse® and want to support our mission, please consider making a >
+ +[tests]: https://github.com/scverse/mudata/actions/workflows/test.yaml +[documentation]: https://mudata.readthedocs.io +[changelog]: https://mudata.readthedocs.io/en/latest/changelog.html +[pypi]: https://pypi.org/project/mudata +[numfocus]: https://numfocus.org +[scverse]: https://scverse.org +[muon]: https://github.com/scverse/muon +[anndata]: https://github.com/scverse/anndata +[muon paper]: https://doi.org/10.1186/s13059-021-02577-8 +[scverse paper]: https://doi.org/10.1038/s41587-023-01733-8 diff --git a/biome.jsonc b/biome.jsonc new file mode 100644 index 0000000..9f8f220 --- /dev/null +++ b/biome.jsonc @@ -0,0 +1,17 @@ +{ + "$schema": "https://biomejs.dev/schemas/2.2.0/schema.json", + "vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": true }, + "formatter": { "useEditorconfig": true }, + "overrides": [ + { + "includes": ["./.vscode/*.json", "**/*.jsonc"], + "json": { + "formatter": { "trailingCommas": "all" }, + "parser": { + "allowComments": true, + "allowTrailingCommas": true, + }, + }, + }, + ], +} diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index 02fa90c..0000000 --- a/docs/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -**/generated/ -build/ diff --git a/docs/Makefile b/docs/Makefile index d0c3cbf..d4bb2cb 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -5,8 +5,8 @@ # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build +SOURCEDIR = . +BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: diff --git a/docs/_static/img/mudata.png b/docs/_static/img/mudata.png new file mode 100644 index 0000000000000000000000000000000000000000..2fbc9c4c4bb8154b5fc90687f50c53376d0d639a GIT binary patch literal 38538 zcmb5VWl&t-6E--wTW}|6a0u=Y2rj|h-3jgt5+u0m0KpRo?mD;xcXxLSgR><0{dd1? 
zo;)U=r;8n||B1VuRI|rrF62FhK{7E33C@u4aWq@&@N6q9i*|UbT@{BUQa_Rr4CEg= zbl>ICB+9vw(FhdBzE&H%j>fg;6e1F_oTwf4tmL3o{dzHq$ejPQL zU5I6Upv!7*(xXhjyHN)`)NMVN^hlOs zb=PDxQzPX8wRH=p&aQg)7a#R!#vQVo{BqRK4DSTky|$2E9eQUg>{e8@YO+qU#zmU1 zFkfxwshaN3#EZC=PRqIfh3ZlrZf=J)=2LD;dw*=i{Z%!4=02L4KAk6kY$T5oZ81aFe?i3*+9&?fHv=JITe zFShvb>J9jjX*y?!Y*8sClH+a1msHjcZw}PI;VDD6f8faa<8ihj@=Hg2sTc@lY?sUf zD3V_zHoo%jdP2&$+(9)8?~^-E@O1#l0M7!^YovR*xhscUU zD){m{nt83RXq4CCAGW)>rbJtWuE$K<{7X9#HB@}U??n}yGs(pgf-Qcfwd-};pH zZc@$SwGGNc6iNA9q-mnT!a(5nd_?YI@gMtrAr0p3>pRIS@ZVySbU=E|&tc6K^_rU8ovBp{DReqmjR zc|Z{a{6{in;?V&Q(fC{A%a321o5_n^+3g{>@m^%dqcFd^DrV z)>QgUAazY>fUCvRW!?h8{fl&p?=6~c5EcHW3#bjb4ufQQTj?|U=Bk>dhIZQU3Uq~1 z-5fcA?3C#M8sYNvn7C87liSIwo2E*z_t3KhuR&BqoLtf6X7qe@%S$8dLn=Br|4jlu zg)AIp`#PVw;VI`npklCE&-OG$=v`%4i0+{~fYjsnhXRSHE9GwZf~JdCb?vd~^bJse zZKZ+~oKu?tUuCY@Xu-?fPzU4wt`Z<{#q1#SDKns%G(ibR4$CLQ#enSxomF|4qT00% zIpsXMKREEU?Tm=c&}#Lm^IT-VOo9(S$4cEH=n^jV!}y9Tx)BkG)%+lH9^uB2=%K9# zRRR^`N*gg*kR~=)?N{CFCup)?o+sZ}ndP~QY*{zb4XksDB$jWS`z_x<8Ma>2TjIkr zORQPBSLPw3>J&?ER|{V}7}}ZAGJus`NEn?wLs@|B28!PvIULs^Zg6C>LgXYHu4wd# zBH`&N6}Do^Eqnay9Ct<+O}0Ij+s@5B(-IidOe}elS;jOos1xXgtCFqk|2(Gw;bsQ} zlfrrDM3Q2N+Hilqh=!ZpX6VT92ZAjU57ygxV67b5@HvEF?PQ8<0-iuLbo1xLJM(M2 zP$VCH$C1_YPM%8V?-HNkx3|=rC=Dx(F_Rdsy8}jlJ#3d`*E~|SF_Z3R)x1EzcS=CD zJe+HR!dW#pBk~>5EKm0gMzCoFc>V4DMp~Z|$Rau%EU&b6(OqqAzdx4Im_HX%8F-deaFZe_?>u~ z3fJN9l;t~L%IP$L#sk?#@QLTaC|AqJX-jRlq;8hi>9r|RyYZ$;+Dlr>*pG|% zR;go7__}|qKyREL>jH_{*6hu88r53TURtlqE-a5f04*J?X@>qQMJm(D16L`|uO~OJ z{ATz2vP$VQsjIibYMQk%(`jFs7(IR zyW>jQl6}b{5p$jAV2=K?U-3QGXV6(>cD=OI2=%y;hP|r?P!_ ziF?=PGuDyqbe))-8o6H`29nb;emn$?sx{j;v3@zBdFN|ESGKYHbq9`IF88S=m=&## z<&g=&eyhiTpFm%cRS)1DgtvtkH3kE^UCW?Gvch$3c4}6#{NU$ZdYFbzL$m7rAQOIW zNgzs=pt)~Kot2o@i9tr-C$D)nofe%DUe_Z;Q*VhR`1+&ZB9o^-0h$^b^(VDvjHXN# zfJ}x*bQg3IQI|J|`rJ#cWNr7R+aOTAlg-KqsCq&S<-uWIx#NlquaQos|AzLaQ_UVj z_VJAF&PXE;l_H*X7C{v`D?&3&9S&E@WkJ#m8z8YFxy;$3M*$IQn-ldKrXah0yEHz0 z{bU!AQaLCDx*~{^L%myD{}469ypJyDhFU%#Hmy5T>O-w$85;33CyEh*cKJTUX39C2 zczpWzR}Dl>>Juirh=u5pQMpfnk#Djbuj>_X=6ZmVUyhFj`IkCqnJdzYo{McQ``G%9 zBRqY5J(d=K3_UIY{@S-oL4y7x_I$1cOahD|Ht*GX>57{Rg9?MS4k>-n4>`KfM7fWF z0~%nY*T_S~w(DrX)kKz1`O`^lur>k+1?Tum0oHE2%^(BqL~f~LNCgr`BCzQw0}ek# z#>PTqW8fL*M#05ySw=@S^Q#UtUQzyd^Q=lAYQ;Jd&BW@Yr?B)(*9{s9y)%uC^X1_| zBe8(RNxI#{*!zIjI2ULMUQLOW1c-%0SYcDD)lsMd()fR|xaor$&( zDOkkSp4h<$RRQnd%H>d{ZMN60akkU8V;yec2$yE4G<|+DNOLI} zgEcTugM2MA4=ZnixKh5fcSzOnkV(yc!CDE8)3d-m2^j8A54*(2^;y4hZ5v>#UBN~v$Hl4z+vvQ)8GTyqcV z+5DQ+_iq!!|9o^Pp_1_5KhW?ASg3IP|9KGw!yOSyu>X8W0gHw%Fq^>^f7WlZ~Aps(}OPyS8EpCL5K!{=~dy;)$O;-92gf*;uy-OUiXLfi^jsH zOzWKnc6OC`@q_KBgP_L~*=WLfEP11_;Qwr_{nyWtkql5CCh7ZA@|i|G$O~H?Z9{Ov zqH|ZT_bYkpZFI_J#ZYV7|J^;hJQ{?E?uVH8lb7qNJz6 z$>qY0tT1kJ{9)=+y1on$!^CiDYirLLxs3C5 zYRo$rd7sq#G8h~D{@hI!5D7z5bAt>Whli(fe>*!nCdzp6dV`lU6DSk9BVCgGYgxW4 z&FI~`l@qmobixlF#ZQ3|&l~@$P_9W@qslBqiW%P*8J&;NWULN=O=F6R2s@Vn}MPUiuE3 z+t_#)@b%BVHB3qW*FQ!uxt~7tIAv-~UZoh=5;<6ZcbvS!LUPOlWq+U%MtY(kj(>vn zF#%R*9e!SW%bbLFwBaRPgJp^hJ!#y18Dnbc|enLz}GVwsZBM z1|uj;7)^oOv{b{wP`caVP4niHrrW zr_oe}1X=gke8T4ynD3azg&+sS^Y1VYL}D!~8#`-)qJPMi=3 zHu~3-d1Sl)JwHFsf&FlMG+%=k5U+3hw&=D-*4o;-y%~r5?_}HQy<6Ob6^-UKO}>24 z;_dY{xpm9~VgP)GmA6iw`hCjJ5`amV;o?h0gbz;9MrU?fsMIE4@=8@}zB=zaHoq!R z{G{bn{%HO}grgD^4bunLj|k%r`{cX44>XSXm)sbs1Ns9 zEInGZ)p8;;&9>phN9;`+=iy`Ny(f&ng!Km1qyd;lUUDA@pY(qRzykigy9NQsi$KvkFdl{&;e5bDgc2_y3)Tmv zODph@Vqtkbmsbe`hE@CK0}M|LS8Lhf7to`d_ABy>UDeS-x?MKz!g45$bE!DGpTox7 z)voi~x=Q`BRT$_LYlG$OoC`;2QD0b;GQLsI))M{`0`-0NsW9+Hn5mzGu{>in&MnbC&ZIV6Ud>?3j(DQo8Ast)K-sM)XJOHE-e)l6tu!Op(MkeT}Vqy zn^er+B5T(RTt9(}KvWSjG_8Z%7Y$$kG&QcD>UWNiz!>Y5k8=+?fiX5r1M_2PE-ySp 
zz{$gASYy!UENYj0bu0DjgCjBnMz{``2%}T^TL#UX$ss;45TsG<8$uXHLbOn$?F)}F z;##Tzx&VLpfC$4m<&~7VUqwfSR)p-v$P3AlrWDPUf9}{@v}yY z6eje&?o1$5J#_XEN*RmB04!axr6tpH2J>K6N+wW?Dw!+9!%iv<@FsnZwn_Y zi{t6QpRsoQ0_pDVu7^Rwi|ng@HlJI8k|lF-cJ^xA70@p7NgN7spu^9bKs+m!ueJy$%sXbq}?{BDeo;fwihX-62g8^D$*Q;GJiPCPs`}_r7>TI zUTFXB(K0mMXDuKbclOj)7VVpA7_s%)+1fe%R*bG}74X(|*QBiNNh>lE(kph=&Kt#R zVRiKxSSs(Flb`L6zwYnsx42hdn2i?u{^OSV+Y3W>-J7S*&)q>r`P2KtBZD9>T}w;I z@UU48cGVGMU+Z7DH8+?lrqt-6R_Hkcg2Af)P*^)DB9jPhVsO{F*;_*&i=qxd8J|N2 zEg&&XL5K(5$eK8>N@e3GybBMya-ylx%NM{=(1q@#fe8){_5`-S|3nxm!%t>nd%_5F zOwQC&qv6ageqR0ySijJ+=>_FrD){qsiD{K)!->k|`qs|QH)h+DCzze1HSvw#U6XfE z9vu(5$~EI|#*yGeUHg6onOrZ^c^Sr!l@kviF|#!R;bz7wmCX9<%)f(U;`fZc_=BC0 zQH}_ae0h0MgKeT$pF0i?XrW0M>PAuaiT+rn??}nZ&3*a*x;yW$Cbn=5PePL#5l{pK zjS5JWDjkJTMT#=?rU-~sL8O`lR0vW$z~LZWv4BID6OfJ`0Ra&dLVys8(yNMuk`V4# z{)M~NU3Y$*nYCxn-rxS-=ly1_*(sCNQ0nHrKTAKXR9j06+yCf;yJ4q$Wu2OXRz8;f zY_N9}*(t0J{{2K|RQ7T?ajKeR@HqGasaucV@rc&A+D$nq%I%S~yR=|V)SRaH19#M9 z*V&JWQ>^aj11=w2lZ~IOixRa@pN4Z-r8+cMw&Zj4OcP!$Q-a$|6- zg|Chg_U@aEgid+wk+x9(VF#xJn0z?7GNOLKEE31)mh=)srOTGio%X{Z! zCI6A&4Xm?Efl}GmS6^HYelC`Xx#G^x>D|}r1;d&U!e1Vah{=Z4USM|HXaz4zT){7k`MU6Bo& z--?4YKYc1*)!*6C!H3cnyEc277w}jaCF1567e&r_8E?xEQ7GEC{r&xKoj8!4BpFBTvXeVl z7Xc4&PfB*4ev++>SA2Xt4X7&oqA>?(L|qgaM^dTOZ?(0x&6w%afSSF)>9bNlcVUMD zsTjrmX}F-2RR25Q;Sld_33sK%b=rYG_fjQt-feU|#Idr(=s&@YFdsLd&QMm6X-xBMTaK?lobqQxf%Ri1u0#jU29PJgD+XfsEH)Rx zUoY79aYk=&;e0Q)$viL7tf~lapVLJGWaq!&o$MQRn5dXHsHmahk_L}}E(`#q>LwbCkP$~ zuw0W0{EkZM>a`)gIcior(QjA)15I8@7@fI+69(;mz!>W!mOm1MVg6%TyeIU^yVGe! z>32+xvJ>9*s2^vr+R72_NV;!48^7wh$Yd`+&QD|J= zTXdD;9cs0*mB}1Mw6#BuekE&P;UXAwdcV4|GP>GfBL?sC`B%Old{aieuvK+^#y|b8~l|FeZ^gp-*EGcoJ6HimyVfOr167#T1HKS{Kk!=p>9~!c&k>L3)_D+3U z7nfx?>#l2JWjDNXkD*(6#Y-2vdfg~gYO@MnGqy+#_KG5f=N@3-ORgd5`YPjpDLuce zZyQ$FHs!f+v*ATH&Tl2D1r?QfBQR=@X6&hiV=@$2kpcXU*b%QRH49pn`Lm*ZURk)u z28Eoq8^ygY_CY~W?DN08M)_P}Y+;@AtKg~kQJJvb)7VZ6t%}_TIUIxn|AK z9~tT4gRvz{;Cp>V^drcPf+6?ZE_WzWK!dFk=;t}6`r z+_=s|^%e9H*z16-Q?U40>J_%Sf)6&5BpdRQUm=E*FCQi9)D_LWR={N(pld!eLuiYuAwZx#ysq+r)g z8Q{>4QA%wAH&$x2j5u-O#DPY8-*9bmn7%+FMjWUz*@|&?rYP{Ps+@xBHER2s*4O%` zCvr?rc;8!ke(}YT_=JQY;51ng(maTb-_b@IY0s^1i!!D|gmn6rLc}^*zG?$g?-$ap zlegZ)ZYEsUVReY_mYT%UK z+l;i4cWa7x8%73gkjNjn&oF{>0iW z1y?7F69;?_dty-@KCKZr0Hu){T1Oa@MKA68FH2n>XdHy8m%UBfZ^Ic>Jd~y~)fo&$ zl_||C!QDrGErC{#qH7rH-JEmk$X8r#CueSz5y_gVp;-imm3r>3S;l|rm%FZpyt|R7 zf|<4i@`0mhll$-7UC7M>ZCv~#`#gd`UCvzOJjGh1(a?`5k3J0QB65`n3yYx6h6=hT zPzp2|U}zvo0P{3YVOIhOWUYEMmJBgrQs+ok((k#d9R4@uOh*}J;j%psU>gR9%wE<~ z!=5dvGqir}Bk)((RpxBucsRImbMKQhK1efl-H{x&W=%>#zm&qm;t{lu!F3+-p4HlN zIU@aw2KhcAg#EW*wx7u8ic|7d!1F%71eX!S0Fx&nw|>+{=wSKqX3p_$(t;J8vulH^ zB@k@+^n zy~GjxX~z*UcAz4AumWRp3};Ul1PnKJ@_J^(5y5&gK3T;G4equ>n5)O|m#R-`xceqY zCO`@Xr>qsV4KNB9Nev^PqzqvLKz52oBh64YJE`IvlLojltIp?>G41p7^Cs5YQSII^ ziXB0J-?skt0np={R0gQnes^YpUI`SGWc(xvA&4Z4%{?$OdRgbM-iSE6wSm6{RB{FR zXz1v~=&4UYYt{faL+=BNHQxt$eCd~OTMN+6MC{HDeoa(cvG8v`Ls*k+{-y=Fad_n8 zT(XqBr6YJ{vKoh8>RIcH;eS#%3SO2hh_U2RM9%=Eg(E?rlw?NNF>p8h(F%kpgzSDq zUb|MZ81lJCv6WGX4$(4Kc`&8^X7})3ei^ZkG{3YSORuQ3k&@er)R`y=$?U80Mpna~ z$^}FTrip%xh}^CL6y}$gbvjVvi?Dlyk>E8D2wm!h7Q+(~5>W6dKR}hS440Y?fV)Qx z&Q}>SkD;^LzkECMd|+Xw-epx2)Ro9w3$U+I3f@cMT~rV7@R+pZ*A#AB34>A+->Jd& zIswJUSHbqxrCyPE;Io%<_Iaae0e}e>Q`OMv4t0Ul%WyMkOeRwb^VUcJSKRUNE;N2| z#~S~L`6pz zJNr$rOr8bv70>}gqc~*5MGUt2ItgkNSqX4Gk4_twI*^zLL3HG>__G9=2Sg%~I_-lk ztm0O9byPiI^j`sRLz+MGB!13z;}n2`%Y5!T2#Jnc>G>B8O47S%KuiJM2rW7RR?wm@T>1^) z;iSn#6r7B+8G`mUN$}P$xd3ZfEw7i!pyUfEY({M7W2{f ze4hw4NZ@^;oxfWRKF?mqsl2e2(SjqI$GnGe23ows)Bl2!&x%jm?jA?e6%?LUd!Q#%9=&#ze^yg=7SVb=@vA)(KR3NWD 
+> Given a version number MAJOR.MINOR.PATCH, increment the:
+>
+> 1. MAJOR version when you make incompatible API changes,
+> 2. MINOR version when you add functionality in a backwards compatible manner, and
+> 3. PATCH version when you make backwards compatible bug fixes.
+>
+> Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
+
+Once you are done, commit your changes and tag the commit as `vX.X.X`.
+Push the tag.
+This will automatically release to [PyPI][].
+
+[semver]: https://semver.org/
+[managing GitHub releases]: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
+[pypi]: https://pypi.org/
+
+## Writing documentation
+
+Please write documentation for new or changed features and use-cases.
+This project uses [sphinx][] with the following features:
+
+- The [myst][] extension allows writing documentation in markdown/Markedly Structured Text
+- [Numpy-style docstrings][numpydoc] (through the [napoleon][numpydoc-napoleon] extension).
+- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks))
+- [sphinx-autodoc-typehints][], to automatically reference annotated input and output types
+- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/)
+
+See scanpy’s {doc}`scanpy:dev/documentation` for more information on how to write your own.
+
+[sphinx]: https://www.sphinx-doc.org/en/master/
+[myst]: https://myst-parser.readthedocs.io/en/latest/intro.html
+[myst-nb]: https://myst-nb.readthedocs.io/en/latest/
+[numpydoc-napoleon]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
+[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
+[sphinx-autodoc-typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
+
+### Tutorials with myst-nb and jupyter notebooks
+
+The documentation is set up to render jupyter notebooks stored in the `docs/notebooks` directory using [myst-nb][].
+Currently, only notebooks in the `.ipynb` format are supported; they will be included with both their input and output cells.
+It is your responsibility to update and re-run the notebook whenever necessary.
+
+If you are interested in automatically running notebooks as part of the continuous integration,
+please check out [this feature request][issue-render-notebooks] in the `cookiecutter-scverse` repository.
+
+[issue-render-notebooks]: https://github.com/scverse/cookiecutter-scverse/issues/40
+
+#### Hints
+
+- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`.
+  Only if you do so can sphinx automatically create a link to the external documentation.
+- If building the documentation fails because of a missing link that is outside your control,
+  you can add an entry to the `nitpick_ignore` list in `docs/conf.py`.
+
+(docs-building)=
+
+### Building the docs locally
+
+:::::{tabs}
+::::{group-tab} Hatch
+
+```bash
+hatch run docs:build
+hatch run docs:open
+```
+
+::::
+
+::::{group-tab} uv
+
+```bash
+cd docs
+uv run sphinx-build -M html . _build -W
+(xdg-)open _build/html/index.html
+```
+
+::::
+
+::::{group-tab} Pip
+
+```bash
+source .venv/bin/activate
+cd docs
+sphinx-build -M html . _build -W
+(xdg-)open _build/html/index.html
+```
+
+::::
+:::::
diff --git a/docs/img/mudata.png b/docs/img/mudata.png
deleted file mode 100644
index faa33fd634aa4c333bbe182d8e8a92048c45bbc7..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 16338
[... base85-encoded binary image data omitted ...]
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,66 @@
+
+
+## MuData objects as containers
+
+The `mudata` package introduces multimodal data objects ([MuData class](#mudata.MuData)) allowing Python users to work with increasingly complex datasets efficiently and to build new workflows and computational tools around them.
+
+```
+MuData object with n_obs × n_vars = 10110 × 110101
+  2 modalities
+    atac: 10110 x 100001
+    rna: 10110 x 10100
+```
+
+MuData objects enable multimodal information to be stored & accessed naturally, embrace [AnnData](https://github.com/theislab/anndata) for the individual modalities, and can be serialized to `.h5mu` files. [Learn more about multimodal objects](io/mudata.md) as well as [file formats for storing & sharing them](io/output.md).
+
+## Natural interface
+
+MuData objects feature an AnnData-like interface and familiar concepts such as *observations* and *variables* for the two data dimensions. Get familiar with MuData in the [Quickstart tutorial](notebooks/quickstart_mudata).
+
+## Handling MuData objects
+
+A flagship framework for multimodal omics analysis — `muon` — has been built around the MuData format. Find more information on it [in its documentation](https://muon.readthedocs.io/en/latest/) and [on the tutorials page](https://muon-tutorials.readthedocs.io/en/latest/) as well as in the corresponding publication {cite:p}`bredikhin_2022`.
+
+```{eval-rst}
+.. toctree::
+   :hidden:
+   :maxdepth: 1
+   :caption: Getting started
+
+   notebooks/quickstart_mudata.ipynb
+   notebooks/nuances.ipynb
+   notebooks/axes.ipynb
+   notebooks/annotations_management.ipynb
+
+.. toctree::
+   :hidden:
+   :maxdepth: 1
+   :caption: Documentation
+
+   install
+   io/input
+   io/mudata
+   io/output
+   io/spec
+   api
+   changelog
+   contributing
+   references
+```
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 0000000..4b5c034
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,32 @@
+# Install mudata
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob: *
+```
+
+## Stable version
+
+`mudata` can be installed [from PyPI](https://pypi.org/project/mudata) with `pip`:
+
+```console
+pip install mudata
+```
+
+## Development version
+
+To use a pre-release version of `mudata`, install it [from the GitHub repository](https://github.com/scverse/mudata):
+
+```console
+pip install git+https://github.com/scverse/mudata
+```
+
+## Troubleshooting
+
+Please consult the details on installing `scanpy` and its dependencies [here](https://scanpy.readthedocs.io/en/stable/installation.html).
+If there are issues that have not been described, addressed, or documented, please consider [opening an issue](https://github.com/scverse/mudata/issues).
diff --git a/docs/io/input.md b/docs/io/input.md
new file mode 100644
index 0000000..13b0705
--- /dev/null
+++ b/docs/io/input.md
@@ -0,0 +1,89 @@
+# Input data
+
+A default way to import `MuData` is the following:
+
+```python
+from mudata import MuData
+```
+
+There are various ways in which the data can be provided to create a MuData object:
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob: *
+```
+
+## AnnData objects
+
+A MuData object can be constructed from a dictionary of existing AnnData objects:
+
+```python
+mdata = MuData({'rna': adata_rna, 'atac': adata_atac})
+```
+
+AnnData objects themselves can be easily constructed from NumPy arrays and/or Pandas DataFrames annotating features (*variables*) and samples/cells (*observations*). This makes it a rather general data format to work with any type of high-dimensional data.
+
+```python
+from anndata import AnnData
+adata = AnnData(X=matrix, obs=metadata_df, var=features_df)
+```
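To tie the two constructors together, here is a minimal, self-contained sketch; the dimensions, modality names, and random counts below are made up purely for illustration:

```python
import numpy as np
from anndata import AnnData
from mudata import MuData

n_obs = 100  # toy number of cells, shared across both modalities

adata_rna = AnnData(X=np.random.poisson(1.0, size=(n_obs, 50)).astype(np.float32))
adata_atac = AnnData(X=np.random.poisson(1.0, size=(n_obs, 80)).astype(np.float32))

# Give each modality its own feature names so that var_names stay unique across modalities
adata_rna.var_names = [f"gene_{i}" for i in range(adata_rna.n_vars)]
adata_atac.var_names = [f"peak_{i}" for i in range(adata_atac.n_vars)]

mdata = MuData({"rna": adata_rna, "atac": adata_atac})

# Observations are shared, variables are concatenated across modalities
print(mdata.shape)  # (100, 130)
```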
+
+Please see more details on how to operate on AnnData objects [in the anndata documentation](https://anndata.readthedocs.io/).
+
+## Omics data
+
+When data formats specific to genomics are of interest, specialised readers can be found in analysis frameworks such as [muon](https://muon.readthedocs.io/). These functions, including the ones for Cell Ranger count matrices as well as Snap files, [are described here](https://muon.readthedocs.io/en/latest/io/input.html).
+
+
+## Remote storage
+
+MuData objects can be read and cached from remote locations, including via HTTP(S) or from S3 buckets. This is achieved via [`fsspec`](https://github.com/fsspec/filesystem_spec). For example, to read a MuData object from a remote server:
+
+```python
+import fsspec
+import mudata
+
+fname = "https://github.com/gtca/h5xx-datasets/raw/main/datasets/minipbcite.h5mu?download="
+with fsspec.open(fname) as f:
+    mdata = mudata.read_h5mu(f)
+```
+
+A caching layer can be added in the following way:
+
+```python
+fname_cached = "filecache::" + fname
+with fsspec.open(fname_cached, filecache={'cache_storage': '/tmp/'}) as f:
+    mdata = mudata.read_h5mu(f)
+```
+
+For more `fsspec` usage examples see [its documentation](https://filesystem-spec.readthedocs.io/).
+
+### S3
+
+MuData objects in the `.h5mu` format stored in an S3 bucket can be read with `fsspec` as well:
+
+```python
+storage_options = {
+    'endpoint_url': 'localhost:9000',
+    'key': 'AWS_ACCESS_KEY_ID',
+    'secret': 'AWS_SECRET_ACCESS_KEY',
+}
+
+with fsspec.open('s3://bucket/dataset.h5mu', **storage_options) as f:
+    mdata = mudata.read_h5mu(f)
+```
+
+
+MuData objects stored in the `.zarr` format in an S3 bucket can be read from a *mapping*:
+
+```python
+import s3fs
+
+s3 = s3fs.S3FileSystem(**storage_options)
+store = s3.get_mapper('s3://bucket/dataset.zarr')
+mdata = mudata.read_zarr(store)
+```
diff --git a/docs/io/mudata.md b/docs/io/mudata.md
new file mode 100644
index 0000000..6fe7ed4
--- /dev/null
+++ b/docs/io/mudata.md
@@ -0,0 +1,181 @@
+# Multimodal data objects
+
+[MuData](#mudata.MuData) is a class for multimodal objects:
+
+```python
+from mudata import MuData
+```
+
+
+`MuData` objects comprise a dictionary of `AnnData` objects, one per modality, in their `.mod` attribute. Just like `AnnData` objects themselves, they also contain attributes like `.obs` with annotation of observations (samples or cells), `.obsm` with their multidimensional annotations such as embeddings, etc.
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob: *
+```
+
+## MuData's attributes
+
+Key attributes and methods of `MuData` objects as well as important concepts are described below. A full list of attributes and methods of multimodal containers can be found in the [MuData](#mudata.MuData) documentation.
+
+### `.mod`
+
+Modalities are stored in a collection accessible via the `.mod` attribute of the `MuData` object with names of modalities as keys and `AnnData` objects as values.
+
+```python
+list(mdata.mod.keys())
+# => ['atac', 'rna']
+```
+
+Individual modalities can be accessed with their names via the `.mod` attribute or via the `MuData` object itself as a shorthand:
+
+```python
+mdata.mod['rna']
+# or
+mdata['rna']
+# => AnnData object
+```
+
+### `.obs` & `.var`
+
+:::{warning}
+Version 0.3 introduces a pull/push interface for annotations.
+For compatibility reasons, the old behaviour of pulling annotations on read/update is kept as default.
+
+This will be changed in the next release, and the annotations will not be copied implicitly.
+To adopt the new behaviour, use [](#mudata.set_options) with `pull_on_update=False`.
+The new approach to `.update()` and annotations is described below.
+:::
+
+Sample (cell) annotations are stored in the data frame accessible via the `.obs` attribute. The same goes for `.var`, which contains annotation of variables (features).
+
+Copies of columns from `.obs` or `.var` data frames of individual modalities can be added with the `.pull_obs()` or `.pull_var()` methods:
+
+```python
+mdata.pull_obs()
+mdata.pull_var()
+```
+
+When the annotations are changed in the `AnnData` objects of modalities, e.g. new columns are added, they can be propagated to the `.obs` or `.var` data frames with the same `.pull_obs()` or `.pull_var()` methods.
+
+Observation columns copied from individual modalities contain the modality name as their prefix, e.g. `rna:n_genes`. The same is true for variable columns; however, if there are columns with identical names in `.var` of multiple modalities — e.g. `n_cells` — these columns are merged across modalities and no prefix is added.
+
+When there are changes directly related to observations or variables, e.g. samples (cells) are filtered out or features (genes) are renamed, the changes have to be fetched with the `.update()` method:
+
+```python
+mdata.update()
+```
+
+### `.obsm`
+
+Multidimensional annotations of samples (cells) are accessible in the `.obsm` attribute. For instance, these can be UMAP coordinates that were learnt jointly on all modalities, or [MOFA](https://biofam.github.io/MOFA2/) embeddings — a generalisation of PCA to multiple omics.
+
+```python
+# mdata is a MuData object with CITE-seq data
+mdata.obsm
+# => MuAxisArrays with keys: X_umap, X_mofa, prot, rna
+```
+
+In addition to such embeddings, this slot may contain boolean vectors, one per modality, indicating if samples (cells) are available in the respective modality. For instance, if all samples (cells) are the same across modalities, all values in those vectors are `True`.
+
+### Container shape
+
+The `MuData` object's shape is represented by two numbers calculated from the shapes of individual modalities — one for the number of observations and one for the number of variables.
+
+```python
+mdata.shape
+# => (9573, 132465)
+mdata.n_obs
+# => 9573
+mdata.n_vars
+# => 132465
+```
+
+By default, variables are always counted as belonging uniquely to a single modality, while observations with the same name are counted as the same observation, for which variables across multiple modalities have been measured.
+
+```python
+[ad.shape for ad in mdata.mod.values()]
+# => [(9500, 10100), (9573, 122364)]
+```
+
+If the shape of a modality is changed, [MuData.update](#mudata.MuData.update) has to be run to bring the respective updates to the `MuData` object.
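To make the last point concrete, here is a small, hypothetical sketch of how a change in one modality is propagated to the container. It assumes an in-memory `mdata` with an `"atac"` modality, along the lines of the examples above:

```python
# Filter cells in a single modality (the cutoff of 5000 is made up for illustration)
mdata.mod["atac"] = mdata.mod["atac"][:5000].copy()

# The container still reports the old dimensions until it is synchronised
mdata.update()

# After the update, mdata.shape, mdata.obs_names and mdata.var_names
# reflect the filtered "atac" modality again
print(mdata.shape)
```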
+
+### Keeping containers up to date
+
+:::{warning}
+Version 0.3 introduces a pull/push interface for annotations.
+For compatibility reasons, the old behaviour of pulling annotations on read/update is kept as default.
+
+This will be changed in the next release, and the annotations will not be copied implicitly.
+To adopt the new behaviour, use [](#mudata.set_options) with `pull_on_update=False`.
+The new approach to `.update()` and annotations is described below.
+:::
+
+Modalities inside the `MuData` container are full-fledged `AnnData` objects, which can be operated on independently with any tool that works on `AnnData` objects.
+When modalities are changed externally, the shape of the `MuData` object as well as metadata fetched from individual modalities will then reflect the previous state of the data.
+To keep the container up to date, there is an `.update()` method that syncs the `.obs_names` and `.var_names` of the `MuData` object with the ones of the modalities.
+
+
+### Managing annotations
+
+To fetch the corresponding annotations from individual modalities, there are [MuData.pull_obs](#mudata.MuData.pull_obs) and [MuData.pull_var](#mudata.MuData.pull_var) methods.
+
+To update the annotations of individual modalities with the global annotations, the [MuData.push_obs](#mudata.MuData.push_obs) and [MuData.push_var](#mudata.MuData.push_var) methods can be used.
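As a short illustration of the pull/push interface described above (assuming a `mdata` object with an `"rna"` modality and a dense `.X`; the column names are made up):

```python
import numpy as np

# A per-modality annotation, computed on the "rna" AnnData object
mdata["rna"].obs["total_counts"] = np.asarray(mdata["rna"].X).sum(axis=1)

# Fetch it into the global .obs, where it appears as "rna:total_counts"
mdata.pull_obs()

# A global annotation, defined on the MuData object itself
mdata.obs["batch"] = "batch1"

# Copy global annotations into each modality's .obs
mdata.push_obs()
```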
+
+
+### Backed containers
+
+To enable the backed mode for the count matrices in all the modalities, `.h5mu` files can be read with the relevant flag:
+
+```python
+mdata_b = mudata.read("filename.h5mu", backed=True)
+mdata_b.isbacked
+# => True
+```
+
+When creating a copy of a backed `MuData` object, the filename has to be provided, and the copy of the object will be backed at a new location.
+
+```python
+mdata_copy = mdata_b.copy("filename_copy.h5mu")
+mdata_copy.file.filename
+# => 'filename_copy.h5mu'
+```
+
+### Container views
+
+Analogous to the behaviour of `AnnData` objects, slicing `MuData` objects returns views of the original data.
+
+```python
+view = mdata[:100, :1000]
+view.is_view
+# => True
+
+# In the view, each modality is a view as well
+view["A"].is_view
+# => True
+```
+
+Subsetting `MuData` objects is special since it slices them across modalities. That is, the slicing operation for a set of `obs_names` and/or `var_names` will be performed for each modality and not only for the global multimodal annotation.
+
+This behaviour makes workflows memory-efficient, which is especially important when working with large datasets. If the object is to be modified, however, a copy of it should be created, which is not a view anymore and has no dependence on the original object.
+
+```python
+mdata_sub = view.copy()
+mdata_sub.is_view
+# => False
+```
+
+If the original object is backed, the filename has to be provided to the `.copy()` call, and the resulting object will be backed at a new location.
+
+```python
+mdata_sub = backed_view.copy("mdata_sub.h5mu")
+mdata_sub.is_view
+# => False
+mdata_sub.isbacked
+# => True
+```
diff --git a/docs/io/output.md b/docs/io/output.md
new file mode 100644
index 0000000..d796146
--- /dev/null
+++ b/docs/io/output.md
@@ -0,0 +1,77 @@
+# Output data
+
+In order to save & share multimodal data, the `.h5mu` file format has been designed.
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob: *
+```
+
+## `.h5mu` files
+
+`.h5mu` files are the default storage for MuData objects. These are HDF5 files with a standardised structure, which is similar to that of `.h5ad` files, where AnnData objects are stored. The most noticeable distinction is the presence of the `.mod` group, where individual modalities are stored — in the same way as they would be stored in `.h5ad` files.
+
+```python
+mdata.write("mudata.h5mu")
+```
+
+Inspect the contents of the file in the terminal:
+
+```console
+$ h5ls mudata.h5mu
+mod                      Group
+obs                      Group
+obsm                     Group
+var                      Group
+varm                     Group
+
+$ h5ls mudata.h5mu/mod
+atac                     Group
+rna                      Group
+```
+
+## AnnData inside `.h5mu`
+
+Individual modalities in the `.h5mu` file are stored in exactly the same way as AnnData objects.
+This, together with the hierarchical nature of HDF5 files, makes it possible to read individual modalities from `.h5mu` files as well as to save individual modalities to the `.h5mu` file:
+
+```python
+adata = mudata.read("mudata.h5mu/rna")
+
+mudata.write("mudata.h5mu/rna", adata)
+```
+
+The function [](#mudata.read) automatically decides, based on the input, whether [](#mudata.read_h5mu) or rather [](#mudata.read_h5ad) should be called.
+
+Learn more about the on-disk format specification shared by MuData and AnnData [in the AnnData documentation](https://anndata.readthedocs.io/en/latest/fileformat-prose.html).
+
+## `.zarr` files
+
+[Zarr](https://zarr.readthedocs.io/en/stable/) is a cloud-friendly format for chunked N-dimensional arrays, and it is another supported serialisation format for MuData objects:
+
+```python
+mdata = mudata.read_zarr("mudata.zarr")
+
+mdata.write_zarr("mudata.zarr")
+```
+
+Just as with `.h5mu` files, MuData objects saved in the `.zarr` format resemble how AnnData objects are stored, with one additional level of abstraction:
+
+```console
+$ tree -L 1 mudata.zarr
+mudata.zarr
+├── mod
+├── obs
+├── obsm
+├── obsmap
+├── obsp
+├── uns
+├── var
+├── varm
+├── varmap
+└── varp
+```
diff --git a/docs/io/spec.md b/docs/io/spec.md
new file mode 100644
index 0000000..0cc6779
--- /dev/null
+++ b/docs/io/spec.md
@@ -0,0 +1,61 @@
+# MuData specification [RFC]
+
+Building on top of the [AnnData spec](https://anndata.readthedocs.io/en/latest/fileformat-prose.html), this document provides details on the `MuData` on-disk format. For user-facing features, please see [this document](mudata.md).
+
+```python-console
+>>> import h5py
+>>> f = h5py.File("citeseq.h5mu")
+>>> list(f.keys())
+['mod', 'obs', 'obsm', 'obsmap', 'uns', 'var', 'varm', 'varmap']
+```
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob: *
+```
+
+## `.mod`
+
+Modalities are stored in a `.mod` group of the `.h5mu` file in alphabetical order. To preserve the order of the modalities, there is an attribute `mod-order` that lists the modalities in their respective order. If some modalities are missing from that attribute, the attribute is to be ignored.
+
+```python-console
+>>> dict(f["mod"].attrs)
+{'mod-order': array(['prot', 'rna'], dtype=object)}
+```
+
+## `.obsmap` and `.varmap`
+
+While in practice `MuData` relies on `.obs_names` and `.var_names` to collate global observations and variables, it also allows disambiguating between items with the same name using integer maps. For example, global observations will have non-zero integer values in `.obsmap["rna"]` if they are present in the `rna` modality. If an observation or a variable is missing from a modality, it will correspond to a `0` value.
+
+```python-console
+>>> list(f["obsmap"].keys())
+['prot', 'rna']
+>>> import numpy as np
+>>> np.array(f["obsmap"]["rna"])
+array([   1,    2,    3, ..., 3889, 3890, 3891], dtype=uint32)
+>>> np.array(f["obsmap"]["prot"])
+array([   1,    2,    3, ..., 3889, 3890, 3891], dtype=uint32)
+
+>>> list(f["varmap"].keys())
+['prot', 'rna']
+>>> np.array(f["varmap"]["rna"])
+array([    0,     0,     0, ..., 17804, 17805, 17806], dtype=uint32)
+>>> np.array(f["varmap"]["prot"])
+array([1, 2, 3, ..., 0, 0, 0], dtype=uint32)
+```
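As a sketch of how these maps can be interpreted from Python, assuming an in-memory `MuData` object `mdata` whose `.obsmap` attribute mirrors what is stored on disk, and the 1-based convention with `0` marking absence described above:

```python
import numpy as np

rna_map = np.asarray(mdata.obsmap["rna"])

# Non-zero entries mark global observations that are present in the "rna" modality
in_rna = rna_map > 0

# Entries are 1-based positions within the modality; convert to 0-based indices
rna_positions = rna_map[in_rna] - 1
```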
+
+## `.axis`
+
+Axis describes which dimensions are shared: observations (`axis=0`), variables (`axis=1`), or both (`axis=-1`).
It is recorded in the `axis` attribute of the file: + +```python-console +>>> f.attrs["axis"] +0 +``` + +Multimodal datasets, which have observations shared between modalities, will have `axis=0`. If no axis attribute is available such as in files with the older versions of this specification, it is assumed to be `0` by default. diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 6247f7e..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/source/notebooks/annotations_management.ipynb b/docs/notebooks/annotations_management.ipynb similarity index 97% rename from docs/source/notebooks/annotations_management.ipynb rename to docs/notebooks/annotations_management.ipynb index 3ad518f..80dc462 100644 --- a/docs/source/notebooks/annotations_management.ipynb +++ b/docs/notebooks/annotations_management.ipynb @@ -114,7 +114,7 @@ "mdata = make_mdata()\n", "# TODO: shouldn't be needed from 0.4\n", "# mdata.update(pull=False)\n", - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -196,7 +196,7 @@ "outputs": [], "source": [ "# Clean up\n", - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -247,7 +247,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -296,7 +296,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -348,7 +348,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -398,7 +398,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -440,7 +440,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -498,7 +498,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -539,7 +539,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -589,7 +589,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.var = mdata.var.loc[:,[]]" + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -721,7 +721,7 @@ "\n", " # column present in some (2 out of 3) modalities (non-unique)\n", " mod2.obs[\"arange\"] = np.arange(N)\n", - " mod3.obs[\"arange\"] = np.arange(N, 2*N)\n", + " mod3.obs[\"arange\"] = np.arange(N, 2 * N)\n", "\n", " # column present in one modality (unique)\n", " mod3.obs[\"mod3_cell\"] = True\n", @@ -740,7 +740,7 @@ "mdata = make_mdata()\n", "# TODO: shouldn't be needed from 
0.4\n", "# mdata.update(pull=False)\n", - "mdata.obs = mdata.obs.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]" ] }, { @@ -849,7 +849,7 @@ "outputs": [], "source": [ "# Clean up\n", - "mdata.obs = mdata.obs.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]" ] }, { @@ -902,7 +902,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.obs = mdata.obs.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]" ] }, { @@ -943,7 +943,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.obs = mdata.obs.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]" ] }, { @@ -996,7 +996,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.obs = mdata.obs.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]" ] }, { @@ -1046,7 +1046,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.obs = mdata.obs.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]" ] }, { @@ -1088,7 +1088,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.obs = mdata.obs.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]" ] }, { @@ -1138,7 +1138,7 @@ "metadata": {}, "outputs": [], "source": [ - "mdata.obs = mdata.obs.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]" ] }, { @@ -1270,18 +1270,20 @@ " mod3 = AnnData(np.arange(5101, 8101, 1).reshape(-1, D3))\n", " mod3.obs_names = mod1.obs_names.copy()\n", " mod3.var_names = [f\"var{i}\" for i in range(D1 + D2, D)]\n", - " \n", + "\n", " mdata = MuData({\"mod1\": mod1, \"mod2\": mod2, \"mod3\": mod3})\n", "\n", " # common column to be propagated to all modalities\n", " mdata.var[\"highly_variable\"] = True\n", "\n", " # prefix column to be propagated to the respective modalities\n", - " mdata.var[\"mod2:if_mod2\"] = np.concatenate([\n", - " np.repeat(pd.NA, D1), \n", - " np.repeat(True, D2),\n", - " np.repeat(pd.NA, D3),\n", - " ])\n", + " mdata.var[\"mod2:if_mod2\"] = np.concatenate(\n", + " [\n", + " np.repeat(pd.NA, D1),\n", + " np.repeat(True, D2),\n", + " np.repeat(pd.NA, D3),\n", + " ]\n", + " )\n", "\n", " return mdata" ] @@ -1382,7 +1384,7 @@ "source": [ "# Clean up\n", "for m in mdata.mod.keys():\n", - " mdata[m].var = mdata[m].var.loc[:,[]]" + " mdata[m].var = mdata[m].var.loc[:, []]" ] }, { @@ -1438,7 +1440,7 @@ "source": [ "# Clean up\n", "for m in mdata.mod.keys():\n", - " mdata[m].var = mdata[m].var.loc[:,[]]" + " mdata[m].var = mdata[m].var.loc[:, []]" ] }, { @@ -1490,7 +1492,7 @@ "source": [ "# Clean up\n", "for m in mdata.mod.keys():\n", - " mdata[m].var = mdata[m].var.loc[:,[]]" + " mdata[m].var = mdata[m].var.loc[:, []]" ] }, { @@ -1558,7 +1560,7 @@ "source": [ "# Clean up\n", "for m in mdata.mod.keys():\n", - " mdata[m].var = mdata[m].var.loc[:,[]]" + " mdata[m].var = mdata[m].var.loc[:, []]" ] }, { @@ -1780,7 +1782,7 @@ "source": [ "# Clean up\n", "for m in mdata.mod.keys():\n", - " mdata[m].obs = mdata[m].obs.loc[:,[]]" + " mdata[m].obs = mdata[m].obs.loc[:, []]" ] }, { @@ -1830,7 +1832,7 @@ "source": [ "# Clean up\n", "for m in mdata.mod.keys():\n", - " mdata[m].obs = mdata[m].obs.loc[:,[]]" + " mdata[m].obs = mdata[m].obs.loc[:, []]" ] }, { @@ -1895,7 +1897,7 @@ "source": [ "# Clean up\n", "for m in mdata.mod.keys():\n", - " mdata[m].obs = mdata[m].obs.loc[:,[]]" + " mdata[m].obs = mdata[m].obs.loc[:, []]" ] }, { @@ -2046,8 +2048,8 @@ "mdata = make_mdata()\n", "# TODO: shouldn't be needed from 0.4\n", "# mdata.update(pull=False)\n", - "mdata.obs = mdata.obs.loc[:,[]]\n", - "mdata.var = mdata.var.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]\n", + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -2198,8 +2200,8 @@ "mdata = make_staged_mdata()\n", "# TODO: shouldn't be needed from 0.4\n", 
"# mdata.update(pull=False)\n", - "mdata.obs = mdata.obs.loc[:,[]]\n", - "mdata.var = mdata.var.loc[:,[]]" + "mdata.obs = mdata.obs.loc[:, []]\n", + "mdata.var = mdata.var.loc[:, []]" ] }, { @@ -2315,14 +2317,14 @@ "source": [ "def make_nested_mdata():\n", " stages = make_staged_mdata()\n", - " stages.obs = stages.obs.loc[:,[]] # pre-0.3\n", - " \n", + " stages.obs = stages.obs.loc[:, []] # pre-0.3\n", + "\n", " mod2 = AnnData(np.arange(10000, 12000, 1).reshape(10, -1))\n", " mod2.obs_names = [f\"obs{i}\" for i in range(mod2.n_obs)]\n", " mod2.var_names = [f\"mod2:var{i}\" for i in range(mod2.n_vars)]\n", "\n", " mdata = MuData({\"mod1\": stages, \"mod2\": mod2}, axis=-1)\n", - " \n", + "\n", " mdata.obs[\"dataset\"] = \"ref\"\n", "\n", " return mdata" @@ -2454,7 +2456,7 @@ } ], "source": [ - "for m, mod in mdata['mod1'].mod.items():\n", + "for m, mod in mdata[\"mod1\"].mod.items():\n", " print(mod.obs.dtypes)" ] }, @@ -2497,7 +2499,7 @@ "metadata": {}, "outputs": [], "source": [ - "for m, mod in mdata['mod1'].mod.items():\n", + "for m, mod in mdata[\"mod1\"].mod.items():\n", " assert \"dataset\" in mod.obs" ] }, diff --git a/docs/source/notebooks/axes.ipynb b/docs/notebooks/axes.ipynb similarity index 94% rename from docs/source/notebooks/axes.ipynb rename to docs/notebooks/axes.ipynb index dcf1abe..f3e5358 100644 --- a/docs/source/notebooks/axes.ipynb +++ b/docs/notebooks/axes.ipynb @@ -68,7 +68,6 @@ "metadata": {}, "outputs": [], "source": [ - "import mudata as md\n", "from mudata import MuData, AnnData" ] }, @@ -79,7 +78,6 @@ "outputs": [], "source": [ "import numpy as np\n", - "import pandas as pd\n", "\n", "np.random.seed(1)" ] @@ -113,9 +111,9 @@ "source": [ "n, d1, d2 = 100, 1000, 1500\n", "\n", - "ax = AnnData(np.random.normal(size=(n,d1)))\n", + "ax = AnnData(np.random.normal(size=(n, d1)))\n", "\n", - "ay = AnnData(np.random.normal(size=(n,d2)))" + "ay = AnnData(np.random.normal(size=(n, d2)))" ] }, { @@ -180,8 +178,8 @@ } ], "source": [ - "print(\"ax.var_names: [\", \", \".join(ax.var_names.values[:5]) + \", ..., \", ax.var_names.values[d1-1], \"]\")\n", - "print(\"ay.var_names: [\", \", \".join(ay.var_names.values[:5]) + \", ..., \", ay.var_names.values[d2-1], \"]\")" + "print(\"ax.var_names: [\", \", \".join(ax.var_names.values[:5]) + \", ..., \", ax.var_names.values[d1 - 1], \"]\")\n", + "print(\"ay.var_names: [\", \", \".join(ay.var_names.values[:5]) + \", ..., \", ay.var_names.values[d2 - 1], \"]\")" ] }, { @@ -197,8 +195,8 @@ "metadata": {}, "outputs": [], "source": [ - "ax.var_names = [f\"x_var{i+1}\" for i in range(d1)]\n", - "ay.var_names = [f\"y_var{i+1}\" for i in range(d2)]" + "ax.var_names = [f\"x_var{i + 1}\" for i in range(d1)]\n", + "ay.var_names = [f\"y_var{i + 1}\" for i in range(d2)]" ] }, { @@ -253,8 +251,8 @@ "source": [ "n1, n2, d = 100, 500, 1000\n", "\n", - "ad1 = AnnData(np.random.normal(size=(n1,d)))\n", - "ad2 = AnnData(np.random.normal(size=(n2,d)))" + "ad1 = AnnData(np.random.normal(size=(n1, d)))\n", + "ad2 = AnnData(np.random.normal(size=(n2, d)))" ] }, { @@ -264,8 +262,8 @@ "outputs": [], "source": [ "# Cell barcodes are dataset-specific\n", - "ad1.obs_names = [f\"dat1-cell{i+1}\" for i in range(n1)]\n", - "ad2.obs_names = [f\"dat2-cell{i+1}\" for i in range(n2)]" + "ad1.obs_names = [f\"dat1-cell{i + 1}\" for i in range(n1)]\n", + "ad2.obs_names = [f\"dat2-cell{i + 1}\" for i in range(n2)]" ] }, { @@ -360,8 +358,8 @@ "source": [ "n, d_raw, d_preproc = 100, 900, 300\n", "\n", - "a_raw = AnnData(np.random.normal(size=(n,d_raw)))\n", - "a_preproc = 
a_raw[:,np.sort(np.random.choice(np.arange(d_raw), d_preproc, replace=False))].copy()" + "a_raw = AnnData(np.random.normal(size=(n, d_raw)))\n", + "a_preproc = a_raw[:, np.sort(np.random.choice(np.arange(d_raw), d_preproc, replace=False))].copy()" ] }, { diff --git a/docs/source/notebooks/nuances.ipynb b/docs/notebooks/nuances.ipynb similarity index 96% rename from docs/source/notebooks/nuances.ipynb rename to docs/notebooks/nuances.ipynb index 828f9fd..7662a0d 100644 --- a/docs/source/notebooks/nuances.ipynb +++ b/docs/notebooks/nuances.ipynb @@ -34,15 +34,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "%pip install mudata" ] @@ -83,8 +75,7 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "import pandas as pd" + "import numpy as np" ] }, { @@ -103,9 +94,9 @@ "n, d1, d2, k = 1000, 100, 200, 10\n", "\n", "np.random.seed(1)\n", - "z = np.random.normal(loc=np.arange(k), scale=np.arange(k)*2, size=(n,k))\n", - "w1 = np.random.normal(size=(d1,k))\n", - "w2 = np.random.normal(size=(d2,k))\n", + "z = np.random.normal(loc=np.arange(k), scale=np.arange(k) * 2, size=(n, k))\n", + "w1 = np.random.normal(size=(d1, k))\n", + "w2 = np.random.normal(size=(d2, k))\n", "\n", "mod1 = AnnData(X=np.dot(z, w1.T))\n", "mod2 = AnnData(X=np.dot(z, w2.T))" @@ -276,8 +267,8 @@ "\n", "smaller_mdata = mdata.copy()\n", "\n", - "smaller_mdata.mod['mod1'] = mod1[:900]\n", - "smaller_mdata.mod['mod2'] = mod2[:900]" + "smaller_mdata.mod[\"mod1\"] = mod1[:900]\n", + "smaller_mdata.mod[\"mod2\"] = mod2[:900]" ] }, { @@ -332,7 +323,7 @@ } ], "source": [ - "print(max(smaller_mdata.obs['dummy_index']))" + "print(max(smaller_mdata.obs[\"dummy_index\"]))" ] }, { @@ -388,7 +379,7 @@ } ], "source": [ - "print(max(smaller_mdata.obs['dummy_index']))" + "print(max(smaller_mdata.obs[\"dummy_index\"]))" ] }, { diff --git a/docs/source/notebooks/quickstart_mudata.ipynb b/docs/notebooks/quickstart_mudata.ipynb similarity index 97% rename from docs/source/notebooks/quickstart_mudata.ipynb rename to docs/notebooks/quickstart_mudata.ipynb index b38c223..363d2b9 100644 --- a/docs/source/notebooks/quickstart_mudata.ipynb +++ b/docs/notebooks/quickstart_mudata.ipynb @@ -76,12 +76,13 @@ ], "source": [ "import numpy as np\n", + "\n", "np.random.seed(1)\n", "\n", "n, d, k = 1000, 100, 10\n", "\n", - "z = np.random.normal(loc=np.arange(k), scale=np.arange(k)*2, size=(n,k))\n", - "w = np.random.normal(size=(d,k))\n", + "z = np.random.normal(loc=np.arange(k), scale=np.arange(k) * 2, size=(n, k))\n", + "w = np.random.normal(size=(d, k))\n", "y = np.dot(z, w.T)\n", "y.shape" ] @@ -113,8 +114,8 @@ "from anndata import AnnData\n", "\n", "adata = AnnData(y)\n", - "adata.obs_names = [f\"obs_{i+1}\" for i in range(n)]\n", - "adata.var_names = [f\"var_{j+1}\" for j in range(d)]\n", + "adata.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n", + "adata.var_names = [f\"var_{j + 1}\" for j in range(d)]\n", "adata" ] }, @@ -143,12 +144,12 @@ ], "source": [ "d2 = 50\n", - "w2 = np.random.normal(size=(d2,k))\n", + "w2 = np.random.normal(size=(d2, k))\n", "y2 = np.dot(z, w2.T)\n", "\n", "adata2 = AnnData(y2)\n", - "adata2.obs_names = [f\"obs_{i+1}\" for i in range(n)]\n", - "adata2.var_names = [f\"var2_{j+1}\" for j in range(d2)]\n", + "adata2.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n", + "adata2.var_names = [f\"var2_{j + 1}\" for j in range(d2)]\n", "adata2" ] }, @@ -274,7 +275,7 @@ } ], 
"source": [ - "mdata.varm['A']" + "mdata.varm[\"A\"]" ] }, { @@ -353,9 +354,9 @@ } ], "source": [ - "print(f\"Outdated variables names: ...,\", \", \".join(mdata.var_names[-3:]))\n", + "print(\"Outdated variables names: ...,\", \", \".join(mdata.var_names[-3:]))\n", "mdata.update()\n", - "print(f\"Updated variables names: ...,\", \", \".join(mdata.var_names[-3:]))" + "print(\"Updated variables names: ...,\", \", \".join(mdata.var_names[-3:]))" ] }, { @@ -504,7 +505,7 @@ } ], "source": [ - "with md.set_options(display_style = \"html\", display_html_expand = 0b000):\n", + "with md.set_options(display_style=\"html\", display_html_expand=0b000):\n", " display(mdata)" ] }, @@ -720,7 +721,7 @@ } ], "source": [ - "with md.set_options(display_style = \"html\", display_html_expand = 0b000):\n", + "with md.set_options(display_style=\"html\", display_html_expand=0b000):\n", " display(mdata_r)" ] }, @@ -774,7 +775,7 @@ "source": [ "def simple_pca(mdata):\n", " from sklearn import decomposition\n", - " \n", + "\n", " x = np.hstack([m.X for m in mdata.mod.values()])\n", "\n", " pca = decomposition.PCA(n_components=2)\n", @@ -783,8 +784,8 @@ " # By default, methods operate in-place\n", " # and embeddings are stored in the .obsm slot\n", " mdata.obsm[\"X_pca\"] = components\n", - " \n", - " return " + "\n", + " return" ] }, { diff --git a/docs/nuances.md b/docs/nuances.md new file mode 100644 index 0000000..7adfcf5 --- /dev/null +++ b/docs/nuances.md @@ -0,0 +1,26 @@ +# Nuances + +This is *the sharp bits* page for `mudata`, which provides information on the nuances when working with `MuData` objects. + +```{contents} +:local: +:depth: 3 +``` + +```{toctree} +:maxdepth: 10 +:glob: * +``` + +## Variable names + +`MuData` is designed with features (variables) being different in different modalities in mind. Hence their names should be unique and different between modalities. In other words, `.var_names` are checked for uniqueness across modalities. + +This behaviour ensures all the functions are easy to reason about. For instance, if there is a `var_name` that is present in both modalities, what happens during plotting a joint embedding from `.obsm` coloured by this `var_name` is not strictly defined. + +Nevertheless, `MuData` can accommodate modalities with duplicated `.var_names`. For the typical workflows, we recommend renaming them manually or calling `.var_names_make_unique()`. + + +## Update + +Modalities in `MuData` objects are full-featured `AnnData` objects. Hence they can be operated individually, and their `MuData` parent will have to be updated to fetch this information. diff --git a/docs/pylint.rc b/docs/pylint.rc deleted file mode 100644 index 45a4dcf..0000000 --- a/docs/pylint.rc +++ /dev/null @@ -1,589 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-whitelist= - -# Specify a score threshold to be exceeded before program exits with error. -fail-under=10.0 - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use. 
-jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python module names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=print-statement, - parameter-unpacking, - unpacking-in-except, - old-raise-syntax, - backtick, - long-suffix, - old-ne-operator, - old-octal-literal, - import-star-module-level, - non-ascii-bytes-literal, - raw-checker-failed, - bad-inline-option, - locally-disabled, - file-ignored, - suppressed-message, - useless-suppression, - deprecated-pragma, - use-symbolic-message-instead, - apply-builtin, - basestring-builtin, - buffer-builtin, - cmp-builtin, - coerce-builtin, - execfile-builtin, - file-builtin, - long-builtin, - raw_input-builtin, - reduce-builtin, - standarderror-builtin, - unicode-builtin, - xrange-builtin, - coerce-method, - delslice-method, - getslice-method, - setslice-method, - no-absolute-import, - old-division, - dict-iter-method, - dict-view-method, - next-method-called, - metaclass-assignment, - indexing-exception, - raising-string, - reload-builtin, - oct-method, - hex-method, - nonzero-method, - cmp-method, - input-builtin, - round-builtin, - intern-builtin, - unichr-builtin, - map-builtin-not-iterating, - zip-builtin-not-iterating, - range-builtin-not-iterating, - filter-builtin-not-iterating, - using-cmp-argument, - eq-without-hash, - div-method, - idiv-method, - rdiv-method, - exception-message-attribute, - invalid-str-codec, - sys-max-int, - bad-python3-import, - deprecated-string-function, - deprecated-str-translate-call, - deprecated-itertools-function, - deprecated-types-field, - next-method-defined, - dict-items-not-iterating, - dict-keys-not-iterating, - dict-values-not-iterating, - deprecated-operator-function, - deprecated-urllib-function, - xreadlines-attribute, - deprecated-sys-function, - exception-escape, - comprehension-escape, - W0311 - -# Enable the message, report, category or checker with the given id(s). 
You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=c-extension-no-member - - -[REPORTS] - -# Python expression which should return a score less than or equal to 10. You -# have access to the variables 'error', 'warning', 'refactor', and 'convention' -# which contain the number of messages in each category, as well as 'statement' -# which is the total number of statements analyzed. This score is used by the -# global evaluation report (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages. -reports=no - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=sys.exit - - -[LOGGING] - -# The type of string formatting that logging methods do. `old` means using % -# formatting, `new` is for `{}` formatting. -logging-format-style=old - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules=logging - - -[SPELLING] - -# Limits count of emitted suggestions for spelling mistakes. -max-spelling-suggestions=4 - -# Spelling dictionary name. Available dictionaries: none. To make it work, -# install the python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains the private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to the private dictionary (see the -# --spelling-private-dict-file option) instead of raising a message. -spelling-store-unknown-words=no - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME, - XXX, - TODO - -# Regular expression of note tags to take in consideration. -#notes-rgx= - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. 
-ignore-none=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis). It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - -# List of decorators that change the signature of a decorated function. -signature-mutators= - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_, - _cb - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore. -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=100 - -# Maximum number of lines in a module. -max-module-lines=1000 - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[SIMILARITIES] - -# Ignore comments when computing similarities. 
-ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[BASIC] - -# Naming style matching correct argument names. -argument-naming-style=snake_case - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. -#argument-rgx= - -# Naming style matching correct attribute names. -attr-naming-style=snake_case - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names=foo, - bar, - baz, - toto, - tutu, - tata - -# Bad variable names regexes, separated by a comma. If names match any regex, -# they will always be refused -bad-names-rgxs= - -# Naming style matching correct class attribute names. -class-attribute-naming-style=any - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. -#class-attribute-rgx= - -# Naming style matching correct class names. -class-naming-style=PascalCase - -# Regular expression matching correct class names. Overrides class-naming- -# style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style=UPPER_CASE - -# Regular expression matching correct constant names. Overrides const-naming- -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style=snake_case - -# Regular expression matching correct function names. Overrides function- -# naming-style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. -good-names=i, - j, - k, - ex, - Run, - _ - -# Good variable names regexes, separated by a comma. If names match any regex, -# they will always be accepted -good-names-rgxs= - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=no - -# Naming style matching correct inline iteration names. -inlinevar-naming-style=any - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style=snake_case - -# Regular expression matching correct method names. Overrides method-naming- -# style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style=snake_case - -# Regular expression matching correct module names. Overrides module-naming- -# style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes=abc.abstractproperty - -# Naming style matching correct variable names. -variable-naming-style=snake_case - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. 
-#variable-rgx= - - -[STRING] - -# This flag controls whether inconsistent-quotes generates a warning when the -# character used as a quote delimiter is used inconsistently within a module. -check-quote-consistency=no - -# This flag controls whether the implicit-str-concat should generate a warning -# on implicit string concatenation in sequences defined over several lines. -check-str-concat-over-line-jumps=no - - -[IMPORTS] - -# List of modules that can be imported at any level, not just the top level -# one. -allow-any-import-level= - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma. -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled). -ext-import-graph= - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled). -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled). -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - -# Couples of modules and preferred modules, separated by a comma. -preferred-modules= - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp, - __post_init__ - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=cls - - -[DESIGN] - -# Maximum number of arguments for function / method. -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement (see R0916). -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "BaseException, Exception". 
-overgeneral-exceptions=BaseException, - Exception diff --git a/docs/references.bib b/docs/references.bib new file mode 100644 index 0000000..f05dc51 --- /dev/null +++ b/docs/references.bib @@ -0,0 +1,26 @@ +@article{Virshup_2023, + doi = {10.1038/s41587-023-01733-8}, + url = {https://doi.org/10.1038%2Fs41587-023-01733-8}, + year = 2023, + month = {apr}, + publisher = {Springer Science and Business Media {LLC}}, + author = {Isaac Virshup and Danila Bredikhin and Lukas Heumos and Giovanni Palla and Gregor Sturm and Adam Gayoso and Ilia Kats and Mikaela Koutrouli and Philipp Angerer and Volker Bergen and Pierre Boyeau and Maren Büttner and Gokcen Eraslan and David Fischer and Max Frank and Justin Hong and Michal Klein and Marius Lange and Romain Lopez and Mohammad Lotfollahi and Malte D. Luecken and Fidel Ramirez and Jeffrey Regier and Sergei Rybakov and Anna C. Schaar and Valeh Valiollah Pour Amiri and Philipp Weiler and Galen Xing and Bonnie Berger and Dana Pe'er and Aviv Regev and Sarah A. Teichmann and Francesca Finotello and F. Alexander Wolf and Nir Yosef and Oliver Stegle and Fabian J. Theis and}, + title = {The scverse project provides a computational ecosystem for single-cell omics data analysis}, + journal = {Nature Biotechnology} +} + +@article{bredikhin_2022, + abstract = {Advances in multi-omics have led to an explosion of multimodal datasets to address questions from basic biology to translation. While these data provide novel opportunities for discovery, they also pose management and analysis challenges, thus motivating the development of tailored computational solutions. Here, we present a data standard and an analysis framework for multi-omics, MUON, designed to organise, analyse, visualise, and exchange multimodal data. MUON stores multimodal data in an efficient yet flexible and interoperable data structure. MUON enables a versatile range of analyses, from data preprocessing to flexible multi-omics alignment.}, + author = {Bredikhin, Danila and Kats, Ilia and Stegle, Oliver}, + doi = {10.1186/s13059-021-02577-8}, + journal = {{Genome Biol}}, + month = feb, + nlmuniqueid = {100960660}, + number = {1}, + pages = {42}, + pii = {10.1186/s13059-021-02577-8}, + pubmed = {35105358}, + title = {{MUON: multimodal omics analysis framework}}, + volume = {23}, + year = {2022} +} diff --git a/docs/references.md b/docs/references.md new file mode 100644 index 0000000..00ad6a6 --- /dev/null +++ b/docs/references.md @@ -0,0 +1,5 @@ +# References + +```{bibliography} +:cited: +``` diff --git a/docs/source/_static/styles.css b/docs/source/_static/styles.css deleted file mode 100644 index d3acee4..0000000 --- a/docs/source/_static/styles.css +++ /dev/null @@ -1,7 +0,0 @@ -.wy-side-nav-search, .wy-nav-top { - background: #FFFFFF; -} - -.wy-side-nav-search > a { - padding: 0; -} \ No newline at end of file diff --git a/docs/source/_templates/autosummary/function.rst b/docs/source/_templates/autosummary/function.rst deleted file mode 100644 index 611659f..0000000 --- a/docs/source/_templates/autosummary/function.rst +++ /dev/null @@ -1,5 +0,0 @@ -{{ fullname | escape | underline}} - -.. currentmodule:: {{ module }} - -.. autofunction:: {{ fullname }} diff --git a/docs/source/_templates/autosummary/module.rst b/docs/source/_templates/autosummary/module.rst deleted file mode 100644 index 9c0b618..0000000 --- a/docs/source/_templates/autosummary/module.rst +++ /dev/null @@ -1,62 +0,0 @@ -{{ fullname | escape | underline}} - -.. 
automodule:: {{ fullname }} - - {% block attributes %} - {% if attributes %} - .. rubric:: {{ _('Module Attributes') }} - - .. autosummary:: - {% for item in attributes %} - {{ item }} - {%- endfor %} - {% endif %} - {% endblock %} - - {% block functions %} - {% if functions %} - .. rubric:: {{ _('Functions') }} - - .. autosummary:: - :toctree: - {% for item in functions %} - {{ item }} - {%- endfor %} - {% endif %} - {% endblock %} - - {% block classes %} - {% if classes %} - .. rubric:: {{ _('Classes') }} - - .. autosummary:: - :toctree: - {% for item in classes %} - {{ item }} - {%- endfor %} - {% endif %} - {% endblock %} - - {% block exceptions %} - {% if exceptions %} - .. rubric:: {{ _('Exceptions') }} - - .. autosummary:: - :toctree: - {% for item in exceptions %} - {{ item }} - {%- endfor %} - {% endif %} - {% endblock %} - -{% block modules %} -{% if modules %} -.. rubric:: Modules - -.. autosummary:: - :toctree: -{% for item in modules %} - {{ item }} -{%- endfor %} -{% endif %} -{% endblock %} diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst deleted file mode 100644 index 62bf8be..0000000 --- a/docs/source/api/index.rst +++ /dev/null @@ -1,34 +0,0 @@ -.. mudata documentation master file, created by - sphinx-quickstart on Sun Sep 13 02:51:46 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - - - -API reference -============= - -.. contents:: :local: - :depth: 3 - -.. toctree:: - :maxdepth: 10 - - -Multimodal omics ----------------- - -.. module:: mudata - -.. autosummary:: - :toctree: generated - - MuData - -Input/Output ------------- - -.. automodsumm:: mudata - :functions-only: - :toctree: generated - diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst deleted file mode 100644 index 958284d..0000000 --- a/docs/source/changelog.rst +++ /dev/null @@ -1,90 +0,0 @@ -Release notes -============= - -.. contents:: :local: - :depth: 3 - -.. toctree:: - :maxdepth: 10 - - * -v0.3.1 ------- - -This release brings compatibility with the anndata 0.10.9 release. - -v0.3.0 ------- - -This version comes with a notable change to the way the annotations of individual modalities are treated. -It implements pull/push interface for annotations with functions :func:`mudata.MuData.pull_obs`, :func:`mudata.MuData.pull_var`, :func:`mudata.MuData.push_obs`, and :func:`mudata.MuData.push_var`. - -:func:`mudata.MuData.update` performance and behaviour have been generally improved. -For compatibility reasons, this release keeps the old behaviour of pulling annotations on read/update as default. - -.. note:: - If you want to adopt the new update behaviour, set ``mudata.set_options(pull_on_update=False)``. This will be the default behaviour in the next release. - With it, the annotations will not be copied from the modalities on :func:`mudata.MuData.update` implicitly. - - To copy the annotations explicitly, you will need to use :func:`mudata.MuData.pull_obs` and/or :func:`mudata.MuData.pull_var`. - -This release also comes with new functionalities such as :func:`mudata.to_anndata`, :func:`mudata.to_mudata`, and :func:`mudata.concat`. - -:class:`mudata.MuData` objects now have a new ``.mod_names`` attribute. ``MuData.mod`` can be pretty-printed. Readers support ``fsspec``, and :func:`mudata.read_zarr` now supports ``mod-order``. The ``uns`` attribute now properly handled by the views. 
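The pull/push workflow introduced in this release can be sketched with a small, self-contained example. This is an illustrative sketch only: the toy data and the column names (``n_genes``, ``condition``) are invented here, and the no-argument defaults of ``pull_obs()``/``push_obs()`` are assumed.

```python
import numpy as np
from anndata import AnnData

import mudata
from mudata import MuData

# Opt in to the new behaviour: annotations are no longer pulled implicitly on .update()
mudata.set_options(pull_on_update=False)

# Toy container with two modalities sharing the same observations
rna = AnnData(np.random.normal(size=(5, 3)))
atac = AnnData(np.random.normal(size=(5, 4)))
rna.var_names = [f"gene{i}" for i in range(rna.n_vars)]
atac.var_names = [f"peak{i}" for i in range(atac.n_vars)]
mdata = MuData({"rna": rna, "atac": atac})

# Annotate one modality independently of the container...
mdata["rna"].obs["n_genes"] = (mdata["rna"].X > 0).sum(axis=1)

# ...and copy the annotation into the global .obs explicitly
mdata.pull_obs()
print(mdata.obs.columns)  # expected to include a prefixed column such as 'rna:n_genes'

# Global columns can be propagated back to the modalities with the push counterpart
mdata.obs["condition"] = "control"
mdata.push_obs()
```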
- - -v0.2.4 ------- - -This version brings compatibility with the numpy 2.0.0 release and the future anndata 0.11 release with dtype argument deprecation. - -Requires anndata 0.10.8 or newer. - -v0.2.3 ------- - -Fixes and improvements for backed objects, views, nested MuData objects, I/O and HTML representation. - -Pandas 2.0 compatibility. - -v0.2.2 ------- - -Path objects ``pathlib.Path`` now work in :func:`mudata.read`. - -v0.2.1 ------- - -This version comes with :func:`mudata.MuData.update` improvements and optimisations. - -There is now :func:`mudata.MuData.__len__`. This should make it easier to build MuData into workflows that operate on data containers with length. In practice using :func:`mudata.MuData.n_obs` should be preferred. - -In this implementation of MuData, default ``dict`` has replaced ``OrderedDict``, e.g. in the ``.uns`` slot, to improve compatibility with new serialisation versions. As of Python 3.6, dictionaries are insertion-ordered. - -v0.2.0 ------- - -This version uses new I/O serialisation of `AnnData v0.8 `_. - -Updating a MuData object with :func:`mudata.MuData.update` is even faster in many use cases. - -There's `a new axes interface `_ that allows to use MuData objects as containers with different shared dimensions. - - -v0.1.2 ------- - -Updating a MuData object with :func:`mudata.MuData.update` is now much faster. - -This version also comes with an improved documentation, including `a new page describing the sharp bits `__. - -v0.1.1 ------- - -This version comes with improved stability and bug fixes. - -v0.1.0 ------- - -Initial ``mudata`` release with ``MuData`` (:class:`mudata.MuData`), previously a part of the ``muon`` framework. - diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 7c3097e..0000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,104 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import sys -from pathlib import Path - -from recommonmark.transform import AutoStructify - -# sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(0, Path("../").resolve()) - - -# -- Project information ----------------------------------------------------- - -project = "mudata" -copyright = "2020 - 2024, Danila Bredikhin" -author = "Danila Bredikhin" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. 
-extensions = [ - "recommonmark", - "sphinx.ext.napoleon", - "sphinx.ext.autosummary", - "sphinx.ext.autodoc", - "sphinx_automodapi.automodapi", - "sphinx.ext.autosectionlabel", - "sphinx.ext.mathjax", - "nbsphinx", -] -autosectionlabel_prefix_document = True - -source_suffix = { - ".rst": "restructuredtext", - ".txt": "markdown", - ".md": "markdown", -} - -# autodoc/autosummary config -autosummary_generate = True -autosummary_imported_members = False -autodoc_default_options = { - "members": True, - "private-members": False, # Do not document private methods - "special-members": False, # Do not document special methods like __init__ - "inherited-members": False, - "show-inheritance": True, -} - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = "sphinx_book_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] -html_logo = "../img/mudata.svg" -html_theme_options = { - "logo_only": True, - "display_version": False, -} - - -# app setup hook -def setup(app): - app.add_config_value( - "recommonmark_config", - { - #'url_resolver': lambda url: github_doc_root + url, - "auto_toc_tree_section": "Contents", - "enable_auto_toc_tree": True, - "enable_math": False, - "enable_inline_math": False, - "enable_eval_rst": True, - }, - True, - ) - app.add_transform(AutoStructify) - app.add_css_file("styles.css") diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index ea75e7a..0000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,68 +0,0 @@ -Say hello to MuData -=================== - -**MuData** is a format for annotated multimodal datasets. MuData is native to Python but provides cross-language functionality via HDF5-based ``.h5mu`` files. - -.. _website: https://scverse.org/ -.. _governance: https://scverse.org/about/roles/ -.. _NumFOCUS: https://numfocus.org/ -.. _donation: https://numfocus.org/donate-to-scverse/ - -MuData is part of the scverse® project (`website`_, `governance`_) and is fiscally sponsored by `NumFOCUS`_. -Please consider making a tax-deductible `donation`_ to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs. - -.. raw:: html - -

- - -MuData objects as containers ----------------------------- - -``mudata`` package introduces multimodal data objects (:class:`mudata.MuData` class) allowing Python users to work with increasigly complex datasets efficiently and to build new workflows and computational tools around it. -:: - MuData object with n_obs × n_vars = 10110 × 110101 -  2 modalities -   atac: 10110 x 100001 -   rna: 10110 x 10100 - -MuData objects enable multimodal information to be stored & accessed naturally, embrace `AnnData `_ for the individual modalities, and can be serialized to ``.h5mu`` files. :doc:`Learn more about multimodal objects ` as well as :doc:`file formats for storing & sharing them `. - -Natural interface ------------------ - -MuData objects feature an AnnData-like interface and familiar concepts such as *observations* and *variables* for the two data dimensions. Get familiar with MuData in the :doc:`Quickstart tutorial
`. - -Handling MuData objects ------------------------ - -A flagship framework for multimodal omics analysis — ``muon`` — has been built around the MuData format. Find more information on it `in its documentation `_ and `on the tutorials page `_ as well as `in the corresponding publication `_. - - -.. toctree:: - :hidden: - :maxdepth: 1 - :caption: Getting started - - notebooks/quickstart_mudata.ipynb - notebooks/nuances.ipynb - notebooks/axes.ipynb - notebooks/annotations_management.ipynb - -.. toctree:: - :hidden: - :maxdepth: 1 - :caption: Documentation - - install - io/input - io/mudata - io/output - io/spec - api/index - changelog - diff --git a/docs/source/install.rst b/docs/source/install.rst deleted file mode 100644 index 83f9d7e..0000000 --- a/docs/source/install.rst +++ /dev/null @@ -1,36 +0,0 @@ -Install mudata -============== - -.. contents:: :local: - :depth: 3 - -.. toctree:: - :maxdepth: 10 - - * - -Stable version --------------- - -``mudata`` can be installed `from PyPI `_ with ``pip``: -:: - pip install mudata - - -Development version -------------------- - -To use a pre-release version of ``mudata``, install it from `from the GitHub repository `_: -:: - pip install git+https://github.com/scverse/mudata - - -Troubleshooting ---------------- - -Please consult the details on installing ``scanpy`` and its dependencies `here `_. If there are issues that have not beed described, addressed, or documented, please consider `opening an issue `_. - - -Hacking on mudata ------------------ -For hacking on the package, it is most convenient to do a so-called development-mode install, which symlinks files in your Python package directory to your mudata working directory, such that you do not need to reinstall after every change. We use `flit `_ as our build system. After installing flit, you can run ``flit install -s`` from within the mudata project directory to perform a development-mode install. Happy hacking! diff --git a/docs/source/io/input.rst b/docs/source/io/input.rst deleted file mode 100644 index 0ac0d5f..0000000 --- a/docs/source/io/input.rst +++ /dev/null @@ -1,91 +0,0 @@ -.. MuData documentation master file, created by - sphinx-quickstart on Thu Oct 22 02:24:42 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Input data -========== - -A default way to import ``MuData`` is the following: -:: - from mudata import MuData - - -There are various ways in which the data can be provided to create a MuData object: - - -.. contents:: :local: - :depth: 3 - -.. toctree:: - :maxdepth: 10 - - * - - -AnnData objects ---------------- - -MuData object can be constructed from a dictionary of existing AnnData objects: -:: - mdata = MuData({'rna': adata_rna, 'atac': adata_atac}) - - -AnnData objects themselves can be easily constructed from NumPy arrays and/or Pandas DataFrames annotating features (*variables*) and samples/cells (*observations*). This makes it a rather general data format to work with any type of high-dimensional data. -:: - from anndata import AnnData - adata = AnnData(X=matrix, obs=metadata_df, var=features_df) - - -Please see more details on how to operate on AnnData objects `in the anndata documentation `_. - - -Omics data ----------- - -When data fromats specific to genomics are of interest, specialised readers can be found in analysis frameworks such as `muon `_. These functions, including the ones for Cell Ranger count matrices as well as Snap files, `are described here `_. 
- - -Remote storage --------------- - -MuData objects can be read and cached from remote locations including via HTTP(S) or from S3 buckets. This is achieved via [`fsspec`](https://github.com/fsspec/filesystem_spec). For example, to read a MuData object from a remote server: -:: - import fsspec - - fname = "https://github.com/gtca/h5xx-datasets/raw/main/datasets/minipbcite.h5mu?download=" - with fsspec.open(fname) as f: - mdata = mudata.read_h5mu(f) - - -A caching layer can be added in the following way: -:: - fname_cached = "filecache::" + fname - with fsspec.open(fname_cached, filecache={'cache_storage': '/tmp/'}) as f: - mdata = mudata.read_h5mu(f) - - -For more `fsspec` usage examples see [its documentation](https://filesystem-spec.readthedocs.io/). - -S3 -^^ - -MuData objects in the ``.h5mu`` format stored in an S3 bucket can be read with ``fsspec`` as well: -:: - storage_options = { - 'endpoint_url': 'localhost:9000', - 'key': 'AWS_ACCESS_KEY_ID', - 'secret': 'AWS_SECRET_ACCESS_KEY', - } - - with fsspec.open('s3://bucket/dataset.h5mu', **storage_options) as f: - mudata.read_h5mu(f) - - -MuData objects stored in the ``.zarr`` format in an S3 bucket can be read from a *mapping*: -:: - import s3fs - - s3 = s3fs.S3FileSystem(**storage_options) - store = s3.get_mapper('s3://bucket/dataset.zarr') - mdata = mudata.read_zarr(store) diff --git a/docs/source/io/mudata.rst b/docs/source/io/mudata.rst deleted file mode 100644 index f1c826f..0000000 --- a/docs/source/io/mudata.rst +++ /dev/null @@ -1,167 +0,0 @@ -Multimodal data objects -======================= - -:class:`mudata.MuData` is a class for multimodal objects: -:: - from mudata import MuData - - -``MuData`` objects comprise a dictionary with ``AnnData`` objects, one per modality, in their ``.mod`` attribute. Just as ``AnnData`` objects themselves, they also contain attributes like ``.obs`` with annotation of observations (samples or cells), ``.obsm`` with their multidimensional annotations such as embeddings, etc. - -.. contents:: :local: - :depth: 3 - -.. toctree:: - :maxdepth: 10 - - * - -MuData's attributes -------------------- - -Key attributes & method of ``MuData`` objects as well as important concepts are described below. A full list of attributes and methods of multimodal containers can be found in the :class:`mudata.MuData` documentation. - -.mod -^^^^ - -Modalities are stored in a collection accessible via the ``.mod`` attribute of the ``MuData`` object with names of modalities as keys and ``AnnData`` objects as values. -:: - list(mdata.mod.keys()) - # => ['atac', 'rna'] - - -Individual modalities can be accessed with their names via the ``.mod`` attribute or via the ``MuData`` object itself as a shorthand: -:: - mdata.mod['rna'] - # or - mdata['rna'] - # => AnnData object - - -.obs & .var -^^^^^^^^^^^ - -.. warning:: - Version 0.3 introduces pull/push interface for annotations. For compatibility reasons, the old behaviour of pulling annotations on read/update is kept as default. - - This will be changed in the next release, and the annotations will not be copied implicitly. - To adopt the new behaviour, use :func:`mudata.set_options` with ``pull_on_update=False``. - The new approach to ``.update()`` and annotations is described below. - -Samples (cells) annotations are stored in the data frame accessible via the ``.obs`` attribute. Same goes for ``.var``, which contains annotation of variables (features). 
- -Copies of columns from ``.obs`` or ``.var`` data frames of individual modalities can be added with the ``.pull_obs()`` or ``.pull_var()`` methods: -:: - mdata.pull_obs() - mdata.pull_var() - -When the annotations are changed in ``AnnData`` objects of modalities, e.g. new columns are added, they can be propagated to the ``.obs`` or ``.var`` data frames with the same ``.pull_obs()`` or ``.pull_var()`` methods. - -Observations columns copied from individual modalities contain modality name as their prefix, e.g. ``rna:n_genes``. Same is true for variables columns however if there are columns with identical names in ``.var`` of multiple modalities — e.g. ``n_cells``, — these columns are merged across modalities and no prefix is added. - -When there are changes directly related to observations or variables, e.g. samples (cells) are filtered out or features (genes) are renamed, the changes have to be fetched with the ``.update()`` method: -:: - mdata.update() - - -.obsm -^^^^^ - -Multidimensional annotations of samples (cells) are accessible in the ``.obsm`` attribute. For instance, that can be UMAP coordinates that were learnt jointly on all modalities. Or `MOFA `_ embeddings — a generalisation of PCA to multiple omics. -:: - # mdata is a MuData object with CITE-seq data - mdata.obsm - # => MuAxisArrays with keys: X_umap, X_mofa, prot, rna - -As another multidimensional embedding, this slot may contain boolean vectors, one per modality, indicating if samples (cells) are available in the respective modality. For instance, if all samples (cells) are the same across modalities, all values in those vectors are ``True``. - - -Container's shape ------------------ - -The ``MuData`` object's shape is represented by two numbers calculated from the shapes of individual modalities — one for the number of observations and one for the number of variables. -:: - mdata.shape - # => (9573, 132465) - mdata.n_obs - # => 9573 - mdata.n_vars - # => 132465 - -By default, variables are always counted as belonging uniquely to a single modality while observations with the same name are counted as the same observation, which has variables across multiple modalities measured for. -:: - [ad.shape for ad in mdata.mod.values()] - # => [(9500, 10100), (9573, 122364)] - -If the shape of a modality is changed, :func:`mudata.MuData.update` has to be run to bring the respective updates to the ``MuData`` object. - - -Keeping containers up to date ------------------------------ - -.. warning:: - Version 0.3 introduces pull/push interface for annotations. For compatibility reasons, the old behaviour of pulling annotations on read/update is kept as default. - - This will be changed in the next release, and the annotations will not be copied implicitly. - To adopt the new behaviour, use :func:`mudata.set_options` with ``pull_on_update=False``. - The new approach to ``.update()`` and annotations is described below. - -Modalities inside the ``MuData`` container are full-fledged ``AnnData`` objects, which can be operated independently with any tool that works on ``AnnData`` objects. -When modalities are changed externally, the shape of the ``MuData`` object as well as metadata fetched from individual modalities will then reflect the previous state of the data. -To keep the container up to date, there is an ``.update()`` method that syncs the ``.obs_names`` and ``.var_names`` of the ``MuData`` object with the ones of the modalities. 
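The effect of ``.update()`` described here can be illustrated with a minimal sketch (toy modalities; all names below are chosen purely for the example):

```python
import numpy as np
from anndata import AnnData
from mudata import MuData

# Two toy modalities measured for the same 100 observations
mod1 = AnnData(np.random.normal(size=(100, 10)))
mod2 = AnnData(np.random.normal(size=(100, 20)))
mod1.var_names = [f"mod1_var{i}" for i in range(mod1.n_vars)]
mod2.var_names = [f"mod2_var{i}" for i in range(mod2.n_vars)]

mdata = MuData({"mod1": mod1, "mod2": mod2})
print(mdata.shape)  # (100, 30)

# Operate on the modalities independently, e.g. filter observations
mdata.mod["mod1"] = mdata.mod["mod1"][:90].copy()
mdata.mod["mod2"] = mdata.mod["mod2"][:90].copy()

# The container still reflects the previous state of the data...
print(mdata.shape)  # (100, 30)

# ...until obs_names and var_names are synchronised explicitly
mdata.update()
print(mdata.shape)  # (90, 30)
```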
- - -Managing annotations --------------------- - -To fetch the corresponding annotations from individual modalities, there are :func:`mudata.MuData.pull_obs` and :func:`mudata.MuData.pull_var` methods. - -To update the annotations of individual modalities with the global annotations, :func:`mudata.MuData.push_obs` and :func:`mudata.MuData.push_var` methods can be used. - - -Backed containers ------------------ - -To enable the backed mode for the count matrices in all the modalities, ``.h5mu`` files can be read with the relevant flag: -:: - mdata_b = mudata.read("filename.h5mu", backed=True) - mdata_b.isbacked - # => True - - -When creating a copy of a backed ``MuData`` object, the filename has to be provided, and the copy of the object will be backed at a new location. -:: - mdata_copy = mdata_b.copy("filename_copy.h5mu") - mdata_b.file.filename - # => 'filename_copy.h5mu' - - -Container's views ------------------ - -Analogous to the behaviour of ``AnnData`` objects, slicing ``MuData`` objects returns views of the original data. -:: - view = mdata[:100,:1000] - view.is_view - # => True - - # In the view, each modality is a view as well - view["A"].is_view - # => True - -Subsetting ``MuData`` objects is special since it slices them across modalities. I.e. the slicing operation for a set of ``obs_names`` and/or ``var_names`` will be performed for each modality and not only for the global multimodal annotation. - -This behaviour makes workflows memory-efficient, which is especially important when working with large datasets. If the object is to be modified however, a copy of it should be created, which is not a view anymore and has no dependance on the original object. -:: - mdata_sub = view.copy() - mdata_sub.is_view - # => False - -If the original object is backed, the filename has to be provided to the ``.copy()`` call, and the resulting object will be backed at a new location. -:: - mdata_sub = backed_view.copy("mdata_sub.h5mu") - mdata_sub.is_view - # => False - mdata_sub.isbacked - # => True - diff --git a/docs/source/io/output.rst b/docs/source/io/output.rst deleted file mode 100644 index 2169f2f..0000000 --- a/docs/source/io/output.rst +++ /dev/null @@ -1,78 +0,0 @@ -Output data -=========== - -In order to save & share multimodal data, ``.h5mu`` file format has been designed. - -.. contents:: :local: - :depth: 3 - -.. toctree:: - :maxdepth: 10 - - * - - -.h5mu files ------------ - -``.h5mu`` files are the default storage for MuData objects. These are HDF5 files with a standardised structure, which is similar to the one of ``.h5ad`` files where AnnData objects are stored. The most noticeable distinction is ``.mod`` group presence where individual modalities are stored — in the same way as they would be stored in the ``.h5ad`` files. -:: - mdata.write("mudata.h5mu") - -Inspect the contents of the file in the terminal: - -.. code-block:: console - - > h5ls mudata.h5mu - mod Group - obs Group - obsm Group - var Group - varm Group - - > h5ls data/mudata.h5mu/mod - atac Group - rna Group - - - -AnnData inside .h5mu -^^^^^^^^^^^^^^^^^^^^ - -Individual modalities in the ``.h5mu`` file are stored in exactly the same way as AnnData objects. 
This, together with the hierarchical nature of HDF5 files, makes it possible to read individual modalities from ``.h5mu`` files as well as to save individual modalities to the ``.h5mu`` file: -:: - adata = mudata.read("mudata.h5mu/rna") - - mudata.write("mudata.h5mu/rna", adata) - -The function :func:`mudata.read` automatically decides based on the input if :func:`mudata.read_h5mu` or rather :func:`mudata.read_h5ad` should be called. - -Learn more about the on-disk format specification shared by MuData and AnnData `in the AnnData documentation `_. - - -.zarr files ------------ - -`Zarr `_ is a cloud-friendly format for chunked N-dimensional arrays. Zarr is another supported serialisation format for MuData objects: -:: - mdata = mudata.read_zarr("mudata.zarr") - - mdata.write_zarr("mudata.zarr") - -Just as with ``.h5mu`` files, MuData objects saved in ``.zarr`` format resemble how AnnData objects are stored, with one additional level of abstraction: - -.. code-block:: console - - > tree -L 1 mudata.zarr - mudata.zarr - ├── mod - ├── obs - ├── obsm - ├── obsmap - ├── obsp - ├── uns - ├── var - ├── varm - ├── varmap - └── varp - diff --git a/docs/source/io/spec.rst b/docs/source/io/spec.rst deleted file mode 100644 index bb3d5dd..0000000 --- a/docs/source/io/spec.rst +++ /dev/null @@ -1,57 +0,0 @@ -MuData specification [RFC] -========================== - -Building on top of the `AnnData spec `_, this document provides details on the ``MuData`` on-disk format. For user-facing features, please see `this document `__. -:: - >>> import h5py - >>> f = h5py.File("citeseq.h5mu") - >>> list(f.keys()) - ['mod', 'obs', 'obsm', 'obsmap', 'uns', 'var', 'varm', 'varmap'] - -.. contents:: :local: - :depth: 3 - -.. toctree:: - :maxdepth: 10 - - * - - -.mod ----- - -Modalities are stored in a ``.mod`` group of the ``.h5mu`` file in the alphabetical order. To preserve the order of the modalities, there is an attribute ``"mod-order"`` that lists the modalities in their respective order. If some modalities are missing from that attribute, the attribute is to be ignored. -:: - >>> dict(f["mod"].attrs) - {'mod-order': array(['prot', 'rna'], dtype=object)} - - -.obsmap and .varmap -------------------- - -While in practice ``MuData`` relies on ``.obs_names`` and ``.var_names`` to collate global observations and variables, it also allows to disambiguate between items with the same name using integer maps. For example, global observations will have non-zero integer values in ``.obsmap["rna"]`` if they are present in the ``"rna"`` modality. If an observation or a variable is missing from a modality, it will correspond to a ``0`` value. -:: - >>> list(f["obsmap"].keys()) - ['prot', 'rna'] - >>> import numpy as np - >>> np.array(f["obsmap"]["rna"]) - array([ 1, 2, 3, ..., 3889, 3890, 3891], dtype=uint32) - >>> np.array(f["obsmap"]["prot"]) - array([ 1, 2, 3, ..., 3889, 3890, 3891], dtype=uint32) - - >>> list(f["varmap"].keys()) - ['prot', 'rna'] - >>> np.array(f["varmap"]["rna"]) - array([ 0, 0, 0, ..., 17804, 17805, 17806], dtype=uint32) - >>> np.array(f["varmap"]["prot"]) - array([1, 2, 3, ..., 0, 0, 0], dtype=uint32) - -.axis ------ - -Axis describes which dimensions are shared: observations (``axis=0``), variables (``axis=1```), or both (``axis=-1``). It is recorded in the ``axis`` attribute of the file: -:: - >>> f.attrs["axis"] - 0 - -Multimodal datasets, which have observations shared between modalities, will have ``axis=0``. 
If no axis attribute is available such as in files with the older versions of this specification, it is assumed to be ``0`` by default. diff --git a/docs/source/notebooks/requirements.txt b/docs/source/notebooks/requirements.txt deleted file mode 100644 index 131a333..0000000 --- a/docs/source/notebooks/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -numpy -pandas -anndata -mudata diff --git a/docs/source/nuances.rst b/docs/source/nuances.rst deleted file mode 100644 index 2e5aee4..0000000 --- a/docs/source/nuances.rst +++ /dev/null @@ -1,29 +0,0 @@ -Nuances -======= - -This is *the sharp bits* page for ``mudata``, which provides information on the nuances when working with ``MuData`` objects. - -.. contents:: :local: - :depth: 3 - -.. toctree:: - :maxdepth: 10 - - * - - -Variable names --------------- - -``MuData`` is designed with features (variables) being different in different modalities in mind. Hence their names should be unique and different between modalities. In other words, ``.var_names`` are checked for uniqueness across modalities. - -This behaviour ensures all the functions are easy to reason about. For instance, if there is a ``var_name`` that is present in both modalities, what happens during plotting a joint embedding from ``.obsm`` coloured by this ``var_name`` is not strictly defined. - -Nevertheless, ``MuData`` can accommodate modalities with duplicated ``.var_names``. For the typical workflows, we recommend renaming them manually or calling ``.var_names_make_unique()``. - - -Update ------- - -Modalities in ``MuData`` objects are full-featured ``AnnData`` objects. Hence they can be operated individually, and their ``MuData`` parent will have to be updated to fetch this information. - diff --git a/pyproject.toml b/pyproject.toml index 1e2b5c5..1e7dbdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,64 +1,72 @@ [build-system] build-backend = "hatchling.build" -requires = ["hatchling", "hatch-vcs"] +requires = [ "hatch-vcs", "hatchling" ] [project] name = "mudata" description = "Multimodal data" -requires-python = ">= 3.10" -license = "BSD-3-Clause" -authors = [ - { name = "Danila Bredikhin" }, -] +readme = "README.md" +license = { file = "LICENSE" } maintainers = [ - { name = "Danila Bredikhin", email = "danila@stanford.edu" }, + { name = "Danila Bredikhin", email = "danila@stanford.edu" }, ] -readme = "README.md" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: BSD License", - "Operating System :: OS Independent", - "Development Status :: 3 - Alpha", - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Intended Audience :: Science/Research" +authors = [ + { name = "Danila Bredikhin" }, ] -dependencies = [ - "anndata >= 0.10.8", +requires-python = ">=3.10" +classifiers = [ + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Scientific/Engineering :: Bio-Informatics", ] -dynamic = ["version"] - -[project.urls] -Documentation = "https://mudata.readthedocs.io/en/latest/" -Source = "https://github.com/scverse/mudata" -Home-page = "https://muon.scverse.org/" +dynamic = [ "version" ] -[project.optional-dependencies] -dev = [ - "setuptools_scm", +dependencies = [ + "anndata>=0.10.8", + "numpy", + "pandas", + "scipy", + # 
for debug logging (referenced from the issue template) + "session-info2", ] -docs = [ - "sphinx", - "sphinx-book-theme", - "pydata-sphinx-theme", - "readthedocs-sphinx-search", - "nbsphinx", - "sphinx_automodapi", - "recommonmark" +optional-dependencies.dev = [ + "fsspec", + "pre-commit", + "twine>=4.0.2", + "zarr<3", ] -test = [ - "zarr<3", - "pytest" +optional-dependencies.doc = [ + "docutils>=0.8,!=0.18.*,!=0.19.*", + "ipykernel", + "ipython", + "myst-nb>=1.1", + "pandas", + # Until pybtex >0.24.0 releases: https://bitbucket.org/pybtex-devs/pybtex/issues/169/ + "setuptools", + "sphinx>=8.1", + "sphinx-autodoc-typehints", + "sphinx-automodapi", + "sphinx-book-theme>=1", + "sphinx-copybutton", + "sphinx-tabs", + "sphinxcontrib-bibtex>=1", + "sphinxext-opengraph", ] - -[tool.pytest.ini_options] -python_files = "test_*.py" -testpaths = [ - "./tests", # unit tests +optional-dependencies.test = [ + "coverage>=7.10", + "pytest", + "zarr<3", ] - -[tool.black] -line-length = 100 -target-version = ['py39'] +# https://docs.pypi.org/project_metadata/#project-urls +urls.Documentation = "https://mudata.readthedocs.io/" +urls.Homepage = "https://muons.scverse.org" +urls.Source = "https://github.com/scverse/mudata" [tool.hatch.version] source = "vcs" @@ -66,38 +74,98 @@ source = "vcs" [tool.hatch.build.hooks.vcs] version-file = "src/mudata/_version.py" -[tool.hatch.build.targets.wheel] -packages = ["src/mudata"] +[tool.hatch.envs.default] +installer = "uv" +features = [ "dev" ] -[tool.hatch.build.targets.sdist] -exclude = [ - "/.github", - "/docs", +[tool.hatch.envs.docs] +features = [ "doc" ] +scripts.build = "sphinx-build -M html docs docs/_build {args}" +scripts.open = "python -m webbrowser -t docs/_build/html/index.html" +scripts.clean = "git clean -fdX -- {args:docs}" + +# Test the lowest and highest supported Python versions with normal deps +[[tool.hatch.envs.hatch-test.matrix]] +deps = [ "stable" ] +python = [ "3.10", "3.13" ] + +# Test the newest supported Python version also with pre-release deps +[[tool.hatch.envs.hatch-test.matrix]] +deps = [ "pre" ] +python = [ "3.13" ] + +[tool.hatch.envs.hatch-test] +features = [ "dev", "test" ] + +[tool.hatch.envs.hatch-test.overrides] +# If the matrix variable `deps` is set to "pre", +# set the environment variable `UV_PRERELEASE` to "allow". +matrix.deps.env-vars = [ + { key = "UV_PRERELEASE", value = "allow", if = [ "pre" ] }, ] [tool.ruff] -src = ["src"] - -[tool.ruff.format] -docstring-code-format = true - -[tool.ruff.lint] -select = [ - "E", # Error detected by Pycodestyle - "F", # Errors detected by Pyflakes - "W", # Warning detected by Pycodestyle - "UP", # pyupgrade - "I", # isort - "TCH", # manage type checking blocks - "ICN", # Follow import conventions - "PTH", # Pathlib instead of os.path - "PT", # Pytest conventions +line-length = 120 +src = [ "src" ] +extend-include = [ "*.ipynb" ] + +format.docstring-code-format = true + +lint.select = [ + "B", # flake8-bugbear + "BLE", # flake8-blind-except + "C4", # flake8-comprehensions + "D", # pydocstyle + "E", # Error detected by Pycodestyle + "F", # Errors detected by Pyflakes + "I", # isort + "RUF100", # Report unused noqa directives + "TID", # flake8-tidy-imports + "UP", # pyupgrade + "W", # Warning detected by Pycodestyle +] +lint.ignore = [ + "B008", # Errors from function calls in argument defaults. These are fine when the result is immutable. 
+ "D100", # Missing docstring in public module + "D104", # Missing docstring in public package + "D105", # __magic__ methods are often self-explanatory, allow missing docstrings + "D107", # Missing docstring in __init__ + # Disable one in each pair of mutually incompatible rules + "D203", # We don’t want a blank line before a class docstring + "D213", # <> We want docstrings to start immediately after the opening triple quote + "D400", # first line should end with a period [Bug: doesn’t work with single-line docstrings] + "D401", # First line should be in imperative mood; try rephrasing + "E501", # line too long -> we accept long comment lines; formatter gets rid of long code lines + "E731", # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient + "E741", # allow I, O, l as variable names -> I is the identity matrix ] -ignore = [ - # line too long -> we accept long comment lines; formatter gets rid of long code lines - "E501", - # Do not assign a lambda expression, use a def -> AnnData allows lambda expression assignments, - "E731", - # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation - "E741", +lint.per-file-ignores."*/__init__.py" = [ "F401" ] +lint.per-file-ignores."docs/*" = [ "I" ] +lint.per-file-ignores."tests/*" = [ "D" ] +lint.pydocstyle.convention = "numpy" + +[tool.pytest.ini_options] +testpaths = [ "tests" ] +xfail_strict = true +addopts = [ + "--import-mode=importlib", # allow using test files with same name +] + +[tool.coverage.run] +source = [ "mudata" ] +patch = [ "subprocess" ] +omit = [ + "**/test_*.py", +] + +[tool.cruft] +skip = [ + "tests", + "src/**/__init__.py", + "src/**/basic.py", + "docs/api.md", + "docs/changelog.md", + "docs/references.bib", + "docs/references.md", + "docs/notebooks/example.ipynb", ] diff --git a/src/mudata/_core/io.py b/src/mudata/_core/io.py index 6882348..17ac5bb 100644 --- a/src/mudata/_core/io.py +++ b/src/mudata/_core/io.py @@ -36,9 +36,7 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs): file, "obs", mdata.strings_to_categoricals( - mdata._shrink_attr("obs", inplace=False).copy() - if OPTIONS["pull_on_update"] is None - else mdata.obs.copy() + mdata._shrink_attr("obs", inplace=False).copy() if OPTIONS["pull_on_update"] is None else mdata.obs.copy() ), dataset_kwargs=kwargs, ) @@ -46,9 +44,7 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs): file, "var", mdata.strings_to_categoricals( - mdata._shrink_attr("var", inplace=False).copy() - if OPTIONS["pull_on_update"] is None - else mdata.var.copy() + mdata._shrink_attr("var", inplace=False).copy() if OPTIONS["pull_on_update"] is None else mdata.var.copy() ), dataset_kwargs=kwargs, ) @@ -239,9 +235,7 @@ def write_h5mu(filename: PathLike, mdata: MuData, **kwargs): nbytes = f.write( f"MuData (format-version={__mudataversion__};creator=muon;creator-version={__version__})".encode() ) - f.write( - b"\0" * (512 - nbytes) - ) # this is only needed because the H5file was written in append mode + f.write(b"\0" * (512 - nbytes)) # this is only needed because the H5file was written in append mode def write_h5ad(filename: PathLike, mod: str, data: MuData | AnnData): @@ -314,14 +308,14 @@ def write(filename: PathLike, data: MuData | AnnData): This function is designed to enhance I/O ease of use. 
It recognises the following formats of filename: - - for MuData - - FILE.h5mu - - for AnnData - - FILE.h5mu/MODALITY - - FILE.h5mu/mod/MODALITY - - FILE.h5ad - """ + - for MuData + - `FILE.h5mu` + - for AnnData + - `FILE.h5mu/MODALITY` + - `FILE.h5mu/mod/MODALITY` + - `FILE.h5ad` + """ import re if filename.endswith(".h5mu") or isinstance(data, MuData): @@ -395,9 +389,7 @@ def _validate_h5mu(filename: PathLike) -> (str, Callable | None): callback = lambda: fname.__exit__() ish5mu = fname.read(6) == b"MuData" except ImportError as e: - raise ImportError( - "To read from remote storage or cache, install fsspec: pip install fsspec" - ) from e + raise ImportError("To read from remote storage or cache, install fsspec: pip install fsspec") from e else: ish5mu = False raise e @@ -475,6 +467,7 @@ def read_h5mu(filename: PathLike, backed: str | bool | None = None): def read_zarr(store: str | Path | MutableMapping | zarr.Group): """\ Read from a hierarchical Zarr array store. + Parameters ---------- store @@ -612,10 +605,7 @@ def read_h5ad( except TypeError as e: fname, callback = filename, None # Support fsspec - if ( - filename.__class__.__name__ == "BufferedReader" - or filename.__class__.__name__ == "OpenFile" - ): + if filename.__class__.__name__ == "BufferedReader" or filename.__class__.__name__ == "OpenFile": try: from fsspec.core import OpenFile @@ -623,9 +613,7 @@ def read_h5ad( fname = filename.__enter__() callback = lambda: fname.__exit__() except ImportError as e: - raise ImportError( - "To read from remote storage or cache, install fsspec: pip install fsspec" - ) from e + raise ImportError("To read from remote storage or cache, install fsspec: pip install fsspec") from e adata = read_h5ad(fname, backed=backed) if callable is not None: @@ -662,17 +650,25 @@ def read(filename: PathLike, **kwargs) -> MuData | AnnData: This function is designed to enhance I/O ease of use. It recognises the following formats: - - FILE.h5mu - - FILE.h5mu/MODALITY - - FILE.h5mu/mod/MODALITY - - FILE.h5ad + + - `FILE.h5mu` + - `FILE.h5mu/MODALITY` + - `FILE.h5mu/mod/MODALITY` + - `FILE.h5ad` OpenFile and BufferedReader from fsspec are supported for remote storage, e.g.: - - mdata = read(fsspec.open("s3://bucket/file.h5mu"))) - - with fsspec.open("s3://bucket/file.h5mu") as f: - mdata = read(f) - - with fsspec.open("https://server/file.h5ad") as f: - adata = read(f) + + - .. code-block:: + + mdata = read(fsspec.open("s3://bucket/file.h5mu"))) + - .. code-block:: + + with fsspec.open("s3://bucket/file.h5mu") as f: + mdata = read(f) + - .. 
code-block:: + + with fsspec.open("https://server/file.h5ad") as f: + adata = read(f) """ import re From 3f12a298f4c913e02bd2b11325ab6c6b4b0b60c8 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Tue, 14 Oct 2025 13:45:23 +0200 Subject: [PATCH 02/10] run pre-commit hooks and fix linter warnings --- docs/notebooks/annotations_management.ipynb | 12 +- pyproject.toml | 17 +- src/mudata/__init__.py | 4 +- src/mudata/_core/config.py | 4 +- src/mudata/_core/file_backing.py | 5 +- src/mudata/_core/io.py | 32 +- src/mudata/_core/merge.py | 26 +- src/mudata/_core/mudata.py | 466 +++++++------------- src/mudata/_core/repr.py | 30 +- src/mudata/_core/to_.py | 15 +- src/mudata/_core/utils.py | 22 +- src/mudata/_core/views.py | 8 +- tests/test_axis_-1.py | 4 +- tests/test_obs_var.py | 4 +- tests/test_pull_push.py | 8 +- tests/test_update.py | 22 +- tests/test_update_axis_1.py | 14 +- tests/test_view_copy.py | 4 +- 18 files changed, 243 insertions(+), 454 deletions(-) diff --git a/docs/notebooks/annotations_management.ipynb b/docs/notebooks/annotations_management.ipynb index 80dc462..52a1c61 100644 --- a/docs/notebooks/annotations_management.ipynb +++ b/docs/notebooks/annotations_management.ipynb @@ -72,7 +72,6 @@ "outputs": [], "source": [ "def make_mdata():\n", - " N = 100\n", " D1, D2, D3 = 10, 20, 30\n", " D = D1 + D2 + D3\n", "\n", @@ -1255,7 +1254,6 @@ "outputs": [], "source": [ "def make_mdata():\n", - " N = 100\n", " D1, D2, D3 = 10, 20, 30\n", " D = D1 + D2 + D3\n", "\n", @@ -1671,9 +1669,7 @@ "outputs": [], "source": [ "def make_mdata():\n", - " N = 100\n", " D1, D2 = 10, 20\n", - " D = D1 + D2\n", "\n", " mod1 = AnnData(np.arange(0, 100, 0.1).reshape(-1, D1))\n", " mod1.obs_names = [f\"obs{i}\" for i in range(mod1.n_obs)]\n", @@ -2433,7 +2429,7 @@ } ], "source": [ - "for m, mod in mdata.mod.items():\n", + "for mod in mdata.mod.values():\n", " print(mod.obs.dtypes)" ] }, @@ -2456,7 +2452,7 @@ } ], "source": [ - "for m, mod in mdata[\"mod1\"].mod.items():\n", + "for mod in mdata[\"mod1\"].mod.values():\n", " print(mod.obs.dtypes)" ] }, @@ -2477,7 +2473,7 @@ "source": [ "def push_obs_rec(mdata: MuData):\n", " mdata.push_obs()\n", - " for m, mod in mdata.mod.items():\n", + " for mod in mdata.mod.values():\n", " if isinstance(mod, MuData):\n", " push_obs_rec(mod)" ] @@ -2499,7 +2495,7 @@ "metadata": {}, "outputs": [], "source": [ - "for m, mod in mdata[\"mod1\"].mod.items():\n", + "for mod in mdata[\"mod1\"].mod.values():\n", " assert \"dataset\" in mod.obs" ] }, diff --git a/pyproject.toml b/pyproject.toml index 1e7dbdd..e16b527 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ optional-dependencies.dev = [ "fsspec", "pre-commit", + "setuptools-scm", # for version handling in editable mode "twine>=4.0.2", "zarr<3", ] @@ -131,16 +132,18 @@ lint.ignore = [ "D105", # __magic__ methods are often self-explanatory, allow missing docstrings "D107", # Missing docstring in __init__ # Disable one in each pair of mutually incompatible rules - "D203", # We don’t want a blank line before a class docstring - "D213", # <> We want docstrings to start immediately after the opening triple quote - "D400", # first line should end with a period [Bug: doesn’t work with single-line docstrings] - "D401", # First line should be in imperative mood; try rephrasing - "E501", # line too long -> we accept long comment lines; formatter gets rid of long code lines - "E731", # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient - "E741", # allow I, O, l as 
variable names -> I is the identity matrix + "D203", # We don’t want a blank line before a class docstring + "D213", # <> We want docstrings to start immediately after the opening triple quote + "D400", # first line should end with a period [Bug: doesn’t work with single-line docstrings] + "D401", # First line should be in imperative mood; try rephrasing + "E501", # line too long -> we accept long comment lines; formatter gets rid of long code lines + "E731", # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient + "E741", # allow I, O, l as variable names -> I is the identity matrix + "TID252", # allow relative imports ] lint.per-file-ignores."*/__init__.py" = [ "F401" ] lint.per-file-ignores."docs/*" = [ "I" ] +lint.per-file-ignores."docs/notebooks/*" = [ "D", "F403", "F405" ] lint.per-file-ignores."tests/*" = [ "D" ] lint.pydocstyle.convention = "numpy" diff --git a/src/mudata/__init__.py b/src/mudata/__init__.py index e8ff1ac..e005730 100644 --- a/src/mudata/__init__.py +++ b/src/mudata/__init__.py @@ -7,8 +7,8 @@ except (ImportError, LookupError): try: from ._version import __version__ - except ModuleNotFoundError: - raise RuntimeError("mudata is not correctly installed. Please install it, e.g. with pip.") + except ModuleNotFoundError as e: + raise RuntimeError("mudata is not correctly installed. Please install it, e.g. with pip.") from e from anndata import AnnData diff --git a/src/mudata/_core/config.py b/src/mudata/_core/config.py index 93631cf..f58c9d8 100644 --- a/src/mudata/_core/config.py +++ b/src/mudata/_core/config.py @@ -25,12 +25,12 @@ class set_options: Options can be set in the context: - >>> with mudata.set_options(display_style='html'): + >>> with mudata.set_options(display_style="html"): ... print("Options are applied here") ... 
or globally: - >>> mudata.set_options(display_style='html') + >>> mudata.set_options(display_style="html") """ def __init__(self, **kwargs): diff --git a/src/mudata/_core/file_backing.py b/src/mudata/_core/file_backing.py index 5d5552e..8cf07c3 100644 --- a/src/mudata/_core/file_backing.py +++ b/src/mudata/_core/file_backing.py @@ -27,10 +27,7 @@ def open( add_ref=False, ) -> bool: if self.is_open and ( - filename is None - and filemode is None - or filename == self.filename - and filemode == self._filemode + filename is None and filemode is None or filename == self.filename and filemode == self._filemode ): if add_ref: self.counter += 1 diff --git a/src/mudata/_core/io.py b/src/mudata/_core/io.py index 17ac5bb..046a626 100644 --- a/src/mudata/_core/io.py +++ b/src/mudata/_core/io.py @@ -61,11 +61,9 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs): attrs["axis"] = mdata.axis mod = file.require_group("mod") - for k, v in mdata.mod.items(): + for k, adata in mdata.mod.items(): group = mod.require_group(k) - adata = mdata.mod[k] - adata.strings_to_categoricals() if adata.raw is not None: adata.strings_to_categoricals(adata.raw.var) @@ -173,11 +171,9 @@ def write_zarr( attrs["axis"] = mdata.axis mod = file.require_group("mod") - for k, v in mdata.mod.items(): + for k, adata in mdata.mod.items(): group = mod.require_group(k) - adata = mdata.mod[k] - adata.strings_to_categoricals() if adata.raw is not None: adata.strings_to_categoricals(adata.raw.var) @@ -398,20 +394,19 @@ def _validate_h5mu(filename: PathLike) -> (str, Callable | None): if isinstance(filename, str) or isinstance(filename, Path): if h5py.is_hdf5(filename): warn( - "The HDF5 file was not created by muon/mudata, we can't guarantee that everything will work correctly" + "The HDF5 file was not created by muon/mudata, we can't guarantee that everything will work correctly", + stacklevel=2, ) else: raise ValueError("The file is not an HDF5 file") else: - warn("Cannot verify that the (remote) file is a valid H5MU file") + warn("Cannot verify that the (remote) file is a valid H5MU file", stacklevel=2) return fname, callback def read_h5mu(filename: PathLike, backed: str | bool | None = None): - """ - Read MuData object from HDF5 file - """ + """Read MuData object from HDF5 file.""" assert backed in [ None, True, @@ -446,7 +441,7 @@ def read_h5mu(filename: PathLike, backed: str | bool | None = None): mod_order = None if "mod-order" in gmods.attrs: mod_order = _read_attr(gmods.attrs, "mod-order") - if mod_order is not None and all([m in gmods for m in mod_order]): + if mod_order is not None and all(m in gmods for m in mod_order): mods = {k: mods[k] for k in mod_order} d[k] = mods @@ -465,8 +460,7 @@ def read_h5mu(filename: PathLike, backed: str | bool | None = None): def read_zarr(store: str | Path | MutableMapping | zarr.Group): - """\ - Read from a hierarchical Zarr array store. + """Read from a hierarchical Zarr array store. 
Parameters ---------- @@ -504,7 +498,7 @@ def read_zarr(store: str | Path | MutableMapping | zarr.Group): mod_order = None if "mod-order" in gmods.attrs: mod_order = _read_attr(gmods.attrs, "mod-order") - if mod_order is not None and all([m in gmods for m in mod_order]): + if mod_order is not None and all(m in gmods for m in mod_order): mods = {k: mods[k] for k in mod_order} d[k] = mods @@ -580,9 +574,7 @@ def read_h5ad( mod: str | None, backed: str | bool | None = None, ) -> AnnData: - """ - Read AnnData object from inside a .h5mu file - or from a standalone .h5ad file (mod=None) + """Read AnnData object from inside a .h5mu file or from a standalone .h5ad file (mod=None). Currently replicates and modifies anndata._io.h5ad.read_h5ad. Matrices are loaded as they are in the file (sparse or dense). @@ -644,9 +636,7 @@ def read_h5ad( def read(filename: PathLike, **kwargs) -> MuData | AnnData: - """ - Read MuData object from HDF5 file - or AnnData object (a single modality) inside it + """Read MuData object from HDF5 file or AnnData object (a single modality) inside it. This function is designed to enhance I/O ease of use. It recognises the following formats: diff --git a/src/mudata/_core/merge.py b/src/mudata/_core/merge.py index 7623f46..ebf77d1 100644 --- a/src/mudata/_core/merge.py +++ b/src/mudata/_core/merge.py @@ -97,7 +97,6 @@ def concat( Examples -------- - Preparing example objects >>> import mudata as md, anndata as ad, pandas as pd, numpy as np @@ -130,10 +129,7 @@ def concat( if isinstance(mdatas, Mapping): if keys is not None: - raise TypeError( - "Cannot specify categories in both mapping keys and using `keys`. " - "Only specify this once." - ) + raise TypeError("Cannot specify categories in both mapping keys and using `keys`. Only specify this once.") keys, mdatas = list(mdatas.keys()), list(mdatas.values()) else: mdatas = list(mdatas) @@ -141,23 +137,19 @@ def concat( if keys is None: keys = np.arange(len(mdatas)).astype(str) - assert all( - [isinstance(m, MuData) for m in mdatas] - ), "For concatenation to work, all objects should be of type MuData" + assert all(isinstance(m, MuData) for m in mdatas), "For concatenation to work, all objects should be of type MuData" assert len(mdatas) > 1, "mdatas collection should have more than one MuData object" - if len(set(m.axis for m in mdatas)) != 1: + if len({m.axis for m in mdatas}) != 1: "All MuData objects in mdatas should have the same axis." 
axis = mdatas[0].axis # Modalities intersection - common_mods = reduce( - np.intersect1d, [np.array(list(m.mod.keys())).astype("object") for m in mdatas] - ) + common_mods = reduce(np.intersect1d, [np.array(list(m.mod.keys())).astype("object") for m in mdatas]) assert len(common_mods) > 0, "There should be at least one common modality across all mdatas" # Concatenate all the modalities - modalities: dict[str, AnnData] = dict() + modalities: dict[str, AnnData] = {} for m in common_mods: modalities[m] = ad_concat( [mdata[m] for mdata in mdatas], @@ -183,9 +175,7 @@ def concat( ) # Combining indexes - concat_indices = pd.concat( - [pd.Series(axis_indices(m, axis=axis)) for m in mdatas], ignore_index=True - ) + concat_indices = pd.concat([pd.Series(axis_indices(m, axis=axis)) for m in mdatas], ignore_index=True) if index_unique is not None: concat_indices = concat_indices.str.cat(label_col.map(str), sep=index_unique) concat_indices = pd.Index(concat_indices) @@ -271,13 +261,13 @@ def concat( alt_mapping = merge( [ {k: r(v, axis=0) for k, v in getattr(a, f"{alt_dim}m").items()} - for r, a in zip(reindexers, mdatas) + for r, a in zip(reindexers, mdatas, strict=False) ], ) alt_pairwise = merge( [ {k: r(r(v, axis=0), axis=1) for k, v in getattr(a, f"{alt_dim}p").items()} - for r, a in zip(reindexers, mdatas) + for r, a in zip(reindexers, mdatas, strict=False) ] ) uns = uns_merge([m.uns for m in mdatas]) diff --git a/src/mudata/_core/mudata.py b/src/mudata/_core/mudata.py index 76721dd..adf198b 100644 --- a/src/mudata/_core/mudata.py +++ b/src/mudata/_core/mudata.py @@ -10,6 +10,7 @@ from pathlib import Path from random import choices from string import ascii_letters, digits +from types import MappingProxyType from typing import Any, Literal, Union import numpy as np @@ -79,20 +80,14 @@ def _repr_hierarchy( if isinstance(v, MuData): maybe_axis = ( - ( - " [shared obs] " - if v.axis == 0 - else " [shared var] " if v.axis == 1 else " [shared obs and var] " - ) + (" [shared obs] " if v.axis == 0 else " [shared var] " if v.axis == 1 else " [shared obs and var] ") if hasattr(v, "axis") else "" ) - descr += ( - f"\n{indent}{k} MuData{maybe_axis}({v.n_obs} × {v.n_vars}){backed_at}{is_view}" - ) + descr += f"\n{indent}{k} MuData{maybe_axis}({v.n_obs} × {v.n_vars}){backed_at}{is_view}" if i != len(self) - 1: - levels = [nest_level] + [level for level in active_levels] + levels = [nest_level] + active_levels else: levels = [level for level in active_levels if level != nest_level] descr += v.mod._repr_hierarchy(nest_level=nest_level + 1, active_levels=levels) @@ -130,38 +125,39 @@ class MuData: such as embeddings and neighbours graphs learned jointly on multiple modalities and generalised sample and feature metadata tables. + + Parameters + ---------- + data + AnnData object or dictionary with AnnData objects as values. + If a dictionary is passed, the keys will be used as modality names. + If None, creates an empty MuData object. + feature_types_names + Dictionary to map feature types encoded in data.var["feature_types"] to modality names. + Only relevant when data is an AnnData object. + Default: {"Gene Expression": "rna", "Peaks": "atac", "Antibody Capture": "prot"} + as_view + Create a view of the MuData object. + index + Index to slice the MuData object when creating the view. + **kwargs + Additional arguments to create a MuData object. 
""" def __init__( self, data: Union[AnnData, Mapping[str, AnnData], "MuData"] | None = None, - feature_types_names: dict | None = { - "Gene Expression": "rna", - "Peaks": "atac", - "Antibody Capture": "prot", - }, + feature_types_names: dict | None = MappingProxyType( + { + "Gene Expression": "rna", + "Peaks": "atac", + "Antibody Capture": "prot", + } + ), as_view: bool = False, index: tuple[slice | Integral, slice | Integral] | slice | Integral | None = None, **kwargs, ): - """ - Parameters - ---------- - data - AnnData object or dictionary with AnnData objects as values. - If a dictionary is passed, the keys will be used as modality names. - If None, creates an empty MuData object. - feature_types_names - Dictionary to map feature types encoded in data.var["feature_types"] to modality names. - Only relevant when data is an AnnData object. - Default: {"Gene Expression": "rna", "Peaks": "atac", "Antibody Capture": "prot"} - as_view - Create a view of the MuData object. - index - Index to slice the MuData object when creating the view. - **kwargs - Additional arguments to create a MuData object. - """ self._init_common() if as_view: self._init_as_view(data, index) @@ -242,14 +238,14 @@ def __init__( self._var = pd.DataFrame() # Make obs map for each modality - self._obsm = MuAxisArrays(self, axis=0, store=dict()) - self._obsp = PairwiseArrays(self, axis=0, store=dict()) - self._obsmap = MuAxisArrays(self, axis=0, store=dict()) + self._obsm = MuAxisArrays(self, axis=0, store={}) + self._obsp = PairwiseArrays(self, axis=0, store={}) + self._obsmap = MuAxisArrays(self, axis=0, store={}) # Make var map for each modality - self._varm = MuAxisArrays(self, axis=1, store=dict()) - self._varp = PairwiseArrays(self, axis=1, store=dict()) - self._varmap = MuAxisArrays(self, axis=1, store=dict()) + self._varm = MuAxisArrays(self, axis=1, store={}) + self._varp = PairwiseArrays(self, axis=1, store={}) + self._varmap = MuAxisArrays(self, axis=1, store={}) self._axis = 0 @@ -262,7 +258,7 @@ def _init_common(self): # Unstructured annotations # NOTE: this is dict in contract to OrderedDict in anndata # due to favourable performance and lack of need to preserve the insertion order - self._uns = dict() + self._uns = {} # For compatibility with calls requiring AnnData slots self.raw = None @@ -313,13 +309,9 @@ def _init_as_view(self, mudata_ref: "MuData", index): cvaridx = slice(None) if a.is_view: if isinstance(a, MuData): - self.mod[m] = a._mudata_ref[ - _resolve_idxs((a._oidx, a._vidx), (cobsidx, cvaridx), a._mudata_ref) - ] + self.mod[m] = a._mudata_ref[_resolve_idxs((a._oidx, a._vidx), (cobsidx, cvaridx), a._mudata_ref)] else: - self.mod[m] = a._adata_ref[ - _resolve_idxs((a._oidx, a._vidx), (cobsidx, cvaridx), a._adata_ref) - ] + self.mod[m] = a._adata_ref[_resolve_idxs((a._oidx, a._vidx), (cobsidx, cvaridx), a._adata_ref)] else: self.mod[m] = a[cobsidx, cvaridx] @@ -386,7 +378,9 @@ def _init_from_dict_( k: ( v if isinstance(v, AnnData) or isinstance(v, MuData) - else MuData(**v) if "mod" in v else AnnData(**v) + else MuData(**v) + if "mod" in v + else AnnData(**v) ) for k, v in mod.items() }, @@ -403,12 +397,7 @@ def _init_from_dict_( ) def _check_duplicated_attr_names(self, attr: str): - if any( - [ - not getattr(self.mod[mod_i], attr + "_names").astype(str).is_unique - for mod_i in self.mod - ] - ): + if any(not getattr(self.mod[mod_i], attr + "_names").astype(str).is_unique for mod_i in self.mod): # If there are non-unique attr_names, we can only handle outer joins # under the condition the 
duplicated values are restricted to one modality dups = [ @@ -422,13 +411,10 @@ def _check_duplicated_attr_names(self, attr: str): for i, mod_i_dup_attrs in enumerate(dups): for j, mod_j in enumerate(self.mod): if j != i: - if any( - np.isin( - mod_i_dup_attrs, getattr(self.mod[mod_j], attr + "_names").values - ) - ): + if any(np.isin(mod_i_dup_attrs, getattr(self.mod[mod_j], attr + "_names").values)): warnings.warn( - f"Duplicated {attr}_names should not be present in different modalities due to the ambiguity that leads to." + f"Duplicated {attr}_names should not be present in different modalities due to the ambiguity that leads to.", + stacklevel=3, ) return True return False @@ -453,16 +439,12 @@ def _check_changed_attr_names(self, attr: str, columns: bool = False): if not hasattr(self, attrhash): attr_names_changed, attr_columns_changed = True, True else: - for m, mod in self.mod.items(): + for m in self.mod.keys(): if m in getattr(self, attrhash): cached_hash = getattr(self, attrhash)[m] new_hash = ( - sha1( - np.ascontiguousarray(getattr(self.mod[m], attr).index.values) - ).hexdigest(), - sha1( - np.ascontiguousarray(getattr(self.mod[m], attr).columns.values) - ).hexdigest(), + sha1(np.ascontiguousarray(getattr(self.mod[m], attr).index.values)).hexdigest(), + sha1(np.ascontiguousarray(getattr(self.mod[m], attr).columns.values)).hexdigest(), ) if cached_hash[0] != new_hash[0]: attr_names_changed = True @@ -514,9 +496,7 @@ def copy(self, filename: PathLike | None = None) -> "MuData": return read_h5mu(filename, self.file._filemode) def strings_to_categoricals(self, df: pd.DataFrame | None = None): - """ - Transform string columns in .var and .obs slots of MuData to categorical - as well as of .var and .obs slots in each AnnData object + """Transform string columns in .var and .obs slots of MuData to categorical as well as of .var and .obs slots in each AnnData object. This keeps it compatible with AnnData.strings_to_categoricals() method. """ @@ -560,16 +540,12 @@ def _create_global_attr_index(self, attr: str, axis: int): if axis == (1 - self._axis): # Shared indices modindices = [getattr(self.mod[m], attr).index for m in self.mod] - if all([modindices[i].equals(modindices[i + 1]) for i in range(len(modindices) - 1)]): + if all(modindices[i].equals(modindices[i + 1]) for i in range(len(modindices) - 1)): attrindex = modindices[0].copy() - attrindex = reduce( - pd.Index.union, [getattr(self.mod[m], attr).index for m in self.mod] - ).values + attrindex = reduce(pd.Index.union, [getattr(self.mod[m], attr).index for m in self.mod]).values else: # Modality-specific indices - attrindex = np.concatenate( - [getattr(self.mod[m], attr).index.values for m in self.mod], axis=0 - ) + attrindex = np.concatenate([getattr(self.mod[m], attr).index.values for m in self.mod], axis=0) return attrindex def _update_attr( @@ -586,7 +562,6 @@ def _update_attr( - are there intersecting obs_names/var_names between modalities? - have obs_names/var_names of modalities changed? """ - if OPTIONS["pull_on_update"] is None: warnings.warn( "From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. " @@ -614,11 +589,11 @@ def _update_attr( if attr_duplicated: warnings.warn( - f"{attr}_names are not unique. To make them unique, call `.{attr}_names_make_unique`." + f"{attr}_names are not unique. 
To make them unique, call `.{attr}_names_make_unique`.", stacklevel=2 ) if self._axis == -1: warnings.warn( - f"Behaviour is not defined with axis=-1, {attr}_names need to be made unique first." + f"Behaviour is not defined with axis=-1, {attr}_names need to be made unique first.", stacklevel=2 ) if not any(attr_changed): @@ -673,7 +648,7 @@ def _update_attr( ) ) - for mod, amod in self.mod.items(): + for mod in self.mod.keys(): colname = mod + ":" + rowcol # use 0 as special value for missing # we could use a pandas.array, which has missing values support, but then we get an Exception upon hdf5 write @@ -701,14 +676,10 @@ def _update_attr( if mod in getattr(self, attr + "map"): data_global[mod + ":" + rowcol] = getattr(self, attr + "map")[mod] attrmap_columns = [ - mod + ":" + rowcol - for mod in self.mod.keys() - if mod in getattr(self, attr + "map") + mod + ":" + rowcol for mod in self.mod.keys() if mod in getattr(self, attr + "map") ] - data_mod = data_mod.merge( - data_global, on=[col_index, *attrmap_columns], how="left", sort=False - ) + data_mod = data_mod.merge(data_global, on=[col_index, *attrmap_columns], how="left", sort=False) # Restore the index and remove the helper column data_mod = data_mod.set_index(col_index).rename_axis(None, axis=0) @@ -745,9 +716,7 @@ def _update_attr( if len(mask) > 0: negativemask = ~newidx.index.get_level_values(0).isin(globalidx) newidx = pd.MultiIndex.from_frame( - pd.concat( - [newidx.loc[globalidx[mask], :], newidx.iloc[negativemask, :]], axis=0 - ) + pd.concat([newidx.loc[globalidx[mask], :], newidx.iloc[negativemask, :]], axis=0) ) data_mod = data_mod.reindex(newidx, copy=False) @@ -771,11 +740,10 @@ def _update_attr( if len(data_global) > 0: if not data_global.index.is_unique: warnings.warn( - f"{attr}_names is not unique, global {attr} is present, and {attr}map is empty. The update() is not well-defined, verify if global {attr} map to the correct modality-specific {attr}." - ) - data_mod.reset_index( - data_mod.index.names.difference(data_global.index.names), inplace=True + f"{attr}_names is not unique, global {attr} is present, and {attr}map is empty. 
The update() is not well-defined, verify if global {attr} map to the correct modality-specific {attr}.", + stacklevel=2, ) + data_mod.reset_index(data_mod.index.names.difference(data_global.index.names), inplace=True) data_mod = _make_index_unique(data_mod) data_global = _make_index_unique(data_global) data_mod = data_mod.join(data_global, how="left", sort=False) @@ -783,7 +751,7 @@ def _update_attr( data_mod.index.set_names(None, inplace=True) # get adata positions and remove columns from the data frame - mdict = dict() + mdict = {} for m in self.mod.keys(): colname = m + ":" + rowcol mdict[m] = data_mod[colname].to_numpy() @@ -811,9 +779,7 @@ def _update_attr( for mod in self.mod.keys(): if mod in getattr(self, attr + "map"): data_global[mod + ":" + rowcol] = getattr(self, attr + "map")[mod] - attrmap_columns = [ - mod + ":" + rowcol for mod in self.mod.keys() if mod in getattr(self, attr + "map") - ] + attrmap_columns = [mod + ":" + rowcol for mod in self.mod.keys() if mod in getattr(self, attr + "map")] data_mod = data_mod.merge(data_global, on=attrmap_columns, how="left", sort=False) @@ -857,8 +823,7 @@ def _update_attr( new_index = ~now_index.isin(prev_index) if new_index.sum() == 0 or ( - keep_index.sum() + new_index.sum() == len(now_index) - and len(now_index) > len(prev_index) + keep_index.sum() + new_index.sum() == len(now_index) and len(now_index) > len(prev_index) ): # Another length (filtered) or new modality added # Update .obsm/.varm (size might have changed) @@ -885,13 +850,13 @@ def _update_attr( index_order = prev_index.get_indexer(now_index) - for mx_key, mx in attrm.items(): + for mx_key in attrm.keys(): if mx_key not in self.mod.keys(): # not a modality name attrm[mx_key] = attrm[mx_key][index_order] attrm[mx_key][index_order == -1] = np.nan # Update .obsp/.varp (size might have changed) - for mx_key, mx in attrp.items(): + for mx_key in attrp.keys(): attrp[mx_key] = attrp[mx_key][index_order, index_order] attrp[mx_key][index_order == -1, :] = -1 attrp[mx_key][:, index_order == -1] = -1 @@ -911,7 +876,7 @@ def _update_attr( # Write _attrhash if attr_changed: if not hasattr(self, _attrhash): - setattr(self, _attrhash, dict()) + setattr(self, _attrhash, {}) for m, mod in self.mod.items(): getattr(self, _attrhash)[m] = ( sha1(np.ascontiguousarray(getattr(mod, attr).index.values)).hexdigest(), @@ -945,11 +910,11 @@ def _update_attr_legacy( if attr_duplicated: warnings.warn( - f"{attr}_names are not unique. To make them unique, call `.{attr}_names_make_unique`." + f"{attr}_names are not unique. To make them unique, call `.{attr}_names_make_unique`.", stacklevel=2 ) if self._axis == -1: warnings.warn( - f"Behaviour is not defined with axis=-1, {attr}_names need to be made unique first." + f"Behaviour is not defined with axis=-1, {attr}_names need to be made unique first.", stacklevel=2 ) if not any(attr_changed): @@ -961,7 +926,7 @@ def _update_attr_legacy( if join_common: if attr_intersecting: warnings.warn( - f"Cannot join columns with the same name because {attr}_names are intersecting." 
+ f"Cannot join columns with the same name because {attr}_names are intersecting.", stacklevel=2 ) join_common = False @@ -971,17 +936,16 @@ def _update_attr_legacy( map( all, zip( - *list( + *[ [ - [ - not col.startswith(mod + ":") - or col[col.startswith(mod + ":") and len(mod + ":") :] - not in getattr(self.mod[mod], attr).columns - for col in getattr(self, attr).columns - ] - for mod in self.mod + not col.startswith(mod + ":") + or col[col.startswith(mod + ":") and len(mod + ":") :] + not in getattr(self.mod[mod], attr).columns + for col in getattr(self, attr).columns ] - ) + for mod in self.mod + ], + strict=False, ), ) ) @@ -1049,25 +1013,18 @@ def _update_attr_legacy( sort=False, ) data_common = pd.concat( - [ - _maybe_coerce_to_boolean(getattr(a, attr)[columns_common]) - for m, a in self.mod.items() - ], + [_maybe_coerce_to_boolean(getattr(a, attr)[columns_common]) for m, a in self.mod.items()], join="outer", axis=0, sort=False, ) - data_mod = _maybe_coerce_to_bool( - data_mod.join(data_common, how="left", sort=False) - ) + data_mod = _maybe_coerce_to_bool(data_mod.join(data_common, how="left", sort=False)) data_common = _maybe_coerce_to_bool(data_common) # this occurs when join_common=True and we already have a global data frame, e.g. after reading from H5MU sharedcols = data_mod.columns.intersection(data_global.columns) - data_global.rename( - columns={col: f"global:{col}" for col in sharedcols}, inplace=True - ) + data_global.rename(columns={col: f"global:{col}" for col in sharedcols}, inplace=True) else: data_mod = _maybe_coerce_to_bool( pd.concat( @@ -1085,7 +1042,7 @@ def _update_attr_legacy( ) ) - for mod, amod in self.mod.items(): + for mod in self.mod.keys(): colname = mod + ":" + rowcol # use 0 as special value for missing # we could use a pandas.array, which has missing values support, but then we get an Exception upon hdf5 write @@ -1108,12 +1065,8 @@ def _update_attr_legacy( data_mod = data_mod.rename_axis(col_index, axis=0).reset_index() data_mod[col_cumcount] = data_mod.groupby(col_index).cumcount() data_global = data_global.rename_axis(col_index, axis=0).reset_index() - data_global[col_cumcount] = ( - data_global.reset_index().groupby(col_index).cumcount() - ) - data_mod = data_mod.merge( - data_global, on=[col_index, col_cumcount], how="left", sort=False - ) + data_global[col_cumcount] = data_global.reset_index().groupby(col_index).cumcount() + data_mod = data_mod.merge(data_global, on=[col_index, col_cumcount], how="left", sort=False) # Restore the index and remove the helper column data_mod = data_mod.set_index(col_index).rename_axis(None, axis=0) del data_mod[col_cumcount] @@ -1147,9 +1100,7 @@ def _update_attr_legacy( data_common = pd.concat( [ - _maybe_coerce_to_boolean( - _make_index_unique(getattr(a, attr)[columns_common]) - ) + _maybe_coerce_to_boolean(_make_index_unique(getattr(a, attr)[columns_common])) for m, a in self.mod.items() ], join="outer", @@ -1162,9 +1113,7 @@ def _update_attr_legacy( else: dfs = [ _make_index_unique( - getattr(a, attr) - .assign(**{rowcol: np.arange(getattr(a, attr).shape[0])}) - .add_prefix(m + ":") + getattr(a, attr).assign(**{rowcol: np.arange(getattr(a, attr).shape[0])}).add_prefix(m + ":") ) for m, a in self.mod.items() ] @@ -1187,18 +1136,14 @@ def _update_attr_legacy( if len(mask) > 0: negativemask = ~newidx.index.get_level_values(0).isin(globalidx) newidx = pd.MultiIndex.from_frame( - pd.concat( - [newidx.loc[globalidx[mask], :], newidx.iloc[negativemask, :]], axis=0 - ) + pd.concat([newidx.loc[globalidx[mask], :], 
newidx.iloc[negativemask, :]], axis=0) ) data_mod = data_mod.reindex(newidx, copy=False) # this occurs when join_common=True and we already have a global data frame, e.g. after reading from HDF5 if join_common: sharedcols = data_mod.columns.intersection(data_global.columns) - data_global.rename( - columns={col: f"global:{col}" for col in sharedcols}, inplace=True - ) + data_global.rename(columns={col: f"global:{col}" for col in sharedcols}, inplace=True) data_mod = _restore_index(data_mod) data_mod.index.set_names(rowcol, inplace=True) @@ -1220,11 +1165,10 @@ def _update_attr_legacy( if len(data_global) > 0: if not data_global.index.is_unique: warnings.warn( - f"{attr}_names is not unique, global {attr} is present, and {attr}map is empty. The update() is not well-defined, verify if global {attr} map to the correct modality-specific {attr}." - ) - data_mod.reset_index( - data_mod.index.names.difference(data_global.index.names), inplace=True + f"{attr}_names is not unique, global {attr} is present, and {attr}map is empty. The update() is not well-defined, verify if global {attr} map to the correct modality-specific {attr}.", + stacklevel=2, ) + data_mod.reset_index(data_mod.index.names.difference(data_global.index.names), inplace=True) data_mod = _make_index_unique(data_mod) data_global = _make_index_unique(data_global) data_mod = data_mod.join(data_global, how="left", sort=False) @@ -1238,11 +1182,12 @@ def _update_attr_legacy( data_mod.drop(columns=gcol, inplace=True) else: warnings.warn( - f"Column {col} was present in {attr} but is also a common column in all modalities, and their contents differ. {attr}.{col} was renamed to {attr}.{gcol}." + f"Column {col} was present in {attr} but is also a common column in all modalities, and their contents differ. 
{attr}.{col} was renamed to {attr}.{gcol}.", + stacklevel=2, ) # get adata positions and remove columns from the data frame - mdict = dict() + mdict = {} for m in self.mod.keys(): colname = m + ":" + rowcol mdict[m] = data_mod[colname].to_numpy() @@ -1276,8 +1221,7 @@ def _update_attr_legacy( new_index = ~now_index.isin(prev_index) if new_index.sum() == 0 or ( - keep_index.sum() + new_index.sum() == len(now_index) - and len(now_index) > len(prev_index) + keep_index.sum() + new_index.sum() == len(now_index) and len(now_index) > len(prev_index) ): # Another length (filtered) or new modality added # Update .obsm/.varm (size might have changed) @@ -1304,13 +1248,13 @@ def _update_attr_legacy( index_order = prev_index.get_indexer(now_index) - for mx_key, mx in attrm.items(): + for mx_key in attrm.keys(): if mx_key not in self.mod.keys(): # not a modality name attrm[mx_key] = attrm[mx_key][index_order] attrm[mx_key][index_order == -1] = np.nan # Update .obsp/.varp (size might have changed) - for mx_key, mx in attrp.items(): + for mx_key in attrp.keys(): attrp[mx_key] = attrp[mx_key][index_order, :][:, index_order] attrp[mx_key][index_order == -1, :] = -1 attrp[mx_key][:, index_order == -1] = -1 @@ -1330,7 +1274,7 @@ def _update_attr_legacy( # Write _attrhash if attr_changed: if not hasattr(self, _attrhash): - setattr(self, _attrhash, dict()) + setattr(self, _attrhash, {}) for m, mod in self.mod.items(): getattr(self, _attrhash)[m] = ( sha1(np.ascontiguousarray(getattr(mod, attr).index.values)).hexdigest(), @@ -1338,20 +1282,14 @@ def _update_attr_legacy( ) def _shrink_attr(self, attr: str, inplace=True) -> pd.DataFrame: - """ - Remove observations/variables for each modality from the global observations/variables table - """ + """Remove observations/variables for each modality from the global observations/variables table.""" # Figure out which global columns exist columns_global = list( map( all, zip( - *list( - [ - [not col.startswith(mod + ":") for col in getattr(self, attr).columns] - for mod in self.mod - ] - ) + *([not col.startswith(mod + ":") for col in getattr(self, attr).columns] for mod in self.mod), + strict=False, ), ) ) @@ -1374,7 +1312,8 @@ def n_mod(self) -> int: """ Number of modalities in the MuData object. - Returns: + Returns + ------- int: The number of modalities. """ return len(self.mod) @@ -1384,7 +1323,8 @@ def isbacked(self) -> bool: """ Whether the MuData object is backed. - Returns: + Returns + ------- bool: True if the object is backed, False otherwise. """ return self.file.filename is not None @@ -1394,7 +1334,8 @@ def filename(self) -> Path | None: """ Filename of the MuData object. - Returns: + Returns + ------- Path | None: The path to the file if backed, None otherwise. 
""" return self.file.filename @@ -1417,9 +1358,7 @@ def filename(self, filename: PathLike | None): @property def obs(self) -> pd.DataFrame: - """ - Annotation of observation - """ + """Annotation of observation""" return self._obs @obs.setter @@ -1435,15 +1374,11 @@ def obs(self, value: pd.DataFrame): @property def n_obs(self) -> int: - """ - Total number of observations - """ + """Total number of observations""" return self._obs.shape[0] def obs_vector(self, key: str, layer: str | None = None) -> np.ndarray: - """ - Return an array of values for the requested key of length n_obs - """ + """Return an array of values for the requested key of length n_obs""" if key not in self.obs.columns: for m, a in self.mod.items(): if key in a.obs.columns: @@ -1454,9 +1389,7 @@ def obs_vector(self, key: str, layer: str | None = None) -> np.ndarray: return self.obs[key].values def update_obs(self): - """ - Update global .obs_names according to the .obs_names of all the modalities. - """ + """Update global .obs_names according to the .obs_names of all the modalities.""" join_common = self.axis == 1 self._update_attr("obs", axis=1, join_common=join_common) @@ -1485,12 +1418,11 @@ def obs_names_make_unique(self): ki = mods[i] for j in range(i + 1, len(self.mod)): kj = mods[j] - common_obs.append( - self.mod[ki].obs_names.intersection(self.mod[kj].obs_names.values) - ) - if any(map(lambda x: len(x) > 0, common_obs)): + common_obs.append(self.mod[ki].obs_names.intersection(self.mod[kj].obs_names.values)) + if any(len(x) > 0 for x in common_obs): warnings.warn( - "Modality names will be prepended to obs_names since there are identical obs_names in different modalities." + "Modality names will be prepended to obs_names since there are identical obs_names in different modalities.", + stacklevel=1, ) for k in self.mod: self.mod[k].obs_names = k + ":" + self.mod[k].obs_names.astype(str) @@ -1501,16 +1433,12 @@ def obs_names_make_unique(self): @property def obs_names(self) -> pd.Index: - """ - Names of variables (alias for `.obs.index`) - """ + """Names of variables (alias for `.obs.index`).""" return self.obs.index @obs_names.setter def obs_names(self, names: Sequence[str]): - """ - Set the observation names for all the nested AnnData/MuData objects. 
- """ + """Set the observation names for all the nested AnnData/MuData objects.""" if isinstance(names, pd.Index): if not isinstance(names.name, str | type(None)): raise ValueError( @@ -1535,7 +1463,7 @@ def obs_names(self, names: Sequence[str]): self._init_as_actual(self.copy()) self._obs.index = names - for mod, a in self.mod.items(): + for mod in self.mod.keys(): indices = self.obsmap[mod] self.mod[mod].obs_names = names[indices[indices != 0] - 1] @@ -1543,9 +1471,7 @@ def obs_names(self, names: Sequence[str]): @property def var(self) -> pd.DataFrame: - """ - Annotation of variables - """ + """Annotation of variables.""" return self._var @var.setter @@ -1560,16 +1486,12 @@ def var(self, value: pd.DataFrame): @property def n_vars(self) -> int: - """ - Total number of variables - """ + """Total number of variables.""" return self._var.shape[0] @property def n_var(self) -> int: - """ - Total number of variables - """ + """Total number of variables.""" # warnings.warn( # ".n_var will be removed in the next version, use .n_vars instead", # DeprecationWarning, @@ -1578,9 +1500,7 @@ def n_var(self) -> int: return self._var.shape[0] def var_vector(self, key: str, layer: str | None = None) -> np.ndarray: - """ - Return an array of values for the requested key of length n_var - """ + """Return an array of values for the requested key of length n_var.""" if key not in self.var.columns: for m, a in self.mod.items(): if key in a.var.columns: @@ -1591,9 +1511,7 @@ def var_vector(self, key: str, layer: str | None = None) -> np.ndarray: return self.var[key].values def update_var(self): - """ - Update global .var_names according to the .var_names of all the modalities. - """ + """Update global .var_names according to the .var_names of all the modalities.""" join_common = self.axis == 0 self._update_attr("var", axis=0, join_common=join_common) @@ -1622,12 +1540,11 @@ def var_names_make_unique(self): ki = mods[i] for j in range(i + 1, len(self.mod)): kj = mods[j] - common_vars.append( - np.intersect1d(self.mod[ki].var_names.values, self.mod[kj].var_names.values) - ) - if any(map(lambda x: len(x) > 0, common_vars)): + common_vars.append(np.intersect1d(self.mod[ki].var_names.values, self.mod[kj].var_names.values)) + if any(len(x) > 0 for x in common_vars): warnings.warn( - "Modality names will be prepended to var_names since there are identical var_names in different modalities." + "Modality names will be prepended to var_names since there are identical var_names in different modalities.", + stacklevel=1, ) for k in self.mod: self.mod[k].var_names = k + ":" + self.mod[k].var_names.astype(str) @@ -1638,16 +1555,12 @@ def var_names_make_unique(self): @property def var_names(self) -> pd.Index: - """ - Names of variables (alias for `.var.index`) - """ + """Names of variables (alias for `.var.index`)""" return self.var.index @var_names.setter def var_names(self, names: Sequence[str]): - """ - Set the variable names for all the nested AnnData/MuData objects. 
- """ + """Set the variable names for all the nested AnnData/MuData objects.""" if isinstance(names, pd.Index): if not isinstance(names.name, str | type(None)): raise ValueError( @@ -1672,7 +1585,7 @@ def var_names(self, names: Sequence[str]): self._init_as_actual(self.copy()) self._var.index = names - for mod, a in self.mod.items(): + for mod in self.mod.keys(): indices = self.varmap[mod] self.mod[mod].var_names = names[indices[indices != 0] - 1] @@ -1682,9 +1595,7 @@ def var_names(self, names: Sequence[str]): @property def obsm(self) -> MuAxisArrays | MuAxisArraysView: - """ - Multi-dimensional annotation of observation - """ + """Multi-dimensional annotation of observation.""" return self._obsm @obsm.setter @@ -1696,13 +1607,11 @@ def obsm(self, value): @obsm.deleter def obsm(self): - self.obsm = dict() + self.obsm = {} @property def obsp(self) -> PairwiseArrays | PairwiseArraysView: - """ - Pairwise annotatation of observations - """ + """Pairwise annotatation of observations.""" return self._obsp @obsp.setter @@ -1714,7 +1623,7 @@ def obsp(self, value): @obsp.deleter def obsp(self): - self.obsp = dict() + self.obsp = {} @property def obsmap(self) -> PairwiseArrays | PairwiseArraysView: @@ -1727,9 +1636,7 @@ def obsmap(self) -> PairwiseArrays | PairwiseArraysView: @property def varm(self) -> MuAxisArrays | MuAxisArraysView: - """ - Multi-dimensional annotation of variables - """ + """Multi-dimensional annotation of variables.""" return self._varm @varm.setter @@ -1741,13 +1648,11 @@ def varm(self, value): @varm.deleter def varm(self): - self.varm = dict() + self.varm = {} @property def varp(self) -> PairwiseArrays | PairwiseArraysView: - """ - Pairwise annotatation of variables - """ + """Pairwise annotatation of variables.""" return self._varp @varp.setter @@ -1759,7 +1664,7 @@ def varp(self, value): @varp.deleter def varp(self): - self.varp = dict() + self.varp = {} @property def varmap(self) -> PairwiseArrays | PairwiseArraysView: @@ -1793,7 +1698,7 @@ def uns(self, value: MutableMapping): @uns.deleter def uns(self): - self.uns = dict() + self.uns = {} # _keys methods to increase compatibility # with calls requiring those AnnData methods @@ -1830,9 +1735,7 @@ def update(self): @property def axis(self) -> int: - """ - MuData axis - """ + """MuData axis.""" return self._axis @property @@ -1859,8 +1762,7 @@ def _pull_attr( only_drop: bool = False, ): """ - Copy the data from the modalities to the global .obs/.var, - existing columns to be overwritten + Copy the data from the modalities to the global .obs/.var, existing columns to be overwritten. Parameters ---------- @@ -1906,7 +1808,6 @@ def _pull_attr( If True, drop the columns but do not actually pull them. Forces drop=True. False by default. """ - # TODO: run update() before pulling? 
if self.is_view: @@ -1928,10 +1829,7 @@ def _pull_attr( cols = _classify_attr_columns( np.concatenate( - [ - [f"{m}:{val}" for val in getattr(mod, attr).columns.values] - for m, mod in self.mod.items() - ] + [[f"{m}:{val}" for val in getattr(mod, attr).columns.values] for m, mod in self.mod.items()] ), self.mod.keys(), ) @@ -2003,9 +1901,7 @@ def _pull_attr( mask = mod_map != 0 mod_df = getattr(mod, attr) - mod_columns = [ - col["derived_name"] for col in cols if col["prefix"] == "" or col["prefix"] == m - ] + mod_columns = [col["derived_name"] for col in cols if col["prefix"] == "" or col["prefix"] == m] mod_df = mod_df[mod_df.columns.intersection(mod_columns)] if drop: @@ -2024,9 +1920,7 @@ def _pull_attr( and col["prefix"] == m and derived_name_count[col["derived_name"]] == col["count"] ] - mod_df.columns = [ - col if col in cols_special else f"{m}:{col}" for col in mod_df.columns - ] + mod_df.columns = [col if col in cols_special else f"{m}:{col}" for col in mod_df.columns] else: mod_df.columns = [f"{m}:{col}" for col in mod_df.columns] @@ -2063,8 +1957,7 @@ def pull_obs( only_drop: bool = False, ): """ - Copy the data from the modalities to the global .obs, - existing columns to be overwritten or updated + Copy the data from the modalities to the global .obs, existing columns to be overwritten or updated. Parameters ---------- @@ -2134,8 +2027,7 @@ def pull_var( only_drop: bool = False, ): """ - Copy the data from the modalities to the global .var, - existing columns to be overwritten or updated + Copy the data from the modalities to the global .var, existing columns to be overwritten or updated. Parameters ---------- @@ -2202,8 +2094,7 @@ def _push_attr( only_drop: bool = False, ): """ - Copy the data from the global .obs/.var to the modalities, - existing columns to be overwritten + Copy the data from the global .obs/.var to the modalities, existing columns to be overwritten. Parameters ---------- @@ -2230,7 +2121,6 @@ def _push_attr( If True, drop the columns but do not actually pull them. Forces drop=True. False by default. """ - if self.is_view: raise ValueError(f"Cannot push {attr} columns on a view.") @@ -2300,11 +2190,7 @@ def _push_attr( df = getattr(self, attr)[mask].loc[:, [col["name"] for col in mod_cols]] df.columns = [col["derived_name"] for col in mod_cols] - df = ( - df.set_index(np.arange(mod_n_attr)) - .iloc[mod_map[mask] - 1] - .set_index(np.arange(mod_n_attr)) - ) + df = df.set_index(np.arange(mod_n_attr)).iloc[mod_map[mask] - 1].set_index(np.arange(mod_n_attr)) if not only_drop: # TODO: _maybe_coerce_to_bool @@ -2329,8 +2215,7 @@ def push_obs( only_drop: bool = False, ): """ - Copy the data from the mdata.obs to the modalities, - existing columns to be overwritten + Copy the data from the mdata.obs to the modalities, existing columns to be overwritten. Parameters ---------- @@ -2375,8 +2260,7 @@ def push_var( only_drop: bool = False, ): """ - Copy the data from the mdata.var to the modalities, - existing columns to be overwritten + Copy the data from the mdata.var to the modalities, existing columns to be overwritten. 
Parameters ---------- @@ -2412,9 +2296,7 @@ def push_var( ) def write_h5mu(self, filename: str | None = None, **kwargs): - """ - Write MuData object to an HDF5 file - """ + """Write MuData object to an HDF5 file.""" from .io import _write_h5mu, write_h5mu if self.isbacked and (filename is None or filename == self.filename): @@ -2433,16 +2315,14 @@ def write_h5mu(self, filename: str | None = None, **kwargs): write = write_h5mu def write_zarr(self, store: MutableMapping | str | Path, **kwargs): - """ - Write MuData object to a Zarr store - """ + """Write MuData object to a Zarr store.""" from .io import write_zarr write_zarr(store, self, **kwargs) def to_anndata(self, **kwargs) -> AnnData: """ - Convert MuData to AnnData + Convert MuData to AnnData. If mdata.axis == 0 (shared observations), concatenate modalities along axis 1 (`anndata.concat(axis=1)`). @@ -2453,9 +2333,7 @@ def to_anndata(self, **kwargs) -> AnnData: Parameters ---------- - data : MuData - MuData object to convert to AnnData - kwargs : dict + kwargs Keyword arguments passed to `anndata.concat()` """ from .to_ import to_anndata @@ -2467,11 +2345,7 @@ def _gen_repr(self, n_obs, n_vars, extensive: bool = False, nest_level: int = 0) backed_at = f" backed at {str(self.filename)!r}" if self.isbacked else "" view_of = "View of " if self.is_view else "" maybe_axis = ( - ( - "" - if self.axis == 0 - else " (shared var) " if self.axis == 1 else " (shared obs and var) " - ) + ("" if self.axis == 0 else " (shared var) " if self.axis == 1 else " (shared obs and var) ") if hasattr(self, "axis") else "" ) @@ -2485,27 +2359,23 @@ def _gen_repr(self, n_obs, n_vars, extensive: bool = False, nest_level: int = 0) map( all, zip( - *list( - [ - [ - not col.startswith(mod + mod_sep) - for col in getattr(self, attr).keys() - ] - for mod in self.mod - ] - ) + *[ + [not col.startswith(mod + mod_sep) for col in getattr(self, attr).keys()] + for mod in self.mod + ], + strict=False, ), ) ) if any(global_keys): - descr += f"\n{indent} {attr}:\t{str([keys[i] for i in range(len(keys)) if global_keys[i]])[1:-1]}" + descr += ( + f"\n{indent} {attr}:\t{str([keys[i] for i in range(len(keys)) if global_keys[i]])[1:-1]}" + ) descr += f"\n{indent} {len(self.mod)} modalit{'y' if len(self.mod) == 1 else 'ies'}" for k, v in self.mod.items(): mod_indent = " " * (nest_level + 1) if isinstance(v, MuData): - descr += f"\n{mod_indent}{k}:\t" + v._gen_repr( - v.n_obs, v.n_vars, extensive, nest_level + 1 - ) + descr += f"\n{mod_indent}{k}:\t" + v._gen_repr(v.n_obs, v.n_vars, extensive, nest_level + 1) continue descr += f"\n{mod_indent}{k}:\t{v.n_obs} x {v.n_vars}" for attr in [ @@ -2531,8 +2401,7 @@ def __repr__(self) -> str: def _repr_html_(self, expand=None): """ - HTML formatter for MuData objects - for rich display in notebooks. + HTML formatter for MuData objects for rich display in notebooks. This formatter has an optional argument `expand`, which is a 3-bit flag: @@ -2540,7 +2409,6 @@ def _repr_html_(self, expand=None): 010 - expand .mod slots 001 - expand slots for each modality """ - # Return text representation if set in options if OPTIONS["display_style"] == "text": from html import escape @@ -2577,10 +2445,8 @@ def _repr_html_(self, expand=None): ) # General object properties - mods += ( - "{} object {} obs × {} var".format( - type(dat).__name__, *(dat.shape) - ) + mods += "{} object {} obs × {} var".format( + type(dat).__name__, *(dat.shape) ) if dat.isbacked: mods += f"
backed at {self.file.filename}" diff --git a/src/mudata/_core/repr.py b/src/mudata/_core/repr.py index 481d0c6..f46b638 100644 --- a/src/mudata/_core/repr.py +++ b/src/mudata/_core/repr.py @@ -18,7 +18,7 @@ def maybe_module_class(obj, sep=".", builtins=False) -> tuple[str, str]: m = "" else: m += sep - except Exception: + except AttributeError: m += "" return (m, cl) @@ -49,7 +49,7 @@ def format_values(x): elif isinstance(x, pd.Series): x = x.to_numpy() else: - warn(f"got unknown array type {type(x)}, don't know how handle it.") + warn(f"got unknown array type {type(x)}, don't know how handle it.", stacklevel=1) return type(x) if x.dtype == object: try: @@ -63,16 +63,12 @@ def format_values(x): pass if testval is None: testval = x[0] - if ( - isinstance(testval, Integral) - or isinstance(testval, np.bool_) - or isinstance(testval, bool) - ): + if isinstance(testval, Integral) or isinstance(testval, np.bool_) or isinstance(testval, bool): s += ",".join([f"{i}" for i in x]) elif isinstance(testval, Real): s += ",".join([f"{i:.2f}" for i in x]) elif isinstance(testval, Complex): - warn("got complex number, don't know how to handle it") + warn("got complex number, don't know how to handle it", stacklevel=1) elif isinstance(testval, Iterable): s += ",".join(map(format_values, x)) lastidx = max(50, s.find(",")) @@ -88,9 +84,7 @@ def block_matrix(data, attr, name): s += "
" s += """ {}    {}{} - """.format( - obj.dtype, *maybe_module_class(obj) - ) + """.format(obj.dtype, *maybe_module_class(obj)) s += "
" return s @@ -131,9 +125,7 @@ def details_block_table(data, attr, name, expand=0, dims=True, square=False): [ """ {} {} {}{} - """.format( - attr_key, obj[attr_key].dtype, *maybe_module_class(obj[attr_key]) - ) + """.format(attr_key, obj[attr_key].dtype, *maybe_module_class(obj[attr_key])) for attr_key in obj.keys() ] ) @@ -146,11 +138,7 @@ def details_block_table(data, attr, name, expand=0, dims=True, square=False): attr_key, obj[attr_key].dtype if hasattr(obj[attr_key], "dtype") else "", *maybe_module_class(obj[attr_key]), - ( - f"{obj[attr_key].shape[1]} columns" - if len(obj[attr_key].shape) > 1 and dims - else "" - ), + (f"{obj[attr_key].shape[1]} columns" if len(obj[attr_key].shape) > 1 and dims else ""), ) for attr_key in obj.keys() ] @@ -166,9 +154,7 @@ def details_block_table(data, attr, name, expand=0, dims=True, square=False): s += "
" s += """ - """.format( - obj.dtype, *maybe_module_class(obj) - ) + """.format(obj.dtype, *maybe_module_class(obj)) s += "
{} {}{}
" s += "" else: # Unstructured diff --git a/src/mudata/_core/to_.py b/src/mudata/_core/to_.py index 8088918..cdde51a 100644 --- a/src/mudata/_core/to_.py +++ b/src/mudata/_core/to_.py @@ -8,7 +8,7 @@ def to_anndata(mdata: MuData, **kwargs) -> AnnData: """ - Convert MuData to AnnData by concatenating modalities + Convert MuData to AnnData by concatenating modalities. If mdata.axis == 0 (shared observations), concatenate modalities along axis 1 (`anndata.concat(axis=1)`). @@ -17,9 +17,9 @@ def to_anndata(mdata: MuData, **kwargs) -> AnnData: Parameters ---------- - data : MuData + data MuData object to convert to AnnData - kwargs : dict + kwargs Keyword arguments passed to anndata.concat """ if mdata.axis == -1: @@ -42,8 +42,7 @@ def to_mudata( by: str, ) -> MuData: """ - Convert AnnData to MuData by splitting it - along obs or var + Convert AnnData to MuData by splitting it along obs or var. Axis signifies the shared axis. Use `axis=0` for getting MuData with shared observations (axis=0), @@ -51,11 +50,11 @@ def to_mudata( Paramteters ----------- - adata : AnnData + adata AnnData object to convert to MuData - axis : int + axis Axis of shared observations (0) or variables (1) - by : str + by Key in `adata.var` (if axis=0) or `adata.obs` (if axis=1) to split by """ # Use AnnData.split_by() when it's ready diff --git a/src/mudata/_core/utils.py b/src/mudata/_core/utils.py index 712dc36..fc8dd07 100644 --- a/src/mudata/_core/utils.py +++ b/src/mudata/_core/utils.py @@ -38,12 +38,9 @@ def _maybe_coerce_to_boolean(df: T) -> T: return df -def _classify_attr_columns( - names: Sequence[str], prefixes: Sequence[str] -) -> Sequence[dict[str, str]]: +def _classify_attr_columns(names: Sequence[str], prefixes: Sequence[str]) -> Sequence[dict[str, str]]: """ - Classify names into common, non-unique, and unique - w.r.t. to the list of prefixes. + Classify names into common, non-unique, and unique w.r.t. to the list of prefixes. - Common columns do not have modality prefixes. - Non-unqiue columns have a modality prefix, @@ -93,20 +90,15 @@ def _classify_attr_columns( for name_res in res: name_res["class"] = ( - "common" - if name_res["count"] == n_mod - else "unique" if name_res["count"] == 1 else "nonunique" + "common" if name_res["count"] == n_mod else "unique" if name_res["count"] == 1 else "nonunique" ) return res -def _classify_prefixed_columns( - names: Sequence[str], prefixes: Sequence[str] -) -> Sequence[dict[str, str]]: +def _classify_prefixed_columns(names: Sequence[str], prefixes: Sequence[str]) -> Sequence[dict[str, str]]: """ - Classify names into common and prefixed - w.r.t. to the list of prefixes. + Classify names into common and prefixed w.r.t. to the list of prefixes. - Common columns do not have modality prefixes. - Prefixed columns are prefixed by modality names. 
@@ -156,9 +148,7 @@ def _update_and_concat(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame: # df.update(df2) common_cols = df1.columns.intersection(df2.columns) for col in common_cols: - if isinstance(df[col].values, pd.Categorical) and isinstance( - df2[col].values, pd.Categorical - ): + if isinstance(df[col].values, pd.Categorical) and isinstance(df2[col].values, pd.Categorical): common_cats = pd.api.types.union_categoricals([df[col], df2[col]]).categories df[col] = df[col].cat.set_categories(common_cats) df2[col] = df2[col].cat.set_categories(common_cats) diff --git a/src/mudata/_core/views.py b/src/mudata/_core/views.py index c2c06ae..71535b2 100644 --- a/src/mudata/_core/views.py +++ b/src/mudata/_core/views.py @@ -8,9 +8,7 @@ class _ViewMixin(_SetItemMixin): - """ - AnnData View Mixin but using ._mudata_ref - """ + """AnnData View Mixin but using ._mudata_ref""" def __init__( self, @@ -30,8 +28,6 @@ def __deepcopy__(self, memo): class DictView(_ViewMixin, dict): - """ - AnnData DictView adopted for MuData - """ + """AnnData DictView adapted for MuData""" pass diff --git a/tests/test_axis_-1.py b/tests/test_axis_-1.py index 5bbccf4..0ddbaee 100644 --- a/tests/test_axis_-1.py +++ b/tests/test_axis_-1.py @@ -13,9 +13,7 @@ def test_create(self): n, d_raw, d_preproc = 100, 900, 300 a_raw = AnnData(np.random.normal(size=(n, d_raw))) - a_preproc = a_raw[ - :, np.sort(np.random.choice(np.arange(d_raw), d_preproc, replace=False)) - ].copy() + a_preproc = a_raw[:, np.sort(np.random.choice(np.arange(d_raw), d_preproc, replace=False))].copy() mdata = MuData({"raw": a_raw, "preproc": a_preproc}, axis=-1) diff --git a/tests/test_obs_var.py b/tests/test_obs_var.py index 164efa5..167f711 100644 --- a/tests/test_obs_var.py +++ b/tests/test_obs_var.py @@ -30,9 +30,7 @@ def test_obs_global_columns(self, mdata, filepath_h5mu): assert list(mdata.obs.columns.values) == [f"{m}:demo" for m in mdata.mod.keys()] + ["demo"] mdata.write(filepath_h5mu) mdata_ = mudata.read(filepath_h5mu) - assert list(mdata_.obs.columns.values) == [f"{m}:demo" for m in mdata_.mod.keys()] + [ - "demo" - ] + assert list(mdata_.obs.columns.values) == [f"{m}:demo" for m in mdata_.mod.keys()] + ["demo"] def test_var_global_columns(self, mdata, filepath_h5mu): for m, mod in mdata.mod.items(): diff --git a/tests/test_pull_push.py b/tests/test_pull_push.py index ddb0520..2bb1bd9 100644 --- a/tests/test_pull_push.py +++ b/tests/test_pull_push.py @@ -11,7 +11,7 @@ @pytest.fixture() def modalities(request, obs_n, var_unique): n_mod = 3 - mods = dict() + mods = {} np.random.seed(100) for i in range(n_mod): i1 = i + 1 @@ -35,9 +35,7 @@ def modalities(request, obs_n, var_unique): if obs_n: if obs_n == "disjoint": - mod2_which_obs = np.random.choice( - mods["mod2"].obs_names, size=mods["mod2"].n_obs // 2, replace=False - ) + mod2_which_obs = np.random.choice(mods["mod2"].obs_names, size=mods["mod2"].n_obs // 2, replace=False) mods["mod2"] = mods["mod2"][mod2_which_obs].copy() return mods @@ -46,7 +44,7 @@ def modalities(request, obs_n, var_unique): @pytest.fixture() def datasets(request, var_n, obs_unique): n_datasets = 3 - datasets = dict() + datasets = {} np.random.seed(100) for i in range(n_datasets): i1 = i + 1 diff --git a/tests/test_update.py b/tests/test_update.py index 2a4e3ee..c91b9a4 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -23,9 +23,7 @@ def mdata(request, obs_n, obs_across, obs_mod): if obs_n: if obs_n == "disjoint": - mod2_which_obs = np.random.choice( - mods["mod2"].obs_names, size=mods["mod2"].n_obs 
// 2, replace=False - ) + mod2_which_obs = np.random.choice(mods["mod2"].obs_names, size=mods["mod2"].n_obs // 2, replace=False) mods["mod2"] = mods["mod2"][mod2_which_obs].copy() if obs_across: @@ -51,7 +49,7 @@ def mdata(request, obs_n, obs_across, obs_mod): @pytest.fixture() def modalities(request, obs_n, obs_across, obs_mod): n_mod = 3 - mods = dict() + mods = {} np.random.seed(100) for i in range(n_mod): i1 = i + 1 @@ -62,9 +60,7 @@ def modalities(request, obs_n, obs_across, obs_mod): if obs_n: if obs_n == "disjoint": - mod2_which_obs = np.random.choice( - mods["mod2"].obs_names, size=mods["mod2"].n_obs // 2, replace=False - ) + mod2_which_obs = np.random.choice(mods["mod2"].obs_names, size=mods["mod2"].n_obs // 2, replace=False) mods["mod2"] = mods["mod2"][mod2_which_obs].copy() if obs_across: @@ -191,21 +187,15 @@ def test_update_after_obs_reordered(self, mdata): some_obs_names = mdata.obs_names.values[:2] true_obsm_values = [ - mdata.obsm["test_obsm"][np.where(mdata.obs_names.values == name)[0][0]] - for name in some_obs_names + mdata.obsm["test_obsm"][np.where(mdata.obs_names.values == name)[0][0]] for name in some_obs_names ] mdata.mod["mod1"] = mdata["mod1"][::-1].copy() mdata.update() - test_obsm_values = [ - mdata.obsm["test_obsm"][np.where(mdata.obs_names == name)[0][0]] - for name in some_obs_names - ] + test_obsm_values = [mdata.obsm["test_obsm"][np.where(mdata.obs_names == name)[0][0]] for name in some_obs_names] - assert all( - [all(true_obsm_values[i] == test_obsm_values[i]) for i in range(len(true_obsm_values))] - ) + assert all(all(true_obsm_values[i] == test_obsm_values[i]) for i in range(len(true_obsm_values))) # @pytest.mark.usefixtures("filepath_h5mu") diff --git a/tests/test_update_axis_1.py b/tests/test_update_axis_1.py index 5192a37..f97d936 100644 --- a/tests/test_update_axis_1.py +++ b/tests/test_update_axis_1.py @@ -51,7 +51,7 @@ def mdata(request, var_n, var_across, var_mod): @pytest.fixture() def datasets(request, var_n, var_across, var_mod): n_sets = 3 - datasets = dict() + datasets = {} np.random.seed(100) for i in range(n_sets): i1 = i + 1 @@ -177,21 +177,15 @@ def test_update_after_var_reordered(self, mdata): some_var_names = mdata.var_names.values[:2] true_varm_values = [ - mdata.varm["test_varm"][np.where(mdata.var_names.values == name)[0][0]] - for name in some_var_names + mdata.varm["test_varm"][np.where(mdata.var_names.values == name)[0][0]] for name in some_var_names ] mdata.mod["set1"] = mdata["set1"][:, ::-1].copy() mdata.update() - test_varm_values = [ - mdata.varm["test_varm"][np.where(mdata.var_names == name)[0][0]] - for name in some_var_names - ] + test_varm_values = [mdata.varm["test_varm"][np.where(mdata.var_names == name)[0][0]] for name in some_var_names] - assert all( - [all(true_varm_values[i] == test_varm_values[i]) for i in range(len(true_varm_values))] - ) + assert all(all(true_varm_values[i] == test_varm_values[i]) for i in range(len(true_varm_values))) # @pytest.mark.usefixtures("filepath_h5mu") diff --git a/tests/test_view_copy.py b/tests/test_view_copy.py index af19655..4001778 100644 --- a/tests/test_view_copy.py +++ b/tests/test_view_copy.py @@ -164,9 +164,7 @@ def test_obsp_slicing(self, mdata_with_obsp): # Slice a subset of cells n_obs_subset = 50 - random_indices = np.random.choice( - mdata_with_obsp.obs_names, size=n_obs_subset, replace=False - ) + random_indices = np.random.choice(mdata_with_obsp.obs_names, size=n_obs_subset, replace=False) # Create a slice view mdata_slice = mdata_with_obsp[random_indices] From 
730262460467a8e7a2df0a86ac4a9232b7082d90 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Wed, 15 Oct 2025 09:53:42 +0200 Subject: [PATCH 03/10] use codecov upload token --- .github/workflows/test.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 0bd76e8..3a2317a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -87,6 +87,8 @@ jobs: uvx hatch run ${{ matrix.env.name }}:coverage xml # create report for upload - name: Upload coverage uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} # Check that all tests defined above pass. This makes it easy to set a single "required" test in branch # protection instead of having to update it frequently. See https://github.com/re-actors/alls-green#why. From 480e97b78163229ba41358b847f74856d6e7c7da Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Wed, 15 Oct 2025 09:54:35 +0200 Subject: [PATCH 04/10] docs: delete unused file --- docs/nuances.md | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 docs/nuances.md diff --git a/docs/nuances.md b/docs/nuances.md deleted file mode 100644 index 7adfcf5..0000000 --- a/docs/nuances.md +++ /dev/null @@ -1,26 +0,0 @@ -# Nuances - -This is *the sharp bits* page for `mudata`, which provides information on the nuances when working with `MuData` objects. - -```{contents} -:local: -:depth: 3 -``` - -```{toctree} -:maxdepth: 10 -:glob: * -``` - -## Variable names - -`MuData` is designed with features (variables) being different in different modalities in mind. Hence their names should be unique and different between modalities. In other words, `.var_names` are checked for uniqueness across modalities. - -This behaviour ensures all the functions are easy to reason about. For instance, if there is a `var_name` that is present in both modalities, what happens during plotting a joint embedding from `.obsm` coloured by this `var_name` is not strictly defined. - -Nevertheless, `MuData` can accommodate modalities with duplicated `.var_names`. For the typical workflows, we recommend renaming them manually or calling `.var_names_make_unique()`. - - -## Update - -Modalities in `MuData` objects are full-featured `AnnData` objects. Hence they can be operated individually, and their `MuData` parent will have to be updated to fetch this information. 
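A minimal sketch of the two points above (keeping `.var_names` unique across modalities, and calling `update()` after editing a modality in place), assuming a toy `rna`/`atac` pair; the modality names, shapes, and the `atac_gene_0` rename are made up for illustration:

```python
import numpy as np
from anndata import AnnData
from mudata import MuData

# Two modalities that deliberately share the variable name "gene_0"
rna = AnnData(np.zeros((4, 2)))
rna.var_names = ["gene_0", "gene_1"]
atac = AnnData(np.zeros((4, 2)))
atac.var_names = ["gene_0", "peak_1"]

# MuData accommodates the duplication but warns that var_names are not unique
mdata = MuData({"rna": rna, "atac": atac})

# Rename manually so the names differ between modalities
# (or call .var_names_make_unique(), as recommended above)
mdata.mod["atac"].var_names = ["atac_gene_0", "peak_1"]

# Modalities are full-featured AnnData objects edited in place,
# so refresh the parent MuData to pick up the new variable names
mdata.update()
```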
From 8ae716c98adc4d0f50de96b1d37222707e617147 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Wed, 15 Oct 2025 10:03:19 +0200 Subject: [PATCH 05/10] attempt to fix concurrent tests --- tests/conftest.py | 29 ++++++++++++----------------- tests/test_io.py | 2 -- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index ec77214..6faed70 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,26 +1,21 @@ import pytest -@pytest.fixture(scope="module") -def filepath_h5mu(tmpdir_factory): - return str(tmpdir_factory.mktemp("tmp_test_dir").join("testA.h5mu")) +@pytest.fixture +def filepath_h5mu(tmp_path): + return tmp_path / "testA.h5mu" -@pytest.fixture(scope="module") -def filepath2_h5mu(tmpdir_factory): - return str(tmpdir_factory.mktemp("tmp_test_dir").join("testB.h5mu")) +@pytest.fixture +def filepath2_h5mu(tmp_path): + return tmp_path / "testB.h5mu" -@pytest.fixture(scope="module") -def filepath_hdf5(tmpdir_factory): - return str(tmpdir_factory.mktemp("tmp_mofa_dir").join("mofa_pytest.hdf5")) +@pytest.fixture +def filepath_zarr(tmp_path): + return tmp_path / "testA.zarr" -@pytest.fixture(scope="module") -def filepath_zarr(tmpdir_factory): - return str(tmpdir_factory.mktemp("tmp_test_dir").join("testA.zarr")) - - -@pytest.fixture(scope="module") -def filepath2_zarr(tmpdir_factory): - return str(tmpdir_factory.mktemp("tmp_test_dir").join("testB.zarr")) +@pytest.fixture +def filepath2_zarr(tmp_path): + return tmp_path / "testB.zarr" diff --git a/tests/test_io.py b/tests/test_io.py index 273b0ab..683e2dd 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -18,7 +18,6 @@ def mdata(): ) -@pytest.mark.usefixtures("filepath_h5mu", "filepath_zarr") class TestMuData: def test_write_read_h5mu_basic(self, mdata, filepath_h5mu): mdata.write(filepath_h5mu) @@ -59,7 +58,6 @@ def test_write_read_zarr_mod_obs_colname(self, mdata, filepath_zarr): assert mdata_.obs["mod1:column"].values[0] == 2 -@pytest.mark.usefixtures("filepath_h5mu") class TestMuDataMod: def test_h5mu_mod_backed(self, mdata, filepath_h5mu): mdata.write( From 3503c657f18c9d353e80d34620cf0522ce591309 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Wed, 15 Oct 2025 15:45:46 +0200 Subject: [PATCH 06/10] changelog: link to latest version of keepachangelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2e1bf8..b9f3108 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog][], and this project adheres to [Semantic Versioning][]. -[keep a changelog]: https://keepachangelog.com/en/1.0.0/ +[keep a changelog]: https://keepachangelog.com/en/1.1.0/ [semantic versioning]: https://semver.org/spec/v2.0.0.html ## [0.3.2] From c0c4bec0d7841cb740852dfd5850f2b6735efe83 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Wed, 15 Oct 2025 17:50:33 +0200 Subject: [PATCH 07/10] changelog: reformat 0.2.0 entry --- CHANGELOG.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9f3108..647b9a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -89,11 +89,17 @@ To copy the annotations explicitly, you will need to use [`pull_obs()`](#mudata. ## [0.2.0] -This version uses new I/O serialisation of `AnnData v0.8 `_. 
+### Added + +- [new axes interface](https://github.com/scverse/mudata/blob/master/docs/source/notebooks/axes.ipynb) that allows to use MuData objects as containers with different shared dimensions. + +### Changed + +- new I/O serialisation of [AnnData v0.8](https://anndata.readthedocs.io/en/latest/release-notes/index.html#th-march-2022)_. -Updating a MuData object with :func:`mudata.MuData.update` is even faster in many use cases. +### Fixed -There's `a new axes interface `_ that allows to use MuData objects as containers with different shared dimensions. +- Updating a MuData object with `MuData.update()` is even faster in many use cases. ## [0.1.2] From 964fc1cce958c89766452d516135af626f741cca Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Thu, 16 Oct 2025 09:16:46 +0200 Subject: [PATCH 08/10] changelog: remove internal documentation links They don't work on Github and may not work on readthedocs in the future, since we don't keep old versions of the documentation around. --- CHANGELOG.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 647b9a9..0f3711e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning][]. ### Fixed -- Fixed an [issue](https://github.com/scverse/mudata/issues/99) in [`update()`](#mudata.MuData.update) +- Fixed an [issue](https://github.com/scverse/mudata/issues/99) in `update()` ## [0.3.1] @@ -24,27 +24,27 @@ and this project adheres to [Semantic Versioning][]. ### Added -- Pull/push interface for annotations: [`pull_obs()`](#mudata.MuData.pull_obs), [`pull_var()`](#mudata.MuData.pull_var), [`push_obs()`](#mudata.MuData.push_obs), [`push_var()`](#mudata.MuData.push_var) -- Conversion functions: [`to_anndata()`](#mudata.MuData.to_anndata), [`to_mudata()`](#mudata.to_mudata) -- [Concatenation](#mudata.concat) of MuData objects +- Pull/push interface for annotations: `pull_obs()`, `pull_var()`, `push_obs()`, `push_var()` +- Conversion functions: `to_anndata()`, `to_mudata()` +- Concatenation of MuData objects - `MuData.mod_names` attribute - Pretty-printing for `MuData.mod` - `fsspec` support for readers. ### Fixed -- Improved performance and behavior of [`update()`](#mudata.MuData.update). +- Improved performance and behavior of `update()`. For compatibility reasons, this release keeps the old behaviour of pulling annotations on read/update as default. -- [`read_zarr()`](#mudata.read_zarr) now supports `mod-order` +- `read_zarr()` now supports `mod-order` - Correct handling of the `uns` attribute by views. ### Note If you want to adopt the new update behaviour, set `mudata.set_options(pull_on_update=False)`. This will be the default behaviour in the next release. -With it, the annotations will not be copied from the modalities on [`update()`](#mudata.MuData.update) implicitly. +With it, the annotations will not be copied from the modalities on `update()` implicitly. -To copy the annotations explicitly, you will need to use [`pull_obs()`](#mudata.MuData.pull_obs) and/or [`pull_var()`](#mudata.MuData.pull_var). +To copy the annotations explicitly, you will need to use `pull_obs()` and/or `pull_var()`. ## [0.2.4] @@ -68,15 +68,15 @@ To copy the annotations explicitly, you will need to use [`pull_obs()`](#mudata. ### Fixed -- [`Path`](#pathlib.Path) objects now work in [](#mudata.read) +- `Path` objects now work in `mudata.read()` ## [0.2.1] ### Added -- [`MuData.__len__`](#mudata.MuData.__len__). +- `MuData.__len__`. 
This should make it easier to build MuData into workflows that operate on data containers with length. - In practice using [`n_obs`](#mudata.MuData.n_obs) should be preferred. + In practice using `n_obs` should be preferred. ### Changed @@ -85,7 +85,7 @@ To copy the annotations explicitly, you will need to use [`pull_obs()`](#mudata. ### Fixed -- Improvements and optimizations to [`update()`](#mudata.MuData.update) +- Improvements and optimizations to `update()` ## [0.2.0] @@ -95,7 +95,7 @@ To copy the annotations explicitly, you will need to use [`pull_obs()`](#mudata. ### Changed -- new I/O serialisation of [AnnData v0.8](https://anndata.readthedocs.io/en/latest/release-notes/index.html#th-march-2022)_. +- new I/O serialisation of [AnnData v0.8](https://anndata.readthedocs.io/en/latest/release-notes/index.html#th-march-2022). ### Fixed @@ -105,11 +105,11 @@ To copy the annotations explicitly, you will need to use [`pull_obs()`](#mudata. ### Changed -- Improved documentation, including [a new page describing the sharp bits](notebooks/nuances.ipynb) +- Improved documentation, including a new page describing the sharp bits. ### Fixed -- Updating a MuData object with [`update()`](#mudata.MuData.update) is now much faster. +- Updating a MuData object with `update()` is now much faster. ## [0.1.1] @@ -117,7 +117,7 @@ To copy the annotations explicitly, you will need to use [`pull_obs()`](#mudata. ## [0.1.0] -Initial `mudata` release with [`MuData`](#mudata.MuData), previously a part of the `muon` framework. +Initial `mudata` release with `MuData`, previously a part of the `muon` framework. [0.3.2]: https://github.com/scverse/mudata/compare/v0.3.1...v0.3.2 [0.3.1]: https://github.com/scverse/mudata/compare/v0.3.0...v0.3.1 From e1661974d8e3a7b712666a271d42a46cf58ea352 Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Fri, 17 Oct 2025 13:55:29 +0200 Subject: [PATCH 09/10] update version handling in dev mode adapt current version of hatch-vcs-footgun-example --- pyproject.toml | 2 +- src/mudata/__init__.py | 11 +---------- src/mudata/version.py | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 11 deletions(-) create mode 100755 src/mudata/version.py diff --git a/pyproject.toml b/pyproject.toml index e16b527..3c82098 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,8 +37,8 @@ dependencies = [ ] optional-dependencies.dev = [ "fsspec", + "hatch-vcs", # for version handling in editable mode "pre-commit", - "setuptools-scm", # for version handling in editable mode "twine>=4.0.2", "zarr<3", ] diff --git a/src/mudata/__init__.py b/src/mudata/__init__.py index e005730..4377213 100644 --- a/src/mudata/__init__.py +++ b/src/mudata/__init__.py @@ -1,15 +1,5 @@ """Multimodal datasets""" -try: # See https://github.com/maresb/hatch-vcs-footgun-example - from setuptools_scm import get_version - - __version__ = get_version(root="../..", relative_to=__file__) -except (ImportError, LookupError): - try: - from ._version import __version__ - except ModuleNotFoundError as e: - raise RuntimeError("mudata is not correctly installed. Please install it, e.g. 
with pip.") from e - from anndata import AnnData from ._core import utils @@ -29,6 +19,7 @@ from ._core.merge import concat from ._core.mudata import MuData from ._core.to_ import to_anndata, to_mudata +from .version import __version__, __version_tuple__ __anndataversion__ = "0.1.0" __mudataversion__ = "0.1.0" diff --git a/src/mudata/version.py b/src/mudata/version.py new file mode 100755 index 0000000..b0c7fcd --- /dev/null +++ b/src/mudata/version.py @@ -0,0 +1,37 @@ +"""Compute the version number and store it in the `__version__` variable. + +Based on . +""" + + +def _get_hatch_version(): + """Compute the most up-to-date version number in a development environment. + + Returns `None` if Hatchling is not installed, e.g. in a production environment. + + For more details, see . + """ + import os + + try: + from hatchling.metadata.core import ProjectMetadata + from hatchling.plugin.manager import PluginManager + from hatchling.utils.fs import locate_file + except ImportError: + # Hatchling is not installed, so probably we are not in + # a development environment. + return None + + pyproject_toml = locate_file(__file__, "pyproject.toml") + if pyproject_toml is None: + return None + root = os.path.dirname(pyproject_toml) + metadata = ProjectMetadata(root=root, plugin_manager=PluginManager()) + # Version can be either statically set in pyproject.toml or computed dynamically: + return metadata.core.version or metadata.hatch.version.cached + + +__version__ = _get_hatch_version() +__version_tuple__ = None +if not __version__: # not in development mode + from ._version import __version__ From 22c47588c382c5ed3a533840ee274c24228cd1ec Mon Sep 17 00:00:00 2001 From: Ilia Kats Date: Wed, 22 Oct 2025 10:10:53 +0200 Subject: [PATCH 10/10] formatter: set skip-magic-trailing-comma=true and reformat --- docs/conf.py | 17 ++------ pyproject.toml | 2 + src/mudata/_core/compat.py | 30 +++----------- src/mudata/_core/config.py | 6 +-- src/mudata/_core/file_backing.py | 26 ++---------- src/mudata/_core/io.py | 44 ++++----------------- src/mudata/_core/merge.py | 20 +++------- src/mudata/_core/mudata.py | 68 +++++--------------------------- src/mudata/_core/repr.py | 7 +--- src/mudata/_core/to_.py | 6 +-- src/mudata/_core/utils.py | 24 ++--------- src/mudata/_core/views.py | 7 +--- src/mudata/version.py | 0 tests/test_io.py | 4 +- tests/test_view_copy.py | 54 +++++++++++-------------- 15 files changed, 68 insertions(+), 247 deletions(-) mode change 100755 => 100644 src/mudata/version.py diff --git a/docs/conf.py b/docs/conf.py index d835221..75d9ba2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -72,14 +72,7 @@ napoleon_use_rtype = True # having a separate entry generally helps readability napoleon_use_param = True myst_heading_anchors = 6 # create anchors for h1-h6 -myst_enable_extensions = [ - "amsmath", - "colon_fence", - "deflist", - "dollarmath", - "html_image", - "html_admonition", -] +myst_enable_extensions = ["amsmath", "colon_fence", "deflist", "dollarmath", "html_image", "html_admonition"] myst_url_schemes = ("http", "https", "mailto") nb_output_stderr = "remove" nb_execution_mode = "off" @@ -87,14 +80,10 @@ typehints_defaults = "braces" ogp_social_cards = { - "image": "_static/img/mudata.png", # doesn't support SVGs' + "image": "_static/img/mudata.png" # doesn't support SVGs' } -source_suffix = { - ".rst": "restructuredtext", - ".ipynb": "myst-nb", - ".myst": "myst-nb", -} +source_suffix = {".rst": "restructuredtext", ".ipynb": "myst-nb", ".myst": "myst-nb"} intersphinx_mapping = { "python": 
("https://docs.python.org/3", None), diff --git a/pyproject.toml b/pyproject.toml index 3c82098..de86478 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,8 @@ line-length = 120 src = [ "src" ] extend-include = [ "*.ipynb" ] +format.skip-magic-trailing-comma = true +format.exclude = [ "*.ipynb" ] format.docstring-code-format = true lint.select = [ diff --git a/src/mudata/_core/compat.py b/src/mudata/_core/compat.py index 1419811..cc7f7a0 100644 --- a/src/mudata/_core/compat.py +++ b/src/mudata/_core/compat.py @@ -12,35 +12,17 @@ from anndata._core.aligned_mapping import AlignedView, AxisArrays, PairwiseArrays except ImportError: # anndata < 0.10.9 - from anndata._core.aligned_mapping import ( - AlignedViewMixin as AlignedView, - ) - from anndata._core.aligned_mapping import ( - AxisArrays as AxisArraysLegacy, - ) - from anndata._core.aligned_mapping import ( - AxisArraysBase, - ) - from anndata._core.aligned_mapping import ( - PairwiseArrays as PairwiseArraysLegacy, - ) + from anndata._core.aligned_mapping import AlignedViewMixin as AlignedView + from anndata._core.aligned_mapping import AxisArrays as AxisArraysLegacy + from anndata._core.aligned_mapping import AxisArraysBase + from anndata._core.aligned_mapping import PairwiseArrays as PairwiseArraysLegacy class AxisArrays(AxisArraysLegacy): - def __init__( - self, - parent: AnnData | Raw, - axis: int, - store: Mapping | AxisArraysBase | None = None, - ): + def __init__(self, parent: AnnData | Raw, axis: int, store: Mapping | AxisArraysBase | None = None): super().__init__(parent, axis=axis, vals=store) class PairwiseArrays(PairwiseArraysLegacy): - def __init__( - self, - parent: AnnData, - axis: int, - store: Mapping | None = None, - ): + def __init__(self, parent: AnnData, axis: int, store: Mapping | None = None): super().__init__(parent, axis=axis, vals=store) diff --git a/src/mudata/_core/config.py b/src/mudata/_core/config.py index f58c9d8..6e5671b 100644 --- a/src/mudata/_core/config.py +++ b/src/mudata/_core/config.py @@ -1,10 +1,6 @@ import logging as log -OPTIONS = { - "display_style": "text", - "display_html_expand": 0b010, - "pull_on_update": None, -} +OPTIONS = {"display_style": "text", "display_html_expand": 0b010, "pull_on_update": None} _VALID_OPTIONS = { "display_style": lambda x: x in ("text", "html"), diff --git a/src/mudata/_core/file_backing.py b/src/mudata/_core/file_backing.py index 8cf07c3..9b8e147 100644 --- a/src/mudata/_core/file_backing.py +++ b/src/mudata/_core/file_backing.py @@ -9,23 +9,14 @@ class MuDataFileManager(AnnDataFileManager): - def __init__( - self, - filename: PathLike | None = None, - filemode: Literal["r", "r+"] | None = None, - ): + def __init__(self, filename: PathLike | None = None, filemode: Literal["r", "r+"] | None = None): self._counter = 0 self._children = WeakSet() if filename is not None: filename = Path(filename) super().__init__(ad.AnnData(), filename, filemode) - def open( - self, - filename: PathLike | None = None, - filemode: Literal["r", "r+"] | None = None, - add_ref=False, - ) -> bool: + def open(self, filename: PathLike | None = None, filemode: Literal["r", "r+"] | None = None, add_ref=False) -> bool: if self.is_open and ( filename is None and filemode is None or filename == self.filename and filemode == self._filemode ): @@ -77,12 +68,7 @@ def filename(self, filename: PathLike | None): class AnnDataFileManager(ad._core.file_backing.AnnDataFileManager): _h5files = {} - def __init__( - self, - adata: ad.AnnData, - mod: str, - parent: MuDataFileManager, - ): + def 
__init__(self, adata: ad.AnnData, mod: str, parent: MuDataFileManager): self._parent = parent self._mod = mod parent._children.add(self) @@ -91,11 +77,7 @@ def __init__( if parent.is_open: self._set_file() - def open( - self, - filename: PathLike | None = None, - filemode: Literal["r", "r+"] | None = None, - ): + def open(self, filename: PathLike | None = None, filemode: Literal["r", "r+"] | None = None): if not self._parent.open(filename, filemode, add_ref=True): self._set_file() diff --git a/src/mudata/_core/io.py b/src/mudata/_core/io.py index 046a626..c6c101c 100644 --- a/src/mudata/_core/io.py +++ b/src/mudata/_core/io.py @@ -102,13 +102,7 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs): mdata.update() -def write_zarr( - store: MutableMapping | str | Path, - data: MuData | AnnData, - chunks=None, - write_data=True, - **kwargs, -): +def write_zarr(store: MutableMapping | str | Path, data: MuData | AnnData, chunks=None, write_data=True, **kwargs): """ Write MuData or AnnData object to the Zarr store @@ -407,13 +401,7 @@ def _validate_h5mu(filename: PathLike) -> (str, Callable | None): def read_h5mu(filename: PathLike, backed: str | bool | None = None): """Read MuData object from HDF5 file.""" - assert backed in [ - None, - True, - False, - "r", - "r+", - ], "Argument `backed` should be boolean, or r/r+, or None" + assert backed in [None, True, False, "r", "r+"], "Argument `backed` should be boolean, or r/r+, or None" from anndata._io.h5ad import read_dataframe from anndata._io.specs.registry import read_elem @@ -469,12 +457,8 @@ def read_zarr(store: str | Path | MutableMapping | zarr.Group): """ import zarr from anndata._io.specs.registry import read_elem - from anndata._io.zarr import ( - read_dataframe, - ) - from anndata._io.zarr import ( - read_zarr as anndata_read_zarr, - ) + from anndata._io.zarr import read_dataframe + from anndata._io.zarr import read_zarr as anndata_read_zarr if isinstance(store, Path): store = str(store) @@ -532,11 +516,7 @@ def _read_zarr_mod(g: zarr.Group, manager: MuDataFileManager = None, backed: boo ad.file = AnnDataFileManager(ad, Path(g.name).name, manager) raw = _read_legacy_raw( - g, - d.get("raw"), - read_dataframe, - read_elem, - attrs=("var", "varm") if backed else ("var", "varm", "X"), + g, d.get("raw"), read_dataframe, read_elem, attrs=("var", "varm") if backed else ("var", "varm", "X") ) if raw: ad._raw = Raw(ad, **raw) @@ -569,11 +549,7 @@ def _read_h5mu_mod(g: h5py.Group, manager: MuDataFileManager = None, backed: boo return ad -def read_h5ad( - filename: PathLike, - mod: str | None, - backed: str | bool | None = None, -) -> AnnData: +def read_h5ad(filename: PathLike, mod: str | None, backed: str | bool | None = None) -> AnnData: """Read AnnData object from inside a .h5mu file or from a standalone .h5ad file (mod=None). Currently replicates and modifies anndata._io.h5ad.read_h5ad. @@ -581,13 +557,7 @@ def read_h5ad( Ideally this is merged later to anndata._io.h5ad.read_h5ad. 
""" - assert backed in [ - None, - True, - False, - "r", - "r+", - ], "Argument `backed` should be boolean, or r/r+, or None" + assert backed in [None, True, False, "r", "r+"], "Argument `backed` should be boolean, or r/r+, or None" from anndata import read_h5ad diff --git a/src/mudata/_core/merge.py b/src/mudata/_core/merge.py index ebf77d1..53c1ba2 100644 --- a/src/mudata/_core/merge.py +++ b/src/mudata/_core/merge.py @@ -170,8 +170,7 @@ def concat( # Label column label_col = pd.Categorical.from_codes( - np.repeat(np.arange(len(mdatas)), [m.shape[axis] for m in mdatas]), - categories=keys, + np.repeat(np.arange(len(mdatas)), [m.shape[axis] for m in mdatas]), categories=keys ) # Combining indexes @@ -185,11 +184,7 @@ def concat( # Annotation for concatenation axis check_combinable_cols([getattr(m, dim).columns for m in mdatas], join=join) - concat_annot = pd.concat( - unify_dtypes([getattr(m, dim) for m in mdatas]), - join=join, - ignore_index=True, - ) + concat_annot = pd.concat(unify_dtypes([getattr(m, dim) for m in mdatas]), join=join, ignore_index=True) concat_annot.index = concat_indices if label is not None: concat_annot[label] = label_col @@ -213,10 +208,7 @@ def concat( patch_alt_dim.append(elems_alt_dim) if join == "inner": - concat_mapping = inner_concat_aligned_mapping( - [getattr(m, f"{dim}m") for m in mdatas], - index=concat_indices, - ) + concat_mapping = inner_concat_aligned_mapping([getattr(m, f"{dim}m") for m in mdatas], index=concat_indices) if pairwise: concat_pairwise = concat_pairwise_mapping( mappings=[getattr(m, f"{dim}p") for m in mdatas], @@ -227,9 +219,7 @@ def concat( concat_pairwise = {} elif join == "outer": concat_mapping = outer_concat_aligned_mapping( - [getattr(m, f"{dim}m") for m in mdatas], - index=concat_indices, - fill_value=fill_value, + [getattr(m, f"{dim}m") for m in mdatas], index=concat_indices, fill_value=fill_value ) if pairwise: concat_pairwise = concat_pairwise_mapping( @@ -262,7 +252,7 @@ def concat( [ {k: r(v, axis=0) for k, v in getattr(a, f"{alt_dim}m").items()} for r, a in zip(reindexers, mdatas, strict=False) - ], + ] ) alt_pairwise = merge( [ diff --git a/src/mudata/_core/mudata.py b/src/mudata/_core/mudata.py index adf198b..2f69048 100644 --- a/src/mudata/_core/mudata.py +++ b/src/mudata/_core/mudata.py @@ -16,10 +16,7 @@ import numpy as np import pandas as pd from anndata import AnnData -from anndata._core.aligned_mapping import ( - AxisArraysBase, - PairwiseArraysView, -) +from anndata._core.aligned_mapping import AxisArraysBase, PairwiseArraysView from anndata._core.views import DataFrameView from anndata.utils import convert_to_dict @@ -148,11 +145,7 @@ def __init__( self, data: Union[AnnData, Mapping[str, AnnData], "MuData"] | None = None, feature_types_names: dict | None = MappingProxyType( - { - "Gene Expression": "rna", - "Peaks": "atac", - "Antibody Capture": "prot", - } + {"Gene Expression": "rna", "Peaks": "atac", "Antibody Capture": "prot"} ), as_view: bool = False, index: tuple[slice | Integral, slice | Integral] | slice | Integral | None = None, @@ -697,12 +690,7 @@ def _update_attr( ) for m, a in self.mod.items() ] - data_mod = pd.concat( - dfs, - join="outer", - axis=axis, - sort=False, - ) + data_mod = pd.concat(dfs, join="outer", axis=axis, sort=False) # pd.concat wrecks the ordering when doing an outer join with a MultiIndex and different data frame shapes if axis == 1: @@ -797,11 +785,7 @@ def _update_attr( for colname in (mod + "+" + rowcol for mod in self.mod.keys()): data_mod.drop(colname, axis=1, inplace=True, 
errors="ignore") - setattr( - self, - "_" + attr, - attr_reindexed, - ) + setattr(self, "_" + attr, attr_reindexed) # Update .obsm/.varm # this needs to be after setting _obs/_var due to dimension checking in the aligned mapping @@ -964,8 +948,7 @@ def _update_attr_legacy( if join_common: # If all modalities have a column with the same name, it is not global columns_common = reduce( - lambda a, b: a.intersection(b), - [getattr(self.mod[mod], attr).columns for mod in self.mod], + lambda a, b: a.intersection(b), [getattr(self.mod[mod], attr).columns for mod in self.mod] ) data_global = data_global.loc[:, [c not in columns_common for c in data_global.columns]] @@ -1091,12 +1074,7 @@ def _update_attr_legacy( ] # Here, attr_names are guaranteed to be unique and are safe to be used for joins - data_mod = pd.concat( - dfs, - join="outer", - axis=axis, - sort=False, - ) + data_mod = pd.concat(dfs, join="outer", axis=axis, sort=False) data_common = pd.concat( [ @@ -1117,12 +1095,7 @@ def _update_attr_legacy( ) for m, a in self.mod.items() ] - data_mod = pd.concat( - dfs, - join="outer", - axis=axis, - sort=False, - ) + data_mod = pd.concat(dfs, join="outer", axis=axis, sort=False) # pd.concat wrecks the ordering when doing an outer join with a MultiIndex and different data frame shapes if axis == 1: @@ -2241,13 +2214,7 @@ def push_obs( Forces drop=True. False by default. """ return self._push_attr( - "obs", - columns=columns, - mods=mods, - common=common, - prefixed=prefixed, - drop=drop, - only_drop=only_drop, + "obs", columns=columns, mods=mods, common=common, prefixed=prefixed, drop=drop, only_drop=only_drop ) def push_var( @@ -2286,13 +2253,7 @@ def push_var( Forces drop=True. False by default. """ return self._push_attr( - "var", - columns=columns, - mods=mods, - common=common, - prefixed=prefixed, - drop=drop, - only_drop=only_drop, + "var", columns=columns, mods=mods, common=common, prefixed=prefixed, drop=drop, only_drop=only_drop ) def write_h5mu(self, filename: str | None = None, **kwargs): @@ -2378,16 +2339,7 @@ def _gen_repr(self, n_obs, n_vars, extensive: bool = False, nest_level: int = 0) descr += f"\n{mod_indent}{k}:\t" + v._gen_repr(v.n_obs, v.n_vars, extensive, nest_level + 1) continue descr += f"\n{mod_indent}{k}:\t{v.n_obs} x {v.n_vars}" - for attr in [ - "obs", - "var", - "uns", - "obsm", - "varm", - "layers", - "obsp", - "varp", - ]: + for attr in ["obs", "var", "uns", "obsm", "varm", "layers", "obsp", "varp"]: try: keys = getattr(v, attr).keys() if len(keys) > 0: diff --git a/src/mudata/_core/repr.py b/src/mudata/_core/repr.py index f46b638..d1ae690 100644 --- a/src/mudata/_core/repr.py +++ b/src/mudata/_core/repr.py @@ -53,12 +53,7 @@ def format_values(x): return type(x) if x.dtype == object: try: - testval = next( - filter( - lambda y: ~np.isnan(y) if isinstance(y, Number) else x is not None, - x, - ) - ) + testval = next(filter(lambda y: ~np.isnan(y) if isinstance(y, Number) else x is not None, x)) except StopIteration: pass if testval is None: diff --git a/src/mudata/_core/to_.py b/src/mudata/_core/to_.py index cdde51a..783afd1 100644 --- a/src/mudata/_core/to_.py +++ b/src/mudata/_core/to_.py @@ -36,11 +36,7 @@ def to_anndata(mdata: MuData, **kwargs) -> AnnData: return adata -def to_mudata( - adata: AnnData, - axis: Literal[0, 1], - by: str, -) -> MuData: +def to_mudata(adata: AnnData, axis: Literal[0, 1], by: str) -> MuData: """ Convert AnnData to MuData by splitting it along obs or var. 
diff --git a/src/mudata/_core/utils.py b/src/mudata/_core/utils.py index fc8dd07..023662c 100644 --- a/src/mudata/_core/utils.py +++ b/src/mudata/_core/utils.py @@ -62,11 +62,7 @@ def _classify_attr_columns(names: Sequence[str], prefixes: Sequence[str]) -> Seq res: list[dict[str, str]] = [] for name in names: - name_common = { - "name": name, - "prefix": "", - "derived_name": name, - } + name_common = {"name": name, "prefix": "", "derived_name": name} name_split = name.split(":", 1) if len(name_split) < 2: @@ -75,11 +71,7 @@ def _classify_attr_columns(names: Sequence[str], prefixes: Sequence[str]) -> Seq maybe_modname, derived_name = name_split if maybe_modname in prefixes: - name_prefixed = { - "name": name, - "prefix": maybe_modname, - "derived_name": derived_name, - } + name_prefixed = {"name": name, "prefix": maybe_modname, "derived_name": derived_name} res.append(name_prefixed) else: res.append(name_common) @@ -114,11 +106,7 @@ def _classify_prefixed_columns(names: Sequence[str], prefixes: Sequence[str]) -> res: list[dict[str, str]] = [] for name in names: - name_common = { - "name": name, - "prefix": "", - "derived_name": name, - } + name_common = {"name": name, "prefix": "", "derived_name": name} name_split = name.split(":", 1) if len(name_split) < 2: @@ -127,11 +115,7 @@ def _classify_prefixed_columns(names: Sequence[str], prefixes: Sequence[str]) -> maybe_modname, derived_name = name_split if maybe_modname in prefixes: - name_prefixed = { - "name": name, - "prefix": maybe_modname, - "derived_name": derived_name, - } + name_prefixed = {"name": name, "prefix": maybe_modname, "derived_name": derived_name} res.append(name_prefixed) else: res.append(name_common) diff --git a/src/mudata/_core/views.py b/src/mudata/_core/views.py index 71535b2..4b11653 100644 --- a/src/mudata/_core/views.py +++ b/src/mudata/_core/views.py @@ -10,12 +10,7 @@ class _ViewMixin(_SetItemMixin): """AnnData View Mixin but using ._mudata_ref""" - def __init__( - self, - *args, - view_args: tuple["MuData", str, tuple[str, ...]] = None, - **kwargs, - ): + def __init__(self, *args, view_args: tuple["MuData", str, tuple[str, ...]] = None, **kwargs): if view_args is not None: view_args = ElementRef(*view_args) self._view_args = view_args diff --git a/src/mudata/version.py b/src/mudata/version.py old mode 100755 new mode 100644 diff --git a/tests/test_io.py b/tests/test_io.py index 683e2dd..5ec256c 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -60,9 +60,7 @@ def test_write_read_zarr_mod_obs_colname(self, mdata, filepath_zarr): class TestMuDataMod: def test_h5mu_mod_backed(self, mdata, filepath_h5mu): - mdata.write( - filepath_h5mu, - ) + mdata.write(filepath_h5mu) mdata_ = mudata.read_h5mu(filepath_h5mu, backed="r") assert list(mdata_.mod.keys()) == ["mod1", "mod2"] diff --git a/tests/test_view_copy.py b/tests/test_view_copy.py index 4001778..effa9f1 100644 --- a/tests/test_view_copy.py +++ b/tests/test_view_copy.py @@ -14,12 +14,10 @@ def mdata(): rng = np.random.default_rng(42) mod1 = AnnData( - np.arange(0, 100, 0.1).reshape(-1, 10), - obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)), + np.arange(0, 100, 0.1).reshape(-1, 10), obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)) ) mod2 = AnnData( - np.arange(101, 2101, 1).reshape(-1, 20), - obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)), + np.arange(101, 2101, 1).reshape(-1, 20), obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)) ) mods = {"mod1": mod1, "mod2": mod2} # Make var_names different in 
different modalities @@ -34,12 +32,10 @@ def mdata_with_obsp(): """Create a MuData object with populated obsp and varp fields.""" rng = np.random.default_rng(42) mod1 = AnnData( - np.arange(0, 100, 0.1).reshape(-1, 10), - obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)), + np.arange(0, 100, 0.1).reshape(-1, 10), obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)) ) mod2 = AnnData( - np.arange(101, 2101, 1).reshape(-1, 20), - obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)), + np.arange(101, 2101, 1).reshape(-1, 20), obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)) ) mods = {"mod1": mod1, "mod2": mod2} # Make var_names different in different modalities @@ -170,26 +166,22 @@ def test_obsp_slicing(self, mdata_with_obsp): mdata_slice = mdata_with_obsp[random_indices] # Check that the sliced obsp matrices have correct shape in the view - assert mdata_slice.obsp["distances"].shape == ( - n_obs_subset, - n_obs_subset, - ), f"Expected shape in view: {(n_obs_subset, orig_n_obs)}, got: {mdata_slice.obsp['distances'].shape}" - assert mdata_slice.obsp["connectivities"].shape == ( - n_obs_subset, - n_obs_subset, - ), f"Expected shape in view: {(n_obs_subset, orig_n_obs)}, got: {mdata_slice.obsp['connectivities'].shape}" + assert mdata_slice.obsp["distances"].shape == (n_obs_subset, n_obs_subset), ( + f"Expected shape in view: {(n_obs_subset, orig_n_obs)}, got: {mdata_slice.obsp['distances'].shape}" + ) + assert mdata_slice.obsp["connectivities"].shape == (n_obs_subset, n_obs_subset), ( + f"Expected shape in view: {(n_obs_subset, orig_n_obs)}, got: {mdata_slice.obsp['connectivities'].shape}" + ) # Make a copy of the sliced MuData object mdata_copy = mdata_slice.copy() # Check shapes after copy - these should be (n_obs_subset, n_obs_subset) if correctly copied - assert mdata_copy.obsp["distances"].shape == ( - n_obs_subset, - n_obs_subset, - ), f"Expected shape after copy: {(n_obs_subset, n_obs_subset)}, got: {mdata_copy.obsp['distances'].shape}" - assert mdata_copy.obsp["connectivities"].shape == ( - n_obs_subset, - n_obs_subset, - ), f"Expected shape after copy: {(n_obs_subset, n_obs_subset)}, got: {mdata_copy.obsp['connectivities'].shape}" + assert mdata_copy.obsp["distances"].shape == (n_obs_subset, n_obs_subset), ( + f"Expected shape after copy: {(n_obs_subset, n_obs_subset)}, got: {mdata_copy.obsp['distances'].shape}" + ) + assert mdata_copy.obsp["connectivities"].shape == (n_obs_subset, n_obs_subset), ( + f"Expected shape after copy: {(n_obs_subset, n_obs_subset)}, got: {mdata_copy.obsp['connectivities'].shape}" + ) def test_varp_slicing(self, mdata_with_obsp): """Test that varp matrices are correctly sliced when subsetting a MuData object.""" @@ -207,15 +199,13 @@ def test_varp_slicing(self, mdata_with_obsp): mdata_slice = mdata_with_obsp[:, random_var_indices] # Check that the sliced varp matrix has correct shape in the view - assert mdata_slice.varp["correlations"].shape == ( - n_var_subset, - n_var_subset, - ), f"Expected shape in view: {(n_var_subset, orig_n_var)}, got: {mdata_slice.varp['correlations'].shape}" + assert mdata_slice.varp["correlations"].shape == (n_var_subset, n_var_subset), ( + f"Expected shape in view: {(n_var_subset, orig_n_var)}, got: {mdata_slice.varp['correlations'].shape}" + ) # Copy the sliced MuData object mdata_copy = mdata_slice.copy() # Check shapes after copy - assert mdata_copy.varp["correlations"].shape == ( - n_var_subset, - n_var_subset, - ), f"Expected shape after copy: {(n_var_subset, 
n_var_subset)}, got: {mdata_copy.varp['correlations'].shape}" + assert mdata_copy.varp["correlations"].shape == (n_var_subset, n_var_subset), ( + f"Expected shape after copy: {(n_var_subset, n_var_subset)}, got: {mdata_copy.varp['correlations'].shape}" + )