diff --git a/.codecov.yml b/.codecov.yaml
similarity index 78%
rename from .codecov.yml
rename to .codecov.yaml
index cb56083..d0c0e29 100644
--- a/.codecov.yml
+++ b/.codecov.yaml
@@ -1,4 +1,4 @@
-# Based on pydata/xarray, anndata
+# Based on pydata/xarray
codecov:
require_ci_to_pass: no
@@ -12,6 +12,6 @@ coverage:
changes: false
comment:
- layout: "diff, flags, files"
+ layout: diff, flags, files
behavior: once
require_base: no
diff --git a/.cruft.json b/.cruft.json
new file mode 100644
index 0000000..6f02e9a
--- /dev/null
+++ b/.cruft.json
@@ -0,0 +1,43 @@
+{
+ "template": "https://github.com/scverse/cookiecutter-scverse",
+ "commit": "d383d94fadff9e4e6fdb59d77c68cb900d7cedec",
+ "checkout": null,
+ "context": {
+ "cookiecutter": {
+ "project_name": "mudata",
+ "package_name": "mudata",
+ "project_description": "Multimodal data",
+ "author_full_name": "Danila Bredikhin",
+ "author_email": "danila@stanford.edu",
+ "github_user": "scverse",
+ "github_repo": "mudata",
+ "license": "BSD 3-Clause License",
+ "ide_integration": false,
+ "_copy_without_render": [
+ ".github/workflows/build.yaml",
+ ".github/workflows/test.yaml",
+ "docs/_templates/autosummary/**.rst"
+ ],
+ "_exclude_on_template_update": [
+ "CHANGELOG.md",
+ "LICENSE",
+ "README.md",
+ "docs/api.md",
+ "docs/index.md",
+ "docs/notebooks/example.ipynb",
+ "docs/references.bib",
+ "docs/references.md",
+ "src/**",
+ "tests/**"
+ ],
+ "_render_devdocs": false,
+ "_jinja2_env_vars": {
+ "lstrip_blocks": true,
+ "trim_blocks": true
+ },
+ "_template": "https://github.com/scverse/cookiecutter-scverse",
+ "_commit": "d383d94fadff9e4e6fdb59d77c68cb900d7cedec"
+ }
+ },
+ "directory": null
+}
diff --git a/.editorconfig b/.editorconfig
index 66e3d33..66678e3 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -1,12 +1,15 @@
root = true
[*]
-charset = utf-8
+indent_style = space
+indent_size = 4
end_of_line = lf
-insert_final_newline = true
+charset = utf-8
trim_trailing_whitespace = true
-max_line_length = 100
+insert_final_newline = true
-[*.py]
-indent_size = 4
-indent_style = space
+[{*.{yml,yaml,toml},.cruft.json}]
+indent_size = 2
+
+[Makefile]
+indent_style = tab
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index c101f33..0000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,28 +0,0 @@
----
-name: Bug report
-about: Create a report to help us improve mudata
-title: ''
-labels: bug
-assignees: ''
-
----
-
-**Describe the bug**
-A clear and concise description of what the bug is.
-
-**To Reproduce**
-Steps to reproduce the behaviour.
-
-Please provide exact steps to reproduce the bug in a clean Python environment. In case it's not clear what's causing this bug, please provide the data or the data generation procedure.
-Sometimes it is not possible to share the data but usually it is possible to replicate problems on publicly available datasets or to share a subset of your data.
-
-**Expected behaviour**
-A clear and concise description of what you expected to happen.
-
-**System**
- - OS: [e.g. macOS Monterey]
- - Python version [e.g. 3.11]
- - Versions of libraries involved [e.g. AnnData 0.10.0]
-
-**Additional context**
-Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..3ca1ccb
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,94 @@
+name: Bug report
+description: Report something that is broken or incorrect
+labels: bug
+body:
+ - type: markdown
+ attributes:
+ value: |
+ **Note**: Please read [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports)
+ detailing how to provide the necessary information for us to reproduce your bug. In brief:
+        * Please provide exact steps to reproduce the bug in a clean Python environment.
+ * In case it's not clear what's causing this bug, please provide the data or the data generation procedure.
+ * Sometimes it is not possible to share the data, but usually it is possible to replicate problems on publicly
+ available datasets or to share a subset of your data.
+
+ - type: textarea
+ id: report
+ attributes:
+ label: Report
+ description: A clear and concise description of what the bug is.
+ validations:
+ required: true
+
+ - type: textarea
+ id: versions
+ attributes:
+ label: Versions
+ description: |
+        Which versions of packages are installed.
+
+ Please install `session-info2`, run the following command in a notebook,
+ click the “Copy as Markdown” button, then paste the results into the text box below.
+
+ ```python
+ In[1]: import session_info2; session_info2.session_info(dependencies=True)
+ ```
+
+ Alternatively, run this in a console:
+
+ ```python
+ >>> import session_info2; print(session_info2.session_info(dependencies=True)._repr_mimebundle_()["text/markdown"])
+ ```
+ render: python
+ placeholder: |
+ anndata 0.11.3
+ ---- ----
+ charset-normalizer 3.4.1
+ coverage 7.7.0
+ psutil 7.0.0
+ dask 2024.7.1
+ jaraco.context 5.3.0
+ numcodecs 0.15.1
+ jaraco.functools 4.0.1
+ Jinja2 3.1.6
+ sphinxcontrib-jsmath 1.0.1
+ sphinxcontrib-htmlhelp 2.1.0
+ toolz 1.0.0
+ session-info2 0.1.2
+ PyYAML 6.0.2
+ llvmlite 0.44.0
+ scipy 1.15.2
+ pandas 2.2.3
+ sphinxcontrib-devhelp 2.0.0
+ h5py 3.13.0
+ tblib 3.0.0
+ setuptools-scm 8.2.0
+ more-itertools 10.3.0
+ msgpack 1.1.0
+ sparse 0.15.5
+ wrapt 1.17.2
+ jaraco.collections 5.1.0
+ numba 0.61.0
+ pyarrow 19.0.1
+ pytz 2025.1
+ MarkupSafe 3.0.2
+ crc32c 2.7.1
+ sphinxcontrib-qthelp 2.0.0
+ sphinxcontrib-serializinghtml 2.0.0
+ zarr 2.18.4
+ asciitree 0.3.3
+ six 1.17.0
+ sphinxcontrib-applehelp 2.0.0
+ numpy 2.1.3
+ cloudpickle 3.1.1
+ sphinxcontrib-bibtex 2.6.3
+ natsort 8.4.0
+ jaraco.text 3.12.1
+ setuptools 76.1.0
+ Deprecated 1.2.18
+ packaging 24.2
+ python-dateutil 2.9.0.post0
+ ---- ----
+ Python 3.13.2 | packaged by conda-forge | (main, Feb 17 2025, 14:10:22) [GCC 13.3.0]
+ OS Linux-6.11.0-109019-tuxedo-x86_64-with-glibc2.39
+ Updated 2025-03-18 15:47
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..5b62547
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: false
+contact_links:
+ - name: Scverse Community Forum
+ url: https://discourse.scverse.org/
+ about: If you have questions about “How to do X”, please ask them here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
deleted file mode 100644
index 7cd03eb..0000000
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-name: Feature request
-about: Suggest an idea for mudata
-title: ''
-labels: enhancement
-assignees: ''
-
----
-
-**Is your feature request related to a problem? Please describe.**
-A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
-
-**Describe the solution you'd like**
-A clear and concise description of what you want to happen.
-
-**Describe alternatives you've considered**
-A clear and concise description of any alternative solutions or features you've considered.
-
-**Additional context**
-Add any other context or screenshots about the feature request here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..0bec61b
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,11 @@
+name: Feature request
+description: Propose a new feature for mudata
+labels: enhancement
+body:
+ - type: textarea
+ id: description
+ attributes:
+ label: Description of feature
+ description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered.
+ validations:
+ required: true
diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md
deleted file mode 100644
index 814f9f3..0000000
--- a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md
+++ /dev/null
@@ -1,7 +0,0 @@
-Fixes # .
-
-Changes proposed in this pull request:
--
--
--
-
diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
deleted file mode 100644
index f58e4c6..0000000
--- a/.github/workflows/black.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-name: Lint
-
-on: [push, pull_request]
-
-jobs:
- lint:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v2
- - uses: actions/setup-python@v2
- - uses: psf/black@stable
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 0000000..83e01a1
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,33 @@
+name: Check Build
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+defaults:
+ run:
+ # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u).
+ shell: bash -euo pipefail {0}
+
+jobs:
+ package:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ filter: blob:none
+ fetch-depth: 0
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ with:
+ cache-dependency-glob: pyproject.toml
+ - name: Build package
+ run: uv build
+ - name: Check package
+ run: uvx twine check --strict dist/*.whl
diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
deleted file mode 100644
index e708b90..0000000
--- a/.github/workflows/codecov.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: Code coverage
-on: [push, pull_request]
-jobs:
- run:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout
- uses: actions/checkout@v2
- - name: Set up Python 3.11
- uses: actions/setup-python@v2
- with:
- python-version: 3.11
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install uv
- uv venv
- source .venv/bin/activate
- uv pip install pytest coverage
- uv pip install .[dev,docs,test]
- - name: Run tests and collect coverage
- run: |
- source .venv/bin/activate
- coverage run -m pytest --cache-clear
- coverage xml
- - name: Upload coverage to Codecov
- uses: codecov/codecov-action@v2
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
deleted file mode 100644
index 1738e7a..0000000
--- a/.github/workflows/dev.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Package dev versions
-
-on: [push, pull_request]
-
-jobs:
- build:
-
- runs-on: ubuntu-latest
- strategy:
- matrix:
- python-version: ["3.11", "3.12", "3.13"]
-
- steps:
- - uses: actions/checkout@v2
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v1
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install uv
- uv venv
- source .venv/bin/activate
- uv pip install ruff pytest
- uv pip install .[dev,docs,test]
- - name: Install dev versions
- run: |
- source .venv/bin/activate
- uv pip install -U git+https://github.com/scverse/scanpy
- uv pip install -U git+https://github.com/scverse/anndata
- - name: Test with pytest
- run: |
- source .venv/bin/activate
- pytest
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
deleted file mode 100644
index 594564a..0000000
--- a/.github/workflows/pythonpackage.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: Python package
-
-on: [push, pull_request]
-
-jobs:
- build:
-
- runs-on: ubuntu-latest
- strategy:
- matrix:
- python-version: ["3.11", "3.12", "3.13"]
-
- steps:
- - uses: actions/checkout@v4
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v5
- with:
- python-version: ${{ matrix.python-version }}
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install uv
- uv venv
- source .venv/bin/activate
- uv pip install ruff pytest
- uv pip install .[dev,docs,test]
- - name: Ruff check
- run: |
- source .venv/bin/activate
- ruff check src/mudata
- - name: Test with pytest
- run: |
- source .venv/bin/activate
- python -m pytest
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yaml
similarity index 91%
rename from .github/workflows/release.yml
rename to .github/workflows/release.yaml
index 536e83c..91f6bad 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yaml
@@ -2,7 +2,10 @@ name: Release
on:
release:
- types: [published]
+ push:
+ tags:
+ - "v?[0-9]+.[0-9]+.[0-9]+**"
+ workflow_dispatch:
defaults:
run:
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
new file mode 100644
index 0000000..3a2317a
--- /dev/null
+++ b/.github/workflows/test.yaml
@@ -0,0 +1,105 @@
+name: Test
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+ schedule:
+ - cron: "0 5 1,15 * *"
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+defaults:
+ run:
+ # to fail on error in multiline statements (-e), in pipes (-o pipefail), and on unset variables (-u).
+ shell: bash -euo pipefail {0}
+
+jobs:
+ # Get the test environment from hatch as defined in pyproject.toml.
+ # This ensures that the pyproject.toml is the single point of truth for test definitions and the same tests are
+ # run locally and on continuous integration.
+ # Check [[tool.hatch.envs.hatch-test.matrix]] in pyproject.toml and https://hatch.pypa.io/latest/environment/ for
+ # more details.
+ get-environments:
+ runs-on: ubuntu-latest
+ outputs:
+ envs: ${{ steps.get-envs.outputs.envs }}
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ filter: blob:none
+ fetch-depth: 0
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ - name: Get test environments
+ id: get-envs
+ run: |
+ ENVS_JSON=$(uvx hatch env show --json | jq -c 'to_entries
+ | map(
+ select(.key | startswith("hatch-test"))
+ | {
+ name: .key,
+ label: (if (.key | contains("pre")) then .key + " (PRE-RELEASE DEPENDENCIES)" else .key end),
+ python: .value.python
+ }
+ )')
+ echo "envs=${ENVS_JSON}" | tee $GITHUB_OUTPUT
+
+ # Run tests through hatch. Spawns a separate runner for each environment defined in the hatch matrix obtained above.
+ test:
+ needs: get-environments
+
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest]
+ env: ${{ fromJSON(needs.get-environments.outputs.envs) }}
+
+ name: ${{ matrix.env.label }}
+ runs-on: ${{ matrix.os }}
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ filter: blob:none
+ fetch-depth: 0
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ with:
+ python-version: ${{ matrix.env.python }}
+ cache-dependency-glob: pyproject.toml
+ - name: create hatch environment
+ run: uvx hatch env create ${{ matrix.env.name }}
+ - name: run tests using hatch
+ env:
+ MPLBACKEND: agg
+ PLATFORM: ${{ matrix.os }}
+ DISPLAY: :42
+ run: uvx hatch run ${{ matrix.env.name }}:run-cov -v --color=yes -n auto
+ - name: generate coverage report
+ run: |
+ # See https://coverage.readthedocs.io/en/latest/config.html#run-patch
+ test -f .coverage || uvx hatch run ${{ matrix.env.name }}:cov-combine
+ uvx hatch run ${{ matrix.env.name }}:cov-report # report visibly
+ uvx hatch run ${{ matrix.env.name }}:coverage xml # create report for upload
+ - name: Upload coverage
+ uses: codecov/codecov-action@v5
+ with:
+ token: ${{ secrets.CODECOV_TOKEN }}
+
+ # Check that all tests defined above pass. This makes it easy to set a single "required" test in branch
+ # protection instead of having to update it frequently. See https://github.com/re-actors/alls-green#why.
+ check:
+ name: Tests pass in all hatch environments
+ if: always()
+ needs:
+ - get-environments
+ - test
+ runs-on: ubuntu-latest
+ steps:
+ - uses: re-actors/alls-green@release/v1
+ with:
+ jobs: ${{ toJSON(needs) }}
diff --git a/.gitignore b/.gitignore
index aaf2cf1..e4b4b33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,134 +1,27 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-src/mudata/_version.py
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
+# Temp files
+.DS_Store
+*~
+buck-out/
.ipynb_checkpoints
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
+# IDEs
+/.idea/
+/.vscode/
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# KDevelop project settings
-*.kdev4
-.kdev4/
+# Compiled files
+.venv/
+__pycache__/
+.*cache/
+/src/mudata/_version.py
-# mkdocs documentation
-/site
+# Distribution / packaging
+/dist/
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
+# Tests and coverage
+/data/
+/node_modules/
+/.coverage*
-# Pyre type checker
-.pyre/
+# docs
+/docs/generated/
+/docs/_build/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 696c2af..b9de3fe 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,38 @@
+fail_fast: false
+default_language_version:
+ python: python3
+default_stages:
+ - pre-commit
+ - pre-push
+minimum_pre_commit_version: 2.16.0
repos:
- - repo: https://github.com/psf/black
- rev: 24.10.0 # Replace by any tag/version: https://github.com/psf/black/tags
+ - repo: https://github.com/biomejs/pre-commit
+ rev: v2.2.4
hooks:
- - id: black
- language_version: python3
+ - id: biome-format
+ exclude: ^\.cruft\.json$ # inconsistent indentation with cruft - file never to be modified manually.
+ - repo: https://github.com/tox-dev/pyproject-fmt
+ rev: v2.6.0
+ hooks:
+ - id: pyproject-fmt
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.13.2
+ hooks:
+ - id: ruff-check
+ types_or: [python, pyi, jupyter]
+ args: [--fix, --exit-non-zero-on-fix]
+ - id: ruff-format
+ types_or: [python, pyi, jupyter]
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v6.0.0
+ hooks:
+ - id: detect-private-key
+ - id: check-ast
+ - id: end-of-file-fixer
+ - id: mixed-line-ending
+ args: [--fix=lf]
+ - id: trailing-whitespace
+ - id: check-case-conflict
+ # Check that there are no merge conflicts (could be generated by template sync)
+ - id: check-merge-conflict
+ args: [--assume-in-merge]
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index e9c1c78..c3f3f96 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -1,19 +1,15 @@
+# https://docs.readthedocs.io/en/stable/config-file/v2.html
version: 2
-
-# Set the OS, Python version and other tools you might need
build:
- os: ubuntu-22.04
+ os: ubuntu-24.04
tools:
- python: "3.11"
-
-# Build documentation in the "docs/" directory with Sphinx
-sphinx:
- configuration: docs/source/conf.py
-
-# Explicitly set the version of Python and its requirements
-python:
- install:
- - method: pip
- path: .
- extra_requirements:
- - docs
+ python: "3.12"
+ jobs:
+ create_environment:
+ - asdf plugin add uv
+ - asdf install uv latest
+ - asdf global uv latest
+ build:
+ html:
+ - uvx hatch run docs:build
+ - mv docs/_build $READTHEDOCS_OUTPUT
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..a44f821
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,138 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog][],
+and this project adheres to [Semantic Versioning][].
+
+[keep a changelog]: https://keepachangelog.com/en/1.1.0/
+[semantic versioning]: https://semver.org/spec/v2.0.0.html
+
+## [0.3.3]
+
+### Fixed
+
+- Fixed an [issue](https://github.com/scverse/mudata/issues/103) in `update()`
+- Fixed an [issue](https://github.com/scverse/mudata/issues/109) in `push_obs()`
+
+## [0.3.2]
+
+### Fixed
+
+- Fixed an [issue](https://github.com/scverse/mudata/issues/99) in `update()`
+
+## [0.3.1]
+
+### Fixed
+
+- compatibility with anndata 0.10.9
+
+## [0.3.0]
+
+### Added
+
+- Pull/push interface for annotations: `pull_obs()`, `pull_var()`, `push_obs()`, `push_var()`
+- Conversion functions: `to_anndata()`, `to_mudata()`
+- Concatenation of MuData objects
+- `MuData.mod_names` attribute
+- Pretty-printing for `MuData.mod`
+- `fsspec` support for readers.
+
+### Fixed
+
+- Improved performance and behavior of `update()`.
+ For compatibility reasons, this release keeps the old behaviour of pulling annotations on read/update as default.
+- `read_zarr()` now supports `mod-order`
+- Correct handling of the `uns` attribute by views.
+
+### Note
+
+If you want to adopt the new update behaviour, call `mudata.set_options(pull_on_update=False)`.
+This will be the default behaviour in the next release.
+With it, the annotations will not be copied from the modalities on `update()` implicitly.
+
+To copy the annotations explicitly, you will need to use `pull_obs()` and/or `pull_var()`.
+
+## [0.2.4]
+
+### Changed
+
+- Requires anndata 0.10.8 or newer.
+
+### Fixed
+
+- Compatibility with numpy 2.0
+- Compatibility with anndata 0.11
+
+## [0.2.3]
+
+### Fixed
+
+- Fixes and improvements for backed objects, views, nested MuData objects, I/O and HTML representation.
+- Pandas 2.0 compatibility
+
+## [0.2.2]
+
+### Fixed
+
+- `Path` objects now work in `mudata.read()`
+
+## [0.2.1]
+
+### Added
+
+- `MuData.__len__`.
+ This should make it easier to build MuData into workflows that operate on data containers with length.
+ In practice using `n_obs` should be preferred.
+
+### Changed
+
+- Default `dict` has replaced `OrderedDict`, e.g. in the `uns` slot, to improve compatibility with new serialisation versions.
+ As of Python 3.6, dictionaries are insertion-ordered.
+
+### Fixed
+
+- Improvements and optimizations to `update()`
+
+## [0.2.0]
+
+### Added
+
+- [New axes interface](https://github.com/scverse/mudata/blob/master/docs/source/notebooks/axes.ipynb) that allows using MuData objects as containers with different shared dimensions.
+
+### Changed
+
+- Adopted the new I/O serialisation format of [AnnData v0.8](https://anndata.readthedocs.io/en/latest/release-notes/index.html#th-march-2022).
+
+### Fixed
+
+- Updating a MuData object with `MuData.update()` is even faster in many use cases.
+
+## [0.1.2]
+
+### Changed
+
+- Improved documentation, including a new page describing the sharp bits.
+
+### Fixed
+
+- Updating a MuData object with `update()` is now much faster.
+
+## [0.1.1]
+
+- Various stability and bug fixes
+
+## [0.1.0]
+
+Initial `mudata` release with `MuData`, previously a part of the `muon` framework.
+
+[0.3.3]: https://github.com/scverse/mudata/compare/v0.3.2...v0.3.3
+[0.3.2]: https://github.com/scverse/mudata/compare/v0.3.1...v0.3.2
+[0.3.1]: https://github.com/scverse/mudata/compare/v0.3.0...v0.3.1
+[0.3.0]: https://github.com/scverse/mudata/compare/v0.2.4...v0.3.0
+[0.2.4]: https://github.com/scverse/mudata/compare/v0.2.3...v0.2.4
+[0.2.3]: https://github.com/scverse/mudata/compare/v0.2.2...v0.2.3
+[0.2.2]: https://github.com/scverse/mudata/compare/v0.2.1...v0.2.2
+[0.2.1]: https://github.com/scverse/mudata/compare/v0.2.0...v0.2.1
+[0.2.0]: https://github.com/scverse/mudata/compare/v0.1.2...v0.2.0
+[0.1.2]: https://github.com/scverse/mudata/compare/v0.1.1...v0.1.2
+[0.1.1]: https://github.com/scverse/mudata/compare/v0.1.0...v0.1.1
+[0.1.0]: https://github.com/scverse/mudata/releases/tag/v0.1.0
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 100644
index f57b2ac..0000000
--- a/CONTRIBUTING.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Contributing
-
-This document describes details about contributing to `MuData`.
-
-The main entry point for a contribution is an issue. Please use issues to discuss the change you wish to make or the funcionality you want to add to `MuData`. For a more in-depth discussion you can also use [discussions](https://github.com/scverse/mudata/discussions) or contact `MuData` authors or maintainers via other communication methods such as email.
-
-## Issues
-
-Please consider opening an issue if you've encountered a bug, a performance issue, a documentation issue or have a feature request in mind. For convenience, we provide issue templates that you are very welcome to use.
-
-When creating an issue about a problem that you've encountered (e.g. an error), please include the minimal amount of source code to reproduce it. When including tracebacks, please paste the full traceback text.
-
-## Pull requests
-
-The code that is suggested to be merged into `MuData` is expected to follow reasonable Python code styleguides such as the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html). Below there are a few ideas that may help to improve the code quality.
-
-- Format the code with [black](https://github.com/psf/black).
-- Make sure debugging code (e.g. `pdb.set_trace()`) is removed as well as respective dependencies (`import pdb`).
-- Use tools like `pylint` and `flake8` to check proposed code changes.
-- Make sure documentation is changed to reflect the changes. That includes docstrings as well as external files such as the ones in `docs/` or respective `README.md` files.
-- Consider increasing the version number in `setup.py`. Please stick to [semantic versioning](https://semver.org/).
-- Pull requests can be merged when the LGTM (_looks good to me_) has been received from reviewers, probably after a few rounds of reviews.
-
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index aae9579..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1 +0,0 @@
-recursive-exclude tests *
diff --git a/README.md b/README.md
index 17ffe4a..604a062 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,36 @@
-
+
-[](http://mudata.readthedocs.io/)
-[](https://pypi.org/project/mudata)
-[](https://scverse.org)
-[](https://numfocus.org)
+[![PyPI][badge-pypi]][pypi]
+[![Tests][badge-tests]][tests]
+[![Documentation][badge-docs]][documentation]
+[![Powered by scverse][badge-scverse]][scverse]
+[![Powered by NumFOCUS][badge-numfocus]][numfocus]
+
+[badge-tests]: https://img.shields.io/github/actions/workflow/status/scverse/mudata/test.yaml?branch=main
+[badge-docs]: https://img.shields.io/readthedocs/mudata
+[badge-pypi]: https://img.shields.io/pypi/v/mudata
+[badge-numfocus]: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A
+[badge-scverse]: https://img.shields.io/badge/scverse-core-black.svg?labelColor=white&logo=data:image/svg%2bxml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+PCFET0NUWVBFIHN2ZyBQVUJMSUMgIi0vL1czQy8vRFREIFNWRyAxLjEvL0VOIiAiaHR0cDovL3d3dy53My5vcmcvR3JhcGhpY3MvU1ZHLzEuMS9EVEQvc3ZnMTEuZHRkIj4KPHN2ZyB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiB2aWV3Qm94PSIwIDAgOTEgOTEiIHZlcnNpb249IjEuMSIKICAgIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIKICAgIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB4bWw6c3BhY2U9InByZXNlcnZlIgogICAgeG1sbnM6c2VyaWY9Imh0dHA6Ly93d3cuc2VyaWYuY29tLyIgc3R5bGU9ImZpbGwtcnVsZTpldmVub2RkO2NsaXAtcnVsZTpldmVub2RkO3N0cm9rZS1saW5lam9pbjpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoyOyI+CiAgICA8ZyBpZD0iRWJlbmVfMyI+CiAgICAgICAgPGc+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik0zNSw4OS42Yy0yMi4zLC0zLjQgLTMwLjYsLTE5LjggLTMwLjYsLTE5LjhjMTAuOCwxNi45IDQzLDkuMSA1Mi45LDIuNWMxMi40LC04LjMgOCwtMTUuMyA2LjgsLTE4LjFjNS40LDcuMiA1LjMsMjMuNSAtMS4xLDI5LjRjLTUuNiw1LjEgLTE1LjMsNy45IC0yOCw2WiIgc3R5bGU9ImZpbGw6I2ZmZjtmaWxsLXJ1bGU6bm9uemVybztzdHJva2U6IzAwMDtzdHJva2Utd2lkdGg6MXB4OyIvPgogICAgICAgICAgICA8cGF0aCBkPSJNODMuOSw0My41YzIuOSwtNy4xIDAuOCwtMTIuNSAwLjUsLTEzLjNjLTAuNywtMS4zIC0xLjUsLTIuMyAtMi40LC0zLjFjLTE2LjEsLTEyLjYgLTU1LjksMSAtNzAuOSwxNi44Yy0xMC45LDExLjUgLTEwLjEsMjAgLTYuNywyNS44YzMuMSw0LjggNy45LDcuNiAxMy40LDljLTExLjUsLTEyLjQgOS44LC0zMS4xIDI5LC0zOGMyMSwtNy41IDMyLjUsLTMgMzcuMSwyLjhaIiBzdHlsZT0iZmlsbDojMzQzNDM0O2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik03OS42LDUwLjRjOSwtMTAuNSA1LC0xOS43IDQuOCwtMjAuNGMtMCwwIDQuNCw3LjEgMi4yLDIyLjZjLTEuMiw4LjUgLTUuNCwxNiAtMTAuMSwxMS44Yy0yLjEsLTEuOCAtMywtNi45IDMuMSwtMTRaIiBzdHlsZT0iZmlsbDojZmZmO2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgICAgIDxwYXRoIGQ9Ik02NCw1NC4yYy0zLjMsLTQuOCAtOC4xLC03LjQgLTEyLjMsLTEwLjhjLTIuMiwtMS43IC0xNi40LC0xMS4yIC0xOS4yLC0xNS4xYy02LjQsLTYuNCAtOS41LC0xNi45IC0zLjQsLTIzLjFjLTQuNCwtMC44IC04LjIsMC4yIC0xMC42LDEuNWMtMS4xLDAuNiAtMi4xLDEuMiAtMi44LDJjLTYuNyw2LjIgLTUuOCwxNyAtMS42LDI0LjNjNC41LDcuOCAxMy4yLDE1LjQgMjQuMywyMi44YzUuMSwzLjQgMTUuNiw4LjQgMTkuMywxNmMxMS43LC04LjEgNy42LC0xNC45IDYuMywtMTcuNloiIHN0eWxlPSJmaWxsOiNiNGI0YjQ7ZmlsbC1ydWxlOm5vbnplcm87c3Ryb2tlOiMwMDA7c3Ryb2tlLXdpZHRoOjFweDsiLz4KICAgICAgICAgICAgPHBhdGggZD0iTTM4LjcsOS44YzcuOSw2LjMgMTIuNCw5LjggMjAsOC41YzUuNywtMSA0LjksLTcuOSAtNCwtMTMuNmMtNC40LC0yLjggLTkuNCwtNC4yIC0xNS43LC00LjJjLTcuNSwtMCAtMTYuMywzLjkgLTIwLjYsNi40YzQsLTIuMyAxMS45LC0zLjggMjAuMywyLjlaIiBzdHlsZT0iZmlsbDojZmZmO2ZpbGwtcnVsZTpub256ZXJvO3N0cm9rZTojMDAwO3N0cm9rZS13aWR0aDoxcHg7Ii8+CiAgICAgICAgPC9nPgogICAgPC9nPgo8L3N2Zz4=
# MuData – multimodal data
-[Documentation](https://mudata.readthedocs.io/) | [Publication](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02577-8)
+[Documentation][] | [Publication][muon paper] | [Changelog][]
-For using `MuData` in multimodal omics applications see [`muon`](https://github.com/scverse/muon).
+For using `MuData` in multimodal omics applications see [muon][].
## Data structure
-In the same vein as [AnnData](https://github.com/theislab/anndata) is designed to represent unimodal annotated datasets in Python, `MuData` is designed to provide functionality to load, process, and store multimodal omics data.
+In the same vein as [AnnData][] is designed to represent unimodal annotated datasets in Python, `MuData` is designed to provide functionality to load, process, and store multimodal omics data.
```
MuData
.obs -- annotation of observations (cells, samples)
.var -- annotation of features (genes, genomic loci, etc.)
- .obsm -- multidimensional cell annotation,
+ .obsm -- multidimensional cell annotation,
incl. a boolean for each modality
that links .obs to the cells of that modality
- .varm -- multidimensional feature annotation,
+ .varm -- multidimensional feature annotation,
incl. a boolean vector for each modality
that links .var to the features of that modality
.mod
@@ -49,13 +56,13 @@ from mudata import MuData
mdata = MuData({'rna': adata_rna, 'atac': adata_atac})
```
-If multimodal data from 10X Genomics is to be read, convenient readers are provided by [`muon`](https://github.com/scverse/muon) that return a `MuData` object with AnnData objects inside, each corresponding to its own modality:
+If multimodal data from 10X Genomics is to be read, convenient readers are provided by [muon][] that return a `MuData` object with AnnData objects inside, each corresponding to its own modality:
```py
import muon as mu
mu.read_10x_h5("filtered_feature_bc_matrix.h5")
-# MuData object with n_obs × n_vars = 10000 × 80000
+# MuData object with n_obs × n_vars = 10000 × 80000
# 2 modalities
# rna: 10000 x 30000
# var: 'gene_ids', 'feature_types', 'genome', 'interval'
@@ -87,10 +94,10 @@ md.write("pbmc_10k.h5mu/rna", adata)
If you use `mudata` in your work, please cite the publication as follows:
> **MUON: multimodal omics analysis framework**
->
+>
> Danila Bredikhin, Ilia Kats, Oliver Stegle
>
-> _Genome Biology_ 2022 Feb 01. doi: [10.1186/s13059-021-02577-8](https://doi.org/10.1186/s13059-021-02577-8).
+> _Genome Biology_ 2022 Feb 01. doi: [10.1186/s13059-021-02577-8][muon paper].
You can cite the scverse publication as follows:
@@ -98,11 +105,11 @@ You can cite the scverse publication as follows:
>
> Isaac Virshup, Danila Bredikhin, Lukas Heumos, Giovanni Palla, Gregor Sturm, Adam Gayoso, Ilia Kats, Mikaela Koutrouli, Scverse Community, Bonnie Berger, Dana Pe’er, Aviv Regev, Sarah A. Teichmann, Francesca Finotello, F. Alexander Wolf, Nir Yosef, Oliver Stegle & Fabian J. Theis
>
-> _Nat Biotechnol._ 2023 Apr 10. doi: [10.1038/s41587-023-01733-8](https://doi.org/10.1038/s41587-023-01733-8).
+> _Nat Biotechnol._ 2023 Apr 10. doi: [10.1038/s41587-023-01733-8][scverse paper].
[//]: # (numfocus-fiscal-sponsor-attribution)
-mudata is part of the scverse® project ([website](https://scverse.org), [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS](https://numfocus.org/).
+mudata is part of the scverse® project ([website][scverse], [governance](https://scverse.org/about/roles)) and is fiscally sponsored by [NumFOCUS][].
If you like scverse® and want to support our mission, please consider making a tax-deductible [donation](https://numfocus.org/donate-to-scverse) to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs.
@@ -113,3 +120,14 @@ If you like scverse® and want to support our mission, please consider making a
>
+
+[tests]: https://github.com/scverse/mudata/actions/workflows/test.yaml
+[documentation]: https://mudata.readthedocs.io
+[changelog]: https://mudata.readthedocs.io/en/latest/changelog.html
+[pypi]: https://pypi.org/project/mudata
+[numfocus]: https://numfocus.org
+[scverse]: https://scverse.org
+[muon]: https://github.com/scverse/muon
+[anndata]: https://github.com/scverse/anndata
+[muon paper]: https://doi.org/10.1186/s13059-021-02577-8
+[scverse paper]: https://doi.org/10.1038/s41587-023-01733-8
diff --git a/biome.jsonc b/biome.jsonc
new file mode 100644
index 0000000..9f8f220
--- /dev/null
+++ b/biome.jsonc
@@ -0,0 +1,17 @@
+{
+ "$schema": "https://biomejs.dev/schemas/2.2.0/schema.json",
+ "vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": true },
+ "formatter": { "useEditorconfig": true },
+ "overrides": [
+ {
+ "includes": ["./.vscode/*.json", "**/*.jsonc"],
+ "json": {
+ "formatter": { "trailingCommas": "all" },
+ "parser": {
+ "allowComments": true,
+ "allowTrailingCommas": true,
+ },
+ },
+ },
+ ],
+}
diff --git a/docs/.gitignore b/docs/.gitignore
deleted file mode 100644
index 02fa90c..0000000
--- a/docs/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-**/generated/
-build/
diff --git a/docs/Makefile b/docs/Makefile
index d0c3cbf..d4bb2cb 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -5,8 +5,8 @@
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
-SOURCEDIR = source
-BUILDDIR = build
+SOURCEDIR = .
+BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
diff --git a/docs/_static/img/mudata.png b/docs/_static/img/mudata.png
new file mode 100644
index 0000000..2fbc9c4
Binary files /dev/null and b/docs/_static/img/mudata.png differ
diff --git a/docs/img/mudata.svg b/docs/_static/img/mudata.svg
similarity index 100%
rename from docs/img/mudata.svg
rename to docs/_static/img/mudata.svg
diff --git a/docs/img/muon_header.png b/docs/_static/img/muon_header.png
similarity index 100%
rename from docs/img/muon_header.png
rename to docs/_static/img/muon_header.png
diff --git a/docs/img/muon_logo.png b/docs/_static/img/muon_logo.png
similarity index 100%
rename from docs/img/muon_logo.png
rename to docs/_static/img/muon_logo.png
diff --git a/docs/img/muon_logo_coloured.png b/docs/_static/img/muon_logo_coloured.png
similarity index 100%
rename from docs/img/muon_logo_coloured.png
rename to docs/_static/img/muon_logo_coloured.png
diff --git a/tests/__init__.py b/docs/_templates/.gitkeep
similarity index 100%
rename from tests/__init__.py
rename to docs/_templates/.gitkeep
diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst
new file mode 100644
index 0000000..7b4a0cf
--- /dev/null
+++ b/docs/_templates/autosummary/class.rst
@@ -0,0 +1,61 @@
+{{ fullname | escape | underline}}
+
+.. currentmodule:: {{ module }}
+
+.. add toctree option to make autodoc generate the pages
+
+.. autoclass:: {{ objname }}
+
+{% block attributes %}
+{% if attributes %}
+Attributes table
+~~~~~~~~~~~~~~~~
+
+.. autosummary::
+{% for item in attributes %}
+ ~{{ name }}.{{ item }}
+{%- endfor %}
+{% endif %}
+{% endblock %}
+
+{% block methods %}
+{% if methods %}
+Methods table
+~~~~~~~~~~~~~
+
+.. autosummary::
+{% for item in methods %}
+ {%- if item != '__init__' %}
+ ~{{ name }}.{{ item }}
+ {%- endif -%}
+{%- endfor %}
+{% endif %}
+{% endblock %}
+
+{% block attributes_documentation %}
+{% if attributes %}
+Attributes
+~~~~~~~~~~
+
+{% for item in attributes %}
+
+.. autoattribute:: {{ [objname, item] | join(".") }}
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
+
+{% block methods_documentation %}
+{% if methods %}
+Methods
+~~~~~~~
+
+{% for item in methods %}
+{%- if item != '__init__' %}
+
+.. automethod:: {{ [objname, item] | join(".") }}
+{%- endif -%}
+{%- endfor %}
+
+{% endif %}
+{% endblock %}
diff --git a/docs/api.md b/docs/api.md
new file mode 100644
index 0000000..0f2084e
--- /dev/null
+++ b/docs/api.md
@@ -0,0 +1,32 @@
+# API Reference
+
+```{contents}
+:depth: 3
+:local:
+```
+
+```{toctree}
+:maxdepth: 10
+```
+
+```{eval-rst}
+.. currentmodule:: mudata
+```
+
+## Multimodal omics
+
+```{eval-rst}
+.. module:: mudata
+.. autosummary::
+ :toctree: generated
+
+ MuData
+```
+
+## Input/Output
+
+```{eval-rst}
+.. automodsumm:: mudata
+ :functions-only:
+ :toctree: generated
+```
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 0000000..d9e79ba
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1,3 @@
+```{include} ../CHANGELOG.md
+
+```
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..75d9ba2
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,128 @@
+# Configuration file for the Sphinx documentation builder.
+
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+import sys
+from datetime import datetime
+from importlib.metadata import metadata
+from pathlib import Path
+
+HERE = Path(__file__).parent
+sys.path.insert(0, str(HERE / "extensions"))
+
+
+# -- Project information -----------------------------------------------------
+
+# NOTE: If you installed your project in editable mode, this might be stale.
+# If this is the case, reinstall it to refresh the metadata
+info = metadata("mudata")
+project_name = info["Name"]
+author = info["Author"]
+copyright = f"{datetime.now():%Y}, {author}."
+version = info["Version"]
+urls = dict(pu.split(", ") for pu in info.get_all("Project-URL"))
+repository_url = urls["Source"]
+
+# The full version, including alpha/beta/rc tags
+release = info["Version"]
+
+bibtex_bibfiles = ["references.bib"]
+templates_path = ["_templates"]
+nitpicky = True # Warn about broken links
+needs_sphinx = "4.0"
+
+html_context = {
+ "display_github": True, # Integrate GitHub
+ "github_user": "scverse",
+ "github_repo": project_name,
+ "github_version": "main",
+ "conf_py_path": "/docs/",
+}
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings.
+# They can be extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = [
+ "myst_nb",
+ "sphinx_copybutton",
+ "sphinx.ext.autodoc",
+ "sphinx.ext.intersphinx",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.napoleon",
+ "sphinxcontrib.bibtex",
+ "sphinx_autodoc_typehints",
+ "sphinx_tabs.tabs",
+ "sphinx.ext.mathjax",
+ "IPython.sphinxext.ipython_console_highlighting",
+ "sphinxext.opengraph",
+ "sphinx_automodapi.automodapi",
+ *[p.stem for p in (HERE / "extensions").glob("*.py")],
+]
+
+autosummary_generate = True
+autodoc_member_order = "groupwise"
+default_role = "literal"
+napoleon_google_docstring = False
+napoleon_numpy_docstring = True
+napoleon_include_init_with_doc = False
+napoleon_use_rtype = True # having a separate entry generally helps readability
+napoleon_use_param = True
+myst_heading_anchors = 6 # create anchors for h1-h6
+myst_enable_extensions = ["amsmath", "colon_fence", "deflist", "dollarmath", "html_image", "html_admonition"]
+myst_url_schemes = ("http", "https", "mailto")
+nb_output_stderr = "remove"
+nb_execution_mode = "off"
+nb_merge_streams = True
+typehints_defaults = "braces"
+
+ogp_social_cards = {
+    "image": "_static/img/mudata.png"  # doesn't support SVGs
+}
+
+source_suffix = {".rst": "restructuredtext", ".ipynb": "myst-nb", ".myst": "myst-nb"}
+
+intersphinx_mapping = {
+ "python": ("https://docs.python.org/3", None),
+ "anndata": ("https://anndata.readthedocs.io/en/stable/", None),
+ "scanpy": ("https://scanpy.readthedocs.io/en/stable/", None),
+ "numpy": ("https://numpy.org/doc/stable/", None),
+ "fsspec": ("https://filesystem-spec.readthedocs.io/en/stable/", None),
+ "zarr": ("https://zarr.readthedocs.io/en/stable/", None),
+}
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = "sphinx_book_theme"
+html_static_path = ["_static"]
+html_logo = "_static/img/mudata.svg"
+html_css_files = ["css/custom.css"]
+
+html_title = project_name
+
+html_theme_options = {
+ "repository_url": repository_url,
+ "use_repository_button": True,
+ "path_to_docs": "docs/",
+ "navigation_with_keys": False,
+}
+
+pygments_style = "default"
+
+nitpick_ignore = [
+ # If building the documentation fails because of a missing link that is outside your control,
+ # you can add an exception to this list.
+ # ("py:class", "igraph.Graph"),
+]
diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 0000000..c41ee2b
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1,329 @@
+# Contributing guide
+
+This document summarizes the most important information to get you started on contributing to this project.
+We assume that you are already familiar with git and with making pull requests on GitHub.
+
+For more extensive tutorials that also cover the absolute basics,
+please refer to other resources such as the [pyopensci tutorials][],
+the [scientific Python tutorials][], or the [scanpy developer guide][].
+
+[pyopensci tutorials]: https://www.pyopensci.org/learn.html
+[scientific Python tutorials]: https://learn.scientific-python.org/development/tutorials/
+[scanpy developer guide]: https://scanpy.readthedocs.io/en/latest/dev/index.html
+
+:::{tip} The *hatch* project manager
+
+We highly recommend familiarizing yourself with [`hatch`][hatch].
+Hatch is a Python project manager that
+
+- manages virtual environments, separately for development, testing and building the documentation.
+ Separating the environments is useful to avoid dependency conflicts.
+- lets you run tests locally in different environments (e.g. different Python versions)
+- lets you run tasks defined in `pyproject.toml`, e.g. to build the documentation.
+
+While the project is set up with `hatch` in mind,
+it is still possible to use different tools to manage dependencies, such as `uv` or `pip`.
+
+:::
+
+[hatch]: https://hatch.pypa.io/latest/
+
+## Installing dev dependencies
+
+In addition to the packages needed to _use_ this package,
+you need additional python packages to [run tests](#writing-tests) and [build the documentation](#docs-building).
+
+:::::{tabs}
+::::{group-tab} Hatch
+
+You typically interact with hatch through its command-line interface (CLI).
+Running one of the following commands will automatically resolve the environments for testing and
+building the documentation in the background:
+
+```bash
+hatch test # defined in the table [tool.hatch.envs.hatch-test] in pyproject.toml
+hatch run docs:build # defined in the table [tool.hatch.envs.docs]
+```
+
+When using an IDE such as VS Code,
+you’ll have to point the editor at the paths to the virtual environments manually.
+The environment you typically want to use as your main development environment is the `hatch-test`
+environment with the latest Python version.
+
+To get a list of all environments for your project, run
+
+```bash
+hatch env show -i
+```
+
+This will list “Standalone” environments and a table of “Matrix” environments like the following:
+
+```
++------------+---------+--------------------------+----------+---------------------------------+-------------+
+| Name | Type | Envs | Features | Dependencies | Scripts |
++------------+---------+--------------------------+----------+---------------------------------+-------------+
+| hatch-test | virtual | hatch-test.py3.10-stable | dev | coverage-enable-subprocess==1.0 | cov-combine |
+| | | hatch-test.py3.13-stable | test | coverage[toml]~=7.4 | cov-report |
+| | | hatch-test.py3.13-pre | | pytest-mock~=3.12 | run |
+| | | | | pytest-randomly~=3.15 | run-cov |
+| | | | | pytest-rerunfailures~=14.0 | |
+| | | | | pytest-xdist[psutil]~=3.5 | |
+| | | | | pytest~=8.1 | |
++------------+---------+--------------------------+----------+---------------------------------+-------------+
+```
+
+From the `Envs` column, select the environment name you want to use for development.
+In this example, it would be `hatch-test.py3.13-stable`.
+
+Next, create the environment with
+
+```bash
+hatch env create hatch-test.py3.13-stable
+```
+
+Then, obtain the path to the environment using
+
+```bash
+hatch env find hatch-test.py3.13-stable
+```
+
+If you are using VS Code, open the command palette (Ctrl+Shift+P) and search for `Python: Select Interpreter`.
+Choose `Enter Interpreter Path` and paste the path to the virtual environment from above.
+
+In the future, this may become easier through a hatch VS Code extension.
+
+::::
+
+::::{group-tab} uv
+
+A popular choice for managing virtual environments is [uv][].
+The main disadvantage compared to hatch is that it supports only a single environment per project at a time,
+which requires you to mix the dependencies for running tests and building docs.
+This can have undesired side-effects,
+such as being forced to install a lower version of a library your project depends on,
+only because an outdated sphinx plugin pins an older version.
+
+To initialize a virtual environment in the `.venv` directory of your project, simply run
+
+```bash
+uv sync --all-extras
+```
+
+The `.venv` directory is typically automatically discovered by IDEs such as VS Code.
+
+::::
+
+::::{group-tab} Pip
+
+Pip is nowadays mostly superseded by environment managers such as [hatch][].
+However, for the sake of completeness, and since it’s ubiquitously available,
+we describe how you can manage environments manually using `pip`:
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e ".[dev,test,doc]"
+```
+
+The `.venv` directory is typically automatically discovered by IDEs such as VS Code.
+
+::::
+:::::
+
+[hatch environments]: https://hatch.pypa.io/latest/tutorials/environment/basic-usage/
+[uv]: https://docs.astral.sh/uv/
+
+## Code-style
+
+This package uses [pre-commit][] to enforce consistent code-styles.
+On every commit, pre-commit checks will either automatically fix issues with the code, or raise an error message.
+
+To enable pre-commit locally, simply run
+
+```bash
+pre-commit install
+```
+
+in the root of the repository.
+Pre-commit will automatically download all dependencies when it is run for the first time.
+
+Alternatively, you can rely on the [pre-commit.ci][] service enabled on GitHub.
+If you didn’t run `pre-commit` before pushing changes to GitHub, it will automatically commit fixes to your pull request or show an error message.
+
+If pre-commit.ci added a commit on a branch you still have been working on locally, simply use
+
+```bash
+git pull --rebase
+```
+
+to integrate the changes into yours.
+While the [pre-commit.ci][] is useful, we strongly encourage installing and running pre-commit locally first to understand its usage.
+
+Finally, most editors have an _autoformat on save_ feature.
+Consider enabling this option for [ruff][ruff-editors] and [biome][biome-editors].
+
+[pre-commit]: https://pre-commit.com/
+[pre-commit.ci]: https://pre-commit.ci/
+[ruff-editors]: https://docs.astral.sh/ruff/integrations/
+[biome-editors]: https://biomejs.dev/guides/integrate-in-editor/
+
+(writing-tests)=
+
+## Writing tests
+
+This package uses [pytest][] for automated testing.
+Please write tests (see {doc}`scanpy:dev/testing`) for every function added to the package.
+
+Most IDEs integrate with pytest and provide a GUI to run tests.
+Just point yours to one of the environments returned by
+
+```bash
+hatch env create hatch-test # create test environments for all supported versions
+hatch env find hatch-test # list all possible test environment paths
+```
+
+Alternatively, you can run all tests from the command line by executing
+
+:::::{tabs}
+::::{group-tab} Hatch
+
+```bash
+hatch test # test with the highest supported Python version
+# or
+hatch test --all # test with all supported Python versions
+```
+
+::::
+
+::::{group-tab} uv
+
+```bash
+uv run pytest
+```
+
+::::
+
+::::{group-tab} Pip
+
+```bash
+source .venv/bin/activate
+pytest
+```
+
+::::
+:::::
+
+in the root of the repository.
+
+[pytest]: https://docs.pytest.org/
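+
+For orientation, a test is a plain function following pytest conventions. A minimal, hypothetical example (the file name, function name, and assertion are illustrative):
+
+```python
+# tests/test_mudata.py (hypothetical example)
+import numpy as np
+from anndata import AnnData
+
+from mudata import MuData
+
+
+def test_mudata_keeps_modalities():
+    # a tiny MuData with a single modality of 3 observations x 2 variables
+    mdata = MuData({"rna": AnnData(np.zeros((3, 2)))})
+    assert "rna" in mdata.mod
+```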
+
+### Continuous integration
+
+Continuous integration via GitHub Actions will automatically run the tests on all pull requests and test
+against the minimum and maximum supported Python versions.
+
+Additionally, there’s a CI job that tests against pre-releases of all dependencies (if there are any).
+The purpose of this check is to detect incompatibilities of new package versions early on and
+to give you time to fix the issue or reach out to the developers of the dependency before the package
+is released to a wider audience.
+
+The CI job is defined in `.github/workflows/test.yaml`;
+however, the single point of truth for CI jobs is the Hatch test matrix defined in `pyproject.toml`.
+This means that local testing via hatch and remote testing on CI run against the same Python versions and use the same environments.
+
+## Publishing a release
+
+### Updating the version number
+
+Before making a release, you need to update the version number in the `CHANGELOG.md` file.
+Please adhere to [Semantic Versioning][semver], in brief:
+
+> Given a version number MAJOR.MINOR.PATCH, increment the:
+>
+> 1. MAJOR version when you make incompatible API changes,
+> 2. MINOR version when you add functionality in a backwards compatible manner, and
+> 3. PATCH version when you make backwards compatible bug fixes.
+>
+> Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
+
+Once you are done, commit your changes and tag the commit as `vX.X.X`.
+Push the tag.
+This will automatically release to [PyPI][].
+
+[semver]: https://semver.org/
+[managing GitHub releases]: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
+[pypi]: https://pypi.org/
+
+## Writing documentation
+
+Please write documentation for new or changed features and use-cases.
+This project uses [sphinx][] with the following features:
+
+- The [myst][] extension allows writing documentation in Markdown/Markedly Structured Text
+- [Numpy-style docstrings][numpydoc] (through the [napoleon][numpydoc-napoleon] extension).
+- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks))
+- [sphinx-autodoc-typehints][], to automatically reference annotated input and output types
+- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/)
+
+See scanpy’s {doc}`scanpy:dev/documentation` for more information on how to write your own.
+
+[sphinx]: https://www.sphinx-doc.org/en/master/
+[myst]: https://myst-parser.readthedocs.io/en/latest/intro.html
+[myst-nb]: https://myst-nb.readthedocs.io/en/latest/
+[numpydoc-napoleon]: https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html
+[numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
+[sphinx-autodoc-typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
+
+### Tutorials with myst-nb and jupyter notebooks
+
+The documentation is set up to render Jupyter notebooks stored in the `docs/notebooks` directory using [myst-nb][].
+Currently, only notebooks in `.ipynb` format are supported; they are included with both their input and output cells.
+It is your responsibility to update and re-run the notebook whenever necessary.
+
+If you are interested in automatically running notebooks as part of the continuous integration,
+please check out [this feature request][issue-render-notebooks] in the `cookiecutter-scverse` repository.
+
+[issue-render-notebooks]: https://github.com/scverse/cookiecutter-scverse/issues/40
+
+#### Hints
+
+- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`.
+  Only then can Sphinx automatically create a link to the external documentation.
+- If building the documentation fails because of a missing link that is outside your control,
+ you can add an entry to the `nitpick_ignore` list in `docs/conf.py`
+
+(docs-building)=
+
+### Building the docs locally
+
+:::::{tabs}
+::::{group-tab} Hatch
+
+```bash
+hatch run docs:build
+hatch run docs:open
+```
+
+::::
+
+::::{group-tab} uv
+
+```bash
+cd docs
+uv run sphinx-build -M html . _build -W
+(xdg-)open _build/html/index.html
+```
+
+::::
+
+::::{group-tab} Pip
+
+```bash
+source .venv/bin/activate
+cd docs
+sphinx-build -M html . _build -W
+(xdg-)open _build/html/index.html
+```
+
+::::
+:::::
diff --git a/docs/img/mudata.png b/docs/img/mudata.png
deleted file mode 100644
index faa33fd..0000000
Binary files a/docs/img/mudata.png and /dev/null differ
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..18a4470
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,66 @@
+# Say hello to MuData
+
+**MuData** is a format for annotated multimodal datasets. MuData is native to Python but provides cross-language functionality via HDF5-based `.h5mu` files.
+
+[website]: https://scverse.org/
+[governance]: https://scverse.org/about/roles/
+[numfocus]: https://numfocus.org/
+[donation]: https://numfocus.org/donate-to-scverse/
+
+MuData is part of the scverse® project ([website], [governance]) and is fiscally sponsored by [NumFOCUS].
+Please consider making a tax-deductible [donation] to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs.
+
+## MuData objects as containers
+
+The `mudata` package introduces multimodal data objects ([MuData class](#mudata.MuData)) allowing Python users to work with increasingly complex datasets efficiently and to build new workflows and computational tools around them.
+
+```
+MuData object with n_obs × n_vars = 10110 × 110101
+ 2 modalities
+ atac: 10110 x 100001
+ rna: 10110 x 10100
+```
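+
+A container like the one above can be assembled from individual AnnData objects. A minimal sketch (the modality names and shapes are illustrative):
+
+```python
+import numpy as np
+from anndata import AnnData
+from mudata import MuData
+
+# two modalities measured for the same observations
+mdata = MuData({
+    "atac": AnnData(np.zeros((10110, 100001))),
+    "rna": AnnData(np.zeros((10110, 10100))),
+})
+print(mdata)  # n_vars is summed across modalities
+```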
+
+MuData objects enable multimodal information to be stored & accessed naturally, embrace [AnnData](https://github.com/theislab/anndata) for the individual modalities, and can be serialized to `.h5mu` files. [Learn more about multimodal objects](io/mudata.md) as well as [file formats for storing & sharing them](io/output.md).
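+
+Serialization works in both directions; a sketch, assuming an in-memory `mdata` object and an illustrative file name:
+
+```python
+import mudata
+
+mdata.write("dataset.h5mu")               # save to an HDF5-based .h5mu file
+mdata = mudata.read_h5mu("dataset.h5mu")  # load it back
+```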
+
+## Natural interface
+
+MuData objects feature an AnnData-like interface and familiar concepts such as *observations* and *variables* for the two data dimensions. Get familiar with MuData in the [Quickstart tutorial](notebooks/quickstart_mudata).
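+
+In practice, the AnnData-like access patterns look as follows (a sketch assuming an existing `mdata` object):
+
+```python
+mdata.obs         # joint annotation of observations
+mdata.var         # joint annotation of variables
+mdata["rna"]      # an individual modality, as an AnnData object
+mdata["rna"].var  # variable annotation local to the rna modality
+```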
+
+## Handling MuData objects
+
+A flagship framework for multimodal omics analysis — `muon` — has been built around the MuData format. Find more information on it [in its documentation](https://muon.readthedocs.io/en/latest/) and [on the tutorials page](https://muon-tutorials.readthedocs.io/en/latest/) as well as in the corresponding publication{cite:p}`bredikhin_2022`.
+
+```{eval-rst}
+.. toctree::
+ :hidden:
+ :maxdepth: 1
+ :caption: Getting started
+
+ notebooks/quickstart_mudata.ipynb
+ notebooks/nuances.ipynb
+ notebooks/axes.ipynb
+ notebooks/annotations_management.ipynb
+
+.. toctree::
+ :hidden:
+ :maxdepth: 1
+ :caption: Documentation
+
+ install
+ io/input
+ io/mudata
+ io/output
+ io/spec
+ api
+ changelog
+ contributing
+ references
+```
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 0000000..4b5c034
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,32 @@
+# Install mudata
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob:
+
+*
+```
+
+## Stable version
+
+`mudata` can be installed [from PyPI](https://pypi.org/project/mudata) with `pip`:
+
+```console
+pip install mudata
+```
+
+## Development version
+
+To use a pre-release version of `mudata`, install it from [the GitHub repository](https://github.com/scverse/mudata):
+
+```console
+pip install git+https://github.com/scverse/mudata
+```
+
+## Troubleshooting
+
+Please consult the details on installing `scanpy` and its dependencies [here](https://scanpy.readthedocs.io/en/stable/installation.html).
+If there are issues that have not been described, addressed, or documented, please consider [opening an issue](https://github.com/scverse/mudata/issues).
diff --git a/docs/io/input.md b/docs/io/input.md
new file mode 100644
index 0000000..13b0705
--- /dev/null
+++ b/docs/io/input.md
@@ -0,0 +1,89 @@
+# Input data
+
+The default way to import `MuData` is the following:
+
+```python
+from mudata import MuData
+```
+
+There are various ways in which the data can be provided to create a MuData object:
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob:
+
+*
+```
+
+## AnnData objects
+
+A MuData object can be constructed from a dictionary of existing AnnData objects:
+
+```python
+mdata = MuData({'rna': adata_rna, 'atac': adata_atac})
+```
+
+AnnData objects themselves can be easily constructed from NumPy arrays and/or Pandas DataFrames annotating features (*variables*) and samples/cells (*observations*). This makes it a rather general format for working with any type of high-dimensional data.
+
+```python
+from anndata import AnnData
+adata = AnnData(X=matrix, obs=metadata_df, var=features_df)
+```
+
+Please see more details on how to operate on AnnData objects [in the anndata documentation](https://anndata.readthedocs.io/).
+
+## Omics data
+
+When data formats specific to genomics are of interest, specialised readers can be found in analysis frameworks such as [muon](https://muon.readthedocs.io/). These functions, including the ones for Cell Ranger count matrices as well as Snap files, [are described here](https://muon.readthedocs.io/en/latest/io/input.html).
+
+
+## Remote storage
+
+MuData objects can be read and cached from remote locations including via HTTP(S) or from S3 buckets. This is achieved via [`fsspec`](https://github.com/fsspec/filesystem_spec). For example, to read a MuData object from a remote server:
+
+```python
+import fsspec
+import mudata
+
+fname = "https://github.com/gtca/h5xx-datasets/raw/main/datasets/minipbcite.h5mu?download="
+with fsspec.open(fname) as f:
+ mdata = mudata.read_h5mu(f)
+```
+
+A caching layer can be added in the following way:
+
+```python
+fname_cached = "filecache::" + fname
+with fsspec.open(fname_cached, filecache={'cache_storage': '/tmp/'}) as f:
+ mdata = mudata.read_h5mu(f)
+```
+
+For more `fsspec` usage examples see [its documentation](https://filesystem-spec.readthedocs.io/).
+
+### S3
+
+MuData objects in the `.h5mu` format stored in an S3 bucket can be read with `fsspec` as well:
+
+```python
+storage_options = {
+ 'endpoint_url': 'localhost:9000',
+ 'key': 'AWS_ACCESS_KEY_ID',
+ 'secret': 'AWS_SECRET_ACCESS_KEY',
+}
+
+with fsspec.open('s3://bucket/dataset.h5mu', **storage_options) as f:
+    mdata = mudata.read_h5mu(f)
+```
+
+
+MuData objects stored in the `.zarr` format in an S3 bucket can be read from a *mapping*:
+
+```python
+import s3fs
+
+s3 = s3fs.S3FileSystem(**storage_options)
+store = s3.get_mapper('s3://bucket/dataset.zarr')
+mdata = mudata.read_zarr(store)
+```
diff --git a/docs/io/mudata.md b/docs/io/mudata.md
new file mode 100644
index 0000000..6fe7ed4
--- /dev/null
+++ b/docs/io/mudata.md
@@ -0,0 +1,181 @@
+# Multimodal data objects
+
+[MuData](#mudata.MuData) is a class for multimodal objects:
+
+```python
+from mudata import MuData
+```
+
+
+`MuData` objects comprise a dictionary of `AnnData` objects, one per modality, in their `.mod` attribute. Just like `AnnData` objects themselves, they also contain attributes like `.obs` with annotations of observations (samples or cells), `.obsm` with their multidimensional annotations such as embeddings, etc.
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob:
+
+*
+```
+
+## MuData's attributes
+
+Key attributes and methods of `MuData` objects as well as important concepts are described below. A full list of attributes and methods of multimodal containers can be found in the [MuData](#mudata.MuData) documentation.
+
+### `.mod`
+
+Modalities are stored in a collection accessible via the `.mod` attribute of the `MuData` object with names of modalities as keys and `AnnData` objects as values.
+
+```python
+list(mdata.mod.keys())
+# => ['atac', 'rna']
+```
+
+Individual modalities can be accessed with their names via the `.mod` attribute or via the `MuData` object itself as a shorthand:
+
+```python
+mdata.mod['rna']
+# or
+mdata['rna']
+# => AnnData object
+```
+
+### `.obs` & `.var`
+
+:::{warning}
+Version 0.3 introduces a pull/push interface for annotations.
+For compatibility reasons, the old behaviour of pulling annotations on read/update is kept as the default.
+
+This will be changed in the next release, and the annotations will not be copied implicitly.
+To adopt the new behaviour, use [](#mudata.set_options) with `pull_on_update=False`.
+The new approach to `.update()` and annotations is described below.
+:::
+
+Sample (cell) annotations are stored in the data frame accessible via the `.obs` attribute. The same goes for `.var`, which contains annotations of variables (features).
+
+Copies of columns from `.obs` or `.var` data frames of individual modalities can be added with the `.pull_obs()` or `.pull_var()` methods:
+
+```python
+mdata.pull_obs()
+mdata.pull_var()
+```
+
+When the annotations are changed in `AnnData` objects of modalities, e.g. new columns are added, they can be propagated to the `.obs` or `.var` data frames with the same `.pull_obs()` or `.pull_var()` methods.
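+
+As a minimal sketch, assuming an `rna` modality; the `n_genes` column is illustrative:
+
+```python
+import numpy as np
+
+# add a new column to one modality...
+mdata["rna"].obs["n_genes"] = np.asarray((mdata["rna"].X > 0).sum(axis=1)).ravel()
+# ...and copy it to the global .obs data frame
+mdata.pull_obs()
+"rna:n_genes" in mdata.obs.columns
+# => True
+```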
+
+Observation columns copied from individual modalities carry the modality name as their prefix, e.g. `rna:n_genes`. The same is true for variable columns; however, if there are columns with identical names in the `.var` of multiple modalities (e.g. `n_cells`), these columns are merged across modalities and no prefix is added.
+
+When there are changes directly related to observations or variables, e.g. samples (cells) are filtered out or features (genes) are renamed, the changes have to be fetched with the `.update()` method:
+
+```python
+mdata.update()
+```
+
+### `.obsm`
+
+Multidimensional annotations of samples (cells) are accessible in the `.obsm` attribute. For instance, these can be UMAP coordinates learnt jointly on all modalities, or [MOFA](https://biofam.github.io/MOFA2/) embeddings, a generalisation of PCA to multiple omics.
+
+```python
+# mdata is a MuData object with CITE-seq data
+mdata.obsm
+# => MuAxisArrays with keys: X_umap, X_mofa, prot, rna
+```
+
+In addition to multidimensional embeddings, this slot may also contain boolean vectors, one per modality, indicating whether samples (cells) are present in the respective modality. For instance, if all samples (cells) are the same across modalities, all values in those vectors are `True`.
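+
+Continuing the CITE-seq example above, the boolean vector for `rna` marks the cells present in that modality:
+
+```python
+mdata.obsm["rna"]
+# => array([ True,  True, ...,  True])
+```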
+
+### Container shape
+
+The `MuData` object's shape is represented by two numbers calculated from the shapes of individual modalities — one for the number of observations and one for the number of variables.
+
+```python
+mdata.shape
+# => (9573, 132465)
+mdata.n_obs
+# => 9573
+mdata.n_vars
+# => 132465
+```
+
+By default, variables are always counted as belonging uniquely to a single modality, while observations with the same name are counted as the same observation, which has variables measured across multiple modalities.
+
+```python
+[ad.shape for ad in mdata.mod.values()]
+# => [(9500, 10100), (9573, 122364)]
+```
+
+If the shape of a modality is changed, [MuData.update](#mudata.MuData.update) has to be run to bring the respective updates to the `MuData` object.
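+
+A minimal sketch, assuming an `rna` modality is subset in place:
+
+```python
+# filtering a modality changes its shape...
+mdata.mod["rna"] = mdata["rna"][:9000].copy()
+# ...so the container has to be synced
+mdata.update()
+mdata.n_obs  # recomputed from the modalities
+```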
+
+### Keeping containers up to date
+
+:::{warning}
+Version 0.3 introduces a pull/push interface for annotations.
+For compatibility reasons, the old behaviour of pulling annotations on read/update is kept as the default.
+
+This will be changed in the next release, and the annotations will not be copied implicitly.
+To adopt the new behaviour, use [](#mudata.set_options) with `pull_on_update=False`.
+The new approach to `.update()` and annotations is described below.
+:::
+
+Modalities inside the `MuData` container are full-fledged `AnnData` objects, which can be operated independently with any tool that works on `AnnData` objects.
+When modalities are changed externally, the shape of the `MuData` object as well as the metadata fetched from individual modalities will still reflect the previous state of the data.
+To keep the container up to date, there is an `.update()` method that syncs the `.obs_names` and `.var_names` of the `MuData` object with the ones of the modalities.
+
+
+### Managing annotations
+
+To fetch the corresponding annotations from individual modalities, there are [MuData.pull_obs](#mudata.MuData.pull_obs) and [MuData.pull_var](#mudata.MuData.pull_var) methods.
+
+To update the annotations of individual modalities with the global annotations, [MuData.push_obs](#mudata.MuData.push_obs) and [MuData.push_var](#mudata.MuData.push_var) methods can be used.
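+
+A minimal sketch of pushing a hypothetical global `batch` column into the modalities:
+
+```python
+mdata.obs["batch"] = "batch1"  # a global annotation column
+mdata.push_obs()  # copy global columns into each modality's .obs
+mdata["rna"].obs["batch"].unique()
+# => array(['batch1'], dtype=object)
+```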
+
+
+### Backed containers
+
+To enable backed mode for the count matrices in all modalities, `.h5mu` files can be read with the relevant flag:
+
+```python
+mdata_b = mudata.read("filename.h5mu", backed=True)
+mdata_b.isbacked
+# => True
+```
+
+When creating a copy of a backed `MuData` object, the filename has to be provided, and the copy of the object will be backed at a new location.
+
+```python
+mdata_copy = mdata_b.copy("filename_copy.h5mu")
+mdata_b.file.filename
+# => 'filename_copy.h5mu'
+```
+
+### Container views
+
+Analogous to the behaviour of `AnnData` objects, slicing `MuData` objects returns views of the original data.
+
+```python
+view = mdata[:100, :1000]
+view.is_view
+# => True
+
+# In the view, each modality is a view as well
+view["A"].is_view
+# => True
+```
+
+Subsetting `MuData` objects is special since it slices them across modalities. That is, the slicing operation for a set of `obs_names` and/or `var_names` is performed for each modality, not only for the global multimodal annotation.
+
+This behaviour makes workflows memory-efficient, which is especially important when working with large datasets. If the object is to be modified, however, a copy of it should be created; the copy is not a view anymore and has no dependence on the original object.
+
+```python
+mdata_sub = view.copy()
+mdata_sub.is_view
+# => False
+```
+
+If the original object is backed, the filename has to be provided to the `.copy()` call, and the resulting object will be backed at a new location.
+
+```python
+mdata_sub = backed_view.copy("mdata_sub.h5mu")
+mdata_sub.is_view
+# => False
+mdata_sub.isbacked
+# => True
+```
diff --git a/docs/io/output.md b/docs/io/output.md
new file mode 100644
index 0000000..d796146
--- /dev/null
+++ b/docs/io/output.md
@@ -0,0 +1,77 @@
+# Output data
+
+In order to save & share multimodal data, the `.h5mu` file format has been designed.
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob:
+
+*
+```
+
+## `.h5mu` files
+
+`.h5mu` files are the default storage for MuData objects. These are HDF5 files with a standardised structure, similar to that of `.h5ad` files where AnnData objects are stored. The most noticeable distinction is the presence of the `mod` group, where individual modalities are stored in the same way as they would be stored in `.h5ad` files.
+
+```python
+mdata.write("mudata.h5mu")
+```
+
+Inspect the contents of the file in the terminal:
+
+```console
+$ h5ls mudata.h5mu
+mod Group
+obs Group
+obsm Group
+var Group
+varm Group
+
+$ h5ls mudata.h5mu/mod
+atac Group
+rna Group
+```
+
+## AnnData inside `.h5mu`
+
+Individual modalities in the `.h5mu` file are stored in exactly the same way as AnnData objects. This, together with the hierarchical nature of HDF5 files, makes it possible to read individual modalities from `.h5mu` files as well as to save individual modalities to the `.h5mu` file:
+
+```python
+adata = mudata.read("mudata.h5mu/rna")
+
+mudata.write("mudata.h5mu/rna", adata)
+```
+
+The function [](#mudata.read) automatically decides, based on the input, whether [](#mudata.read_h5mu) or [](#mudata.read_h5ad) should be called.
+
+Learn more about the on-disk format specification shared by MuData and AnnData [in the AnnData documentation](https://anndata.readthedocs.io/en/latest/fileformat-prose.html).
+
+## `.zarr` files
+
+[Zarr](https://zarr.readthedocs.io/en/stable/) is a cloud-friendly format for chunked N-dimensional arrays. Zarr is another supported serialisation format for MuData objects:
+
+```python
+mdata = mudata.read_zarr("mudata.zarr")
+
+mdata.write_zarr("mudata.zarr")
+```
+
+Just as with `.h5mu` files, MuData objects saved in `.zarr` format resemble how AnnData objects are stored, with one additional level of abstraction:
+
+```console
+$ tree -L 1 mudata.zarr
+mudata.zarr
+├── mod
+├── obs
+├── obsm
+├── obsmap
+├── obsp
+├── uns
+├── var
+├── varm
+├── varmap
+└── varp
+```
diff --git a/docs/io/spec.md b/docs/io/spec.md
new file mode 100644
index 0000000..0cc6779
--- /dev/null
+++ b/docs/io/spec.md
@@ -0,0 +1,61 @@
+# MuData specification [RFC]
+
+Building on top of the [AnnData spec](https://anndata.readthedocs.io/en/latest/fileformat-prose.html), this document provides details on the `MuData` on-disk format. For user-facing features, please see [this document](mudata.md).
+
+```python-console
+>>> import h5py
+>>> f = h5py.File("citeseq.h5mu")
+>>> list(f.keys())
+['mod', 'obs', 'obsm', 'obsmap', 'uns', 'var', 'varm', 'varmap']
+```
+
+```{contents}
+:local:
+:depth: 3
+```
+
+```{toctree}
+:maxdepth: 10
+:glob:
+
+*
+```
+
+## `.mod`
+
+Modalities are stored in the `mod` group of the `.h5mu` file in alphabetical order. To preserve the order of the modalities, there is a `mod-order` attribute that lists the modalities in their intended order. If some modalities are missing from that attribute, the attribute is to be ignored.
+
+```python-console
+>>> dict(f["mod"].attrs)
+{'mod-order': array(['prot', 'rna'], dtype=object)}
+```
+
+## `.obsmap` and `.varmap`
+
+While in practice `MuData` relies on `.obs_names` and `.var_names` to collate global observations and variables, it also allows disambiguating between items with the same name using integer maps. For example, global observations will have non-zero integer values in `.obsmap["rna"]` if they are present in the `rna` modality. If an observation or a variable is missing from a modality, it will correspond to a `0` value.
+
+```python-console
+>>> list(f["obsmap"].keys())
+['prot', 'rna']
+>>> import numpy as np
+>>> np.array(f["obsmap"]["rna"])
+array([ 1, 2, 3, ..., 3889, 3890, 3891], dtype=uint32)
+>>> np.array(f["obsmap"]["prot"])
+array([ 1, 2, 3, ..., 3889, 3890, 3891], dtype=uint32)
+
+>>> list(f["varmap"].keys())
+['prot', 'rna']
+>>> np.array(f["varmap"]["rna"])
+array([ 0, 0, 0, ..., 17804, 17805, 17806], dtype=uint32)
+>>> np.array(f["varmap"]["prot"])
+array([1, 2, 3, ..., 0, 0, 0], dtype=uint32)
+```
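+
+A sketch of how these maps can be interpreted; the variable names below are illustrative:
+
+```python-console
+>>> rna_map = np.array(f["varmap"]["rna"])
+>>> present = rna_map > 0  # global variables present in the 'rna' modality
+>>> positions = rna_map[present] - 1  # 0-based positions within rna's .var
+```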
+
+## `.axis`
+
+The axis describes which dimension is shared: observations (`axis=0`), variables (`axis=1`), or both (`axis=-1`). It is recorded in the `axis` attribute of the file:
+
+```python-console
+>>> f.attrs["axis"]
+0
+```
+
+Multimodal datasets, which have observations shared between modalities, will have `axis=0`. If no `axis` attribute is available, such as in files written with older versions of this specification, it is assumed to be `0` by default.
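+
+For illustration, a container with shared variables rather than shared observations would be created with `axis=1`; the modality names below are hypothetical:
+
+```python
+mdata = mudata.MuData({"batch1": ad1, "batch2": ad2}, axis=1)
+mdata.write("batches.h5mu")  # the resulting file carries axis=1 in its attributes
+```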
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index 6247f7e..0000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,35 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
- set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=source
-set BUILDDIR=build
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
- echo.
- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
- echo.installed, then set the SPHINXBUILD environment variable to point
- echo.to the full path of the 'sphinx-build' executable. Alternatively you
- echo.may add the Sphinx directory to PATH.
- echo.
- echo.If you don't have Sphinx installed, grab it from
- echo.http://sphinx-doc.org/
- exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
diff --git a/docs/source/notebooks/annotations_management.ipynb b/docs/notebooks/annotations_management.ipynb
similarity index 97%
rename from docs/source/notebooks/annotations_management.ipynb
rename to docs/notebooks/annotations_management.ipynb
index 3ad518f..52a1c61 100644
--- a/docs/source/notebooks/annotations_management.ipynb
+++ b/docs/notebooks/annotations_management.ipynb
@@ -72,7 +72,6 @@
"outputs": [],
"source": [
"def make_mdata():\n",
- " N = 100\n",
" D1, D2, D3 = 10, 20, 30\n",
" D = D1 + D2 + D3\n",
"\n",
@@ -114,7 +113,7 @@
"mdata = make_mdata()\n",
"# TODO: shouldn't be needed from 0.4\n",
"# mdata.update(pull=False)\n",
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -196,7 +195,7 @@
"outputs": [],
"source": [
"# Clean up\n",
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -247,7 +246,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -296,7 +295,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -348,7 +347,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -398,7 +397,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -440,7 +439,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -498,7 +497,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -539,7 +538,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -589,7 +588,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -721,7 +720,7 @@
"\n",
" # column present in some (2 out of 3) modalities (non-unique)\n",
" mod2.obs[\"arange\"] = np.arange(N)\n",
- " mod3.obs[\"arange\"] = np.arange(N, 2*N)\n",
+ " mod3.obs[\"arange\"] = np.arange(N, 2 * N)\n",
"\n",
" # column present in one modality (unique)\n",
" mod3.obs[\"mod3_cell\"] = True\n",
@@ -740,7 +739,7 @@
"mdata = make_mdata()\n",
"# TODO: shouldn't be needed from 0.4\n",
"# mdata.update(pull=False)\n",
- "mdata.obs = mdata.obs.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]"
]
},
{
@@ -849,7 +848,7 @@
"outputs": [],
"source": [
"# Clean up\n",
- "mdata.obs = mdata.obs.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]"
]
},
{
@@ -902,7 +901,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.obs = mdata.obs.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]"
]
},
{
@@ -943,7 +942,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.obs = mdata.obs.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]"
]
},
{
@@ -996,7 +995,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.obs = mdata.obs.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]"
]
},
{
@@ -1046,7 +1045,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.obs = mdata.obs.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]"
]
},
{
@@ -1088,7 +1087,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.obs = mdata.obs.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]"
]
},
{
@@ -1138,7 +1137,7 @@
"metadata": {},
"outputs": [],
"source": [
- "mdata.obs = mdata.obs.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]"
]
},
{
@@ -1255,7 +1254,6 @@
"outputs": [],
"source": [
"def make_mdata():\n",
- " N = 100\n",
" D1, D2, D3 = 10, 20, 30\n",
" D = D1 + D2 + D3\n",
"\n",
@@ -1270,18 +1268,20 @@
" mod3 = AnnData(np.arange(5101, 8101, 1).reshape(-1, D3))\n",
" mod3.obs_names = mod1.obs_names.copy()\n",
" mod3.var_names = [f\"var{i}\" for i in range(D1 + D2, D)]\n",
- " \n",
+ "\n",
" mdata = MuData({\"mod1\": mod1, \"mod2\": mod2, \"mod3\": mod3})\n",
"\n",
" # common column to be propagated to all modalities\n",
" mdata.var[\"highly_variable\"] = True\n",
"\n",
" # prefix column to be propagated to the respective modalities\n",
- " mdata.var[\"mod2:if_mod2\"] = np.concatenate([\n",
- " np.repeat(pd.NA, D1), \n",
- " np.repeat(True, D2),\n",
- " np.repeat(pd.NA, D3),\n",
- " ])\n",
+ " mdata.var[\"mod2:if_mod2\"] = np.concatenate(\n",
+ " [\n",
+ " np.repeat(pd.NA, D1),\n",
+ " np.repeat(True, D2),\n",
+ " np.repeat(pd.NA, D3),\n",
+ " ]\n",
+ " )\n",
"\n",
" return mdata"
]
@@ -1382,7 +1382,7 @@
"source": [
"# Clean up\n",
"for m in mdata.mod.keys():\n",
- " mdata[m].var = mdata[m].var.loc[:,[]]"
+ " mdata[m].var = mdata[m].var.loc[:, []]"
]
},
{
@@ -1438,7 +1438,7 @@
"source": [
"# Clean up\n",
"for m in mdata.mod.keys():\n",
- " mdata[m].var = mdata[m].var.loc[:,[]]"
+ " mdata[m].var = mdata[m].var.loc[:, []]"
]
},
{
@@ -1490,7 +1490,7 @@
"source": [
"# Clean up\n",
"for m in mdata.mod.keys():\n",
- " mdata[m].var = mdata[m].var.loc[:,[]]"
+ " mdata[m].var = mdata[m].var.loc[:, []]"
]
},
{
@@ -1558,7 +1558,7 @@
"source": [
"# Clean up\n",
"for m in mdata.mod.keys():\n",
- " mdata[m].var = mdata[m].var.loc[:,[]]"
+ " mdata[m].var = mdata[m].var.loc[:, []]"
]
},
{
@@ -1669,9 +1669,7 @@
"outputs": [],
"source": [
"def make_mdata():\n",
- " N = 100\n",
" D1, D2 = 10, 20\n",
- " D = D1 + D2\n",
"\n",
" mod1 = AnnData(np.arange(0, 100, 0.1).reshape(-1, D1))\n",
" mod1.obs_names = [f\"obs{i}\" for i in range(mod1.n_obs)]\n",
@@ -1780,7 +1778,7 @@
"source": [
"# Clean up\n",
"for m in mdata.mod.keys():\n",
- " mdata[m].obs = mdata[m].obs.loc[:,[]]"
+ " mdata[m].obs = mdata[m].obs.loc[:, []]"
]
},
{
@@ -1830,7 +1828,7 @@
"source": [
"# Clean up\n",
"for m in mdata.mod.keys():\n",
- " mdata[m].obs = mdata[m].obs.loc[:,[]]"
+ " mdata[m].obs = mdata[m].obs.loc[:, []]"
]
},
{
@@ -1895,7 +1893,7 @@
"source": [
"# Clean up\n",
"for m in mdata.mod.keys():\n",
- " mdata[m].obs = mdata[m].obs.loc[:,[]]"
+ " mdata[m].obs = mdata[m].obs.loc[:, []]"
]
},
{
@@ -2046,8 +2044,8 @@
"mdata = make_mdata()\n",
"# TODO: shouldn't be needed from 0.4\n",
"# mdata.update(pull=False)\n",
- "mdata.obs = mdata.obs.loc[:,[]]\n",
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]\n",
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -2198,8 +2196,8 @@
"mdata = make_staged_mdata()\n",
"# TODO: shouldn't be needed from 0.4\n",
"# mdata.update(pull=False)\n",
- "mdata.obs = mdata.obs.loc[:,[]]\n",
- "mdata.var = mdata.var.loc[:,[]]"
+ "mdata.obs = mdata.obs.loc[:, []]\n",
+ "mdata.var = mdata.var.loc[:, []]"
]
},
{
@@ -2315,14 +2313,14 @@
"source": [
"def make_nested_mdata():\n",
" stages = make_staged_mdata()\n",
- " stages.obs = stages.obs.loc[:,[]] # pre-0.3\n",
- " \n",
+ " stages.obs = stages.obs.loc[:, []] # pre-0.3\n",
+ "\n",
" mod2 = AnnData(np.arange(10000, 12000, 1).reshape(10, -1))\n",
" mod2.obs_names = [f\"obs{i}\" for i in range(mod2.n_obs)]\n",
" mod2.var_names = [f\"mod2:var{i}\" for i in range(mod2.n_vars)]\n",
"\n",
" mdata = MuData({\"mod1\": stages, \"mod2\": mod2}, axis=-1)\n",
- " \n",
+ "\n",
" mdata.obs[\"dataset\"] = \"ref\"\n",
"\n",
" return mdata"
@@ -2431,7 +2429,7 @@
}
],
"source": [
- "for m, mod in mdata.mod.items():\n",
+ "for mod in mdata.mod.values():\n",
" print(mod.obs.dtypes)"
]
},
@@ -2454,7 +2452,7 @@
}
],
"source": [
- "for m, mod in mdata['mod1'].mod.items():\n",
+ "for mod in mdata[\"mod1\"].mod.values():\n",
" print(mod.obs.dtypes)"
]
},
@@ -2475,7 +2473,7 @@
"source": [
"def push_obs_rec(mdata: MuData):\n",
" mdata.push_obs()\n",
- " for m, mod in mdata.mod.items():\n",
+ " for mod in mdata.mod.values():\n",
" if isinstance(mod, MuData):\n",
" push_obs_rec(mod)"
]
@@ -2497,7 +2495,7 @@
"metadata": {},
"outputs": [],
"source": [
- "for m, mod in mdata['mod1'].mod.items():\n",
+ "for mod in mdata[\"mod1\"].mod.values():\n",
" assert \"dataset\" in mod.obs"
]
},
diff --git a/docs/source/notebooks/axes.ipynb b/docs/notebooks/axes.ipynb
similarity index 94%
rename from docs/source/notebooks/axes.ipynb
rename to docs/notebooks/axes.ipynb
index dcf1abe..f3e5358 100644
--- a/docs/source/notebooks/axes.ipynb
+++ b/docs/notebooks/axes.ipynb
@@ -68,7 +68,6 @@
"metadata": {},
"outputs": [],
"source": [
- "import mudata as md\n",
"from mudata import MuData, AnnData"
]
},
@@ -79,7 +78,6 @@
"outputs": [],
"source": [
"import numpy as np\n",
- "import pandas as pd\n",
"\n",
"np.random.seed(1)"
]
@@ -113,9 +111,9 @@
"source": [
"n, d1, d2 = 100, 1000, 1500\n",
"\n",
- "ax = AnnData(np.random.normal(size=(n,d1)))\n",
+ "ax = AnnData(np.random.normal(size=(n, d1)))\n",
"\n",
- "ay = AnnData(np.random.normal(size=(n,d2)))"
+ "ay = AnnData(np.random.normal(size=(n, d2)))"
]
},
{
@@ -180,8 +178,8 @@
}
],
"source": [
- "print(\"ax.var_names: [\", \", \".join(ax.var_names.values[:5]) + \", ..., \", ax.var_names.values[d1-1], \"]\")\n",
- "print(\"ay.var_names: [\", \", \".join(ay.var_names.values[:5]) + \", ..., \", ay.var_names.values[d2-1], \"]\")"
+ "print(\"ax.var_names: [\", \", \".join(ax.var_names.values[:5]) + \", ..., \", ax.var_names.values[d1 - 1], \"]\")\n",
+ "print(\"ay.var_names: [\", \", \".join(ay.var_names.values[:5]) + \", ..., \", ay.var_names.values[d2 - 1], \"]\")"
]
},
{
@@ -197,8 +195,8 @@
"metadata": {},
"outputs": [],
"source": [
- "ax.var_names = [f\"x_var{i+1}\" for i in range(d1)]\n",
- "ay.var_names = [f\"y_var{i+1}\" for i in range(d2)]"
+ "ax.var_names = [f\"x_var{i + 1}\" for i in range(d1)]\n",
+ "ay.var_names = [f\"y_var{i + 1}\" for i in range(d2)]"
]
},
{
@@ -253,8 +251,8 @@
"source": [
"n1, n2, d = 100, 500, 1000\n",
"\n",
- "ad1 = AnnData(np.random.normal(size=(n1,d)))\n",
- "ad2 = AnnData(np.random.normal(size=(n2,d)))"
+ "ad1 = AnnData(np.random.normal(size=(n1, d)))\n",
+ "ad2 = AnnData(np.random.normal(size=(n2, d)))"
]
},
{
@@ -264,8 +262,8 @@
"outputs": [],
"source": [
"# Cell barcodes are dataset-specific\n",
- "ad1.obs_names = [f\"dat1-cell{i+1}\" for i in range(n1)]\n",
- "ad2.obs_names = [f\"dat2-cell{i+1}\" for i in range(n2)]"
+ "ad1.obs_names = [f\"dat1-cell{i + 1}\" for i in range(n1)]\n",
+ "ad2.obs_names = [f\"dat2-cell{i + 1}\" for i in range(n2)]"
]
},
{
@@ -360,8 +358,8 @@
"source": [
"n, d_raw, d_preproc = 100, 900, 300\n",
"\n",
- "a_raw = AnnData(np.random.normal(size=(n,d_raw)))\n",
- "a_preproc = a_raw[:,np.sort(np.random.choice(np.arange(d_raw), d_preproc, replace=False))].copy()"
+ "a_raw = AnnData(np.random.normal(size=(n, d_raw)))\n",
+ "a_preproc = a_raw[:, np.sort(np.random.choice(np.arange(d_raw), d_preproc, replace=False))].copy()"
]
},
{
diff --git a/docs/source/notebooks/nuances.ipynb b/docs/notebooks/nuances.ipynb
similarity index 96%
rename from docs/source/notebooks/nuances.ipynb
rename to docs/notebooks/nuances.ipynb
index 828f9fd..7662a0d 100644
--- a/docs/source/notebooks/nuances.ipynb
+++ b/docs/notebooks/nuances.ipynb
@@ -34,15 +34,7 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"%pip install mudata"
]
@@ -83,8 +75,7 @@
"metadata": {},
"outputs": [],
"source": [
- "import numpy as np\n",
- "import pandas as pd"
+ "import numpy as np"
]
},
{
@@ -103,9 +94,9 @@
"n, d1, d2, k = 1000, 100, 200, 10\n",
"\n",
"np.random.seed(1)\n",
- "z = np.random.normal(loc=np.arange(k), scale=np.arange(k)*2, size=(n,k))\n",
- "w1 = np.random.normal(size=(d1,k))\n",
- "w2 = np.random.normal(size=(d2,k))\n",
+ "z = np.random.normal(loc=np.arange(k), scale=np.arange(k) * 2, size=(n, k))\n",
+ "w1 = np.random.normal(size=(d1, k))\n",
+ "w2 = np.random.normal(size=(d2, k))\n",
"\n",
"mod1 = AnnData(X=np.dot(z, w1.T))\n",
"mod2 = AnnData(X=np.dot(z, w2.T))"
@@ -276,8 +267,8 @@
"\n",
"smaller_mdata = mdata.copy()\n",
"\n",
- "smaller_mdata.mod['mod1'] = mod1[:900]\n",
- "smaller_mdata.mod['mod2'] = mod2[:900]"
+ "smaller_mdata.mod[\"mod1\"] = mod1[:900]\n",
+ "smaller_mdata.mod[\"mod2\"] = mod2[:900]"
]
},
{
@@ -332,7 +323,7 @@
}
],
"source": [
- "print(max(smaller_mdata.obs['dummy_index']))"
+ "print(max(smaller_mdata.obs[\"dummy_index\"]))"
]
},
{
@@ -388,7 +379,7 @@
}
],
"source": [
- "print(max(smaller_mdata.obs['dummy_index']))"
+ "print(max(smaller_mdata.obs[\"dummy_index\"]))"
]
},
{
diff --git a/docs/source/notebooks/quickstart_mudata.ipynb b/docs/notebooks/quickstart_mudata.ipynb
similarity index 97%
rename from docs/source/notebooks/quickstart_mudata.ipynb
rename to docs/notebooks/quickstart_mudata.ipynb
index b38c223..363d2b9 100644
--- a/docs/source/notebooks/quickstart_mudata.ipynb
+++ b/docs/notebooks/quickstart_mudata.ipynb
@@ -76,12 +76,13 @@
],
"source": [
"import numpy as np\n",
+ "\n",
"np.random.seed(1)\n",
"\n",
"n, d, k = 1000, 100, 10\n",
"\n",
- "z = np.random.normal(loc=np.arange(k), scale=np.arange(k)*2, size=(n,k))\n",
- "w = np.random.normal(size=(d,k))\n",
+ "z = np.random.normal(loc=np.arange(k), scale=np.arange(k) * 2, size=(n, k))\n",
+ "w = np.random.normal(size=(d, k))\n",
"y = np.dot(z, w.T)\n",
"y.shape"
]
@@ -113,8 +114,8 @@
"from anndata import AnnData\n",
"\n",
"adata = AnnData(y)\n",
- "adata.obs_names = [f\"obs_{i+1}\" for i in range(n)]\n",
- "adata.var_names = [f\"var_{j+1}\" for j in range(d)]\n",
+ "adata.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
+ "adata.var_names = [f\"var_{j + 1}\" for j in range(d)]\n",
"adata"
]
},
@@ -143,12 +144,12 @@
],
"source": [
"d2 = 50\n",
- "w2 = np.random.normal(size=(d2,k))\n",
+ "w2 = np.random.normal(size=(d2, k))\n",
"y2 = np.dot(z, w2.T)\n",
"\n",
"adata2 = AnnData(y2)\n",
- "adata2.obs_names = [f\"obs_{i+1}\" for i in range(n)]\n",
- "adata2.var_names = [f\"var2_{j+1}\" for j in range(d2)]\n",
+ "adata2.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
+ "adata2.var_names = [f\"var2_{j + 1}\" for j in range(d2)]\n",
"adata2"
]
},
@@ -274,7 +275,7 @@
}
],
"source": [
- "mdata.varm['A']"
+ "mdata.varm[\"A\"]"
]
},
{
@@ -353,9 +354,9 @@
}
],
"source": [
- "print(f\"Outdated variables names: ...,\", \", \".join(mdata.var_names[-3:]))\n",
+ "print(\"Outdated variables names: ...,\", \", \".join(mdata.var_names[-3:]))\n",
"mdata.update()\n",
- "print(f\"Updated variables names: ...,\", \", \".join(mdata.var_names[-3:]))"
+ "print(\"Updated variables names: ...,\", \", \".join(mdata.var_names[-3:]))"
]
},
{
@@ -504,7 +505,7 @@
}
],
"source": [
- "with md.set_options(display_style = \"html\", display_html_expand = 0b000):\n",
+ "with md.set_options(display_style=\"html\", display_html_expand=0b000):\n",
" display(mdata)"
]
},
@@ -720,7 +721,7 @@
}
],
"source": [
- "with md.set_options(display_style = \"html\", display_html_expand = 0b000):\n",
+ "with md.set_options(display_style=\"html\", display_html_expand=0b000):\n",
" display(mdata_r)"
]
},
@@ -774,7 +775,7 @@
"source": [
"def simple_pca(mdata):\n",
" from sklearn import decomposition\n",
- " \n",
+ "\n",
" x = np.hstack([m.X for m in mdata.mod.values()])\n",
"\n",
" pca = decomposition.PCA(n_components=2)\n",
@@ -783,8 +784,8 @@
" # By default, methods operate in-place\n",
" # and embeddings are stored in the .obsm slot\n",
" mdata.obsm[\"X_pca\"] = components\n",
- " \n",
- " return "
+ "\n",
+ " return"
]
},
{
diff --git a/docs/pylint.rc b/docs/pylint.rc
deleted file mode 100644
index 45a4dcf..0000000
--- a/docs/pylint.rc
+++ /dev/null
@@ -1,589 +0,0 @@
-[MASTER]
-
-# A comma-separated list of package or module names from where C extensions may
-# be loaded. Extensions are loading into the active Python interpreter and may
-# run arbitrary code.
-extension-pkg-whitelist=
-
-# Specify a score threshold to be exceeded before program exits with error.
-fail-under=10.0
-
-# Add files or directories to the blacklist. They should be base names, not
-# paths.
-ignore=CVS
-
-# Add files or directories matching the regex patterns to the blacklist. The
-# regex matches against base names, not paths.
-ignore-patterns=
-
-# Python code to execute, usually for sys.path manipulation such as
-# pygtk.require().
-#init-hook=
-
-# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
-# number of processors available to use.
-jobs=1
-
-# Control the amount of potential inferred values when inferring a single
-# object. This can help the performance when dealing with large functions or
-# complex, nested conditions.
-limit-inference-results=100
-
-# List of plugins (as comma separated values of python module names) to load,
-# usually to register additional checkers.
-load-plugins=
-
-# Pickle collected data for later comparisons.
-persistent=yes
-
-# When enabled, pylint would attempt to guess common misconfiguration and emit
-# user-friendly hints instead of false-positive error messages.
-suggestion-mode=yes
-
-# Allow loading of arbitrary C extensions. Extensions are imported into the
-# active Python interpreter and may run arbitrary code.
-unsafe-load-any-extension=no
-
-
-[MESSAGES CONTROL]
-
-# Only show warnings with the listed confidence levels. Leave empty to show
-# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
-confidence=
-
-# Disable the message, report, category or checker with the given id(s). You
-# can either give multiple identifiers separated by comma (,) or put this
-# option multiple times (only on the command line, not in the configuration
-# file where it should appear only once). You can also use "--disable=all" to
-# disable everything first and then reenable specific checks. For example, if
-# you want to run only the similarities checker, you can use "--disable=all
-# --enable=similarities". If you want to run only the classes checker, but have
-# no Warning level messages displayed, use "--disable=all --enable=classes
-# --disable=W".
-disable=print-statement,
- parameter-unpacking,
- unpacking-in-except,
- old-raise-syntax,
- backtick,
- long-suffix,
- old-ne-operator,
- old-octal-literal,
- import-star-module-level,
- non-ascii-bytes-literal,
- raw-checker-failed,
- bad-inline-option,
- locally-disabled,
- file-ignored,
- suppressed-message,
- useless-suppression,
- deprecated-pragma,
- use-symbolic-message-instead,
- apply-builtin,
- basestring-builtin,
- buffer-builtin,
- cmp-builtin,
- coerce-builtin,
- execfile-builtin,
- file-builtin,
- long-builtin,
- raw_input-builtin,
- reduce-builtin,
- standarderror-builtin,
- unicode-builtin,
- xrange-builtin,
- coerce-method,
- delslice-method,
- getslice-method,
- setslice-method,
- no-absolute-import,
- old-division,
- dict-iter-method,
- dict-view-method,
- next-method-called,
- metaclass-assignment,
- indexing-exception,
- raising-string,
- reload-builtin,
- oct-method,
- hex-method,
- nonzero-method,
- cmp-method,
- input-builtin,
- round-builtin,
- intern-builtin,
- unichr-builtin,
- map-builtin-not-iterating,
- zip-builtin-not-iterating,
- range-builtin-not-iterating,
- filter-builtin-not-iterating,
- using-cmp-argument,
- eq-without-hash,
- div-method,
- idiv-method,
- rdiv-method,
- exception-message-attribute,
- invalid-str-codec,
- sys-max-int,
- bad-python3-import,
- deprecated-string-function,
- deprecated-str-translate-call,
- deprecated-itertools-function,
- deprecated-types-field,
- next-method-defined,
- dict-items-not-iterating,
- dict-keys-not-iterating,
- dict-values-not-iterating,
- deprecated-operator-function,
- deprecated-urllib-function,
- xreadlines-attribute,
- deprecated-sys-function,
- exception-escape,
- comprehension-escape,
- W0311
-
-# Enable the message, report, category or checker with the given id(s). You can
-# either give multiple identifier separated by comma (,) or put this option
-# multiple time (only on the command line, not in the configuration file where
-# it should appear only once). See also the "--disable" option for examples.
-enable=c-extension-no-member
-
-
-[REPORTS]
-
-# Python expression which should return a score less than or equal to 10. You
-# have access to the variables 'error', 'warning', 'refactor', and 'convention'
-# which contain the number of messages in each category, as well as 'statement'
-# which is the total number of statements analyzed. This score is used by the
-# global evaluation report (RP0004).
-evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
-
-# Template used to display messages. This is a python new-style format string
-# used to format the message information. See doc for all details.
-#msg-template=
-
-# Set the output format. Available formats are text, parseable, colorized, json
-# and msvs (visual studio). You can also give a reporter class, e.g.
-# mypackage.mymodule.MyReporterClass.
-output-format=text
-
-# Tells whether to display a full report or only the messages.
-reports=no
-
-# Activate the evaluation score.
-score=yes
-
-
-[REFACTORING]
-
-# Maximum number of nested blocks for function / method body
-max-nested-blocks=5
-
-# Complete name of functions that never returns. When checking for
-# inconsistent-return-statements if a never returning function is called then
-# it will be considered as an explicit return statement and no message will be
-# printed.
-never-returning-functions=sys.exit
-
-
-[LOGGING]
-
-# The type of string formatting that logging methods do. `old` means using %
-# formatting, `new` is for `{}` formatting.
-logging-format-style=old
-
-# Logging modules to check that the string format arguments are in logging
-# function parameter format.
-logging-modules=logging
-
-
-[SPELLING]
-
-# Limits count of emitted suggestions for spelling mistakes.
-max-spelling-suggestions=4
-
-# Spelling dictionary name. Available dictionaries: none. To make it work,
-# install the python-enchant package.
-spelling-dict=
-
-# List of comma separated words that should not be checked.
-spelling-ignore-words=
-
-# A path to a file that contains the private dictionary; one word per line.
-spelling-private-dict-file=
-
-# Tells whether to store unknown words to the private dictionary (see the
-# --spelling-private-dict-file option) instead of raising a message.
-spelling-store-unknown-words=no
-
-
-[MISCELLANEOUS]
-
-# List of note tags to take in consideration, separated by a comma.
-notes=FIXME,
- XXX,
- TODO
-
-# Regular expression of note tags to take in consideration.
-#notes-rgx=
-
-
-[TYPECHECK]
-
-# List of decorators that produce context managers, such as
-# contextlib.contextmanager. Add to this list to register other decorators that
-# produce valid context managers.
-contextmanager-decorators=contextlib.contextmanager
-
-# List of members which are set dynamically and missed by pylint inference
-# system, and so shouldn't trigger E1101 when accessed. Python regular
-# expressions are accepted.
-generated-members=
-
-# Tells whether missing members accessed in mixin class should be ignored. A
-# mixin class is detected if its name ends with "mixin" (case insensitive).
-ignore-mixin-members=yes
-
-# Tells whether to warn about missing members when the owner of the attribute
-# is inferred to be None.
-ignore-none=yes
-
-# This flag controls whether pylint should warn about no-member and similar
-# checks whenever an opaque object is returned when inferring. The inference
-# can return multiple potential results while evaluating a Python object, but
-# some branches might not be evaluated, which results in partial inference. In
-# that case, it might be useful to still emit no-member and other checks for
-# the rest of the inferred objects.
-ignore-on-opaque-inference=yes
-
-# List of class names for which member attributes should not be checked (useful
-# for classes with dynamically set attributes). This supports the use of
-# qualified names.
-ignored-classes=optparse.Values,thread._local,_thread._local
-
-# List of module names for which member attributes should not be checked
-# (useful for modules/projects where namespaces are manipulated during runtime
-# and thus existing member attributes cannot be deduced by static analysis). It
-# supports qualified module names, as well as Unix pattern matching.
-ignored-modules=
-
-# Show a hint with possible names when a member name was not found. The aspect
-# of finding the hint is based on edit distance.
-missing-member-hint=yes
-
-# The minimum edit distance a name should have in order to be considered a
-# similar match for a missing member name.
-missing-member-hint-distance=1
-
-# The total number of similar names that should be taken in consideration when
-# showing a hint for a missing member.
-missing-member-max-choices=1
-
-# List of decorators that change the signature of a decorated function.
-signature-mutators=
-
-
-[VARIABLES]
-
-# List of additional names supposed to be defined in builtins. Remember that
-# you should avoid defining new builtins when possible.
-additional-builtins=
-
-# Tells whether unused global variables should be treated as a violation.
-allow-global-unused-variables=yes
-
-# List of strings which can identify a callback function by name. A callback
-# name must start or end with one of those strings.
-callbacks=cb_,
- _cb
-
-# A regular expression matching the name of dummy variables (i.e. expected to
-# not be used).
-dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
-
-# Argument names that match this expression will be ignored. Default to name
-# with leading underscore.
-ignored-argument-names=_.*|^ignored_|^unused_
-
-# Tells whether we should check for unused import in __init__ files.
-init-import=no
-
-# List of qualified module names which can have objects that can redefine
-# builtins.
-redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
-
-
-[FORMAT]
-
-# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
-expected-line-ending-format=
-
-# Regexp for a line that is allowed to be longer than the limit.
-ignore-long-lines=^\s*(# )??$
-
-# Number of spaces of indent required inside a hanging or continued line.
-indent-after-paren=4
-
-# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
-# tab).
-indent-string=' '
-
-# Maximum number of characters on a single line.
-max-line-length=100
-
-# Maximum number of lines in a module.
-max-module-lines=1000
-
-# Allow the body of a class to be on the same line as the declaration if body
-# contains single statement.
-single-line-class-stmt=no
-
-# Allow the body of an if to be on the same line as the test if there is no
-# else.
-single-line-if-stmt=no
-
-
-[SIMILARITIES]
-
-# Ignore comments when computing similarities.
-ignore-comments=yes
-
-# Ignore docstrings when computing similarities.
-ignore-docstrings=yes
-
-# Ignore imports when computing similarities.
-ignore-imports=no
-
-# Minimum lines number of a similarity.
-min-similarity-lines=4
-
-
-[BASIC]
-
-# Naming style matching correct argument names.
-argument-naming-style=snake_case
-
-# Regular expression matching correct argument names. Overrides argument-
-# naming-style.
-#argument-rgx=
-
-# Naming style matching correct attribute names.
-attr-naming-style=snake_case
-
-# Regular expression matching correct attribute names. Overrides attr-naming-
-# style.
-#attr-rgx=
-
-# Bad variable names which should always be refused, separated by a comma.
-bad-names=foo,
- bar,
- baz,
- toto,
- tutu,
- tata
-
-# Bad variable names regexes, separated by a comma. If names match any regex,
-# they will always be refused
-bad-names-rgxs=
-
-# Naming style matching correct class attribute names.
-class-attribute-naming-style=any
-
-# Regular expression matching correct class attribute names. Overrides class-
-# attribute-naming-style.
-#class-attribute-rgx=
-
-# Naming style matching correct class names.
-class-naming-style=PascalCase
-
-# Regular expression matching correct class names. Overrides class-naming-
-# style.
-#class-rgx=
-
-# Naming style matching correct constant names.
-const-naming-style=UPPER_CASE
-
-# Regular expression matching correct constant names. Overrides const-naming-
-# style.
-#const-rgx=
-
-# Minimum line length for functions/classes that require docstrings, shorter
-# ones are exempt.
-docstring-min-length=-1
-
-# Naming style matching correct function names.
-function-naming-style=snake_case
-
-# Regular expression matching correct function names. Overrides function-
-# naming-style.
-#function-rgx=
-
-# Good variable names which should always be accepted, separated by a comma.
-good-names=i,
- j,
- k,
- ex,
- Run,
- _
-
-# Good variable names regexes, separated by a comma. If names match any regex,
-# they will always be accepted
-good-names-rgxs=
-
-# Include a hint for the correct naming format with invalid-name.
-include-naming-hint=no
-
-# Naming style matching correct inline iteration names.
-inlinevar-naming-style=any
-
-# Regular expression matching correct inline iteration names. Overrides
-# inlinevar-naming-style.
-#inlinevar-rgx=
-
-# Naming style matching correct method names.
-method-naming-style=snake_case
-
-# Regular expression matching correct method names. Overrides method-naming-
-# style.
-#method-rgx=
-
-# Naming style matching correct module names.
-module-naming-style=snake_case
-
-# Regular expression matching correct module names. Overrides module-naming-
-# style.
-#module-rgx=
-
-# Colon-delimited sets of names that determine each other's naming style when
-# the name regexes allow several styles.
-name-group=
-
-# Regular expression which should only match function or class names that do
-# not require a docstring.
-no-docstring-rgx=^_
-
-# List of decorators that produce properties, such as abc.abstractproperty. Add
-# to this list to register other decorators that produce valid properties.
-# These decorators are taken in consideration only for invalid-name.
-property-classes=abc.abstractproperty
-
-# Naming style matching correct variable names.
-variable-naming-style=snake_case
-
-# Regular expression matching correct variable names. Overrides variable-
-# naming-style.
-#variable-rgx=
-
-
-[STRING]
-
-# This flag controls whether inconsistent-quotes generates a warning when the
-# character used as a quote delimiter is used inconsistently within a module.
-check-quote-consistency=no
-
-# This flag controls whether the implicit-str-concat should generate a warning
-# on implicit string concatenation in sequences defined over several lines.
-check-str-concat-over-line-jumps=no
-
-
-[IMPORTS]
-
-# List of modules that can be imported at any level, not just the top level
-# one.
-allow-any-import-level=
-
-# Allow wildcard imports from modules that define __all__.
-allow-wildcard-with-all=no
-
-# Analyse import fallback blocks. This can be used to support both Python 2 and
-# 3 compatible code, which means that the block might have code that exists
-# only in one or another interpreter, leading to false positives when analysed.
-analyse-fallback-blocks=no
-
-# Deprecated modules which should not be used, separated by a comma.
-deprecated-modules=optparse,tkinter.tix
-
-# Create a graph of external dependencies in the given file (report RP0402 must
-# not be disabled).
-ext-import-graph=
-
-# Create a graph of every (i.e. internal and external) dependencies in the
-# given file (report RP0402 must not be disabled).
-import-graph=
-
-# Create a graph of internal dependencies in the given file (report RP0402 must
-# not be disabled).
-int-import-graph=
-
-# Force import order to recognize a module as part of the standard
-# compatibility libraries.
-known-standard-library=
-
-# Force import order to recognize a module as part of a third party library.
-known-third-party=enchant
-
-# Couples of modules and preferred modules, separated by a comma.
-preferred-modules=
-
-
-[CLASSES]
-
-# List of method names used to declare (i.e. assign) instance attributes.
-defining-attr-methods=__init__,
- __new__,
- setUp,
- __post_init__
-
-# List of member names, which should be excluded from the protected access
-# warning.
-exclude-protected=_asdict,
- _fields,
- _replace,
- _source,
- _make
-
-# List of valid names for the first argument in a class method.
-valid-classmethod-first-arg=cls
-
-# List of valid names for the first argument in a metaclass class method.
-valid-metaclass-classmethod-first-arg=cls
-
-
-[DESIGN]
-
-# Maximum number of arguments for function / method.
-max-args=5
-
-# Maximum number of attributes for a class (see R0902).
-max-attributes=7
-
-# Maximum number of boolean expressions in an if statement (see R0916).
-max-bool-expr=5
-
-# Maximum number of branch for function / method body.
-max-branches=12
-
-# Maximum number of locals for function / method body.
-max-locals=15
-
-# Maximum number of parents for a class (see R0901).
-max-parents=7
-
-# Maximum number of public methods for a class (see R0904).
-max-public-methods=20
-
-# Maximum number of return / yield for function / method body.
-max-returns=6
-
-# Maximum number of statements in function / method body.
-max-statements=50
-
-# Minimum number of public methods for a class (see R0903).
-min-public-methods=2
-
-
-[EXCEPTIONS]
-
-# Exceptions that will emit a warning when being caught. Defaults to
-# "BaseException, Exception".
-overgeneral-exceptions=BaseException,
- Exception
diff --git a/docs/references.bib b/docs/references.bib
new file mode 100644
index 0000000..f05dc51
--- /dev/null
+++ b/docs/references.bib
@@ -0,0 +1,26 @@
+@article{Virshup_2023,
+ doi = {10.1038/s41587-023-01733-8},
+ url = {https://doi.org/10.1038%2Fs41587-023-01733-8},
+ year = 2023,
+ month = {apr},
+ publisher = {Springer Science and Business Media {LLC}},
+ author = {Isaac Virshup and Danila Bredikhin and Lukas Heumos and Giovanni Palla and Gregor Sturm and Adam Gayoso and Ilia Kats and Mikaela Koutrouli and Philipp Angerer and Volker Bergen and Pierre Boyeau and Maren Büttner and Gokcen Eraslan and David Fischer and Max Frank and Justin Hong and Michal Klein and Marius Lange and Romain Lopez and Mohammad Lotfollahi and Malte D. Luecken and Fidel Ramirez and Jeffrey Regier and Sergei Rybakov and Anna C. Schaar and Valeh Valiollah Pour Amiri and Philipp Weiler and Galen Xing and Bonnie Berger and Dana Pe'er and Aviv Regev and Sarah A. Teichmann and Francesca Finotello and F. Alexander Wolf and Nir Yosef and Oliver Stegle and Fabian J. Theis and},
+ title = {The scverse project provides a computational ecosystem for single-cell omics data analysis},
+ journal = {Nature Biotechnology}
+}
+
+@article{bredikhin_2022,
+ abstract = {Advances in multi-omics have led to an explosion of multimodal datasets to address questions from basic biology to translation. While these data provide novel opportunities for discovery, they also pose management and analysis challenges, thus motivating the development of tailored computational solutions. Here, we present a data standard and an analysis framework for multi-omics, MUON, designed to organise, analyse, visualise, and exchange multimodal data. MUON stores multimodal data in an efficient yet flexible and interoperable data structure. MUON enables a versatile range of analyses, from data preprocessing to flexible multi-omics alignment.},
+ author = {Bredikhin, Danila and Kats, Ilia and Stegle, Oliver},
+ doi = {10.1186/s13059-021-02577-8},
+ journal = {{Genome Biol}},
+ month = feb,
+ nlmuniqueid = {100960660},
+ number = {1},
+ pages = {42},
+ pii = {10.1186/s13059-021-02577-8},
+ pubmed = {35105358},
+ title = {{MUON: multimodal omics analysis framework}},
+ volume = {23},
+ year = {2022}
+}
diff --git a/docs/references.md b/docs/references.md
new file mode 100644
index 0000000..00ad6a6
--- /dev/null
+++ b/docs/references.md
@@ -0,0 +1,5 @@
+# References
+
+```{bibliography}
+:cited:
+```
diff --git a/docs/source/_static/styles.css b/docs/source/_static/styles.css
deleted file mode 100644
index d3acee4..0000000
--- a/docs/source/_static/styles.css
+++ /dev/null
@@ -1,7 +0,0 @@
-.wy-side-nav-search, .wy-nav-top {
- background: #FFFFFF;
-}
-
-.wy-side-nav-search > a {
- padding: 0;
-}
\ No newline at end of file
diff --git a/docs/source/_templates/autosummary/function.rst b/docs/source/_templates/autosummary/function.rst
deleted file mode 100644
index 611659f..0000000
--- a/docs/source/_templates/autosummary/function.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-{{ fullname | escape | underline}}
-
-.. currentmodule:: {{ module }}
-
-.. autofunction:: {{ fullname }}
diff --git a/docs/source/_templates/autosummary/module.rst b/docs/source/_templates/autosummary/module.rst
deleted file mode 100644
index 9c0b618..0000000
--- a/docs/source/_templates/autosummary/module.rst
+++ /dev/null
@@ -1,62 +0,0 @@
-{{ fullname | escape | underline}}
-
-.. automodule:: {{ fullname }}
-
- {% block attributes %}
- {% if attributes %}
- .. rubric:: {{ _('Module Attributes') }}
-
- .. autosummary::
- {% for item in attributes %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block functions %}
- {% if functions %}
- .. rubric:: {{ _('Functions') }}
-
- .. autosummary::
- :toctree:
- {% for item in functions %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block classes %}
- {% if classes %}
- .. rubric:: {{ _('Classes') }}
-
- .. autosummary::
- :toctree:
- {% for item in classes %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block exceptions %}
- {% if exceptions %}
- .. rubric:: {{ _('Exceptions') }}
-
- .. autosummary::
- :toctree:
- {% for item in exceptions %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
-{% block modules %}
-{% if modules %}
-.. rubric:: Modules
-
-.. autosummary::
- :toctree:
-{% for item in modules %}
- {{ item }}
-{%- endfor %}
-{% endif %}
-{% endblock %}
diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
deleted file mode 100644
index 62bf8be..0000000
--- a/docs/source/api/index.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-.. mudata documentation master file, created by
- sphinx-quickstart on Sun Sep 13 02:51:46 2020.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
-
-
-API reference
-=============
-
-.. contents:: :local:
- :depth: 3
-
-.. toctree::
- :maxdepth: 10
-
-
-Multimodal omics
-----------------
-
-.. module:: mudata
-
-.. autosummary::
- :toctree: generated
-
- MuData
-
-Input/Output
-------------
-
-.. automodsumm:: mudata
- :functions-only:
- :toctree: generated
-
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
deleted file mode 100644
index 0a14223..0000000
--- a/docs/source/changelog.rst
+++ /dev/null
@@ -1,101 +0,0 @@
-Release notes
-=============
-
-.. contents:: :local:
- :depth: 3
-
-.. toctree::
- :maxdepth: 10
-
- *
-v0.3.3
-------
-
-* Fixed an `issue `_ in ``update()``
-* Fixed an `issue `_ in ``push_obs()``
-
-v0.3.2
-------
-
-Fixed an `issue `_ in ``update()``
-
-v0.3.1
-------
-
-This release brings compatibility with the anndata 0.10.9 release.
-
-v0.3.0
-------
-
-This version comes with a notable change to the way the annotations of individual modalities are treated.
-It implements pull/push interface for annotations with functions :func:`mudata.MuData.pull_obs`, :func:`mudata.MuData.pull_var`, :func:`mudata.MuData.push_obs`, and :func:`mudata.MuData.push_var`.
-
-:func:`mudata.MuData.update` performance and behaviour have been generally improved.
-For compatibility reasons, this release keeps the old behaviour of pulling annotations on read/update as default.
-
-.. note::
- If you want to adopt the new update behaviour, set ``mudata.set_options(pull_on_update=False)``. This will be the default behaviour in the next release.
- With it, the annotations will not be copied from the modalities on :func:`mudata.MuData.update` implicitly.
-
- To copy the annotations explicitly, you will need to use :func:`mudata.MuData.pull_obs` and/or :func:`mudata.MuData.pull_var`.
-
-This release also comes with new functionalities such as :func:`mudata.to_anndata`, :func:`mudata.to_mudata`, and :func:`mudata.concat`.
-
-:class:`mudata.MuData` objects now have a new ``.mod_names`` attribute. ``MuData.mod`` can be pretty-printed. Readers support ``fsspec``, and :func:`mudata.read_zarr` now supports ``mod-order``. The ``uns`` attribute is now properly handled by views.
-
-
-v0.2.4
-------
-
-This version brings compatibility with the numpy 2.0.0 release and with the future anndata 0.11 release, which deprecates the ``dtype`` argument.
-
-Requires anndata 0.10.8 or newer.
-
-v0.2.3
-------
-
-Fixes and improvements for backed objects, views, nested MuData objects, I/O and HTML representation.
-
-Pandas 2.0 compatibility.
-
-v0.2.2
-------
-
-``pathlib.Path`` objects now work in :func:`mudata.read`.
-
-v0.2.1
-------
-
-This version comes with :func:`mudata.MuData.update` improvements and optimisations.
-
-There is now :func:`mudata.MuData.__len__`. This should make it easier to build MuData into workflows that operate on data containers with length. In practice using :func:`mudata.MuData.n_obs` should be preferred.
-
-In this implementation of MuData, the default ``dict`` has replaced ``OrderedDict``, e.g. in the ``.uns`` slot, to improve compatibility with new serialisation versions. As of Python 3.7, dictionaries are guaranteed to preserve insertion order.
-
-v0.2.0
-------
-
-This version uses new I/O serialisation of `AnnData v0.8 `_.
-
-Updating a MuData object with :func:`mudata.MuData.update` is even faster in many use cases.
-
-There's `a new axes interface `_ that allows using MuData objects as containers with different shared dimensions.
-
-
-v0.1.2
-------
-
-Updating a MuData object with :func:`mudata.MuData.update` is now much faster.
-
-This version also comes with improved documentation, including `a new page describing the sharp bits `__.
-
-v0.1.1
-------
-
-This version comes with improved stability and bug fixes.
-
-v0.1.0
-------
-
-Initial ``mudata`` release with ``MuData`` (:class:`mudata.MuData`), previously a part of the ``muon`` framework.
-
diff --git a/docs/source/conf.py b/docs/source/conf.py
deleted file mode 100644
index 7c3097e..0000000
--- a/docs/source/conf.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import sys
-from pathlib import Path
-
-from recommonmark.transform import AutoStructify
-
-# sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(0, Path("../").resolve())
-
-
-# -- Project information -----------------------------------------------------
-
-project = "mudata"
-copyright = "2020 - 2024, Danila Bredikhin"
-author = "Danila Bredikhin"
-
-
-# -- General configuration ---------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
- "recommonmark",
- "sphinx.ext.napoleon",
- "sphinx.ext.autosummary",
- "sphinx.ext.autodoc",
- "sphinx_automodapi.automodapi",
- "sphinx.ext.autosectionlabel",
- "sphinx.ext.mathjax",
- "nbsphinx",
-]
-autosectionlabel_prefix_document = True
-
-source_suffix = {
- ".rst": "restructuredtext",
- ".txt": "markdown",
- ".md": "markdown",
-}
-
-# autodoc/autosummary config
-autosummary_generate = True
-autosummary_imported_members = False
-autodoc_default_options = {
- "members": True,
- "private-members": False, # Do not document private methods
- "special-members": False, # Do not document special methods like __init__
- "inherited-members": False,
- "show-inheritance": True,
-}
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ["_templates"]
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
-
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-html_theme = "sphinx_book_theme"
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
-html_logo = "../img/mudata.svg"
-html_theme_options = {
- "logo_only": True,
- "display_version": False,
-}
-
-
-# app setup hook
-def setup(app):
- app.add_config_value(
- "recommonmark_config",
- {
- #'url_resolver': lambda url: github_doc_root + url,
- "auto_toc_tree_section": "Contents",
- "enable_auto_toc_tree": True,
- "enable_math": False,
- "enable_inline_math": False,
- "enable_eval_rst": True,
- },
- True,
- )
- app.add_transform(AutoStructify)
- app.add_css_file("styles.css")
diff --git a/docs/source/index.rst b/docs/source/index.rst
deleted file mode 100644
index ea75e7a..0000000
--- a/docs/source/index.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-Say hello to MuData
-===================
-
-**MuData** is a format for annotated multimodal datasets. MuData is native to Python but provides cross-language functionality via HDF5-based ``.h5mu`` files.
-
-.. _website: https://scverse.org/
-.. _governance: https://scverse.org/about/roles/
-.. _NumFOCUS: https://numfocus.org/
-.. _donation: https://numfocus.org/donate-to-scverse/
-
-MuData is part of the scverse® project (`website`_, `governance`_) and is fiscally sponsored by `NumFOCUS`_.
-Please consider making a tax-deductible `donation`_ to help the project pay for developer time, professional services, travel, workshops, and a variety of other needs.
-
-.. raw:: html
-
-
-
-
-
-
-
-
-MuData objects as containers
-----------------------------
-
-The ``mudata`` package introduces multimodal data objects (the :class:`mudata.MuData` class), allowing Python users to work with increasingly complex datasets efficiently and to build new workflows and computational tools around them.
-::
- MuData object with n_obs × n_vars = 10110 × 110101
- 2 modalities
- atac: 10110 x 100001
- rna: 10110 x 10100
-
-MuData objects enable multimodal information to be stored & accessed naturally, embrace `AnnData `_ for the individual modalities, and can be serialized to ``.h5mu`` files. :doc:`Learn more about multimodal objects ` as well as :doc:`file formats for storing & sharing them `.
-
-Natural interface
------------------
-
-MuData objects feature an AnnData-like interface and familiar concepts such as *observations* and *variables* for the two data dimensions. Get familiar with MuData in the :doc:`Quickstart tutorial `.
-
-Handling MuData objects
------------------------
-
-A flagship framework for multimodal omics analysis — ``muon`` — has been built around the MuData format. Find more information on it `in its documentation `_ and `on the tutorials page `_ as well as `in the corresponding publication `_.
-
-
-.. toctree::
- :hidden:
- :maxdepth: 1
- :caption: Getting started
-
- notebooks/quickstart_mudata.ipynb
- notebooks/nuances.ipynb
- notebooks/axes.ipynb
- notebooks/annotations_management.ipynb
-
-.. toctree::
- :hidden:
- :maxdepth: 1
- :caption: Documentation
-
- install
- io/input
- io/mudata
- io/output
- io/spec
- api/index
- changelog
-
diff --git a/docs/source/install.rst b/docs/source/install.rst
deleted file mode 100644
index 83f9d7e..0000000
--- a/docs/source/install.rst
+++ /dev/null
@@ -1,36 +0,0 @@
-Install mudata
-==============
-
-.. contents:: :local:
- :depth: 3
-
-.. toctree::
- :maxdepth: 10
-
- *
-
-Stable version
---------------
-
-``mudata`` can be installed `from PyPI `_ with ``pip``:
-::
- pip install mudata
-
-
-Development version
--------------------
-
-To use a pre-release version of ``mudata``, install it from `the GitHub repository `_:
-::
- pip install git+https://github.com/scverse/mudata
-
-
-Troubleshooting
----------------
-
-Please consult the details on installing ``scanpy`` and its dependencies `here `_. If there are issues that have not been described, addressed, or documented, please consider `opening an issue `_.
-
-
-Hacking on mudata
------------------
-For hacking on the package, it is most convenient to do a so-called development-mode install, which symlinks files in your Python package directory to your mudata working directory, such that you do not need to reinstall after every change. We use `flit `_ as our build system. After installing flit, you can run ``flit install -s`` from within the mudata project directory to perform a development-mode install. Happy hacking!
diff --git a/docs/source/io/input.rst b/docs/source/io/input.rst
deleted file mode 100644
index 0ac0d5f..0000000
--- a/docs/source/io/input.rst
+++ /dev/null
@@ -1,91 +0,0 @@
-.. MuData documentation master file, created by
- sphinx-quickstart on Thu Oct 22 02:24:42 2020.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
-Input data
-==========
-
-The default way to import ``MuData`` is the following:
-::
- from mudata import MuData
-
-
-There are various ways in which the data can be provided to create a MuData object:
-
-
-.. contents:: :local:
- :depth: 3
-
-.. toctree::
- :maxdepth: 10
-
- *
-
-
-AnnData objects
----------------
-
-A MuData object can be constructed from a dictionary of existing AnnData objects:
-::
- mdata = MuData({'rna': adata_rna, 'atac': adata_atac})
-
-
-AnnData objects themselves can be easily constructed from NumPy arrays and/or Pandas DataFrames annotating features (*variables*) and samples/cells (*observations*). This makes it a rather general data format to work with any type of high-dimensional data.
-::
- from anndata import AnnData
- adata = AnnData(X=matrix, obs=metadata_df, var=features_df)
-
-
-Please see more details on how to operate on AnnData objects `in the anndata documentation `_.
-
-
-Omics data
-----------
-
-When data formats specific to genomics are of interest, specialised readers can be found in analysis frameworks such as `muon `_. These functions, including the ones for Cell Ranger count matrices as well as Snap files, `are described here `_.
-
-
-Remote storage
---------------
-
-MuData objects can be read and cached from remote locations, including via HTTP(S) or from S3 buckets. This is achieved via `fsspec <https://github.com/fsspec/filesystem_spec>`_. For example, to read a MuData object from a remote server:
-::
-   import fsspec
-   import mudata
-
- fname = "https://github.com/gtca/h5xx-datasets/raw/main/datasets/minipbcite.h5mu?download="
- with fsspec.open(fname) as f:
- mdata = mudata.read_h5mu(f)
-
-
-A caching layer can be added in the following way:
-::
- fname_cached = "filecache::" + fname
- with fsspec.open(fname_cached, filecache={'cache_storage': '/tmp/'}) as f:
- mdata = mudata.read_h5mu(f)
-
-
-For more ``fsspec`` usage examples see `its documentation <https://filesystem-spec.readthedocs.io/>`_.
-
-S3
-^^
-
-MuData objects in the ``.h5mu`` format stored in an S3 bucket can be read with ``fsspec`` as well:
-::
- storage_options = {
- 'endpoint_url': 'localhost:9000',
- 'key': 'AWS_ACCESS_KEY_ID',
- 'secret': 'AWS_SECRET_ACCESS_KEY',
- }
-
- with fsspec.open('s3://bucket/dataset.h5mu', **storage_options) as f:
-       mdata = mudata.read_h5mu(f)
-
-
-MuData objects stored in the ``.zarr`` format in an S3 bucket can be read from a *mapping*:
-::
- import s3fs
-
- s3 = s3fs.S3FileSystem(**storage_options)
- store = s3.get_mapper('s3://bucket/dataset.zarr')
- mdata = mudata.read_zarr(store)
diff --git a/docs/source/io/mudata.rst b/docs/source/io/mudata.rst
deleted file mode 100644
index f1c826f..0000000
--- a/docs/source/io/mudata.rst
+++ /dev/null
@@ -1,167 +0,0 @@
-Multimodal data objects
-=======================
-
-:class:`mudata.MuData` is a class for multimodal objects:
-::
- from mudata import MuData
-
-
-``MuData`` objects comprise a dictionary with ``AnnData`` objects, one per modality, in their ``.mod`` attribute. Just like ``AnnData`` objects themselves, they also contain attributes like ``.obs`` with annotations of observations (samples or cells), ``.obsm`` with their multidimensional annotations such as embeddings, etc.
-
-.. contents:: :local:
- :depth: 3
-
-.. toctree::
- :maxdepth: 10
-
- *
-
-MuData's attributes
--------------------
-
-Key attributes & methods of ``MuData`` objects as well as important concepts are described below. A full list of attributes and methods of multimodal containers can be found in the :class:`mudata.MuData` documentation.
-
-.mod
-^^^^
-
-Modalities are stored in a collection accessible via the ``.mod`` attribute of the ``MuData`` object with names of modalities as keys and ``AnnData`` objects as values.
-::
- list(mdata.mod.keys())
- # => ['atac', 'rna']
-
-
-Individual modalities can be accessed with their names via the ``.mod`` attribute or via the ``MuData`` object itself as a shorthand:
-::
- mdata.mod['rna']
- # or
- mdata['rna']
- # => AnnData object
-
-
-.obs & .var
-^^^^^^^^^^^
-
-.. warning::
-   Version 0.3 introduces a pull/push interface for annotations. For compatibility reasons, the old behaviour of pulling annotations on read/update is kept as the default.
-
- This will be changed in the next release, and the annotations will not be copied implicitly.
- To adopt the new behaviour, use :func:`mudata.set_options` with ``pull_on_update=False``.
- The new approach to ``.update()`` and annotations is described below.
-
-Sample (cell) annotations are stored in the data frame accessible via the ``.obs`` attribute. The same goes for ``.var``, which contains annotations of variables (features).
-
-Copies of columns from ``.obs`` or ``.var`` data frames of individual modalities can be added with the ``.pull_obs()`` or ``.pull_var()`` methods:
-::
- mdata.pull_obs()
- mdata.pull_var()
-
-When the annotations are changed in ``AnnData`` objects of modalities, e.g. new columns are added, they can be propagated to the ``.obs`` or ``.var`` data frames with the same ``.pull_obs()`` or ``.pull_var()`` methods.
-
-Observation columns copied from individual modalities contain the modality name as their prefix, e.g. ``rna:n_genes``. The same is true for variable columns; however, if there are columns with identical names in ``.var`` of multiple modalities, e.g. ``n_cells``, these columns are merged across modalities and no prefix is added.
-
-When there are changes directly related to observations or variables, e.g. samples (cells) are filtered out or features (genes) are renamed, the changes have to be fetched with the ``.update()`` method:
-::
- mdata.update()
-
-
-.obsm
-^^^^^
-
-Multidimensional annotations of samples (cells) are accessible in the ``.obsm`` attribute. For instance, these can be UMAP coordinates that were learnt jointly on all modalities, or `MOFA `_ embeddings, a generalisation of PCA to multiple omics.
-::
- # mdata is a MuData object with CITE-seq data
- mdata.obsm
- # => MuAxisArrays with keys: X_umap, X_mofa, prot, rna
-
-Besides embeddings, this slot may also contain boolean vectors, one per modality, indicating whether each sample (cell) is present in the respective modality. For instance, if all samples (cells) are the same across modalities, all values in those vectors are ``True``.
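-
-As a minimal sketch of inspecting such a vector (assuming a MuData object ``mdata`` with an ``rna`` modality):
-::
-   mdata.obsm['rna']
-   # => array([ True,  True, ..., False])
-
-   # names of the observations present in the 'rna' modality
-   mdata.obs_names[mdata.obsm['rna']]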
-
-
-Container's shape
------------------
-
-The ``MuData`` object's shape is represented by two numbers calculated from the shapes of individual modalities — one for the number of observations and one for the number of variables.
-::
- mdata.shape
- # => (9573, 132465)
- mdata.n_obs
- # => 9573
- mdata.n_vars
- # => 132465
-
-By default, variables are always counted as belonging uniquely to a single modality, while observations with the same name are counted as the same observation, for which variables were measured across multiple modalities.
-::
- [ad.shape for ad in mdata.mod.values()]
- # => [(9500, 10100), (9573, 122364)]
-
-If the shape of a modality is changed, :func:`mudata.MuData.update` has to be run to bring the respective updates to the ``MuData`` object.
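-
-For example, a sketch of such an update (the ``highly_variable`` column here is hypothetical):
-::
-   # subsetting a modality changes its shape...
-   rna = mdata.mod['rna']
-   mdata.mod['rna'] = rna[:, rna.var['highly_variable']].copy()
-
-   # ...so the global object has to be synced with its modalities
-   mdata.update()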
-
-
-Keeping containers up to date
------------------------------
-
-.. warning::
-   Version 0.3 introduces a pull/push interface for annotations. For compatibility reasons, the old behaviour of pulling annotations on read/update is kept as the default.
-
- This will be changed in the next release, and the annotations will not be copied implicitly.
- To adopt the new behaviour, use :func:`mudata.set_options` with ``pull_on_update=False``.
- The new approach to ``.update()`` and annotations is described below.
-
-Modalities inside the ``MuData`` container are full-fledged ``AnnData`` objects, which can be operated independently with any tool that works on ``AnnData`` objects.
-When modalities are changed externally, the shape of the ``MuData`` object as well as metadata fetched from individual modalities will still reflect the previous state of the data.
-To keep the container up to date, there is an ``.update()`` method that syncs the ``.obs_names`` and ``.var_names`` of the ``MuData`` object with the ones of the modalities.
-
-
-Managing annotations
---------------------
-
-To fetch the corresponding annotations from individual modalities, there are :func:`mudata.MuData.pull_obs` and :func:`mudata.MuData.pull_var` methods.
-
-To update the annotations of individual modalities with the global annotations, :func:`mudata.MuData.push_obs` and :func:`mudata.MuData.push_var` methods can be used.
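-
-A brief sketch of this round trip (the ``celltype`` column is illustrative):
-::
-   # copy columns from the modalities into the global .obs,
-   # e.g. as 'rna:n_genes'
-   mdata.pull_obs()
-
-   # propagate global columns, e.g. mdata.obs['celltype'],
-   # down to the .obs of each modality
-   mdata.push_obs()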
-
-
-Backed containers
------------------
-
-To enable the backed mode for the count matrices in all the modalities, ``.h5mu`` files can be read with the relevant flag:
-::
- mdata_b = mudata.read("filename.h5mu", backed=True)
- mdata_b.isbacked
- # => True
-
-
-When creating a copy of a backed ``MuData`` object, the filename has to be provided, and the copy of the object will be backed at a new location.
-::
- mdata_copy = mdata_b.copy("filename_copy.h5mu")
- mdata_b.file.filename
- # => 'filename_copy.h5mu'
-
-
-Container's views
------------------
-
-Analogous to the behaviour of ``AnnData`` objects, slicing ``MuData`` objects returns views of the original data.
-::
- view = mdata[:100,:1000]
- view.is_view
- # => True
-
- # In the view, each modality is a view as well
- view["A"].is_view
- # => True
-
-Subsetting ``MuData`` objects is special since it slices them across modalities. That is, the slicing operation for a set of ``obs_names`` and/or ``var_names`` will be performed for each modality, not only for the global multimodal annotation.
-
-This behaviour makes workflows memory-efficient, which is especially important when working with large datasets. If the object is to be modified, however, a copy of it should be created, which is not a view anymore and has no dependence on the original object.
-::
- mdata_sub = view.copy()
- mdata_sub.is_view
- # => False
-
-If the original object is backed, the filename has to be provided to the ``.copy()`` call, and the resulting object will be backed at a new location.
-::
- mdata_sub = backed_view.copy("mdata_sub.h5mu")
- mdata_sub.is_view
- # => False
- mdata_sub.isbacked
- # => True
-
diff --git a/docs/source/io/output.rst b/docs/source/io/output.rst
deleted file mode 100644
index 2169f2f..0000000
--- a/docs/source/io/output.rst
+++ /dev/null
@@ -1,78 +0,0 @@
-Output data
-===========
-
-In order to save & share multimodal data, the ``.h5mu`` file format has been designed.
-
-.. contents:: :local:
- :depth: 3
-
-.. toctree::
- :maxdepth: 10
-
- *
-
-
-.h5mu files
------------
-
-``.h5mu`` files are the default storage for MuData objects. These are HDF5 files with a standardised structure, which is similar to that of ``.h5ad`` files where AnnData objects are stored. The most noticeable distinction is the presence of the ``.mod`` group, where individual modalities are stored in the same way as they would be stored in ``.h5ad`` files.
-::
- mdata.write("mudata.h5mu")
-
-Inspect the contents of the file in the terminal:
-
-.. code-block:: console
-
- > h5ls mudata.h5mu
- mod Group
- obs Group
- obsm Group
- var Group
- varm Group
-
-   > h5ls mudata.h5mu/mod
- atac Group
- rna Group
-
-
-
-AnnData inside .h5mu
-^^^^^^^^^^^^^^^^^^^^
-
-Individual modalities in the ``.h5mu`` file are stored in exactly the same way as AnnData objects. This, together with the hierarchical nature of HDF5 files, makes it possible to read individual modalities from ``.h5mu`` files as well as to save individual modalities to the ``.h5mu`` file:
-::
- adata = mudata.read("mudata.h5mu/rna")
-
- mudata.write("mudata.h5mu/rna", adata)
-
-The function :func:`mudata.read` automatically decides, based on the input, whether :func:`mudata.read_h5mu` or :func:`mudata.read_h5ad` should be called.
-
-Learn more about the on-disk format specification shared by MuData and AnnData `in the AnnData documentation `_.
-
-
-.zarr files
------------
-
-`Zarr `_ is a cloud-friendly format for chunked N-dimensional arrays. Zarr is another supported serialisation format for MuData objects:
-::
- mdata = mudata.read_zarr("mudata.zarr")
-
- mdata.write_zarr("mudata.zarr")
-
-Just as with ``.h5mu`` files, MuData objects saved in ``.zarr`` format resemble how AnnData objects are stored, with one additional level of abstraction:
-
-.. code-block:: console
-
- > tree -L 1 mudata.zarr
- mudata.zarr
- ├── mod
- ├── obs
- ├── obsm
- ├── obsmap
- ├── obsp
- ├── uns
- ├── var
- ├── varm
- ├── varmap
- └── varp
-
diff --git a/docs/source/io/spec.rst b/docs/source/io/spec.rst
deleted file mode 100644
index bb3d5dd..0000000
--- a/docs/source/io/spec.rst
+++ /dev/null
@@ -1,57 +0,0 @@
-MuData specification [RFC]
-==========================
-
-Building on top of the `AnnData spec `_, this document provides details on the ``MuData`` on-disk format. For user-facing features, please see `this document `__.
-::
- >>> import h5py
- >>> f = h5py.File("citeseq.h5mu")
- >>> list(f.keys())
- ['mod', 'obs', 'obsm', 'obsmap', 'uns', 'var', 'varm', 'varmap']
-
-.. contents:: :local:
- :depth: 3
-
-.. toctree::
- :maxdepth: 10
-
- *
-
-
-.mod
-----
-
-Modalities are stored in the ``.mod`` group of the ``.h5mu`` file in alphabetical order. To preserve the order of the modalities, there is an attribute ``"mod-order"`` that lists the modalities in their respective order. If some modalities are missing from that attribute, the attribute is to be ignored.
-::
- >>> dict(f["mod"].attrs)
- {'mod-order': array(['prot', 'rna'], dtype=object)}
-
-
-.obsmap and .varmap
--------------------
-
-While in practice ``MuData`` relies on ``.obs_names`` and ``.var_names`` to collate global observations and variables, it also allows disambiguating items with the same name using integer maps. For example, global observations will have non-zero integer values in ``.obsmap["rna"]`` if they are present in the ``"rna"`` modality. If an observation or a variable is missing from a modality, it will correspond to a ``0`` value.
-::
- >>> list(f["obsmap"].keys())
- ['prot', 'rna']
- >>> import numpy as np
- >>> np.array(f["obsmap"]["rna"])
- array([ 1, 2, 3, ..., 3889, 3890, 3891], dtype=uint32)
- >>> np.array(f["obsmap"]["prot"])
- array([ 1, 2, 3, ..., 3889, 3890, 3891], dtype=uint32)
-
- >>> list(f["varmap"].keys())
- ['prot', 'rna']
- >>> np.array(f["varmap"]["rna"])
- array([ 0, 0, 0, ..., 17804, 17805, 17806], dtype=uint32)
- >>> np.array(f["varmap"]["prot"])
- array([1, 2, 3, ..., 0, 0, 0], dtype=uint32)
-
-.axis
------
-
-The axis describes which dimensions are shared: observations (``axis=0``), variables (``axis=1``), or both (``axis=-1``). It is recorded in the ``axis`` attribute of the file:
-::
- >>> f.attrs["axis"]
- 0
-
-Multimodal datasets, which have observations shared between modalities, will have ``axis=0``. If no ``axis`` attribute is available, such as in files written with older versions of this specification, it is assumed to be ``0`` by default.
diff --git a/docs/source/notebooks/requirements.txt b/docs/source/notebooks/requirements.txt
deleted file mode 100644
index 131a333..0000000
--- a/docs/source/notebooks/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-numpy
-pandas
-anndata
-mudata
diff --git a/docs/source/nuances.rst b/docs/source/nuances.rst
deleted file mode 100644
index 2e5aee4..0000000
--- a/docs/source/nuances.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-Nuances
-=======
-
-This is *the sharp bits* page for ``mudata``, which provides information on the nuances of working with ``MuData`` objects.
-
-.. contents:: :local:
- :depth: 3
-
-.. toctree::
- :maxdepth: 10
-
- *
-
-
-Variable names
---------------
-
-``MuData`` is designed with the assumption that features (variables) differ between modalities. Hence their names should be unique across modalities; in other words, ``.var_names`` are checked for uniqueness across modalities.
-
-This behaviour ensures all the functions are easy to reason about. For instance, if there is a ``var_name`` that is present in both modalities, what happens when plotting a joint embedding from ``.obsm`` coloured by this ``var_name`` is not strictly defined.
-
-Nevertheless, ``MuData`` can accommodate modalities with duplicated ``.var_names``. For typical workflows, we recommend renaming them manually or calling ``.var_names_make_unique()``.
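-
-A minimal sketch (modality names are illustrative):
-::
-   mdata['rna'].var_names.intersection(mdata['prot'].var_names)
-   # => Index([...], dtype='object') listing any duplicated names
-
-   mdata.var_names_make_unique()
-   mdata.update()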
-
-
-Update
-------
-
-Modalities in ``MuData`` objects are full-featured ``AnnData`` objects. Hence they can be operated on individually, and their ``MuData`` parent has to be updated to fetch this information.
-
diff --git a/pyproject.toml b/pyproject.toml
index 0f0a29d..743bcc5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,65 +1,73 @@
[build-system]
build-backend = "hatchling.build"
-requires = ["hatchling", "hatch-vcs"]
+requires = [ "hatch-vcs", "hatchling" ]
[project]
name = "mudata"
description = "Multimodal data"
-requires-python = ">= 3.10"
-license = "BSD-3-Clause"
-authors = [
- { name = "Danila Bredikhin" },
-]
+readme = "README.md"
+license = { file = "LICENSE" }
maintainers = [
- { name = "Danila Bredikhin", email = "danila@stanford.edu" },
+ { name = "Danila Bredikhin", email = "danila@stanford.edu" },
]
-readme = "README.md"
-classifiers = [
- "Programming Language :: Python :: 3",
- "License :: OSI Approved :: BSD License",
- "Operating System :: OS Independent",
- "Development Status :: 3 - Alpha",
- "Topic :: Scientific/Engineering :: Bio-Informatics",
- "Intended Audience :: Science/Research"
+authors = [
+ { name = "Danila Bredikhin" },
]
-dependencies = [
- "anndata >= 0.10.8",
- "pandas >= 1.4.0"
+requires-python = ">=3.10"
+classifiers = [
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: BSD License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python :: 3 :: Only",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
]
-dynamic = ["version"]
-
-[project.urls]
-Documentation = "https://mudata.readthedocs.io/en/latest/"
-Source = "https://github.com/scverse/mudata"
-Home-page = "https://muon.scverse.org/"
+dynamic = [ "version" ]
-[project.optional-dependencies]
-dev = [
- "setuptools_scm",
+dependencies = [
+ "anndata>=0.10.8",
+ "numpy",
+ "pandas>=1.4",
+ "scipy",
+ # for debug logging (referenced from the issue template)
+ "session-info2",
]
-docs = [
- "sphinx",
- "sphinx-book-theme",
- "pydata-sphinx-theme",
- "readthedocs-sphinx-search",
- "nbsphinx",
- "sphinx_automodapi",
- "recommonmark"
+optional-dependencies.dev = [
+ "fsspec",
+ "hatch-vcs", # for version handling in editable mode
+ "pre-commit",
+ "twine>=4.0.2",
+ "zarr<3",
]
-test = [
- "zarr<3",
- "pytest"
+optional-dependencies.doc = [
+ "docutils>=0.8,!=0.18.*,!=0.19.*",
+ "ipykernel",
+ "ipython",
+ "myst-nb>=1.1",
+ "pandas",
+ # Until pybtex >0.24.0 releases: https://bitbucket.org/pybtex-devs/pybtex/issues/169/
+ "setuptools",
+ "sphinx>=8.1",
+ "sphinx-autodoc-typehints",
+ "sphinx-automodapi",
+ "sphinx-book-theme>=1",
+ "sphinx-copybutton",
+ "sphinx-tabs",
+ "sphinxcontrib-bibtex>=1",
+ "sphinxext-opengraph",
]
-
-[tool.pytest.ini_options]
-python_files = "test_*.py"
-testpaths = [
- "./tests", # unit tests
+optional-dependencies.test = [
+ "coverage>=7.10",
+ "pytest",
+ "zarr<3",
]
-
-[tool.black]
-line-length = 100
-target-version = ['py39']
+# https://docs.pypi.org/project_metadata/#project-urls
+urls.Documentation = "https://mudata.readthedocs.io/"
+urls.Homepage = "https://muon.scverse.org"
+urls.Source = "https://github.com/scverse/mudata"
[tool.hatch.version]
source = "vcs"
@@ -67,38 +75,102 @@ source = "vcs"
[tool.hatch.build.hooks.vcs]
version-file = "src/mudata/_version.py"
-[tool.hatch.build.targets.wheel]
-packages = ["src/mudata"]
+[tool.hatch.envs.default]
+installer = "uv"
+features = [ "dev" ]
-[tool.hatch.build.targets.sdist]
-exclude = [
- "/.github",
- "/docs",
+[tool.hatch.envs.docs]
+features = [ "doc" ]
+scripts.build = "sphinx-build -M html docs docs/_build {args}"
+scripts.open = "python -m webbrowser -t docs/_build/html/index.html"
+scripts.clean = "git clean -fdX -- {args:docs}"
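+# For example (assuming hatch is installed), the scripts above can be invoked
+# as `hatch run docs:build` and `hatch run docs:open`.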
+
+# Test the lowest and highest supported Python versions with normal deps
+[[tool.hatch.envs.hatch-test.matrix]]
+deps = [ "stable" ]
+python = [ "3.10", "3.13" ]
+
+# Test the newest supported Python version also with pre-release deps
+[[tool.hatch.envs.hatch-test.matrix]]
+deps = [ "pre" ]
+python = [ "3.13" ]
+
+[tool.hatch.envs.hatch-test]
+features = [ "dev", "test" ]
+
+[tool.hatch.envs.hatch-test.overrides]
+# If the matrix variable `deps` is set to "pre",
+# set the environment variable `UV_PRERELEASE` to "allow".
+matrix.deps.env-vars = [
+ { key = "UV_PRERELEASE", value = "allow", if = [ "pre" ] },
]
[tool.ruff]
-src = ["src"]
-
-[tool.ruff.format]
-docstring-code-format = true
-
-[tool.ruff.lint]
-select = [
- "E", # Error detected by Pycodestyle
- "F", # Errors detected by Pyflakes
- "W", # Warning detected by Pycodestyle
- "UP", # pyupgrade
- "I", # isort
- "TCH", # manage type checking blocks
- "ICN", # Follow import conventions
- "PTH", # Pathlib instead of os.path
- "PT", # Pytest conventions
+line-length = 120
+src = [ "src" ]
+extend-include = [ "*.ipynb" ]
+
+format.skip-magic-trailing-comma = true
+format.exclude = [ "*.ipynb" ]
+format.docstring-code-format = true
+
+lint.select = [
+ "B", # flake8-bugbear
+ "BLE", # flake8-blind-except
+ "C4", # flake8-comprehensions
+ "D", # pydocstyle
+ "E", # Error detected by Pycodestyle
+ "F", # Errors detected by Pyflakes
+ "I", # isort
+ "RUF100", # Report unused noqa directives
+ "TID", # flake8-tidy-imports
+ "UP", # pyupgrade
+ "W", # Warning detected by Pycodestyle
+]
+lint.ignore = [
+ "B008", # Errors from function calls in argument defaults. These are fine when the result is immutable.
+ "D100", # Missing docstring in public module
+ "D104", # Missing docstring in public package
+ "D105", # __magic__ methods are often self-explanatory, allow missing docstrings
+ "D107", # Missing docstring in __init__
+ # Disable one in each pair of mutually incompatible rules
+ "D203", # We don’t want a blank line before a class docstring
+ "D213", # <> We want docstrings to start immediately after the opening triple quote
+ "D400", # first line should end with a period [Bug: doesn’t work with single-line docstrings]
+ "D401", # First line should be in imperative mood; try rephrasing
+ "E501", # line too long -> we accept long comment lines; formatter gets rid of long code lines
+ "E731", # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
+ "E741", # allow I, O, l as variable names -> I is the identity matrix
+ "TID252", # allow relative imports
]
-ignore = [
- # line too long -> we accept long comment lines; formatter gets rid of long code lines
- "E501",
- # Do not assign a lambda expression, use a def -> AnnData allows lambda expression assignments,
- "E731",
- # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation
- "E741",
+lint.per-file-ignores."*/__init__.py" = [ "F401" ]
+lint.per-file-ignores."docs/*" = [ "I" ]
+lint.per-file-ignores."docs/notebooks/*" = [ "D", "F403", "F405" ]
+lint.per-file-ignores."tests/*" = [ "D" ]
+lint.pydocstyle.convention = "numpy"
+
+[tool.pytest.ini_options]
+testpaths = [ "tests" ]
+xfail_strict = true
+addopts = [
+ "--import-mode=importlib", # allow using test files with same name
+]
+
+[tool.coverage.run]
+source = [ "mudata" ]
+patch = [ "subprocess" ]
+omit = [
+ "**/test_*.py",
+]
+
+[tool.cruft]
+skip = [
+ "tests",
+ "src/**/__init__.py",
+ "src/**/basic.py",
+ "docs/api.md",
+ "docs/changelog.md",
+ "docs/references.bib",
+ "docs/references.md",
+ "docs/notebooks/example.ipynb",
]
diff --git a/src/mudata/__init__.py b/src/mudata/__init__.py
index e8ff1ac..4377213 100644
--- a/src/mudata/__init__.py
+++ b/src/mudata/__init__.py
@@ -1,15 +1,5 @@
"""Multimodal datasets"""
-try: # See https://github.com/maresb/hatch-vcs-footgun-example
- from setuptools_scm import get_version
-
- __version__ = get_version(root="../..", relative_to=__file__)
-except (ImportError, LookupError):
- try:
- from ._version import __version__
- except ModuleNotFoundError:
- raise RuntimeError("mudata is not correctly installed. Please install it, e.g. with pip.")
-
from anndata import AnnData
from ._core import utils
@@ -29,6 +19,7 @@
from ._core.merge import concat
from ._core.mudata import MuData
from ._core.to_ import to_anndata, to_mudata
+from ._version import __version__, __version_tuple__
__anndataversion__ = "0.1.0"
__mudataversion__ = "0.1.0"
diff --git a/src/mudata/_core/compat.py b/src/mudata/_core/compat.py
index 1419811..cc7f7a0 100644
--- a/src/mudata/_core/compat.py
+++ b/src/mudata/_core/compat.py
@@ -12,35 +12,17 @@
from anndata._core.aligned_mapping import AlignedView, AxisArrays, PairwiseArrays
except ImportError:
# anndata < 0.10.9
- from anndata._core.aligned_mapping import (
- AlignedViewMixin as AlignedView,
- )
- from anndata._core.aligned_mapping import (
- AxisArrays as AxisArraysLegacy,
- )
- from anndata._core.aligned_mapping import (
- AxisArraysBase,
- )
- from anndata._core.aligned_mapping import (
- PairwiseArrays as PairwiseArraysLegacy,
- )
+ from anndata._core.aligned_mapping import AlignedViewMixin as AlignedView
+ from anndata._core.aligned_mapping import AxisArrays as AxisArraysLegacy
+ from anndata._core.aligned_mapping import AxisArraysBase
+ from anndata._core.aligned_mapping import PairwiseArrays as PairwiseArraysLegacy
class AxisArrays(AxisArraysLegacy):
- def __init__(
- self,
- parent: AnnData | Raw,
- axis: int,
- store: Mapping | AxisArraysBase | None = None,
- ):
+ def __init__(self, parent: AnnData | Raw, axis: int, store: Mapping | AxisArraysBase | None = None):
super().__init__(parent, axis=axis, vals=store)
class PairwiseArrays(PairwiseArraysLegacy):
- def __init__(
- self,
- parent: AnnData,
- axis: int,
- store: Mapping | None = None,
- ):
+ def __init__(self, parent: AnnData, axis: int, store: Mapping | None = None):
super().__init__(parent, axis=axis, vals=store)
diff --git a/src/mudata/_core/config.py b/src/mudata/_core/config.py
index 93631cf..6e5671b 100644
--- a/src/mudata/_core/config.py
+++ b/src/mudata/_core/config.py
@@ -1,10 +1,6 @@
import logging as log
-OPTIONS = {
- "display_style": "text",
- "display_html_expand": 0b010,
- "pull_on_update": None,
-}
+OPTIONS = {"display_style": "text", "display_html_expand": 0b010, "pull_on_update": None}
_VALID_OPTIONS = {
"display_style": lambda x: x in ("text", "html"),
@@ -25,12 +21,12 @@ class set_options:
Options can be set in the context:
- >>> with mudata.set_options(display_style='html'):
+ >>> with mudata.set_options(display_style="html"):
... print("Options are applied here")
... or globally:
- >>> mudata.set_options(display_style='html')
+ >>> mudata.set_options(display_style="html")
"""
def __init__(self, **kwargs):
diff --git a/src/mudata/_core/file_backing.py b/src/mudata/_core/file_backing.py
index 5d5552e..9b8e147 100644
--- a/src/mudata/_core/file_backing.py
+++ b/src/mudata/_core/file_backing.py
@@ -9,28 +9,16 @@
class MuDataFileManager(AnnDataFileManager):
- def __init__(
- self,
- filename: PathLike | None = None,
- filemode: Literal["r", "r+"] | None = None,
- ):
+ def __init__(self, filename: PathLike | None = None, filemode: Literal["r", "r+"] | None = None):
self._counter = 0
self._children = WeakSet()
if filename is not None:
filename = Path(filename)
super().__init__(ad.AnnData(), filename, filemode)
- def open(
- self,
- filename: PathLike | None = None,
- filemode: Literal["r", "r+"] | None = None,
- add_ref=False,
- ) -> bool:
+ def open(self, filename: PathLike | None = None, filemode: Literal["r", "r+"] | None = None, add_ref=False) -> bool:
if self.is_open and (
- filename is None
- and filemode is None
- or filename == self.filename
- and filemode == self._filemode
+ filename is None and filemode is None or filename == self.filename and filemode == self._filemode
):
if add_ref:
self.counter += 1
@@ -80,12 +68,7 @@ def filename(self, filename: PathLike | None):
class AnnDataFileManager(ad._core.file_backing.AnnDataFileManager):
_h5files = {}
- def __init__(
- self,
- adata: ad.AnnData,
- mod: str,
- parent: MuDataFileManager,
- ):
+ def __init__(self, adata: ad.AnnData, mod: str, parent: MuDataFileManager):
self._parent = parent
self._mod = mod
parent._children.add(self)
@@ -94,11 +77,7 @@ def __init__(
if parent.is_open:
self._set_file()
- def open(
- self,
- filename: PathLike | None = None,
- filemode: Literal["r", "r+"] | None = None,
- ):
+ def open(self, filename: PathLike | None = None, filemode: Literal["r", "r+"] | None = None):
if not self._parent.open(filename, filemode, add_ref=True):
self._set_file()
diff --git a/src/mudata/_core/io.py b/src/mudata/_core/io.py
index 6882348..c6c101c 100644
--- a/src/mudata/_core/io.py
+++ b/src/mudata/_core/io.py
@@ -36,9 +36,7 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs):
file,
"obs",
mdata.strings_to_categoricals(
- mdata._shrink_attr("obs", inplace=False).copy()
- if OPTIONS["pull_on_update"] is None
- else mdata.obs.copy()
+ mdata._shrink_attr("obs", inplace=False).copy() if OPTIONS["pull_on_update"] is None else mdata.obs.copy()
),
dataset_kwargs=kwargs,
)
@@ -46,9 +44,7 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs):
file,
"var",
mdata.strings_to_categoricals(
- mdata._shrink_attr("var", inplace=False).copy()
- if OPTIONS["pull_on_update"] is None
- else mdata.var.copy()
+ mdata._shrink_attr("var", inplace=False).copy() if OPTIONS["pull_on_update"] is None else mdata.var.copy()
),
dataset_kwargs=kwargs,
)
@@ -65,11 +61,9 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs):
attrs["axis"] = mdata.axis
mod = file.require_group("mod")
- for k, v in mdata.mod.items():
+ for k, adata in mdata.mod.items():
group = mod.require_group(k)
- adata = mdata.mod[k]
-
adata.strings_to_categoricals()
if adata.raw is not None:
adata.strings_to_categoricals(adata.raw.var)
@@ -108,13 +102,7 @@ def _write_h5mu(file: h5py.File, mdata: MuData, write_data=True, **kwargs):
mdata.update()
-def write_zarr(
- store: MutableMapping | str | Path,
- data: MuData | AnnData,
- chunks=None,
- write_data=True,
- **kwargs,
-):
+def write_zarr(store: MutableMapping | str | Path, data: MuData | AnnData, chunks=None, write_data=True, **kwargs):
"""
Write MuData or AnnData object to the Zarr store
@@ -177,11 +165,9 @@ def write_zarr(
attrs["axis"] = mdata.axis
mod = file.require_group("mod")
- for k, v in mdata.mod.items():
+ for k, adata in mdata.mod.items():
group = mod.require_group(k)
- adata = mdata.mod[k]
-
adata.strings_to_categoricals()
if adata.raw is not None:
adata.strings_to_categoricals(adata.raw.var)
@@ -239,9 +225,7 @@ def write_h5mu(filename: PathLike, mdata: MuData, **kwargs):
nbytes = f.write(
f"MuData (format-version={__mudataversion__};creator=muon;creator-version={__version__})".encode()
)
- f.write(
- b"\0" * (512 - nbytes)
- ) # this is only needed because the H5file was written in append mode
+ f.write(b"\0" * (512 - nbytes)) # this is only needed because the H5file was written in append mode
def write_h5ad(filename: PathLike, mod: str, data: MuData | AnnData):
@@ -314,14 +298,14 @@ def write(filename: PathLike, data: MuData | AnnData):
This function is designed to enhance I/O ease of use.
It recognises the following formats of filename:
- - for MuData
- - FILE.h5mu
- - for AnnData
- - FILE.h5mu/MODALITY
- - FILE.h5mu/mod/MODALITY
- - FILE.h5ad
- """
+ - for MuData
+ - `FILE.h5mu`
+ - for AnnData
+ - `FILE.h5mu/MODALITY`
+ - `FILE.h5mu/mod/MODALITY`
+ - `FILE.h5ad`
+ """
import re
if filename.endswith(".h5mu") or isinstance(data, MuData):
@@ -395,9 +379,7 @@ def _validate_h5mu(filename: PathLike) -> (str, Callable | None):
callback = lambda: fname.__exit__()
ish5mu = fname.read(6) == b"MuData"
except ImportError as e:
- raise ImportError(
- "To read from remote storage or cache, install fsspec: pip install fsspec"
- ) from e
+ raise ImportError("To read from remote storage or cache, install fsspec: pip install fsspec") from e
else:
ish5mu = False
raise e
@@ -406,27 +388,20 @@ def _validate_h5mu(filename: PathLike) -> (str, Callable | None):
if isinstance(filename, str) or isinstance(filename, Path):
if h5py.is_hdf5(filename):
warn(
- "The HDF5 file was not created by muon/mudata, we can't guarantee that everything will work correctly"
+ "The HDF5 file was not created by muon/mudata, we can't guarantee that everything will work correctly",
+ stacklevel=2,
)
else:
raise ValueError("The file is not an HDF5 file")
else:
- warn("Cannot verify that the (remote) file is a valid H5MU file")
+ warn("Cannot verify that the (remote) file is a valid H5MU file", stacklevel=2)
return fname, callback
def read_h5mu(filename: PathLike, backed: str | bool | None = None):
- """
- Read MuData object from HDF5 file
- """
- assert backed in [
- None,
- True,
- False,
- "r",
- "r+",
- ], "Argument `backed` should be boolean, or r/r+, or None"
+ """Read MuData object from HDF5 file."""
+ assert backed in [None, True, False, "r", "r+"], "Argument `backed` should be boolean, or r/r+, or None"
from anndata._io.h5ad import read_dataframe
from anndata._io.specs.registry import read_elem
@@ -454,7 +429,7 @@ def read_h5mu(filename: PathLike, backed: str | bool | None = None):
mod_order = None
if "mod-order" in gmods.attrs:
mod_order = _read_attr(gmods.attrs, "mod-order")
- if mod_order is not None and all([m in gmods for m in mod_order]):
+ if mod_order is not None and all(m in gmods for m in mod_order):
mods = {k: mods[k] for k in mod_order}
d[k] = mods
@@ -473,8 +448,8 @@ def read_h5mu(filename: PathLike, backed: str | bool | None = None):
def read_zarr(store: str | Path | MutableMapping | zarr.Group):
- """\
- Read from a hierarchical Zarr array store.
+ """Read from a hierarchical Zarr array store.
+
Parameters
----------
store
@@ -482,12 +457,8 @@ def read_zarr(store: str | Path | MutableMapping | zarr.Group):
"""
import zarr
from anndata._io.specs.registry import read_elem
- from anndata._io.zarr import (
- read_dataframe,
- )
- from anndata._io.zarr import (
- read_zarr as anndata_read_zarr,
- )
+ from anndata._io.zarr import read_dataframe
+ from anndata._io.zarr import read_zarr as anndata_read_zarr
if isinstance(store, Path):
store = str(store)
@@ -511,7 +482,7 @@ def read_zarr(store: str | Path | MutableMapping | zarr.Group):
mod_order = None
if "mod-order" in gmods.attrs:
mod_order = _read_attr(gmods.attrs, "mod-order")
- if mod_order is not None and all([m in gmods for m in mod_order]):
+ if mod_order is not None and all(m in gmods for m in mod_order):
mods = {k: mods[k] for k in mod_order}
d[k] = mods
@@ -545,11 +516,7 @@ def _read_zarr_mod(g: zarr.Group, manager: MuDataFileManager = None, backed: boo
ad.file = AnnDataFileManager(ad, Path(g.name).name, manager)
raw = _read_legacy_raw(
- g,
- d.get("raw"),
- read_dataframe,
- read_elem,
- attrs=("var", "varm") if backed else ("var", "varm", "X"),
+ g, d.get("raw"), read_dataframe, read_elem, attrs=("var", "varm") if backed else ("var", "varm", "X")
)
if raw:
ad._raw = Raw(ad, **raw)
@@ -582,27 +549,15 @@ def _read_h5mu_mod(g: h5py.Group, manager: MuDataFileManager = None, backed: boo
return ad
-def read_h5ad(
- filename: PathLike,
- mod: str | None,
- backed: str | bool | None = None,
-) -> AnnData:
- """
- Read AnnData object from inside a .h5mu file
- or from a standalone .h5ad file (mod=None)
+def read_h5ad(filename: PathLike, mod: str | None, backed: str | bool | None = None) -> AnnData:
+ """Read AnnData object from inside a .h5mu file or from a standalone .h5ad file (mod=None).
Currently replicates and modifies anndata._io.h5ad.read_h5ad.
Matrices are loaded as they are in the file (sparse or dense).
Ideally this is merged later to anndata._io.h5ad.read_h5ad.
"""
- assert backed in [
- None,
- True,
- False,
- "r",
- "r+",
- ], "Argument `backed` should be boolean, or r/r+, or None"
+ assert backed in [None, True, False, "r", "r+"], "Argument `backed` should be boolean, or r/r+, or None"
from anndata import read_h5ad
@@ -612,10 +567,7 @@ def read_h5ad(
except TypeError as e:
fname, callback = filename, None
# Support fsspec
- if (
- filename.__class__.__name__ == "BufferedReader"
- or filename.__class__.__name__ == "OpenFile"
- ):
+ if filename.__class__.__name__ == "BufferedReader" or filename.__class__.__name__ == "OpenFile":
try:
from fsspec.core import OpenFile
@@ -623,9 +575,7 @@ def read_h5ad(
fname = filename.__enter__()
callback = lambda: fname.__exit__()
except ImportError as e:
- raise ImportError(
- "To read from remote storage or cache, install fsspec: pip install fsspec"
- ) from e
+ raise ImportError("To read from remote storage or cache, install fsspec: pip install fsspec") from e
adata = read_h5ad(fname, backed=backed)
    if callback is not None:
@@ -656,23 +606,29 @@ def read_h5ad(
def read(filename: PathLike, **kwargs) -> MuData | AnnData:
- """
- Read MuData object from HDF5 file
- or AnnData object (a single modality) inside it
+ """Read MuData object from HDF5 file or AnnData object (a single modality) inside it.
This function is designed to enhance I/O ease of use.
It recognises the following formats:
- - FILE.h5mu
- - FILE.h5mu/MODALITY
- - FILE.h5mu/mod/MODALITY
- - FILE.h5ad
+
+ - `FILE.h5mu`
+ - `FILE.h5mu/MODALITY`
+ - `FILE.h5mu/mod/MODALITY`
+ - `FILE.h5ad`
OpenFile and BufferedReader from fsspec are supported for remote storage, e.g.:
- - mdata = read(fsspec.open("s3://bucket/file.h5mu")))
- - with fsspec.open("s3://bucket/file.h5mu") as f:
- mdata = read(f)
- - with fsspec.open("https://server/file.h5ad") as f:
- adata = read(f)
+
+ - .. code-block::
+
+        mdata = read(fsspec.open("s3://bucket/file.h5mu"))
+ - .. code-block::
+
+ with fsspec.open("s3://bucket/file.h5mu") as f:
+ mdata = read(f)
+ - .. code-block::
+
+ with fsspec.open("https://server/file.h5ad") as f:
+ adata = read(f)
"""
import re
diff --git a/src/mudata/_core/merge.py b/src/mudata/_core/merge.py
index 7623f46..53c1ba2 100644
--- a/src/mudata/_core/merge.py
+++ b/src/mudata/_core/merge.py
@@ -97,7 +97,6 @@ def concat(
Examples
--------
-
Preparing example objects
>>> import mudata as md, anndata as ad, pandas as pd, numpy as np
@@ -130,10 +129,7 @@ def concat(
if isinstance(mdatas, Mapping):
if keys is not None:
- raise TypeError(
- "Cannot specify categories in both mapping keys and using `keys`. "
- "Only specify this once."
- )
+ raise TypeError("Cannot specify categories in both mapping keys and using `keys`. Only specify this once.")
keys, mdatas = list(mdatas.keys()), list(mdatas.values())
else:
mdatas = list(mdatas)
@@ -141,23 +137,19 @@ def concat(
if keys is None:
keys = np.arange(len(mdatas)).astype(str)
- assert all(
- [isinstance(m, MuData) for m in mdatas]
- ), "For concatenation to work, all objects should be of type MuData"
+ assert all(isinstance(m, MuData) for m in mdatas), "For concatenation to work, all objects should be of type MuData"
assert len(mdatas) > 1, "mdatas collection should have more than one MuData object"
- if len(set(m.axis for m in mdatas)) != 1:
+    if len({m.axis for m in mdatas}) != 1:
        raise ValueError("All MuData objects in mdatas should have the same axis.")
axis = mdatas[0].axis
# Modalities intersection
- common_mods = reduce(
- np.intersect1d, [np.array(list(m.mod.keys())).astype("object") for m in mdatas]
- )
+ common_mods = reduce(np.intersect1d, [np.array(list(m.mod.keys())).astype("object") for m in mdatas])
assert len(common_mods) > 0, "There should be at least one common modality across all mdatas"
# Concatenate all the modalities
- modalities: dict[str, AnnData] = dict()
+ modalities: dict[str, AnnData] = {}
for m in common_mods:
modalities[m] = ad_concat(
[mdata[m] for mdata in mdatas],
@@ -178,14 +170,11 @@ def concat(
# Label column
label_col = pd.Categorical.from_codes(
- np.repeat(np.arange(len(mdatas)), [m.shape[axis] for m in mdatas]),
- categories=keys,
+ np.repeat(np.arange(len(mdatas)), [m.shape[axis] for m in mdatas]), categories=keys
)
# Combining indexes
- concat_indices = pd.concat(
- [pd.Series(axis_indices(m, axis=axis)) for m in mdatas], ignore_index=True
- )
+ concat_indices = pd.concat([pd.Series(axis_indices(m, axis=axis)) for m in mdatas], ignore_index=True)
if index_unique is not None:
concat_indices = concat_indices.str.cat(label_col.map(str), sep=index_unique)
concat_indices = pd.Index(concat_indices)
@@ -195,11 +184,7 @@ def concat(
# Annotation for concatenation axis
check_combinable_cols([getattr(m, dim).columns for m in mdatas], join=join)
- concat_annot = pd.concat(
- unify_dtypes([getattr(m, dim) for m in mdatas]),
- join=join,
- ignore_index=True,
- )
+ concat_annot = pd.concat(unify_dtypes([getattr(m, dim) for m in mdatas]), join=join, ignore_index=True)
concat_annot.index = concat_indices
if label is not None:
concat_annot[label] = label_col
@@ -223,10 +208,7 @@ def concat(
patch_alt_dim.append(elems_alt_dim)
if join == "inner":
- concat_mapping = inner_concat_aligned_mapping(
- [getattr(m, f"{dim}m") for m in mdatas],
- index=concat_indices,
- )
+ concat_mapping = inner_concat_aligned_mapping([getattr(m, f"{dim}m") for m in mdatas], index=concat_indices)
if pairwise:
concat_pairwise = concat_pairwise_mapping(
mappings=[getattr(m, f"{dim}p") for m in mdatas],
@@ -237,9 +219,7 @@ def concat(
concat_pairwise = {}
elif join == "outer":
concat_mapping = outer_concat_aligned_mapping(
- [getattr(m, f"{dim}m") for m in mdatas],
- index=concat_indices,
- fill_value=fill_value,
+ [getattr(m, f"{dim}m") for m in mdatas], index=concat_indices, fill_value=fill_value
)
if pairwise:
concat_pairwise = concat_pairwise_mapping(
@@ -271,13 +251,13 @@ def concat(
alt_mapping = merge(
[
{k: r(v, axis=0) for k, v in getattr(a, f"{alt_dim}m").items()}
- for r, a in zip(reindexers, mdatas)
- ],
+ for r, a in zip(reindexers, mdatas, strict=False)
+ ]
)
alt_pairwise = merge(
[
{k: r(r(v, axis=0), axis=1) for k, v in getattr(a, f"{alt_dim}p").items()}
- for r, a in zip(reindexers, mdatas)
+ for r, a in zip(reindexers, mdatas, strict=False)
]
)
uns = uns_merge([m.uns for m in mdatas])
diff --git a/src/mudata/_core/mudata.py b/src/mudata/_core/mudata.py
index 328010f..f8c9557 100644
--- a/src/mudata/_core/mudata.py
+++ b/src/mudata/_core/mudata.py
@@ -16,10 +16,7 @@
import numpy as np
import pandas as pd
from anndata import AnnData
-from anndata._core.aligned_mapping import (
- AxisArraysBase,
- PairwiseArraysView,
-)
+from anndata._core.aligned_mapping import AxisArraysBase, PairwiseArraysView
from anndata._core.views import DataFrameView
from anndata.utils import convert_to_dict
@@ -79,17 +76,11 @@ def _repr_hierarchy(
if isinstance(v, MuData):
maybe_axis = (
- (
- " [shared obs] "
- if v.axis == 0
- else " [shared var] " if v.axis == 1 else " [shared obs and var] "
- )
+ (" [shared obs] " if v.axis == 0 else " [shared var] " if v.axis == 1 else " [shared obs and var] ")
if hasattr(v, "axis")
else ""
)
- descr += (
- f"\n{indent}{k} MuData{maybe_axis}({v.n_obs} × {v.n_vars}){backed_at}{is_view}"
- )
+ descr += f"\n{indent}{k} MuData{maybe_axis}({v.n_obs} × {v.n_vars}){backed_at}{is_view}"
if i != len(self) - 1:
levels = [nest_level] + active_levels
@@ -153,11 +144,7 @@ def __init__(
self,
data: Union[AnnData, Mapping[str, AnnData], "MuData"] | None = None,
feature_types_names: dict | None = MappingProxyType(
- {
- "Gene Expression": "rna",
- "Peaks": "atac",
- "Antibody Capture": "prot",
- }
+ {"Gene Expression": "rna", "Peaks": "atac", "Antibody Capture": "prot"}
),
as_view: bool = False,
index: tuple[slice | Integral, slice | Integral] | slice | Integral | None = None,
@@ -314,13 +301,9 @@ def _init_as_view(self, mudata_ref: "MuData", index):
cvaridx = slice(None)
if a.is_view:
if isinstance(a, MuData):
- self.mod[m] = a._mudata_ref[
- _resolve_idxs((a._oidx, a._vidx), (cobsidx, cvaridx), a._mudata_ref)
- ]
+ self.mod[m] = a._mudata_ref[_resolve_idxs((a._oidx, a._vidx), (cobsidx, cvaridx), a._mudata_ref)]
else:
- self.mod[m] = a._adata_ref[
- _resolve_idxs((a._oidx, a._vidx), (cobsidx, cvaridx), a._adata_ref)
- ]
+ self.mod[m] = a._adata_ref[_resolve_idxs((a._oidx, a._vidx), (cobsidx, cvaridx), a._adata_ref)]
else:
self.mod[m] = a[cobsidx, cvaridx]
@@ -387,7 +370,9 @@ def _init_from_dict_(
k: (
v
if isinstance(v, AnnData) or isinstance(v, MuData)
- else MuData(**v) if "mod" in v else AnnData(**v)
+ else MuData(**v)
+ if "mod" in v
+ else AnnData(**v)
)
for k, v in mod.items()
},
@@ -404,10 +389,7 @@ def _init_from_dict_(
)
def _check_duplicated_attr_names(self, attr: str):
- if any(
- not getattr(self.mod[mod_i], attr + "_names").astype(str).is_unique
- for mod_i in self.mod
- ):
+ if any(not getattr(self.mod[mod_i], attr + "_names").astype(str).is_unique for mod_i in self.mod):
# If there are non-unique attr_names, we can only handle outer joins
# under the condition the duplicated values are restricted to one modality
dups = [
@@ -421,11 +403,7 @@ def _check_duplicated_attr_names(self, attr: str):
for i, mod_i_dup_attrs in enumerate(dups):
for j, mod_j in enumerate(self.mod):
if j != i:
- if any(
- np.isin(
- mod_i_dup_attrs, getattr(self.mod[mod_j], attr + "_names").values
- )
- ):
+ if any(np.isin(mod_i_dup_attrs, getattr(self.mod[mod_j], attr + "_names").values)):
warnings.warn(
f"Duplicated {attr}_names should not be present in different modalities due to the ambiguity that leads to.",
stacklevel=3,
@@ -459,12 +437,8 @@ def _check_changed_attr_names(self, attr: str, columns: bool = False):
if m in getattr(self, attrhash):
cached_hash = getattr(self, attrhash)[m]
new_hash = (
- sha1(
- np.ascontiguousarray(getattr(self.mod[m], attr).index.values)
- ).hexdigest(),
- sha1(
- np.ascontiguousarray(getattr(self.mod[m], attr).columns.values)
- ).hexdigest(),
+ sha1(np.ascontiguousarray(getattr(self.mod[m], attr).index.values)).hexdigest(),
+ sha1(np.ascontiguousarray(getattr(self.mod[m], attr).columns.values)).hexdigest(),
)
if cached_hash[0] != new_hash[0]:
attr_names_changed = True
@@ -562,14 +536,10 @@ def _create_global_attr_index(self, attr: str, axis: int):
modindices = [getattr(self.mod[m], attr).index for m in self.mod]
if all(modindices[i].equals(modindices[i + 1]) for i in range(len(modindices) - 1)):
attrindex = modindices[0].copy()
- attrindex = reduce(
- pd.Index.union, [getattr(self.mod[m], attr).index for m in self.mod]
- ).values
+ attrindex = reduce(pd.Index.union, [getattr(self.mod[m], attr).index for m in self.mod]).values
else:
# Modality-specific indices
- attrindex = np.concatenate(
- [getattr(self.mod[m], attr).index.values for m in self.mod], axis=0
- )
+ attrindex = np.concatenate([getattr(self.mod[m], attr).index.values for m in self.mod], axis=0)
return attrindex
def _update_attr(
@@ -624,9 +594,7 @@ def _update_attr(
attrmap = getattr(self, attr + "map")
dfs = [
- getattr(a, attr)
- .loc[:, []]
- .assign(**{f"{m}:{rowcol}": np.arange(getattr(a, attr).shape[0])})
+ getattr(a, attr).loc[:, []].assign(**{f"{m}:{rowcol}": np.arange(getattr(a, attr).shape[0])})
for m, a in self.mod.items()
]
@@ -675,10 +643,7 @@ def calc_attrm_update():
if not attr_duplicated:
# Shared axis
data_mod = pd.concat(
- dfs,
- join="outer",
- axis=1 if axis == (1 - self._axis) or self._axis == -1 else 0,
- sort=False,
+ dfs, join="outer", axis=1 if axis == (1 - self._axis) or self._axis == -1 else 0, sort=False
)
for mod in self.mod.keys():
fix_attrmap_col(data_mod, mod, rowcol)
@@ -700,10 +665,7 @@ def calc_attrm_update():
else:
dfs = [_make_index_unique(df, force=True) for df in dfs]
data_mod = pd.concat(
- dfs,
- join="outer",
- axis=1 if axis == (1 - self._axis) or self._axis == -1 else 0,
- sort=False,
+ dfs, join="outer", axis=1 if axis == (1 - self._axis) or self._axis == -1 else 0, sort=False
)
data_mod = _restore_index(data_mod)
@@ -717,9 +679,7 @@ def calc_attrm_update():
# only use unchanged modalities for ordering
if (
modmask.sum() == getattr(amod, attr).shape[0]
- and (
- getattr(amod, attr).index[modmap[modmask] - 1] == prev_index[modmask]
- ).all()
+ and (getattr(amod, attr).index[modmap[modmask] - 1] == prev_index[modmask]).all()
):
data_mod.set_index(colname, append=True, inplace=True)
data_global.set_index(attrmap[mod].reshape(-1), append=True, inplace=True)
@@ -731,9 +691,7 @@ def calc_attrm_update():
f"{attr}_names is not unique, global {attr} is present, and {attr}map is empty. The update() is not well-defined, verify if global {attr} map to the correct modality-specific {attr}.",
stacklevel=2,
)
- data_mod.reset_index(
- data_mod.index.names.difference(data_global.index.names), inplace=True
- )
+ data_mod.reset_index(data_mod.index.names.difference(data_global.index.names), inplace=True)
# after inserting a new modality with duplicates, but no duplicates before:
# data_mod.index is not unique
# after deleting a modality with duplicates: data_global.index is not unique, but
@@ -764,18 +722,14 @@ def calc_attrm_update():
if not data_mod.index.is_unique:
warnings.warn(
- f"{attr}_names are not unique. To make them unique, call `.{attr}_names_make_unique`."
+ f"{attr}_names are not unique. To make them unique, call `.{attr}_names_make_unique`.", stacklevel=2
)
if self._axis == -1:
warnings.warn(
- f"Behaviour is not defined with axis=-1, {attr}_names need to be made unique first."
+ f"Behaviour is not defined with axis=-1, {attr}_names need to be made unique first.", stacklevel=2
)
- setattr(
- self,
- "_" + attr,
- data_mod,
- )
+ setattr(self, "_" + attr, data_mod)
# Update .obsm/.varm
# this needs to be after setting _obs/_var due to dimension checking in the aligned mapping
@@ -788,21 +742,20 @@ def calc_attrm_update():
if can_update:
for mx_key, mx in attrm.items():
if mx_key not in self.mod.keys(): # not a modality name
- cattr = attrm[mx_key]
- if isinstance(cattr, pd.DataFrame):
- cattr = cattr.iloc[index_order, :]
- cattr.iloc[index_order == -1, :] = pd.NA
+ if isinstance(mx, pd.DataFrame):
+ mx = mx.iloc[index_order, :]
+ mx.iloc[index_order == -1, :] = pd.NA
else:
- cattr = cattr[index_order]
- cattr[index_order == -1] = np.nan
- attrm[mx_key] = cattr
+ mx = mx[index_order]
+ mx[index_order == -1] = np.nan
+ attrm[mx_key] = mx
# Update .obsp/.varp (size might have changed)
- for mx_key in attrp.keys():
- attrp[mx_key] = attrp[mx_key][index_order, index_order]
- attrp[mx_key][index_order == -1, :] = -1
- attrp[mx_key][:, index_order == -1] = -1
-
+ for mx_key, mx in attrp.items():
+ mx = mx[index_order, index_order]
+ mx[index_order == -1, :] = -1
+ mx[:, index_order == -1] = -1
+ attrp[mx_key] = mx
else:
raise NotImplementedError(
f"{attr}_names seem to have been renamed and filtered at the same time. "
@@ -847,13 +800,11 @@ def _update_attr_legacy(
if attr_duplicated:
warnings.warn(
- f"{attr}_names are not unique. To make them unique, call `.{attr}_names_make_unique`.",
- stacklevel=2,
+ f"{attr}_names are not unique. To make them unique, call `.{attr}_names_make_unique`.", stacklevel=2
)
if self._axis == -1:
warnings.warn(
- f"Behaviour is not defined with axis=-1, {attr}_names need to be made unique first.",
- stacklevel=2,
+ f"Behaviour is not defined with axis=-1, {attr}_names need to be made unique first.", stacklevel=2
)
if not any(attr_changed):
@@ -865,8 +816,7 @@ def _update_attr_legacy(
if join_common:
if attr_intersecting:
warnings.warn(
- f"Cannot join columns with the same name because {attr}_names are intersecting.",
- stacklevel=2,
+ f"Cannot join columns with the same name because {attr}_names are intersecting.", stacklevel=2
)
join_common = False
@@ -904,8 +854,7 @@ def _update_attr_legacy(
if join_common:
# If all modalities have a column with the same name, it is not global
columns_common = reduce(
- lambda a, b: a.intersection(b),
- [getattr(self.mod[mod], attr).columns for mod in self.mod],
+ lambda a, b: a.intersection(b), [getattr(self.mod[mod], attr).columns for mod in self.mod]
)
data_global = data_global.loc[:, [c not in columns_common for c in data_global.columns]]
@@ -953,25 +902,18 @@ def _update_attr_legacy(
sort=False,
)
data_common = pd.concat(
- [
- _maybe_coerce_to_boolean(getattr(a, attr)[columns_common])
- for m, a in self.mod.items()
- ],
+ [_maybe_coerce_to_boolean(getattr(a, attr)[columns_common]) for m, a in self.mod.items()],
join="outer",
axis=0,
sort=False,
)
- data_mod = _maybe_coerce_to_bool(
- data_mod.join(data_common, how="left", sort=False)
- )
+ data_mod = _maybe_coerce_to_bool(data_mod.join(data_common, how="left", sort=False))
data_common = _maybe_coerce_to_bool(data_common)
# this occurs when join_common=True and we already have a global data frame, e.g. after reading from H5MU
sharedcols = data_mod.columns.intersection(data_global.columns)
- data_global.rename(
- columns={col: f"global:{col}" for col in sharedcols}, inplace=True
- )
+ data_global.rename(columns={col: f"global:{col}" for col in sharedcols}, inplace=True)
else:
data_mod = _maybe_coerce_to_bool(
pd.concat(
@@ -1012,12 +954,8 @@ def _update_attr_legacy(
data_mod = data_mod.rename_axis(col_index, axis=0).reset_index()
data_mod[col_cumcount] = data_mod.groupby(col_index).cumcount()
data_global = data_global.rename_axis(col_index, axis=0).reset_index()
- data_global[col_cumcount] = (
- data_global.reset_index().groupby(col_index).cumcount()
- )
- data_mod = data_mod.merge(
- data_global, on=[col_index, col_cumcount], how="left", sort=False
- )
+ data_global[col_cumcount] = data_global.reset_index().groupby(col_index).cumcount()
+ data_mod = data_mod.merge(data_global, on=[col_index, col_cumcount], how="left", sort=False)
# Restore the index and remove the helper column
data_mod = data_mod.set_index(col_index).rename_axis(None, axis=0)
del data_mod[col_cumcount]
@@ -1043,18 +981,11 @@ def _update_attr_legacy(
]
# Here, attr_names are guaranteed to be unique and are safe to be used for joins
- data_mod = pd.concat(
- dfs,
- join="outer",
- axis=axis,
- sort=False,
- )
+ data_mod = pd.concat(dfs, join="outer", axis=axis, sort=False)
data_common = pd.concat(
[
- _maybe_coerce_to_boolean(
- _make_index_unique(getattr(a, attr)[columns_common], force=True)
- )
+ _maybe_coerce_to_boolean(_make_index_unique(getattr(a, attr)[columns_common], force=True))
for m, a in self.mod.items()
],
join="outer",
@@ -1067,19 +998,12 @@ def _update_attr_legacy(
else:
dfs = [
_make_index_unique(
- getattr(a, attr)
- .assign(**{rowcol: np.arange(getattr(a, attr).shape[0])})
- .add_prefix(m + ":"),
+ getattr(a, attr).assign(**{rowcol: np.arange(getattr(a, attr).shape[0])}).add_prefix(m + ":"),
force=True,
)
for m, a in self.mod.items()
]
- data_mod = pd.concat(
- dfs,
- join="outer",
- axis=axis,
- sort=False,
- )
+ data_mod = pd.concat(dfs, join="outer", axis=axis, sort=False)
# pd.concat wrecks the ordering when doing an outer join with a MultiIndex and different data frame shapes
if axis == 1:
@@ -1093,18 +1017,14 @@ def _update_attr_legacy(
if len(mask) > 0:
negativemask = ~newidx.index.get_level_values(0).isin(globalidx)
newidx = pd.MultiIndex.from_frame(
- pd.concat(
- [newidx.loc[globalidx[mask], :], newidx.iloc[negativemask, :]], axis=0
- )
+ pd.concat([newidx.loc[globalidx[mask], :], newidx.iloc[negativemask, :]], axis=0)
)
data_mod = data_mod.reindex(newidx, copy=False)
# this occurs when join_common=True and we already have a global data frame, e.g. after reading from HDF5
if join_common:
sharedcols = data_mod.columns.intersection(data_global.columns)
- data_global.rename(
- columns={col: f"global:{col}" for col in sharedcols}, inplace=True
- )
+ data_global.rename(columns={col: f"global:{col}" for col in sharedcols}, inplace=True)
data_mod = _restore_index(data_mod)
data_mod.index.set_names(rowcol, inplace=True)
@@ -1129,9 +1049,7 @@ def _update_attr_legacy(
f"{attr}_names is not unique, global {attr} is present, and {attr}map is empty. The update() is not well-defined, verify if global {attr} map to the correct modality-specific {attr}.",
stacklevel=2,
)
- data_mod.reset_index(
- data_mod.index.names.difference(data_global.index.names), inplace=True
- )
+ data_mod.reset_index(data_mod.index.names.difference(data_global.index.names), inplace=True)
data_mod = _make_index_unique(data_mod, force=True)
data_global = _make_index_unique(data_global, force=True)
data_mod = data_mod.join(data_global, how="left", sort=False)
@@ -1184,8 +1102,7 @@ def _update_attr_legacy(
new_index = ~now_index.isin(prev_index)
if new_index.sum() == 0 or (
- keep_index.sum() + new_index.sum() == len(now_index)
- and len(now_index) > len(prev_index)
+ keep_index.sum() + new_index.sum() == len(now_index) and len(now_index) > len(prev_index)
):
# Another length (filtered) or new modality added
# Update .obsm/.varm (size might have changed)
@@ -1252,10 +1169,7 @@ def _shrink_attr(self, attr: str, inplace=True) -> pd.DataFrame:
map(
all,
zip(
- *(
- [not col.startswith(mod + ":") for col in getattr(self, attr).columns]
- for mod in self.mod
- ),
+ *([not col.startswith(mod + ":") for col in getattr(self, attr).columns] for mod in self.mod),
strict=False,
),
)
@@ -1385,9 +1299,7 @@ def obs_names_make_unique(self):
ki = mods[i]
for j in range(i + 1, len(self.mod)):
kj = mods[j]
- common_obs.append(
- self.mod[ki].obs_names.intersection(self.mod[kj].obs_names.values)
- )
+ common_obs.append(self.mod[ki].obs_names.intersection(self.mod[kj].obs_names.values))
if any(len(x) > 0 for x in common_obs):
warnings.warn(
"Modality names will be prepended to obs_names since there are identical obs_names in different modalities.",
@@ -1509,9 +1421,7 @@ def var_names_make_unique(self):
ki = mods[i]
for j in range(i + 1, len(self.mod)):
kj = mods[j]
- common_vars.append(
- np.intersect1d(self.mod[ki].var_names.values, self.mod[kj].var_names.values)
- )
+ common_vars.append(np.intersect1d(self.mod[ki].var_names.values, self.mod[kj].var_names.values))
if any(len(x) > 0 for x in common_vars):
warnings.warn(
"Modality names will be prepended to var_names since there are identical var_names in different modalities.",
@@ -1804,17 +1714,12 @@ def _pull_attr(
ccols = []
for name in modcols:
ccols.append(
- MetadataColumn(
- allowed_prefixes=self.mod.keys(),
- prefix=prefix,
- name=name,
- strip_prefix=False,
- )
+ MetadataColumn(allowed_prefixes=self.mod.keys(), prefix=prefix, name=name, strip_prefix=False)
)
derived_name_counts[name] += 1
cols[prefix] = ccols
- for prefix, modcols in cols.items():
+ for modcols in cols.values():
for col in modcols:
count = derived_name_counts[col.derived_name]
col.count = count # this is important to classify columns
@@ -1830,9 +1735,7 @@ def _pull_attr(
# keep only requested columns
cols = {
- prefix: [
- col for col in modcols if col.name in columns or col.derived_name in columns
- ]
+ prefix: [col for col in modcols if col.name in columns or col.derived_name in columns]
for prefix, modcols in cols.items()
}
@@ -1849,19 +1752,14 @@ def _pull_attr(
# filter columns by class, keep only those that were requested
selector = {"common": common, "nonunique": nonunique, "unique": unique}
- cols = {
- prefix: [col for col in modcols if selector[col.klass]]
- for prefix, modcols in cols.items()
- }
+ cols = {prefix: [col for col in modcols if selector[col.klass]] for prefix, modcols in cols.items()}
# filter columns, keep only requested modalities
if mods is not None:
cols = {prefix: cols[prefix] for prefix in mods}
# count final filtered column names, required later to decide whether to prefix a column with its source modality
- derived_name_count = Counter(
- [col.derived_name for modcols in cols.values() for col in modcols]
- )
+ derived_name_count = Counter([col.derived_name for modcols in cols.values() for col in modcols])
# - axis == self.axis
# e.g. combine obs from multiple modalities (with shared obs)
@@ -2136,10 +2034,7 @@ def _push_attr(
drop = True
# get all global columns
- cols = [
- MetadataColumn(allowed_prefixes=self.mod.keys(), name=name)
- for name in getattr(self, attr).columns
- ]
+ cols = [MetadataColumn(allowed_prefixes=self.mod.keys(), name=name) for name in getattr(self, attr).columns]
if columns is not None:
for k, v in {"common": common, "prefixed": prefixed}.items():
@@ -2251,13 +2146,7 @@ def push_obs(
Forces drop=True. False by default.
"""
return self._push_attr(
- "obs",
- columns=columns,
- mods=mods,
- common=common,
- prefixed=prefixed,
- drop=drop,
- only_drop=only_drop,
+ "obs", columns=columns, mods=mods, common=common, prefixed=prefixed, drop=drop, only_drop=only_drop
)
def push_var(
@@ -2296,13 +2185,7 @@ def push_var(
Forces drop=True. False by default.
"""
return self._push_attr(
- "var",
- columns=columns,
- mods=mods,
- common=common,
- prefixed=prefixed,
- drop=drop,
- only_drop=only_drop,
+ "var", columns=columns, mods=mods, common=common, prefixed=prefixed, drop=drop, only_drop=only_drop
)
def write_h5mu(self, filename: str | None = None, **kwargs):
@@ -2355,11 +2238,7 @@ def _gen_repr(self, n_obs, n_vars, extensive: bool = False, nest_level: int = 0)
backed_at = f" backed at {str(self.filename)!r}" if self.isbacked else ""
view_of = "View of " if self.is_view else ""
maybe_axis = (
- (
- ""
- if self.axis == 0
- else " (shared var) " if self.axis == 1 else " (shared obs and var) "
- )
+ ("" if self.axis == 0 else " (shared var) " if self.axis == 1 else " (shared obs and var) ")
if hasattr(self, "axis")
else ""
)
@@ -2374,10 +2253,7 @@ def _gen_repr(self, n_obs, n_vars, extensive: bool = False, nest_level: int = 0)
all,
zip(
*[
- [
- not col.startswith(mod + mod_sep)
- for col in getattr(self, attr).keys()
- ]
+ [not col.startswith(mod + mod_sep) for col in getattr(self, attr).keys()]
for mod in self.mod
],
strict=False,
@@ -2385,26 +2261,17 @@ def _gen_repr(self, n_obs, n_vars, extensive: bool = False, nest_level: int = 0)
)
)
if any(global_keys):
- descr += f"\n{indent} {attr}:\t{str([keys[i] for i in range(len(keys)) if global_keys[i]])[1:-1]}"
+ descr += (
+ f"\n{indent} {attr}:\t{str([keys[i] for i in range(len(keys)) if global_keys[i]])[1:-1]}"
+ )
descr += f"\n{indent} {len(self.mod)} modalit{'y' if len(self.mod) == 1 else 'ies'}"
for k, v in self.mod.items():
mod_indent = " " * (nest_level + 1)
if isinstance(v, MuData):
- descr += f"\n{mod_indent}{k}:\t" + v._gen_repr(
- v.n_obs, v.n_vars, extensive, nest_level + 1
- )
+ descr += f"\n{mod_indent}{k}:\t" + v._gen_repr(v.n_obs, v.n_vars, extensive, nest_level + 1)
continue
descr += f"\n{mod_indent}{k}:\t{v.n_obs} x {v.n_vars}"
- for attr in [
- "obs",
- "var",
- "uns",
- "obsm",
- "varm",
- "layers",
- "obsp",
- "varp",
- ]:
+ for attr in ["obs", "var", "uns", "obsm", "varm", "layers", "obsp", "varp"]:
try:
keys = getattr(v, attr).keys()
if len(keys) > 0:
@@ -2462,10 +2329,8 @@ def _repr_html_(self, expand=None):
)
# General object properties
- mods += (
- "{} object {} obs × {} var".format(
- type(dat).__name__, *(dat.shape)
- )
+ mods += "{} object {} obs × {} var".format(
+ type(dat).__name__, *(dat.shape)
)
if dat.isbacked:
mods += f"
↳ backed at {self.file.filename}"
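Note on the `.obsp`/`.varp` hunk above: iterating `attrp.items()` and rebinding the loop variable `mx` does not modify the mapping itself, which is why the loop has to end with an explicit `attrp[mx_key] = mx` write-back. A minimal standalone sketch of that pattern (illustration only, not mudata's API; `order` is a hypothetical new-to-old position map):

    import numpy as np

    attrp = {"distances": np.arange(9, dtype=float).reshape(3, 3)}
    order = np.array([2, 0, 1])

    for key, mx in attrp.items():
        mx = mx[order][:, order]  # reorder rows and columns symmetrically
        attrp[key] = mx           # without this write-back, attrp keeps the old array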
diff --git a/src/mudata/_core/repr.py b/src/mudata/_core/repr.py
index 481d0c6..d1ae690 100644
--- a/src/mudata/_core/repr.py
+++ b/src/mudata/_core/repr.py
@@ -18,7 +18,7 @@ def maybe_module_class(obj, sep=".", builtins=False) -> tuple[str, str]:
m = ""
else:
m += sep
- except Exception:
+ except AttributeError:
m += ""
return (m, cl)
@@ -49,30 +49,21 @@ def format_values(x):
elif isinstance(x, pd.Series):
x = x.to_numpy()
else:
- warn(f"got unknown array type {type(x)}, don't know how handle it.")
+ warn(f"got unknown array type {type(x)}, don't know how handle it.", stacklevel=1)
return type(x)
if x.dtype == object:
try:
- testval = next(
- filter(
- lambda y: ~np.isnan(y) if isinstance(y, Number) else x is not None,
- x,
- )
- )
+ testval = next(filter(lambda y: ~np.isnan(y) if isinstance(y, Number) else x is not None, x))
except StopIteration:
pass
if testval is None:
testval = x[0]
- if (
- isinstance(testval, Integral)
- or isinstance(testval, np.bool_)
- or isinstance(testval, bool)
- ):
+ if isinstance(testval, Integral) or isinstance(testval, np.bool_) or isinstance(testval, bool):
s += ",".join([f"{i}" for i in x])
elif isinstance(testval, Real):
s += ",".join([f"{i:.2f}" for i in x])
elif isinstance(testval, Complex):
- warn("got complex number, don't know how to handle it")
+ warn("got complex number, don't know how to handle it", stacklevel=1)
elif isinstance(testval, Iterable):
s += ",".join(map(format_values, x))
lastidx = max(50, s.find(","))
@@ -88,9 +79,7 @@ def block_matrix(data, attr, name):
s += ""
s += """
{} {}{}
- """.format(
- obj.dtype, *maybe_module_class(obj)
- )
+ """.format(obj.dtype, *maybe_module_class(obj))
s += "
"
return s
@@ -131,9 +120,7 @@ def details_block_table(data, attr, name, expand=0, dims=True, square=False):
[
"""
| {} | {} | {}{} |
-        """.format(
- attr_key, obj[attr_key].dtype, *maybe_module_class(obj[attr_key])
- )
+ """.format(attr_key, obj[attr_key].dtype, *maybe_module_class(obj[attr_key]))
for attr_key in obj.keys()
]
)
@@ -146,11 +133,7 @@ def details_block_table(data, attr, name, expand=0, dims=True, square=False):
attr_key,
obj[attr_key].dtype if hasattr(obj[attr_key], "dtype") else "",
*maybe_module_class(obj[attr_key]),
- (
- f"{obj[attr_key].shape[1]} columns"
- if len(obj[attr_key].shape) > 1 and dims
- else ""
- ),
+ (f"{obj[attr_key].shape[1]} columns" if len(obj[attr_key].shape) > 1 and dims else ""),
)
for attr_key in obj.keys()
]
@@ -166,9 +149,7 @@ def details_block_table(data, attr, name, expand=0, dims=True, square=False):
s += ""
s += """
| {} | {}{} |
-        """.format(
- obj.dtype, *maybe_module_class(obj)
- )
+ """.format(obj.dtype, *maybe_module_class(obj))
s += "
"
s += ""
else: # Unstructured
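For intuition on what these repr helpers render: `maybe_module_class(obj)` returns a `(module-with-separator, class-name)` pair, so callers can format e.g. `float64 numpy.ndarray`; narrowing the handler from a bare `Exception` to `AttributeError` keeps unrelated failures visible. A simplified sketch of the idea (hypothetical helper name, same structure):

    import numpy as np

    def module_and_class(obj, sep="."):
        cl = type(obj).__name__
        try:
            m = type(obj).__module__
            m = "" if m == "builtins" else m + sep
        except AttributeError:
            m = ""
        return (m, cl)

    arr = np.zeros((2, 2))
    print("{} {}{}".format(arr.dtype, *module_and_class(arr)))  # float64 numpy.ndarray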
diff --git a/src/mudata/_core/to_.py b/src/mudata/_core/to_.py
index 8088918..783afd1 100644
--- a/src/mudata/_core/to_.py
+++ b/src/mudata/_core/to_.py
@@ -8,7 +8,7 @@
def to_anndata(mdata: MuData, **kwargs) -> AnnData:
"""
- Convert MuData to AnnData by concatenating modalities
+ Convert MuData to AnnData by concatenating modalities.
If mdata.axis == 0 (shared observations),
concatenate modalities along axis 1 (`anndata.concat(axis=1)`).
@@ -17,9 +17,9 @@ def to_anndata(mdata: MuData, **kwargs) -> AnnData:
Parameters
----------
- data : MuData
+ mdata
MuData object to convert to AnnData
- kwargs : dict
+ kwargs
Keyword arguments passed to anndata.concat
"""
if mdata.axis == -1:
@@ -36,14 +36,9 @@ def to_anndata(mdata: MuData, **kwargs) -> AnnData:
return adata
-def to_mudata(
- adata: AnnData,
- axis: Literal[0, 1],
- by: str,
-) -> MuData:
+def to_mudata(adata: AnnData, axis: Literal[0, 1], by: str) -> MuData:
"""
- Convert AnnData to MuData by splitting it
- along obs or var
+ Convert AnnData to MuData by splitting it along obs or var.
Axis signifies the shared axis.
Use `axis=0` for getting MuData with shared observations (axis=0),
@@ -51,11 +46,11 @@ def to_mudata(
Parameters
-----------
- adata : AnnData
+ adata
AnnData object to convert to MuData
- axis : int
+ axis
Axis of shared observations (0) or variables (1)
- by : str
+ by
Key in `adata.var` (if axis=0) or `adata.obs` (if axis=1) to split by
"""
# Use AnnData.split_by() when it's ready
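A hypothetical round trip through these converters; the `feature_type` column name is an assumption for illustration:

    import numpy as np
    import pandas as pd
    from anndata import AnnData
    from mudata import to_anndata, to_mudata

    adata = AnnData(
        np.random.normal(size=(5, 4)),
        var=pd.DataFrame({"feature_type": ["rna", "rna", "prot", "prot"]}),
    )
    mdata = to_mudata(adata, axis=0, by="feature_type")  # shared obs, vars split by label
    adata_again = to_anndata(mdata)                      # concatenated back along axis 1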
diff --git a/src/mudata/_core/utils.py b/src/mudata/_core/utils.py
index 41a9f45..0658615 100644
--- a/src/mudata/_core/utils.py
+++ b/src/mudata/_core/utils.py
@@ -103,9 +103,7 @@ def _update_and_concat(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
# df.update(df2)
common_cols = df1.columns.intersection(df2.columns)
for col in common_cols:
- if isinstance(df[col].values, pd.Categorical) and isinstance(
- df2[col].values, pd.Categorical
- ):
+ if isinstance(df[col].values, pd.Categorical) and isinstance(df2[col].values, pd.Categorical):
common_cats = pd.api.types.union_categoricals([df[col], df2[col]]).categories
df[col] = df[col].cat.set_categories(common_cats)
df2[col] = df2[col].cat.set_categories(common_cats)
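The category union above is what makes the per-column update safe: assigning between two categorical columns with different category sets would otherwise produce missing values. A standalone pandas sketch of the same pattern:

    import pandas as pd

    s1 = pd.Series(["a", "b"], dtype="category")
    s2 = pd.Series(["b", "c"], dtype="category")
    common = pd.api.types.union_categoricals([s1, s2]).categories
    s1 = s1.cat.set_categories(common)
    s2 = s2.cat.set_categories(common)  # both now share categories ['a', 'b', 'c']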
diff --git a/src/mudata/_core/views.py b/src/mudata/_core/views.py
index c2c06ae..4b11653 100644
--- a/src/mudata/_core/views.py
+++ b/src/mudata/_core/views.py
@@ -8,16 +8,9 @@
class _ViewMixin(_SetItemMixin):
- """
- AnnData View Mixin but using ._mudata_ref
- """
-
- def __init__(
- self,
- *args,
- view_args: tuple["MuData", str, tuple[str, ...]] = None,
- **kwargs,
- ):
+ """AnnData View Mixin but using ._mudata_ref"""
+
+ def __init__(self, *args, view_args: tuple["MuData", str, tuple[str, ...]] = None, **kwargs):
if view_args is not None:
view_args = ElementRef(*view_args)
self._view_args = view_args
@@ -30,8 +23,6 @@ def __deepcopy__(self, memo):
class DictView(_ViewMixin, dict):
- """
- AnnData DictView adopted for MuData
- """
+ """AnnData DictView adapted for MuData"""
pass
diff --git a/src/mudata/version.py b/src/mudata/version.py
new file mode 100644
index 0000000..b0c7fcd
--- /dev/null
+++ b/src/mudata/version.py
@@ -0,0 +1,37 @@
+"""Compute the version number and store it in the `__version__` variable.
+
+Based on https://github.com/maresb/hatch-vcs-footgun-example.
+"""
+
+
+def _get_hatch_version():
+ """Compute the most up-to-date version number in a development environment.
+
+ Returns `None` if Hatchling is not installed, e.g. in a production environment.
+
+ For more details, see https://github.com/maresb/hatch-vcs-footgun-example.
+ """
+ import os
+
+ try:
+ from hatchling.metadata.core import ProjectMetadata
+ from hatchling.plugin.manager import PluginManager
+ from hatchling.utils.fs import locate_file
+ except ImportError:
+ # Hatchling is not installed, so probably we are not in
+ # a development environment.
+ return None
+
+ pyproject_toml = locate_file(__file__, "pyproject.toml")
+ if pyproject_toml is None:
+ return None
+ root = os.path.dirname(pyproject_toml)
+ metadata = ProjectMetadata(root=root, plugin_manager=PluginManager())
+ # Version can be either statically set in pyproject.toml or computed dynamically:
+ return metadata.core.version or metadata.hatch.version.cached
+
+
+__version__ = _get_hatch_version()
+__version_tuple__ = None
+if not __version__: # not in development mode
+ from ._version import __version__
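In effect, a development checkout (Hatchling importable and `pyproject.toml` locatable) computes the version on the fly, while an installed package falls back to the static `_version.py` written at build time. Usage stays the usual:

    import mudata

    print(mudata.__version__)  # VCS-derived string in a dev checkout, release version when installed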
diff --git a/tests/conftest.py b/tests/conftest.py
index ec77214..6faed70 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,26 +1,21 @@
import pytest
-@pytest.fixture(scope="module")
-def filepath_h5mu(tmpdir_factory):
- return str(tmpdir_factory.mktemp("tmp_test_dir").join("testA.h5mu"))
+@pytest.fixture
+def filepath_h5mu(tmp_path):
+ return tmp_path / "testA.h5mu"
-@pytest.fixture(scope="module")
-def filepath2_h5mu(tmpdir_factory):
- return str(tmpdir_factory.mktemp("tmp_test_dir").join("testB.h5mu"))
+@pytest.fixture
+def filepath2_h5mu(tmp_path):
+ return tmp_path / "testB.h5mu"
-@pytest.fixture(scope="module")
-def filepath_hdf5(tmpdir_factory):
- return str(tmpdir_factory.mktemp("tmp_mofa_dir").join("mofa_pytest.hdf5"))
+@pytest.fixture
+def filepath_zarr(tmp_path):
+ return tmp_path / "testA.zarr"
-@pytest.fixture(scope="module")
-def filepath_zarr(tmpdir_factory):
- return str(tmpdir_factory.mktemp("tmp_test_dir").join("testA.zarr"))
-
-
-@pytest.fixture(scope="module")
-def filepath2_zarr(tmpdir_factory):
- return str(tmpdir_factory.mktemp("tmp_test_dir").join("testB.zarr"))
+@pytest.fixture
+def filepath2_zarr(tmp_path):
+ return tmp_path / "testB.zarr"
diff --git a/tests/test_axis_-1.py b/tests/test_axis_-1.py
index 5bbccf4..0ddbaee 100644
--- a/tests/test_axis_-1.py
+++ b/tests/test_axis_-1.py
@@ -13,9 +13,7 @@ def test_create(self):
n, d_raw, d_preproc = 100, 900, 300
a_raw = AnnData(np.random.normal(size=(n, d_raw)))
- a_preproc = a_raw[
- :, np.sort(np.random.choice(np.arange(d_raw), d_preproc, replace=False))
- ].copy()
+ a_preproc = a_raw[:, np.sort(np.random.choice(np.arange(d_raw), d_preproc, replace=False))].copy()
mdata = MuData({"raw": a_raw, "preproc": a_preproc}, axis=-1)
diff --git a/tests/test_io.py b/tests/test_io.py
index 273b0ab..5ec256c 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -18,7 +18,6 @@ def mdata():
)
-@pytest.mark.usefixtures("filepath_h5mu", "filepath_zarr")
class TestMuData:
def test_write_read_h5mu_basic(self, mdata, filepath_h5mu):
mdata.write(filepath_h5mu)
@@ -59,12 +58,9 @@ def test_write_read_zarr_mod_obs_colname(self, mdata, filepath_zarr):
assert mdata_.obs["mod1:column"].values[0] == 2
-@pytest.mark.usefixtures("filepath_h5mu")
class TestMuDataMod:
def test_h5mu_mod_backed(self, mdata, filepath_h5mu):
- mdata.write(
- filepath_h5mu,
- )
+ mdata.write(filepath_h5mu)
mdata_ = mudata.read_h5mu(filepath_h5mu, backed="r")
assert list(mdata_.mod.keys()) == ["mod1", "mod2"]
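Backed mode, as exercised in `test_h5mu_mod_backed`, keeps the data on disk until accessed; a minimal sketch with a hypothetical path:

    import mudata

    mdata_backed = mudata.read_h5mu("testA.h5mu", backed="r")
    assert mdata_backed.isbacked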
diff --git a/tests/test_obs_var.py b/tests/test_obs_var.py
index 164efa5..167f711 100644
--- a/tests/test_obs_var.py
+++ b/tests/test_obs_var.py
@@ -30,9 +30,7 @@ def test_obs_global_columns(self, mdata, filepath_h5mu):
assert list(mdata.obs.columns.values) == [f"{m}:demo" for m in mdata.mod.keys()] + ["demo"]
mdata.write(filepath_h5mu)
mdata_ = mudata.read(filepath_h5mu)
- assert list(mdata_.obs.columns.values) == [f"{m}:demo" for m in mdata_.mod.keys()] + [
- "demo"
- ]
+ assert list(mdata_.obs.columns.values) == [f"{m}:demo" for m in mdata_.mod.keys()] + ["demo"]
def test_var_global_columns(self, mdata, filepath_h5mu):
for m, mod in mdata.mod.items():
diff --git a/tests/test_pull_push.py b/tests/test_pull_push.py
index 7251966..df93245 100644
--- a/tests/test_pull_push.py
+++ b/tests/test_pull_push.py
@@ -5,13 +5,13 @@
import pytest
from anndata import AnnData
-from mudata import MuData, set_options
+from mudata import MuData
@pytest.fixture()
def modalities(request, obs_n, var_unique):
n_mod = 3
- mods = dict()
+ mods = {}
np.random.seed(100)
for i in range(n_mod):
i1 = i + 1
@@ -36,9 +36,7 @@ def modalities(request, obs_n, var_unique):
if obs_n:
if obs_n == "disjoint":
- mod2_which_obs = np.random.choice(
- mods["mod2"].obs_names, size=mods["mod2"].n_obs // 2, replace=False
- )
+ mod2_which_obs = np.random.choice(mods["mod2"].obs_names, size=mods["mod2"].n_obs // 2, replace=False)
mods["mod2"] = mods["mod2"][mod2_which_obs].copy()
return mods
@@ -47,7 +45,7 @@ def modalities(request, obs_n, var_unique):
@pytest.fixture()
def datasets(request, var_n, obs_unique):
n_datasets = 3
- datasets = dict()
+ datasets = {}
np.random.seed(100)
for i in range(n_datasets):
i1 = i + 1
@@ -206,9 +204,7 @@ def test_push_var_simple(self, modalities):
assert "mod2_pushed" in mdata["mod2"].var.columns
map = mdata.varmap["mod2"].ravel()
mask = map > 0
- assert (
- mdata.var["mod2:mod2_pushed"][mask] == mdata["mod2"].var["mod2_pushed"][map[mask] - 1]
- ).all()
+ assert (mdata.var["mod2:mod2_pushed"][mask] == mdata["mod2"].var["mod2_pushed"][map[mask] - 1]).all()
@pytest.mark.parametrize("var_unique", [True, False])
@pytest.mark.parametrize("obs_n", ["joint", "disjoint"])
@@ -234,9 +230,7 @@ def test_push_obs_simple(self, modalities):
assert "mod2_pushed" in mdata["mod2"].obs.columns
map = mdata.obsmap["mod2"].ravel()
mask = map > 0
- assert (
- mdata.obs["mod2:mod2_pushed"][mask] == mdata["mod2"].obs["mod2_pushed"][map[mask] - 1]
- ).all()
+ assert (mdata.obs["mod2:mod2_pushed"][mask] == mdata["mod2"].obs["mod2_pushed"][map[mask] - 1]).all()
@pytest.mark.usefixtures("filepath_h5mu")
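These assertions rely on the mapping convention used throughout the tests: `mdata.obsmap[mod]` and `mdata.varmap[mod]` store, per global row, 1-based positions into the modality, with 0 meaning the row is absent there. A standalone numpy sketch with hypothetical values:

    import numpy as np

    varmap = np.array([0, 1, 0, 2])  # 4 global vars, two present in this modality
    mask = varmap > 0                # which global vars the modality contains
    mod_rows = varmap[mask] - 1      # back to 0-based positions within the modality
    assert (mod_rows == np.array([0, 1])).all()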
diff --git a/tests/test_update.py b/tests/test_update.py
index 818d47f..baac4a1 100644
--- a/tests/test_update.py
+++ b/tests/test_update.py
@@ -11,7 +11,7 @@
@pytest.fixture()
def modalities(request, obs_n, obs_across, obs_mod):
n_mod = 3
- mods = dict()
+ mods = {}
np.random.seed(100)
for i in range(n_mod):
i1 = i + 1
@@ -23,9 +23,7 @@ def modalities(request, obs_n, obs_across, obs_mod):
if obs_n:
if obs_n == "disjoint":
- mod2_which_obs = np.random.choice(
- mods["mod1"].obs_names, size=mods["mod1"].n_obs // 2, replace=False
- )
+ mod2_which_obs = np.random.choice(mods["mod1"].obs_names, size=mods["mod1"].n_obs // 2, replace=False)
mods["mod1"] = mods["mod1"][mod2_which_obs].copy()
if obs_across:
@@ -44,9 +42,7 @@ def modalities(request, obs_n, obs_across, obs_mod):
mods["mod3"].obs_names = obsnames3
mods["mod2"].var_names = varnames2
mods["mod3"].var_names = varnames3
- elif (
- obs_mod == "extreme_duplicated"
- ): # integer overflow: https://github.com/scverse/mudata/issues/107
+ elif obs_mod == "extreme_duplicated": # integer overflow: https://github.com/scverse/mudata/issues/107
obsnames2 = mods["mod2"].obs_names.to_numpy()
varnames2 = mods["mod2"].var_names.to_numpy()
obsnames2[:-1] = obsnames2[0] = "testobs"
@@ -96,9 +92,7 @@ def new_update(self):
def get_attrm_values(mdata, attr, key, names):
attrm = getattr(mdata, f"{attr}m")
index = getattr(mdata, f"{attr}_names")
- return np.concatenate(
- [np.atleast_1d(attrm[key][np.nonzero(index == name)[0]]) for name in names]
- )
+ return np.concatenate([np.atleast_1d(attrm[key][np.nonzero(index == name)[0]]) for name in names])
def test_update_simple(self, mdata, axis):
"""
@@ -117,16 +111,12 @@ def test_update_simple(self, mdata, axis):
assert mdata.shape[1 - axis] == sum(mod.shape[1 - axis] for mod in mdata.mod.values())
assert (
getattr(mdata, f"{oattr}_names")
- == reduce(
- lambda x, y: x.append(y),
- (getattr(mod, f"{oattr}_names") for mod in mdata.mod.values()),
- )
+ == reduce(lambda x, y: x.append(y), (getattr(mod, f"{oattr}_names") for mod in mdata.mod.values()))
).all()
# names along axis are unioned
axisnames = reduce(
- lambda x, y: x.union(y, sort=False),
- (getattr(mod, f"{attr}_names") for mod in mdata.mod.values()),
+ lambda x, y: x.union(y, sort=False), (getattr(mod, f"{attr}_names") for mod in mdata.mod.values())
)
assert mdata.shape[axis] == axisnames.shape[0]
assert (getattr(mdata, f"{attr}_names").sort_values() == axisnames.sort_values()).all()
@@ -141,8 +131,7 @@ def test_update_simple(self, mdata, axis):
# df1 = df1.iloc[::-1, :]
# df = pd.concat((kdf1, df2), axis=1, join="outer", sort=False)
assert (
- getattr(mdata, f"{attr}_names")[: mdata["mod1"].shape[axis]]
- == getattr(mdata["mod1"], f"{attr}_names")
+ getattr(mdata, f"{attr}_names")[: mdata["mod1"].shape[axis]] == getattr(mdata["mod1"], f"{attr}_names")
).all()
def test_update_add_modality(self, modalities, axis):
@@ -172,10 +161,7 @@ def test_update_add_modality(self, modalities, axis):
assert np.isnan(mdata.obsm["test"]).sum() == modalities[modnames[i]].n_obs
assert np.all(np.isnan(mdata.obsm["test"][-modalities[modnames[i]].n_obs :]))
assert np.all(~np.isnan(mdata.obsm["test"][: -modalities[modnames[i]].n_obs]))
- assert (
- test_obsm_values[~np.isnan(test_obsm_values)].reshape(-1)
- == true_obsm_values.reshape(-1)
- ).all()
+ assert (test_obsm_values[~np.isnan(test_obsm_values)].reshape(-1) == true_obsm_values.reshape(-1)).all()
else:
assert (test_obsm_values == true_obsm_values).all()
@@ -185,15 +171,9 @@ def test_update_add_modality(self, modalities, axis):
assert (oattrnames[: old_oattrnames.size] == old_oattrnames).all()
assert (
- attrnames
- == old_attrnames.union(
- getattr(modalities[modnames[i]], f"{attr}_names"), sort=False
- )
- ).all()
- assert (
- oattrnames
- == old_oattrnames.append(getattr(modalities[modnames[i]], f"{oattr}_names"))
+ attrnames == old_attrnames.union(getattr(modalities[modnames[i]], f"{attr}_names"), sort=False)
).all()
+ assert (oattrnames == old_oattrnames.append(getattr(modalities[modnames[i]], f"{oattr}_names"))).all()
def test_update_delete_modality(self, mdata, axis):
modnames = list(mdata.mod.keys())
@@ -255,10 +235,7 @@ def test_update_intersecting(self, modalities, axis):
setattr(
mod,
f"{oattr}_names",
- [
- f"{m}_{oattr}{j}" if j != 0 else f"{oattr}_{j}"
- for j in range(mod.shape[1 - axis])
- ],
+ [f"{m}_{oattr}{j}" if j != 0 else f"{oattr}_{j}" for j in range(mod.shape[1 - axis])],
)
mdata = MuData(modalities, axis=axis)
@@ -271,16 +248,12 @@ def test_update_intersecting(self, modalities, axis):
assert mdata.shape[1 - axis] == sum(mod.shape[1 - axis] for mod in modalities.values())
assert (
getattr(mdata, f"{oattr}_names")
- == reduce(
- lambda x, y: x.append(y),
- (getattr(mod, f"{oattr}_names") for mod in modalities.values()),
- )
+ == reduce(lambda x, y: x.append(y), (getattr(mod, f"{oattr}_names") for mod in modalities.values()))
).all()
# names along axis are unioned
axisnames = reduce(
- lambda x, y: x.union(y, sort=False),
- (getattr(mod, f"{attr}_names") for mod in modalities.values()),
+ lambda x, y: x.union(y, sort=False), (getattr(mod, f"{attr}_names") for mod in modalities.values())
)
assert mdata.shape[axis] == axisnames.shape[0]
assert (getattr(mdata, f"{attr}_names") == axisnames).all()
@@ -433,10 +406,7 @@ def test_update_after_filter_obs_adata(self, mdata_legacy):
"""
# Replicate in-place filtering in muon:
# mu.pp.filter_obs(mdata['mod1'], 'min_count', lambda x: (x < -2))
- mdata_legacy.mod["mod1"] = mdata_legacy["mod1"][
- mdata_legacy["mod1"].obs["min_count"] < -2
- ].copy()
- old_obsnames = mdata_legacy.obs_names
+ mdata_legacy.mod["mod1"] = mdata_legacy["mod1"][mdata_legacy["mod1"].obs["min_count"] < -2].copy()
mdata_legacy.update()
assert mdata_legacy.obs["batch"].isna().sum() == 0
@@ -460,13 +430,10 @@ def test_update_after_obs_reordered(self, mdata_legacy):
mdata_legacy.update()
test_obsm_values = [
- mdata_legacy.obsm["test_obsm"][np.where(mdata_legacy.obs_names == name)[0][0]]
- for name in some_obs_names
+ mdata_legacy.obsm["test_obsm"][np.where(mdata_legacy.obs_names == name)[0][0]] for name in some_obs_names
]
- assert all(
- [all(true_obsm_values[i] == test_obsm_values[i]) for i in range(len(true_obsm_values))]
- )
+ assert all(all(true_obsm_values[i] == test_obsm_values[i]) for i in range(len(true_obsm_values)))
# @pytest.mark.usefixtures("filepath_h5mu")
diff --git a/tests/test_update_axis_1.py b/tests/test_update_axis_1.py
index 5192a37..f97d936 100644
--- a/tests/test_update_axis_1.py
+++ b/tests/test_update_axis_1.py
@@ -51,7 +51,7 @@ def mdata(request, var_n, var_across, var_mod):
@pytest.fixture()
def datasets(request, var_n, var_across, var_mod):
n_sets = 3
- datasets = dict()
+ datasets = {}
np.random.seed(100)
for i in range(n_sets):
i1 = i + 1
@@ -177,21 +177,15 @@ def test_update_after_var_reordered(self, mdata):
some_var_names = mdata.var_names.values[:2]
true_varm_values = [
- mdata.varm["test_varm"][np.where(mdata.var_names.values == name)[0][0]]
- for name in some_var_names
+ mdata.varm["test_varm"][np.where(mdata.var_names.values == name)[0][0]] for name in some_var_names
]
mdata.mod["set1"] = mdata["set1"][:, ::-1].copy()
mdata.update()
- test_varm_values = [
- mdata.varm["test_varm"][np.where(mdata.var_names == name)[0][0]]
- for name in some_var_names
- ]
+ test_varm_values = [mdata.varm["test_varm"][np.where(mdata.var_names == name)[0][0]] for name in some_var_names]
- assert all(
- [all(true_varm_values[i] == test_varm_values[i]) for i in range(len(true_varm_values))]
- )
+ assert all(all(true_varm_values[i] == test_varm_values[i]) for i in range(len(true_varm_values)))
# @pytest.mark.usefixtures("filepath_h5mu")
diff --git a/tests/test_view_copy.py b/tests/test_view_copy.py
index af19655..effa9f1 100644
--- a/tests/test_view_copy.py
+++ b/tests/test_view_copy.py
@@ -14,12 +14,10 @@
def mdata():
rng = np.random.default_rng(42)
mod1 = AnnData(
- np.arange(0, 100, 0.1).reshape(-1, 10),
- obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)),
+ np.arange(0, 100, 0.1).reshape(-1, 10), obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False))
)
mod2 = AnnData(
- np.arange(101, 2101, 1).reshape(-1, 20),
- obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)),
+ np.arange(101, 2101, 1).reshape(-1, 20), obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False))
)
mods = {"mod1": mod1, "mod2": mod2}
# Make var_names different in different modalities
@@ -34,12 +32,10 @@ def mdata_with_obsp():
"""Create a MuData object with populated obsp and varp fields."""
rng = np.random.default_rng(42)
mod1 = AnnData(
- np.arange(0, 100, 0.1).reshape(-1, 10),
- obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)),
+ np.arange(0, 100, 0.1).reshape(-1, 10), obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False))
)
mod2 = AnnData(
- np.arange(101, 2101, 1).reshape(-1, 20),
- obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False)),
+ np.arange(101, 2101, 1).reshape(-1, 20), obs=pd.DataFrame(index=rng.choice(150, size=100, replace=False))
)
mods = {"mod1": mod1, "mod2": mod2}
# Make var_names different in different modalities
@@ -164,34 +160,28 @@ def test_obsp_slicing(self, mdata_with_obsp):
# Slice a subset of cells
n_obs_subset = 50
- random_indices = np.random.choice(
- mdata_with_obsp.obs_names, size=n_obs_subset, replace=False
- )
+ random_indices = np.random.choice(mdata_with_obsp.obs_names, size=n_obs_subset, replace=False)
# Create a slice view
mdata_slice = mdata_with_obsp[random_indices]
# Check that the sliced obsp matrices have correct shape in the view
- assert mdata_slice.obsp["distances"].shape == (
- n_obs_subset,
- n_obs_subset,
- ), f"Expected shape in view: {(n_obs_subset, orig_n_obs)}, got: {mdata_slice.obsp['distances'].shape}"
- assert mdata_slice.obsp["connectivities"].shape == (
- n_obs_subset,
- n_obs_subset,
- ), f"Expected shape in view: {(n_obs_subset, orig_n_obs)}, got: {mdata_slice.obsp['connectivities'].shape}"
+ assert mdata_slice.obsp["distances"].shape == (n_obs_subset, n_obs_subset), (
+ f"Expected shape in view: {(n_obs_subset, orig_n_obs)}, got: {mdata_slice.obsp['distances'].shape}"
+ )
+ assert mdata_slice.obsp["connectivities"].shape == (n_obs_subset, n_obs_subset), (
+ f"Expected shape in view: {(n_obs_subset, orig_n_obs)}, got: {mdata_slice.obsp['connectivities'].shape}"
+ )
# Make a copy of the sliced MuData object
mdata_copy = mdata_slice.copy()
# Check shapes after copy - these should be (n_obs_subset, n_obs_subset) if correctly copied
- assert mdata_copy.obsp["distances"].shape == (
- n_obs_subset,
- n_obs_subset,
- ), f"Expected shape after copy: {(n_obs_subset, n_obs_subset)}, got: {mdata_copy.obsp['distances'].shape}"
- assert mdata_copy.obsp["connectivities"].shape == (
- n_obs_subset,
- n_obs_subset,
- ), f"Expected shape after copy: {(n_obs_subset, n_obs_subset)}, got: {mdata_copy.obsp['connectivities'].shape}"
+ assert mdata_copy.obsp["distances"].shape == (n_obs_subset, n_obs_subset), (
+ f"Expected shape after copy: {(n_obs_subset, n_obs_subset)}, got: {mdata_copy.obsp['distances'].shape}"
+ )
+ assert mdata_copy.obsp["connectivities"].shape == (n_obs_subset, n_obs_subset), (
+ f"Expected shape after copy: {(n_obs_subset, n_obs_subset)}, got: {mdata_copy.obsp['connectivities'].shape}"
+ )
def test_varp_slicing(self, mdata_with_obsp):
"""Test that varp matrices are correctly sliced when subsetting a MuData object."""
@@ -209,15 +199,13 @@ def test_varp_slicing(self, mdata_with_obsp):
mdata_slice = mdata_with_obsp[:, random_var_indices]
# Check that the sliced varp matrix has correct shape in the view
- assert mdata_slice.varp["correlations"].shape == (
- n_var_subset,
- n_var_subset,
- ), f"Expected shape in view: {(n_var_subset, orig_n_var)}, got: {mdata_slice.varp['correlations'].shape}"
+ assert mdata_slice.varp["correlations"].shape == (n_var_subset, n_var_subset), (
+ f"Expected shape in view: {(n_var_subset, orig_n_var)}, got: {mdata_slice.varp['correlations'].shape}"
+ )
# Copy the sliced MuData object
mdata_copy = mdata_slice.copy()
# Check shapes after copy
- assert mdata_copy.varp["correlations"].shape == (
- n_var_subset,
- n_var_subset,
- ), f"Expected shape after copy: {(n_var_subset, n_var_subset)}, got: {mdata_copy.varp['correlations'].shape}"
+ assert mdata_copy.varp["correlations"].shape == (n_var_subset, n_var_subset), (
+ f"Expected shape after copy: {(n_var_subset, n_var_subset)}, got: {mdata_copy.varp['correlations'].shape}"
+ )