Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0c0a87f
Merge feature/datatree into feature/qc-plots
Munchic Aug 7, 2020
9088a02
function collector collects only functions
Munchic Aug 8, 2020
5b4f494
plot only requested plots + check for bad inputs
Munchic Aug 11, 2020
08385ee
dumb mistake that modifies plot_methods list
Munchic Aug 11, 2020
6aa1f4c
create new folder from plot name
Munchic Aug 11, 2020
79c2a3d
pass plot kwargs from config to plots
Munchic Aug 13, 2020
583610f
new plot map is here yay
Munchic Aug 17, 2020
e28b7af
root and normalize plotting work now yay!
Munchic Aug 18, 2020
8cb55e7
new plot filepath
Munchic Aug 18, 2020
298ee24
ignore precursors for plot_map
Munchic Aug 19, 2020
53ec5ac
use plot_kwargs_defaults as template for default
Munchic Aug 19, 2020
1b510fc
ignore undefined plot_map
Munchic Aug 19, 2020
48b650a
Merge feature/datatree into feature/qc-plots
Munchic Aug 19, 2020
0b71457
plotting debugging and notebook compatibility;
TheAustinator Sep 23, 2020
34f4aa9
tree parallelization; plotting widgets; improved logging
TheAustinator Sep 25, 2020
93389f9
plot widget; ProcessTreeRun; working on parallelism;
TheAustinator Oct 6, 2020
0f30dd1
Merge pull request #8 from TheAustinator/feature/plotting-updates
TheAustinator Oct 7, 2020
2261270
migrated plot wrappers from cellforest and added plot_sources to conf…
TheAustinator Oct 13, 2020
2a4189a
Merge remote into local
Munchic Oct 14, 2020
dae6cd4
plot tweaks
TheAustinator Oct 14, 2020
94477f1
twigs and twigs widget; sundry plotting bugs
TheAustinator Oct 15, 2020
2a72c28
docstrings on plot helper functions
Munchic Oct 21, 2020
c77f979
Unified widget for twigs and sweeps; metadata operations for branch a…
TheAustinator Oct 25, 2020
633793b
Merge branch 'feature/plotting-updates' into feature/qc-plots
TheAustinator Oct 29, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
dist: xenial
dist: focal
language: python
python:
- '3.6'
install:
- pip3 install -r requirements.txt
script:
- pytest
script: pytest
deploy:
provider: pypi
user: theaustinator
on:
tags: true
password:
secure:
secure: P5b36T+ulGokWYCGqt/JRN9l4p3HwTlQBazpWxhqz3bHQ8GhmIAYRn5xPno0R4hA0dyO0hKtdp8Vkd+RqTmdvv0HJSHvLp4HC5A9JSugChnA5CRnfbBQ8VWMxNrGGJDADaaRFyT7GCzE317c4LOpcLsCqRUnAIytNqm7VvLCL00nQmeQu2b0sYsjXHbHbQQVcWEdMlMptr+hUqGiDXRjG8/1luMvP7ZUe0IBcVcHKp2hwIjTzwqPCyNF0J92l7DEmTHVyfmPYp5ioqdQDvJ5DjN6bBNELD/uEbmq3A8zagm47m46aGc0uBiT7qpKLW4w+fYlVkph2Uvj3qRvnAzWCcRLtItkJyIj7V2ovKSs7btheUmJHM93JnQc6hRqYfdggKLBDgosvLLOab4xBzJW5K7JwD4Wa9NCs3pidJWcF3WSTm6xpTFj19uEd8m9xQ/KPn3UfOAwSTImi+7Ya/LpYMV1xMwuKyQMWqanlOfpi5CFSzBrrBC44lL/ZhXjgEfocxSvCeqm+aK2gObDq2Eymze/OVvvOrBgnilU5D3EMai5ustkj8RjnjSEFJKAJYeXwGE4Yx73Ger0AZDqvLWbn8Pf7rHfcqpSJJx7jIJUeOwhWP6vme1vpqvW8V/VuwJP6ZobkyQ7xmnl5ceNGTYPOtfzhcpw5S7WAukSzpYQKmQ=
4 changes: 3 additions & 1 deletion dataforest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from dataforest.utils.loaders.update_config import get_current_config, update_config
from dataforest.utils.loaders.config import CONFIG_OPTIONS, load_config as _load_config
from dataforest.utils.loaders.update_config import get_current_config, get_config_updater as _get_config_updater
from dataforest.core.Interface import Interface

update_config = _get_config_updater(_load_config)

load = Interface.load
from_input_dirs = Interface.from_input_dirs
Expand Down
37 changes: 35 additions & 2 deletions dataforest/config/MetaPlotMethods.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from pathlib import Path
from typing import List

from dataforest.config.MetaConfig import MetaConfig
from dataforest.utils.loaders.collectors import collect_plots
from dataforest.utils.loaders.path import get_module_paths
from dataforest.utils.plots_config import build_process_plot_method_lookup, parse_plot_kwargs


class MetaPlotMethods(MetaConfig):
Expand All @@ -8,5 +13,33 @@ def PLOT_METHOD_LOOKUP(cls):
return {k: v for source in cls.CONFIG["plot_sources"] for k, v in collect_plots(source).items()}

@property
def PLOT_METHODS(cls):
return cls.CONFIG["plot_methods"]
def PLOT_MAP(cls):
return cls.CONFIG.get("plot_map", dict())

@property
def PROCESS_PLOT_METHODS(cls):
try:
plot_methods = cls.CONFIG["plot_methods"]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another way to write this would be `plot_methods = cls.CONFIG.get("plot_methods", parse_plot_methods(config=cls.CONFIG))`

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't find `parse_plot_methods`; is this outdated?

except KeyError:
plot_methods = build_process_plot_method_lookup(cls.CONFIG.get("plot_map", dict()))

return plot_methods

# Default plot keyword arguments taken from the config.
# Returns the value stored under "plot_kwargs_defaults" in `cls.CONFIG`, or a
# fresh empty dict when that key is absent.
@property
def PLOT_KWARGS_DEFAULTS(cls):
return cls.CONFIG.get("plot_kwargs_defaults", dict())

# Plot keyword arguments derived from the config.
# Reads "plot_map" and "plot_kwargs_defaults" from `cls.CONFIG` (each falls back
# to an empty dict when missing) and returns whatever `parse_plot_kwargs`
# produces from the pair.
# NOTE(review): the exact shape of the result is defined by `parse_plot_kwargs`,
# which is not visible here; the inline TODO suggests a (process, plot) ->
# kwargs mapping -- confirm.
@property
def PLOT_KWARGS(cls): # TODO-QC: mapping of process, plot to plot_kwargs
plot_map = cls.CONFIG.get("plot_map", dict())
plot_kwargs_defaults = cls.CONFIG.get("plot_kwargs_defaults", dict())
plot_kwargs = parse_plot_kwargs(plot_map, plot_kwargs_defaults)
return plot_kwargs

# @property
# def R_FUNCTIONS_FILEPATH(cls) -> Path:
# return get_module_paths([cls.CONFIG["r_functions_sources"]])[0]

# Paths of the R plot source modules named in the config.
# Passes `cls.CONFIG["r_plot_sources"]` to `get_module_paths` and returns the
# result (annotated as a list of `Path`).
# Unlike the `.get(..., dict())` properties above, this indexes the config
# directly, so a missing "r_plot_sources" key is not defaulted
# (presumably raises KeyError -- confirm against the CONFIG type).
@property
def R_PLOT_SOURCES(cls) -> List[Path]:
return get_module_paths(cls.CONFIG["r_plot_sources"])
7 changes: 5 additions & 2 deletions dataforest/config/MetaProcessSchema.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path

from dataforest.config.MetaConfig import MetaConfig
from dataforest.utils.plots_config import parse_plot_map


class MetaProcessSchema(MetaConfig):
Expand All @@ -19,8 +20,10 @@ def FILE_MAP(cls):
return cls["file_map"]

@property
def PLOT_MAP(cls):
return cls["plot_map"]
def PLOT_MAP(cls): # TODO-QC: process plot map starting here? Make it into a class where you can fetch plot_kwargs?
plot_map = cls.CONFIG.get("plot_map", dict())
plot_kwargs_defaults = cls.CONFIG.get("plot_kwargs_defaults", dict())
return parse_plot_map(plot_map, plot_kwargs_defaults)

@property
def LAYERS(cls):
Expand Down
68 changes: 32 additions & 36 deletions dataforest/core/BranchSpec.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from copy import deepcopy
from typing import Union, List, Dict

from typeguard import typechecked

from dataforest.core.RunGroupSpec import RunGroupSpec
from dataforest.core.RunSpec import RunSpec
from dataforest.utils.exceptions import DuplicateProcessName

Expand All @@ -19,41 +22,21 @@ class BranchSpec(list):
>>> # NOTE: conceptual illustration only, not real processes
>>> branch_spec = [
>>> {
>>> "_PROCESS_": "normalize",
>>> "_PARAMS_": {
>>> "min_genes": 5,
>>> "max_genes": 5000,
>>> "min_cells": 5,
>>> "nfeatures": 30,
>>> "perc_mito_cutoff": 20,
>>> "method": "seurat_default",
>>> }
>>> "_SUBSET_": {
>>> "indication": {"disease_1", "disease_3"},
>>> "collection_center": "mass_general",
>>> },
>>> "_FILTER_": {
>>> "donor": "D115"
>>> }
>>> },
>>> {
>>> "_PROCESS_": "reduce", # dimensionality reduction
>>> "_ALIAS_": "linear_dim_reduce",
>>> "_PARAMS_": {
>>> "algorithm": "pca",
>>> "n_pcs": 30,
>>> "n_pcs": 30
>>> }
>>> },
>>> {
>>> "_PROCESS_": "reduce",
>>> "_ALIAS_": "nonlinear_dim_reduce",
>>> "_PARAMS_": {
>>> "algorithm": "umap",
>>> "n_neighbors": 15,
>>> "min_dist": 0.1,
>>> "n_components": 2,
>>> "metric": "euclidean"
>>> }
>>> "_PARAMS_": ...
>>> },
>>> {
>>> "_PROCESS_": "dispersity"
>>> "_PARAMS_": ...
>>> }
>>> ]
>>> branch_spec = BranchSpec(branch_spec)
Expand All @@ -69,6 +52,8 @@ class BranchSpec(list):
process_order:
"""

_RUN_SPEC_CLASS = RunSpec

def __init__(self, spec: Union[str, List[dict], "BranchSpec[RunSpec]"]):
if isinstance(spec, str):
spec = json.loads(spec)
Expand All @@ -84,6 +69,10 @@ def __init__(self, spec: Union[str, List[dict], "BranchSpec[RunSpec]"]):
)
self.process_order: List[str] = [spec_item.name for spec_item in self]

# The "_PROCESS_" value of every run spec in this branch, in list order.
# NOTE(review): this reads the raw "_PROCESS_" key, whereas `process_order` is
# built from each item's `.name`; presumably the two differ when a run spec
# carries an "_ALIAS_" -- confirm.
@property
def processes(self):
return [run_spec["_PROCESS_"] for run_spec in self]

@property
def shell_str(self):
"""string version which can be passed via shell and loaded via json"""
Expand Down Expand Up @@ -188,9 +177,9 @@ def _get_data_operation_list(self, process_name: str, operation_name: str) -> Li
operation_list.append(operation)
return operation_list

def _build_run_spec_lookup(self) -> Dict[str, "RunSpec"]:
def _build_run_spec_lookup(self) -> Dict[str, Union["RunSpec", "RunGroupSpec"]]:
"""See class definition"""
run_spec_lookup = {"root": RunSpec({})}
run_spec_lookup = {"root": self._RUN_SPEC_CLASS({})}
for run_spec in self:
try:
process_name = run_spec.name
Expand All @@ -217,18 +206,25 @@ def _build_precursors_lookup(self, incl_root: bool = False, incl_current: bool =
current_precursors = current_precursors + [spec_item.name]
return precursors

def __getitem__(self, item: Union[str, int]) -> "RunSpec":
@typechecked
def __getitem__(self, key: Union[str, int, slice]) -> Union["RunSpec", "BranchSpec"]:
"""Get `RunSpec` either via `int` index or `name`"""
if not isinstance(item, int):
try:
return self._run_spec_lookup[item]
except Exception as e:
raise e
if isinstance(key, str):
return self._run_spec_lookup[key]
elif isinstance(key, slice):
if isinstance(key.stop, str):
if key.start or key.step:
raise ValueError(f"Can only use stop with string slice (ex. [:'process_name'])")
precursors_lookup = self.get_precursors_lookup(incl_current=True)
precursors = precursors_lookup[key.stop]
return self.__class__([self._run_spec_lookup[process] for process in precursors])
else:
return self.__class__(super().__getitem__(key))
else:
return super().__getitem__(item)
return super().__getitem__(key)

def __setitem__(self, k, v):
raise ValueError("Cannot set items dynamically. All items must be defined at init")
raise NotImplementedError("Cannot set items dynamically. All items must be defined at init")

# Membership is tested against `self._run_spec_lookup`, not against the list
# elements themselves.
# NOTE(review): `_build_run_spec_lookup` is annotated as returning a dict keyed
# by str and seeded with a "root" entry, so presumably `"root" in branch_spec`
# is True and `item` is expected to be a process name -- confirm that
# `_run_spec_lookup` is that dict.
def __contains__(self, item):
return item in self._run_spec_lookup
Expand Down
18 changes: 13 additions & 5 deletions dataforest/core/DataBase.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC
from pathlib import Path
from typing import Union, Optional, List, Dict
from typing import Union, Optional, List, AnyStr

from dataforest.core.PlotMethods import PlotMethods

Expand All @@ -9,14 +10,21 @@ class DataBase:
Mixin for `DataTree`, `DataBranch`, and derived class
"""

_PLOT_METHODS = PlotMethods

def __init__(self):
self.plot = PlotMethods(self)
self.root = None
self.plot = self._PLOT_METHODS(self)

# True when a "meta.tsv" file exists directly under `self.root`.
# NOTE(review): `__init__` in this diff assigns `self.root = None`, and
# `Path(None)` raises TypeError; presumably subclasses set `root` before this
# property is read -- confirm.
@property
def root_built(self):
return (Path(self.root) / "meta.tsv").exists()

@staticmethod
def _combine_datasets(
root: Union[str, Path],
metadata: Optional[Union[str, Path]] = None,
input_paths: Optional[List[Union[str, Path]]] = None,
root: AnyStr,
metadata: Optional[AnyStr] = None,
input_paths: Optional[List[AnyStr]] = None,
mode: Optional[str] = None,
):
raise NotImplementedError("Must be implemented by subclass")
Loading