diff --git a/Snakefile b/Snakefile
index e92e4f24f..f04425376 100644
--- a/Snakefile
+++ b/Snakefile
@@ -5,7 +5,7 @@ import yaml
 from spras.dataset import Dataset
 from spras.evaluation import Evaluation
 from spras.analysis import ml, summary, cytoscape
-import spras.config as _config
+import spras.config.config as _config
 
 # Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037
 # and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them
diff --git a/config/config.yaml b/config/config.yaml
index 8ee0b75a1..1e5f1d561 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -169,8 +169,6 @@ reconstruction_settings:
     # TODO move to global
     reconstruction_dir: "output"
 
-    run: true
-
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
   summary:
diff --git a/config/egfr.yaml b/config/egfr.yaml
index d744e9ec8..667cb55f0 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -146,7 +146,6 @@ gold_standards:
 reconstruction_settings:
   locations:
     reconstruction_dir: output/egfr
-    run: true
 analysis:
   cytoscape:
     include: true
diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml
index d729a0375..87e996a9c 100644
--- a/docker-wrappers/SPRAS/example_config.yaml
+++ b/docker-wrappers/SPRAS/example_config.yaml
@@ -122,8 +122,6 @@ reconstruction_settings:
     # TODO move to global
     reconstruction_dir: "output"
 
-    run: true
-
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
   summary:
diff --git a/environment.yml b/environment.yml
index a0fb02293..c0c550074 100644
--- a/environment.yml
+++ b/environment.yml
@@ -9,6 +9,7 @@ dependencies:
   - matplotlib=3.10.3
   - networkx=3.5
   - pandas=2.3.0
+  - pydantic=2.11.7
   - numpy=2.3.1
   - requests=2.32.4
   - scikit-learn=1.7.0
diff --git a/pyproject.toml b/pyproject.toml
index eee89b240..a3beee474 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "matplotlib==3.10.3",
     "networkx==3.5",
     "pandas==2.3.0",
+    "pydantic==2.11.7",
     "numpy==2.3.1",
     "requests==2.32.4",
     "scikit-learn==1.7.0",
@@ -73,4 +74,4 @@ select = [
 # py-modules tells setuptools which directory is our actual module
 py-modules = ["spras"]
 # packages tells setuptools what the exported package is called (ie allows import spras)
-packages = ["spras", "spras.analysis"]
+packages = ["spras", "spras.analysis", "spras.config"]
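Why the stale `run: true` keys above must be deleted: the new schema models (introduced later in this diff) use `ConfigDict(extra='forbid')`, so leftover keys now fail validation instead of being silently ignored. A minimal standalone sketch mirroring the new `Locations` model:

```python
# Sketch only, not part of the PR: extra='forbid' rejects the removed `run` key.
from pydantic import BaseModel, ConfigDict, ValidationError

class Locations(BaseModel):
    reconstruction_dir: str
    model_config = ConfigDict(extra='forbid')

try:
    Locations.model_validate({"reconstruction_dir": "output", "run": True})
except ValidationError as e:
    print(e)  # reports "Extra inputs are not permitted" for the `run` key
```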
diff --git a/spras/config/__init__.py b/spras/config/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spras/config.py b/spras/config/config.py
similarity index 60%
rename from spras/config.py
rename to spras/config/config.py
index 4811ba820..605faecf4 100644
--- a/spras/config.py
+++ b/spras/config/config.py
@@ -6,7 +6,7 @@ module that imports this module can access a config option by checking the
 object's value. For example
 
-import spras.config as config
+import spras.config.config as config
 
 container_framework = config.config.container_framework
 
 will grab the top level registry configuration option as it appears in the config file
@@ -15,20 +15,20 @@
 import copy as copy
 import itertools as it
 import os
-import re
+import warnings
 from collections.abc import Iterable
+from typing import Any
 
 import numpy as np
 import yaml
 
+from spras.config.schema import ContainerFramework, RawConfig
 from spras.util import NpHashEncoder, hash_params_sha1_base32
 
-# The default length of the truncated hash used to identify parameter combinations
-DEFAULT_HASH_LENGTH = 7
-DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio"
-
 config = None
 
+DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio"
+
 # This will get called in the Snakefile, instantiating the singleton with the raw config
 def init_global(config_dict):
     global config
@@ -42,39 +42,41 @@ def init_from_file(filepath):
     try:
         with open(filepath, 'r') as yaml_file:
             config_dict = yaml.safe_load(yaml_file)
-    except FileNotFoundError:
-        print(f"Error: The specified config '{filepath}' could not be found.")
-        return False
+    except FileNotFoundError as e:
+        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.") from e
     except yaml.YAMLError as e:
-        print(f"Error: Failed to parse config '{filepath}': {e}")
-        return False
+        raise RuntimeError(f"Error: Failed to parse config '{filepath}'") from e
 
     # And finally, initialize
     config = Config(config_dict)
 
 class Config:
-    def __init__(self, raw_config):
-        # Since process_config winds up modifying the raw_config passed to it as a side effect,
-        # we'll make a deep copy here to guarantee we don't break anything. This preserves the
-        # config as it's given to the Snakefile by Snakemake
+    def __init__(self, raw_config: dict[str, Any]):
+        # Since snakemake provides an empty config, we provide this
+        # wrapper error first before passing validation to pydantic.
+        if raw_config == {}:
+            raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
+
+        parsed_raw_config = RawConfig.model_validate(raw_config)
+
+        # Member vars populated by process_config. Any values that don't have quick initial values are set to None
+        # before they are populated for __init__ to show exactly what is being configured.
 
-        # Member vars populated by process_config. Set to None before they are populated so that our
-        # __init__ makes clear exactly what is being configured.
         # Directory used for storing output
-        self.out_dir = None
+        self.out_dir = parsed_raw_config.reconstruction_settings.locations.reconstruction_dir
         # Container framework used by PRMs. Valid options are "docker", "dsub", and "singularity"
         self.container_framework = None
         # The container prefix (host and organization) to use for images. Default is "docker.io/reedcompbio"
-        self.container_prefix = DEFAULT_CONTAINER_PREFIX
+        self.container_prefix: str = DEFAULT_CONTAINER_PREFIX
         # A Boolean specifying whether to unpack singularity containers. Default is False
        self.unpack_singularity = False
         # A dictionary to store configured datasets against which SPRAS will be run
         self.datasets = None
         # A dictionary to store configured gold standard data against output of SPRAS runs
         self.gold_standards = None
-        # The hash length SPRAS will use to identify parameter combinations. Default is 7
-        self.hash_length = DEFAULT_HASH_LENGTH
+        # The hash length SPRAS will use to identify parameter combinations.
+        self.hash_length = parsed_raw_config.hash_length
         # The list of algorithms to run in the workflow. Each is a dict with 'name' as an expected key.
         self.algorithms = None
         # A nested dict mapping algorithm names to dicts that map parameter hashes to parameter combinations.
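The error handling above changes from print-and-return-False to raising. A small sketch (hypothetical caller, assuming this PR's module layout) of the new failure mode:

```python
# init_from_file now raises RuntimeError instead of returning False, so callers
# cannot silently continue with an uninitialized config singleton.
import spras.config.config as config

try:
    config.init_from_file("config/missing.yaml")
except RuntimeError as e:
    # The original FileNotFoundError or yaml.YAMLError is chained via `raise ... from e`
    print(f"config load failed: {e}; caused by: {e.__cause__!r}")
```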
@@ -83,9 +85,11 @@ def __init__(self, raw_config):
         # Deprecated. Previously a dict mapping algorithm names to a Boolean tracking whether they used directed graphs.
         self.algorithm_directed = None
         # A dict with the analysis settings
-        self.analysis_params = None
+        self.analysis_params = parsed_raw_config.analysis
+        # A dict with the evaluation settings
+        self.evaluation_params = self.analysis_params.evaluation
         # A dict with the ML settings
-        self.ml_params = None
+        self.ml_params = self.analysis_params.ml
         # A Boolean specifying whether to run ML analysis for individual algorithms
         self.analysis_include_ml_aggregate_algo = None
         # A dict with the PCA settings
@@ -105,69 +109,29 @@ def __init__(self, raw_config):
         # A Boolean specifying whether to run the evaluation per algorithm analysis
         self.analysis_include_evaluation_aggregate_algo = None
 
-        _raw_config = copy.deepcopy(raw_config)
-        self.process_config(_raw_config)
-
-    def process_config(self, raw_config):
-        if raw_config == {}:
-            raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
-
-        # Set up a few top-level config variables
-        self.out_dir = raw_config["reconstruction_settings"]["locations"]["reconstruction_dir"]
-
-        # We allow the container framework not to be defined in the config. In the case it isn't, default to docker.
-        # However, if we get a bad value, we raise an exception.
-        if "container_framework" in raw_config:
-            container_framework = raw_config["container_framework"].lower()
-            if container_framework not in ("docker", "singularity", "dsub"):
-                msg = "SPRAS was configured to run with an unknown container framework: '" + raw_config["container_framework"] + "'. Accepted values are 'docker', 'singularity' or 'dsub'."
-                raise ValueError(msg)
-            if container_framework == "dsub":
-                print("Warning: 'dsub' framework integration is experimental and may not be fully supported.")
-            self.container_framework = container_framework
-        else:
-            self.container_framework = "docker"
-
-        # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container.
-        if "unpack_singularity" in raw_config:
-            # The value in the config is a string, and we need to convert it to a bool.
-            unpack_singularity = raw_config["unpack_singularity"]
-            if unpack_singularity and self.container_framework != "singularity":
-                print("Warning: unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.")
-            self.unpack_singularity = unpack_singularity
-
-        # Grab registry from the config, and if none is provided default to docker
-        if "container_registry" in raw_config and raw_config["container_registry"]["base_url"] != "" and raw_config["container_registry"]["owner"] != "":
-            self.container_prefix = raw_config["container_registry"]["base_url"] + "/" + raw_config["container_registry"]["owner"]
+        self.process_config(parsed_raw_config)
 
-        # Parse dataset information
-        # Datasets is initially a list, where each list entry has a dataset label and lists of input files
-        # Convert the dataset list into a dict where the label is the key and update the config data structure
+    def process_datasets(self, raw_config: RawConfig):
+        """
+        Parse dataset information
+        Datasets is initially a list, where each list entry has a dataset label and lists of input files
+        Convert the dataset list into a dict where the label is the key and update the config data structure
+        """
         # TODO allow labels to be optional and assign default labels
-        # TODO check for collisions in dataset labels, warn, and make the labels unique
         # Need to work more on input file naming to make less strict assumptions
         # about the filename structure
         # Currently assumes all datasets have a label and the labels are unique
         # When Snakemake parses the config file it loads the datasets as OrderedDicts not dicts
         # Convert to dicts to simplify the yaml logging
-        self.datasets = {dataset["label"]: dict(dataset) for dataset in raw_config["datasets"]}
-
-        for key in self.datasets:
-            pattern = r'^\w+$'
-            if not bool(re.match(pattern, key)):
-                raise ValueError(f"Dataset label \'{key}\' contains invalid values. Dataset labels can only contain letters, numbers, or underscores.")
+        self.datasets = {}
+        for dataset in raw_config.datasets:
+            label = dataset.label
+            if label.lower() in [key.lower() for key in self.datasets.keys()]:
+                raise ValueError(f"Datasets must have unique case-insensitive labels, but the label {label} appears at least twice.")
+            self.datasets[label] = dict(dataset)
 
         # parse gold standard information
-        try:
-            self.gold_standards = {gold_standard["label"]: dict(gold_standard) for gold_standard in raw_config["gold_standards"]}
-        except:
-            self.gold_standards = {}
-
-        # check that gold_standard labels are formatted correctly
-        for key in self.gold_standards:
-            pattern = r'^\w+$'
-            if not bool(re.match(pattern, key)):
-                raise ValueError(f"Gold standard label \'{key}\' contains invalid values. Gold standard labels can only contain letters, numbers, or underscores.")
+        self.gold_standards = {gold_standard.label: dict(gold_standard) for gold_standard in raw_config.gold_standards}
 
         # check that all the dataset labels in the gold standards are existing datasets labels
         dataset_labels = set(self.datasets.keys())
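The loop above also tightens the old duplicate-label TODO into a hard error. A minimal standalone sketch (function name hypothetical) of the case-insensitive rule:

```python
# Dataset labels must now be unique case-insensitively, so "Egfr" and "egfr" collide.
def index_datasets(labels: list[str]) -> dict[str, str]:
    datasets: dict[str, str] = {}
    for label in labels:
        if label.lower() in (key.lower() for key in datasets):
            raise ValueError(f"Datasets must have unique case-insensitive labels, "
                             f"but the label {label} appears at least twice.")
        datasets[label] = label
    return datasets

index_datasets(["egfr", "tps"])      # fine
# index_datasets(["Egfr", "egfr"])   # raises ValueError
```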
@@ -181,35 +145,36 @@ def process_config(self, raw_config):
         # Maps from the dataset label to the dataset list index
         # dataset_dict = {dataset.get('label', f'dataset{index}'): index for index, dataset in enumerate(datasets)}
 
-        # Override the default parameter hash length if specified in the config file
-        if "hash_length" in raw_config and raw_config["hash_length"] != "":
-            self.hash_length = int(raw_config["hash_length"])
-
+    def process_algorithms(self, raw_config: RawConfig):
+        """
+        Parse algorithm information
+        Each algorithm's parameters are provided as a list of dictionaries
+        Defaults are handled in the Python function or class that wraps
+        running that algorithm
+        Keys in the parameter dictionary are strings
+        """
         prior_params_hashes = set()
-
-        # Parse algorithm information
-        # Each algorithm's parameters are provided as a list of dictionaries
-        # Defaults are handled in the Python function or class that wraps
-        # running that algorithm
-        # Keys in the parameter dictionary are strings
         self.algorithm_params = dict()
         self.algorithm_directed = dict()
-        self.algorithms = raw_config["algorithms"]
+        self.algorithms = raw_config.algorithms
         for alg in self.algorithms:
-            cur_params = alg["params"]
-            if "include" in cur_params and cur_params.pop("include"):
+            cur_params = alg.params
+            if cur_params.include:
                 # This dict maps from parameter combinations hashes to parameter combination dictionaries
-                self.algorithm_params[alg["name"]] = dict()
+                self.algorithm_params[alg.name] = dict()
             else:
                 # Do not parse the rest of the parameters for this algorithm if it is not included
                 continue
 
-            if "directed" in cur_params:
-                print("UPDATE: we no longer use the directed key in the config file")
-                cur_params.pop("directed")
+            if cur_params.directed is not None:
+                warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2)
+
+            cur_params = cur_params.__pydantic_extra__
+            if cur_params is None:
+                raise RuntimeError("An internal error occurred: ConfigDict extra should be set on AlgorithmParams.")
 
             # The algorithm has no named arguments so create a default placeholder
-            if len(cur_params) == 0:
+            if len(cur_params.keys()) == 0:
                 cur_params["run1"] = {"spras_placeholder": ["no parameters"]}
 
             # Each set of runs should be 1 level down in the config file
@@ -240,7 +205,7 @@ def process_config(self, raw_config):
                         # Catch-all for strings
                         obj = [obj]
                     if not isinstance(obj, Iterable):
-                        raise ValueError(f"The object `{obj}` in algorithm {alg['name']} at key '{p}' in run '{run_params}' is not iterable!") from None
+                        raise ValueError(f"The object `{obj}` in algorithm {alg.name} at key '{p}' in run '{run_params}' is not iterable!") from None
                 all_runs.append(obj)
                 run_list_tuples = list(it.product(*all_runs))
                 param_name_tuple = tuple(param_name_list)
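The `__pydantic_extra__` access above is the key trick: declared fields (`include`, `directed`) are validated, while the free-form per-run parameters ride along as extras. A standalone sketch of that pattern:

```python
# Sketch mirroring AlgorithmParams: extra inputs are kept and retrieved as a dict
# via __pydantic_extra__ when model_config allows extras.
from typing import Optional
from pydantic import BaseModel, ConfigDict

class AlgorithmParams(BaseModel):
    include: bool
    directed: Optional[bool] = None
    model_config = ConfigDict(extra='allow')

params = AlgorithmParams.model_validate({"include": True, "run1": {"k": [10, 100]}})
print(params.__pydantic_extra__)  # {'run1': {'k': [10, 100]}}
```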
@@ -261,38 +226,33 @@ def process_config(self, raw_config):
                 if params_hash in prior_params_hashes:
                     raise ValueError(f'Parameter hash collision detected. Increase the hash_length in the config file '
                                      f'(current length {self.hash_length}).')
-                self.algorithm_params[alg["name"]][params_hash] = run_dict
-
-        self.analysis_params = raw_config["analysis"] if "analysis" in raw_config else {}
-        self.ml_params = self.analysis_params["ml"] if "ml" in self.analysis_params else {}
-        self.evaluation_params = self.analysis_params["evaluation"] if "evaluation" in self.analysis_params else {}
-
-        self.pca_params = {}
-        if "components" in self.ml_params:
-            self.pca_params["components"] = self.ml_params["components"]
-        if "labels" in self.ml_params:
-            self.pca_params["labels"] = self.ml_params["labels"]
-        if "kde" in self.ml_params:
-            self.pca_params["kde"] = self.ml_params["kde"]
-        else:
-            self.pca_params["kde"] = False
-        if "remove_empty_pathways" in self.ml_params:
-            self.pca_params["remove_empty_pathways"] = self.ml_params["remove_empty_pathways"]
-
-        self.hac_params = {}
-        if "linkage" in self.ml_params:
-            self.hac_params["linkage"] = self.ml_params["linkage"]
-        if "metric" in self.ml_params:
-            self.hac_params["metric"] = self.ml_params ["metric"]
-
-        self.analysis_include_summary = raw_config["analysis"]["summary"]["include"]
-        self.analysis_include_cytoscape = raw_config["analysis"]["cytoscape"]["include"]
-        self.analysis_include_ml = raw_config["analysis"]["ml"]["include"]
-        self.analysis_include_evaluation = raw_config["analysis"]["evaluation"]["include"]
+                self.algorithm_params[alg.name][params_hash] = run_dict
+
+    def process_analysis(self, raw_config: RawConfig):
+        if not raw_config.analysis:
+            return
+
+        # self.ml_params is a class, pca_params needs to be a dict.
+        self.pca_params = {
+            "components": self.ml_params.components,
+            "labels": self.ml_params.labels,
+            "kde": self.ml_params.kde,
+            "remove_empty_pathways": self.ml_params.remove_empty_pathways
+        }
+
+        self.hac_params = {
+            "linkage": self.ml_params.linkage,
+            "metric": self.ml_params.metric
+        }
+
+        self.analysis_include_summary = raw_config.analysis.summary.include
+        self.analysis_include_cytoscape = raw_config.analysis.cytoscape.include
+        self.analysis_include_ml = raw_config.analysis.ml.include
+        self.analysis_include_evaluation = raw_config.analysis.evaluation.include
 
         # Only run ML aggregate per algorithm if analysis include ML is set to True
-        if 'aggregate_per_algorithm' in self.ml_params and self.analysis_include_ml:
-            self.analysis_include_ml_aggregate_algo = raw_config["analysis"]["ml"]["aggregate_per_algorithm"]
+        if self.ml_params.aggregate_per_algorithm and self.analysis_include_ml:
+            self.analysis_include_ml_aggregate_algo = raw_config.analysis.ml.aggregate_per_algorithm
         else:
            self.analysis_include_ml_aggregate_algo = False
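Throughout this file the diff also swaps `print` for `warnings.warn(..., stacklevel=2)`. A toy illustration (not from the PR) of why that matters:

```python
# With stacklevel=2 the warning is attributed to the caller's line rather than
# the config module's, and test suites can capture it instead of scraping stdout.
import warnings

def process(directed=None):
    if directed is not None:
        warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    process(directed=True)
print(caught[0].message)
```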
@@ -306,8 +266,8 @@ def process_config(self, raw_config):
             self.analysis_include_evaluation = False
 
         # Only run Evaluation aggregate per algorithm if analysis include ML is set to True
-        if 'aggregate_per_algorithm' in self.evaluation_params and self.analysis_include_evaluation:
-            self.analysis_include_evaluation_aggregate_algo = raw_config["analysis"]["evaluation"]["aggregate_per_algorithm"]
+        if self.evaluation_params.aggregate_per_algorithm and self.analysis_include_evaluation:
+            self.analysis_include_evaluation_aggregate_algo = raw_config.analysis.evaluation.aggregate_per_algorithm
         else:
             self.analysis_include_evaluation_aggregate_algo = False
 
@@ -321,3 +281,24 @@ def process_config(self, raw_config):
         if self.analysis_include_evaluation and not self.pca_params["kde"]:
             self.pca_params["kde"] = True
             print("Setting kde to true; Evaluation analysis needs to run KDE for PCA-Chosen parameter selection.")
+
+    def process_config(self, raw_config: RawConfig):
+        # Set up a few top-level config variables
+        self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir
+
+        if raw_config.container_framework == ContainerFramework.dsub:
+            warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2)
+        self.container_framework = raw_config.container_framework
+
+        # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container.
+        if raw_config.unpack_singularity and self.container_framework != "singularity":
+            warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.", stacklevel=2)
+        self.unpack_singularity = raw_config.unpack_singularity
+
+        # Grab registry from the config, and if none is provided default to docker
+        if raw_config.container_registry and raw_config.container_registry.base_url != "" and raw_config.container_registry.owner != "":
+            self.container_prefix = raw_config.container_registry.base_url + "/" + raw_config.container_registry.owner
+
+        self.process_datasets(raw_config)
+        self.process_algorithms(raw_config)
+        self.process_analysis(raw_config)
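Putting the pieces of config.py together, here is a sketch of the singleton lifecycle after the refactor, mirroring what the Snakefile does (the YAML-shaped dict is an illustrative fragment under this PR's schema, not a complete SPRAS config):

```python
import spras.config.config as config

raw = {
    "container_framework": "docker",
    "container_registry": {"base_url": "docker.io", "owner": "reedcompbio"},
    "algorithms": [{"name": "pathlinker", "params": {"include": False}}],
    "datasets": [{"label": "data0", "node_files": [], "edge_files": [],
                  "other_files": [], "data_dir": "input"}],
    "reconstruction_settings": {"locations": {"reconstruction_dir": "output"}},
}
config.init_global(raw)  # validates via RawConfig.model_validate under the hood
print(config.config.container_prefix)  # docker.io/reedcompbio
```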
diff --git a/spras/config/schema.py b/spras/config/schema.py
new file mode 100644
index 000000000..c84ea4384
--- /dev/null
+++ b/spras/config/schema.py
@@ -0,0 +1,168 @@
+"""
+Contains the raw pydantic schema for the configuration file.
+
+Using Pydantic as our backing config parser allows us to declaratively
+type our config, giving us more robust user errors with guarantees
+that parts of the config exist after parsing it through Pydantic.
+
+We declare models using two classes here:
+- `BaseModel` (docs: https://docs.pydantic.dev/latest/concepts/models/)
+- `CaseInsensitiveEnum` (see ./util.py)
+"""
+
+import re
+from typing import Annotated, Optional
+
+from pydantic import AfterValidator, BaseModel, ConfigDict, Field
+
+from spras.config.util import CaseInsensitiveEnum
+
+# Most options here have an `include` property,
+# which is meant to make disabling parts of the configuration easier.
+# When an option does not have a default, it means that it must be set by the user.
+
+class SummaryAnalysis(BaseModel):
+    include: bool
+
+    # We prefer to never allow extra keys, to prevent
+    # any user mistypes.
+    model_config = ConfigDict(extra='forbid')
+
+class CytoscapeAnalysis(BaseModel):
+    include: bool
+
+    model_config = ConfigDict(extra='forbid')
+
+# Note that CaseInsensitiveEnum is not pydantic: pydantic
+# has special support for enums, but we avoid the
+# pydantic-specific "model_config" key here for this reason.
+class MlLinkage(CaseInsensitiveEnum):
+    ward = 'ward'
+    complete = 'complete'
+    average = 'average'
+    single = 'single'
+
+class MlMetric(CaseInsensitiveEnum):
+    euclidean = 'euclidean'
+    manhattan = 'manhattan'
+    cosine = 'cosine'
+
+class MlAnalysis(BaseModel):
+    include: bool
+    aggregate_per_algorithm: bool = False
+    components: int = 2
+    labels: bool = True
+    kde: bool = False
+    remove_empty_pathways: bool = False
+    linkage: MlLinkage = MlLinkage.ward
+    metric: MlMetric = MlMetric.euclidean
+
+    model_config = ConfigDict(extra='forbid')
+
+class EvaluationAnalysis(BaseModel):
+    include: bool
+    aggregate_per_algorithm: bool = False
+
+    model_config = ConfigDict(extra='forbid')
+
+class Analysis(BaseModel):
+    summary: SummaryAnalysis = SummaryAnalysis(include=False)
+    cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False)
+    ml: MlAnalysis = MlAnalysis(include=False)
+    evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False)
+
+    model_config = ConfigDict(extra='forbid')
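A quick sketch of how the nested defaults above behave (assumes this PR's `spras.config.schema` module): omitted analysis sections collapse to `include=False`, so a config only has to mention what it enables.

```python
from spras.config.schema import Analysis

analysis = Analysis.model_validate({"summary": {"include": True}})
print(analysis.summary.include)   # True
print(analysis.ml.include)        # False, the MlAnalysis(include=False) default
print(analysis.ml.linkage.value)  # 'ward', the default linkage
```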
+
+# The default length of the truncated hash used to identify parameter combinations
+DEFAULT_HASH_LENGTH = 7
+
+def label_validator(name: str):
+    """
+    Returns a validator that takes in a label
+    and ensures that it contains only letters, numbers, or underscores.
+    """
+    label_pattern = r'^\w+$'
+    def validate(label: str):
+        if not bool(re.match(label_pattern, label)):
+            raise ValueError(f"{name} label '{label}' contains invalid values. {name} labels can only contain letters, numbers, or underscores.")
+        return label
+    return validate
+
+class ContainerFramework(CaseInsensitiveEnum):
+    docker = 'docker'
+    # TODO: add apptainer variant once #260 gets merged
+    singularity = 'singularity'
+    dsub = 'dsub'
+
+class ContainerRegistry(BaseModel):
+    base_url: str
+    owner: str = Field(description="The owner or project of the registry")
+
+    model_config = ConfigDict(extra='forbid')
+
+class AlgorithmParams(BaseModel):
+    include: bool
+    directed: Optional[bool] = None
+
+    # TODO: use array of runs instead. We currently rely on the
+    # extra parameters here to extract the algorithm parameter information,
+    # which is why this deviates from the usual ConfigDict(extra='forbid').
+    model_config = ConfigDict(extra='allow')
+
+class Algorithm(BaseModel):
+    name: str
+    params: AlgorithmParams
+
+    model_config = ConfigDict(extra='forbid')
+
+class Dataset(BaseModel):
+    # We prefer AfterValidator here to allow pydantic to run its own
+    # validation & coercion logic before we check it against our own
+    # requirements
+    label: Annotated[str, AfterValidator(label_validator("Dataset"))]
+    node_files: list[str]
+    edge_files: list[str]
+    other_files: list[str]
+    data_dir: str
+
+    model_config = ConfigDict(extra='forbid')
+
+class GoldStandard(BaseModel):
+    label: Annotated[str, AfterValidator(label_validator("Gold Standard"))]
+    node_files: list[str]
+    data_dir: str
+    dataset_labels: list[str]
+
+    model_config = ConfigDict(extra='forbid')
+
+class Locations(BaseModel):
+    reconstruction_dir: str
+
+    model_config = ConfigDict(extra='forbid')
+
+# NOTE: This setting doesn't have any uses past setting the output_dir as of now.
+class ReconstructionSettings(BaseModel):
+    locations: Locations
+
+    model_config = ConfigDict(extra='forbid')
+
+class RawConfig(BaseModel):
+    # TODO: move these container values to a nested container key
+    container_framework: ContainerFramework = ContainerFramework.docker
+    unpack_singularity: bool = False
+    container_registry: ContainerRegistry
+
+    hash_length: int = DEFAULT_HASH_LENGTH
+    "The length of the hash used to identify a parameter combination"
+
+    algorithms: list[Algorithm]
+    datasets: list[Dataset]
+    gold_standards: list[GoldStandard] = []
+    analysis: Analysis = Analysis()
+
+    reconstruction_settings: ReconstructionSettings
+
+    # We include use_attribute_docstrings here to preserve the docstrings
+    # after attributes at runtime (for future JSON schema generation)
+    model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True)
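The `Annotated[str, AfterValidator(...)]` pattern used for labels replaces the old hand-rolled `re.match` loops in config.py. A standalone sketch (model and helper names mirror the schema but are redefined here for runnability):

```python
# pydantic first coerces/validates the str, then the custom check runs afterwards.
import re
from typing import Annotated
from pydantic import AfterValidator, BaseModel, ValidationError

def check_label(label: str) -> str:
    if not re.match(r'^\w+$', label):
        raise ValueError(f"label '{label}' contains invalid values")
    return label

class Dataset(BaseModel):
    label: Annotated[str, AfterValidator(check_label)]

Dataset(label="egfr_2024")  # ok
try:
    Dataset(label="bad-label!")
except ValidationError as e:
    print(e)  # pydantic wraps our ValueError with field context
```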
diff --git a/spras/config/util.py b/spras/config/util.py
new file mode 100644
index 000000000..b7680222b
--- /dev/null
+++ b/spras/config/util.py
@@ -0,0 +1,19 @@
+from enum import Enum
+from typing import Any
+
+
+# https://stackoverflow.com/a/76883868/7589775
+class CaseInsensitiveEnum(str, Enum):
+    """
+    We prefer this over Enum to make sure the config parsing
+    is more relaxed when it comes to string enum values.
+    """
+    @classmethod
+    def _missing_(cls, value: Any):
+        if isinstance(value, str):
+            value = value.lower()
+
+        for member in cls:
+            if member.lower() == value:
+                return member
+        return None
diff --git a/spras/containers.py b/spras/containers.py
index 2209df554..b7711c4f6 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -8,7 +8,7 @@
 import docker
 import docker.errors
 
-import spras.config as config
+import spras.config.config as config
 from spras.logging import indent
 from spras.util import hash_filename
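A quick standalone demo of the `_missing_` hook above: lookups by value fall back to a lowercase comparison, so user-facing strings are case-insensitive (the class body is copied from util.py; `ContainerFramework` is trimmed for brevity).

```python
from enum import Enum
from typing import Any

class CaseInsensitiveEnum(str, Enum):
    @classmethod
    def _missing_(cls, value: Any):
        if isinstance(value, str):
            value = value.lower()
        for member in cls:
            if member.lower() == value:
                return member
        return None

class ContainerFramework(CaseInsensitiveEnum):
    docker = 'docker'
    singularity = 'singularity'

print(ContainerFramework("Docker"))       # ContainerFramework.docker
print(ContainerFramework("SINGULARITY"))  # ContainerFramework.singularity
```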
diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py
index 57c5640d2..a8291f72f 100644
--- a/test/AllPairs/test_ap.py
+++ b/test/AllPairs/test_ap.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.allpairs import AllPairs
 
 # Note that we don't directly use the config in the test, but we need the config
diff --git a/test/BowTieBuilder/test_btb.py b/test/BowTieBuilder/test_btb.py
index ad85f9bb7..4f0952f16 100644
--- a/test/BowTieBuilder/test_btb.py
+++ b/test/BowTieBuilder/test_btb.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 
 config.init_from_file("config/config.yaml")
diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py
index e903bc2fd..05bd3ae70 100644
--- a/test/DOMINO/test_domino.py
+++ b/test/DOMINO/test_domino.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.domino import DOMINO, post_domino_id_transform, pre_domino_id_transform
 
 config.init_from_file("config/config.yaml")
diff --git a/test/LocalNeighborhood/test_ln.py b/test/LocalNeighborhood/test_ln.py
index fbee54902..9093efc68 100644
--- a/test/LocalNeighborhood/test_ln.py
+++ b/test/LocalNeighborhood/test_ln.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 
 config.init_from_file("config/config.yaml")
diff --git a/test/MEO/test_meo.py b/test/MEO/test_meo.py
index e2abdb72d..32958be20 100644
--- a/test/MEO/test_meo.py
+++ b/test/MEO/test_meo.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.meo import MEO, write_properties
 
 config.init_from_file("config/config.yaml")
diff --git a/test/MinCostFlow/test_mcf.py b/test/MinCostFlow/test_mcf.py
index 89bd61d0b..c777a665d 100644
--- a/test/MinCostFlow/test_mcf.py
+++ b/test/MinCostFlow/test_mcf.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.mincostflow import MinCostFlow
 
 config.init_from_file("config/config.yaml")
diff --git a/test/OmicsIntegrator1/test_oi1.py b/test/OmicsIntegrator1/test_oi1.py
index 35b41d428..a484c0af3 100644
--- a/test/OmicsIntegrator1/test_oi1.py
+++ b/test/OmicsIntegrator1/test_oi1.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.omicsintegrator1 import OmicsIntegrator1, write_conf
 
 config.init_from_file("config/config.yaml")
diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py
index 311a9c7e7..aa74cd94e 100644
--- a/test/OmicsIntegrator2/test_oi2.py
+++ b/test/OmicsIntegrator2/test_oi2.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.omicsintegrator2 import OmicsIntegrator2
 
 config.init_from_file("config/config.yaml")
diff --git a/test/PathLinker/test_pathlinker.py b/test/PathLinker/test_pathlinker.py
index 3fd6a96bd..ed9f10670 100644
--- a/test/PathLinker/test_pathlinker.py
+++ b/test/PathLinker/test_pathlinker.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.pathlinker import PathLinker
 
 config.init_from_file("config/config.yaml")
diff --git a/test/RWR/test_RWR.py b/test/RWR/test_RWR.py
index 4d6ce7864..b0316ded0 100644
--- a/test/RWR/test_RWR.py
+++ b/test/RWR/test_RWR.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.rwr import RWR
 
 config.init_from_file("config/config.yaml")
diff --git a/test/ResponseNet/test_rn.py b/test/ResponseNet/test_rn.py
index 6fa09904b..6b9fe05cf 100644
--- a/test/ResponseNet/test_rn.py
+++ b/test/ResponseNet/test_rn.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.responsenet import ResponseNet
 
 config.init_from_file("config/config.yaml")
diff --git a/test/ST_RWR/test_STRWR.py b/test/ST_RWR/test_STRWR.py
index a0a5b4ea9..898b24055 100644
--- a/test/ST_RWR/test_STRWR.py
+++ b/test/ST_RWR/test_STRWR.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.strwr import ST_RWR
 
 config.init_from_file("config/config.yaml")
diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml
index c9eaa437a..abde6f979 100644
--- a/test/analysis/input/config.yaml
+++ b/test/analysis/input/config.yaml
@@ -102,7 +102,6 @@ reconstruction_settings:
   locations:
     #place the save path here
     reconstruction_dir: "output"
-    run: true
 
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml
index 4a31dad46..da4560df9 100644
--- a/test/analysis/input/egfr.yaml
+++ b/test/analysis/input/egfr.yaml
@@ -91,7 +91,6 @@ datasets:
 reconstruction_settings:
   locations:
     reconstruction_dir: output/egfr
-    run: true
 analysis:
   cytoscape:
     include: true
diff --git a/test/analysis/test_cytoscape.py b/test/analysis/test_cytoscape.py
index 7451b9876..68a77cd07 100644
--- a/test/analysis/test_cytoscape.py
+++ b/test/analysis/test_cytoscape.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.analysis.cytoscape import run_cytoscape
 
 config.init_from_file("test/analysis/input/config.yaml")
diff --git a/test/analysis/test_summary.py b/test/analysis/test_summary.py
index 4ff5396da..0400d1f1b 100644
--- a/test/analysis/test_summary.py
+++ b/test/analysis/test_summary.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 
-import spras.config as config
+import spras.config.config as config
 from spras.analysis.summary import summarize_networks
 from spras.dataset import Dataset
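Every test module above follows the same two-line change. For reference, a sketch of the preamble a hypothetical new test file would use after this rename (the config path matches the existing tests):

```python
import pytest

import spras.config.config as config

config.init_from_file("config/config.yaml")

def test_config_is_initialized():
    assert config.config is not None
```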
diff --git a/test/test_config.py b/test/test_config.py
index 39b091b5e..72df9e0b9 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -3,8 +3,15 @@
 import numpy as np
 import pytest
 
-import spras.config as config
+import spras.config.config as config
+from spras.config.schema import DEFAULT_HASH_LENGTH
 
+filler_dataset_data: dict[str, str | list[str]] = {
+    "data_dir": "fake",
+    "edge_files": [],
+    "other_files": [],
+    "node_files": []
+}
 
 # Set up a dummy config for testing. For now, only include things that MUST exist in the dict
 # in order for the config init to complete. To test particular parts of the config initialization,
@@ -22,10 +29,26 @@ def get_test_config():
                 "reconstruction_dir": "my_dir"
             }
         },
-        "datasets": [{"label": "alg1"}, {"label": "alg2"}],
-        "gold_standards": [{"label": "gs1", "dataset_labels": []}],
+        "datasets": [{
+            "label": "alg1",
+            "data_dir": "fake",
+            "edge_files": [],
+            "other_files": [],
+            "node_files": []
+        }, {
+            "label": "alg2",
+            "data_dir": "faux",
+            "edge_files": [],
+            "other_files": [],
+            "node_files": []
+        }],
+        "gold_standards": [{
+            "label": "gs1",
+            "dataset_labels": [],
+            "node_files": [],
+            "data_dir": "gs-fake"
+        }],
         "algorithms": [
-            {"params": ["param2", "param2"]},
             {
                 "name": "strings",
                 "params": {
@@ -123,9 +146,9 @@ def test_config_hash_length(self):
         config.init_global(test_config)
         assert (config.config.hash_length == 7)
 
-        test_config["hash_length"] = ""
+        test_config.pop("hash_length", None)
         config.init_global(test_config)
-        assert (config.config.hash_length == config.DEFAULT_HASH_LENGTH)
+        assert (config.config.hash_length == DEFAULT_HASH_LENGTH)
 
         # Initialize the configuration
         test_config["hash_length"] = "12"
@@ -191,6 +214,7 @@ def test_correct_dataset_label(self):
         test_config = get_test_config()
         correct_test_dicts = [{"label": "test"}, {"label": "123"}, {"label": "test123"}, {"label": "123test"}, {"label": "_"},
                               {"label": "test_test"}, {"label": "_test"}, {"label": "test_"}]
+        correct_test_dicts = [dict(list(d.items()) + list(filler_dataset_data.items())) for d in correct_test_dicts]
 
         for test_dict in correct_test_dicts:
             test_config["datasets"] = [test_dict]
diff --git a/test/test_util.py b/test/test_util.py
index baf9db0ed..2a25fc0d1 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.containers import convert_docker_path, prepare_path_docker, prepare_volume
 from spras.util import hash_params_sha1_base32
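A closing sketch of why `get_test_config` grew the filler fields (and why the nameless `{"params": ["param2", "param2"]}` algorithm entry had to go): the dummy config must now be schema-complete, since `RawConfig` requires every `Dataset` field (assumes this PR's `spras.config.schema`).

```python
from spras.config.schema import RawConfig

minimal = {
    "container_registry": {"base_url": "docker.io", "owner": "reedcompbio"},
    "algorithms": [{"name": "strings", "params": {"include": False}}],
    "datasets": [{"label": "alg1", "data_dir": "fake",
                  "edge_files": [], "other_files": [], "node_files": []}],
    "reconstruction_settings": {"locations": {"reconstruction_dir": "my_dir"}},
}
RawConfig.model_validate(minimal)  # passes; drop "data_dir" and it fails
```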