Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
c2e64f7
feat: begin config refactor
tristan-f-r Jun 25, 2025
4d1a19c
feat: mostly structured config
tristan-f-r Jun 25, 2025
b56ecde
feat: add enum variants on ml
tristan-f-r Jun 25, 2025
bf95888
fix: some defaults
tristan-f-r Jun 25, 2025
51d6a7b
feat: fully finish config parsing
tristan-f-r Jun 25, 2025
a27d38d
style: fmt
tristan-f-r Jun 25, 2025
dd4674a
fix: remove dep mark, use strict is None
tristan-f-r Jun 25, 2025
5a8826d
chore: correct config loc
tristan-f-r Jun 25, 2025
a47b0df
fix: specify hac params
tristan-f-r Jun 25, 2025
d721656
Merge branch 'umain' into config-pydantic
tristan-f-r Jun 26, 2025
8d75604
fix: expand class params
tristan-f-r Jun 26, 2025
afa1de5
fix: expand on pca_params
tristan-f-r Jun 26, 2025
5eefc51
fix: drop include dict
tristan-f-r Jun 26, 2025
5243186
fix: call items
tristan-f-r Jun 26, 2025
3b20c48
fix: better typing and deafults
tristan-f-r Jun 26, 2025
4c1fcb6
Merge branch 'umain' into config-pydantic
tristan-f-r Jun 26, 2025
2d4a90f
style: fmt
tristan-f-r Jun 26, 2025
1c55925
Merge branch 'umain' into config-pydantic
tristan-f-r Jul 9, 2025
2a4fb2e
refactor: add config forbid
tristan-f-r Jul 9, 2025
4ded57e
refactor: update config imports
tristan-f-r Jul 9, 2025
22b5686
refactor: better names to schema files
tristan-f-r Jul 9, 2025
ea59e4c
fix: no default include, mention model_config allow reason
tristan-f-r Jul 11, 2025
fa7d7c9
fix(config): case-insensitive check on labels
tristan-f-r Jul 11, 2025
52eab21
refactor: merge config
tristan-f-r Jul 14, 2025
3c305f4
docs: use concepts link
tristan-f-r Jul 14, 2025
49e50a0
refactor: mv util_enum -> util
tristan-f-r Jul 14, 2025
cb28f61
docs: correct util_enum path
tristan-f-r Jul 14, 2025
2c938ed
fix: add spras.config to pyproject
tristan-f-r Jul 14, 2025
cdbaf41
Merge branch 'umain' into config-pydantic
tristan-f-r Jul 15, 2025
ebcf6b0
docs: mention `args` in contributing
tristan-f-r Jul 16, 2025
4733b95
Revert "docs: mention `args` in contributing"
tristan-f-r Jul 16, 2025
fa51d79
docs: document some pydantic choices
tristan-f-r Jul 17, 2025
d07d2af
Merge branch 'umain' into config-pydantic
tristan-f-r Jul 24, 2025
1bef8c7
fix: add defaults for kde and remove_empty_pathways
tristan-f-r Jul 24, 2025
593206c
style: fmt
tristan-f-r Jul 24, 2025
c465c9c
test: update rn import
tristan-f-r Jul 24, 2025
39faf41
Merge branch 'umain' into config-pydantic
tristan-f-r Jul 25, 2025
2443735
style: typos
tristan-f-r Jul 25, 2025
18a173f
docs: grammar
tristan-f-r Jul 26, 2025
5225616
docs: use nicer alphanumeric explanation
tristan-f-r Jul 28, 2025
e82eddf
Merge branch 'umain' into config-pydantic
tristan-f-r Jul 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import yaml
from spras.dataset import Dataset
from spras.evaluation import Evaluation
from spras.analysis import ml, summary, cytoscape
import spras.config as _config
import spras.config.config as _config

# Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037
# and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them
Expand Down
2 changes: 0 additions & 2 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,6 @@ reconstruction_settings:
# TODO move to global
reconstruction_dir: "output"

run: true

analysis:
# Create one summary per pathway file and a single summary table for all pathways for each dataset
summary:
Expand Down
1 change: 0 additions & 1 deletion config/egfr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ gold_standards:
reconstruction_settings:
locations:
reconstruction_dir: output/egfr
run: true
analysis:
cytoscape:
include: true
Expand Down
2 changes: 0 additions & 2 deletions docker-wrappers/SPRAS/example_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ reconstruction_settings:
# TODO move to global
reconstruction_dir: "output"

run: true

analysis:
# Create one summary per pathway file and a single summary table for all pathways for each dataset
summary:
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dependencies:
- matplotlib=3.10.3
- networkx=3.5
- pandas=2.3.0
- pydantic=2.11.7
- numpy=2.3.1
- requests=2.32.4
- scikit-learn=1.7.0
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"matplotlib==3.10.3",
"networkx==3.5",
"pandas==2.3.0",
"pydantic==2.11.7",
"numpy==2.3.1",
"requests==2.32.4",
"scikit-learn==1.7.0",
Expand Down Expand Up @@ -73,4 +74,4 @@ select = [
# py-modules tells setuptools which directory is our actual module
py-modules = ["spras"]
# packages tells setuptools what the exported package is called (ie allows import spras)
packages = ["spras", "spras.analysis"]
packages = ["spras", "spras.analysis", "spras.config"]
Empty file added spras/config/__init__.py
Empty file.
237 changes: 109 additions & 128 deletions spras/config.py → spras/config/config.py

Large diffs are not rendered by default.

168 changes: 168 additions & 0 deletions spras/config/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""
Contains the raw pydantic schema for the configuration file.

Using Pydantic as our backing config parser allows us to declaratively
type our config, giving us more robust user errors with guarantees
that parts of the config exist after parsing it through Pydantic.

We declare models using two classes here:
- `BaseModel` (docs: https://docs.pydantic.dev/latest/concepts/models/)
- `CaseInsensitiveEnum` (see ./util.py)
"""

import re
from typing import Annotated, Optional

from pydantic import AfterValidator, BaseModel, ConfigDict, Field

from spras.config.util import CaseInsensitiveEnum

# Most options here have an `include` property,
# which is meant to make disabling parts of the configuration easier.
# When an option does not have a default, it means that it must be set by the user.

class SummaryAnalysis(BaseModel):
    # Unknown keys are rejected instead of being silently ignored,
    # so that a mistyped option surfaces as a validation error.
    model_config = ConfigDict(extra='forbid')

    # No default: whenever this section is written out, `include` must be set.
    include: bool

class CytoscapeAnalysis(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # No default: whenever this section is written out, `include` must be set.
    include: bool

# Note that CaseInsensitiveEnum is not a pydantic model. Pydantic has
# special support for validating enums, but because these classes are
# not models, we do not set the pydantic-specific "model_config" key on them.
#
# MlLinkage lists the values accepted by the `linkage` option of MlAnalysis.
class MlLinkage(CaseInsensitiveEnum):
    ward = 'ward'
    complete = 'complete'
    average = 'average'
    single = 'single'

# MlMetric lists the values accepted by the `metric` option of MlAnalysis.
# Like the other CaseInsensitiveEnum subclasses, it is an enum rather than a
# pydantic model, so it carries no "model_config".
class MlMetric(CaseInsensitiveEnum):
    euclidean = 'euclidean'
    manhattan = 'manhattan'
    cosine = 'cosine'

class MlAnalysis(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # The only option without a default: it must be given
    # whenever this section is written out.
    include: bool

    # Every remaining option is optional and falls back to the value shown.
    aggregate_per_algorithm: bool = False
    components: int = 2
    labels: bool = True
    kde: bool = False
    remove_empty_pathways: bool = False

    # Enum-typed options; their values are matched case-insensitively.
    linkage: MlLinkage = MlLinkage.ward
    metric: MlMetric = MlMetric.euclidean

class EvaluationAnalysis(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # Required whenever this section is written out.
    include: bool
    # Optional; off unless the user turns it on.
    aggregate_per_algorithm: bool = False

class Analysis(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # Each analysis may be left out of the config entirely,
    # in which case it defaults to not being included.
    summary: SummaryAnalysis = SummaryAnalysis(include=False)
    cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False)
    ml: MlAnalysis = MlAnalysis(include=False)
    evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False)


# The default length of the truncated hash used to identify parameter combinations.
# Used as the fallback for `hash_length` when the config does not set it.
DEFAULT_HASH_LENGTH = 7

def label_validator(name: str):
    """
    Build a validator for labels of the given kind.

    `name` is the human-readable kind of label (for example "Dataset");
    it is only used to word the error message.

    The returned function takes a label and returns it unchanged when it
    consists solely of letters, numbers, or underscores. Otherwise (including
    for the empty string) it raises a ValueError.
    """
    # We use fullmatch instead of match with a trailing `$`, since `$` also
    # matches right before a final newline and would let "label\n" through.
    label_pattern = re.compile(r'\w+')

    def validate(label: str):
        if label_pattern.fullmatch(label) is None:
            raise ValueError(f"{name} label '{label}' contains invalid values. {name} labels can only contain letters, numbers, or underscores.")
        return label

    return validate

# ContainerFramework lists the values accepted by the `container_framework`
# option. It is a CaseInsensitiveEnum rather than a pydantic model.
class ContainerFramework(CaseInsensitiveEnum):
    docker = 'docker'
    # TODO: add apptainer variant once #260 gets merged
    singularity = 'singularity'
    dsub = 'dsub'

class ContainerRegistry(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # Both keys are required: neither one has a default.
    base_url: str
    owner: str = Field(description="The owner or project of the registry")

class AlgorithmParams(BaseModel):
    # TODO: use array of runs instead.
    # Unlike the other models, extra keys are allowed here: we currently rely
    # on them to extract the algorithm parameter information, which is why
    # this deviates from the usual ConfigDict(extra='forbid').
    model_config = ConfigDict(extra='allow')

    # Required for every algorithm entry.
    include: bool
    # Optional; stays None when the user does not specify it.
    directed: Optional[bool] = None

class Algorithm(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # Both keys are required: neither one has a default.
    name: str
    params: AlgorithmParams

class Dataset(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # AfterValidator is preferred for the label: pydantic first runs its own
    # validation & coercion logic, and only then do we check the result
    # against our own requirements.
    label: Annotated[str, AfterValidator(label_validator("Dataset"))]

    # All of the following keys are required.
    node_files: list[str]
    edge_files: list[str]
    other_files: list[str]
    data_dir: str

class GoldStandard(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # Checked by our label validator once pydantic has validated it as a str.
    label: Annotated[str, AfterValidator(label_validator("Gold Standard"))]

    # All of the following keys are required.
    node_files: list[str]
    data_dir: str
    dataset_labels: list[str]

class Locations(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # Required: there is no default reconstruction directory.
    reconstruction_dir: str

# NOTE: As of now, this setting has no use beyond setting the output_dir.
class ReconstructionSettings(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    model_config = ConfigDict(extra='forbid')

    # Required: there is no default for the locations section.
    locations: Locations

class RawConfig(BaseModel):
    # Extra keys are forbidden so that typos in the config are reported.
    # use_attribute_docstrings preserves the docstrings placed after
    # attributes at runtime (for future JSON schema generation).
    model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True)

    # TODO: move these container values to a nested container key
    container_framework: ContainerFramework = ContainerFramework.docker
    unpack_singularity: bool = False
    container_registry: ContainerRegistry

    hash_length: int = DEFAULT_HASH_LENGTH
    "The length of the hash used to identify a parameter combination"

    # Algorithms and datasets are required; gold standards and
    # analysis settings may be omitted and fall back to their defaults.
    algorithms: list[Algorithm]
    datasets: list[Dataset]
    gold_standards: list[GoldStandard] = []
    analysis: Analysis = Analysis()

    reconstruction_settings: ReconstructionSettings
19 changes: 19 additions & 0 deletions spras/config/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from enum import Enum
from typing import Any


# https://stackoverflow.com/a/76883868/7589775
class CaseInsensitiveEnum(str, Enum):
    """
    A string-valued enum whose value lookup ignores letter case.

    We prefer this over a plain Enum so that config parsing is more
    relaxed about how string enum values are capitalised.
    """
    @classmethod
    def _missing_(cls, value: Any):
        # Enum calls this hook when `value` does not match any member exactly.
        # Only strings are lowercased; any other value is compared as-is.
        needle = value.lower() if isinstance(value, str) else value

        # First member whose lowercased value equals the needle, or None when
        # there is no such member (Enum then reports the value as invalid).
        return next((candidate for candidate in cls if candidate.lower() == needle), None)
2 changes: 1 addition & 1 deletion spras/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import docker
import docker.errors

import spras.config as config
import spras.config.config as config
from spras.logging import indent
from spras.util import hash_filename

Expand Down
2 changes: 1 addition & 1 deletion test/AllPairs/test_ap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.allpairs import AllPairs

# Note that we don't directly use the config in the test, but we need the config
Expand Down
2 changes: 1 addition & 1 deletion test/BowTieBuilder/test_btb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

import spras.config as config
import spras.config.config as config

config.init_from_file("config/config.yaml")

Expand Down
2 changes: 1 addition & 1 deletion test/DOMINO/test_domino.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.domino import DOMINO, post_domino_id_transform, pre_domino_id_transform

config.init_from_file("config/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/LocalNeighborhood/test_ln.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

import spras.config as config
import spras.config.config as config

config.init_from_file("config/config.yaml")

Expand Down
2 changes: 1 addition & 1 deletion test/MEO/test_meo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.meo import MEO, write_properties

config.init_from_file("config/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/MinCostFlow/test_mcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.mincostflow import MinCostFlow

config.init_from_file("config/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/OmicsIntegrator1/test_oi1.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.omicsintegrator1 import OmicsIntegrator1, write_conf

config.init_from_file("config/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/OmicsIntegrator2/test_oi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.omicsintegrator2 import OmicsIntegrator2

config.init_from_file("config/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/PathLinker/test_pathlinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.pathlinker import PathLinker

config.init_from_file("config/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/RWR/test_RWR.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.rwr import RWR

config.init_from_file("config/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/ResponseNet/test_rn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.responsenet import ResponseNet

config.init_from_file("config/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/ST_RWR/test_STRWR.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.strwr import ST_RWR

config.init_from_file("config/config.yaml")
Expand Down
1 change: 0 additions & 1 deletion test/analysis/input/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ reconstruction_settings:
locations:
#place the save path here
reconstruction_dir: "output"
run: true

analysis:
# Create one summary per pathway file and a single summary table for all pathways for each dataset
Expand Down
1 change: 0 additions & 1 deletion test/analysis/input/egfr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ datasets:
reconstruction_settings:
locations:
reconstruction_dir: output/egfr
run: true
analysis:
cytoscape:
include: true
Expand Down
2 changes: 1 addition & 1 deletion test/analysis/test_cytoscape.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

import spras.config as config
import spras.config.config as config
from spras.analysis.cytoscape import run_cytoscape

config.init_from_file("test/analysis/input/config.yaml")
Expand Down
2 changes: 1 addition & 1 deletion test/analysis/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pandas as pd

import spras.config as config
import spras.config.config as config
from spras.analysis.summary import summarize_networks
from spras.dataset import Dataset

Expand Down
Loading
Loading