diff --git a/Snakefile b/Snakefile
index e92e4f24f..f04425376 100644
--- a/Snakefile
+++ b/Snakefile
@@ -5,7 +5,7 @@ import yaml
 from spras.dataset import Dataset
 from spras.evaluation import Evaluation
 from spras.analysis import ml, summary, cytoscape
-import spras.config as _config
+import spras.config.config as _config
 
 # Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037
 # and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them
diff --git a/config/config.yaml b/config/config.yaml
index 8ee0b75a1..1e5f1d561 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -169,8 +169,6 @@ reconstruction_settings:
     # TODO move to global
     reconstruction_dir: "output"
 
-    run: true
-
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
   summary:
diff --git a/config/egfr.yaml b/config/egfr.yaml
index d744e9ec8..667cb55f0 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -146,7 +146,6 @@ gold_standards:
 reconstruction_settings:
   locations:
     reconstruction_dir: output/egfr
-    run: true
 analysis:
   cytoscape:
     include: true
diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml
index d729a0375..87e996a9c 100644
--- a/docker-wrappers/SPRAS/example_config.yaml
+++ b/docker-wrappers/SPRAS/example_config.yaml
@@ -122,8 +122,6 @@ reconstruction_settings:
     # TODO move to global
     reconstruction_dir: "output"
 
-    run: true
-
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
   summary:
diff --git a/environment.yml b/environment.yml
index a0fb02293..c0c550074 100644
--- a/environment.yml
+++ b/environment.yml
@@ -9,6 +9,7 @@ dependencies:
   - matplotlib=3.10.3
   - networkx=3.5
   - pandas=2.3.0
+  - pydantic=2.11.7
   - numpy=2.3.1
   - requests=2.32.4
   - scikit-learn=1.7.0
diff --git a/pyproject.toml b/pyproject.toml
index eee89b240..a3beee474 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ dependencies = [
     "matplotlib==3.10.3",
     "networkx==3.5",
     "pandas==2.3.0",
+    "pydantic==2.11.7",
     "numpy==2.3.1",
     "requests==2.32.4",
     "scikit-learn==1.7.0",
@@ -73,4 +74,4 @@ select = [
 # py-modules tells setuptools which directory is our actual module
 py-modules = ["spras"]
 # packages tells setuptools what the exported package is called (ie allows import spras)
-packages = ["spras", "spras.analysis"]
+packages = ["spras", "spras.analysis", "spras.config"]
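Why the stale `run: true` keys above must be deleted: the new schema models (introduced later in this diff) use `ConfigDict(extra='forbid')`, so leftover keys now fail validation instead of being silently ignored. A minimal standalone sketch mirroring the new `Locations` model:

```python
# Sketch only, not part of the PR: extra='forbid' rejects the removed `run` key.
from pydantic import BaseModel, ConfigDict, ValidationError

class Locations(BaseModel):
    reconstruction_dir: str
    model_config = ConfigDict(extra='forbid')

try:
    Locations.model_validate({"reconstruction_dir": "output", "run": True})
except ValidationError as e:
    print(e)  # reports "Extra inputs are not permitted" for the `run` key
```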
diff --git a/spras/config/__init__.py b/spras/config/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spras/config.py b/spras/config/config.py
similarity index 60%
rename from spras/config.py
rename to spras/config/config.py
index 4811ba820..605faecf4 100644
--- a/spras/config.py
+++ b/spras/config/config.py
@@ -6,7 +6,7 @@ module that imports this module can access a config option by checking the
 object's value. For example
 
-import spras.config as config
+import spras.config.config as config
 
 container_framework = config.config.container_framework
 
 will grab the top level registry configuration option as it appears in the config file
@@ -15,20 +15,20 @@
 import copy as copy
 import itertools as it
 import os
-import re
+import warnings
 from collections.abc import Iterable
+from typing import Any
 
 import numpy as np
 import yaml
 
+from spras.config.schema import ContainerFramework, RawConfig
 from spras.util import NpHashEncoder, hash_params_sha1_base32
 
-# The default length of the truncated hash used to identify parameter combinations
-DEFAULT_HASH_LENGTH = 7
-DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio"
-
 config = None
 
+DEFAULT_CONTAINER_PREFIX = "docker.io/reedcompbio"
+
 # This will get called in the Snakefile, instantiating the singleton with the raw config
 def init_global(config_dict):
     global config
@@ -42,39 +42,41 @@ def init_from_file(filepath):
     try:
         with open(filepath, 'r') as yaml_file:
             config_dict = yaml.safe_load(yaml_file)
-    except FileNotFoundError:
-        print(f"Error: The specified config '{filepath}' could not be found.")
-        return False
+    except FileNotFoundError as e:
+        raise RuntimeError(f"Error: The specified config '{filepath}' could not be found.") from e
     except yaml.YAMLError as e:
-        print(f"Error: Failed to parse config '{filepath}': {e}")
-        return False
+        raise RuntimeError(f"Error: Failed to parse config '{filepath}'") from e
 
     # And finally, initialize
     config = Config(config_dict)
 
 class Config:
-    def __init__(self, raw_config):
-        # Since process_config winds up modifying the raw_config passed to it as a side effect,
-        # we'll make a deep copy here to guarantee we don't break anything. This preserves the
-        # config as it's given to the Snakefile by Snakemake
+    def __init__(self, raw_config: dict[str, Any]):
+        # Since snakemake provides an empty config, we provide this
+        # wrapper error first before passing validation to pydantic.
+        if raw_config == {}:
+            raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
+
+        parsed_raw_config = RawConfig.model_validate(raw_config)
+
+        # Member vars populated by process_config. Any values that don't have quick initial values are set to None
+        # before they are populated for __init__ to show exactly what is being configured.
 
-        # Member vars populated by process_config. Set to None before they are populated so that our
-        # __init__ makes clear exactly what is being configured.
         # Directory used for storing output
-        self.out_dir = None
+        self.out_dir = parsed_raw_config.reconstruction_settings.locations.reconstruction_dir
         # Container framework used by PRMs. Valid options are "docker", "dsub", and "singularity"
         self.container_framework = None
         # The container prefix (host and organization) to use for images. Default is "docker.io/reedcompbio"
-        self.container_prefix = DEFAULT_CONTAINER_PREFIX
+        self.container_prefix: str = DEFAULT_CONTAINER_PREFIX
         # A Boolean specifying whether to unpack singularity containers. Default is False
        self.unpack_singularity = False
         # A dictionary to store configured datasets against which SPRAS will be run
         self.datasets = None
         # A dictionary to store configured gold standard data against output of SPRAS runs
         self.gold_standards = None
-        # The hash length SPRAS will use to identify parameter combinations. Default is 7
-        self.hash_length = DEFAULT_HASH_LENGTH
+        # The hash length SPRAS will use to identify parameter combinations.
+        self.hash_length = parsed_raw_config.hash_length
         # The list of algorithms to run in the workflow. Each is a dict with 'name' as an expected key.
         self.algorithms = None
         # A nested dict mapping algorithm names to dicts that map parameter hashes to parameter combinations.
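The error handling above changes from print-and-return-False to raising. A small sketch (hypothetical caller, assuming this PR's module layout) of the new failure mode:

```python
# init_from_file now raises RuntimeError instead of returning False, so callers
# cannot silently continue with an uninitialized config singleton.
import spras.config.config as config

try:
    config.init_from_file("config/missing.yaml")
except RuntimeError as e:
    # The original FileNotFoundError or yaml.YAMLError is chained via `raise ... from e`
    print(f"config load failed: {e}; caused by: {e.__cause__!r}")
```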
@@ -83,9 +85,11 @@ def __init__(self, raw_config):
         # Deprecated. Previously a dict mapping algorithm names to a Boolean tracking whether they used directed graphs.
         self.algorithm_directed = None
         # A dict with the analysis settings
-        self.analysis_params = None
+        self.analysis_params = parsed_raw_config.analysis
+        # A dict with the evaluation settings
+        self.evaluation_params = self.analysis_params.evaluation
         # A dict with the ML settings
-        self.ml_params = None
+        self.ml_params = self.analysis_params.ml
         # A Boolean specifying whether to run ML analysis for individual algorithms
         self.analysis_include_ml_aggregate_algo = None
         # A dict with the PCA settings
@@ -105,69 +109,29 @@ def __init__(self, raw_config):
         # A Boolean specifying whether to run the evaluation per algorithm analysis
         self.analysis_include_evaluation_aggregate_algo = None
 
-        _raw_config = copy.deepcopy(raw_config)
-        self.process_config(_raw_config)
-
-    def process_config(self, raw_config):
-        if raw_config == {}:
-            raise ValueError("Config file cannot be empty. Use --configfile to set a config file.")
-
-        # Set up a few top-level config variables
-        self.out_dir = raw_config["reconstruction_settings"]["locations"]["reconstruction_dir"]
-
-        # We allow the container framework not to be defined in the config. In the case it isn't, default to docker.
-        # However, if we get a bad value, we raise an exception.
-        if "container_framework" in raw_config:
-            container_framework = raw_config["container_framework"].lower()
-            if container_framework not in ("docker", "singularity", "dsub"):
-                msg = "SPRAS was configured to run with an unknown container framework: '" + raw_config["container_framework"] + "'. Accepted values are 'docker', 'singularity' or 'dsub'."
-                raise ValueError(msg)
-            if container_framework == "dsub":
-                print("Warning: 'dsub' framework integration is experimental and may not be fully supported.")
-            self.container_framework = container_framework
-        else:
-            self.container_framework = "docker"
-
-        # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container.
-        if "unpack_singularity" in raw_config:
-            # The value in the config is a string, and we need to convert it to a bool.
-            unpack_singularity = raw_config["unpack_singularity"]
-            if unpack_singularity and self.container_framework != "singularity":
-                print("Warning: unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.")
-            self.unpack_singularity = unpack_singularity
-
-        # Grab registry from the config, and if none is provided default to docker
-        if "container_registry" in raw_config and raw_config["container_registry"]["base_url"] != "" and raw_config["container_registry"]["owner"] != "":
-            self.container_prefix = raw_config["container_registry"]["base_url"] + "/" + raw_config["container_registry"]["owner"]
+        self.process_config(parsed_raw_config)
 
-        # Parse dataset information
-        # Datasets is initially a list, where each list entry has a dataset label and lists of input files
-        # Convert the dataset list into a dict where the label is the key and update the config data structure
+    def process_datasets(self, raw_config: RawConfig):
+        """
+        Parse dataset information
+        Datasets is initially a list, where each list entry has a dataset label and lists of input files
+        Convert the dataset list into a dict where the label is the key and update the config data structure
+        """
         # TODO allow labels to be optional and assign default labels
-        # TODO check for collisions in dataset labels, warn, and make the labels unique
         # Need to work more on input file naming to make less strict assumptions
         # about the filename structure
         # Currently assumes all datasets have a label and the labels are unique
         # When Snakemake parses the config file it loads the datasets as OrderedDicts not dicts
         # Convert to dicts to simplify the yaml logging
-        self.datasets = {dataset["label"]: dict(dataset) for dataset in raw_config["datasets"]}
-
-        for key in self.datasets:
-            pattern = r'^\w+$'
-            if not bool(re.match(pattern, key)):
-                raise ValueError(f"Dataset label \'{key}\' contains invalid values. Dataset labels can only contain letters, numbers, or underscores.")
+        self.datasets = {}
+        for dataset in raw_config.datasets:
+            label = dataset.label
+            if label.lower() in [key.lower() for key in self.datasets.keys()]:
+                raise ValueError(f"Datasets must have unique case-insensitive labels, but the label {label} appears at least twice.")
+            self.datasets[label] = dict(dataset)
 
         # parse gold standard information
-        try:
-            self.gold_standards = {gold_standard["label"]: dict(gold_standard) for gold_standard in raw_config["gold_standards"]}
-        except:
-            self.gold_standards = {}
-
-        # check that gold_standard labels are formatted correctly
-        for key in self.gold_standards:
-            pattern = r'^\w+$'
-            if not bool(re.match(pattern, key)):
-                raise ValueError(f"Gold standard label \'{key}\' contains invalid values. Gold standard labels can only contain letters, numbers, or underscores.")
+        self.gold_standards = {gold_standard.label: dict(gold_standard) for gold_standard in raw_config.gold_standards}
 
         # check that all the dataset labels in the gold standards are existing datasets labels
         dataset_labels = set(self.datasets.keys())
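The loop above also tightens the old duplicate-label TODO into a hard error. A minimal standalone sketch (function name hypothetical) of the case-insensitive rule:

```python
# Dataset labels must now be unique case-insensitively, so "Egfr" and "egfr" collide.
def index_datasets(labels: list[str]) -> dict[str, str]:
    datasets: dict[str, str] = {}
    for label in labels:
        if label.lower() in (key.lower() for key in datasets):
            raise ValueError(f"Datasets must have unique case-insensitive labels, "
                             f"but the label {label} appears at least twice.")
        datasets[label] = label
    return datasets

index_datasets(["egfr", "tps"])      # fine
# index_datasets(["Egfr", "egfr"])   # raises ValueError
```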
@@ -181,35 +145,36 @@ def process_config(self, raw_config):
         # Maps from the dataset label to the dataset list index
         # dataset_dict = {dataset.get('label', f'dataset{index}'): index for index, dataset in enumerate(datasets)}
 
-        # Override the default parameter hash length if specified in the config file
-        if "hash_length" in raw_config and raw_config["hash_length"] != "":
-            self.hash_length = int(raw_config["hash_length"])
-
+    def process_algorithms(self, raw_config: RawConfig):
+        """
+        Parse algorithm information
+        Each algorithm's parameters are provided as a list of dictionaries
+        Defaults are handled in the Python function or class that wraps
+        running that algorithm
+        Keys in the parameter dictionary are strings
+        """
         prior_params_hashes = set()
-
-        # Parse algorithm information
-        # Each algorithm's parameters are provided as a list of dictionaries
-        # Defaults are handled in the Python function or class that wraps
-        # running that algorithm
-        # Keys in the parameter dictionary are strings
         self.algorithm_params = dict()
         self.algorithm_directed = dict()
-        self.algorithms = raw_config["algorithms"]
+        self.algorithms = raw_config.algorithms
         for alg in self.algorithms:
-            cur_params = alg["params"]
-            if "include" in cur_params and cur_params.pop("include"):
+            cur_params = alg.params
+            if cur_params.include:
                 # This dict maps from parameter combinations hashes to parameter combination dictionaries
-                self.algorithm_params[alg["name"]] = dict()
+                self.algorithm_params[alg.name] = dict()
             else:
                 # Do not parse the rest of the parameters for this algorithm if it is not included
                 continue
 
-            if "directed" in cur_params:
-                print("UPDATE: we no longer use the directed key in the config file")
-                cur_params.pop("directed")
+            if cur_params.directed is not None:
+                warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2)
+
+            cur_params = cur_params.__pydantic_extra__
+            if cur_params is None:
+                raise RuntimeError("An internal error occurred: ConfigDict extra should be set on AlgorithmParams.")
 
             # The algorithm has no named arguments so create a default placeholder
-            if len(cur_params) == 0:
+            if len(cur_params.keys()) == 0:
                 cur_params["run1"] = {"spras_placeholder": ["no parameters"]}
 
             # Each set of runs should be 1 level down in the config file
@@ -240,7 +205,7 @@ def process_config(self, raw_config):
                         # Catch-all for strings
                         obj = [obj]
                     if not isinstance(obj, Iterable):
-                        raise ValueError(f"The object `{obj}` in algorithm {alg['name']} at key '{p}' in run '{run_params}' is not iterable!") from None
+                        raise ValueError(f"The object `{obj}` in algorithm {alg.name} at key '{p}' in run '{run_params}' is not iterable!") from None
                 all_runs.append(obj)
                 run_list_tuples = list(it.product(*all_runs))
                 param_name_tuple = tuple(param_name_list)
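The `__pydantic_extra__` access above is the key trick: declared fields (`include`, `directed`) are validated, while the free-form per-run parameters ride along as extras. A standalone sketch of that pattern:

```python
# Sketch mirroring AlgorithmParams: extra inputs are kept and retrieved as a dict
# via __pydantic_extra__ when model_config allows extras.
from typing import Optional
from pydantic import BaseModel, ConfigDict

class AlgorithmParams(BaseModel):
    include: bool
    directed: Optional[bool] = None
    model_config = ConfigDict(extra='allow')

params = AlgorithmParams.model_validate({"include": True, "run1": {"k": [10, 100]}})
print(params.__pydantic_extra__)  # {'run1': {'k': [10, 100]}}
```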
@@ -261,38 +226,33 @@ def process_config(self, raw_config):
                 if params_hash in prior_params_hashes:
                     raise ValueError(f'Parameter hash collision detected. Increase the hash_length in the config file '
                                      f'(current length {self.hash_length}).')
-                self.algorithm_params[alg["name"]][params_hash] = run_dict
-
-        self.analysis_params = raw_config["analysis"] if "analysis" in raw_config else {}
-        self.ml_params = self.analysis_params["ml"] if "ml" in self.analysis_params else {}
-        self.evaluation_params = self.analysis_params["evaluation"] if "evaluation" in self.analysis_params else {}
-
-        self.pca_params = {}
-        if "components" in self.ml_params:
-            self.pca_params["components"] = self.ml_params["components"]
-        if "labels" in self.ml_params:
-            self.pca_params["labels"] = self.ml_params["labels"]
-        if "kde" in self.ml_params:
-            self.pca_params["kde"] = self.ml_params["kde"]
-        else:
-            self.pca_params["kde"] = False
-        if "remove_empty_pathways" in self.ml_params:
-            self.pca_params["remove_empty_pathways"] = self.ml_params["remove_empty_pathways"]
-
-        self.hac_params = {}
-        if "linkage" in self.ml_params:
-            self.hac_params["linkage"] = self.ml_params["linkage"]
-        if "metric" in self.ml_params:
-            self.hac_params["metric"] = self.ml_params ["metric"]
-
-        self.analysis_include_summary = raw_config["analysis"]["summary"]["include"]
-        self.analysis_include_cytoscape = raw_config["analysis"]["cytoscape"]["include"]
-        self.analysis_include_ml = raw_config["analysis"]["ml"]["include"]
-        self.analysis_include_evaluation = raw_config["analysis"]["evaluation"]["include"]
+                self.algorithm_params[alg.name][params_hash] = run_dict
+
+    def process_analysis(self, raw_config: RawConfig):
+        if not raw_config.analysis:
+            return
+
+        # self.ml_params is a class, pca_params needs to be a dict.
+        self.pca_params = {
+            "components": self.ml_params.components,
+            "labels": self.ml_params.labels,
+            "kde": self.ml_params.kde,
+            "remove_empty_pathways": self.ml_params.remove_empty_pathways
+        }
+
+        self.hac_params = {
+            "linkage": self.ml_params.linkage,
+            "metric": self.ml_params.metric
+        }
+
+        self.analysis_include_summary = raw_config.analysis.summary.include
+        self.analysis_include_cytoscape = raw_config.analysis.cytoscape.include
+        self.analysis_include_ml = raw_config.analysis.ml.include
+        self.analysis_include_evaluation = raw_config.analysis.evaluation.include
 
         # Only run ML aggregate per algorithm if analysis include ML is set to True
-        if 'aggregate_per_algorithm' in self.ml_params and self.analysis_include_ml:
-            self.analysis_include_ml_aggregate_algo = raw_config["analysis"]["ml"]["aggregate_per_algorithm"]
+        if self.ml_params.aggregate_per_algorithm and self.analysis_include_ml:
+            self.analysis_include_ml_aggregate_algo = raw_config.analysis.ml.aggregate_per_algorithm
         else:
            self.analysis_include_ml_aggregate_algo = False
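Throughout this file the diff also swaps `print` for `warnings.warn(..., stacklevel=2)`. A toy illustration (not from the PR) of why that matters:

```python
# With stacklevel=2 the warning is attributed to the caller's line rather than
# the config module's, and test suites can capture it instead of scraping stdout.
import warnings

def process(directed=None):
    if directed is not None:
        warnings.warn("UPDATE: we no longer use the directed key in the config file", stacklevel=2)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    process(directed=True)
print(caught[0].message)
```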
@@ -306,8 +266,8 @@ def process_config(self, raw_config):
             self.analysis_include_evaluation = False
 
         # Only run Evaluation aggregate per algorithm if analysis include ML is set to True
-        if 'aggregate_per_algorithm' in self.evaluation_params and self.analysis_include_evaluation:
-            self.analysis_include_evaluation_aggregate_algo = raw_config["analysis"]["evaluation"]["aggregate_per_algorithm"]
+        if self.evaluation_params.aggregate_per_algorithm and self.analysis_include_evaluation:
+            self.analysis_include_evaluation_aggregate_algo = raw_config.analysis.evaluation.aggregate_per_algorithm
         else:
             self.analysis_include_evaluation_aggregate_algo = False
 
@@ -321,3 +281,24 @@ def process_config(self, raw_config):
         if self.analysis_include_evaluation and not self.pca_params["kde"]:
             self.pca_params["kde"] = True
             print("Setting kde to true; Evaluation analysis needs to run KDE for PCA-Chosen parameter selection.")
+
+    def process_config(self, raw_config: RawConfig):
+        # Set up a few top-level config variables
+        self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir
+
+        if raw_config.container_framework == ContainerFramework.dsub:
+            warnings.warn("'dsub' framework integration is experimental and may not be fully supported.", stacklevel=2)
+        self.container_framework = raw_config.container_framework
+
+        # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container.
+        if raw_config.unpack_singularity and self.container_framework != "singularity":
+            warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.", stacklevel=2)
+        self.unpack_singularity = raw_config.unpack_singularity
+
+        # Grab registry from the config, and if none is provided default to docker
+        if raw_config.container_registry and raw_config.container_registry.base_url != "" and raw_config.container_registry.owner != "":
+            self.container_prefix = raw_config.container_registry.base_url + "/" + raw_config.container_registry.owner
+
+        self.process_datasets(raw_config)
+        self.process_algorithms(raw_config)
+        self.process_analysis(raw_config)
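Putting the pieces of config.py together, here is a sketch of the singleton lifecycle after the refactor, mirroring what the Snakefile does (the YAML-shaped dict is an illustrative fragment under this PR's schema, not a complete SPRAS config):

```python
import spras.config.config as config

raw = {
    "container_framework": "docker",
    "container_registry": {"base_url": "docker.io", "owner": "reedcompbio"},
    "algorithms": [{"name": "pathlinker", "params": {"include": False}}],
    "datasets": [{"label": "data0", "node_files": [], "edge_files": [],
                  "other_files": [], "data_dir": "input"}],
    "reconstruction_settings": {"locations": {"reconstruction_dir": "output"}},
}
config.init_global(raw)  # validates via RawConfig.model_validate under the hood
print(config.config.container_prefix)  # docker.io/reedcompbio
```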
diff --git a/spras/config/schema.py b/spras/config/schema.py
new file mode 100644
index 000000000..c84ea4384
--- /dev/null
+++ b/spras/config/schema.py
@@ -0,0 +1,168 @@
+"""
+Contains the raw pydantic schema for the configuration file.
+
+Using Pydantic as our backing config parser allows us to declaratively
+type our config, giving us more robust user errors with guarantees
+that parts of the config exist after parsing it through Pydantic.
+
+We declare models using two classes here:
+- `BaseModel` (docs: https://docs.pydantic.dev/latest/concepts/models/)
+- `CaseInsensitiveEnum` (see ./util.py)
+"""
+
+import re
+from typing import Annotated, Optional
+
+from pydantic import AfterValidator, BaseModel, ConfigDict, Field
+
+from spras.config.util import CaseInsensitiveEnum
+
+# Most options here have an `include` property,
+# which is meant to make disabling parts of the configuration easier.
+# When an option does not have a default, it means that it must be set by the user.
+
+class SummaryAnalysis(BaseModel):
+    include: bool
+
+    # We prefer to never allow extra keys, to prevent
+    # any user mistypes.
+    model_config = ConfigDict(extra='forbid')
+
+class CytoscapeAnalysis(BaseModel):
+    include: bool
+
+    model_config = ConfigDict(extra='forbid')
+
+# Note that CaseInsensitiveEnum is not pydantic: pydantic
+# has special support for enums, but we avoid the
+# pydantic-specific "model_config" key here for this reason.
+class MlLinkage(CaseInsensitiveEnum):
+    ward = 'ward'
+    complete = 'complete'
+    average = 'average'
+    single = 'single'
+
+class MlMetric(CaseInsensitiveEnum):
+    euclidean = 'euclidean'
+    manhattan = 'manhattan'
+    cosine = 'cosine'
+
+class MlAnalysis(BaseModel):
+    include: bool
+    aggregate_per_algorithm: bool = False
+    components: int = 2
+    labels: bool = True
+    kde: bool = False
+    remove_empty_pathways: bool = False
+    linkage: MlLinkage = MlLinkage.ward
+    metric: MlMetric = MlMetric.euclidean
+
+    model_config = ConfigDict(extra='forbid')
+
+class EvaluationAnalysis(BaseModel):
+    include: bool
+    aggregate_per_algorithm: bool = False
+
+    model_config = ConfigDict(extra='forbid')
+
+class Analysis(BaseModel):
+    summary: SummaryAnalysis = SummaryAnalysis(include=False)
+    cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False)
+    ml: MlAnalysis = MlAnalysis(include=False)
+    evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False)
+
+    model_config = ConfigDict(extra='forbid')
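A quick sketch of how the nested defaults above behave (assumes this PR's `spras.config.schema` module): omitted analysis sections collapse to `include=False`, so a config only has to mention what it enables.

```python
from spras.config.schema import Analysis

analysis = Analysis.model_validate({"summary": {"include": True}})
print(analysis.summary.include)   # True
print(analysis.ml.include)        # False, the MlAnalysis(include=False) default
print(analysis.ml.linkage.value)  # 'ward', the default linkage
```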
+
+# The default length of the truncated hash used to identify parameter combinations
+DEFAULT_HASH_LENGTH = 7
+
+def label_validator(name: str):
+    """
+    Returns a validator that takes in a label
+    and ensures that it contains only letters, numbers, or underscores.
+    """
+    label_pattern = r'^\w+$'
+    def validate(label: str):
+        if not bool(re.match(label_pattern, label)):
+            raise ValueError(f"{name} label '{label}' contains invalid values. {name} labels can only contain letters, numbers, or underscores.")
+        return label
+    return validate
+
+class ContainerFramework(CaseInsensitiveEnum):
+    docker = 'docker'
+    # TODO: add apptainer variant once #260 gets merged
+    singularity = 'singularity'
+    dsub = 'dsub'
+
+class ContainerRegistry(BaseModel):
+    base_url: str
+    owner: str = Field(description="The owner or project of the registry")
+
+    model_config = ConfigDict(extra='forbid')
+
+class AlgorithmParams(BaseModel):
+    include: bool
+    directed: Optional[bool] = None
+
+    # TODO: use array of runs instead. We currently rely on the
+    # extra parameters here to extract the algorithm parameter information,
+    # which is why this deviates from the usual ConfigDict(extra='forbid').
+    model_config = ConfigDict(extra='allow')
+
+class Algorithm(BaseModel):
+    name: str
+    params: AlgorithmParams
+
+    model_config = ConfigDict(extra='forbid')
+
+class Dataset(BaseModel):
+    # We prefer AfterValidator here to allow pydantic to run its own
+    # validation & coercion logic before we check it against our own
+    # requirements
+    label: Annotated[str, AfterValidator(label_validator("Dataset"))]
+    node_files: list[str]
+    edge_files: list[str]
+    other_files: list[str]
+    data_dir: str
+
+    model_config = ConfigDict(extra='forbid')
+
+class GoldStandard(BaseModel):
+    label: Annotated[str, AfterValidator(label_validator("Gold Standard"))]
+    node_files: list[str]
+    data_dir: str
+    dataset_labels: list[str]
+
+    model_config = ConfigDict(extra='forbid')
+
+class Locations(BaseModel):
+    reconstruction_dir: str
+
+    model_config = ConfigDict(extra='forbid')
+
+# NOTE: This setting doesn't have any uses past setting the output_dir as of now.
+class ReconstructionSettings(BaseModel):
+    locations: Locations
+
+    model_config = ConfigDict(extra='forbid')
+
+class RawConfig(BaseModel):
+    # TODO: move these container values to a nested container key
+    container_framework: ContainerFramework = ContainerFramework.docker
+    unpack_singularity: bool = False
+    container_registry: ContainerRegistry
+
+    hash_length: int = DEFAULT_HASH_LENGTH
+    "The length of the hash used to identify a parameter combination"
+
+    algorithms: list[Algorithm]
+    datasets: list[Dataset]
+    gold_standards: list[GoldStandard] = []
+    analysis: Analysis = Analysis()
+
+    reconstruction_settings: ReconstructionSettings
+
+    # We include use_attribute_docstrings here to preserve the docstrings
+    # after attributes at runtime (for future JSON schema generation)
+    model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True)
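The `Annotated[str, AfterValidator(...)]` pattern used for labels replaces the old hand-rolled `re.match` loops in config.py. A standalone sketch (model and helper names mirror the schema but are redefined here for runnability):

```python
# pydantic first coerces/validates the str, then the custom check runs afterwards.
import re
from typing import Annotated
from pydantic import AfterValidator, BaseModel, ValidationError

def check_label(label: str) -> str:
    if not re.match(r'^\w+$', label):
        raise ValueError(f"label '{label}' contains invalid values")
    return label

class Dataset(BaseModel):
    label: Annotated[str, AfterValidator(check_label)]

Dataset(label="egfr_2024")  # ok
try:
    Dataset(label="bad-label!")
except ValidationError as e:
    print(e)  # pydantic wraps our ValueError with field context
```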
diff --git a/spras/config/util.py b/spras/config/util.py
new file mode 100644
index 000000000..b7680222b
--- /dev/null
+++ b/spras/config/util.py
@@ -0,0 +1,19 @@
+from enum import Enum
+from typing import Any
+
+
+# https://stackoverflow.com/a/76883868/7589775
+class CaseInsensitiveEnum(str, Enum):
+    """
+    We prefer this over Enum to make sure the config parsing
+    is more relaxed when it comes to string enum values.
+    """
+    @classmethod
+    def _missing_(cls, value: Any):
+        if isinstance(value, str):
+            value = value.lower()
+
+        for member in cls:
+            if member.lower() == value:
+                return member
+        return None
diff --git a/spras/containers.py b/spras/containers.py
index 2209df554..b7711c4f6 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -8,7 +8,7 @@
 import docker
 import docker.errors
 
-import spras.config as config
+import spras.config.config as config
 from spras.logging import indent
 from spras.util import hash_filename
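A quick standalone demo of the `_missing_` hook above: lookups by value fall back to a lowercase comparison, so user-facing strings are case-insensitive (the class body is copied from util.py; `ContainerFramework` is trimmed for brevity).

```python
from enum import Enum
from typing import Any

class CaseInsensitiveEnum(str, Enum):
    @classmethod
    def _missing_(cls, value: Any):
        if isinstance(value, str):
            value = value.lower()
        for member in cls:
            if member.lower() == value:
                return member
        return None

class ContainerFramework(CaseInsensitiveEnum):
    docker = 'docker'
    singularity = 'singularity'

print(ContainerFramework("Docker"))       # ContainerFramework.docker
print(ContainerFramework("SINGULARITY"))  # ContainerFramework.singularity
```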
diff --git a/test/AllPairs/test_ap.py b/test/AllPairs/test_ap.py
index 57c5640d2..a8291f72f 100644
--- a/test/AllPairs/test_ap.py
+++ b/test/AllPairs/test_ap.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.allpairs import AllPairs
 
 # Note that we don't directly use the config in the test, but we need the config
diff --git a/test/BowTieBuilder/test_btb.py b/test/BowTieBuilder/test_btb.py
index ad85f9bb7..4f0952f16 100644
--- a/test/BowTieBuilder/test_btb.py
+++ b/test/BowTieBuilder/test_btb.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 
 config.init_from_file("config/config.yaml")
diff --git a/test/DOMINO/test_domino.py b/test/DOMINO/test_domino.py
index e903bc2fd..05bd3ae70 100644
--- a/test/DOMINO/test_domino.py
+++ b/test/DOMINO/test_domino.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.domino import DOMINO, post_domino_id_transform, pre_domino_id_transform
 
 config.init_from_file("config/config.yaml")
diff --git a/test/LocalNeighborhood/test_ln.py b/test/LocalNeighborhood/test_ln.py
index fbee54902..9093efc68 100644
--- a/test/LocalNeighborhood/test_ln.py
+++ b/test/LocalNeighborhood/test_ln.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 
 config.init_from_file("config/config.yaml")
diff --git a/test/MEO/test_meo.py b/test/MEO/test_meo.py
index e2abdb72d..32958be20 100644
--- a/test/MEO/test_meo.py
+++ b/test/MEO/test_meo.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.meo import MEO, write_properties
 
 config.init_from_file("config/config.yaml")
diff --git a/test/MinCostFlow/test_mcf.py b/test/MinCostFlow/test_mcf.py
index 89bd61d0b..c777a665d 100644
--- a/test/MinCostFlow/test_mcf.py
+++ b/test/MinCostFlow/test_mcf.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.mincostflow import MinCostFlow
 
 config.init_from_file("config/config.yaml")
diff --git a/test/OmicsIntegrator1/test_oi1.py b/test/OmicsIntegrator1/test_oi1.py
index 35b41d428..a484c0af3 100644
--- a/test/OmicsIntegrator1/test_oi1.py
+++ b/test/OmicsIntegrator1/test_oi1.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.omicsintegrator1 import OmicsIntegrator1, write_conf
 
 config.init_from_file("config/config.yaml")
diff --git a/test/OmicsIntegrator2/test_oi2.py b/test/OmicsIntegrator2/test_oi2.py
index 311a9c7e7..aa74cd94e 100644
--- a/test/OmicsIntegrator2/test_oi2.py
+++ b/test/OmicsIntegrator2/test_oi2.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.omicsintegrator2 import OmicsIntegrator2
 
 config.init_from_file("config/config.yaml")
diff --git a/test/PathLinker/test_pathlinker.py b/test/PathLinker/test_pathlinker.py
index 3fd6a96bd..ed9f10670 100644
--- a/test/PathLinker/test_pathlinker.py
+++ b/test/PathLinker/test_pathlinker.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.pathlinker import PathLinker
 
 config.init_from_file("config/config.yaml")
diff --git a/test/RWR/test_RWR.py b/test/RWR/test_RWR.py
index 4d6ce7864..b0316ded0 100644
--- a/test/RWR/test_RWR.py
+++ b/test/RWR/test_RWR.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.rwr import RWR
 
 config.init_from_file("config/config.yaml")
diff --git a/test/ResponseNet/test_rn.py b/test/ResponseNet/test_rn.py
index 6fa09904b..6b9fe05cf 100644
--- a/test/ResponseNet/test_rn.py
+++ b/test/ResponseNet/test_rn.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.responsenet import ResponseNet
 
 config.init_from_file("config/config.yaml")
diff --git a/test/ST_RWR/test_STRWR.py b/test/ST_RWR/test_STRWR.py
index a0a5b4ea9..898b24055 100644
--- a/test/ST_RWR/test_STRWR.py
+++ b/test/ST_RWR/test_STRWR.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.strwr import ST_RWR
 
 config.init_from_file("config/config.yaml")
diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml
index c9eaa437a..abde6f979 100644
--- a/test/analysis/input/config.yaml
+++ b/test/analysis/input/config.yaml
@@ -102,7 +102,6 @@ reconstruction_settings:
   locations:
     #place the save path here
     reconstruction_dir: "output"
-    run: true
 
 analysis:
   # Create one summary per pathway file and a single summary table for all pathways for each dataset
diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml
index 4a31dad46..da4560df9 100644
--- a/test/analysis/input/egfr.yaml
+++ b/test/analysis/input/egfr.yaml
@@ -91,7 +91,6 @@ datasets:
 reconstruction_settings:
   locations:
     reconstruction_dir: output/egfr
-    run: true
 analysis:
   cytoscape:
     include: true
diff --git a/test/analysis/test_cytoscape.py b/test/analysis/test_cytoscape.py
index 7451b9876..68a77cd07 100644
--- a/test/analysis/test_cytoscape.py
+++ b/test/analysis/test_cytoscape.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.analysis.cytoscape import run_cytoscape
 
 config.init_from_file("test/analysis/input/config.yaml")
diff --git a/test/analysis/test_summary.py b/test/analysis/test_summary.py
index 4ff5396da..0400d1f1b 100644
--- a/test/analysis/test_summary.py
+++ b/test/analysis/test_summary.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 
-import spras.config as config
+import spras.config.config as config
 from spras.analysis.summary import summarize_networks
 from spras.dataset import Dataset
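Every test module above follows the same two-line change. For reference, a sketch of the preamble a hypothetical new test file would use after this rename (the config path matches the existing tests):

```python
import pytest

import spras.config.config as config

config.init_from_file("config/config.yaml")

def test_config_is_initialized():
    assert config.config is not None
```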
diff --git a/test/test_config.py b/test/test_config.py
index 39b091b5e..72df9e0b9 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -3,8 +3,15 @@
 import numpy as np
 import pytest
 
-import spras.config as config
+import spras.config.config as config
+from spras.config.schema import DEFAULT_HASH_LENGTH
 
+filler_dataset_data: dict[str, str | list[str]] = {
+    "data_dir": "fake",
+    "edge_files": [],
+    "other_files": [],
+    "node_files": []
+}
 
 # Set up a dummy config for testing. For now, only include things that MUST exist in the dict
 # in order for the config init to complete. To test particular parts of the config initialization,
@@ -22,10 +29,26 @@ def get_test_config():
                 "reconstruction_dir": "my_dir"
             }
         },
-        "datasets": [{"label": "alg1"}, {"label": "alg2"}],
-        "gold_standards": [{"label": "gs1", "dataset_labels": []}],
+        "datasets": [{
+            "label": "alg1",
+            "data_dir": "fake",
+            "edge_files": [],
+            "other_files": [],
+            "node_files": []
+        }, {
+            "label": "alg2",
+            "data_dir": "faux",
+            "edge_files": [],
+            "other_files": [],
+            "node_files": []
+        }],
+        "gold_standards": [{
+            "label": "gs1",
+            "dataset_labels": [],
+            "node_files": [],
+            "data_dir": "gs-fake"
+        }],
         "algorithms": [
-            {"params": ["param2", "param2"]},
             {
                 "name": "strings",
                 "params": {
@@ -123,9 +146,9 @@ def test_config_hash_length(self):
         config.init_global(test_config)
         assert (config.config.hash_length == 7)
 
-        test_config["hash_length"] = ""
+        test_config.pop("hash_length", None)
         config.init_global(test_config)
-        assert (config.config.hash_length == config.DEFAULT_HASH_LENGTH)
+        assert (config.config.hash_length == DEFAULT_HASH_LENGTH)
 
         # Initialize the configuration
         test_config["hash_length"] = "12"
@@ -191,6 +214,7 @@ def test_correct_dataset_label(self):
         test_config = get_test_config()
         correct_test_dicts = [{"label": "test"}, {"label": "123"}, {"label": "test123"}, {"label": "123test"}, {"label": "_"},
                               {"label": "test_test"}, {"label": "_test"}, {"label": "test_"}]
+        correct_test_dicts = [dict(list(d.items()) + list(filler_dataset_data.items())) for d in correct_test_dicts]
 
         for test_dict in correct_test_dicts:
             test_config["datasets"] = [test_dict]
diff --git a/test/test_util.py b/test/test_util.py
index baf9db0ed..2a25fc0d1 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-import spras.config as config
+import spras.config.config as config
 from spras.containers import convert_docker_path, prepare_path_docker, prepare_volume
 from spras.util import hash_params_sha1_base32
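A closing sketch of why `get_test_config` grew the filler fields (and why the nameless `{"params": ["param2", "param2"]}` algorithm entry had to go): the dummy config must now be schema-complete, since `RawConfig` requires every `Dataset` field (assumes this PR's `spras.config.schema`).

```python
from spras.config.schema import RawConfig

minimal = {
    "container_registry": {"base_url": "docker.io", "owner": "reedcompbio"},
    "algorithms": [{"name": "strings", "params": {"include": False}}],
    "datasets": [{"label": "alg1", "data_dir": "fake",
                  "edge_files": [], "other_files": [], "node_files": []}],
    "reconstruction_settings": {"locations": {"reconstruction_dir": "my_dir"}},
}
RawConfig.model_validate(minimal)  # passes; drop "data_dir" and it fails
```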