From 3ca71b1a039f2178a22ef3cb95aaf3abc63aef91 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 23 Jan 2026 14:47:33 -0600 Subject: [PATCH 1/4] Add additional flags to snakemake_long for env management, verbose logging I was tired of hacking around wanting verbose logging in the HTCondor Snakemake executor, so I added some plumbing to pass Snakemake's '--verbose' flag through 'snakemake_long.py' to snakemake itself. Additionally, I added '--env-manager' so I could run things with my preferred mamba env instead of conda (which is too slow to rebuild). --- docker-wrappers/SPRAS/snakemake_long.py | 34 ++++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/docker-wrappers/SPRAS/snakemake_long.py b/docker-wrappers/SPRAS/snakemake_long.py index 230593b94..8ad1ff537 100755 --- a/docker-wrappers/SPRAS/snakemake_long.py +++ b/docker-wrappers/SPRAS/snakemake_long.py @@ -30,6 +30,9 @@ def parse_args(isLocal=False): parser.add_argument("command", help="Helper command to run", choices=["long"]) parser.add_argument("--snakefile", help="The Snakefile to run. If omitted, the Snakefile is assumed to be in the current directory.", required=False) parser.add_argument("--profile", help="A path to a directory containing the desired Snakemake profile.", required=True) + parser.add_argument("--verbose", help="Enable verbose output for debugging.", action="store_true", required=False) + parser.add_argument("--env-manager", help="The environment manager to use (conda or mamba). Default is conda.", + choices=["conda", "mamba"], default="conda", required=False) # I'd love to change this to "logdir", but using the same name as Snakemake for consistency of feeling between this script # and Snakemake proper. parser.add_argument("--htcondor-jobdir", help="The directory Snakemake will write logs to. 
If omitted, a 'logs` directory will be created in the current directory", required=False) @@ -39,19 +42,24 @@ def parse_args(isLocal=False): Given a Snakefile, profile, and HTCondor job directory, submit a local universe job that runs Snakemake from the context of the submission directory. """ -def submit_local(snakefile, profile, htcondor_jobdir): +def submit_local(snakefile, profile, htcondor_jobdir, verbose=False, env_manager="conda"): # Get the location of this script, which also serves as the executable for the condor job. script_location = pathlib.Path(__file__).resolve() + # Build arguments string, including optional flags + args_str = f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir} --env-manager {env_manager}" + if verbose: + args_str += " --verbose" + submit_description = htcondor.Submit({ "executable": script_location, # We use the "long" command to indicate to the script that it should run the Snakemake command instead of submitting another job. # See comment in parse_args for more information. - "arguments": f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir}", + "arguments": args_str, "universe": "local", "request_disk": "512MB", "request_cpus": 1, - "request_memory": 512, + "request_memory": "512MB", # Set up logging "log": f"{htcondor_jobdir}/snakemake.log", @@ -100,7 +108,7 @@ def top_main(): os.makedirs(args.htcondor_jobdir) try: - submit_local(args.snakefile, args.profile, args.htcondor_jobdir) + submit_local(args.snakefile, args.profile, args.htcondor_jobdir, args.verbose, args.env_manager) except Exception as e: print(f"Error: Could not submit local universe job. {e}") raise @@ -108,17 +116,29 @@ def top_main(): """ Command to activate conda environment and run Snakemake. This is run by the local universe job, not the user. 
""" +def get_env_activation_command(env_manager, env_name="spras"): + """Generate the appropriate shell commands to activate the environment based on the env manager.""" + if env_manager == "mamba": + # mamba uses shell hook for activation + return f'eval "$(mamba shell hook --shell bash)" && mamba activate {env_name}' + else: # conda (default) + return f'source $(conda info --base)/etc/profile.d/conda.sh && conda activate {env_name}' + def long_main(): args = parse_args(True) # Note that we need to unset APPTAINER_CACHEDIR in this case but not in the local terminal case because the wrapper # HTCondor job has a different environment and populating this value causes Snakemake to fail when it tries to write # to spool (a read-only filesystem from the perspective of the EP job). + verbose_flag = "--verbose" if args.verbose else "" + + # Get the appropriate activation command for the detected/specified env manager + activation_cmd = get_env_activation_command(args.env_manager) + command = f""" - source $(conda info --base)/etc/profile.d/conda.sh && \ - conda activate spras && \ + {activation_cmd} && \ unset APPTAINER_CACHEDIR && \ - snakemake -s {args.snakefile} --profile {args.profile} --htcondor-jobdir {args.htcondor_jobdir} + snakemake -s {args.snakefile} --profile {args.profile} --htcondor-jobdir {args.htcondor_jobdir} {verbose_flag} """ try: From 7f1eed1eca99e6157fe55421e5b592cdfdbbe62d Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 23 Jan 2026 15:42:43 -0600 Subject: [PATCH 2/4] Overhaul HTCondor instructions The executor has matured quite a bit since these instructions were first drafted, and it's my hope that these changes remove a lot of the headache for running jobs. Now, you can edit config files in `config/` and use the `input/` directory directly. Workflows should be submitted directly from the repository root. 
--- .gitignore | 9 + docker-wrappers/SPRAS/example_config.yaml | 154 ----------------- docs/htcondor.rst | 161 ++++++++++-------- .../SPRAS => htcondor}/snakemake_long.py | 2 +- {docker-wrappers/SPRAS => htcondor}/spras.sh | 0 {docker-wrappers/SPRAS => htcondor}/spras.sub | 26 +-- .../spras_profile/config.yaml | 13 +- run_htcondor.sh | 8 + 8 files changed, 127 insertions(+), 246 deletions(-) delete mode 100644 docker-wrappers/SPRAS/example_config.yaml rename {docker-wrappers/SPRAS => htcondor}/snakemake_long.py (98%) rename {docker-wrappers/SPRAS => htcondor}/spras.sh (100%) rename {docker-wrappers/SPRAS => htcondor}/spras.sub (87%) rename {docker-wrappers/SPRAS => htcondor}/spras_profile/config.yaml (83%) create mode 100755 run_htcondor.sh diff --git a/.gitignore b/.gitignore index 3629c49c6..91933def7 100644 --- a/.gitignore +++ b/.gitignore @@ -144,3 +144,12 @@ TempMat.mat # Singularity cache unpacked + +# HTCondor logs +htcondor/logs/ +*.err +*.out +*.log + +# Any sif files +*.sif diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml deleted file mode 100644 index 1e7fd69c2..000000000 --- a/docker-wrappers/SPRAS/example_config.yaml +++ /dev/null @@ -1,154 +0,0 @@ -# Global workflow control - -# The length of the hash used to identify a parameter combination -hash_length: 7 - -containers: - # Specify the container framework used by each PRM wrapper. Valid options include: - # - docker (default if not specified) - # - singularity OR apptainer -- Apptainer (formerly Singularity) is useful in HPC/HTC environments where docker isn't allowed - # - dsub -- experimental with limited support, used for running on Google Cloud - framework: singularity - - # Only used if framework is set to singularity/apptainer, this will unpack the containers - # to the local filesystem. This is useful when PRM containers need to run inside another container, - # such as would be the case in an HTCondor/OSPool environment. 
- # NOTE: This unpacks containers to the local filesystem, which will take up space in a way - # that persists after the workflow is complete. To clean up the unpacked containers, the user must - # manually delete them. For convenience, these unpacked files will exist in the current working directory - # under `unpacked`. - # Here, we unpack it since we're running on HTCondor. - unpack_singularity: true - - # Allow the user to configure which container registry containers should be pulled from - # Note that this assumes container names are consistent across registries, and that the - # registry being passed doesn't require authentication for pull actions - registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio - -# This list of algorithms should be generated by a script which checks the filesystem for installs. -# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm -# in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved) -# Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change -# which algorithms are run in a given experiment. -# -# algorithm-specific parameters are embedded in lists so that users can specify multiple. If multiple -# parameters are specified then the algorithm will be run as many times as needed to cover all parameter -# combinations. For instance if we have the following: -# - name: "myAlg" -# params: -# include: true -# a: [1,2] -# b: [0.5,0.75] -# -# then myAlg will be run on (a=1,b=0.5),(a=1,b=0.75),(a=2,b=0.5), and (a=2,b=0,75). Pretty neat, but be -# careful: too many parameters might make your runs take a long time. 
- -algorithms: - - name: "pathlinker" - include: false - runs: - run1: - k: range(100,201,100) - - - name: "omicsintegrator1" - include: true - runs: - run1: - r: [5] - b: [5, 6] - w: np.linspace(0,5,2) - g: [3] - d: [10] - - - name: "omicsintegrator2" - include: true - runs: - run1: - b: [4] - g: [0] - run2: - b: [2] - g: [3] - - - name: "meo" - include: true - runs: - run1: - max_path_length: [3] - local_search: [true] - rand_restarts: [10] - - - name: "mincostflow" - include: true - runs: - run1: - flow: [1] # The flow must be an int - capacity: [1] - - - name: "allpairs" - include: true - - - name: "domino" - include: true - runs: - run1: - slice_threshold: [0.3] - module_threshold: [0.05] - -# Here we specify which pathways to run and other file location information. -# DataLoader.py can currently only load a single dataset -# Assume that if a dataset label does not change, the lists of associated input files do not change -datasets: - - label: data0 - node_files: ["node-prizes.txt", "sources.txt", "targets.txt"] - # DataLoader.py can currently only load a single edge file, which is the primary network - edge_files: ["network.txt"] - # Placeholder - other_files: [] - # Relative path from the spras directory - data_dir: "input" -# - label: data1 -# # Reuse some of the same sources file as 'data0' but different network and targets -# node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"] -# edge_files: ["alternative-network.txt"] -# other_files: [] -# # Relative path from the spras directory -# data_dir: "input" - -# If we want to reconstruct then we should set run to true. -# TODO: if include is true above but run is false here, algs are not run. -# is this the behavior we want? 
-reconstruction_settings: - - #set where everything is saved - locations: - - #place the save path here - # TODO move to global - reconstruction_dir: "output" - -analysis: - # Create one summary per pathway file and a single summary table for all pathways for each dataset - summary: - include: true - # Create Cytoscape session file with all pathway graphs for each dataset - cytoscape: - include: false - # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset - ml: - include: true - # specify how many principal components to calculate - components: 2 - # boolean to show the labels on the pca graph - labels: true - # 'ward', 'complete', 'average', 'single' - # if linkage: ward, must use metric: euclidean - linkage: 'ward' - # 'euclidean', 'manhattan', 'cosine' - metric: 'euclidean' - evaluation: - include: false diff --git a/docs/htcondor.rst b/docs/htcondor.rst index 3bf0665c6..16b87f6ee 100644 --- a/docs/htcondor.rst +++ b/docs/htcondor.rst @@ -1,7 +1,7 @@ Running with HTCondor ===================== -The folder `docker-wrappers/SPRAS `_ +The folder `htcondor/ `_ inside the SPRAS git repository contains several files that can be used to run workflows with this container on HTCondor. To use the ``spras`` image in this environment, first login to an HTCondor Access Point (AP). @@ -63,64 +63,54 @@ image does not use a "v" in the tag. Submitting All Jobs to a Single EP ---------------------------------- -Navigate to the ``spras/docker-wrappers/SPRAS`` directory and create the -``logs/`` directory (``mkdir logs``). Next, modify ``spras.sub`` so that -it uses the SPRAS apptainer image you created: - -:: - - container_image = < your spras image >.sif - -Make sure to modify the configuration file to have -``unpack_singularity`` set to ``true``, and ``containers.framework`` set -to ``singularity``: else, the workflow will (likely) fail. 
- -Then run ``condor_submit spras.sub``, which will submit SPRAS to -HTCondor as a single job with as many cores as indicated by the -``NUM_PROCS`` line in ``spras.sub``, using the value of -``EXAMPLE_CONFIG`` as the SPRAS configuration file. By default, the -``example_config.yaml`` runs everything except for ``cytoscape``, which -appears to fail periodically in HTCondor. - -**Note**: The ``spras.sub`` submit file is an example of how this -workflow could be submitted from a CHTC Access Point (AP) to the OSPool. -To run in the local CHTC pool, omit the ``+WantGlideIn`` and -``requirements`` lines. +Running all SPRAS steps on a single remote Execution Point (EP) is a good way +to get started with HTCondor, but it is significantly less efficient than using +HTCondor's distributed capabilities. This approach is best suited for +workflows that are not computationally intensive, or for testing and +debugging purposes. + +Before submitting all SPRAS jobs to a single remote Execution Point (EP), +you'll need to set up three things: +1. You'll need to modify ``htcondor/spras.sub`` to point at your container + image, along with any other configuration changes you want to make like + choosing a logging directory or toggling OSPool submission. Note that all + paths in the submit file are relative to the directory from which you run + ``condor_submit``, which will typically be the root of the SPRAS repository. +2. You'll need to ensure your SPRAS configuration file has a few key values + set, including ``unpack_singularity: true`` and + ``containers.framework: singularity``. +3. Finally, it's best practice to create the logging directory configured in + the submit file before submitting the job, e.g. to create the default log + directory, run ``mkdir htcondor/logs`` from the root of the repository. + +Once these steps are complete, you can submit the job from the root of the +the SPRAS repository by running ``condor_submit htcondor/spras.sub``. 
+ +When the job completes, the ``output`` directory from the workflow should be +returned as ``output``. Submitting Parallel Jobs ------------------------ -Parallelizing SPRAS workflows with HTCondor requires the same setup as -the previous section, but with two additions. First, it requires an -activated SPRAS conda environment with a ``pip install``-ed version of -the SPRAS module (via ``pip install .`` inside the SPRAS directory). - -Second, it requires an experimental executor for HTCondor that has been -forked from the upstream `HTCondor Snakemake -executor `__. - -After activating your ``spras`` conda environment and ``pip``-installing -SPRAS, you can install the HTCondor Snakemake executor with the -following: +Parallelizing SPRAS workflows with HTCondor requires much of the same setup +as the previous section, but with several additions. +1. Build/activate the SPRAS conda/mamba environment and ``pip install`` the SPRAS module + (via ``pip install .`` inside the SPRAS directory). +2. Install the `HTCondor Snakemake +executor `__; once your + SPRAS conda/mamba environment is activated and SPRAS is ``pip install``-ed, + you can install the HTCondor Snakemake executor with the following: .. code:: bash pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git -Currently, this executor requires that all input to the workflow is -scoped to the current working directory. Therefore, you'll need to copy -the Snakefile and your input directory (as specified by -``example_config.yaml``) to this directory: - -.. code:: bash - - cp ../../Snakefile . && \ - cp -r ../../input . - -Instead of editing ``spras.sub`` to define the workflow, this scenario -requires editing the SPRAS profile in ``spras_profile/config.yaml``. -Make sure you specify the correct container, and change any other config -values needed by your workflow (defaults are fine in most cases). +3. 
Instead of editing ``spras.sub`` to define the workflow, this scenario + requires editing the SPRAS profile in ``htcondor/spras_profile/config.yaml``. + Make sure you specify the correct container, and change any other config + values needed by your workflow (defaults are fine in most cases). +4. Modify your SPRAS configuration file to set ``unpack_singularity: true`` and + ``containers.framework: singularity``. Then, to start the workflow with HTCondor in the CHTC pool, there are two options: @@ -132,11 +122,16 @@ The first option is to run Snakemake in a way that ties its execution to your terminal. This is good for testing short workflows and running short jobs. The downside is that closing your terminal causes the process to exit, removing any unfinished jobs. To use this option, -invoke Snakemake directly by running: +invoke Snakemake directly from the repository root by running: .. code:: bash - snakemake --profile spras_profile + snakemake --profile htcondor/spras_profile/ + +**Note**: Running the workflow in this way requires that your terminal +session stays active. Closing the terminal will suspend ongoing jobs, but +Snakemake will handle picking up where any previously-completed jobs left off +when you restart the workflow. Long Running Snakemake Jobs (Managed by HTCondor) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -144,30 +139,52 @@ Long Running Snakemake Jobs (Managed by HTCondor) The second option is to let HTCondor manage the Snakemake process, which allows the jobs to run as long as needed. Instead of seeing Snakemake output directly in your terminal, you'll be able to see it in a -specified log file. To use this option, make sure ``snakemake_long.py`` -is executable (you can run ``chmod +x snakemake_long.py`` from the AP to -make sure it is), and then run: +specified log file. To use this option, run from the repository root: -:: +.. 
code:: bash + + ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ - ./snakemake_long.py --profile spras_profile --htcondor-jobdir +A convenience script called ``run_htcondor.sh`` is also provided in the +repository root. You can execute this script by running: + +.. code:: bash -When run in this mode, all log files for the workflow will be placed -into the path you provided for the logging directory. In particular, -Snakemake's outputs with job progress can be found split between -``/snakemake-long.err`` and ``/snakemake-long.out``. + ./run_htcondor.sh + +When executed in this mode, all log files for the workflow will be placed +into the logging directory (``htcondor/logs`` by default). In particular, +Snakemake's stdout/stderr outputs containing your workflow's progress can +be found split between ``htcondor/logs/snakemake.err`` and ``htcondor/logs/snakemake.out``. These will also log each rule and what HTCondor job ID was submitted for that rule (see the `troubleshooting section <#troubleshooting>`__ for information on how to use these extra log files). +**Note**: While you're in the initial stages of developing/debugging your +workflow, it's very useful to invoke Snakemake with the ``--verbose`` flag. +This can be passed to Snakemake via the ``snakemake_long.py`` script by +adding it to the script's argument list, e.g.: + +.. code:: bash + + ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose + +If you use mamba instead of conda for environment management, you can specify +this with the ``--env-manager`` flag: + +.. code:: bash + + ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --env-manager mamba + Adjusting Resources ------------------- Resource requirements can be adjusted as needed in -``spras_profile/config.yaml``, and HTCondor logs for this workflow can -be found in ``.snakemake/htcondor``. You can set a different log -directory by adding ``htcondor-jobdir: /path/to/dir`` to the profile's -configuration. 
+``htcondor/spras_profile/config.yaml``, and HTCondor logs for this workflow +can be found in your log directory. You can set a different log +directory by changing the configured ``htcondor-jobdir`` in the profile's +configuration. Alternatively, you can pass a different log directory +when invoking Snakemake with the ``--htcondor-jobdir`` argument. To run this same workflow in the OSPool, add the following to the profile's default-resources block: @@ -178,11 +195,6 @@ profile's default-resources block: requirements: | '(HAS_SINGULARITY == True) && (Poolname =!= "CHTC")' -**Note**: This workflow requires that the terminal session responsible -for running snakemake stays active. Closing the terminal will suspend -jobs, but the workflow can use Snakemake's checkpointing to pick up any -jobs where they left off. - **Note**: If you encounter an error that says ``No module named 'spras'``, make sure you've ``pip install``-ed the SPRAS module into your conda environment. @@ -195,11 +207,10 @@ To monitor the state of the job, you can use a second terminal to run ``condor_watch_q`` for realtime updates. Upon completion, the ``output`` directory from the workflow should be -returned as ``spras/docker-wrappers/SPRAS/output``, along with several -files containing the workflow's logging information (anything that -matches ``logs/spras_*`` and ending in ``.out``, ``.err``, or ``.log``). -If the job was unsuccessful, these files should contain useful debugging -clues about what may have gone wrong. +returned as ``output``, along with several files containing the workflow's +logging information (anything that matches ``htcondor/logs/spras_*`` and +ending in ``.out``, ``.err``, or ``.log``). If the job was unsuccessful, +these files should contain useful debugging clues about what may have gone wrong. 
**Note**: If you want to run the workflow with a different version of SPRAS, or one that contains development updates you've made, rebuild diff --git a/docker-wrappers/SPRAS/snakemake_long.py b/htcondor/snakemake_long.py similarity index 98% rename from docker-wrappers/SPRAS/snakemake_long.py rename to htcondor/snakemake_long.py index 8ad1ff537..0f30a4439 100755 --- a/docker-wrappers/SPRAS/snakemake_long.py +++ b/htcondor/snakemake_long.py @@ -100,7 +100,7 @@ def top_main(): # Make sure we have a value for the log directory and that the directory exists. if args.htcondor_jobdir is None: - args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "snakemake-long-logs" + args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "htcondor" / "logs" if not os.path.exists(args.htcondor_jobdir): os.makedirs(args.htcondor_jobdir) else: diff --git a/docker-wrappers/SPRAS/spras.sh b/htcondor/spras.sh similarity index 100% rename from docker-wrappers/SPRAS/spras.sh rename to htcondor/spras.sh diff --git a/docker-wrappers/SPRAS/spras.sub b/htcondor/spras.sub similarity index 87% rename from docker-wrappers/SPRAS/spras.sub rename to htcondor/spras.sub index 9dd1c4abc..79e1bbd94 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/htcondor/spras.sub @@ -5,11 +5,11 @@ ############################################################ # Define a few macros we use throughout the submit file # ############################################################ -CONFIG_FILE = example_config.yaml +CONFIG_FILE = config/config.yaml NUM_PROCS = 4 # Paths to input data and Snakefile. 
-INPUT_DIR = ../../input -SNAKEFILE = ../../Snakefile +INPUT_DIR = input +SNAKEFILE = Snakefile ############################################################ # Specify that the workflow should run in the SPRAS # @@ -21,8 +21,9 @@ SNAKEFILE = ../../Snakefile # first # ############################################################ universe = container -container_image = .sif -# container_image = docker://reedcompbio/spras:v0.6.0 +#container_image = .sif +container_image = instructions-overhaul.sif +# container_image = docker://reedcompbio/spras:v0.2.0 ############################################################ # Specify names for log/stdout/stderr files generated by # @@ -30,15 +31,15 @@ container_image = .sif # NOTE: You should `mkdir logs/` before running, or the # # spras_$(Cluster).log file won't be available. # ############################################################ -log = logs/spras_$(Cluster)_$(Process).log -output = logs/spras_$(Cluster)_$(Process).out -error = logs/spras_$(Cluster)_$(Process).err +log = htcondor/logs/spras_$(Cluster)_$(Process).log +output = htcondor/logs/spras_$(Cluster)_$(Process).out +error = htcondor/logs/spras_$(Cluster)_$(Process).err ############################################################ # Specify the script to run inside the container. This is # # simply a wrapper on the Snakefile. # ############################################################ -executable = spras.sh +executable = htcondor/spras.sh arguments = "--cores $(NUM_PROCS) --configfile $(CONFIG_FILE) --retries 3" ############################################################ @@ -49,6 +50,7 @@ when_to_transfer_output = ON_EXIT transfer_input_files = $(CONFIG_FILE), $(INPUT_DIR), $(SNAKEFILE) # The output directory should match whatever you configure in your configfile. transfer_output_files = output +preserve_relative_paths = true ############################################################ # System specifications. 
Be sure to request enough disk to # @@ -70,7 +72,7 @@ JobBatchName = "SPRAS-workflow-OSPool" # needed if running from CHTC. If running from an OSPool # # AP, omit this line. # ############################################################ -+WantGlideIn = true +# +WantGlideIn = true ############################################################ # Not all Execution Points in the OSPool will have # @@ -81,7 +83,7 @@ JobBatchName = "SPRAS-workflow-OSPool" # this submit file from CHTC, we also need a requirement # # to prevent landing on a CHTC Execution Point. # ############################################################ -requirements = (HAS_SINGULARITY == True) && (Poolname =!= "CHTC") +# requirements = (HAS_SINGULARITY == True) && (Poolname =!= "CHTC") # Queue the job -queue 1 \ No newline at end of file +queue 1 diff --git a/docker-wrappers/SPRAS/spras_profile/config.yaml b/htcondor/spras_profile/config.yaml similarity index 83% rename from docker-wrappers/SPRAS/spras_profile/config.yaml rename to htcondor/spras_profile/config.yaml index 5cc0697d0..1720b59c4 100644 --- a/docker-wrappers/SPRAS/spras_profile/config.yaml +++ b/htcondor/spras_profile/config.yaml @@ -1,26 +1,31 @@ # Default configuration for the SPRAS/HTCondor executor profile. Each of these values # can also be passed via command line flags, e.g. `--jobs 30 --executor htcondor`. +# NOTE: File paths in here should be relative to where you submit from, typically the +# root of the SPRAS repository + # 'jobs' specifies the maximum number of HTCondor jobs that can be in the queue at once. jobs: 30 executor: htcondor -configfile: example_config.yaml +configfile: config/config.yaml +htcondor-jobdir: htcondor/logs + # Indicate to the plugin that jobs running on various EPs do not share a filesystem with # each other, or with the AP. shared-fs-usage: none # Distributed, heterogeneous computational environments are a wild place where strange things # can happen. If something goes wrong, try again up to 5 times. 
After that, we assume there's # a real error that requires user/admin intervention -retries: 5 +retries: 2 # Default resources will apply to all workflow steps. If a single workflow step fails due # to insufficient resources, it can be re-run with modified values. Snakemake will handle # picking up where it left off, and won't re-run steps that have already completed. default-resources: - job_wrapper: "spras.sh" + job_wrapper: "htcondor/spras.sh" # If running in CHTC, this only works with apptainer images # Note requirement for quotes around the image name - container_image: "'spras-v0.6.0.sif'" + container_image: "spras-v0.6.0.sif" universe: "container" # The value for request_disk should be large enough to accommodate the runtime container # image, any additional PRM container images, and your input data. diff --git a/run_htcondor.sh b/run_htcondor.sh new file mode 100755 index 000000000..8ca2d86e0 --- /dev/null +++ b/run_htcondor.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Example helper script to submit a SPRAS workflow to HTCondor with full parallelization +# +# Note that for full runs after any initial debugging, you may wish to remove the `--verbose` +# flag, as this significantly increases the size of log files + +./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose From 957835b6c5370c5dbedc7fa81e8f4381da55443a Mon Sep 17 00:00:00 2001 From: Justin Hiemstra <75916364+jhiemstrawisc@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:31:21 -0600 Subject: [PATCH 3/4] Update run_htcondor.sh Co-authored-by: Tristan F.-R. 
--- run_htcondor.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_htcondor.sh b/run_htcondor.sh index 8ca2d86e0..adf25e54d 100755 --- a/run_htcondor.sh +++ b/run_htcondor.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Example helper script to submit a SPRAS workflow to HTCondor with full parallelization # From 644a1b674cecb4fda5e1d42686b829d3b49827f1 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Tue, 27 Jan 2026 12:34:16 -0600 Subject: [PATCH 4/4] Address review feedback --- htcondor/snakemake_long.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/htcondor/snakemake_long.py b/htcondor/snakemake_long.py index 0f30a4439..50848ce27 100755 --- a/htcondor/snakemake_long.py +++ b/htcondor/snakemake_long.py @@ -9,6 +9,7 @@ import argparse import os import pathlib +import shlex import subprocess import sys import time @@ -47,9 +48,10 @@ def submit_local(snakefile, profile, htcondor_jobdir, verbose=False, env_manager script_location = pathlib.Path(__file__).resolve() # Build arguments string, including optional flags - args_str = f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir} --env-manager {env_manager}" + args_list = ["long", "--snakefile", snakefile, "--profile", profile, "--htcondor-jobdir", htcondor_jobdir, "--env-manager", env_manager] if verbose: - args_str += " --verbose" + args_list.append("--verbose") + args_str = " ".join(shlex.quote(str(arg)) for arg in args_list) submit_description = htcondor.Submit({ "executable": script_location, @@ -101,11 +103,7 @@ def top_main(): # Make sure we have a value for the log directory and that the directory exists. 
 if args.htcondor_jobdir is None: args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "htcondor" / "logs" - if not os.path.exists(args.htcondor_jobdir): - os.makedirs(args.htcondor_jobdir) - else: - if not os.path.exists(args.htcondor_jobdir): - os.makedirs(args.htcondor_jobdir) + pathlib.Path(args.htcondor_jobdir).mkdir(parents=True, exist_ok=True) try: submit_local(args.snakefile, args.profile, args.htcondor_jobdir, args.verbose, args.env_manager)