From 9f1e7fa7f988199fdd91cba6fe210e33877a2472 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Wed, 7 May 2025 14:02:29 -0500
Subject: [PATCH 01/16] Allow 'apptainer' as singularity alias

---
 spras/config/config.py |  6 ++++
 spras/config/schema.py |  2 +-
 spras/containers.py    | 68 +++++++++++++++++++++++++++++++++++++-----
 3 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/spras/config/config.py b/spras/config/config.py
index 605faecf4..f0c6ceaa5 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -71,6 +71,8 @@ def __init__(self, raw_config: dict[str, Any]):
         self.container_prefix: str = DEFAULT_CONTAINER_PREFIX
         # A Boolean specifying whether to unpack singularity containers. Default is False
         self.unpack_singularity = False
+        # A Boolean indicating whether to enable container runtime profiling (apptainer/singularity only)
+        self.enable_container_profiling = False
         # A dictionary to store configured datasets against which SPRAS will be run
         self.datasets = None
         # A dictionary to store configured gold standard data against output of SPRAS runs
@@ -299,6 +301,10 @@ def process_config(self, raw_config: RawConfig):
         if raw_config.container_registry and raw_config.container_registry.base_url != "" and raw_config.container_registry.owner != "":
             self.container_prefix = raw_config.container_registry.base_url + "/" + raw_config.container_registry.owner
 
+        if raw_config.enable_profiling and raw_config.container_framework not in ["singularity", "apptainer"]:
+            warnings.warn("enable_profiling is set to true, but the container framework is not singularity/apptainer. This setting will have no effect.")
+        self.enable_container_profiling = raw_config.enable_profiling
+
         self.process_datasets(raw_config)
         self.process_algorithms(raw_config)
         self.process_analysis(raw_config)
diff --git a/spras/config/schema.py b/spras/config/schema.py
index c84ea4384..93d0225da 100644
--- a/spras/config/schema.py
+++ b/spras/config/schema.py
@@ -91,8 +91,8 @@ def validate(label: str):
 
 class ContainerFramework(CaseInsensitiveEnum):
     docker = 'docker'
-    # TODO: add apptainer variant once #260 gets merged
     singularity = 'singularity'
+    apptainer = 'apptainer'
     dsub = 'dsub'
 
 class ContainerRegistry(BaseModel):
diff --git a/spras/containers.py b/spras/containers.py
index b7711c4f6..c7f1f5d04 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -147,7 +147,7 @@ def run_container(framework: str, container_suffix: str, command: List[str], vol
         container = config.config.container_prefix + "/" + container_suffix
     if normalized_framework == 'docker':
         return run_container_docker(container, command, volumes, working_dir, environment)
-    elif normalized_framework == 'singularity':
+    elif normalized_framework == 'singularity' or normalized_framework == "apptainer":
         return run_container_singularity(container, command, volumes, working_dir, environment)
     elif normalized_framework == 'dsub':
         return run_container_dsub(container, command, volumes, working_dir, environment)
@@ -290,7 +290,52 @@ def run_container_docker(container: str, command: List[str], volumes: List[Tuple
 
     return out
 
 
-def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None):
+def create_cgroup():
+    # 1. Find current cgroup path
+    with open("/proc/self/cgroup") as f:
+        first_line = next(f).strip()
+        cgroup_rel = first_line.split(":")[-1].strip()
+
+    print(f"Current cgroup path: {cgroup_rel}")
+    mycgroup = os.path.join("/sys/fs/cgroup", cgroup_rel.lstrip("/"))
+    print(f"My cgroup path: {mycgroup}")
+
+    peer_cgroup = os.path.join(os.path.dirname(mycgroup), f"spras-peer-{os.getpid()}")
+    print(f"Peer cgroup path: {peer_cgroup}")
+
+    # 2. Create peer cgroup
+    try:
+        os.makedirs(peer_cgroup, exist_ok=True)
+    except Exception as e:
+        print(f"Failed to create cgroup: {e}")
+
+    # 3. Move this process into the peer cgroup (so child inherits it)
+    try:
+        with open(os.path.join(peer_cgroup, "cgroup.procs"), "w") as f:
+            f.write(str(os.getpid()))
+    except Exception as e:
+        print(f"Failed to move process into cgroup: {e}")
+
+    return peer_cgroup
+
+
+def read_container_memory_peak(cgroup_path: str):
+    try:
+        with open(os.path.join(cgroup_path, "memory.peak")) as f:
+            peak_mem = f.read().strip()
+        print(f"Peak Container Memory Usage from cgroup: {peak_mem}")
+    except Exception as e:
+        print(f"Failed to read memory usage from cgroup: {e}")
+
+    try:
+        with open(os.path.join(cgroup_path, "cpu.stat")) as f:
+            cpu_stat = f.read().strip()
+        print(f"CPU Stat from cgroup:\n{cpu_stat}")
+    except Exception as e:
+        print(f"Failed to read cpu.stat from cgroup: {e}")
+
+
+def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str, environment: Optional[dict[str, str]] = None):
     """
     Runs a command in the container using Singularity.
     Only available on Linux.
@@ -309,6 +354,9 @@
     if platform.system() != 'Linux':
         raise NotImplementedError('Singularity support is only available on Linux')
 
+    print("Creating cgroup for memory and CPU tracking")
+    mycgroup = create_cgroup()
+
     # See https://stackoverflow.com/questions/3095071/in-python-what-happens-when-you-import-inside-of-a-function
     from spython.main import Client
 
             Client.build(recipe=image_path, image=str(base_cont_path), sandbox=True, sudo=False)
 
         # Execute the locally unpacked container.
-        return Client.execute(str(base_cont_path),
-                              command,
-                              options=singularity_options,
-                              bind=bind_paths)
+        result = Client.execute(str(base_cont_path),
+                                command,
+                                options=singularity_options,
+                                bind=bind_paths)
 
     else:
         # Adding 'docker://' to the container indicates this is a Docker image Singularity must convert
-        return Client.execute('docker://' + container,
+        result = Client.execute('docker://' + container,
                               command,
                               options=singularity_options,
                               bind=bind_paths)
 
+    # Read stats from the container cgroup
+    print("Reading memory and CPU stats from cgroup")
+    read_container_memory_peak(mycgroup)
+
+    return result
+
 
 # Because this is called independently for each file, the same local path can be mounted to multiple volumes
 def prepare_volume(filename: Union[str, PurePath], volume_base: Union[str, PurePath]) -> Tuple[Tuple[PurePath, PurePath], str]:

From 115a6a1c46df1c1a9573836eed5e3dbac36d531e Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 9 Jun 2025 13:56:44 -0500
Subject: [PATCH 02/16] Create knob that adds profiling flag to config object

---
 config/config.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/config/config.yaml b/config/config.yaml
index 1e5f1d561..06b8f7520 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -10,6 +10,13 @@ hash_length: 7
 # - There is no support for other environments at the moment.
 container_framework: docker
 
+# Enabling profiling adds a file called 'usage-profile.txt' to the output directory of each algorithm.
+# The contents of this file describe the CPU utilization and peak memory consumption of the algorithm
+# as seen by its runtime container.
+# NOTE: Profiling is currently supported only when the container framework is set to apptainer/singularity
+# and when the host system supports the 'cgroup' filesystem.
+enable_profiling: false
+
 # Only used if container_framework is set to singularity, this will unpack the singularity containers
 # to the local filesystem. This is useful when PRM containers need to run inside another container,
 # such as would be the case in an HTCondor/OSPool environment.

From e917b91ea1044ac02c938b9614f03265b2f22899 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 9 Jun 2025 16:12:06 -0500
Subject: [PATCH 03/16] Create peer cgroup for PRM containers and aggregate
 usage stats

This commit adds the pieces the main Python process needs to create a peer
cgroup (Linux only) so that, when profiling is enabled, the PRM containers
run under that cgroup with the memory and cpu controllers enabled, giving
us access to their `memory.peak` and `cpu.stat` interface files.

Unfortunately, we can't just point Python at some PID, because the PRM
containers launch various processes without reporting the PIDs back to the
originating process. That rules out regular inline monitoring of each
child process.
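
At a high level, the flow looks like this (a sketch only; the cgroup paths
here are illustrative, not the exact paths the code computes):

    # main process: create a peer of its own cgroup v2 directory
    mkdir /sys/fs/cgroup/<parent>/spras-peer-<pid>

    # cgroup_wrapper.sh: move itself into the peer cgroup, then exec the
    # container so every process it spawns is charged to that cgroup
    echo $$ > /sys/fs/cgroup/<parent>/spras-peer-<pid>/cgroup.procs
    exec apptainer exec <image> <prm command>

    # main process, after the container exits: read the aggregate usage
    cat /sys/fs/cgroup/<parent>/spras-peer-<pid>/memory.peak
    cat /sys/fs/cgroup/<parent>/spras-peer-<pid>/cpu.stat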
---
 spras/allpairs.py         |   6 +-
 spras/cgroup_wrapper.sh   |  14 ++++
 spras/containers.py       | 143 ++++++++++++++++++++++++++------------
 spras/domino.py           |   2 +
 spras/meo.py              |   3 +-
 spras/mincostflow.py      |   3 +-
 spras/omicsintegrator1.py |   1 +
 spras/omicsintegrator2.py |   3 +-
 spras/pathlinker.py       |   3 +-
 9 files changed, 127 insertions(+), 51 deletions(-)
 create mode 100755 spras/cgroup_wrapper.sh

diff --git a/spras/allpairs.py b/spras/allpairs.py
index e0f28d748..7ff1ade5c 100644
--- a/spras/allpairs.py
+++ b/spras/allpairs.py
@@ -92,7 +92,8 @@ def run(nodetypes=None, network=None, directed_flag=None, output_file=None, cont
         volumes.append(bind_path)
 
     # Create the parent directories for the output file if needed
-    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
+    out_dir = Path(output_file).parent
+    out_dir.mkdir(parents=True, exist_ok=True)
 
     bind_path, mapped_out_file = prepare_volume(output_file, work_dir)
     volumes.append(bind_path)
@@ -111,7 +112,8 @@ def run(nodetypes=None, network=None, directed_flag=None, output_file=None, cont
                               container_suffix,
                               command,
                               volumes,
-                              work_dir)
+                              work_dir,
+                              out_dir)
 
     @staticmethod
     def parse_output(raw_pathway_file, standardized_pathway_file, params):
diff --git a/spras/cgroup_wrapper.sh b/spras/cgroup_wrapper.sh
new file mode 100755
index 000000000..358876ddd
--- /dev/null
+++ b/spras/cgroup_wrapper.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# This script gets invoked by run_singularity_container when enable_profiling is set to true.
+# Its purpose is to move its own PID into the specified cgroup and then execute the container commands
+# passed to it. This has the effect of sticking all processes started by the container into the specified
+# cgroup, which lets us monitor them all in aggregate for resource usage.
+
+CGROUP_PATH="$1"
+echo "My cgroup path is: $CGROUP_PATH"
+shift
+echo $$ > "$CGROUP_PATH/cgroup.procs"
+
+echo "Executing command: $@"
+exec "$@"
\ No newline at end of file
diff --git a/spras/containers.py b/spras/containers.py
index c7f1f5d04..a094dc608 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -1,3 +1,4 @@
+import csv
 import os
 import platform
 import re
@@ -131,7 +132,7 @@ def env_to_items(environment: dict[str, str]) -> Iterator[str]:
 # TODO consider a better default environment variable
 # Follow docker-py's naming conventions (https://docker-py.readthedocs.io/en/stable/containers.html)
 # Technically the argument is an image, not a container, but we use container here.
-def run_container(framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None):
+def run_container(framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str, environment: Optional[dict[str, str]] = None):
     """
     Runs a command in the container using Singularity or Docker
     @param framework: singularity or docker
@@ -140,6 +141,7 @@ def run_container(framework: str, container_suffix: str, command: List[str], vol
     @param volumes: a list of volumes to mount where each item is a (source, destination) tuple
     @param working_dir: the working directory in the container
     @param environment: environment variables to set in the container
+    @param out_dir: output directory for the rule's artifacts. Only passed onto run_container_singularity for the purpose of profiling.
     @return: output from Singularity execute or Docker run
     """
     normalized_framework = framework.casefold()
@@ -148,7 +150,7 @@ def run_container(framework: str, container_suffix: str, command: List[str], vol
     if normalized_framework == 'docker':
         return run_container_docker(container, command, volumes, working_dir, environment)
     elif normalized_framework == 'singularity' or normalized_framework == "apptainer":
-        return run_container_singularity(container, command, volumes, working_dir, environment)
+        return run_container_singularity(container, command, volumes, working_dir, out_dir, environment)
     elif normalized_framework == 'dsub':
         return run_container_dsub(container, command, volumes, working_dir, environment)
     else:
@@ -290,50 +292,88 @@ def run_container_docker(container: str, command: List[str], volumes: List[Tuple
 
     return out
 
 
-def create_cgroup():
-    # 1. Find current cgroup path
+def create_cgroup() -> str:
+    """
+    A helper function that creates a new peer cgroup for the current process.
+    Apptainer/singularity containers are placed in this cgroup so that they
+    can be tracked for memory and CPU usage.
+    This currently assumes HTCondor runs where the current process is already
+    in a two-level nested cgroup (introduced in HTCondor 24.8.0).
+
+    Returns the path to the peer cgroup, which is needed by the cgroup_wrapper.sh script
+    to set up the cgroup for the container.
+    """
+
+    # Get the current process's cgroup path
+    # This assumes the cgroup is in the unified hierarchy
     with open("/proc/self/cgroup") as f:
         first_line = next(f).strip()
         cgroup_rel = first_line.split(":")[-1].strip()
 
-    print(f"Current cgroup path: {cgroup_rel}")
     mycgroup = os.path.join("/sys/fs/cgroup", cgroup_rel.lstrip("/"))
-    print(f"My cgroup path: {mycgroup}")
 
     peer_cgroup = os.path.join(os.path.dirname(mycgroup), f"spras-peer-{os.getpid()}")
-    print(f"Peer cgroup path: {peer_cgroup}")
 
-    # 2. Create peer cgroup
+    # Create the peer cgroup directory
     try:
         os.makedirs(peer_cgroup, exist_ok=True)
     except Exception as e:
         print(f"Failed to create cgroup: {e}")
 
-    # 3. Move this process into the peer cgroup (so child inherits it)
-    try:
-        with open(os.path.join(peer_cgroup, "cgroup.procs"), "w") as f:
-            f.write(str(os.getpid()))
-    except Exception as e:
-        print(f"Failed to move process into cgroup: {e}")
-
     return peer_cgroup
 
 
-def read_container_memory_peak(cgroup_path: str):
+def create_container_stats(cgroup_path: str, out_dir: str):
+    """
+    Reads the contents of the provided cgroup's memory.peak and cpu.stat files.
+    This information is parsed and placed in the calling rule's output directory
+    as 'usage-profile.tsv'.
+    @param cgroup_path: path to the cgroup directory for the container
+    @param out_dir: output directory for the rule's artifacts -- used here to store profiling data
+    """
+
+    profile_path = os.path.join(out_dir, "usage-profile.tsv")
+
+    peak_mem = "N/A"
     try:
         with open(os.path.join(cgroup_path, "memory.peak")) as f:
             peak_mem = f.read().strip()
-        print(f"Peak Container Memory Usage from cgroup: {peak_mem}")
     except Exception as e:
         print(f"Failed to read memory usage from cgroup: {e}")
 
+    cpu_usage = cpu_user = cpu_system = "N/A"
     try:
         with open(os.path.join(cgroup_path, "cpu.stat")) as f:
-            cpu_stat = f.read().strip()
-            print(f"CPU Stat from cgroup:\n{cpu_stat}")
+            # Parse out the contents of the cpu.stat file
+            # You can find these fields by searching "cpu.stat" in the cgroup documentation:
+            # https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
+            for line in f:
+                parts = line.strip().split()
+                if len(parts) != 2:
+                    continue
+                key, value = parts
+                if key == "usage_usec":
+                    cpu_usage = value
+                elif key == "user_usec":
+                    cpu_user = value
+                elif key == "system_usec":
+                    cpu_system = value
     except Exception as e:
         print(f"Failed to read cpu.stat from cgroup: {e}")
 
+    # Set up the header for the TSV file
+    header = ["peak_memory_bytes", "cpu_usage_usec", "cpu_user_usec", "cpu_system_usec"]
+    row = [peak_mem, cpu_usage, cpu_user, cpu_system]
+
+    # Write the contents of the file
+    write_header = not os.path.exists(profile_path) or os.path.getsize(profile_path) == 0
+    with open(profile_path, "a", newline="") as out_f:
+        writer = csv.writer(out_f, delimiter="\t")
+
+        # Only write the header if the file was previously empty or did not exist
+        if write_header:
+            writer.writerow(header)
+        writer.writerow(row)
+
 
 def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str, environment: Optional[dict[str, str]] = None):
     """
@@ -343,6 +383,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
     @param command: command to run in the container
     @param volumes: a list of volumes to mount where each item is a (source, destination) tuple
     @param working_dir: the working directory in the container
+    @param out_dir: output directory for the rule's artifacts -- used here to store profiling data
     @param environment: environment variable to set in the container
     @return: output from Singularity execute
     """
@@ -354,9 +395,6 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
     if platform.system() != 'Linux':
         raise NotImplementedError('Singularity support is only available on Linux')
 
-    print("Creating cgroup for memory and CPU tracking")
-    mycgroup = create_cgroup()
-
     # See https://stackoverflow.com/questions/3095071/in-python-what-happens-when-you-import-inside-of-a-function
     from spython.main import Client
 
@@ -376,12 +414,9 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
     # https://docs.sylabs.io/guides/3.7/user-guide/environment_and_metadata.html#env-option
         singularity_options.extend(['--env', ",".join(env_to_items(environment))])
 
-    # Handle unpacking singularity image if needed. Potentially needed for running nested unprivileged containers
+    expanded_image = None
     if config.config.unpack_singularity:
-        # Split the string by "/"
         path_elements = container.split("/")
-
-        # Get the last element, which will indicate the base container name
         base_cont = path_elements[-1]
         base_cont = base_cont.replace(":", "_").split(":")[0]
         sif_file = base_cont + ".sif"
@@ -396,28 +431,46 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
 
         base_cont_path = unpacked_dir / Path(base_cont)
 
-        # Check if the directory for base_cont already exists. When running concurrent jobs, it's possible
+        # Check whether the directory for base_cont_path already exists. When running concurrent jobs, it's possible
         # something else has already pulled/unpacked the container.
-        # Here, we expand the sif image from `image_path` to a directory indicated by `base_cont`
+        # Here, we expand the sif image from `image_path` to a directory indicated by `base_cont_path`
         if not base_cont_path.exists():
             Client.build(recipe=image_path, image=str(base_cont_path), sandbox=True, sudo=False)
-
-        # Execute the locally unpacked container.
-        result = Client.execute(str(base_cont_path),
-                                command,
-                                options=singularity_options,
-                                bind=bind_paths)
-
+        expanded_image = base_cont_path  # This is the sandbox directory
+
+    image_to_run = expanded_image if expanded_image else container
+    if config.config.enable_profiling:
+        # We won't end up using the spython client if profiling is enabled because
+        # we need to run everything manually to set up the cgroup
+        # Build the apptainer run command, which gets passed to the cgroup wrapper script
+        singularity_cmd = [
+            "apptainer", "exec"
+        ]
+        for bind in bind_paths:
+            singularity_cmd.extend(["--bind", bind])
+        singularity_cmd.extend(singularity_options)
+        singularity_cmd.append(image_to_run)
+        singularity_cmd.extend(command)
+
+        my_cgroup = create_cgroup()
+        # The wrapper script is packaged with spras, and should be located in the same directory
+        # as `containers.py`.
+        wrapper = os.path.join(os.path.dirname(__file__), "cgroup_wrapper.sh")
+        cmd = [wrapper, my_cgroup] + singularity_cmd
+        proc = subprocess.run(cmd, capture_output=True, text=True)
+        print("Stdout from container execution:", proc.stdout)
+
+        print("Reading memory and CPU stats from cgroup")
+        create_container_stats(my_cgroup, out_dir)
+
+        result = proc.stdout
     else:
-        # Adding 'docker://' to the container indicates this is a Docker image Singularity must convert
-        result = Client.execute('docker://' + container,
-                              command,
-                              options=singularity_options,
-                              bind=bind_paths)
-
-    # Read stats from the container cgroup
-    print("Reading memory and CPU stats from cgroup")
-    read_container_memory_peak(mycgroup)
+        result = Client.execute(
+            image=image_to_run,
+            command=command,
+            options=singularity_options,
+            bind=bind_paths
+        )
 
     return result
diff --git a/spras/domino.py b/spras/domino.py
index 449715b1f..ac6be1378 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -117,6 +117,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None,
                           container_suffix,
                           slicer_command,
                           volumes,
+                          out_dir,
                           work_dir)
 
     # Make the Python command to run within the container
@@ -141,6 +142,7 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None,
                           container_suffix,
                           domino_command,
                           volumes,
+                          out_dir,
                           work_dir)
 
     # DOMINO creates a new folder in out_dir to output its modules HTML files into called active_genes
diff --git a/spras/meo.py b/spras/meo.py
index 3ae8be9cc..3e4ca4d46 100644
--- a/spras/meo.py
+++ b/spras/meo.py
@@ -184,7 +184,8 @@ def run(edges=None, sources=None, targets=None, output_file=None, max_path_lengt
                           container_suffix,
                           command,
                           volumes,
-                          work_dir)
+                          work_dir,
+                          out_dir)
 
     properties_file_local.unlink(missing_ok=True)
diff --git a/spras/mincostflow.py b/spras/mincostflow.py
index 03898a1bd..f883afb52 100644
--- a/spras/mincostflow.py
+++ b/spras/mincostflow.py
@@ -121,7 +121,8 @@ def run(sources=None, targets=None, edges=None, output_file=None, flow=None, cap
                           container_suffix,
                           command,
                           volumes,
-                          work_dir)
+                          work_dir,
+                          out_dir)
 
     # Check the output of the container
     out_dir_content = sorted(out_dir.glob('*.sif'))
diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py
index b3b7afbfe..a92d7ecea 100644
--- a/spras/omicsintegrator1.py
+++ b/spras/omicsintegrator1.py
@@ -202,6 +202,7 @@ def run(edges=None, prizes=None, dummy_nodes=None, dummy_mode=None, mu_squared=N
                           command,
                           volumes,
                           work_dir,
+                          out_dir,
                           {'TMPDIR': mapped_out_dir})
 
     conf_file_local.unlink(missing_ok=True)
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index ee133f2bd..b038baec2 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -134,7 +134,8 @@ def run(edges=None, prizes=None, output_file=None, w=None, b=None, g=None, noise
                           container_suffix,
                           command,
                           volumes,
-                          work_dir)
+                          work_dir,
+                          out_dir)
 
     # TODO do we want to retain other output files?
 # TODO if deleting other output files, write them all to a tmp directory and copy
diff --git a/spras/pathlinker.py b/spras/pathlinker.py
index a671c9b92..c7cabc97b 100644
--- a/spras/pathlinker.py
+++ b/spras/pathlinker.py
@@ -118,7 +118,8 @@ def run(nodetypes=None, network=None, output_file=None, k=None, container_framew
                           container_suffix,
                           command,
                           volumes,
-                          work_dir)
+                          work_dir,
+                          out_dir)
 
     # Rename the primary output file to match the desired output filename
     # Currently PathLinker only writes one output file so we do not need to delete others

From b3bc8ff40d56440b7a96fc251e51ed4235530921 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 9 Jun 2025 16:16:27 -0500
Subject: [PATCH 04/16] Package new cgroup wrapper

---
 MANIFEST.in    |  1 +
 pyproject.toml | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)
 create mode 100644 MANIFEST.in

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 000000000..72dcf4891
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include spras/cgroup_wrapper.sh
diff --git a/pyproject.toml b/pyproject.toml
index 7a58e8ccb..424f183cf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,8 +71,13 @@ select = [
     "W292", # missing-newline-at-end-of-file
 ]
 
-[tool.setuptools]
-# py-modules tells setuptools which directory is our actual module
-py-modules = ["spras"]
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["spras*"]
+
+# Include non-python executables
+[tool.setuptools.package-data]
+"spras" = ["cgroup_wrapper.sh"]
+
 # packages tells setuptools what the exported package is called (ie allows import spras)
 packages = ["spras", "spras.analysis", "spras.config"]

From 1598a439e27b8619f2cadb981add09aedf0dc398 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 9 Jun 2025 16:18:21 -0500
Subject: [PATCH 05/16] Fix minor things

---
 config/config.yaml      | 11 +++++------
 spras/cgroup_wrapper.sh |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 06b8f7520..9abc99a6b 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -5,22 +5,21 @@ hash_length: 7
 
 # Specify the container framework used by each PRM wrapper. Valid options include:
 # - docker (default if not specified)
-# - singularity -- Also known as apptainer, useful in HPC/HTC environments where docker isn't allowed
-# - dsub -- experimental with limited support, used for running on Google Cloud with the All of Us cloud environment.
-# - There is no support for other environments at the moment.
+# - singularity OR apptainer -- Apptainer (formerly Singularity) is useful in HPC/HTC environments where docker isn't allowed
+# - dsub -- experimental with limited support, used for running on Google Cloud
 container_framework: docker
 
-# Enabling profiling adds a file called 'usage-profile.txt' to the output directory of each algorithm.
+# Enabling profiling adds a file called 'usage-profile.tsv' to the output directory of each algorithm.
 # The contents of this file describe the CPU utilization and peak memory consumption of the algorithm
 # as seen by its runtime container.
 # NOTE: Profiling is currently supported only when the container framework is set to apptainer/singularity
 # and when the host system supports the 'cgroup' filesystem.
 enable_profiling: false
 
-# Only used if container_framework is set to singularity, this will unpack the singularity containers
+# Only used if container_framework is set to singularity/apptainer, this will unpack the containers
 # to the local filesystem. This is useful when PRM containers need to run inside another container,
 # such as would be the case in an HTCondor/OSPool environment.
-# NOTE: This unpacks singularity containers to the local filesystem, which will take up space in a way
+# NOTE: This unpacks containers to the local filesystem, which will take up space in a way
 # that persists after the workflow is complete. To clean up the unpacked containers, the user must
 # manually delete them. For convenience, these unpacked files will exist in the current working directory
 # under `unpacked`.
diff --git a/spras/cgroup_wrapper.sh b/spras/cgroup_wrapper.sh
index 358876ddd..0bed992f0 100755
--- a/spras/cgroup_wrapper.sh
+++ b/spras/cgroup_wrapper.sh
@@ -11,4 +11,4 @@ shift
 echo $$ > "$CGROUP_PATH/cgroup.procs"
 
 echo "Executing command: $@"
-exec "$@"
\ No newline at end of file
+exec "$@"

From ed922199479ac3b54d3595badfae3dfc9f55838b Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Tue, 10 Jun 2025 12:17:48 -0500
Subject: [PATCH 06/16] Restore docker:// prefix for remote containers

---
 spras/containers.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/spras/containers.py b/spras/containers.py
index a094dc608..ccb797c80 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -438,7 +438,9 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
             Client.build(recipe=image_path, image=str(base_cont_path), sandbox=True, sudo=False)
         expanded_image = base_cont_path  # This is the sandbox directory
 
-    image_to_run = expanded_image if expanded_image else container
+    # If not using the expanded sandbox image, we still need to prepend the docker:// prefix
+    # so apptainer knows to pull the image.
+    image_to_run = expanded_image if expanded_image else "docker://" + container
     if config.config.enable_profiling:

From 11c8e63cf014d6f2ca0110148c1fbbeaa509be75 Mon Sep 17 00:00:00 2001
From: Tristan F
Date: Tue, 17 Jun 2025 08:38:29 -0700
Subject: [PATCH 07/16] fix(run_container): support paths

---
 spras/containers.py | 6 +++---
 spras/domino.py     | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/spras/containers.py b/spras/containers.py
index ccb797c80..c848e4c58 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -132,7 +132,7 @@ def env_to_items(environment: dict[str, str]) -> Iterator[str]:
 # TODO consider a better default environment variable
 # Follow docker-py's naming conventions (https://docker-py.readthedocs.io/en/stable/containers.html)
 # Technically the argument is an image, not a container, but we use container here.
-def run_container(framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str, environment: Optional[dict[str, str]] = None):
+def run_container(framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str | os.PathLike, environment: Optional[dict[str, str]] = None):
     """
     Runs a command in the container using Singularity or Docker
     @param framework: singularity or docker
@@ -156,7 +156,7 @@ def run_container(framework: str, container_suffix: str, command: List[str], vol
     else:
Choose "docker", "dsub", or "singularity".') -def run_container_and_log(name: str, framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, environment: Optional[dict[str, str]] = None): +def run_container_and_log(name: str, framework: str, container_suffix: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str | os.PathLike, environment: Optional[dict[str, str]] = None): """ Runs a command in the container using Singularity or Docker with associated pretty printed messages. @param name: the display name of the running container for logging purposes @@ -173,7 +173,7 @@ def run_container_and_log(name: str, framework: str, container_suffix: str, comm print('Running {} on container framework "{}" on env {} with command: {}'.format(name, framework, list(env_to_items(environment)), ' '.join(command)), flush=True) try: - out = run_container(framework=framework, container_suffix=container_suffix, command=command, volumes=volumes, working_dir=working_dir, environment=environment) + out = run_container(framework=framework, container_suffix=container_suffix, command=command, volumes=volumes, working_dir=working_dir, out_dir=out_dir, environment=environment) if out is not None: if isinstance(out, list): out = ''.join(out) diff --git a/spras/domino.py b/spras/domino.py index ac6be1378..d283d684b 100644 --- a/spras/domino.py +++ b/spras/domino.py @@ -117,8 +117,8 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None, container_suffix, slicer_command, volumes, - out_dir, - work_dir) + work_dir, + out_dir) # Make the Python command to run within the container domino_command = ['domino', From 79b6688517478fec768d62f0ca991d6fab4afa78 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 27 Jun 2025 10:15:02 -0500 Subject: [PATCH 08/16] Move apptainer profiling functions to separate file --- spras/containers.py | 6 ++-- spras/profiling.py | 86 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 spras/profiling.py diff --git a/spras/containers.py b/spras/containers.py index c848e4c58..b5e7e1b4d 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -1,4 +1,3 @@ -import csv import os import platform import re @@ -11,6 +10,7 @@ import spras.config.config as config from spras.logging import indent +from spras.profiling import create_apptainer_container_stats, create_peer_cgroup from spras.util import hash_filename @@ -454,7 +454,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ singularity_cmd.append(image_to_run) singularity_cmd.extend(command) - my_cgroup = create_cgroup() + my_cgroup = create_peer_cgroup() # The wrapper script is packaged with spras, and should be located in the same directory # as `containers.py`. 
         wrapper = os.path.join(os.path.dirname(__file__), "cgroup_wrapper.sh")
         cmd = [wrapper, my_cgroup] + singularity_cmd
         proc = subprocess.run(cmd, capture_output=True, text=True)
         print("Stdout from container execution:", proc.stdout)
 
         print("Reading memory and CPU stats from cgroup")
-        create_container_stats(my_cgroup, out_dir)
+        create_apptainer_container_stats(my_cgroup, out_dir)
 
         result = proc.stdout
diff --git a/spras/profiling.py b/spras/profiling.py
new file mode 100644
index 000000000..3019406fc
--- /dev/null
+++ b/spras/profiling.py
@@ -0,0 +1,86 @@
+import csv
+import os
+
+
+def create_peer_cgroup() -> str:
+    """
+    A helper function that creates a new peer cgroup for the current process.
+    Apptainer/singularity containers are placed in this cgroup so that they
+    can be tracked for memory and CPU usage.
+    This currently assumes HTCondor runs where the current process is already
+    in a two-level nested cgroup (introduced in HTCondor 24.8.0).
+
+    Returns the path to the peer cgroup, which is needed by the cgroup_wrapper.sh script
+    to set up the cgroup for the container.
+    """
+
+    # Get the current process's cgroup path
+    # This assumes the cgroup is in the unified hierarchy
+    with open("/proc/self/cgroup") as f:
+        first_line = next(f).strip()
+        cgroup_rel = first_line.split(":")[-1].strip()
+
+    mycgroup = os.path.join("/sys/fs/cgroup", cgroup_rel.lstrip("/"))
+    peer_cgroup = os.path.join(os.path.dirname(mycgroup), f"spras-peer-{os.getpid()}")
+
+    # Create the peer cgroup directory
+    try:
+        os.makedirs(peer_cgroup, exist_ok=True)
+    except Exception as e:
+        print(f"Failed to create cgroup: {e}")
+
+    return peer_cgroup
+
+
+def create_apptainer_container_stats(cgroup_path: str, out_dir: str):
+    """
+    Reads the contents of the provided cgroup's memory.peak and cpu.stat files.
+    This information is parsed and placed in the calling rule's output directory
+    as 'usage-profile.tsv'.
+    @param cgroup_path: path to the cgroup directory for the container
+    @param out_dir: output directory for the rule's artifacts -- used here to store profiling data
+    """
+
+    profile_path = os.path.join(out_dir, "usage-profile.tsv")
+
+    peak_mem = "N/A"
+    try:
+        with open(os.path.join(cgroup_path, "memory.peak")) as f:
+            peak_mem = f.read().strip()
+    except Exception as e:
+        print(f"Failed to read memory usage from cgroup: {e}")
+
+    cpu_usage = cpu_user = cpu_system = "N/A"
+    try:
+        with open(os.path.join(cgroup_path, "cpu.stat")) as f:
+            # Parse out the contents of the cpu.stat file
+            # You can find these fields by searching "cpu.stat" in the cgroup documentation:
+            # https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
+            for line in f:
+                parts = line.strip().split()
+                if len(parts) != 2:
+                    continue
+                key, value = parts
+                if key == "usage_usec":
+                    cpu_usage = value
+                elif key == "user_usec":
+                    cpu_user = value
+                elif key == "system_usec":
+                    cpu_system = value
+    except Exception as e:
+        print(f"Failed to read cpu.stat from cgroup: {e}")
+
+    # Set up the header for the TSV file
+    header = ["peak_memory_bytes", "cpu_usage_usec", "cpu_user_usec", "cpu_system_usec"]
+    row = [peak_mem, cpu_usage, cpu_user, cpu_system]
+
+    # Write the contents of the file
+    write_header = not os.path.exists(profile_path) or os.path.getsize(profile_path) == 0
+    with open(profile_path, "a", newline="") as out_f:
+        writer = csv.writer(out_f, delimiter="\t")
+
+        # Only write the header if the file was previously empty or did not exist
+        if write_header:
+            writer.writerow(header)
+        writer.writerow(row)
+

From be6adb166e37956d043a1828e2dc54fb0fc6f19b Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Fri, 27 Jun 2025 10:15:32 -0500
Subject: [PATCH 09/16] Restore some comments about apptainer image unpacking

---
 spras/containers.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/spras/containers.py b/spras/containers.py
index b5e7e1b4d..6b9788797 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -416,6 +416,11 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
     expanded_image = None
     if config.config.unpack_singularity:
+        # The incoming image string is of the format <registry>/<owner>/<image>:<tag> e.g.
+        # hub.docker.com/reedcompbio/spras:latest
+        # Here we first produce a .sif image using the image name and tag (base_cont)
+        # and then expand that image into a sandbox directory. For example,
+        # hub.docker.com/reedcompbio/spras:latest --> spras_latest.sif --> ./spras_latest/
         path_elements = container.split("/")
         base_cont = path_elements[-1]
         base_cont = base_cont.replace(":", "_").split(":")[0]
         sif_file = base_cont + ".sif"
@@ -439,7 +444,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
         expanded_image = base_cont_path  # This is the sandbox directory
 
     # If not using the expanded sandbox image, we still need to prepend the docker:// prefix
-    # so apptainer knows to pull the image.
+    # so apptainer knows to pull and convert the image format from docker to apptainer.
     image_to_run = expanded_image if expanded_image else "docker://" + container
     if config.config.enable_profiling:
         # We won't end up using the spython client if profiling is enabled because

From 29f000152933bbb6c39cfec7f4b9bc8b53318632 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Thu, 18 Sep 2025 10:22:37 -0500
Subject: [PATCH 10/16] Incorporate review feedback
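
For reference, the profiling invocation that containers.py assembles ends up
looking roughly like this (a sketch; the paths and bind arguments are
illustrative, not literal):

    spras/cgroup_wrapper.sh /sys/fs/cgroup/<parent>/spras-peer-<pid> \
        apptainer exec --bind <host path>:<container path> <image> <command>

The wrapper writes its own PID into the peer cgroup's cgroup.procs and then
execs apptainer, so everything the container starts is charged to the peer
cgroup rather than to the generic snakemake/spras process tree.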
---
 spras/cgroup_wrapper.sh | 11 ++++++---
 spras/config/config.py  |  4 ++--
 spras/config/schema.py  |  1 +
 spras/containers.py     |  2 +-
 spras/profiling.py      |  4 ++++
 5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/spras/cgroup_wrapper.sh b/spras/cgroup_wrapper.sh
index 0bed992f0..e2ba5c3b9 100755
--- a/spras/cgroup_wrapper.sh
+++ b/spras/cgroup_wrapper.sh
@@ -1,14 +1,19 @@
 #!/bin/bash
 
 # This script gets invoked by run_singularity_container when enable_profiling is set to true.
-# Its purpose is to move its own PID into the specified cgroup and then execute the container commands
-# passed to it. This has the effect of sticking all processes started by the container into the specified
-# cgroup, which lets us monitor them all in aggregate for resource usage.
+# Its arguments are the target cgroup path followed by the command to execute.
+# If profiling is enabled, we've already created a new cgroup that has no running processes and
+# we've started this script with its own PID. To isolate the inner container's resource usage stats,
+# we add this script's PID to the new cgroup and then run the apptainer command. Since the generic
+# snakemake/spras stuff is outside this cgroup, we can monitor the inner container's resource usage
+# without conflating it with the overhead from spras itself.
 
 CGROUP_PATH="$1"
 echo "My cgroup path is: $CGROUP_PATH"
+# Pop the first argument off the list so remaining args are just the apptainer command to exec
 shift
 echo $$ > "$CGROUP_PATH/cgroup.procs"
 
+# Start apptainer
 echo "Executing command: $@"
 exec "$@"
diff --git a/spras/config/config.py b/spras/config/config.py
index f0c6ceaa5..6425a8bab 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -72,7 +72,7 @@ def __init__(self, raw_config: dict[str, Any]):
         # A Boolean specifying whether to unpack singularity containers. Default is False
         self.unpack_singularity = False
         # A Boolean indicating whether to enable container runtime profiling (apptainer/singularity only)
-        self.enable_container_profiling = False
+        self.enable_profiling = False
         # A dictionary to store configured datasets against which SPRAS will be run
         self.datasets = None
@@ -303,7 +303,7 @@ def process_config(self, raw_config: RawConfig):
         if raw_config.enable_profiling and raw_config.container_framework not in ["singularity", "apptainer"]:
             warnings.warn("enable_profiling is set to true, but the container framework is not singularity/apptainer. This setting will have no effect.")
-        self.enable_container_profiling = raw_config.enable_profiling
+        self.enable_profiling = raw_config.enable_profiling
diff --git a/spras/config/schema.py b/spras/config/schema.py
index 93d0225da..5cc4285ef 100644
--- a/spras/config/schema.py
+++ b/spras/config/schema.py
@@ -152,6 +152,7 @@ class RawConfig(BaseModel):
     container_framework: ContainerFramework = ContainerFramework.docker
     unpack_singularity: bool = False
     container_registry: ContainerRegistry
+    enable_profiling: bool = False
 
     hash_length: int = DEFAULT_HASH_LENGTH
     "The length of the hash used to identify a parameter combination"
diff --git a/spras/containers.py b/spras/containers.py
index 6b9788797..37b8b9f23 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -141,7 +141,7 @@ def run_container(framework: str, container_suffix: str, command: List[str], vol
     @param volumes: a list of volumes to mount where each item is a (source, destination) tuple
     @param working_dir: the working directory in the container
     @param environment: environment variables to set in the container
-    @param out_dir: output directory for the rule's artifacts. Only passed onto run_container_singularity for the purpose of profiling.
+    @param out_dir: output directory for the rule's artifacts. Only passed into run_container_singularity for the purpose of profiling.
     @return: output from Singularity execute or Docker run
     """
     normalized_framework = framework.casefold()
diff --git a/spras/profiling.py b/spras/profiling.py
index 3019406fc..185ee4363 100644
--- a/spras/profiling.py
+++ b/spras/profiling.py
@@ -37,6 +37,10 @@ def create_apptainer_container_stats(cgroup_path: str, out_dir: str):
     Reads the contents of the provided cgroup's memory.peak and cpu.stat files.
     This information is parsed and placed in the calling rule's output directory
     as 'usage-profile.tsv'.
+    In particular, we capture peak memory (in bytes) and various CPU usage statistics:
+    - user_usec: Total user CPU time consumed in microseconds
+    - system_usec: Total system CPU time consumed in microseconds
+    - usage_usec: Total CPU time (usually but not always user + system) consumed in microseconds
     @param cgroup_path: path to the cgroup directory for the container
     @param out_dir: output directory for the rule's artifacts -- used here to store profiling data
     """

From c5c35293e375f6946ac4ff062142ee601b2a7472 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 29 Sep 2025 12:53:44 -0500
Subject: [PATCH 11/16] Add outdir and switch run_container-->run_container_and_log
 for RNet,RWR,STRWR

These were PRMs whose `run_container` arguments were missed when I was
updating everything to pass an output dir around
---
 spras/responsenet.py | 14 ++++++++------
 spras/rwr.py         | 16 +++++++++-------
 spras/strwr.py       | 16 +++++++++-------
 3 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/spras/responsenet.py b/spras/responsenet.py
index 48dd269cc..bbc3b5255 100644
--- a/spras/responsenet.py
+++ b/spras/responsenet.py
@@ -105,12 +105,14 @@ def run(sources=None, targets=None, edges=None, output_file=None, gamma=10, cont
     container_suffix = "responsenet:v2"
 
     # constructs a docker run call
-    run_container_and_log('ResponseNet',
-                          container_framework,
-                          container_suffix,
-                          command,
-                          volumes,
-                          work_dir)
+    run_container_and_log(
+        'ResponseNet',
+        container_framework,
+        container_suffix,
+        command,
+        volumes,
+        work_dir,
+        out_dir)
 
     # Rename the primary output file to match the desired output filename
     out_file_suffixed.rename(output_file)
diff --git a/spras/rwr.py b/spras/rwr.py
index 396cf4df1..4717aa064 100644
--- a/spras/rwr.py
+++ b/spras/rwr.py
@@ -2,7 +2,7 @@
 
 import pandas as pd
 
-from spras.containers import prepare_volume, run_container
+from spras.containers import prepare_volume, run_container_and_log
 from spras.dataset import Dataset
 from spras.interactome import reinsert_direction_col_directed
 from spras.prm import PRM
@@ -72,13 +72,15 @@ def run(network=None, nodes=None, alpha=None, output_file=None, container_framew
         command.extend(['--alpha', str(alpha)])
 
     container_suffix = 'rwr:v1'
-    out = run_container(container_framework,
-                        container_suffix,
-                        command,
-                        volumes,
-                        work_dir)
+    run_container_and_log(
+        "RandomWalk with Restart",
+        container_framework,
+        container_suffix,
+        command,
+        volumes,
+        work_dir,
+        out_dir)
 
-    print(out)
     # Rename the primary output file to match the desired output filename
     output_edges = Path(out_dir, 'output.txt')
     output_edges.rename(output_file)
diff --git a/spras/strwr.py b/spras/strwr.py
index c4e1df95c..65ea9f923 100644
--- a/spras/strwr.py
+++ b/spras/strwr.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from spras.containers import prepare_volume, run_container
+from spras.containers import prepare_volume, run_container_and_log
 from spras.dataset import Dataset
 from spras.interactome import reinsert_direction_col_directed
 from spras.prm import PRM
@@ -77,13 +77,15 @@ def run(network=None, sources=None, targets=None, alpha=None, output_file=None,
         command.extend(['--alpha', str(alpha)])
 
     container_suffix = 'st-rwr:v1'
-    out = run_container(container_framework,
-                        container_suffix,
-                        command,
-                        volumes,
-                        work_dir)
+    run_container_and_log(
+        "Source-Target RandomWalk with Restart",
+        container_framework,
+        container_suffix,
+        command,
+        volumes,
+        work_dir,
+        out_dir)
 
-    print(out)
     # Rename the primary output file to match the desired output filename
     output_edges = Path(out_dir, 'output.txt')
     output_edges.rename(output_file)

From f1866594c4c3f2bcd3236a551e97b22784a15d22 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 29 Sep 2025 13:04:22 -0500
Subject: [PATCH 12/16] Fixup artifacts from rebase

The removed functions are in `profiling.py` and should have been removed
from `containers.py`. This also restores a comment that was removed while
fixing merge conflicts.
---
 spras/containers.py | 85 +--------------------------------------------
 1 file changed, 1 insertion(+), 84 deletions(-)

diff --git a/spras/containers.py b/spras/containers.py
index 37b8b9f23..78eb2c8d0 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -1,4 +1,3 @@
-import csv
 import os
 import platform
 import re
@@ -291,90 +290,6 @@ def run_container_docker(container: str, command: List[str], volumes: List[Tuple
     # finally:
     return out
 
-
-def create_cgroup() -> str:
-    """
-    A helper function that creates a new peer cgroup for the current process.
-    Apptainer/singularity containers are placed in this cgroup so that they
-    can be tracked for memory and CPU usage.
-    This currently assumes HTCondor runs where the current process is already
-    in a two-level nested cgroup (introduced in HTCondor 24.8.0).
-
-    Returns the path to the peer cgroup, which is needed by the cgroup_wrapper.sh script
-    to set up the cgroup for the container.
-    """
-
-    # Get the current process's cgroup path
-    # This assumes the cgroup is in the unified hierarchy
-    with open("/proc/self/cgroup") as f:
-        first_line = next(f).strip()
-        cgroup_rel = first_line.split(":")[-1].strip()
-
-    mycgroup = os.path.join("/sys/fs/cgroup", cgroup_rel.lstrip("/"))
-    peer_cgroup = os.path.join(os.path.dirname(mycgroup), f"spras-peer-{os.getpid()}")
-
-    # Create the peer cgroup directory
-    try:
-        os.makedirs(peer_cgroup, exist_ok=True)
-    except Exception as e:
-        print(f"Failed to create cgroup: {e}")
-
-    return peer_cgroup
-
-
-def create_container_stats(cgroup_path: str, out_dir: str):
-    """
-    Reads the contents of the provided cgroup's memory.peak and cpu.stat files.
-    This information is parsed and placed in the calling rule's output directory
-    as 'usage-profile.tsv'.
-    @param cgroup_path: path to the cgroup directory for the container
-    @param out_dir: output directory for the rule's artifacts -- used here to store profiling data
-    """
-
-    profile_path = os.path.join(out_dir, "usage-profile.tsv")
-
-    peak_mem = "N/A"
-    try:
-        with open(os.path.join(cgroup_path, "memory.peak")) as f:
-            peak_mem = f.read().strip()
-    except Exception as e:
-        print(f"Failed to read memory usage from cgroup: {e}")
-
-    cpu_usage = cpu_user = cpu_system = "N/A"
-    try:
-        with open(os.path.join(cgroup_path, "cpu.stat")) as f:
-            # Parse out the contents of the cpu.stat file
-            # You can find these fields by searching "cpu.stat" in the cgroup documentation:
-            # https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
-            for line in f:
-                parts = line.strip().split()
-                if len(parts) != 2:
-                    continue
-                key, value = parts
-                if key == "usage_usec":
-                    cpu_usage = value
-                elif key == "user_usec":
-                    cpu_user = value
-                elif key == "system_usec":
-                    cpu_system = value
-    except Exception as e:
-        print(f"Failed to read cpu.stat from cgroup: {e}")
-
-    # Set up the header for the TSV file
-    header = ["peak_memory_bytes", "cpu_usage_usec", "cpu_user_usec", "cpu_system_usec"]
-    row = [peak_mem, cpu_usage, cpu_user, cpu_system]
-
-    # Write the contents of the file
-    write_header = not os.path.exists(profile_path) or os.path.getsize(profile_path) == 0
-    with open(profile_path, "a", newline="") as out_f:
-        writer = csv.writer(out_f, delimiter="\t")
-
-        # Only write the header if the file was previously empty or did not exist
-        if write_header:
-            writer.writerow(header)
-        writer.writerow(row)
-
 def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str, environment: Optional[dict[str, str]] = None):
     """
     Runs a command in the container using Singularity.
@@ -330,6 +330,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
     # https://docs.sylabs.io/guides/3.7/user-guide/environment_and_metadata.html#env-option
         singularity_options.extend(['--env', ",".join(env_to_items(environment))])
 
+    # Handle unpacking singularity image if needed. Potentially needed for running nested unprivileged containers
     expanded_image = None
     if config.config.unpack_singularity:

From e86f959b8a9188981f620fe3298ecf95f8cbf8e6 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 29 Sep 2025 13:18:04 -0500
Subject: [PATCH 13/16] Capture container stderr when profiling w/ Apptainer

---
 spras/containers.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spras/containers.py b/spras/containers.py
index 78eb2c8d0..88a02ec58 100644
--- a/spras/containers.py
+++ b/spras/containers.py
@@ -381,8 +381,7 @@ def run_container_singularity(container: str, command: List[str], volumes: List[
         # The wrapper script is packaged with spras, and should be located in the same directory
         # as `containers.py`.
         wrapper = os.path.join(os.path.dirname(__file__), "cgroup_wrapper.sh")
         cmd = [wrapper, my_cgroup] + singularity_cmd
-        proc = subprocess.run(cmd, capture_output=True, text=True)
-        print("Stdout from container execution:", proc.stdout)
+        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
 
         print("Reading memory and CPU stats from cgroup")

From d0cad7779538bb1490e640d4ec2d836a8d7efc42 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 29 Sep 2025 13:28:31 -0500
Subject: [PATCH 14/16] Add note about profiling/HTCondor version requirement

---
 config/config.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/config/config.yaml b/config/config.yaml
index 9abc99a6b..aef81a767 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -14,6 +14,11 @@ container_framework: docker
 # as seen by its runtime container.
 # NOTE: Profiling is currently supported only when the container framework is set to apptainer/singularity
 # and when the host system supports the 'cgroup' filesystem.
+# When profiling via HTCondor, this assumes the current process is already in a two-level nested cgroup
+# (introduced in HTCondor 24.8.0). To specify a minimum HTCondor version, use the following `requirements`
+# expression:
+#
+#   requirements = versionGE(split(Target.CondorVersion)[1], "24.8.0") && (isenforcingdiskusage =!= true)
 enable_profiling: false

From 4e8a08053847739adb39799ea36b822b84c20572 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 29 Sep 2025 13:36:44 -0500
Subject: [PATCH 15/16] Add out_dir to bowtiebuilder

---
 spras/btb.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/spras/btb.py b/spras/btb.py
index ced433efe..71f774858 100644
--- a/spras/btb.py
+++ b/spras/btb.py
@@ -133,12 +133,14 @@ def run(sources=None, targets=None, edges=None, output_file=None, container_fram
                mapped_out_prefix]
 
     container_suffix = "bowtiebuilder:v2"
-    run_container_and_log('BowTieBuilder',
-                          container_framework,
-                          container_suffix,
-                          command,
-                          volumes,
-                          work_dir)
+    run_container_and_log(
+        'BowTieBuilder',
+        container_framework,
+        container_suffix,
+        command,
+        volumes,
+        work_dir,
+        out_dir)
 
     # Output is already written to raw-pathway.txt file

From b454fa406d1adf0be19a10489c179fa9a639793f Mon Sep 17 00:00:00 2001
From: Justin Hiemstra
Date: Mon, 29 Sep 2025 13:51:06 -0500
Subject: [PATCH 16/16] Fix switched work_dir/out_dir in DOMINO

---
 spras/domino.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/spras/domino.py b/spras/domino.py
index d283d684b..dabf4fc6b 100644
--- a/spras/domino.py
+++ b/spras/domino.py
@@ -137,13 +137,14 @@ def run(network=None, active_genes=None, output_file=None, slice_threshold=None,
     if module_threshold is not None:
         domino_command.extend(['--module_threshold', str(module_threshold)])
 
-    run_container_and_log('DOMINO',
-                          container_framework,
-                          container_suffix,
-                          domino_command,
-                          volumes,
-                          out_dir,
-                          work_dir)
+    run_container_and_log(
+        'DOMINO',
+        container_framework,
+        container_suffix,
+        domino_command,
+        volumes,
+        work_dir,
+        out_dir)
 
     # DOMINO creates a new folder in out_dir to output its modules HTML files into called active_genes
     # The filename is determined by the input active_genes and cannot be configured