From 40232abc47b274130b0c11ecda5e386346ed0300 Mon Sep 17 00:00:00 2001 From: Piotr Figwer Date: Wed, 13 Aug 2025 17:20:31 +0200 Subject: [PATCH 1/3] Replace shell commands with docker bindings --- .../executor/job_driver.py | 76 +++-- .../executor/job_runner.py | 268 ++++++++++-------- .../compute_horde_executor/executor/utils.py | 119 ++++++-- executor/pyproject.toml | 1 + executor/uv.lock | 14 + 5 files changed, 289 insertions(+), 189 deletions(-) diff --git a/executor/app/src/compute_horde_executor/executor/job_driver.py b/executor/app/src/compute_horde_executor/executor/job_driver.py index 92d81e8ac..eced8eb1e 100644 --- a/executor/app/src/compute_horde_executor/executor/job_driver.py +++ b/executor/app/src/compute_horde_executor/executor/job_driver.py @@ -8,7 +8,11 @@ from compute_horde_executor.executor.job_runner import BaseJobRunner from compute_horde_executor.executor.miner_client import JobError, MinerClient -from compute_horde_executor.executor.utils import get_machine_specs, temporary_process +from compute_horde_executor.executor.utils import ( + docker_container_wrapper, + get_docker_container_outputs, + get_machine_specs, +) logger = logging.getLogger(__name__) @@ -149,7 +153,7 @@ async def execute(self): logger.error(f"Job cleanup failed: {e}") async def _startup_stage(self) -> V0InitialJobRequest: - self.specs = get_machine_specs() + self.specs = await get_machine_specs() await self.run_security_checks_or_fail() initial_job_request = await self.miner_client.initial_msg await self.runner.prepare_initial(initial_job_request) @@ -185,66 +189,56 @@ async def run_security_checks_or_fail(self): await self.run_nvidia_toolkit_version_check_or_fail() async def run_cve_2022_0492_check_or_fail(self): - # TODO: TIMEOUTS - This doesn't kill the docker container, just the docker process that communicates with it. - async with temporary_process( - "docker", - "run", - "--rm", - CVE_2022_0492_IMAGE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) as docker_process: - stdout, stderr = await docker_process.communicate() - return_code = docker_process.returncode + async with docker_container_wrapper( + image=CVE_2022_0492_IMAGE, auto_remove=True + ) as docker_container: + results = await docker_container.wait() + return_code = results["StatusCode"] + stdout, stderr = await get_docker_container_outputs(docker_container) if return_code != 0: raise JobError( "CVE-2022-0492 check failed", - error_detail=f'stdout="{stdout.decode()}"\nstderr="{stderr.decode()}"', + error_detail=f'stdout="{stdout}"\nstderr="{stderr}"', ) expected_output = "Contained: cannot escape via CVE-2022-0492" - if expected_output not in stdout.decode(): + if expected_output not in stdout: raise JobError( f'CVE-2022-0492 check failed: "{expected_output}" not in stdout.', V0JobFailedRequest.ErrorType.SECURITY_CHECK, - f'stdout="{stdout.decode()}"\nstderr="{stderr.decode()}"', + f'stdout="{stdout}"\nstderr="{stderr}"', ) async def run_nvidia_toolkit_version_check_or_fail(self): - # TODO: TIMEOUTS - This doesn't kill the docker container, just the docker process that communicates with it. 
-        async with temporary_process(
-            "docker",
-            "run",
-            "--rm",
-            "--privileged",
-            "-v",
-            "/:/host:ro",
-            "-v",
-            "/usr/bin:/usr/bin",
-            "-v",
-            "/usr/lib:/usr/lib",
-            "ubuntu:latest",
-            "bash",
-            "-c",
-            "nvidia-container-toolkit --version",
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        ) as docker_process:
-            stdout, stderr = await docker_process.communicate()
-            return_code = docker_process.returncode
+        async with docker_container_wrapper(
+            image="ubuntu:latest",
+            command=["bash", "-c", "nvidia-container-toolkit --version"],
+            auto_remove=True,
+            HostConfig={
+                "Privileged": True,
+                "Binds": [
+                    "/:/host:ro",
+                    "/usr/bin:/usr/bin",
+                    "/usr/lib:/usr/lib",
+                ],
+            },
+        ) as docker_container:
+            results = await docker_container.wait()
+            return_code = results["StatusCode"]
+            stdout, stderr = await get_docker_container_outputs(docker_container)

         if return_code != 0:
             raise JobError(
                 f"nvidia-container-toolkit check failed: exit code {return_code}",
-                error_detail=f'stdout="{stdout.decode()}"\nstderr="{stderr.decode()}"',
+                error_detail=f'stdout="{stdout}"\nstderr="{stderr}"',
             )

-        lines = stdout.decode().splitlines()
+        lines = stdout.splitlines()
         if not lines:
             raise JobError(
                 "nvidia-container-toolkit check failed: no output from nvidia-container-toolkit",
-                error_detail=f'stdout="{stdout.decode()}"\nstderr="{stderr.decode()}"',
+                error_detail=f'stdout="{stdout}"\nstderr="{stderr}"',
             )

         version = lines[0].rpartition(" ")[2]
@@ -256,7 +250,7 @@
                 f"Outdated NVIDIA Container Toolkit detected:"
                 f'{version}" not >= {NVIDIA_CONTAINER_TOOLKIT_MINIMUM_SAFE_VERSION}',
                 V0JobFailedRequest.ErrorType.SECURITY_CHECK,
-                f'stdout="{stdout.decode()}"\nstderr="{stderr.decode()}',
+                f'stdout="{stdout}"\nstderr="{stderr}"',
             )

     async def fail_if_execution_unsuccessful(self):
diff --git a/executor/app/src/compute_horde_executor/executor/job_runner.py b/executor/app/src/compute_horde_executor/executor/job_runner.py
index 877703701..c7a8d5c78 100644
--- a/executor/app/src/compute_horde_executor/executor/job_runner.py
+++ b/executor/app/src/compute_horde_executor/executor/job_runner.py
@@ -4,15 +4,16 @@
 import logging
 import os
 import pathlib
+import re
 import shlex
 import shutil
 import tempfile
 from abc import ABC, abstractmethod
-from asyncio.subprocess import Process
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
 from typing import Any

+import aiodocker
 from compute_horde.base.docker import DockerRunOptionsPreset
 from compute_horde.protocol_messages import V0InitialJobRequest, V0JobFailedRequest, V0JobRequest
 from compute_horde_core.certificate import (
@@ -32,7 +33,10 @@
 from django.conf import settings

 from compute_horde_executor.executor.miner_client import ExecutionResult, JobError, JobResult
-from compute_horde_executor.executor.utils import temporary_process
+from compute_horde_executor.executor.utils import (
+    docker_container_wrapper,
+    get_docker_container_outputs,
+)

 logger = logging.getLogger(__name__)
@@ -84,13 +88,22 @@
 WAIT_FOR_NGINX_TIMEOUT = 10


-def preset_to_docker_run_args(preset: DockerRunOptionsPreset) -> list[str]:
+def preset_to_docker_run_args(preset: DockerRunOptionsPreset) -> dict[str, Any]:
     if settings.DEBUG_NO_GPU_MODE:
-        return []
+        return {}
     elif preset == "none":
-        return []
+        return {}
     elif preset == "nvidia_all":
-        return ["--runtime=nvidia", "--gpus", "all"]
+        return {
+            "Runtime": "nvidia",
+            "DeviceRequests": [
+                {
+                    "Driver": "nvidia",
+                    "Count": -1,  # All GPUs
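+                    # Together with "Runtime" above, this DeviceRequest is the
+                    # Docker API equivalent of the removed CLI flags
+                    # `--runtime=nvidia --gpus all`.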
"Capabilities": [["gpu"]], + } + ], + } else: raise JobError(f"Invalid preset: {preset}") @@ -150,21 +163,33 @@ def generate_streaming_certificate(self, executor_ip: str, public_key: str): async def cleanup_potential_old_jobs(self): logger.debug("Cleaning up potential old jobs") - await ( - await asyncio.create_subprocess_shell( - f"docker kill $(docker ps -q --filter 'name={job_container_name('.*')}')" - ) - ).communicate() - await ( - await asyncio.create_subprocess_shell( - f"docker kill $(docker ps -q --filter 'name={nginx_container_name('.*')}')" - ) - ).communicate() - await ( - await asyncio.create_subprocess_shell( - f"docker network rm $(docker network ls -q --filter 'name={network_name('.*')}')" - ) - ).communicate() + + async with aiodocker.Docker() as client: + # Clean up -job and -nginx containers + job_pattern = re.compile(job_container_name(".*")) + nginx_pattern = re.compile(nginx_container_name(".*")) + containers = await client.containers.list(all=True) + for _container in containers: + info = await _container.show() + name = info.get("Name", "").lstrip("/") + if job_pattern.match(name) or nginx_pattern.match(name): + try: + # TODO: This only kills but potentially leaves behind a stopped container. also remove? + await _container.kill() + except Exception as e: + logger.warning(f"Failed to kill container {name}: {e}") + + # Clean up networks + network_pattern = re.compile(network_name(".*")) + networks = await client.networks.list() + for _network in networks: + name = _network.get("Name", "") + if network_pattern.match(name): + network_obj = await client.networks.get(name) + try: + await network_obj.delete() + except Exception as e: + logger.warning(f"Failed to remove network {name}: {e}") async def prepare_initial(self, initial_job_request: V0InitialJobRequest): self.initial_job_request = initial_job_request @@ -179,22 +204,22 @@ async def pull_initial_job_image(self): if self.initial_job_request.docker_image is not None: logger.debug(f"Pulling Docker image {self.initial_job_request.docker_image}") - # TODO: TIMEOUTS - Check if this actually kills the process on timeout - async with temporary_process( - "docker", - "pull", - self.initial_job_request.docker_image, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) as docker_process: - stdout, stderr = await docker_process.communicate() - return_code = docker_process.returncode - - if return_code != 0: - raise JobError( - f"Failed to pull docker image: exit code {return_code}", - error_detail=f'stdout="{stdout.decode()}"\nstderr="{stderr.decode()}', - ) + async with aiodocker.Docker() as client: + stdout = "" + stderr = "" + try: + # async for _line in client.images.pull(self.initial_job_request.docker_image, stream=True): + async for _line in client.images.pull("alpine:latest", stream=True): + line_str = json.dumps(_line) + if "error" in _line: + stderr += line_str + "\n" + else: + stdout += line_str + "\n" + except Exception as e: + raise JobError( + f"Failed to pull docker image: {e}", + error_detail=f'stdout="{stdout}"\nstderr="{stderr}', + ) async def prepare_full(self, full_job_request: V0JobRequest): self.full_job_request = full_job_request @@ -206,10 +231,11 @@ async def get_docker_image(self) -> str: """ @abstractmethod - async def get_docker_run_args(self) -> list[str]: + async def get_docker_run_args(self) -> dict[str, Any]: """ - Return the list of arguments to pass to the `docker run` command, e.g. `["--rm", "--name", "container_name"]`. 
         """

     @abstractmethod
@@ -220,7 +246,7 @@ async def get_docker_run_cmd(self) -> list[str]:
         """
         """

     @abstractmethod
-    async def job_cleanup(self, job_process: Process):
+    async def job_cleanup(self, container: aiodocker.containers.DockerContainer):
         """
         Perform any cleanup necessary after the job is finished.
         """
@@ -244,38 +270,34 @@ async def start_job(self) -> AsyncGenerator[None, Any]:
             "Call prepare_initial() and prepare_full() first"
         )
         assert self.full_job_request is not None, "Call prepare_initial() and prepare_full() first"
-        extra_volume_flags = []
+        # Keyword arguments for aiodocker's containers.create()
+        docker_kwargs = await self.get_docker_run_args()
+
+        # Add volume mounts
+        docker_kwargs.setdefault("HostConfig", {})
+        if "Binds" not in docker_kwargs["HostConfig"]:
+            docker_kwargs["HostConfig"]["Binds"] = []
+        docker_kwargs["HostConfig"]["Binds"].append(f"{self.volume_mount_dir.as_posix()}:/volume/")
+        docker_kwargs["HostConfig"]["Binds"].append(
+            f"{self.output_volume_mount_dir.as_posix()}:/output/"
+        )
+        docker_kwargs["HostConfig"]["Binds"].append(
+            f"{self.specs_volume_mount_dir.as_posix()}:/specs/"
+        )

         if self.full_job_request.artifacts_dir:
-            extra_volume_flags += [
-                "-v",
-                f"{self.artifacts_mount_dir.as_posix()}/:{self.full_job_request.artifacts_dir}",
-            ]
+            docker_kwargs["HostConfig"]["Binds"].append(
+                f"{self.artifacts_mount_dir.as_posix()}/:{self.full_job_request.artifacts_dir}"
+            )
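+        # NOTE: each Binds entry uses the Docker API's "host:container[:mode]"
+        # syntax; together these entries replace the "-v" flags of the old
+        # `docker run` invocation.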

-        self.cmd = [
-            "docker",
-            "run",
-            *await self.get_docker_run_args(),
-            "-v",
-            f"{self.volume_mount_dir.as_posix()}/:/volume/",
-            "-v",
-            f"{self.output_volume_mount_dir.as_posix()}/:/output/",
-            "-v",
-            f"{self.specs_volume_mount_dir.as_posix()}/:/specs/",
-            *extra_volume_flags,
-            await self.get_docker_image(),
-            *await self.get_docker_run_cmd(),
-        ]
         await self.before_start_job()

-        # TODO: TIMEOUTS - This doesn't kill the docker container, just the docker process that communicates with it.
-        async with temporary_process(
-            *self.cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        ) as job_process:
+        async with docker_container_wrapper(
+            image=await self.get_docker_image(),
+            command=await self.get_docker_run_cmd(),
+            **docker_kwargs,
+        ) as docker_container:
             await self.after_start_job()
             yield
-            await self.job_cleanup(job_process)
+            await self.job_cleanup(docker_container)

     async def upload_results(self) -> JobResult:
         assert self.execution_result is not None, "No execution result"
@@ -337,26 +359,26 @@ async def upload_results(self) -> JobResult:
     async def clean(self):
         # remove input/output directories with docker, to deal with funky file permissions
        root_for_remove = pathlib.Path("/temp_dir/")
-        process = await asyncio.create_subprocess_exec(
-            "docker",
-            "run",
-            "--rm",
-            "-v",
-            f"{self.temp_dir.as_posix()}/:/{root_for_remove.as_posix()}/",
-            "alpine:3.19",
-            "sh",
-            "-c",
-            f"rm -rf {shlex.quote(root_for_remove.as_posix())}/*",
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
-        stdout, stderr = await process.communicate()
-        if process.returncode != 0:
+        async with docker_container_wrapper(
+            image="alpine:3.19",
+            command=["sh", "-c", f"rm -rf {shlex.quote(root_for_remove.as_posix())}/*"],
+            auto_remove=True,
+            HostConfig={
+                "Binds": [
+                    f"{self.temp_dir.as_posix()}/:/{root_for_remove.as_posix()}/",
+                ]
+            },
+        ) as docker_container:
+            result = await docker_container.wait()
+            return_code = result["StatusCode"]
+            stdout, stderr = await get_docker_container_outputs(docker_container)
+
+        if return_code != 0:
             logger.error(
-                f"Failed to clean up {self.temp_dir.as_posix()}/: process exited with return code {process.returncode}\n"
+                f"Failed to clean up {self.temp_dir.as_posix()}/: container exited with return code {return_code}\n"
                 "Stdout and stderr:\n"
-                f"{truncate(stdout.decode())}\n"
-                f"{truncate(stderr.decode())}\n"
+                f"{truncate(stdout)}\n"
+                f"{truncate(stderr)}\n"
             )
         try:
             shutil.rmtree(self.temp_dir)
@@ -435,14 +457,19 @@ async def get_docker_image(self) -> str:
         )
         return self.full_job_request.docker_image

-    async def get_docker_run_args(self) -> list[str]:
+    async def get_docker_run_args(self) -> dict[str, Any]:
         assert self.full_job_request is not None, (
             "Full job request must be set. Call prepare_full() first."
         )
         assert self.initial_job_request is not None, (
             "Initial job request must be set. Call prepare_initial() first."
) - preset_run_options = preset_to_docker_run_args( + + # Build keyword arguments to be passed to aiodocker.Docker().containers.create() + docker_kwargs: dict[str, Any] = {"name": self.job_container_name, "auto_remove": True} + + # NVIDIA environment + docker_kwargs["HostConfig"] = preset_to_docker_run_args( self.full_job_request.docker_run_options_preset ) @@ -451,26 +478,27 @@ async def get_docker_run_args(self) -> list[str]: if self.is_streaming_job: logger.debug("Spinning up local network for streaming job") job_network = self.job_network_name - process = await asyncio.create_subprocess_exec( - "docker", "network", "create", "--internal", self.job_network_name - ) - await process.wait() + async with aiodocker.Docker() as client: + try: + await client.networks.create( + { + "Name": self.job_network_name, + "CheckDuplicate": True, + "Internal": True, + } + ) + except Exception as e: + logger.warning(f"Failed to create network {job_network}: {e}") + docker_kwargs["HostConfig"]["NetworkMode"] = job_network - extra_volume_flags = [] if self.full_job_request.raw_script: raw_script_path = self.temp_dir / "script.py" raw_script_path.write_text(self.full_job_request.raw_script) - extra_volume_flags = ["-v", f"{raw_script_path.absolute().as_posix()}:/script.py"] - - return [ - *preset_run_options, - "--name", - self.job_container_name, - "--rm", - "--network", - job_network, - *extra_volume_flags, - ] + docker_kwargs["HostConfig"]["Binds"] = [ + f"{raw_script_path.absolute().as_posix()}:/script.py" + ] + + return docker_kwargs async def get_docker_run_cmd(self) -> list[str]: assert self.full_job_request is not None, ( @@ -519,7 +547,7 @@ async def after_start_job(self): if not job_ready: raise JobError("Streaming job health check failed") - async def job_cleanup(self, job_process: Process): + async def job_cleanup(self, container: aiodocker.containers.DockerContainer): assert self.initial_job_request is not None, ( "Initial job request must be set. Call prepare_initial() first." 
)

@@ -532,21 +560,22 @@
             )
             logger.debug(f"Waiting {docker_process_timeout} seconds for job container to finish")
             result = await asyncio.wait_for(
-                job_process.communicate(),
+                container.wait(),
                 timeout=docker_process_timeout,
             )
-            logger.debug(f"Job container exited with return code {job_process.returncode}")
+            return_code = result["StatusCode"]
+            stdout, stderr = await get_docker_container_outputs(container)
+            logger.debug(f"Job container exited with return code {return_code}")
             self.execution_result = ExecutionResult(
                 timed_out=False,
-                return_code=job_process.returncode,
-                stdout=result[0].decode(),
-                stderr=result[1].decode(),
+                return_code=return_code,
+                stdout=stdout,
+                stderr=stderr,
             )
         except TimeoutError:
             logger.debug("Job container timed out")
-            stdout = (await job_process.stdout.read()).decode() if job_process.stdout else ""
-            stderr = (await job_process.stderr.read()).decode() if job_process.stderr else ""
+            stdout, stderr = await get_docker_container_outputs(container)
             self.execution_result = ExecutionResult(
                 timed_out=True,
                 return_code=None,
                 stdout=stdout,
                 stderr=stderr,
             )

         if self.is_streaming_job:
+
+            async def _stop_nginx():
+                async with aiodocker.Docker() as client:
+                    nginx_container = await client.containers.get(self.nginx_container_name)
+                    await nginx_container.stop()
+
             # stop the associated nginx server
             try:
                 await asyncio.sleep(1)
-                process = await asyncio.create_subprocess_exec(
-                    "docker",
-                    "stop",
-                    self.nginx_container_name,
-                )
-                try:
-                    await asyncio.wait_for(process.wait(), DOCKER_STOP_TIMEOUT_SECONDS)
-                except TimeoutError:
-                    process.kill()
-                    raise
+                await asyncio.wait_for(_stop_nginx(), DOCKER_STOP_TIMEOUT_SECONDS)
             except Exception as e:
                 logger.error(f"Failed to stop Nginx: {e}")

diff --git a/executor/app/src/compute_horde_executor/executor/utils.py b/executor/app/src/compute_horde_executor/executor/utils.py
index 8f7eb4f27..ad0e03b8f 100644
--- a/executor/app/src/compute_horde_executor/executor/utils.py
+++ b/executor/app/src/compute_horde_executor/executor/utils.py
@@ -1,4 +1,3 @@
-import asyncio
 import csv
 import logging
 import re
@@ -7,6 +6,7 @@
 import typing
 from contextlib import asynccontextmanager

+import aiodocker
 from compute_horde.utils import MachineSpecs

 logger = logging.getLogger(__name__)
@@ -21,16 +21,48 @@ def run_cmd(cmd):
     return proc.stdout


+async def run_nvidia_smi() -> str:
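+    # Run nvidia-smi inside an ubuntu container with the nvidia runtime,
+    # mirroring the shell command this replaces:
+    #   docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi --query-gpu=... --format=csv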
+    async with aiodocker.Docker() as client:
+        container = await client.containers.create(
+            {
+                "Image": "ubuntu",
+                "Cmd": [
+                    "nvidia-smi",
+                    "--query-gpu=name,driver_version,name,memory.total,compute_cap,power.limit,clocks.gr,clocks.mem,uuid,serial",
+                    "--format=csv",
+                ],
+                "HostConfig": {
+                    "Runtime": "nvidia",
+                    "DeviceRequests": [
+                        {
+                            "Driver": "nvidia",
+                            "Count": -1,
+                            "Capabilities": [["gpu"]],
+                        }
+                    ],
+                },
+            }
+        )  # type: aiodocker.containers.DockerContainer
+        await container.start()
+        result = await container.wait()
+        stdout, stderr = await get_docker_container_outputs(container)
+        await container.delete(force=True)
+
+        if result["StatusCode"] != 0:
+            raise RuntimeError(
+                f"run_nvidia_smi error ({result['StatusCode']}) {stdout=!r} {stderr=!r}"
+            )
+
+        return stdout
+
+
 @typing.no_type_check
-def get_machine_specs() -> MachineSpecs:
+async def get_machine_specs() -> MachineSpecs:
     data = {}

     data["gpu"] = {"count": 0, "details": []}
     try:
-        nvidia_cmd = run_cmd(
-            "docker run --rm --runtime=nvidia --gpus all ubuntu "
-            "nvidia-smi --query-gpu=name,driver_version,name,memory.total,compute_cap,power.limit,clocks.gr,clocks.mem,uuid,serial --format=csv"
-        )
+        nvidia_cmd = await run_nvidia_smi()
         csv_data = csv.reader(nvidia_cmd.splitlines())
         header = [x.strip() for x in next(csv_data)]
         for row in csv_data:
@@ -105,33 +137,66 @@

 @asynccontextmanager
-async def temporary_process(program, *args, clean_exit_timeout: float = 1.0, **subprocess_kwargs):
+async def docker_container_wrapper(
+    image: str,
+    name: str | None = None,
+    command: list[str] | None = None,
+    clean_exit_timeout: float = 1.0,
+    auto_remove: bool = False,
+    **container_kwargs,
+):
     """
-    Context manager.
-    Runs the program in a subprocess, yields it for you to interact with and cleans it up after the context exits.
-    This will first try to stop the process nicely but kill it shortly after.
+    Context manager for Docker containers using the Docker API (aiodocker).
+    Creates and starts a container, yields it for interaction, and cleans up the container
+    and the client connection after the context exits.

     Parameters:
-        program: Program to execute
-        *args: Program arguments
+        image: Docker image to run
+        name: Name of the container (default: None)
+        command: Command to execute in the container. This should be formatted as a list that the
+            Docker API would understand, e.g. ["bash", "-c", "..."]. If None, will run the default
+            command for the image (default: None)
         clean_exit_timeout: Seconds to wait before force kill (default: 1.0)
-        **subprocess_kwargs: Additional keyword arguments passed to asyncio.create_subprocess_exec()
+        auto_remove: Stop and remove the container on exit. Equivalent to the --rm flag (default: False)
+        **container_kwargs: Additional keyword arguments passed to aiodocker.Docker.containers.create()
     """
-    process = await asyncio.create_subprocess_exec(program, *args, **subprocess_kwargs)
-    try:
-        yield process
-    finally:
-        try:
-            try:
-                process.terminate()
-                await asyncio.wait_for(process.wait(), timeout=clean_exit_timeout)
-            except ProcessLookupError:
-                # Process already gone - nothing to do
-                pass
-            except TimeoutError:
-                logger.warning(
-                    f"Process `{program}` didn't exit after {clean_exit_timeout} seconds - killing ({args=})"
-                )
-                process.kill()
-        except Exception as e:
-            logger.error(f"Failed to clean up process `{program}` ({args=}): {e}", exc_info=True)
+    client = aiodocker.Docker()
+    try:
+        # Configure and start the Docker container
+        config = {"Image": image, **container_kwargs}
+        if command:
+            config["Cmd"] = command
+        container = await client.containers.create(config=config, name=name)
+        await container.start()
+
+        try:
+            yield container
+        finally:
+            if auto_remove:
+                try:
+                    # Try to stop the container (SIGTERM first, SIGKILL after the timeout)
+                    try:
+                        await container.stop(timeout=int(clean_exit_timeout))
+                    except Exception as e:
+                        logger.warning(f"Failed to stop container: {e}")
+
+                    # Remove the container
+                    try:
+                        await container.delete(force=True)
+                    except Exception as e:
+                        logger.warning(f"Failed to remove container: {e}")
+                except Exception as e:
+                    logger.error(f"Failed to clean up container: {e}", exc_info=True)
+    finally:
+        # Close the Docker client even if container creation failed
+        try:
+            await client.close()
+        except Exception as e:
+            logger.warning(f"Failed to close Docker client: {e}")
+
+
+async def get_docker_container_outputs(
+    container: aiodocker.containers.DockerContainer,
+) -> tuple[str, str]:
+    stdout = "".join(await container.log(stdout=True, stderr=False))
+    stderr = "".join(await container.log(stdout=False, stderr=True))
+    return stdout, stderr
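+
+
+# Illustrative usage of the two helpers above (assumes an image/command that
+# exits on its own):
+#
+#     async with docker_container_wrapper(
+#         image="alpine:3.19", command=["echo", "hi"], auto_remove=True
+#     ) as container:
+#         await container.wait()
+#         stdout, stderr = await get_docker_container_outputs(container)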
diff --git a/executor/pyproject.toml b/executor/pyproject.toml
index d77b2e261..7ab817a53 100644
--- a/executor/pyproject.toml
+++ b/executor/pyproject.toml
@@ -27,6 +27,7 @@ dependencies = [
     "django-business-metrics @ git+https://github.com/reef-technologies/django-business-metrics.git@9d08ddb3a9d26e8a7e478110d7c8c34c3aa03a01",
     "packaging>=24.2",
     "aiohttp>=3.12.0",
+    "aiodocker==0.24.0",
 ]

 [tool.uv.sources]
diff --git a/executor/uv.lock b/executor/uv.lock
index 297f86733..b5ed5c159 100644
--- a/executor/uv.lock
+++ b/executor/uv.lock
@@ -10,6 +10,18 @@ supported-markers = [
     "sys_platform == 'darwin'",
 ]

+[[package]]
+name = "aiodocker"
+version = "0.24.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f2/d7/30104dfac550ae6570d4dce24c2cbf2ddefd4937c9e861641314abfd8abb/aiodocker-0.24.0.tar.gz", hash = "sha256:661a6f9a479951f11f793031dcd5d55337e232c4ceaee69d51ceb885e5f16fac", size = 135482, upload-time = "2024-11-21T02:23:59.642Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/5b/2bb8b632041e314a0a917ade80382ca6a8f331f12c7eb409e59cd0485cc9/aiodocker-0.24.0-py3-none-any.whl", hash = "sha256:2199b7b01f8ce68f9cabab7910ecb26192f6f3494163f1ccffe527b4c3875689", size = 34849, upload-time = "2024-11-21T02:23:58.053Z" },
+]
+
 [[package]]
 name = "aiohappyeyeballs"
 version = "2.6.1"
@@ -1038,6 +1050,7 @@
 name = "executor"
 version = "0"
 source = { editable = "." }
 dependencies = [
+    { name = "aiodocker", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "celery", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
     { name = "channels", extra = ["daphne"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -1095,6 +1108,7 @@

 [package.metadata]
 requires-dist = [
+    { name = "aiodocker", specifier = "==0.24.0" },
     { name = "aiohttp", specifier = ">=3.12.0" },
     { name = "celery", specifier = "~=5.3.1" },
     { name = "channels", extras = ["daphne"], specifier = "==4.*" },

From 48f1fc6a08908a90225631a40296fa57cae25f63 Mon Sep 17 00:00:00 2001
From: Piotr Figwer
Date: Thu, 14 Aug 2025 21:30:44 +0200
Subject: [PATCH 2/3] Pull us-central1-docker.pkg.dev/twistlock-secresearch/public/can-ctr-escape-cve-2022-0492:latest

---
 .github/workflows/executor_ci.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/executor_ci.yml b/.github/workflows/executor_ci.yml
index a3ef40381..67a8f241f 100644
--- a/.github/workflows/executor_ci.yml
+++ b/.github/workflows/executor_ci.yml
@@ -89,6 +89,8 @@ jobs:
       - name: Build backenddevelopersltd/compute-horde-streaming-job-test:v0-latest image
        run: docker build . -t backenddevelopersltd/compute-horde-streaming-job-test:v0-latest
        working-directory: ./executor/app/src/compute_horde_executor/executor/tests/integration/docker_image_for_streaming_job_tests
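+      # Pre-pull images used by the tests: creating containers through the
+      # Docker API (aiodocker) fails if the image is not already present
+      # locally, unlike `docker run`, which pulls it implicitly.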
+      - name: Pull us-central1-docker.pkg.dev/twistlock-secresearch/public/can-ctr-escape-cve-2022-0492:latest
+        run: docker pull us-central1-docker.pkg.dev/twistlock-secresearch/public/can-ctr-escape-cve-2022-0492:latest
       - name: Run unit tests
         run: nox -vs test
       - name: Executor Test Results

From 21a94ed1afadea904a0e42a68a4662bbf1c4e86e Mon Sep 17 00:00:00 2001
From: Piotr Figwer
Date: Thu, 14 Aug 2025 21:39:22 +0200
Subject: [PATCH 3/3] Add missing docker images

---
 .github/workflows/executor_ci.yml    | 2 ++
 .github/workflows/integration_ci.yml | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/.github/workflows/executor_ci.yml b/.github/workflows/executor_ci.yml
index 67a8f241f..5731f5527 100644
--- a/.github/workflows/executor_ci.yml
+++ b/.github/workflows/executor_ci.yml
@@ -91,6 +91,8 @@ jobs:
         working-directory: ./executor/app/src/compute_horde_executor/executor/tests/integration/docker_image_for_streaming_job_tests
       - name: Pull us-central1-docker.pkg.dev/twistlock-secresearch/public/can-ctr-escape-cve-2022-0492:latest
         run: docker pull us-central1-docker.pkg.dev/twistlock-secresearch/public/can-ctr-escape-cve-2022-0492:latest
+      - name: Pull alpine image
+        run: docker pull alpine:3.19
       - name: Run unit tests
         run: nox -vs test
       - name: Executor Test Results
diff --git a/.github/workflows/integration_ci.yml b/.github/workflows/integration_ci.yml
index c03bf85c2..8cfed0c86 100644
--- a/.github/workflows/integration_ci.yml
+++ b/.github/workflows/integration_ci.yml
@@ -29,6 +29,10 @@ jobs:
         with:
           version: "0.6.x"
           enable-cache: true
+      - name: Pull us-central1-docker.pkg.dev/twistlock-secresearch/public/can-ctr-escape-cve-2022-0492:latest
+        run: docker pull us-central1-docker.pkg.dev/twistlock-secresearch/public/can-ctr-escape-cve-2022-0492:latest
+      - name: Pull alpine image
+        run: docker pull alpine:3.19
       - name: Start all services
         run: local_stack/run_and_await_readiness.sh /tmp/integration_test_logs/