diff --git a/benchmarks/swtbench/eval_infer.py b/benchmarks/swtbench/eval_infer.py
index 7501bf23..22f25144 100644
--- a/benchmarks/swtbench/eval_infer.py
+++ b/benchmarks/swtbench/eval_infer.py
@@ -15,6 +15,7 @@
 import shutil
 import subprocess
 import sys
+import time
 from pathlib import Path
 from time import monotonic
 
@@ -78,6 +79,64 @@ def patch_swt_bench_for_micromamba(swt_bench_dir: Path) -> None:
     )
 
 
+def _write_profile_sitecustomize(swt_bench_dir: Path, profile_output: Path) -> None:
+    """
+    Drop a sitecustomize.py into the swt-bench checkout to capture internal timings.
+
+    This script is picked up automatically by Python when running swt-bench's
+    src/main.py. It records coarse phases (docker builds, run_instances, per-instance
+    execution) and writes them to SWTBench profile JSON.
+    """
+    site_path = swt_bench_dir / "sitecustomize.py"
+    template_path = Path(__file__).parent / "swtbench_sitecustomize.py"
+    site_path.write_text(template_path.read_text())
+
+
+def _patch_swtbench_circular_import(swt_bench_dir: Path) -> None:
+    """
+    Remove the src.main import from swt-bench/src/__init__.py to avoid the
+    circular import that breaks src/main.py when run as a script.
+    """
+    init_file = swt_bench_dir / "src" / "__init__.py"
+    if not init_file.exists():
+        logger.warning("swt-bench src/__init__.py not found; skipping patch")
+        return
+
+    original = init_file.read_text()
+    lines = original.splitlines()
+
+    patched: list[str] = []
+    skipping_block = False
+    paren_balance = 0
+    removed = False
+
+    for line in lines:
+        if skipping_block:
+            paren_balance += line.count("(") - line.count(")")
+            if paren_balance <= 0:
+                skipping_block = False
+            continue
+
+        if "from src.main import" in line:
+            removed = True
+            paren_balance = line.count("(") - line.count(")")
+            if paren_balance > 0:
+                skipping_block = True
+            continue
+
+        patched.append(line)
+
+    if not removed:
+        logger.info("No src.main re-export found in %s; no patch needed", init_file)
+        return
+
+    trailing_newline = "\n" if original.endswith("\n") else ""
+    init_file.write_text("\n".join(patched) + trailing_newline)
+    logger.info(
+        "Removed src.main re-export from %s to avoid circular import", init_file
+    )
+
+
 def _load_prediction_instance_ids(predictions_file: Path) -> list[str]:
     instance_ids: list[str] = []
     seen = set()
@@ -244,6 +303,29 @@ def run_swtbench_evaluation(
     """
     logger.info(f"Running SWT-Bench evaluation on {predictions_file}")
 
+    timeline: list[dict[str, object]] = []
+    eval_start_ns = time.perf_counter_ns()
+    success = False
+    predictions_path = Path(predictions_file).resolve()
+    profile_output = predictions_path.parent / (
+        predictions_path.stem + ".swtbench_harness.profile.json"
+    )
+    timeline_file = predictions_path.parent / (
+        predictions_path.stem + ".swtbench_eval.timeline.json"
+    )
+
+    def record(phase: str, start_ns: int, extra: dict[str, object] | None = None):
+        end_ns = time.perf_counter_ns()
+        entry: dict[str, object] = {
+            "phase": phase,
+            "start_ns": start_ns,
+            "end_ns": end_ns,
+            "duration_ms": (end_ns - start_ns) / 1_000_000,
+        }
+        if extra:
+            entry.update(extra)
+        timeline.append(entry)
+
     try:
         # Use a global cache directory for SWT-Bench source
         cache_dir = Path.home() / ".cache" / "openhands" / "swt-bench"
@@ -251,6 +333,7 @@
 
         # Clone SWT-Bench repository if it doesn't exist
         if not swt_bench_dir.exists():
+            clone_start = time.perf_counter_ns()
             logger.info("Setting up SWT-Bench source in global cache...")
             cache_dir.mkdir(parents=True, exist_ok=True)
 
@@ -266,7 +349,11 @@ def run_swtbench_evaluation(
                 raise subprocess.CalledProcessError(result.returncode, clone_cmd)
 
             logger.info(f"SWT-Bench source installed at {swt_bench_dir}")
+            record("clone_swt_bench", clone_start)
+        else:
+            record("reuse_swt_bench_cache", time.perf_counter_ns())
+
+        # Patch upstream sources for micromamba and circular import issues
         patch_swt_bench_for_micromamba(swt_bench_dir)
 
         # Get the directory and filename of the predictions file
@@ -274,14 +361,22 @@
         predictions_filename = predictions_path.name
 
         # Copy predictions file to swt-bench directory
+        copy_start = time.perf_counter_ns()
         swt_predictions_file = swt_bench_dir / predictions_filename
         shutil.copy2(predictions_file, swt_predictions_file)
+        record("copy_predictions", copy_start)
+
+        # Install a profiling sitecustomize so we can capture harness timings
+        _write_profile_sitecustomize(swt_bench_dir, profile_output)
+        # Patch upstream circular import (src/__init__.py -> src.main -> run_evaluation)
+        _patch_swtbench_circular_import(swt_bench_dir)
 
         # Run SWT-Bench evaluation by running python directly from the swt-bench directory
        # but using the uv environment's python executable which has all dependencies
         benchmarks_dir = Path(__file__).parent.parent.parent
 
         # Get the python executable from the uv environment
+        python_start = time.perf_counter_ns()
         python_executable = subprocess.run(
             [
                 "uv",
@@ -296,10 +391,16 @@
             text=True,
             cwd=benchmarks_dir,
         ).stdout.strip()
+        record("resolve_python_executable", python_start)
 
         # Set up environment with PYTHONPATH to include swt-bench directory
         env = os.environ.copy()
-        env["PYTHONPATH"] = str(swt_bench_dir)
+        env["PYTHONPATH"] = (
+            f"{swt_bench_dir}:{env['PYTHONPATH']}"
+            if env.get("PYTHONPATH")
+            else str(swt_bench_dir)
+        )
+        env["SWTBENCH_PROFILE_JSON"] = str(profile_output)
 
         cmd = [
             python_executable,
@@ -322,25 +423,29 @@
         logger.info("SWT-Bench evaluation output:")
         print("-" * 80)
-        eval_start = monotonic()
         # Stream output directly to console, running from swt-bench directory
+        harness_start = time.perf_counter_ns()
         result = subprocess.run(cmd, text=True, cwd=swt_bench_dir, env=env)
-        eval_end = monotonic()
+        record(
+            "swtbench_harness",
+            harness_start,
+            {"returncode": result.returncode, "cmd": cmd},
+        )
 
         print("-" * 80)
 
         if result.returncode == 0:
-            logger.info(
-                "SWT-Bench evaluation completed successfully in %.2fs",
-                eval_end - eval_start,
-            )
+            logger.info("SWT-Bench evaluation completed successfully")
         else:
             logger.error(
-                "SWT-Bench evaluation failed with return code %s after %.2fs",
-                result.returncode,
-                eval_end - eval_start,
+                f"SWT-Bench evaluation failed with return code {result.returncode}"
             )
             raise subprocess.CalledProcessError(result.returncode, cmd)
-
+        record(
+            "swtbench_eval_total",
+            eval_start_ns,
+            {"events_recorded": len(timeline)},
+        )
+        success = True
     except FileNotFoundError:
         logger.error(
             "SWT-Bench evaluation command not found. "
@@ -350,6 +455,27 @@ def run_swtbench_evaluation(
     except Exception as e:
         logger.error(f"Error running SWT-Bench evaluation: {e}")
         raise
+    finally:
+        if not success:
+            record(
+                "swtbench_eval_total",
+                eval_start_ns,
+                {"events_recorded": len(timeline), "status": "error"},
+            )
+        timeline_payload = {
+            "predictions_file": str(predictions_file),
+            "dataset": dataset,
+            "workers": workers,
+            "started_ns": eval_start_ns,
+            "ended_ns": time.perf_counter_ns(),
+            "status": "ok" if success else "error",
+            "events": timeline,
+        }
+        try:
+            timeline_file.write_text(json.dumps(timeline_payload, indent=2))
+            logger.info("Wrote timeline to %s", timeline_file)
+        except Exception as e:  # noqa: BLE001
+            logger.warning("Failed to write SWTBench timeline: %s", e)
 
 
 def main() -> None:
diff --git a/benchmarks/swtbench/swtbench_sitecustomize.py b/benchmarks/swtbench/swtbench_sitecustomize.py
new file mode 100644
index 00000000..10ccc401
--- /dev/null
+++ b/benchmarks/swtbench/swtbench_sitecustomize.py
@@ -0,0 +1,124 @@
+"""
+Runtime-injected sitecustomize for SWT-Bench harness profiling.
+
+This file is copied into the swt-bench checkout as sitecustomize.py to collect
+coarse-grained timing events without modifying upstream code. It is activated
+only when PROFILE_SWTBENCH/SWTBENCH_PROFILE_JSON are set by the caller.
+"""
+
+import atexit
+import importlib
+import json
+import os
+import threading
+import time
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+
+PROFILE_PATH = os.environ.get("SWTBENCH_PROFILE_JSON", "swtbench_profile.json")
+_events: list[Dict[str, Any]] = []
+_lock = threading.Lock()
+_start_ns = time.perf_counter_ns()
+
+
+def _record(name: str, extra: Optional[Dict[str, Any]] = None):
+    start_ns = time.perf_counter_ns()
+
+    def _end(status: str = "ok", extra_end: Optional[Dict[str, Any]] = None):
+        end_ns = time.perf_counter_ns()
+        payload: Dict[str, Any] = {
+            "name": name,
+            "status": status,
+            "start_ns": start_ns,
+            "end_ns": end_ns,
+            "duration_ms": (end_ns - start_ns) / 1_000_000,
+        }
+        if extra:
+            payload.update(extra)
+        if extra_end:
+            payload.update(extra_end)
+        with _lock:
+            _events.append(payload)
+
+    return _end
+
+
+def _safe_patch(module, attr: str, wrapper):
+    try:
+        original = getattr(module, attr)
+        setattr(module, attr, wrapper(original))
+    except Exception:
+        # If patching fails, skip silently to avoid impacting the harness.
+        return
+
+
+# Patch swt-bench functions if available
+try:
+    run_evaluation = importlib.import_module("run_evaluation")  # type: ignore[assignment]
+
+    def _wrap_run_instances(original):
+        def _inner(predictions, instances, *args, **kwargs):
+            done = _record(
+                "run_instances",
+                {"instance_count": len(instances) if instances is not None else None},
+            )
+            try:
+                return original(predictions, instances, *args, **kwargs)
+            finally:
+                done()
+
+        return _inner
+
+    def _wrap_run_eval_exec_spec(original):
+        def _inner(exec_spec, model_patch, *args, **kwargs):
+            done = _record(
+                "run_eval_exec_spec",
+                {"instance_id": getattr(exec_spec, "instance_id", None)},
+            )
+            try:
+                return original(exec_spec, model_patch, *args, **kwargs)
+            finally:
+                done()
+
+        return _inner
+
+    _safe_patch(run_evaluation, "run_instances", _wrap_run_instances)
+    _safe_patch(run_evaluation, "run_eval_exec_spec", _wrap_run_eval_exec_spec)
+except Exception:
+    pass
+
+try:
+    docker_build = importlib.import_module("src.docker_build")  # type: ignore[assignment]
+
+    def _wrap_build_image(original):
+        def _inner(image_name, *args, **kwargs):
+            done = _record("docker_build", {"image_name": image_name})
+            try:
+                return original(image_name, *args, **kwargs)
+            finally:
+                done()
+
+        return _inner
+
+    _safe_patch(docker_build, "build_image", _wrap_build_image)
+except Exception:
+    pass
+
+
+def _flush() -> None:
+    end_ns = time.perf_counter_ns()
+    payload = {
+        "started_ns": _start_ns,
+        "ended_ns": end_ns,
+        "duration_ms": (end_ns - _start_ns) / 1_000_000,
+        "events": _events,
+    }
+    try:
+        Path(PROFILE_PATH).write_text(json.dumps(payload, indent=2))
+    except Exception:
+        # Avoid raising during interpreter shutdown
+        return
+
+
+atexit.register(_flush)
diff --git a/benchmarks/utils/evaluation.py b/benchmarks/utils/evaluation.py
index e2ae1db5..ec1e1945 100644
--- a/benchmarks/utils/evaluation.py
+++ b/benchmarks/utils/evaluation.py
@@ -6,6 +6,7 @@
 import json
 import os
 import sys
+import time
 from abc import ABC, abstractmethod
 from concurrent.futures import ProcessPoolExecutor, as_completed
 from contextlib import contextmanager
@@ -14,6 +15,7 @@
 from typing import Callable, List, Optional, Tuple
 from uuid import UUID
 
+import numpy as np
 from lmnr import Laminar
 from pydantic import BaseModel, Field
 from tqdm import tqdm
@@ -298,6 +300,17 @@ def _run_iterative_mode(
         # Create attempt-specific output callback
         attempt_outputs: List[EvalOutput] = []
 
+        def _make_json_safe(value: object) -> object:
+            if isinstance(value, np.ndarray):
+                return value.tolist()
+            if isinstance(value, np.generic):
+                return value.item()
+            if isinstance(value, dict):
+                return {k: _make_json_safe(v) for k, v in value.items()}
+            if isinstance(value, (list, tuple)):
+                return [_make_json_safe(v) for v in value]
+            return value
+
         def attempt_on_result(instance: EvalInstance, out: EvalOutput) -> None:
             attempt_outputs.append(out)
             # Write to attempt-specific file
@@ -306,8 +319,9 @@ def attempt_on_result(instance: EvalInstance, out: EvalOutput) -> None:
                 f"output.critic_attempt_{attempt}.jsonl",
             )
             try:
-                with open(attempt_file, "a") as f:
-                    f.write(out.model_dump_json() + "\n")
+                payload = _make_json_safe(out.model_dump(mode="json"))
+                with open(attempt_file, "a", encoding="utf-8") as f:
+                    f.write(json.dumps(payload) + "\n")
             except Exception as e:
                 logger.warning(
                     f"Failed to write to attempt file {attempt_file}: {e}"
@@ -461,6 +475,13 @@ def _process_one_mp(
         - Ensures proper context-managed cleanup
         - Returns (instance, output) so the parent can stream results
         """
+        timeline_dir = Path(self.metadata.eval_output_dir) / "timelines"
+        timeline_dir.mkdir(parents=True, exist_ok=True)
+
+        def write_timeline(entry: dict[str, object]) -> None:
+            path = timeline_dir / f"{instance.id}.attempt{entry.get('attempt', 0)}.json"
+            path.write_text(json.dumps(entry, indent=2))
+
         # Set up instance-specific logging
         log_dir = os.path.join(self.metadata.eval_output_dir, "logs")
         reset_logger_for_multiprocessing(log_dir, instance.id)
@@ -480,6 +501,12 @@
 
         while retry_count <= max_retries:
             workspace = None
+            attempt_index = retry_count + 1
+            attempt_start_ns = time.perf_counter_ns()
+            attempt_start_ts = datetime.now(timezone.utc).isoformat()
+            attempt_status = "error"
+            phases: list[dict[str, int | str]] = []
+
             resource_factor = self.metadata.base_resource_factor
             # Start Laminar execution span and inject context into os.environ so workspace can pick it up
             # Escape the serialized context to safely pass as a cli argument
@@ -506,11 +533,19 @@
                     f"resource_factor={resource_factor}"
                 )
 
+                ws_start = time.perf_counter_ns()
                 workspace = self.prepare_workspace(
                     instance,
                     resource_factor=resource_factor,
                     forward_env=LMNR_ENV_VARS,
                 )
+                phases.append(
+                    {
+                        "name": "prepare_workspace",
+                        "start_ns": int(ws_start),
+                        "end_ns": int(time.perf_counter_ns()),
+                    }
+                )
 
                 # Record runtime/pod mapping only for remote runtimes
                 if isinstance(workspace, APIRemoteWorkspace):
@@ -535,10 +570,19 @@
                         runtime_run.session_id,
                         runtime_run.resource_factor,
                     )
+                eval_start = time.perf_counter_ns()
                 out = self.evaluate_instance(instance, workspace)
+                phases.append(
+                    {
+                        "name": "evaluate_instance",
+                        "start_ns": int(eval_start),
+                        "end_ns": int(time.perf_counter_ns()),
+                    }
+                )
                 if runtime_runs:
                     out.runtime_runs = runtime_runs
                 logger.info("[child] done id=%s", instance.id)
+                attempt_status = "ok"
                 return instance, out
             except Exception as e:
                 last_error = e
@@ -593,6 +637,7 @@
                     return instance, error_output
             finally:
                 # Ensure workspace cleanup happens regardless of success or failure
+                cleanup_start = time.perf_counter_ns()
                 if workspace is not None:
                     try:
                         self._capture_conversation_archive(workspace, instance)
@@ -614,6 +659,43 @@
                             f"{str(cleanup_error)[:50]}"
                         )
                 lmnr_span.end()
+                phases.append(
+                    {
+                        "name": "cleanup",
+                        "start_ns": int(cleanup_start),
+                        "end_ns": int(time.perf_counter_ns()),
+                    }
+                )
+                attempt_end_ns = time.perf_counter_ns()
+                write_timeline(
+                    {
+                        "instance_id": instance.id,
+                        "attempt": attempt_index,
+                        "critic_attempt": critic_attempt,
+                        "status": attempt_status,
+                        "error": (
+                            str(last_error) if attempt_status != "ok" else None
+                        ),
+                        "start_ts": attempt_start_ts,
+                        "end_ts": datetime.now(timezone.utc).isoformat(),
+                        "duration_ms": (attempt_end_ns - attempt_start_ns)
+                        / 1_000_000,
+                        "resource_factor": resource_factor,
+                        "runtime_failure_count": runtime_failure_count,
+                        "phases": [
+                            {
+                                "name": p["name"],
+                                "duration_ms": (
+                                    (int(p["end_ns"]) - int(p["start_ns"]))
+                                    / 1_000_000
+                                ),
+                                "start_ns": int(p["start_ns"]),
+                                "end_ns": int(p["end_ns"]),
+                            }
+                            for p in phases
+                        ],
+                    }
+                )
 
         # This should never be reached, but added for type safety
         error_output = self._create_error_output(
diff --git a/uv.lock b/uv.lock
index e7351742..7b04cc65 100644
--- a/uv.lock
+++ b/uv.lock
@@ -947,11 +947,11 @@ wheels = [
 
 [[package]]
 name = "filelock"
-version = "3.19.1"
+version = "3.20.3"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/40/bb/0ab3e58d22305b6f5440629d20683af28959bf793d98d11950e305c1c326/filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58", size = 17687, upload-time = "2025-08-14T16:56:03.016Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload-time = "2025-08-14T16:56:01.633Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" },
 ]
 
 [[package]]
@@ -1678,11 +1678,11 @@ wheels = [
 
 [[package]]
 name = "libtmux"
-version = "0.46.2"
+version = "0.53.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/9c/aa/7e1dcaa097156d6f3a7d8669be4389dced997feeb81744e3ff4681d65ee8/libtmux-0.46.2.tar.gz", hash = "sha256:9a398fec5d714129c8344555d466e1a903dfc0f741ba07aabe75a8ceb25c5dda", size = 346887, upload-time = "2025-05-26T19:40:04.096Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/28/e2b252817cb181aec2f42fe2d1d7fac5ec9c4d15bfb2b8ea4bd1179e4244/libtmux-0.53.0.tar.gz", hash = "sha256:1d19af4cea0c19543954d7e7317c7025c0739b029cccbe3b843212fae238f1bd", size = 405001, upload-time = "2025-12-14T11:59:11.337Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d6/2f/9d207039fcfa00d3b30e4d765f062fbcc42c873c7518a8cfebb3eafd00e0/libtmux-0.46.2-py3-none-any.whl", hash = "sha256:6c32dbf22bde8e5e33b2714a4295f6e838dc640f337cd4c085a044f6828c7793", size = 60873, upload-time = "2025-05-26T19:40:02.284Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/d0/2e8bc5caa639ebb9f8801ba0be7070a28d48d8ed60e2a428d40f71fb88b8/libtmux-0.53.0-py3-none-any.whl", hash = "sha256:024b7ae6a12aae55358e8feb914c8632b3ab9bd61c0987c53559643c6a58ee4f", size = 77582, upload-time = "2025-12-14T11:59:09.739Z" },
 ]
 
 [[package]]
@@ -2269,7 +2269,7 @@ wheels = [
 
 [[package]]
 name = "openhands-agent-server"
-version = "1.7.2"
+version = "1.8.1"
 source = { editable = "vendor/software-agent-sdk/openhands-agent-server" }
 dependencies = [
     { name = "aiosqlite" },
@@ -2407,11 +2407,12 @@ dev = [
 
 [[package]]
 name = "openhands-sdk"
-version = "1.7.2"
+version = "1.8.1"
 source = { editable = "vendor/software-agent-sdk/openhands-sdk" }
 dependencies = [
     { name = "deprecation" },
     { name = "fastmcp" },
+    { name = "filelock" },
     { name = "httpx" },
     { name = "litellm" },
     { name = "lmnr" },
@@ -2432,10 +2433,11 @@ requires-dist = [
     { name = "boto3", marker = "extra == 'boto3'", specifier = ">=1.35.0" },
     { name = "deprecation", specifier = ">=2.1.0" },
     { name = "fastmcp", specifier = ">=2.11.3" },
+    { name = "filelock", specifier = ">=3.20.1" },
     { name = "httpx", specifier = ">=0.27.0" },
     { name = "litellm", specifier = ">=1.80.10" },
     { name = "lmnr", specifier = ">=0.7.24" },
-    { name = "pydantic", specifier = ">=2.11.7" },
+    { name = "pydantic", specifier = ">=2.12.5" },
     { name = "python-frontmatter", specifier = ">=1.1.0" },
     { name = "python-json-logger", specifier = ">=3.3.0" },
     { name = "tenacity", specifier = ">=9.1.2" },
@@ -2445,7 +2447,7 @@ provides-extras = ["boto3"]
 
 [[package]]
 name = "openhands-tools"
-version = "1.7.2"
+version = "1.8.1"
 source = { editable = "vendor/software-agent-sdk/openhands-tools" }
 dependencies = [
     { name = "bashlex" },
@@ -2466,7 +2468,7 @@ requires-dist = [
     { name = "browser-use", specifier = ">=0.8.0" },
     { name = "cachetools" },
     { name = "func-timeout", specifier = ">=4.3.5" },
-    { name = "libtmux", specifier = ">=0.46.2" },
+    { name = "libtmux", specifier = ">=0.53.0" },
     { name = "openhands-sdk", editable = "vendor/software-agent-sdk/openhands-sdk" },
     { name = "pydantic", specifier = ">=2.11.7" },
     { name = "tom-swe", specifier = ">=1.0.3" },
@@ -2474,7 +2476,7 @@
 
 [[package]]
 name = "openhands-workspace"
-version = "1.7.2"
+version = "1.8.1"
 source = { editable = "vendor/software-agent-sdk/openhands-workspace" }
 dependencies = [
     { name = "openhands-agent-server" },