5 changes: 3 additions & 2 deletions scripts/reinforcement_learning/rsl_rl/train.py
@@ -32,6 +32,7 @@
"--distributed", action="store_true", default=False, help="Run training with multiple GPUs or nodes."
)
parser.add_argument("--export_io_descriptors", action="store_true", default=False, help="Export IO descriptors.")
parser.add_argument("--timer", action="store_true", default=False, help="Enable IsaacLab Timer measurements/output.")
# append RSL-RL cli arguments
cli_args.add_rsl_rl_args(parser)
# append AppLauncher cli args
@@ -83,8 +84,8 @@

from isaaclab.utils.timer import Timer

Timer.enable = False
Timer.enable_display_output = False
Timer.enable = args_cli.timer
Timer.enable_display_output = args_cli.timer

import isaaclab_tasks_experimental # noqa: F401

70 changes: 60 additions & 10 deletions source/isaaclab/isaaclab/envs/manager_based_rl_env.py
@@ -15,6 +15,7 @@

from isaaclab.managers import CommandManager, CurriculumManager, RewardManager, TerminationManager
from isaaclab.ui.widgets import ManagerLiveVisualizer
from isaaclab.utils.timer import Timer

from .common import VecEnvStepReturn
from .manager_based_env import ManagerBasedEnv
@@ -149,6 +150,7 @@ def setup_manager_visualizers(self):
Operations - MDP
"""

@Timer(name="env_step", msg="Step took:", enable=True, format="us")
def step(self, action: torch.Tensor) -> VecEnvStepReturn:
"""Execute one time-step of the environment's dynamics and reset terminated environments.

@@ -169,7 +171,14 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
A tuple containing the observations, rewards, resets (terminated and truncated) and extras.
"""
# process actions
self.action_manager.process_action(action.to(self.device))
action_device = action.to(self.device)
with Timer(
name="action_manager.process_action",
msg="ActionManager.process_action took:",
enable=True,
format="us",
):
self.action_manager.process_action(action_device)

self.recorder_manager.record_pre_step()

@@ -181,11 +190,18 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
for _ in range(self.cfg.decimation):
self._sim_step_counter += 1
# set actions into buffers
self.action_manager.apply_action()
with Timer(
name="action_manager.apply_action",
msg="ActionManager.apply_action took:",
enable=True,
format="us",
):
self.action_manager.apply_action()
# set actions into simulator
self.scene.write_data_to_sim()
# simulate
self.sim.step(render=False)
with Timer(name="simulate", msg="Newton simulation step took:", enable=True, format="us"):
self.sim.step(render=False)
self.recorder_manager.record_post_physics_decimation_step()
# render between steps only if the GUI or an RTX sensor needs it
# note: we assume the render interval to be the shortest accepted rendering interval.
@@ -199,16 +215,36 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
# -- update env counters (used for curriculum generation)
self.episode_length_buf += 1 # step in current episode (per env)
self.common_step_counter += 1 # total step (common for all envs)

# -- check terminations
self.reset_buf = self.termination_manager.compute()
self.reset_terminated = self.termination_manager.terminated
self.reset_time_outs = self.termination_manager.time_outs
with Timer(
name="termination_manager.compute",
msg="TerminationManager.compute took:",
enable=True,
format="us",
):
self.reset_buf = self.termination_manager.compute()
self.reset_terminated = self.termination_manager.terminated
self.reset_time_outs = self.termination_manager.time_outs

# -- reward computation
self.reward_buf = self.reward_manager.compute(dt=self.step_dt)
with Timer(
name="reward_manager.compute",
msg="RewardManager.compute took:",
enable=True,
format="us",
):
self.reward_buf = self.reward_manager.compute(dt=self.step_dt)

if len(self.recorder_manager.active_terms) > 0:
# update observations for recording if needed
self.obs_buf = self.observation_manager.compute()
with Timer(
name="observation_manager.compute",
msg="ObservationManager.compute took:",
enable=True,
format="us",
):
self.obs_buf = self.observation_manager.compute()
self.recorder_manager.record_post_step()

# -- reset envs that terminated/timed-out and log the episode information
@@ -228,13 +264,27 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
self.recorder_manager.record_post_reset(reset_env_ids)

# -- update command
self.command_manager.compute(dt=self.step_dt)
with Timer(
name="command_manager.compute",
msg="CommandManager.compute took:",
enable=True,
format="us",
):
self.command_manager.compute(dt=self.step_dt)

# -- step interval events
if "interval" in self.event_manager.available_modes:
self.event_manager.apply(mode="interval", dt=self.step_dt)

# -- compute observations
# note: done after reset to get the correct observations for reset envs
self.obs_buf = self.observation_manager.compute(update_history=True)
with Timer(
name="observation_manager.compute_update_history",
msg="ObservationManager.compute (update_history) took:",
enable=True,
format="us",
):
self.obs_buf = self.observation_manager.compute(update_history=True)

# return observations, rewards, resets and extras
return self.obs_buf, self.reward_buf, self.reset_terminated, self.reset_time_outs, self.extras
@@ -0,0 +1,141 @@
# Manager-based → Warp-first migration plan (experimental)

This doc captures the incremental plan to migrate the **manager-based workflow** (config-driven managers) to a **Warp-first, CUDA-graph-friendly** implementation, while keeping the same external behavior/API as the stable manager-based environments.

Scope: start with **Cartpole (manager-based)** as the pilot task.

## Goals

- Preserve the manager-based authoring model (tasks defined via config + MDP terms).
- Keep Gym API behavior the same as the stable manager-based envs.
- Make the core step/reset loop **graph-capturable** (fixed launch topology, persistent buffers, mask-based subset operations).
- Avoid touching stable code by iterating inside `isaaclab_experimental` / `isaaclab_tasks_experimental`.

## Current state (what exists already)

- **Experimental env entry point**: `isaaclab_experimental.envs:ManagerBasedRLEnvWarp`
- **Experimental Cartpole config + mdp**: `isaaclab_tasks_experimental.manager_based.classic.cartpole.*`
- **First manager fork**: `isaaclab_experimental.managers.RewardManager` (Warp-backed buffers / kernels)
- **Action path (Cartpole-minimal)**: `isaaclab_experimental.managers.ActionManager` + `isaaclab_experimental.envs.mdp.actions`
- Warp-first manager boundary (`process_action` consumes `wp.array`; may temporarily accept `torch.Tensor` and convert via `wp.from_torch`)
- Mask-based reset API (preferred for capture): `reset(mask: wp.array | torch.Tensor | None)`
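
A minimal sketch of the `process_action` boundary described above, assuming a manager-owned persistent buffer (class layout and attribute names are illustrative, not the actual `isaaclab_experimental.managers.ActionManager`):

```python
import torch
import warp as wp


class ActionManager:  # illustrative fork, trimmed to the boundary behavior only
    def __init__(self, num_envs: int, action_dim: int, device: str = "cuda:0"):
        # persistent Warp buffer with a stable pointer; downstream kernels read from it
        self._raw_actions = wp.zeros((num_envs, action_dim), dtype=wp.float32, device=device)

    def process_action(self, action: "wp.array | torch.Tensor") -> None:
        # Warp-first boundary: accept wp.array natively, temporarily tolerate torch.Tensor
        if isinstance(action, torch.Tensor):
            action = wp.from_torch(action.contiguous())
        # copy in-place into the persistent buffer instead of re-wrapping every step
        wp.copy(self._raw_actions, action)
```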

## Phased migration (minimal + incremental)

### Phase 0 — Baseline experimental entry points (no behavior change)

What:
- Register new Gym IDs that point at experimental env entry points.
- Keep task configs stable unless explicitly copied for isolation.

Why:
- Allows iteration without breaking stable tasks.

Deliverables:
- `isaaclab_experimental.envs:ManagerBasedRLEnvWarp`
- `Isaac-…-v0` IDs under `isaaclab_tasks_experimental`

### Phase 1 — Term-level Warp (keep Python managers, keep Torch-facing API)

What:
- Introduce Warp implementations for *select* MDP terms (Cartpole rewards/obs/events) while keeping:
- manager orchestration in Python
- env returns (`obs`, `rew`, `terminated`, `truncated`) as **torch.Tensor**
- Use `wp.array` buffers internally and expose `torch` views via `wp.to_torch(...)` at boundaries.

Why:
- Lets you validate Warp math + data plumbing without rewriting the entire manager framework.

Typical changes:
- Add `out` buffers to term cfgs (or manager-owned persistent outputs).
- Convert term functions from “return torch” → “write into wp.array” (see the sketch after the Cartpole focus list below).

Cartpole focus:
- Rewards: pole angle term, alive/terminated terms, etc.
- Observations: joint pos/vel relative
- Events: reset by offset (mask-based subset)
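
A minimal sketch of the “write into wp.array” shape for a pole-angle reward term (kernel and buffer names are illustrative; the actual term signatures in `isaaclab_tasks_experimental` may differ):

```python
import warp as wp


@wp.kernel
def pole_angle_reward_kernel(
    joint_pos: wp.array2d(dtype=wp.float32),  # (num_envs, num_joints)
    pole_joint_idx: int,
    weight: float,
    out: wp.array(dtype=wp.float32),  # persistent (num_envs,) reward buffer
):
    env_id = wp.tid()
    angle = joint_pos[env_id, pole_joint_idx]
    # quadratic pole-angle penalty, written in place instead of returned
    out[env_id] = -weight * angle * angle


def pole_angle_reward(joint_pos: wp.array, pole_joint_idx: int, weight: float, out: wp.array) -> None:
    # Phase 1 term shape: write into the cfg-provided `out` buffer, return nothing
    wp.launch(pole_angle_reward_kernel, dim=out.shape[0], inputs=[joint_pos, pole_joint_idx, weight, out])
```

The manager then launches against its persistent per-env reward buffer instead of allocating a return tensor on every call.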

### Phase 2 — Manager-level Warp buffers (still Python-loop scheduling)

What:
- Keep manager iteration in Python, but move all per-env buffers to Warp:
- reward accumulation buffers
- termination buffers
- (optionally) action/observation buffers
- Replace torch ops such as `nonzero()`, `torch.mean(...)`, and per-term tensor math with Warp kernels.

Why:
- Removes Torch from the hot-path while keeping the overall structure intact.

Deliverables (pilot):
- Warp-backed `RewardManager` (done/ongoing)
- Warp-backed `ActionManager` (Cartpole-minimal; mask-based reset; optional Torch shim)
- Next candidates: `TerminationManager`, `EventManager` (mask-based reset/interval), `ObservationManager`

Notes (graph/capture):
- `wp.from_torch(...)` creates a lightweight Warp wrapper around the Torch tensor memory, but you still pay Python-side overhead per call.
For CUDA graph capture, prefer **persistent buffers** (stable pointers) and update them in-place, then pass the persistent `wp.array`
through the manager boundary. This is the same caveat noted in `DirectRLEnvWarp.step`.

Notes (Torch → Warp porting):
- Torch implementations often “broadcast” per-joint constants into `(num_envs, action_dim)` tensors for convenience.
In Warp-first ports, prefer keeping these as **constant per-joint buffers** (e.g. `(action_dim,)` for `scale/offset`,
`(action_dim, 2)` for `clip`) and index by `j` inside kernels. This avoids redundant per-env storage and extra broadcast kernels,
while preserving behavior.
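
A sketch of that layout with hypothetical buffer names; the constants stay `(action_dim,)` / `(action_dim, 2)` and are indexed by `j` inside the kernel:

```python
import warp as wp


@wp.kernel
def scale_offset_clip_kernel(
    raw_actions: wp.array2d(dtype=wp.float32),  # (num_envs, action_dim)
    scale: wp.array(dtype=wp.float32),          # (action_dim,) constant per joint
    offset: wp.array(dtype=wp.float32),         # (action_dim,) constant per joint
    clip: wp.array2d(dtype=wp.float32),         # (action_dim, 2) -> [min, max] per joint
    processed: wp.array2d(dtype=wp.float32),    # (num_envs, action_dim)
):
    # launch with dim=(num_envs, action_dim) so wp.tid() yields the (env_id, j) pair
    env_id, j = wp.tid()
    value = raw_actions[env_id, j] * scale[j] + offset[j]
    # per-joint clamp without materializing (num_envs, action_dim) constant tensors
    processed[env_id, j] = wp.clamp(value, clip[j, 0], clip[j, 1])
```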

Notes (Warp CUDA graph capture in manager-based env):
- Partition the env step into **small stage functions** that only touch persistent CUDA buffers, then capture/replay them with Warp:
- `step_warp_action_process(...)`: `ActionManager.process_action` (env-step)
- `step_warp_action_apply(...)`: `ActionManager.apply_action` + `scene.write_data_to_sim` (sim-step)
- `step_warp_reward_compute(dt)`: `RewardManager.compute(dt)` (env-step)
- Use a helper like `capture_or_launch(fn, *args, **kwargs)` keyed by `fn.__name__` to standardize:
“if first time: `wp.ScopedCapture()`; else: `wp.capture_launch(graph)`”.
- Any captured stage that reads inputs must read from **stable pointers**:
e.g. keep a persistent `wp.array` action input buffer and copy incoming actions into it each step.
- If the launch topology changes (term list / shapes / enabling debug-vis, etc.), invalidate cached graphs and recapture.
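
A sketch of such a helper, assuming every captured stage only touches persistent CUDA buffers (stream capture records launches without executing them, hence the replay right after the first capture):

```python
import warp as wp

# fn.__name__ -> wp.Graph; clear this cache whenever the launch topology changes
_graphs = {}


def capture_or_launch(fn, *args, **kwargs):
    """Capture `fn` into a CUDA graph on first use, replay the cached graph afterwards."""
    key = fn.__name__
    if key not in _graphs:
        # record the stage's launches into a graph (this does not execute them)
        with wp.ScopedCapture() as capture:
            fn(*args, **kwargs)
        _graphs[key] = capture.graph
    # replay; the graph reads/writes the persistent buffers whose pointers were baked in at capture time
    wp.capture_launch(_graphs[key])
```

Per step, inputs are first copied in-place into the persistent buffers (e.g. `wp.copy(persistent_actions, wp.from_torch(actions))`) so the pointers baked into the graph stay valid.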

### Phase 3 — Dependency surfacing and hybrid handling

What:
- Identify and isolate subsystems that still create Torch buffers internally (common examples: contact sensors, some recorders).
- For each dependency:
- either keep as Torch “edge” (temporarily), or
- create Warp-first equivalents / alternate codepaths

Why:
- Some dependencies are not purely “MDP math” and need dedicated rewrites for graphability.

### Phase 4 — Graph-friendly orchestrator rewrite (fixed topology + masks)

What:
- Replace the dynamic parts of the env `step()`/`reset()` control flow:
- eliminate dynamic indexing patterns (e.g., `nonzero()` → env-id lists)
- use **boolean masks** (`wp.array(dtype=wp.bool)`) and kernels that apply to subsets
- ensure persistent buffers are allocated once and reused
- ensure launch order is stable and capture-ready

Why:
- CUDA graph capture requires stable execution topology.

Key design rules:
- **No per-step Python branching on data-dependent indices** (or keep it outside capture).
- Prefer `mask`-based APIs where possible (e.g., scene reset supports mask).
- Maintain one-time allocations; no shape changes.
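
A sketch of the mask-based subset pattern (hypothetical kernel; a real reset touches more state than joint positions):

```python
import warp as wp


@wp.kernel
def masked_reset_joint_pos_kernel(
    reset_mask: wp.array(dtype=wp.bool),  # (num_envs,) True where the env must reset
    default_joint_pos: wp.array2d(dtype=wp.float32),
    joint_pos: wp.array2d(dtype=wp.float32),
    num_joints: int,
):
    env_id = wp.tid()
    # fixed launch over all envs: the mask decides who is touched, so the launch
    # topology never depends on data (no host-side nonzero()/env-id lists)
    if reset_mask[env_id]:
        for j in range(num_joints):
            joint_pos[env_id, j] = default_joint_pos[env_id, j]
```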

### Phase 5 — Cleanup + consolidation

What:
- Remove transitional Torch shims and duplication where no longer needed.
- Optionally add a stable public entry point once the experimental path is validated.

## Practical “copy vs reuse” policy

- **Copy** into experimental when you expect semantic changes (Cartpole config/mdp, selected managers).
- **Reuse** stable implementations for everything else until it becomes a blocker.
- Prefer one fork at a time (e.g., start with `RewardManager`, then termination, then events).

## Suggested next steps (Cartpole)

- Keep Cartpole task config isolated under `isaaclab_tasks_experimental`.
- Continue stabilizing the experimental `RewardManager` interface (decide: term returns vs term writes).
- Add the next minimal manager fork: `TerminationManager` using Warp buffers (still returning torch views).
@@ -42,4 +42,12 @@
.. _`Task Design Workflows`: https://isaac-sim.github.io/IsaacLab/source/features/task_workflows.html
"""

from .direct_rl_env_warp import DirectRLEnvWarp
from .direct_rl_env_warp import DirectRLEnvWarp # noqa: F401
from .manager_based_env_warp import ManagerBasedEnvWarp # noqa: F401
from .manager_based_rl_env_warp import ManagerBasedRLEnvWarp # noqa: F401

__all__ = [
"DirectRLEnvWarp",
"ManagerBasedEnvWarp",
"ManagerBasedRLEnvWarp",
]