diff --git a/scripts/benchmarks/benchmark_non_rl.py b/scripts/benchmarks/benchmark_non_rl.py index 9eef653455c..e7eeb24fe5a 100644 --- a/scripts/benchmarks/benchmark_non_rl.py +++ b/scripts/benchmarks/benchmark_non_rl.py @@ -12,10 +12,8 @@ import sys import time -from isaaclab.app import AppLauncher - # add argparse arguments -parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") +parser = argparse.ArgumentParser(description="Benchmark non-RL environment.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") @@ -34,11 +32,22 @@ help="Benchmarking backend options, defaults OmniPerfKPIFile", ) parser.add_argument("--output_folder", type=str, default=None, help="Output folder for the benchmark.") +parser.add_argument( + "--kit", + action="store_true", + default=False, + help="Enable Isaac Sim Kit and use isaacsim.benchmark.services. Default: False (uses standalone benchmark).", +) + +# Conditionally add AppLauncher args only if --kit is enabled +if "--kit" in sys.argv: + from isaaclab.app import AppLauncher + + AppLauncher.add_app_launcher_args(parser) -# append AppLauncher cli args -AppLauncher.add_app_launcher_args(parser) # parse the arguments args_cli, hydra_args = parser.parse_known_args() + # always enable cameras to record video if args_cli.video: args_cli.enable_cameras = True @@ -48,49 +57,68 @@ app_start_time_begin = time.perf_counter_ns() -# launch omniverse app -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app +# Conditionally launch Isaac Sim +simulation_app = None +app_launcher = None +if args_cli.kit: + # Force Omniverse mode by setting environment variable + # This ensures SimulationApp is launched even without explicit visualizers + os.environ["LAUNCH_OV_APP"] = "1" + + # launch omniverse app + app_launcher = AppLauncher(args_cli) + simulation_app = app_launcher.app app_start_time_end = time.perf_counter_ns() """Rest everything follows.""" -# enable benchmarking extension -from isaacsim.core.utils.extensions import enable_extension +sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../..")) -enable_extension("isaacsim.benchmark.services") +# Import benchmark infrastructure based on kit flag +if args_cli.kit: + # enable benchmarking extension + from isaacsim.core.utils.extensions import enable_extension -# Set the benchmark settings according to the inputs -import carb + enable_extension("isaacsim.benchmark.services") -settings = carb.settings.get_settings() -settings.set("/exts/isaacsim.benchmark.services/metrics/metrics_output_folder", args_cli.output_folder) -settings.set("/exts/isaacsim.benchmark.services/metrics/randomize_filename_prefix", True) + # Set the benchmark settings according to the inputs + import carb -from isaacsim.benchmark.services import BaseIsaacBenchmark + settings = carb.settings.get_settings() + settings.set("/exts/isaacsim.benchmark.services/metrics/metrics_output_folder", args_cli.output_folder) + settings.set("/exts/isaacsim.benchmark.services/metrics/randomize_filename_prefix", True) -sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../..")) + from isaacsim.benchmark.services import BaseIsaacBenchmark -from isaaclab.utils.timer import Timer -from scripts.benchmarks.utils 
import ( - get_isaaclab_version, - get_mujoco_warp_version, - get_newton_version, - log_app_start_time, - log_python_imports_time, - log_runtime_step_times, - log_scene_creation_time, - log_simulation_start_time, - log_task_start_time, - log_total_start_time, -) + from scripts.benchmarks.utils.benchmark_utils import create_kit_logging_functions, get_timer_value + + # Get all logging functions for kit mode + log_funcs = create_kit_logging_functions() +else: + # Use standalone benchmark services + from scripts.benchmarks.utils.benchmark_utils import create_standalone_logging_functions, get_timer_value + from scripts.benchmarks.utils.standalone_benchmark import StandaloneBenchmark + + # Get all logging functions for standalone mode + log_funcs = create_standalone_logging_functions() + +# Extract individual functions from the dictionary for easier use +get_isaaclab_version = log_funcs["get_isaaclab_version"] +get_mujoco_warp_version = log_funcs["get_mujoco_warp_version"] +get_newton_version = log_funcs["get_newton_version"] +log_app_start_time = log_funcs["log_app_start_time"] +log_python_imports_time = log_funcs["log_python_imports_time"] +log_task_start_time = log_funcs["log_task_start_time"] +log_scene_creation_time = log_funcs["log_scene_creation_time"] +log_simulation_start_time = log_funcs["log_simulation_start_time"] +log_total_start_time = log_funcs["log_total_start_time"] +log_runtime_step_times = log_funcs["log_runtime_step_times"] imports_time_begin = time.perf_counter_ns() import gymnasium as gym import numpy as np -import os import torch from datetime import datetime @@ -104,21 +132,40 @@ # Create the benchmark -benchmark = BaseIsaacBenchmark( - benchmark_name="benchmark_non_rl", - workflow_metadata={ - "metadata": [ - {"name": "task", "data": args_cli.task}, - {"name": "seed", "data": args_cli.seed}, - {"name": "num_envs", "data": args_cli.num_envs}, - {"name": "num_frames", "data": args_cli.num_frames}, - {"name": "Mujoco Warp Info", "data": get_mujoco_warp_version()}, - {"name": "Isaac Lab Info", "data": get_isaaclab_version()}, - {"name": "Newton Info", "data": get_newton_version()}, - ] - }, - backend_type=args_cli.benchmark_backend, -) +if args_cli.kit: + benchmark = BaseIsaacBenchmark( + benchmark_name="benchmark_non_rl", + workflow_metadata={ + "metadata": [ + {"name": "task", "data": args_cli.task}, + {"name": "seed", "data": args_cli.seed}, + {"name": "num_envs", "data": args_cli.num_envs}, + {"name": "num_frames", "data": args_cli.num_frames}, + {"name": "Mujoco Warp Info", "data": get_mujoco_warp_version()}, + {"name": "Isaac Lab Info", "data": get_isaaclab_version()}, + {"name": "Newton Info", "data": get_newton_version()}, + ] + }, + backend_type=args_cli.benchmark_backend, + ) +else: + benchmark = StandaloneBenchmark( + benchmark_name="benchmark_non_rl", + workflow_metadata={ + "metadata": [ + {"name": "task", "data": args_cli.task}, + {"name": "seed", "data": args_cli.seed}, + {"name": "num_envs", "data": args_cli.num_envs}, + {"name": "num_frames", "data": args_cli.num_frames}, + {"name": "Mujoco Warp Info", "data": get_mujoco_warp_version()}, + {"name": "Isaac Lab Info", "data": get_isaaclab_version()}, + {"name": "Newton Info", "data": get_newton_version()}, + ] + }, + backend_type=args_cli.benchmark_backend, + output_folder=args_cli.output_folder, + randomize_filename_prefix=True, + ) @hydra_task_config(args_cli.task, None) @@ -127,15 +174,19 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): # override configurations with non-hydra 
CLI arguments env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs - env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + if args_cli.kit and hasattr(args_cli, "device") and args_cli.device is not None: + env_cfg.sim.device = args_cli.device # process distributed world_size = 1 world_rank = 0 if args_cli.distributed: - env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" - world_size = int(os.getenv("WORLD_SIZE", 1)) - world_rank = app_launcher.global_rank + if args_cli.kit: + env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" + world_size = int(os.getenv("WORLD_SIZE", 1)) + world_rank = app_launcher.global_rank + else: + print("[WARNING] Distributed mode is only supported with --kit flag.") task_startup_time_begin = time.perf_counter_ns() @@ -143,7 +194,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None) # wrap for video recording if args_cli.video: - log_root_path = os.path.abs(f"benchmark/{args_cli.task}") + log_root_path = os.path.abspath(f"benchmark/{args_cli.task}") log_dir = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") video_kwargs = { "video_folder": os.path.join(log_root_path, log_dir, "videos"), @@ -165,7 +216,30 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): num_frames = 0 # log frame times step_times = [] - while simulation_app.is_running(): + + # Run loop depends on whether we're using kit or not + if args_cli.kit: + while simulation_app.is_running(): + while num_frames < args_cli.num_frames: + # get upper and lower bounds of action space, sample actions randomly on this interval + action_high = 1 + action_low = -1 + actions = (action_high - action_low) * torch.rand( + env.unwrapped.num_envs, env.unwrapped.single_action_space.shape[0], device=env.unwrapped.device + ) - action_high + + # env stepping + env_step_time_begin = time.perf_counter_ns() + _ = env.step(actions) + end_step_time_end = time.perf_counter_ns() + step_times.append(end_step_time_end - env_step_time_begin) + + num_frames += 1 + + # terminate + break + else: + # Standalone mode - simple loop while num_frames < args_cli.num_frames: # get upper and lower bounds of action space, sample actions randomly on this interval action_high = 1 @@ -182,9 +256,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): num_frames += 1 - # terminate - break - if world_rank == 0: benchmark.store_measurements() @@ -203,8 +274,13 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): log_app_start_time(benchmark, (app_start_time_end - app_start_time_begin) / 1e6) log_python_imports_time(benchmark, (imports_time_end - imports_time_begin) / 1e6) log_task_start_time(benchmark, (task_startup_time_end - task_startup_time_begin) / 1e6) - log_scene_creation_time(benchmark, Timer.get_timer_info("scene_creation") * 1000) - log_simulation_start_time(benchmark, Timer.get_timer_info("simulation_start") * 1000) + + # Timer may not be available in standalone mode + scene_creation_time = get_timer_value("scene_creation") + simulation_start_time = get_timer_value("simulation_start") + + log_scene_creation_time(benchmark, scene_creation_time * 1000 if scene_creation_time else None) + log_simulation_start_time(benchmark, simulation_start_time * 1000 if simulation_start_time else None) log_total_start_time(benchmark, (task_startup_time_end - 
app_start_time_begin) / 1e6) log_runtime_step_times(benchmark, environment_step_times, compute_stats=True) @@ -218,4 +294,5 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): # run the main function main() # close sim app - simulation_app.close() + if simulation_app is not None: + simulation_app.close() diff --git a/scripts/benchmarks/benchmark_rlgames.py b/scripts/benchmarks/benchmark_rlgames.py index e399503e014..e788cf90a39 100644 --- a/scripts/benchmarks/benchmark_rlgames.py +++ b/scripts/benchmarks/benchmark_rlgames.py @@ -8,13 +8,12 @@ """Launch Isaac Sim Simulator first.""" import argparse +import os import sys import time -from isaaclab.app import AppLauncher - # add argparse arguments -parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") +parser = argparse.ArgumentParser(description="Benchmark RL agent with RL-Games.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") @@ -32,11 +31,23 @@ choices=["LocalLogMetrics", "JSONFileMetrics", "OsmoKPIFile", "OmniPerfKPIFile"], help="Benchmarking backend options, defaults OmniPerfKPIFile", ) +parser.add_argument("--output_folder", type=str, default=None, help="Output folder for the benchmark.") +parser.add_argument( + "--kit", + action="store_true", + default=False, + help="Enable Isaac Sim Kit and use isaacsim.benchmark.services. Default: False (uses standalone benchmark).", +) + +# Conditionally add AppLauncher args only if --kit is enabled +if "--kit" in sys.argv: + from isaaclab.app import AppLauncher + + AppLauncher.add_app_launcher_args(parser) -# append AppLauncher cli args -AppLauncher.add_app_launcher_args(parser) # parse the arguments args_cli, hydra_args = parser.parse_known_args() + # always enable cameras to record video if args_cli.video: args_cli.enable_cameras = True @@ -46,19 +57,56 @@ app_start_time_begin = time.perf_counter_ns() -# launch omniverse app -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app +# Conditionally launch Isaac Sim +simulation_app = None +app_launcher = None +if args_cli.kit: + # Force Omniverse mode by setting environment variable + # This ensures SimulationApp is launched even without explicit visualizers + os.environ["LAUNCH_OV_APP"] = "1" + + # launch omniverse app + app_launcher = AppLauncher(args_cli) + simulation_app = app_launcher.app app_start_time_end = time.perf_counter_ns() """Rest everything follows.""" -# enable benchmarking extension -from isaacsim.core.utils.extensions import enable_extension - -enable_extension("isaacsim.benchmark.services") -from isaacsim.benchmark.services import BaseIsaacBenchmark +# Import benchmark infrastructure based on kit flag +if args_cli.kit: + # enable benchmarking extension + from isaacsim.core.utils.extensions import enable_extension + + enable_extension("isaacsim.benchmark.services") + from isaacsim.benchmark.services import BaseIsaacBenchmark + + from scripts.benchmarks.utils.benchmark_utils import create_kit_logging_functions, get_timer_value + + # Get all logging functions for kit mode + log_funcs = create_kit_logging_functions() +else: + # Use standalone benchmark services + from scripts.benchmarks.utils.benchmark_utils import create_standalone_logging_functions, get_timer_value + from 
scripts.benchmarks.utils.standalone_benchmark import StandaloneBenchmark + + # Get all logging functions for standalone mode + log_funcs = create_standalone_logging_functions() + +# Extract individual functions from the dictionary for easier use +get_isaaclab_version = log_funcs["get_isaaclab_version"] +get_mujoco_warp_version = log_funcs["get_mujoco_warp_version"] +get_newton_version = log_funcs["get_newton_version"] +log_app_start_time = log_funcs["log_app_start_time"] +log_python_imports_time = log_funcs["log_python_imports_time"] +log_task_start_time = log_funcs["log_task_start_time"] +log_scene_creation_time = log_funcs["log_scene_creation_time"] +log_simulation_start_time = log_funcs["log_simulation_start_time"] +log_total_start_time = log_funcs["log_total_start_time"] +log_runtime_step_times = log_funcs["log_runtime_step_times"] +log_rl_policy_rewards = log_funcs["log_rl_policy_rewards"] +log_rl_policy_episode_lengths = log_funcs["log_rl_policy_episode_lengths"] +parse_tf_logs = log_funcs["parse_tf_logs"] imports_time_begin = time.perf_counter_ns() @@ -84,22 +132,6 @@ imports_time_end = time.perf_counter_ns() -sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../..")) - -from isaaclab.utils.timer import Timer -from scripts.benchmarks.utils import ( - log_app_start_time, - log_python_imports_time, - log_rl_policy_episode_lengths, - log_rl_policy_rewards, - log_runtime_step_times, - log_scene_creation_time, - log_simulation_start_time, - log_task_start_time, - log_total_start_time, - parse_tf_logs, -) - torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True torch.backends.cudnn.deterministic = False @@ -107,18 +139,40 @@ # Create the benchmark -benchmark = BaseIsaacBenchmark( - benchmark_name="benchmark_rlgames_train", - workflow_metadata={ - "metadata": [ - {"name": "task", "data": args_cli.task}, - {"name": "seed", "data": args_cli.seed}, - {"name": "num_envs", "data": args_cli.num_envs}, - {"name": "max_iterations", "data": args_cli.max_iterations}, - ] - }, - backend_type=args_cli.benchmark_backend, -) +if args_cli.kit: + benchmark = BaseIsaacBenchmark( + benchmark_name="benchmark_rlgames_train", + workflow_metadata={ + "metadata": [ + {"name": "task", "data": args_cli.task}, + {"name": "seed", "data": args_cli.seed}, + {"name": "num_envs", "data": args_cli.num_envs}, + {"name": "max_iterations", "data": args_cli.max_iterations}, + {"name": "Mujoco Warp Info", "data": get_mujoco_warp_version()}, + {"name": "Isaac Lab Info", "data": get_isaaclab_version()}, + {"name": "Newton Info", "data": get_newton_version()}, + ] + }, + backend_type=args_cli.benchmark_backend, + ) +else: + benchmark = StandaloneBenchmark( + benchmark_name="benchmark_rlgames_train", + workflow_metadata={ + "metadata": [ + {"name": "task", "data": args_cli.task}, + {"name": "seed", "data": args_cli.seed}, + {"name": "num_envs", "data": args_cli.num_envs}, + {"name": "max_iterations", "data": args_cli.max_iterations}, + {"name": "Mujoco Warp Info", "data": get_mujoco_warp_version()}, + {"name": "Isaac Lab Info", "data": get_isaaclab_version()}, + {"name": "Newton Info", "data": get_newton_version()}, + ] + }, + backend_type=args_cli.benchmark_backend, + output_folder=args_cli.output_folder, + randomize_filename_prefix=True, + ) @hydra_task_config(args_cli.task, "rl_games_cfg_entry_point") @@ -127,7 +181,8 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): # override configurations with non-hydra CLI arguments env_cfg.scene.num_envs = 
args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs - env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + if args_cli.kit and hasattr(args_cli, "device") and args_cli.device is not None: + env_cfg.sim.device = args_cli.device # randomly sample a seed if seed = -1 if args_cli.seed == -1: @@ -137,9 +192,12 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): # process distributed world_rank = 0 if args_cli.distributed: - env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" - agent_cfg["params"]["config"]["device"] = f"cuda:{app_launcher.local_rank}" - world_rank = app_launcher.global_rank + if args_cli.kit: + env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" + agent_cfg["params"]["config"]["device"] = f"cuda:{app_launcher.local_rank}" + world_rank = app_launcher.global_rank + else: + print("[WARNING] Distributed mode is only supported with --kit flag.") # specify directory for logging experiments log_root_path = os.path.join("logs", "rl_games", agent_cfg["params"]["config"]["name"]) @@ -153,7 +211,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): agent_cfg["params"]["config"]["full_experiment_name"] = log_dir # multi-gpu training config - if args_cli.distributed: + if args_cli.distributed and args_cli.kit: agent_cfg["params"]["seed"] += app_launcher.global_rank agent_cfg["params"]["config"]["device"] = f"cuda:{app_launcher.local_rank}" agent_cfg["params"]["config"]["device_name"] = f"cuda:{app_launcher.local_rank}" @@ -229,24 +287,29 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): # prepare RL timing dict rl_training_times = { - "Environment only step time": log_data["performance/step_time"], - "Environment + Inference step time": log_data["performance/step_inference_time"], - "Environment + Inference + Policy update time": log_data["performance/rl_update_time"], - "Environment only FPS": log_data["performance/step_fps"], - "Environment + Inference FPS": log_data["performance/step_inference_fps"], - "Environment + Inference + Policy update FPS": log_data["performance/step_inference_rl_update_fps"], + "Environment only step time": log_data.get("performance/step_time", []), + "Environment + Inference step time": log_data.get("performance/step_inference_time", []), + "Environment + Inference + Policy update time": log_data.get("performance/rl_update_time", []), + "Environment only FPS": log_data.get("performance/step_fps", []), + "Environment + Inference FPS": log_data.get("performance/step_inference_fps", []), + "Environment + Inference + Policy update FPS": log_data.get("performance/step_inference_rl_update_fps", []), } # log additional metrics to benchmark services log_app_start_time(benchmark, (app_start_time_end - app_start_time_begin) / 1e6) log_python_imports_time(benchmark, (imports_time_end - imports_time_begin) / 1e6) log_task_start_time(benchmark, (task_startup_time_end - task_startup_time_begin) / 1e6) - log_scene_creation_time(benchmark, Timer.get_timer_info("scene_creation") * 1000) - log_simulation_start_time(benchmark, Timer.get_timer_info("simulation_start") * 1000) + + # Timer may not be available in standalone mode + scene_creation_time = get_timer_value("scene_creation") + simulation_start_time = get_timer_value("simulation_start") + + log_scene_creation_time(benchmark, scene_creation_time * 1000 if scene_creation_time else None) + log_simulation_start_time(benchmark, simulation_start_time * 1000 if 
simulation_start_time else None) log_total_start_time(benchmark, (task_startup_time_end - app_start_time_begin) / 1e6) log_runtime_step_times(benchmark, rl_training_times, compute_stats=True) - log_rl_policy_rewards(benchmark, log_data["rewards/iter"]) - log_rl_policy_episode_lengths(benchmark, log_data["episode_lengths/iter"]) + log_rl_policy_rewards(benchmark, log_data.get("rewards/iter", [])) + log_rl_policy_episode_lengths(benchmark, log_data.get("episode_lengths/iter", [])) benchmark.stop() @@ -258,4 +321,5 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: dict): # run the main function main() # close sim app - simulation_app.close() + if simulation_app is not None: + simulation_app.close() diff --git a/scripts/benchmarks/benchmark_rsl_rl.py b/scripts/benchmarks/benchmark_rsl_rl.py index 8d853e28853..f70cd2ef15a 100644 --- a/scripts/benchmarks/benchmark_rsl_rl.py +++ b/scripts/benchmarks/benchmark_rsl_rl.py @@ -3,10 +3,6 @@ # # SPDX-License-Identifier: BSD-3-Clause -# Copyright (c) 2022-2025, The IsaacLab Project Developers. -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause """Script to benchmark RL agent with RSL-RL.""" @@ -17,13 +13,10 @@ import sys import time -from isaaclab.app import AppLauncher - sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../..")) -import scripts.reinforcement_learning.rsl_rl.cli_args as cli_args # isort: skip # add argparse arguments -parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") +parser = argparse.ArgumentParser(description="Benchmark RL agent with RSL-RL.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") parser.add_argument("--video_length", type=int, default=200, help="Length of the recorded video (in steps).") parser.add_argument("--video_interval", type=int, default=2000, help="Interval between video recordings (in steps).") @@ -47,11 +40,22 @@ default="/tmp", help="Output folder for the benchmark metrics.", ) +parser.add_argument( + "--kit", + action="store_true", + default=False, + help="Enable Isaac Sim Kit and use isaacsim.benchmark.services. 
Default: False (uses standalone benchmark).", +) + +# Conditionally add RSL-RL and AppLauncher args only if --kit is enabled +if "--kit" in sys.argv: + import scripts.reinforcement_learning.rsl_rl.cli_args as cli_args + + cli_args.add_rsl_rl_args(parser) -# append RSL-RL cli arguments -cli_args.add_rsl_rl_args(parser) -# append AppLauncher cli args -AppLauncher.add_app_launcher_args(parser) + from isaaclab.app import AppLauncher + + AppLauncher.add_app_launcher_args(parser) # to ensure kit args don't break the benchmark arg parsing args_cli, hydra_args = parser.parse_known_args() @@ -65,9 +69,17 @@ app_start_time_begin = time.perf_counter_ns() -# launch omniverse app -app_launcher = AppLauncher(args_cli) -simulation_app = app_launcher.app +# Conditionally launch Isaac Sim +simulation_app = None +app_launcher = None +if args_cli.kit: + # Force Omniverse mode by setting environment variable + # This ensures SimulationApp is launched even without explicit visualizers + os.environ["LAUNCH_OV_APP"] = "1" + + # launch omniverse app + app_launcher = AppLauncher(args_cli) + simulation_app = app_launcher.app app_start_time_end = time.perf_counter_ns() @@ -81,10 +93,6 @@ from rsl_rl.runners import OnPolicyRunner -from isaaclab.utils.timer import Timer - -Timer.enable_display_output = False - from isaaclab.envs import DirectRLEnvCfg, ManagerBasedRLEnvCfg from isaaclab.utils.dict import print_dict from isaaclab.utils.io import dump_pickle, dump_yaml @@ -97,37 +105,49 @@ imports_time_end = time.perf_counter_ns() -from isaacsim.core.utils.extensions import enable_extension - -enable_extension("isaacsim.benchmark.services") - -# Set the benchmark settings according to the inputs -import carb - -settings = carb.settings.get_settings() -settings.set("/exts/isaacsim.benchmark.services/metrics/metrics_output_folder", args_cli.output_folder) -settings.set("/exts/isaacsim.benchmark.services/metrics/randomize_filename_prefix", True) - - -from isaacsim.benchmark.services import BaseIsaacBenchmark - -from scripts.benchmarks.utils import ( - get_isaaclab_version, - get_mujoco_warp_version, - get_newton_version, - log_app_start_time, - log_newton_finalize_builder_time, - log_newton_initialize_solver_time, - log_python_imports_time, - log_rl_policy_episode_lengths, - log_rl_policy_rewards, - log_runtime_step_times, - log_scene_creation_time, - log_simulation_start_time, - log_task_start_time, - log_total_start_time, - parse_tf_logs, -) +# Import benchmark infrastructure based on kit flag +if args_cli.kit: + from isaacsim.core.utils.extensions import enable_extension + + enable_extension("isaacsim.benchmark.services") + + # Set the benchmark settings according to the inputs + import carb + + settings = carb.settings.get_settings() + settings.set("/exts/isaacsim.benchmark.services/metrics/metrics_output_folder", args_cli.output_folder) + settings.set("/exts/isaacsim.benchmark.services/metrics/randomize_filename_prefix", True) + + from isaacsim.benchmark.services import BaseIsaacBenchmark + + from scripts.benchmarks.utils.benchmark_utils import create_kit_logging_functions, get_timer_value + + # Get all logging functions for kit mode + log_funcs = create_kit_logging_functions() +else: + # Use standalone benchmark services + from scripts.benchmarks.utils.benchmark_utils import create_standalone_logging_functions, get_timer_value + from scripts.benchmarks.utils.standalone_benchmark import StandaloneBenchmark + + # Get all logging functions for standalone mode + log_funcs = create_standalone_logging_functions() + +# 
Extract individual functions from the dictionary for easier use +get_isaaclab_version = log_funcs["get_isaaclab_version"] +get_mujoco_warp_version = log_funcs["get_mujoco_warp_version"] +get_newton_version = log_funcs["get_newton_version"] +log_app_start_time = log_funcs["log_app_start_time"] +log_python_imports_time = log_funcs["log_python_imports_time"] +log_task_start_time = log_funcs["log_task_start_time"] +log_scene_creation_time = log_funcs["log_scene_creation_time"] +log_simulation_start_time = log_funcs["log_simulation_start_time"] +log_newton_finalize_builder_time = log_funcs["log_newton_finalize_builder_time"] +log_newton_initialize_solver_time = log_funcs["log_newton_initialize_solver_time"] +log_total_start_time = log_funcs["log_total_start_time"] +log_runtime_step_times = log_funcs["log_runtime_step_times"] +log_rl_policy_rewards = log_funcs["log_rl_policy_rewards"] +log_rl_policy_episode_lengths = log_funcs["log_rl_policy_episode_lengths"] +parse_tf_logs = log_funcs["parse_tf_logs"] torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True @@ -135,30 +155,57 @@ torch.backends.cudnn.benchmark = False # Create the benchmark -benchmark = BaseIsaacBenchmark( - benchmark_name="benchmark_rsl_rl_train", - workflow_metadata={ - "metadata": [ - {"name": "task", "data": args_cli.task}, - {"name": "seed", "data": args_cli.seed}, - {"name": "num_envs", "data": args_cli.num_envs}, - {"name": "max_iterations", "data": args_cli.max_iterations}, - {"name": "Mujoco Warp Info", "data": get_mujoco_warp_version()}, - {"name": "Isaac Lab Info", "data": get_isaaclab_version()}, - {"name": "Newton Info", "data": get_newton_version()}, - ], - }, - backend_type=args_cli.benchmark_backend, -) +if args_cli.kit: + benchmark = BaseIsaacBenchmark( + benchmark_name="benchmark_rsl_rl_train", + workflow_metadata={ + "metadata": [ + {"name": "task", "data": args_cli.task}, + {"name": "seed", "data": args_cli.seed}, + {"name": "num_envs", "data": args_cli.num_envs}, + {"name": "max_iterations", "data": args_cli.max_iterations}, + {"name": "Mujoco Warp Info", "data": get_mujoco_warp_version()}, + {"name": "Isaac Lab Info", "data": get_isaaclab_version()}, + {"name": "Newton Info", "data": get_newton_version()}, + ], + }, + backend_type=args_cli.benchmark_backend, + ) +else: + benchmark = StandaloneBenchmark( + benchmark_name="benchmark_rsl_rl_train", + workflow_metadata={ + "metadata": [ + {"name": "task", "data": args_cli.task}, + {"name": "seed", "data": args_cli.seed}, + {"name": "num_envs", "data": args_cli.num_envs}, + {"name": "max_iterations", "data": args_cli.max_iterations}, + {"name": "Mujoco Warp Info", "data": get_mujoco_warp_version()}, + {"name": "Isaac Lab Info", "data": get_isaaclab_version()}, + {"name": "Newton Info", "data": get_newton_version()}, + ], + }, + backend_type=args_cli.benchmark_backend, + output_folder=args_cli.output_folder, + randomize_filename_prefix=True, + ) @hydra_task_config(args_cli.task, "rsl_rl_cfg_entry_point") def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): """Train with RSL-RL agent.""" # parse configuration - benchmark.set_phase("loading", start_recording_frametime=False, start_recording_runtime=True) + if args_cli.kit: + benchmark.set_phase("loading", start_recording_frametime=False, start_recording_runtime=True) + else: + benchmark.set_phase("loading", start_recording_frametime=False, start_recording_runtime=False) + # override configurations with non-hydra CLI arguments - agent_cfg = 
cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + if args_cli.kit: + import scripts.reinforcement_learning.rsl_rl.cli_args as cli_args + + agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs agent_cfg.max_iterations = ( args_cli.max_iterations if args_cli.max_iterations is not None else agent_cfg.max_iterations @@ -167,21 +214,25 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: RslRlOnPolic # set the environment seed # note: certain randomizations occur in the environment initialization so we set the seed here env_cfg.seed = agent_cfg.seed - env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device + if args_cli.kit and hasattr(args_cli, "device") and args_cli.device is not None: + env_cfg.sim.device = args_cli.device # multi-gpu training configuration world_rank = 0 world_size = 1 if args_cli.distributed: - env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" - agent_cfg.device = f"cuda:{app_launcher.local_rank}" - - # set seed to have diversity in different threads - seed = agent_cfg.seed + app_launcher.local_rank - env_cfg.seed = seed - agent_cfg.seed = seed - world_rank = app_launcher.global_rank - world_size = int(os.getenv("WORLD_SIZE", 1)) + if args_cli.kit: + env_cfg.sim.device = f"cuda:{app_launcher.local_rank}" + agent_cfg.device = f"cuda:{app_launcher.local_rank}" + + # set seed to have diversity in different threads + seed = agent_cfg.seed + app_launcher.local_rank + env_cfg.seed = seed + agent_cfg.seed = seed + world_rank = app_launcher.global_rank + world_size = int(os.getenv("WORLD_SIZE", 1)) + else: + print("[WARNING] Distributed mode is only supported with --kit flag.") # specify directory for logging experiments log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name) @@ -252,30 +303,41 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: RslRlOnPolic # prepare RL timing dict collection_fps = ( 1 - / (np.array(log_data["Perf/collection time"])) + / (np.array(log_data.get("Perf/collection time", [1]))) * env.unwrapped.num_envs * agent_cfg.num_steps_per_env * world_size ) rl_training_times = { - "Collection Time": (np.array(log_data["Perf/collection time"]) / 1000).tolist(), - "Learning Time": (np.array(log_data["Perf/learning_time"]) / 1000).tolist(), + "Collection Time": (np.array(log_data.get("Perf/collection time", [])) / 1000).tolist(), + "Learning Time": (np.array(log_data.get("Perf/learning_time", [])) / 1000).tolist(), "Collection FPS": collection_fps.tolist(), - "Total FPS": log_data["Perf/total_fps"] * world_size, + "Total FPS": [x * world_size for x in log_data.get("Perf/total_fps", [])], } # log additional metrics to benchmark services log_app_start_time(benchmark, (app_start_time_end - app_start_time_begin) / 1e6) log_python_imports_time(benchmark, (imports_time_end - imports_time_begin) / 1e6) log_task_start_time(benchmark, (task_startup_time_end - task_startup_time_begin) / 1e6) - log_scene_creation_time(benchmark, Timer.get_timer_info("scene_creation") * 1000) - log_simulation_start_time(benchmark, Timer.get_timer_info("simulation_start") * 1000) - log_newton_finalize_builder_time(benchmark, Timer.get_timer_info("newton_finalize_builder") * 1000) - log_newton_initialize_solver_time(benchmark, Timer.get_timer_info("newton_initialize_solver") * 1000) + + # Timer may not be available in standalone mode + scene_creation_time = 
get_timer_value("scene_creation") + simulation_start_time = get_timer_value("simulation_start") + newton_finalize_builder_time = get_timer_value("newton_finalize_builder") + newton_initialize_solver_time = get_timer_value("newton_initialize_solver") + + log_scene_creation_time(benchmark, scene_creation_time * 1000 if scene_creation_time else None) + log_simulation_start_time(benchmark, simulation_start_time * 1000 if simulation_start_time else None) + log_newton_finalize_builder_time( + benchmark, newton_finalize_builder_time * 1000 if newton_finalize_builder_time else None + ) + log_newton_initialize_solver_time( + benchmark, newton_initialize_solver_time * 1000 if newton_initialize_solver_time else None + ) log_total_start_time(benchmark, (task_startup_time_end - app_start_time_begin) / 1e6) log_runtime_step_times(benchmark, rl_training_times, compute_stats=True) - log_rl_policy_rewards(benchmark, log_data["Train/mean_reward"]) - log_rl_policy_episode_lengths(benchmark, log_data["Train/mean_episode_length"]) + log_rl_policy_rewards(benchmark, log_data.get("Train/mean_reward", [])) + log_rl_policy_episode_lengths(benchmark, log_data.get("Train/mean_episode_length", [])) benchmark.stop() @@ -287,4 +349,5 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg, agent_cfg: RslRlOnPolic # run the main function main() # close sim app - simulation_app.close() + if simulation_app is not None: + simulation_app.close() diff --git a/scripts/benchmarks/utils/README.md b/scripts/benchmarks/utils/README.md new file mode 100644 index 00000000000..7ed521a6799 --- /dev/null +++ b/scripts/benchmarks/utils/README.md @@ -0,0 +1,117 @@ +# Benchmark Utilities + +This directory contains utility modules used by the benchmark scripts. + +## Files + +### `benchmark_utils.py` +Common utility functions for both kit and standalone modes. Provides factory functions that return the appropriate logging implementations based on the mode: +- `create_kit_logging_functions()`: Returns logging functions for Isaac Sim kit mode +- `create_standalone_logging_functions()`: Returns logging functions for standalone mode +- `get_timer_value()`: Safely retrieves timer values + +### `standalone_benchmark.py` +Standalone benchmark infrastructure that replicates the functionality of `isaacsim.benchmark.services` without requiring Isaac Sim. Contains: +- Measurement classes (`SingleMeasurement`, `ListMeasurement`, `DictMeasurement`, `BooleanMeasurement`) +- Metadata classes (`StringMetadata`, `IntMetadata`, `FloatMetadata`, `DictMetadata`) +- `TestPhase` class for organizing measurements +- Backend implementations: + - `OmniPerfKPIFile`: Single JSON with all phases + - `OsmoKPIFile`: Separate JSON per phase + - `JSONFileMetrics`: Detailed JSON with full objects + - `LocalLogMetrics`: Console output only +- `StandaloneBenchmark`: Main benchmark class +- **System Metrics Collection** (requires `psutil` and optionally `GPUtil`/`pynvml`): + - CPU metrics: user, system, idle, iowait percentages + - Memory metrics: RSS, VMS, USS (in GB) + - GPU metrics: memory usage, utilization + - Runtime duration per phase + - System information: CPU count, GPU device name + +### `utils.py` +Legacy utility functions for Isaac Sim kit mode benchmarking. Contains: +- Version retrieval functions (`get_isaaclab_version`, `get_newton_version`, `get_mujoco_warp_version`) +- Logging functions for various metrics (timing, rewards, episode lengths, etc.) 
+- `parse_tf_logs()`: TensorBoard log parser + +## Usage + +The benchmark scripts (`benchmark_non_rl.py`, `benchmark_rlgames.py`, `benchmark_rsl_rl.py`) automatically select the appropriate utilities based on the `--kit` flag: + +```python +if args_cli.kit: + # Use Isaac Sim benchmark services + log_funcs = create_kit_logging_functions() +else: + # Use standalone benchmark services + log_funcs = create_standalone_logging_functions() +``` + +This approach eliminates code duplication across the benchmark scripts while maintaining support for both modes. + +## System Metrics Collection + +The standalone benchmark can automatically collect system metrics similar to Isaac Sim's benchmark services: + +### Collected Metrics + +**System Information (collected once):** +- Number of CPUs +- GPU device name + +**Runtime Metrics (collected per phase):** +- **Memory:** + - RSS (Resident Set Size) + - VMS (Virtual Memory Size) + - USS (Unique Set Size) +- **CPU Usage:** + - User time percentage + - System time percentage + - Idle time percentage + - I/O wait time percentage +- **GPU (if available):** + - Memory used + - Total memory + - GPU utilization percentage +- **Runtime:** Phase execution duration + +### Requirements + +- **psutil**: Required for CPU and memory metrics (automatically detected) +- **GPUtil** or **pynvml**: Optional for GPU metrics (automatically detected) +- **nvidia-smi**: Fallback for GPU metrics (usually pre-installed with NVIDIA drivers) + +The GPU detection tries three methods in order: +1. **GPUtil**: Fast Python library +2. **pynvml** (nvidia-ml-py3): Official NVIDIA library +3. **nvidia-smi**: Direct system call (most reliable, works even with driver/library version mismatch) + +Install with: +```bash +pip install psutil GPUtil +# or +pip install psutil nvidia-ml-py3 +``` + +Note: `nvidia-smi` is automatically used as a fallback and doesn't require installation (comes with NVIDIA drivers). + +### Usage + +System metrics are collected automatically by default: + +```python +benchmark = StandaloneBenchmark( + benchmark_name="my_benchmark", + collect_system_metrics=True # default +) +``` + +To disable system metrics: +```python +benchmark = StandaloneBenchmark( + benchmark_name="my_benchmark", + collect_system_metrics=False +) +``` + +The metrics will appear in your output JSON alongside your custom measurements. diff --git a/scripts/benchmarks/utils/__init__.py b/scripts/benchmarks/utils/__init__.py new file mode 100644 index 00000000000..822b13ae1a1 --- /dev/null +++ b/scripts/benchmarks/utils/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Utilities for benchmarking scripts.""" diff --git a/scripts/benchmarks/utils/benchmark_utils.py b/scripts/benchmarks/utils/benchmark_utils.py new file mode 100644 index 00000000000..07a5b4dcfaa --- /dev/null +++ b/scripts/benchmarks/utils/benchmark_utils.py @@ -0,0 +1,205 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Common utility functions for benchmark scripts running in standalone or kit mode.""" + +import os + +from isaaclab.utils.timer import Timer + + +def create_kit_logging_functions(): + """Create logging functions for kit mode that use isaacsim.benchmark.services. 
+ + Returns: + A dictionary containing all the logging function implementations for kit mode. + """ + from scripts.benchmarks.utils.utils import ( + get_isaaclab_version, + get_mujoco_warp_version, + get_newton_version, + log_app_start_time, + log_newton_finalize_builder_time, + log_newton_initialize_solver_time, + log_python_imports_time, + log_rl_policy_episode_lengths, + log_rl_policy_rewards, + log_runtime_step_times, + log_scene_creation_time, + log_simulation_start_time, + log_task_start_time, + log_total_start_time, + parse_tf_logs, + ) + + return { + "get_isaaclab_version": get_isaaclab_version, + "get_mujoco_warp_version": get_mujoco_warp_version, + "get_newton_version": get_newton_version, + "log_app_start_time": log_app_start_time, + "log_python_imports_time": log_python_imports_time, + "log_task_start_time": log_task_start_time, + "log_scene_creation_time": log_scene_creation_time, + "log_simulation_start_time": log_simulation_start_time, + "log_newton_finalize_builder_time": log_newton_finalize_builder_time, + "log_newton_initialize_solver_time": log_newton_initialize_solver_time, + "log_total_start_time": log_total_start_time, + "log_runtime_step_times": log_runtime_step_times, + "log_rl_policy_rewards": log_rl_policy_rewards, + "log_rl_policy_episode_lengths": log_rl_policy_episode_lengths, + "parse_tf_logs": parse_tf_logs, + } + + +def create_standalone_logging_functions(): # noqa: C901 + """Create logging functions for standalone mode that use standalone_benchmark. + + Returns: + A dictionary containing all the logging function implementations for standalone mode. + """ + from scripts.benchmarks.utils.standalone_benchmark import DictMeasurement, ListMeasurement, SingleMeasurement + + def get_isaaclab_version(): + try: + import isaaclab + + return {"version": isaaclab.__version__, "commit": None, "branch": None} + except Exception: + return {"version": None, "commit": None, "branch": None} + + def get_mujoco_warp_version(): + try: + import mujoco_warp + + return {"version": getattr(mujoco_warp, "__version__", None), "commit": None, "branch": None} + except Exception: + return {"version": None, "commit": None, "branch": None} + + def get_newton_version(): + try: + import newton + + return {"version": newton.__version__, "commit": None, "branch": None} + except Exception: + return {"version": None, "commit": None, "branch": None} + + def log_app_start_time(benchmark, value): + measurement = SingleMeasurement(name="App Launch Time", value=value, unit="ms") + benchmark.store_custom_measurement("startup", measurement) + + def log_python_imports_time(benchmark, value): + measurement = SingleMeasurement(name="Python Imports Time", value=value, unit="ms") + benchmark.store_custom_measurement("startup", measurement) + + def log_task_start_time(benchmark, value): + measurement = SingleMeasurement(name="Task Creation and Start Time", value=value, unit="ms") + benchmark.store_custom_measurement("startup", measurement) + + def log_scene_creation_time(benchmark, value): + if value is not None: + measurement = SingleMeasurement(name="Scene Creation Time", value=value, unit="ms") + benchmark.store_custom_measurement("startup", measurement) + + def log_simulation_start_time(benchmark, value): + if value is not None: + measurement = SingleMeasurement(name="Simulation Start Time", value=value, unit="ms") + benchmark.store_custom_measurement("startup", measurement) + + def log_newton_finalize_builder_time(benchmark, value): + if value is not None: + measurement = SingleMeasurement(name="Newton 
Finalize Builder Time", value=value, unit="ms") + benchmark.store_custom_measurement("startup", measurement) + + def log_newton_initialize_solver_time(benchmark, value): + if value is not None: + measurement = SingleMeasurement(name="Newton Initialize Solver Time", value=value, unit="ms") + benchmark.store_custom_measurement("startup", measurement) + + def log_total_start_time(benchmark, value): + measurement = SingleMeasurement(name="Total Start Time (Launch to Train)", value=value, unit="ms") + benchmark.store_custom_measurement("startup", measurement) + + def log_runtime_step_times(benchmark, value, compute_stats=True): + measurement = DictMeasurement(name="Step Frametimes", value=value) + benchmark.store_custom_measurement("runtime", measurement) + if compute_stats: + for k, v in value.items(): + if isinstance(v, list) and len(v) > 0: + measurement = SingleMeasurement(name=f"Min {k}", value=min(v), unit="ms") + benchmark.store_custom_measurement("runtime", measurement) + measurement = SingleMeasurement(name=f"Max {k}", value=max(v), unit="ms") + benchmark.store_custom_measurement("runtime", measurement) + measurement = SingleMeasurement(name=f"Mean {k}", value=sum(v) / len(v), unit="ms") + benchmark.store_custom_measurement("runtime", measurement) + + def log_rl_policy_rewards(benchmark, value): + measurement = ListMeasurement(name="Rewards", value=value) + benchmark.store_custom_measurement("train", measurement) + if len(value) > 0: + measurement = SingleMeasurement(name="Max Rewards", value=max(value), unit="float") + benchmark.store_custom_measurement("train", measurement) + measurement = SingleMeasurement(name="Last Reward", value=value[-1], unit="float") + benchmark.store_custom_measurement("train", measurement) + + def log_rl_policy_episode_lengths(benchmark, value): + measurement = ListMeasurement(name="Episode Lengths", value=value) + benchmark.store_custom_measurement("train", measurement) + if len(value) > 0: + measurement = SingleMeasurement(name="Max Episode Lengths", value=max(value), unit="float") + benchmark.store_custom_measurement("train", measurement) + measurement = SingleMeasurement(name="Last Episode Length", value=value[-1], unit="float") + benchmark.store_custom_measurement("train", measurement) + + def parse_tf_logs(log_dir: str): + """Parse tensorboard logs.""" + import glob + + from tensorboard.backend.event_processing import event_accumulator + + list_of_files = glob.glob(f"{log_dir}/events*") + if not list_of_files: + return {} + latest_file = max(list_of_files, key=os.path.getctime) + + log_data = {} + ea = event_accumulator.EventAccumulator(latest_file) + ea.Reload() + tags = ea.Tags()["scalars"] + for tag in tags: + log_data[tag] = [] + for event in ea.Scalars(tag): + log_data[tag].append(event.value) + return log_data + + return { + "get_isaaclab_version": get_isaaclab_version, + "get_mujoco_warp_version": get_mujoco_warp_version, + "get_newton_version": get_newton_version, + "log_app_start_time": log_app_start_time, + "log_python_imports_time": log_python_imports_time, + "log_task_start_time": log_task_start_time, + "log_scene_creation_time": log_scene_creation_time, + "log_simulation_start_time": log_simulation_start_time, + "log_newton_finalize_builder_time": log_newton_finalize_builder_time, + "log_newton_initialize_solver_time": log_newton_initialize_solver_time, + "log_total_start_time": log_total_start_time, + "log_runtime_step_times": log_runtime_step_times, + "log_rl_policy_rewards": log_rl_policy_rewards, + "log_rl_policy_episode_lengths": 
log_rl_policy_episode_lengths, + "parse_tf_logs": parse_tf_logs, + } + + +def get_timer_value(timer_name: str) -> float: + """Safely get timer value, returning 0 if not available. + + Args: + timer_name: Name of the timer to retrieve. + + Returns: + Timer value in seconds, or 0 if not available. + """ + value = Timer.get_timer_info(timer_name) + return value if value is not None else 0 diff --git a/scripts/benchmarks/utils/standalone_benchmark.py b/scripts/benchmarks/utils/standalone_benchmark.py new file mode 100644 index 00000000000..28ac8909d56 --- /dev/null +++ b/scripts/benchmarks/utils/standalone_benchmark.py @@ -0,0 +1,861 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Standalone benchmark services that can run without Isaac Sim. + +This module provides a lightweight benchmarking system that mimics the functionality +of isaacsim.benchmark.services but does not depend on Isaac Sim being available. +""" + +import json +import multiprocessing +import os +import subprocess +import tempfile +import time +from contextlib import suppress +from dataclasses import dataclass, field +from datetime import datetime as dt +from pathlib import Path + +# Optional dependencies for system metrics +try: + import psutil + + PSUTIL_AVAILABLE = True +except ImportError: + PSUTIL_AVAILABLE = False + print("[WARNING] psutil not available. System metrics will not be collected. Install with: pip install psutil") + +# Try GPU libraries +GPUTIL_AVAILABLE = False +PYNVML_AVAILABLE = False + +try: + import GPUtil + + GPUTIL_AVAILABLE = True + print("[INFO] GPUtil available for GPU metrics") +except ImportError: + pass + +if not GPUTIL_AVAILABLE: + try: + import pynvml + + PYNVML_AVAILABLE = True + print("[INFO] pynvml available for GPU metrics") + except ImportError: + pass + +if not GPUTIL_AVAILABLE and not PYNVML_AVAILABLE: + print("[INFO] GPUtil/pynvml not available, will use nvidia-smi if available") + + +############################# +# Measurement Data Classes # +############################# + + +@dataclass +class Measurement: + """Base measurement class.""" + + name: str + + +@dataclass +class SingleMeasurement(Measurement): + """Represents a single float measurement.""" + + value: float + unit: str + type: str = "single" + + +@dataclass +class BooleanMeasurement(Measurement): + """Represents a boolean measurement.""" + + bvalue: bool + type: str = "boolean" + + +@dataclass +class DictMeasurement(Measurement): + """Represents a dictionary measurement.""" + + value: dict + type: str = "dict" + + +@dataclass +class ListMeasurement(Measurement): + """Represents a list measurement.""" + + value: list + type: str = "list" + + def __repr__(self): + return f"{self.__class__.__name__}(name={self.name!r}, length={len(self.value)})" + + +@dataclass +class MetadataBase: + """Base metadata class.""" + + name: str + + +@dataclass +class StringMetadata(MetadataBase): + """String metadata.""" + + data: str + type: str = "string" + + +@dataclass +class IntMetadata(MetadataBase): + """Integer metadata.""" + + data: int + type: str = "int" + + +@dataclass +class FloatMetadata(MetadataBase): + """Float metadata.""" + + data: float + type: str = "float" + + +@dataclass +class DictMetadata(MetadataBase): + """Dictionary metadata.""" + + data: dict + type: str = "dict" + + +@dataclass +class TestPhase: + """Represents a single test phase which may have many metrics 
associated with it.""" + + phase_name: str + measurements: list[Measurement] = field(default_factory=list) + metadata: list[MetadataBase] = field(default_factory=list) + + def get_metadata_field(self, name, default=KeyError): + """Get a metadata field's value. + + Args: + name: Field name (case-insensitive). + default: Default value if not found. If KeyError, raises exception. + + Returns: + The metadata value. + + Raises: + KeyError: If the field is not found and default is KeyError. + """ + name = name.lower() + for m in self.metadata: + name2 = m.name.replace(self.phase_name, "").strip().lower() + if name == name2: + return m.data + + if default is KeyError: + raise KeyError(name) + return default + + @classmethod + def metadata_from_dict(cls, m: dict) -> list[MetadataBase]: + """Create metadata from dictionary. + + Args: + m: Dictionary containing metadata list. + + Returns: + List of MetadataBase objects. + """ + metadata = [] + metadata_mapping = {str: StringMetadata, int: IntMetadata, float: FloatMetadata, dict: DictMetadata} + for meas in m["metadata"]: + if "data" in meas: + metadata_type = metadata_mapping.get(type(meas["data"])) + if metadata_type: + curr_meta = metadata_type(name=meas["name"], data=meas["data"]) + metadata.append(curr_meta) + return metadata + + +class TestPhaseEncoder(json.JSONEncoder): + """JSON encoder for TestPhase objects.""" + + def default(self, o): + return o.__dict__ + + +############################# +# Backend Implementation # +############################# + + +class OmniPerfKPIFile: + """Prints metrics into a JSON document compatible with OmniPerfKPIFile format.""" + + def __init__(self): + self._test_phases = [] + + def add_metrics(self, test_phase: TestPhase) -> None: + """Add a test phase to the backend. + + Args: + test_phase: The test phase to add. + """ + self._test_phases.append(test_phase) + + def finalize(self, metrics_output_folder: str, randomize_filename_prefix: bool = False) -> None: + """Write metrics to output file. + + Args: + metrics_output_folder: Output folder for metrics file. + randomize_filename_prefix: Whether to randomize the filename prefix. + """ + if not self._test_phases: + print("[WARNING] No test phases to write. 
Skipping metrics file generation.") + return + + workflow_data = {"timestamp": dt.now().isoformat()} + + test_name = None + for test_phase in self._test_phases: + # Retrieve useful metadata from test_phase + test_name = test_phase.get_metadata_field("workflow_name") + phase_name = test_phase.get_metadata_field("phase") + + phase_data = {} + log_statements = [f"{phase_name} Metrics:"] + # Add metadata as metrics + for metadata in test_phase.metadata: + phase_data[metadata.name] = metadata.data + log_statements.append(f" {metadata.name}: {metadata.data}") + # Add measurements as metrics + for measurement in test_phase.measurements: + if isinstance(measurement, SingleMeasurement): + log_statements.append(f" {measurement.name}: {measurement.value} {measurement.unit}") + phase_data[measurement.name] = measurement.value + elif isinstance(measurement, (DictMeasurement, ListMeasurement)): + # For dict and list measurements, store them as-is + phase_data[measurement.name] = measurement.value + # Log all metrics to console + print("\n".join(log_statements)) + + workflow_data[phase_name] = phase_data + + # Generate the output filename + if randomize_filename_prefix: + _, metrics_filename_out = tempfile.mkstemp( + dir=metrics_output_folder, prefix=f"kpis_{test_name}", suffix=".json" + ) + else: + metrics_filename_out = Path(metrics_output_folder) / f"kpis_{test_name}.json" + # Dump key-value pairs to the JSON document + json_data = json.dumps(workflow_data, indent=4) + with open(metrics_filename_out, "w") as f: + print(f"[INFO] Writing metrics to {metrics_filename_out}") + f.write(json_data) + + +class OsmoKPIFile: + """Print metrics into separate JSON documents for each phase.""" + + def __init__(self): + self._test_phases = [] + + def add_metrics(self, test_phase: TestPhase) -> None: + """Add a test phase to the backend. + + Args: + test_phase: The test phase to add. + """ + self._test_phases.append(test_phase) + + def finalize(self, metrics_output_folder: str, randomize_filename_prefix: bool = False) -> None: + """Write metrics to output files. + + Args: + metrics_output_folder: Output folder for metrics files. + randomize_filename_prefix: Whether to randomize the filename prefix. 
+ """ + for test_phase in self._test_phases: + # Retrieve useful metadata from test_phase + test_name = test_phase.get_metadata_field("workflow_name") + phase_name = test_phase.get_metadata_field("phase") + + osmo_kpis = {} + log_statements = [f"{phase_name} KPIs:"] + # Add metadata as KPIs + for metadata in test_phase.metadata: + osmo_kpis[metadata.name] = metadata.data + log_statements.append(f" {metadata.name}: {metadata.data}") + # Add single measurements as KPIs + for measurement in test_phase.measurements: + if isinstance(measurement, SingleMeasurement): + osmo_kpis[measurement.name] = measurement.value + log_statements.append(f" {measurement.name}: {measurement.value} {measurement.unit}") + # Log all KPIs to console + print("\n".join(log_statements)) + # Generate the output filename + if randomize_filename_prefix: + _, metrics_filename_out = tempfile.mkstemp( + dir=metrics_output_folder, prefix=f"kpis_{test_name}_{phase_name}", suffix=".json" + ) + else: + metrics_filename_out = Path(metrics_output_folder) / f"kpis_{test_name}_{phase_name}.json" + # Dump key-value pairs to the JSON document + json_data = json.dumps(osmo_kpis, indent=4) + with open(metrics_filename_out, "w") as f: + f.write(json_data) + + +class JSONFileMetrics: + """Dump all metrics to a single JSON file.""" + + def __init__(self): + self.data = [] + self.test_name = None + + def add_metrics(self, test_phase: TestPhase) -> None: + """Add a test phase to the backend. + + Args: + test_phase: The test phase to add. + """ + self.data.append(test_phase) + + def finalize(self, metrics_output_folder: str, randomize_filename_prefix: bool = False) -> None: + """Write metrics to output file. + + Args: + metrics_output_folder: Output folder for metrics file. + randomize_filename_prefix: Whether to randomize the filename prefix. + """ + if not self.data: + print("[WARNING] No test data to write. Skipping metrics file generation.") + return + + # Get test name + for test_phase in self.data: + test_name = test_phase.get_metadata_field("workflow_name") + if test_name != self.test_name: + if self.test_name: + print( + f"[WARNING] Nonempty test name {self.test_name} different from name {test_name} provided by" + " test phase." + ) + self.test_name = test_name + + phase_name = test_phase.get_metadata_field("phase") + for m in test_phase.measurements: + m.name = f"{test_name} {phase_name} {m.name}" + + for m in test_phase.metadata: + m.name = f"{test_name} {phase_name} {m.name}" + + json_data = json.dumps(self.data, indent=4, cls=TestPhaseEncoder) + + # Generate the output filename + if randomize_filename_prefix: + _, metrics_filename_out = tempfile.mkstemp( + dir=metrics_output_folder, prefix=f"metrics_{self.test_name}", suffix=".json" + ) + else: + metrics_filename_out = Path(metrics_output_folder) / f"metrics_{self.test_name}.json" + + with open(metrics_filename_out, "w") as f: + print(f"[INFO] Writing metrics to {metrics_filename_out}") + f.write(json_data) + + self.data.clear() + + +class LocalLogMetrics: + """Simple backend that just logs metrics to console.""" + + def __init__(self): + self._test_phases = [] + + def add_metrics(self, test_phase: TestPhase) -> None: + """Add a test phase to the backend. + + Args: + test_phase: The test phase to add. + """ + self._test_phases.append(test_phase) + + def finalize(self, metrics_output_folder: str, randomize_filename_prefix: bool = False) -> None: + """Log metrics to console. + + Args: + metrics_output_folder: Not used for this backend. 
+ randomize_filename_prefix: Not used for this backend. + """ + for test_phase in self._test_phases: + test_name = test_phase.get_metadata_field("workflow_name") + phase_name = test_phase.get_metadata_field("phase") + + print(f"\n{'=' * 60}") + print(f"Benchmark: {test_name} - Phase: {phase_name}") + print(f"{'=' * 60}") + + print("\nMetadata:") + for metadata in test_phase.metadata: + print(f" {metadata.name}: {metadata.data}") + + print("\nMeasurements:") + for measurement in test_phase.measurements: + if isinstance(measurement, SingleMeasurement): + print(f" {measurement.name}: {measurement.value} {measurement.unit}") + elif isinstance(measurement, (DictMeasurement, ListMeasurement)): + print( + f" {measurement.name}: {type(measurement.value).__name__} with {len(measurement.value)} items" + ) + + +class MetricsBackend: + """Factory for creating metrics backends.""" + + @staticmethod + def get_instance(instance_type: str): + """Get a metrics backend instance. + + Args: + instance_type: Type of backend ("OmniPerfKPIFile", "OsmoKPIFile", "JSONFileMetrics", "LocalLogMetrics"). + + Returns: + An instance of the requested backend. + + Raises: + ValueError: If instance_type is not recognized. + """ + if instance_type == "OmniPerfKPIFile": + return OmniPerfKPIFile() + elif instance_type == "OsmoKPIFile": + return OsmoKPIFile() + elif instance_type == "JSONFileMetrics": + return JSONFileMetrics() + elif instance_type == "LocalLogMetrics": + return LocalLogMetrics() + else: + raise ValueError(f"Unknown backend type: {instance_type}") + + +############################# +# Benchmark Class # +############################# + + +class StandaloneBenchmark: + """Standalone benchmark class that works without Isaac Sim. + + This class mimics the functionality of BaseIsaacBenchmark but does not + depend on Isaac Sim or its benchmark services extension. + """ + + def __init__( + self, + benchmark_name: str = "StandaloneBenchmark", + backend_type: str = "OmniPerfKPIFile", + workflow_metadata: dict = {}, + output_folder: str | None = None, + randomize_filename_prefix: bool = False, + collect_system_metrics: bool = True, + ): + """Initialize the standalone benchmark. + + Args: + benchmark_name: Name of the benchmark. + backend_type: Type of backend to use for metrics collection. + workflow_metadata: Metadata describing the benchmark. + output_folder: Output folder for metrics files. If None, uses temp directory. + randomize_filename_prefix: Whether to randomize the filename prefix. + collect_system_metrics: Whether to collect system metrics (CPU, memory, GPU). 
+        """
+        self.benchmark_name = benchmark_name
+        self._test_phases = []
+        self._current_phase = None
+        self._collect_system_metrics = collect_system_metrics and PSUTIL_AVAILABLE
+
+        # System metrics tracking
+        self._phase_start_time = None
+        self._process = psutil.Process() if PSUTIL_AVAILABLE else None
+
+        # Check if nvidia-smi is available
+        self._nvidia_smi_available = self._check_nvidia_smi()
+
+        # Get metrics backend
+        self._metrics = MetricsBackend.get_instance(instance_type=backend_type)
+
+        # Set output folder
+        if output_folder is None:
+            self._metrics_output_folder = tempfile.gettempdir()
+        else:
+            self._metrics_output_folder = output_folder
+
+        self._randomize_filename_prefix = randomize_filename_prefix
+
+        # Generate workflow-level metadata
+        self._metadata = [StringMetadata(name="workflow_name", data=self.benchmark_name)]
+        if "metadata" in workflow_metadata:
+            self._metadata.extend(TestPhase.metadata_from_dict(workflow_metadata))
+        elif workflow_metadata:
+            print("[WARNING] workflow_metadata was provided but has no 'metadata' entry; it will be ignored.")
+
+        print(f"[INFO] Benchmark initialized: {self.benchmark_name}")
+        print(f"[INFO] Output folder: {self._metrics_output_folder}")
+        print(f"[INFO] Backend type: {backend_type}")
+        print(f"[INFO] System metrics collection: {'enabled' if self._collect_system_metrics else 'disabled'}")
+        if self._nvidia_smi_available:
+            print("[INFO] nvidia-smi available for direct GPU queries")
+        self.benchmark_start_time = time.time()
+
+    def _check_nvidia_smi(self) -> bool:
+        """Check if nvidia-smi is available.
+
+        Returns:
+            True if nvidia-smi is available and working.
+        """
+        try:
+            result = subprocess.run(
+                ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], capture_output=True, text=True, timeout=2
+            )
+            return result.returncode == 0
+        except Exception:
+            # Covers FileNotFoundError, subprocess.TimeoutExpired and any other failure mode.
+            return False
+
+    def _get_gpu_info_nvidia_smi(self) -> dict[str, str | float] | None:
+        """Get GPU information using nvidia-smi directly.
+
+        Returns:
+            Dictionary with GPU info or None if failed.
+        """
+        try:
+            # Query multiple fields at once
+            result = subprocess.run(
+                [
+                    "nvidia-smi",
+                    "--query-gpu=name,memory.used,memory.total,utilization.gpu",
+                    "--format=csv,noheader,nounits",
+                ],
+                capture_output=True,
+                text=True,
+                timeout=2,
+            )
+
+            if result.returncode == 0:
+                line = result.stdout.strip().split("\n")[0]  # Get first GPU
+                parts = [p.strip() for p in line.split(",")]
+                if len(parts) >= 4:
+                    return {
+                        "name": parts[0],
+                        "memory_used_mb": float(parts[1]),
+                        "memory_total_mb": float(parts[2]),
+                        "utilization": float(parts[3]),
+                    }
+        except Exception as e:
+            print(f"[WARNING] nvidia-smi query failed: {e}")
+
+        return None
+
+    def _collect_system_info(self) -> list[MetadataBase]:
+        """Collect system information as metadata.
+
+        Returns:
+            List of metadata objects with system information.
+ """ + metadata = [] + + if not self._collect_system_metrics: + return metadata + + # CPU count + metadata.append(IntMetadata(name="num_cpus", data=multiprocessing.cpu_count())) + + # GPU information - try multiple methods + gpu_detected = False + + # Method 1: Try GPUtil + if GPUTIL_AVAILABLE and not gpu_detected: + try: + gpus = GPUtil.getGPUs() + if gpus and len(gpus) > 0: + metadata.append(StringMetadata(name="gpu_device_name", data=gpus[0].name)) + gpu_detected = True + print(f"[INFO] GPU detected via GPUtil: {gpus[0].name}") + except Exception as e: + print(f"[WARNING] Failed to get GPU info via GPUtil: {e}") + + # Method 2: Try pynvml + if PYNVML_AVAILABLE and not gpu_detected: + try: + pynvml.nvmlInit() + device_count = pynvml.nvmlDeviceGetCount() + if device_count > 0: + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + gpu_name = pynvml.nvmlDeviceGetName(handle) + if isinstance(gpu_name, bytes): + gpu_name = gpu_name.decode("utf-8") + metadata.append(StringMetadata(name="gpu_device_name", data=gpu_name)) + gpu_detected = True + print(f"[INFO] GPU detected via pynvml: {gpu_name}") + pynvml.nvmlShutdown() + except Exception as e: + print(f"[WARNING] Failed to get GPU info via pynvml: {e}") + with suppress(Exception): + pynvml.nvmlShutdown() + + # Method 3: Try nvidia-smi directly (most reliable, bypasses driver mismatch) + if self._nvidia_smi_available and not gpu_detected: + try: + gpu_info = self._get_gpu_info_nvidia_smi() + if gpu_info: + metadata.append(StringMetadata(name="gpu_device_name", data=gpu_info["name"])) + gpu_detected = True + print(f"[INFO] GPU detected via nvidia-smi: {gpu_info['name']}") + except Exception as e: + print(f"[WARNING] Failed to get GPU info via nvidia-smi: {e}") + + if not gpu_detected: + print("[WARNING] No GPU detected. GPU metrics will not be available.") + print( + "[INFO] This is likely due to NVIDIA driver issues (Driver/library version mismatch or error code 18)." + ) + print( + "[INFO] To fix: 1) sudo reboot (reloads drivers) 2) Check nvidia-smi manually 3) Reinstall NVIDIA" + " drivers" + ) + print("[INFO] The benchmark will continue without GPU metrics.") + + return metadata + + def _collect_runtime_metrics(self) -> list[Measurement]: + """Collect runtime system metrics. + + Returns: + List of measurement objects with runtime metrics. 
+ """ + measurements = [] + + if not self._collect_system_metrics: + return measurements + + try: + # Memory metrics (in GB) + mem_info = self._process.memory_info() + measurements.append(SingleMeasurement(name="System Memory RSS", value=mem_info.rss / (1024**3), unit="GB")) + measurements.append(SingleMeasurement(name="System Memory VMS", value=mem_info.vms / (1024**3), unit="GB")) + + # USS (Unique Set Size) if available + try: + mem_full = self._process.memory_full_info() + measurements.append( + SingleMeasurement(name="System Memory USS", value=mem_full.uss / (1024**3), unit="GB") + ) + except (AttributeError, psutil.AccessDenied): + pass + + # CPU usage + cpu_times = psutil.cpu_times_percent(interval=0.1) + measurements.append(SingleMeasurement(name="System CPU user", value=cpu_times.user, unit="%")) + measurements.append(SingleMeasurement(name="System CPU system", value=cpu_times.system, unit="%")) + measurements.append(SingleMeasurement(name="System CPU idle", value=cpu_times.idle, unit="%")) + if hasattr(cpu_times, "iowait"): + measurements.append(SingleMeasurement(name="System CPU iowait", value=cpu_times.iowait, unit="%")) + + # Runtime duration + if self._phase_start_time is not None: + runtime_ms = (time.time() - self._phase_start_time) * 1000 + measurements.append(SingleMeasurement(name="Runtime", value=runtime_ms, unit="ms")) + + # GPU metrics - try multiple methods + gpu_metrics_collected = False + + # Method 1: Try GPUtil + if GPUTIL_AVAILABLE and not gpu_metrics_collected: + try: + gpus = GPUtil.getGPUs() + if gpus and len(gpus) > 0: + gpu = gpus[0] + measurements.append( + SingleMeasurement(name="GPU Memory Tracked", value=gpu.memoryUsed / 1024, unit="GB") + ) + measurements.append( + SingleMeasurement(name="GPU Memory Dedicated", value=gpu.memoryTotal / 1024, unit="GB") + ) + measurements.append(SingleMeasurement(name="GPU Utilization", value=gpu.load * 100, unit="%")) + gpu_metrics_collected = True + except Exception as e: + print(f"[WARNING] Failed to collect GPU metrics via GPUtil: {e}") + + # Method 2: Try pynvml + if PYNVML_AVAILABLE and not gpu_metrics_collected: + try: + pynvml.nvmlInit() + device_count = pynvml.nvmlDeviceGetCount() + if device_count > 0: + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle) + util_rates = pynvml.nvmlDeviceGetUtilizationRates(handle) + measurements.append( + SingleMeasurement(name="GPU Memory Tracked", value=mem_info.used / (1024**3), unit="GB") + ) + measurements.append( + SingleMeasurement(name="GPU Memory Dedicated", value=mem_info.total / (1024**3), unit="GB") + ) + measurements.append(SingleMeasurement(name="GPU Utilization", value=util_rates.gpu, unit="%")) + gpu_metrics_collected = True + pynvml.nvmlShutdown() + except Exception as e: + print(f"[WARNING] Failed to collect GPU metrics via pynvml: {e}") + with suppress(Exception): + pynvml.nvmlShutdown() + + # Method 3: Try nvidia-smi directly (most reliable, bypasses driver mismatch) + if self._nvidia_smi_available and not gpu_metrics_collected: + try: + gpu_info = self._get_gpu_info_nvidia_smi() + if gpu_info: + measurements.append( + SingleMeasurement( + name="GPU Memory Tracked", value=gpu_info["memory_used_mb"] / 1024, unit="GB" + ) + ) + measurements.append( + SingleMeasurement( + name="GPU Memory Dedicated", value=gpu_info["memory_total_mb"] / 1024, unit="GB" + ) + ) + measurements.append( + SingleMeasurement(name="GPU Utilization", value=gpu_info["utilization"], unit="%") + ) + gpu_metrics_collected = True + print("[INFO] 
Collected GPU metrics via nvidia-smi") + except Exception as e: + print(f"[WARNING] Failed to collect GPU metrics via nvidia-smi: {e}") + + if not gpu_metrics_collected: + if GPUTIL_AVAILABLE or PYNVML_AVAILABLE or self._nvidia_smi_available: + print("[WARNING] GPU libraries/tools available but all methods failed to collect metrics.") + print("[INFO] This usually indicates NVIDIA driver issues (Driver/library mismatch or GPU lost).") + print("[INFO] To fix: sudo reboot (reloads drivers and libraries)") + else: + print("[INFO] No GPU libraries available. GPU metrics will not be collected.") + + except Exception as e: + print(f"[WARNING] Failed to collect runtime metrics: {e}") + import traceback + + traceback.print_exc() + + return measurements + + def set_phase( + self, phase: str, start_recording_frametime: bool = True, start_recording_runtime: bool = True + ) -> None: + """Set the current benchmarking phase. + + Args: + phase: Name of the phase. + start_recording_frametime: Not used in standalone version (for API compatibility). + start_recording_runtime: Not used in standalone version (for API compatibility). + """ + print(f"[INFO] Starting phase: {phase}") + self._current_phase = phase + self._phase_start_time = time.time() + + def store_measurements(self) -> None: + """Store measurements for the current phase. + + This method should be called after completing work in a phase and before + setting a new phase or calling stop(). + """ + if self._current_phase is None: + print("[WARNING] No phase set. Call set_phase() before store_measurements().") + return + + # Create a new test phase + test_phase = TestPhase(phase_name=self._current_phase, measurements=[], metadata=[]) + + # Collect system info metadata (only for first phase or if explicitly needed) + if len(self._test_phases) == 0: + system_metadata = self._collect_system_info() + test_phase.metadata.extend(system_metadata) + + # Collect runtime metrics + runtime_measurements = self._collect_runtime_metrics() + test_phase.measurements.extend(runtime_measurements) + + # Update test phase metadata with phase name and benchmark metadata + test_phase.metadata.extend(self._metadata) + test_phase.metadata.append(StringMetadata(name="phase", data=self._current_phase)) + self._test_phases.append(test_phase) + + print(f"[INFO] Stored measurements for phase: {self._current_phase}") + + def store_custom_measurement(self, phase_name: str, custom_measurement: Measurement) -> None: + """Store a custom measurement for a specific phase. + + Args: + phase_name: Name of the phase. + custom_measurement: The measurement to store. 
+ """ + # Check if the phase already exists + existing_phase = next((phase for phase in self._test_phases if phase.phase_name == phase_name), None) + + if existing_phase: + # Add the custom measurement to the existing phase + existing_phase.measurements.append(custom_measurement) + else: + # If the phase does not exist, create a new test phase + new_test_phase = TestPhase(phase_name=phase_name, measurements=[custom_measurement], metadata=[]) + # Update test phase metadata with phase name and benchmark metadata + new_test_phase.metadata.extend(self._metadata) + new_test_phase.metadata.append(StringMetadata(name="phase", data=phase_name)) + + # Add the new test phase to the list of test phases + self._test_phases.append(new_test_phase) + + def stop(self): + """Stop benchmarking and write accumulated metrics to file.""" + print("[INFO] Stopping benchmark") + + if not self._test_phases: + print( + "[WARNING] No test phases collected. After set_phase(), store_measurements() should be called. " + "No metrics will be written." + ) + return + + # Create output folder if it doesn't exist + if not os.path.exists(self._metrics_output_folder): + os.makedirs(self._metrics_output_folder, exist_ok=True) + + print("[INFO] Writing metrics data.") + + # Finalize by adding all test phases to the backend metrics + for test_phase in self._test_phases: + self._metrics.add_metrics(test_phase) + + self._metrics.finalize(self._metrics_output_folder, self._randomize_filename_prefix) + + elapsed_time = time.time() - self.benchmark_start_time + print(f"[INFO] Benchmark completed in {elapsed_time:.2f} seconds") diff --git a/scripts/benchmarks/utils/test_gpu_detection.py b/scripts/benchmarks/utils/test_gpu_detection.py new file mode 100755 index 00000000000..97555dceaaf --- /dev/null +++ b/scripts/benchmarks/utils/test_gpu_detection.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Test GPU detection for standalone benchmark.""" + +from contextlib import suppress + +print("Testing GPU detection methods...\n") + +# Test GPUtil +print("=" * 60) +print("Testing GPUtil:") +print("=" * 60) +try: + import GPUtil + + print("✓ GPUtil is installed") + gpus = GPUtil.getGPUs() + if gpus: + print(f"✓ Found {len(gpus)} GPU(s)") + for i, gpu in enumerate(gpus): + print(f" GPU {i}: {gpu.name}") + print(f" Memory Used: {gpu.memoryUsed:.2f} MB") + print(f" Memory Total: {gpu.memoryTotal:.2f} MB") + print(f" Load: {gpu.load * 100:.1f}%") + else: + print("✗ No GPUs found via GPUtil") +except ImportError: + print("✗ GPUtil not installed. 
Install with: pip install GPUtil") +except Exception as e: + print(f"✗ Error using GPUtil: {e}") + +print() + +# Test pynvml +print("=" * 60) +print("Testing pynvml (nvidia-ml-py3):") +print("=" * 60) +try: + import pynvml + + print("✓ pynvml is installed") + pynvml.nvmlInit() + device_count = pynvml.nvmlDeviceGetCount() + print(f"✓ Found {device_count} GPU(s)") + + for i in range(device_count): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + name = pynvml.nvmlDeviceGetName(handle) + if isinstance(name, bytes): + name = name.decode("utf-8") + mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle) + util_rates = pynvml.nvmlDeviceGetUtilizationRates(handle) + + print(f" GPU {i}: {name}") + print(f" Memory Used: {mem_info.used / (1024**2):.2f} MB") + print(f" Memory Total: {mem_info.total / (1024**2):.2f} MB") + print(f" GPU Utilization: {util_rates.gpu}%") + print(f" Memory Utilization: {util_rates.memory}%") + + pynvml.nvmlShutdown() +except ImportError: + print("✗ pynvml not installed. Install with: pip install nvidia-ml-py3") +except Exception as e: + print(f"✗ Error using pynvml: {e}") + with suppress(Exception): + pynvml.nvmlShutdown() + +print() + +# Test nvidia-smi directly +print("=" * 60) +print("Testing nvidia-smi (direct system call):") +print("=" * 60) +try: + import subprocess + + result = subprocess.run( + ["nvidia-smi", "--query-gpu=name,memory.used,memory.total,utilization.gpu", "--format=csv,noheader,nounits"], + capture_output=True, + text=True, + timeout=2, + ) + if result.returncode == 0: + print("✓ nvidia-smi is available") + lines = result.stdout.strip().split("\n") + for i, line in enumerate(lines): + parts = [p.strip() for p in line.split(",")] + if len(parts) >= 4: + print(f" GPU {i}: {parts[0]}") + print(f" Memory Used: {float(parts[1]):.2f} MB") + print(f" Memory Total: {float(parts[2]):.2f} MB") + print(f" GPU Utilization: {float(parts[3]):.1f}%") + else: + print(f"✗ nvidia-smi returned error code: {result.returncode}") + print(f" stderr: {result.stderr}") +except FileNotFoundError: + print("✗ nvidia-smi not found. NVIDIA drivers may not be installed.") +except subprocess.TimeoutExpired: + print("✗ nvidia-smi timed out") +except Exception as e: + print(f"✗ Error running nvidia-smi: {e}") + +print() + +# Test torch CUDA +print("=" * 60) +print("Testing PyTorch CUDA:") +print("=" * 60) +try: + import torch + + print("✓ PyTorch is installed") + if torch.cuda.is_available(): + print("✓ CUDA is available") + print(f" Device count: {torch.cuda.device_count()}") + for i in range(torch.cuda.device_count()): + print(f" GPU {i}: {torch.cuda.get_device_name(i)}") + mem_allocated = torch.cuda.memory_allocated(i) / (1024**3) + mem_reserved = torch.cuda.memory_reserved(i) / (1024**3) + print(f" Memory Allocated: {mem_allocated:.2f} GB") + print(f" Memory Reserved: {mem_reserved:.2f} GB") + else: + print("✗ CUDA not available in PyTorch") +except ImportError: + print("✗ PyTorch not installed") +except Exception as e: + print(f"✗ Error using PyTorch: {e}") + +print() +print("=" * 60) +print("Summary:") +print("=" * 60) +print("GPU Detection Methods (in priority order):") +print("1. GPUtil - Python library, fast but affected by driver mismatch") +print("2. pynvml - Official NVIDIA library, affected by driver mismatch") +print("3. PyTorch CUDA - Shows PyTorch-allocated memory (may be 0)") +print("4. nvidia-smi - Direct system call, MOST RELIABLE, bypasses driver issues") +print() +print("If no GPUs were detected, possible reasons:") +print("1. No NVIDIA GPU in the system") +print("2. 
NVIDIA drivers not installed (check: nvidia-smi)") +print("3. Driver/library version mismatch (ERROR: 'Driver/library version mismatch')") +print(" → FIX: sudo reboot (reloads driver and libraries)") +print("4. GPU libraries not installed") +print("5. Permission issues (try: sudo usermod -a -G video $USER)") +print() +print("Best Solution for 'Driver/library version mismatch':") +print(" → Use nvidia-smi method (Method 4) - works despite mismatch") +print(" → Or reboot system to sync driver/library versions") +print() +print("Recommended install command:") +print(" pip install psutil GPUtil nvidia-ml-py3") +print() +print("Note: PyTorch is usually already installed in Isaac Lab environments") +print(" and nvidia-smi is the most reliable fallback.") diff --git a/scripts/benchmarks/utils.py b/scripts/benchmarks/utils/utils.py similarity index 100% rename from scripts/benchmarks/utils.py rename to scripts/benchmarks/utils/utils.py
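For reviewers, here is a minimal usage sketch of the standalone path added in scripts/benchmarks/utils/standalone_benchmark.py. Only the StandaloneBenchmark API itself comes from this PR; the import path, backend choice, output folder, and phase names below are illustrative assumptions.

# Usage sketch only: import path, backend choice, output folder and phase names are assumptions.
from scripts.benchmarks.utils.standalone_benchmark import StandaloneBenchmark

benchmark = StandaloneBenchmark(
    benchmark_name="benchmark_non_rl",
    backend_type="JSONFileMetrics",
    output_folder="/tmp/benchmarks",
    randomize_filename_prefix=False,
)

benchmark.set_phase("startup")
# ... create the scene / environment here ...
benchmark.store_measurements()

benchmark.set_phase("runtime")
# ... step the environment for the benchmarked frames here ...
benchmark.store_measurements()

# stop() creates the output folder if needed; with JSONFileMetrics the result is
# metrics_benchmark_non_rl.json (the workflow_name metadata is derived from benchmark_name).
benchmark.stop()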
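The four metrics backends share an implicit two-method protocol (add_metrics followed by finalize), which is also how StandaloneBenchmark.stop() drives them. The sketch below exercises a backend directly; it assumes that TestPhase, StringMetadata, SingleMeasurement and MetricsBackend are importable from the same standalone utils module, and the measurement name and value are made up for illustration.

# Sketch under the assumption that these classes are exposed by the standalone utils module.
import os

from scripts.benchmarks.utils.standalone_benchmark import (
    MetricsBackend,
    SingleMeasurement,
    StringMetadata,
    TestPhase,
)

backend = MetricsBackend.get_instance("OsmoKPIFile")

# Build a phase the same way StandaloneBenchmark.store_measurements() does.
phase = TestPhase(phase_name="runtime", measurements=[], metadata=[])
phase.metadata.append(StringMetadata(name="workflow_name", data="benchmark_non_rl"))
phase.metadata.append(StringMetadata(name="phase", data="runtime"))
phase.measurements.append(SingleMeasurement(name="Mean FPS", value=100000.0, unit="frames/s"))

backend.add_metrics(phase)

# OsmoKPIFile writes one kpis_benchmark_non_rl_runtime.json per phase into the given folder.
output_folder = "/tmp/benchmarks"
os.makedirs(output_folder, exist_ok=True)
backend.finalize(output_folder, randomize_filename_prefix=False)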