diff --git a/pyproject.toml b/pyproject.toml index 51d820917..6ee8e4f3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath" -version = "2.3.0" +version = "2.3.1" description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools." readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" diff --git a/src/uipath/_cli/_evals/_evaluator_factory.py b/src/uipath/_cli/_evals/_evaluator_factory.py index 15c54007e..253ce36d4 100644 --- a/src/uipath/_cli/_evals/_evaluator_factory.py +++ b/src/uipath/_cli/_evals/_evaluator_factory.py @@ -1,4 +1,5 @@ import importlib.util +import logging import sys from pathlib import Path from typing import Any @@ -68,6 +69,8 @@ ToolCallOutputEvaluatorConfig, ) +logger = logging.getLogger(__name__) + class EvaluatorFactory: """Factory class for creating evaluator instances based on configuration.""" @@ -106,12 +109,15 @@ def _prepare_evaluator_config(data: dict[str, Any]) -> dict[str, Any]: @classmethod def create_evaluator( - cls, data: dict[str, Any], evaluators_dir: Path | None = None + cls, + data: dict[str, Any], + evaluators_dir: Path | None = None, + agent_model: str | None = None, ) -> BaseEvaluator[Any, Any, Any]: if data.get("version", None) == "1.0": return cls._create_evaluator_internal(data, evaluators_dir) else: - return cls._create_legacy_evaluator_internal(data) + return cls._create_legacy_evaluator_internal(data, agent_model) @staticmethod def _create_evaluator_internal( @@ -371,11 +377,14 @@ def _create_llm_judge_simulation_trajectory_evaluator( @staticmethod def _create_legacy_evaluator_internal( data: dict[str, Any], + agent_model: str | None = None, ) -> LegacyBaseEvaluator[Any]: """Create an evaluator instance from configuration data. Args: data: Dictionary containing evaluator configuration from JSON file + agent_model: Optional model name from agent settings for resolving + 'same-as-agent' model configuration Returns: Appropriate evaluator instance based on category @@ -391,9 +400,13 @@ def _create_legacy_evaluator_internal( case JsonSimilarityEvaluatorParams(): return EvaluatorFactory._create_legacy_json_similarity_evaluator(params) case LLMEvaluatorParams(): - return EvaluatorFactory._create_legacy_llm_as_judge_evaluator(params) + return EvaluatorFactory._create_legacy_llm_as_judge_evaluator( + params, agent_model + ) case TrajectoryEvaluatorParams(): - return EvaluatorFactory._create_legacy_trajectory_evaluator(params) + return EvaluatorFactory._create_legacy_trajectory_evaluator( + params, agent_model + ) case _: raise ValueError(f"Unknown evaluator category: {params}") @@ -414,6 +427,7 @@ def _create_legacy_json_similarity_evaluator( @staticmethod def _create_legacy_llm_as_judge_evaluator( params: LLMEvaluatorParams, + agent_model: str | None = None, ) -> LegacyLlmAsAJudgeEvaluator: """Create an LLM-as-a-judge evaluator.""" if not params.prompt: @@ -421,26 +435,51 @@ def _create_legacy_llm_as_judge_evaluator( if not params.model: raise ValueError("LLM evaluator must include 'model' field") + + # Resolve 'same-as-agent' to actual agent model if params.model == "same-as-agent": - raise ValueError( - "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator." + if not agent_model: + raise ValueError( + "'same-as-agent' model option requires agent settings. " + "Ensure agent.json contains valid model settings." 
+ ) + logger.info( + f"Resolving 'same-as-agent' to agent model: {agent_model} " + f"for evaluator '{params.name}'" ) + params = params.model_copy(update={"model": agent_model}) + logger.info( + f"Creating LLM-as-judge evaluator '{params.name}' with model: {params.model}" + ) return LegacyLlmAsAJudgeEvaluator(**params.model_dump(), config={}) @staticmethod def _create_legacy_trajectory_evaluator( params: TrajectoryEvaluatorParams, + agent_model: str | None = None, ) -> LegacyTrajectoryEvaluator: """Create a trajectory evaluator.""" if not params.prompt: raise ValueError("Trajectory evaluator must include 'prompt' field") if not params.model: - raise ValueError("LLM evaluator must include 'model' field") + raise ValueError("Trajectory evaluator must include 'model' field") + + # Resolve 'same-as-agent' to actual agent model if params.model == "same-as-agent": - raise ValueError( - "'same-as-agent' model option is not supported by coded agents evaluations. Please select a specific model for the evaluator." + if not agent_model: + raise ValueError( + "'same-as-agent' model option requires agent settings. " + "Ensure agent.json contains valid model settings." + ) + logger.info( + f"Resolving 'same-as-agent' to agent model: {agent_model} " + f"for evaluator '{params.name}'" ) + params = params.model_copy(update={"model": agent_model}) + logger.info( + f"Creating trajectory evaluator '{params.name}' with model: {params.model}" + ) return LegacyTrajectoryEvaluator(**params.model_dump(), config={}) diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 23751d266..508230a7f 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -5,7 +5,16 @@ from contextlib import contextmanager from pathlib import Path from time import time -from typing import Any, Awaitable, Iterable, Iterator, Sequence, Tuple +from typing import ( + Any, + Awaitable, + Iterable, + Iterator, + Protocol, + Sequence, + Tuple, + runtime_checkable, +) import coverage from opentelemetry import context as context_api @@ -67,6 +76,27 @@ set_execution_context, ) +logger = logging.getLogger(__name__) + + +@runtime_checkable +class LLMAgentRuntimeProtocol(Protocol): + """Protocol for runtimes that can provide agent model information. + + Runtimes that implement this protocol can be queried for + the agent's configured LLM model, enabling features like 'same-as-agent' + model resolution for evaluators. + """ + + def get_agent_model(self) -> str | None: + """Return the agent's configured LLM model name. + + Returns: + The model name from agent settings (e.g., 'gpt-4o-2024-11-20'), + or None if no model is configured. + """ + ... 
+ class ExecutionSpanExporter(SpanExporter): """Custom exporter that stores spans grouped by execution ids.""" @@ -180,6 +210,8 @@ def __init__( self.logs_exporter: ExecutionLogsExporter = ExecutionLogsExporter() self.execution_id = str(uuid.uuid4()) self.schema: UiPathRuntimeSchema | None = None + self._agent_model: str | None = None + self._metadata_loaded: bool = False self.coverage = coverage.Coverage(branch=True) async def __aenter__(self) -> "UiPathEvalRuntime": @@ -192,14 +224,33 @@ async def __aexit__(self, *args: Any) -> None: self.coverage.stop() self.coverage.report(include=["./*"], show_missing=True) - async def get_schema(self) -> UiPathRuntimeSchema: - if not self.schema: - temp_runtime = await self.factory.new_runtime( - entrypoint=self.context.entrypoint or "", - runtime_id="default", - ) + async def _ensure_metadata_loaded(self) -> None: + """Load metadata (schema, agent model) from a single temporary runtime. + + This method creates one temporary runtime to fetch both schema and agent + model, avoiding the overhead of creating multiple runtimes for metadata + queries. Results are cached for subsequent access. + """ + if self._metadata_loaded: + return + + temp_runtime = await self.factory.new_runtime( + entrypoint=self.context.entrypoint or "", + runtime_id="metadata-query", + ) + try: self.schema = await temp_runtime.get_schema() + self._agent_model = self._find_agent_model_in_runtime(temp_runtime) + if self._agent_model: + logger.debug(f"Got agent model from runtime: {self._agent_model}") + self._metadata_loaded = True + finally: await temp_runtime.dispose() + + async def get_schema(self) -> UiPathRuntimeSchema: + await self._ensure_metadata_loaded() + if self.schema is None: + raise ValueError("Schema could not be loaded") return self.schema @contextmanager @@ -232,7 +283,7 @@ async def initiate_evaluation( evaluation_set, _ = EvalHelpers.load_eval_set( self.context.eval_set, self.context.eval_ids ) - evaluators = self._load_evaluators(evaluation_set) + evaluators = await self._load_evaluators(evaluation_set) await self.event_bus.publish( EvaluationEvents.CREATE_EVAL_SET_RUN, @@ -601,7 +652,48 @@ async def run_evaluator( return result - def _load_evaluators( + async def _get_agent_model(self) -> str | None: + """Get agent model from the runtime. + + Uses the cached metadata from _ensure_metadata_loaded(), which creates + a single temporary runtime to fetch both schema and agent model. + + Returns: + The model name from agent settings, or None if not found. + """ + try: + await self._ensure_metadata_loaded() + return self._agent_model + except Exception: + return None + + def _find_agent_model_in_runtime(self, runtime: Any) -> str | None: + """Recursively search for get_agent_model in runtime and its delegates. + + Runtimes may be wrapped (e.g., ResumableRuntime wraps TelemetryWrapper + which wraps the base runtime). This method traverses the wrapper chain + to find a runtime that implements LLMAgentRuntimeProtocol. + + Args: + runtime: The runtime to check (may be a wrapper) + + Returns: + The model name if found, None otherwise. 
+ """ + # Check if this runtime implements the protocol + if isinstance(runtime, LLMAgentRuntimeProtocol): + return runtime.get_agent_model() + + # Check for delegate property (used by UiPathResumableRuntime, TelemetryRuntimeWrapper) + delegate = getattr(runtime, "delegate", None) or getattr( + runtime, "_delegate", None + ) + if delegate is not None: + return self._find_agent_model_in_runtime(delegate) + + return None + + async def _load_evaluators( self, evaluation_set: EvaluationSet ) -> list[BaseEvaluator[Any, Any, Any]]: """Load evaluators referenced by the evaluation set.""" @@ -611,6 +703,9 @@ def _load_evaluators( raise ValueError("eval_set cannot be None") evaluators_dir = Path(eval_set).parent.parent / "evaluators" + # Load agent model for 'same-as-agent' resolution in legacy evaluators + agent_model = await self._get_agent_model() + # If evaluatorConfigs is specified, use that (new field with weights) # Otherwise, fall back to evaluatorRefs (old field without weights) if ( @@ -638,7 +733,9 @@ def _load_evaluators( try: evaluator_id = data.get("id") if evaluator_id in evaluator_ref_ids: - evaluator = EvaluatorFactory.create_evaluator(data, evaluators_dir) + evaluator = EvaluatorFactory.create_evaluator( + data, evaluators_dir, agent_model=agent_model + ) evaluators.append(evaluator) found_evaluator_ids.add(evaluator_id) except Exception as e: diff --git a/tests/cli/eval/test_eval_runtime_metadata.py b/tests/cli/eval/test_eval_runtime_metadata.py new file mode 100644 index 000000000..c11bf122d --- /dev/null +++ b/tests/cli/eval/test_eval_runtime_metadata.py @@ -0,0 +1,465 @@ +"""Tests for UiPathEvalRuntime metadata loading functionality. + +This module tests: +- _ensure_metadata_loaded() - single runtime creation for both schema and agent model +- _get_agent_model() - cached agent model retrieval +- get_schema() - cached schema retrieval +- _find_agent_model_in_runtime() - recursive delegate traversal +- LLMAgentRuntimeProtocol - protocol implementation detection +""" + +from pathlib import Path +from typing import Any, AsyncGenerator + +import pytest +from uipath.core.tracing import UiPathTraceManager +from uipath.runtime import ( + UiPathExecuteOptions, + UiPathRuntimeEvent, + UiPathRuntimeProtocol, + UiPathRuntimeResult, + UiPathRuntimeStatus, + UiPathStreamOptions, +) +from uipath.runtime.schema import UiPathRuntimeSchema + +from uipath._cli._evals._runtime import ( + LLMAgentRuntimeProtocol, + UiPathEvalContext, + UiPathEvalRuntime, +) +from uipath._events._event_bus import EventBus + + +class MockRuntimeSchema(UiPathRuntimeSchema): + """Mock schema for testing.""" + + def __init__(self): + super().__init__( + filePath="test.py", + uniqueId="test", + type="workflow", + input={"type": "object", "properties": {}}, + output={"type": "object", "properties": {}}, + ) + + +class BaseTestRuntime: + """Base test runtime without agent model support.""" + + async def execute( + self, + input: dict[str, Any] | None = None, + options: UiPathExecuteOptions | None = None, + ) -> UiPathRuntimeResult: + return UiPathRuntimeResult( + output={}, + status=UiPathRuntimeStatus.SUCCESSFUL, + ) + + async def stream( + self, + input: dict[str, Any] | None = None, + options: UiPathStreamOptions | None = None, + ) -> AsyncGenerator[UiPathRuntimeEvent, None]: + yield UiPathRuntimeResult( + output={}, + status=UiPathRuntimeStatus.SUCCESSFUL, + ) + + async def get_schema(self) -> UiPathRuntimeSchema: + return MockRuntimeSchema() + + async def dispose(self) -> None: + pass + + +class 
AgentModelRuntime(BaseTestRuntime): + """Test runtime that implements LLMAgentRuntimeProtocol.""" + + def __init__(self, model: str | None = "gpt-4o-2024-11-20"): + self._model = model + + def get_agent_model(self) -> str | None: + return self._model + + +class WrapperRuntime(BaseTestRuntime): + """Test runtime that wraps another runtime (like UiPathResumableRuntime).""" + + def __init__(self, delegate: Any): + self.delegate = delegate + + async def get_schema(self) -> UiPathRuntimeSchema: + return await self.delegate.get_schema() + + +class PrivateDelegateRuntime(BaseTestRuntime): + """Test runtime with private _delegate attribute.""" + + def __init__(self, delegate: Any): + self._delegate = delegate + + async def get_schema(self) -> UiPathRuntimeSchema: + return await self._delegate.get_schema() + + +class MockFactory: + """Mock factory for creating test runtimes.""" + + def __init__(self, runtime_creator): + self.runtime_creator = runtime_creator + self.new_runtime_call_count = 0 + + def discover_entrypoints(self) -> list[str]: + return ["test"] + + async def discover_runtimes(self) -> list[UiPathRuntimeProtocol]: + return [await self.runtime_creator()] + + async def new_runtime( + self, entrypoint: str, runtime_id: str + ) -> UiPathRuntimeProtocol: + self.new_runtime_call_count += 1 + return await self.runtime_creator() + + async def dispose(self) -> None: + pass + + +class TestLLMAgentRuntimeProtocol: + """Tests for LLMAgentRuntimeProtocol detection.""" + + def test_protocol_detects_implementing_class(self): + """Test that protocol correctly identifies implementing classes.""" + runtime = AgentModelRuntime("gpt-4") + assert isinstance(runtime, LLMAgentRuntimeProtocol) + + def test_protocol_rejects_non_implementing_class(self): + """Test that protocol correctly rejects non-implementing classes.""" + runtime = BaseTestRuntime() + assert not isinstance(runtime, LLMAgentRuntimeProtocol) + + def test_protocol_rejects_wrapper_without_method(self): + """Test that wrapper without get_agent_model is not detected.""" + inner = AgentModelRuntime("gpt-4") + wrapper = WrapperRuntime(inner) + assert not isinstance(wrapper, LLMAgentRuntimeProtocol) + + +class TestFindAgentModelInRuntime: + """Tests for _find_agent_model_in_runtime recursive search.""" + + @pytest.fixture + def eval_runtime(self): + """Create an eval runtime for testing.""" + context = UiPathEvalContext() + context.eval_set = str( + Path(__file__).parent / "evals" / "eval-sets" / "default.json" + ) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + + async def create_runtime(): + return BaseTestRuntime() + + factory = MockFactory(create_runtime) + return UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + def test_finds_model_in_direct_runtime(self, eval_runtime): + """Test finding agent model directly on runtime.""" + runtime = AgentModelRuntime("gpt-4o") + result = eval_runtime._find_agent_model_in_runtime(runtime) + assert result == "gpt-4o" + + def test_finds_model_in_wrapped_runtime(self, eval_runtime): + """Test finding agent model through wrapper's delegate.""" + inner = AgentModelRuntime("claude-3") + wrapper = WrapperRuntime(inner) + result = eval_runtime._find_agent_model_in_runtime(wrapper) + assert result == "claude-3" + + def test_finds_model_in_deeply_wrapped_runtime(self, eval_runtime): + """Test finding agent model through multiple wrapper layers.""" + inner = AgentModelRuntime("gpt-4-turbo") + wrapper1 = WrapperRuntime(inner) + wrapper2 = WrapperRuntime(wrapper1) + result = 
eval_runtime._find_agent_model_in_runtime(wrapper2) + assert result == "gpt-4-turbo" + + def test_finds_model_via_private_delegate(self, eval_runtime): + """Test finding agent model through _delegate attribute.""" + inner = AgentModelRuntime("gemini-pro") + wrapper = PrivateDelegateRuntime(inner) + result = eval_runtime._find_agent_model_in_runtime(wrapper) + assert result == "gemini-pro" + + def test_returns_none_when_no_model(self, eval_runtime): + """Test returns None when no runtime implements the protocol.""" + runtime = BaseTestRuntime() + result = eval_runtime._find_agent_model_in_runtime(runtime) + assert result is None + + def test_returns_none_for_none_model(self, eval_runtime): + """Test returns None when runtime returns None for model.""" + runtime = AgentModelRuntime(None) + result = eval_runtime._find_agent_model_in_runtime(runtime) + assert result is None + + +class TestEnsureMetadataLoaded: + """Tests for _ensure_metadata_loaded caching behavior.""" + + @pytest.fixture + def context(self): + """Create eval context.""" + context = UiPathEvalContext() + context.eval_set = str( + Path(__file__).parent / "evals" / "eval-sets" / "default.json" + ) + return context + + async def test_loads_both_schema_and_model(self, context): + """Test that _ensure_metadata_loaded fetches both schema and agent model.""" + + async def create_runtime(): + return AgentModelRuntime("gpt-4o-mini") + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + # Initially not loaded + assert eval_runtime._metadata_loaded is False + assert eval_runtime.schema is None + assert eval_runtime._agent_model is None + + await eval_runtime._ensure_metadata_loaded() + + # Both should now be loaded + assert eval_runtime._metadata_loaded is True + assert eval_runtime.schema is not None + assert eval_runtime._agent_model == "gpt-4o-mini" + + async def test_creates_only_one_runtime(self, context): + """Test that only one temporary runtime is created for metadata.""" + + async def create_runtime(): + return AgentModelRuntime("test-model") + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + # Call multiple times + await eval_runtime._ensure_metadata_loaded() + await eval_runtime._ensure_metadata_loaded() + await eval_runtime._ensure_metadata_loaded() + + # Factory should only have been called once + assert factory.new_runtime_call_count == 1 + + async def test_caches_results(self, context): + """Test that results are cached after first load.""" + call_count = 0 + + async def create_runtime(): + nonlocal call_count + call_count += 1 + return AgentModelRuntime(f"model-{call_count}") + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + await eval_runtime._ensure_metadata_loaded() + first_model = eval_runtime._agent_model + + await eval_runtime._ensure_metadata_loaded() + second_model = eval_runtime._agent_model + + # Should be the same cached value + assert first_model == second_model == "model-1" + + +class TestGetAgentModel: + """Tests for _get_agent_model method.""" + + @pytest.fixture + def context(self): + """Create eval context.""" + context = UiPathEvalContext() + context.eval_set = str( + Path(__file__).parent / "evals" 
/ "eval-sets" / "default.json" + ) + return context + + async def test_returns_agent_model(self, context): + """Test that _get_agent_model returns the correct model.""" + + async def create_runtime(): + return AgentModelRuntime("gpt-4o-2024-11-20") + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + model = await eval_runtime._get_agent_model() + assert model == "gpt-4o-2024-11-20" + + async def test_returns_none_when_no_model(self, context): + """Test that _get_agent_model returns None when runtime has no model.""" + + async def create_runtime(): + return BaseTestRuntime() + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + model = await eval_runtime._get_agent_model() + assert model is None + + async def test_returns_cached_model(self, context): + """Test that _get_agent_model uses cached value.""" + + async def create_runtime(): + return AgentModelRuntime("cached-model") + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + # First call loads metadata + model1 = await eval_runtime._get_agent_model() + # Second call should use cache + model2 = await eval_runtime._get_agent_model() + + assert model1 == model2 == "cached-model" + assert factory.new_runtime_call_count == 1 + + async def test_handles_exception_gracefully(self, context): + """Test that _get_agent_model returns None on exception.""" + + async def create_runtime(): + raise RuntimeError("Factory error") + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + model = await eval_runtime._get_agent_model() + assert model is None + + +class TestGetSchema: + """Tests for get_schema method.""" + + @pytest.fixture + def context(self): + """Create eval context.""" + context = UiPathEvalContext() + context.eval_set = str( + Path(__file__).parent / "evals" / "eval-sets" / "default.json" + ) + return context + + async def test_returns_schema(self, context): + """Test that get_schema returns the schema.""" + + async def create_runtime(): + return BaseTestRuntime() + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + schema = await eval_runtime.get_schema() + assert schema is not None + assert schema.file_path == "test.py" + + async def test_returns_cached_schema(self, context): + """Test that get_schema uses cached value.""" + + async def create_runtime(): + return BaseTestRuntime() + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + # First call loads metadata + schema1 = await eval_runtime.get_schema() + # Second call should use cache + schema2 = await eval_runtime.get_schema() + + assert schema1 is schema2 + assert factory.new_runtime_call_count == 1 + + async def test_schema_and_model_share_runtime(self, context): + """Test that get_schema and _get_agent_model share the same runtime creation.""" + + async def create_runtime(): + return 
AgentModelRuntime("shared-model") + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + # Call both methods + schema = await eval_runtime.get_schema() + model = await eval_runtime._get_agent_model() + + # Should only create one runtime + assert factory.new_runtime_call_count == 1 + assert schema is not None + assert model == "shared-model" + + +class TestWrappedRuntimeModelResolution: + """Tests for model resolution through realistic wrapper chains.""" + + @pytest.fixture + def context(self): + """Create eval context.""" + context = UiPathEvalContext() + context.eval_set = str( + Path(__file__).parent / "evals" / "eval-sets" / "default.json" + ) + return context + + async def test_resolves_model_through_resumable_telemetry_chain(self, context): + """Test model resolution through ResumableRuntime -> TelemetryWrapper -> BaseRuntime chain. + + This mimics the real wrapper chain: + UiPathResumableRuntime -> TelemetryRuntimeWrapper -> AgentsLangGraphRuntime + """ + # Base runtime with model + base_runtime = AgentModelRuntime("gpt-4o-from-agent-json") + + # Simulate TelemetryRuntimeWrapper + telemetry_wrapper = WrapperRuntime(base_runtime) + + # Simulate UiPathResumableRuntime + resumable_runtime = WrapperRuntime(telemetry_wrapper) + + async def create_runtime(): + return resumable_runtime + + factory = MockFactory(create_runtime) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + + model = await eval_runtime._get_agent_model() + assert model == "gpt-4o-from-agent-json" diff --git a/uv.lock b/uv.lock index 5ae61eecf..727e3b946 100644 --- a/uv.lock +++ b/uv.lock @@ -2477,7 +2477,7 @@ wheels = [ [[package]] name = "uipath" -version = "2.3.0" +version = "2.3.1" source = { editable = "." } dependencies = [ { name = "click" },