diff --git a/docs.json b/docs.json
index 0878a67b..3738b767 100644
--- a/docs.json
+++ b/docs.json
@@ -215,7 +215,8 @@
         {
           "group": "Extensions",
           "pages": [
-            "openhands/usage/cli/mcp-servers"
+            "openhands/usage/cli/mcp-servers",
+            "openhands/usage/cli/critic"
           ]
         },
         {
@@ -268,7 +269,8 @@
           "sdk/guides/agent-custom",
           "sdk/guides/convo-custom-visualizer",
           "sdk/guides/agent-stuck-detector",
-          "sdk/guides/agent-tom-agent"
+          "sdk/guides/agent-tom-agent",
+          "sdk/guides/critic"
         ]
       },
       {
diff --git a/openhands/usage/cli/critic-demo.mp4 b/openhands/usage/cli/critic-demo.mp4
new file mode 100644
index 00000000..2cc6ed27
Binary files /dev/null and b/openhands/usage/cli/critic-demo.mp4 differ
diff --git a/openhands/usage/cli/critic.mdx b/openhands/usage/cli/critic.mdx
new file mode 100644
index 00000000..5bc1f930
--- /dev/null
+++ b/openhands/usage/cli/critic.mdx
@@ -0,0 +1,41 @@
+---
+title: Critic (Experimental)
+description: Automatic task success prediction for OpenHands LLM Provider users
+---
+
+<Warning>
+**This feature is highly experimental** and subject to change. The API, configuration, and behavior may evolve significantly based on feedback and testing.
+</Warning>
+
+## Overview
+
+If you're using the [OpenHands LLM Provider](/openhands/usage/llms/openhands-llms), an experimental **critic feature** is automatically enabled to predict task success in real time.
+
+For detailed information about the critic feature, including programmatic access and advanced usage, see the [SDK Critic Guide](/sdk/guides/critic).
+
+## What is the Critic?
+
+The critic is an LLM-based evaluator that analyzes agent actions and conversation history to predict the quality or success probability of agent decisions. It provides:
+
+- **Quality scores**: Probability scores between 0.0 and 1.0 indicating predicted success
+- **Real-time feedback**: Scores computed during agent execution, not just at completion
+
+<video controls src="./critic-demo.mp4"></video>
+
+![Critic output in CLI](./screenshots/critic-cli-output.png)
+
+## Pricing
+
+The critic feature is **free during the public beta phase** for all OpenHands LLM Provider users.
+
+## Disabling the Critic
+
+If you prefer not to use the critic feature, you can disable it in your settings.
+
+![Critic settings in CLI](./screenshots/critic-cli-settings.png)
diff --git a/openhands/usage/cli/screenshots/critic-cli-output.png b/openhands/usage/cli/screenshots/critic-cli-output.png
new file mode 100644
index 00000000..1dc97ea6
Binary files /dev/null and b/openhands/usage/cli/screenshots/critic-cli-output.png differ
diff --git a/openhands/usage/cli/screenshots/critic-cli-settings.png b/openhands/usage/cli/screenshots/critic-cli-settings.png
new file mode 100644
index 00000000..3eb41695
Binary files /dev/null and b/openhands/usage/cli/screenshots/critic-cli-settings.png differ
diff --git a/sdk/api-reference/openhands.sdk.agent.mdx b/sdk/api-reference/openhands.sdk.agent.mdx
index 07de2190..f55127f3 100644
--- a/sdk/api-reference/openhands.sdk.agent.mdx
+++ b/sdk/api-reference/openhands.sdk.agent.mdx
@@ -26,18 +26,8 @@ AgentBase and implements the agent execution logic.
 
 #### Properties
 
-- `agent_context`: AgentContext | None
-- `condenser`: CondenserBase | None
-- `filter_tools_regex`: str | None
-- `include_default_tools`: list[str]
-- `llm`: LLM
-- `mcp_config`: dict[str, Any]
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
-- `security_policy_filename`: str
-- `system_prompt_filename`: str
-- `system_prompt_kwargs`: dict[str, object]
-- `tools`: list[Tool]
 
 #### Methods
 
@@ -94,11 +84,12 @@ agent implementations must follow.
 
 - `agent_context`: AgentContext | None
 - `condenser`: CondenserBase | None
+- `critic`: CriticBase | None
 - `filter_tools_regex`: str | None
 - `include_default_tools`: list[str]
 - `llm`: LLM
 - `mcp_config`: dict[str, Any]
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `name`: str
   Returns the name of the Agent.
diff --git a/sdk/api-reference/openhands.sdk.conversation.mdx b/sdk/api-reference/openhands.sdk.conversation.mdx
index 7a58929d..ee27a282 100644
--- a/sdk/api-reference/openhands.sdk.conversation.mdx
+++ b/sdk/api-reference/openhands.sdk.conversation.mdx
@@ -126,6 +126,10 @@ Send a message to the agent.
 
 Set the confirmation policy for the conversation.
 
+#### abstractmethod set_security_analyzer()
+
+Set the security analyzer for the conversation.
+
 #### abstractmethod update_secrets()
 
 ### class Conversation
@@ -197,8 +201,6 @@ Bases: `OpenHandsModel`
 
 - `execution_status`: [ConversationExecutionStatus](#class-conversationexecutionstatus)
 - `id`: UUID
 - `max_iterations`: int
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `persistence_dir`: str | None
 - `secret_registry`: [SecretRegistry](#class-secretregistry)
 - `security_analyzer`: SecurityAnalyzerBase | None
@@ -280,6 +282,10 @@ actions that are pending confirmation or execution.
 
 Return True if the lock is currently held by any thread.
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### model_post_init()
 
 This function is meant to behave like a BaseModel method to initialise private attributes.
@@ -352,7 +358,25 @@ Conversation will then calls MyVisualizer() followed by initialize(state)
 
 Initialize the visualizer base.
 
-#### initialize()
+#### create_sub_visualizer()
+
+Create a visualizer for a sub-agent during delegation.
+
+Override this method to support sub-agent visualization in multi-agent
+delegation scenarios. The sub-visualizer will be used to display events
+from the spawned sub-agent.
+
+By default, returns None, which means sub-agents will not have visualization.
+Subclasses that support delegation (like DelegationVisualizer) should
+override this method to create appropriate sub-visualizers.
+
+* Parameters:
+  `agent_id` – The identifier of the sub-agent being spawned
+* Returns:
+  A visualizer instance for the sub-agent, or None if sub-agent
+  visualization is not supported
+
+#### final initialize()
 
 Initialize the visualizer with conversation state.
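
The new `create_sub_visualizer()` hook is an override point for custom visualizers. A minimal sketch, assuming a `MyVisualizer` subclass of the visualizer base as in the docstring above (the `prefix` argument is illustrative, not part of the API):

```python
class DelegationAwareVisualizer(MyVisualizer):
    def create_sub_visualizer(self, agent_id: str):
        # Return a visualizer for the spawned sub-agent; returning None
        # (the default) leaves the sub-agent's events unvisualized.
        return MyVisualizer(prefix=f"[{agent_id}] ")
```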
@@ -772,8 +796,6 @@ even when callable secrets fail on subsequent calls.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `secret_sources`: dict[str, SecretSource]
 
 #### Methods
 
@@ -808,6 +830,10 @@ fresh values from callables to ensure comprehensive masking.
 
 * Returns:
   Text with secret values replaced by ``
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### model_post_init()
 
 This function is meant to behave like a BaseModel method to initialise private attributes.
diff --git a/sdk/api-reference/openhands.sdk.event.mdx b/sdk/api-reference/openhands.sdk.event.mdx
index 35e19600..5e2fbcaa 100644
--- a/sdk/api-reference/openhands.sdk.event.mdx
+++ b/sdk/api-reference/openhands.sdk.event.mdx
@@ -12,8 +12,9 @@ Bases: [`LLMConvertibleEvent`](#class-llmconvertibleevent)
 
 #### Properties
 
 - `action`: Action | None
+- `critic_result`: CriticResult | None
 - `llm_response_id`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `reasoning_content`: str | None
 - `responses_reasoning_item`: ReasoningItemModel | None
@@ -47,7 +48,7 @@ represents an error produced by the agent/scaffold, not model output.
 
 #### Properties
 
 - `error`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `visualize`: Text
@@ -68,7 +69,7 @@ This action indicates a condensation of the conversation history is happening.
 
 - `forgotten_event_ids`: list[str]
 - `llm_response_id`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `summary`: str | None
@@ -86,7 +87,7 @@ This action is used to request a condensation of the conversation history.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `visualize`: Text
@@ -112,7 +113,7 @@ This event represents a summary generated by a condenser.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `summary`: str
@@ -138,7 +139,7 @@ to ensure compatibility with websocket transmission.
 
 #### Properties
 
 - `key`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `value`: Any
@@ -194,7 +195,7 @@ instead of writing it to a file inside the Docker container.
 
 - `filename`: str
 - `log_data`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `model_name`: str
 - `source`: Literal['agent', 'user', 'environment']
@@ -208,11 +209,8 @@ Base class for events that can be converted to LLM messages.
 
 #### Properties
 
-- `id`: EventID
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
-- `source`: SourceType
-- `timestamp`: str
 
 #### Methods
 
@@ -234,8 +232,8 @@ This is originally the “MessageAction”, but it suppose not to be tool call.
 
 #### Properties
 
 - `activated_skills`: list[str]
+- `critic_result`: CriticResult | None
 - `extended_content`: list[TextContent]
-- `id`: EventID
 - `llm_message`: Message
 - `llm_response_id`: str | None
 - `model_config`: ClassVar[ConfigDict] = (configuration object)
@@ -245,7 +243,6 @@ This is originally the “MessageAction”, but it suppose not to be tool call.
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `thinking_blocks`: Sequence[ThinkingBlock | RedactedThinkingBlock]
   Return the Anthropic thinking blocks from the LLM message.
-- `timestamp`: str
 - `visualize`: Text
   Return Rich Text representation of this message event.
@@ -264,7 +261,7 @@ Examples include tool execution, error, user reject.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `tool_call_id`: str
@@ -277,7 +274,7 @@ Bases: [`ObservationBaseEvent`](#class-observationbaseevent)
 
 #### Properties
 
 - `action_id`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `observation`: Observation
 - `visualize`: Text
@@ -296,7 +293,7 @@ Event indicating that the agent execution was paused by user request.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `visualize`: Text
@@ -310,7 +307,7 @@ System prompt added by the agent.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source`: Literal['agent', 'user', 'environment']
 - `system_prompt`: TextContent
@@ -331,7 +328,7 @@ Event from VLLM representing token IDs used in LLM interaction.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `prompt_token_ids`: list[int]
 - `response_token_ids`: list[int]
@@ -346,7 +343,7 @@ Observation when user rejects an action in confirmation mode.
 
 #### Properties
 
 - `action_id`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `rejection_reason`: str
 - `visualize`: Text
diff --git a/sdk/api-reference/openhands.sdk.llm.mdx b/sdk/api-reference/openhands.sdk.llm.mdx
index 8e22367b..fc63ab18 100644
--- a/sdk/api-reference/openhands.sdk.llm.mdx
+++ b/sdk/api-reference/openhands.sdk.llm.mdx
@@ -11,14 +11,15 @@ Bases: `BaseContent`
 
 #### Properties
 
-- `cache_prompt`: bool
 - `image_urls`: list[str]
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `type`: Literal['image']
 
 #### Methods
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### to_llm_dict()
 
 Convert to LLM API format.
@@ -330,8 +331,6 @@ Bases: `BaseModel`
 
 - `content`: Sequence[[TextContent](#class-textcontent) | [ImageContent](#class-imagecontent)]
 - `force_string_serializer`: bool
 - `function_calling_enabled`: bool
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `name`: str | None
 - `reasoning_content`: str | None
 - `responses_reasoning_item`: [ReasoningItemModel](#class-reasoningitemmodel) | None
@@ -360,6 +359,10 @@ Policy (non-stream):
 
 - Collect assistant text by concatenating output_text parts from message items
 - Normalize function_call items to MessageToolCall list
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### to_chat_dict()
 
 Serialize message for OpenAI Chat Completions.
@@ -401,10 +404,11 @@ for Responses function_call_output call_id.
 
 - `arguments`: str
 - `id`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `name`: str
 - `origin`: Literal['completion', 'responses']
+- `costs`: list[Cost]
+- `response_latencies`: list[ResponseLatency]
+- `token_usages`: list[TokenUsage]
 
 #### Methods
 
@@ -418,6 +422,10 @@ Create a MessageToolCall from a typed OpenAI Responses function_call item.
 
 Note: OpenAI Responses function_call.arguments is already a JSON string.
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### to_chat_dict()
 
 Serialize to OpenAI Chat Completions tool_calls format.
 
@@ -426,29 +434,6 @@ Serialize to OpenAI Chat Completions tool_calls format.
 
 Serialize to OpenAI Responses ‘function_call’ input item format.
 
-### class Metrics
-
-Bases: [`MetricsSnapshot`](#class-metricssnapshot)
-
-Metrics class can record various metrics during running and evaluation.
-We track:
-
-- accumulated_cost and costs
-- max_budget_per_task (budget limit)
-- A list of ResponseLatency
-- A list of TokenUsage (one per call).
-
-#### Properties
-
-- `costs`: list[Cost]
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
-- `response_latencies`: list[ResponseLatency]
-- `token_usages`: list[TokenUsage]
-
-#### Methods
-
 #### add_cost()
 
 #### add_response_latency()
 
@@ -490,6 +475,10 @@ Log the metrics.
 
 Merge ‘other’ metrics into this one.
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### classmethod validate_accumulated_cost()
 
 ### class MetricsSnapshot
@@ -506,9 +495,14 @@ Does not include lists of individual costs, latencies, or token usages.
 
 - `accumulated_cost`: float
 - `accumulated_token_usage`: TokenUsage | None
 - `max_budget_per_task`: float | None
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `model_name`: str
+
+#### Methods
+
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 ### class ReasoningItemModel
 
 Bases: `BaseModel`
 
@@ -523,10 +517,15 @@ Do not log or render encrypted_content.
 
 - `content`: list[str] | None
 - `encrypted_content`: str | None
 - `id`: str | None
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `status`: str | None
 - `summary`: list[str]
+
+#### Methods
+
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 ### class RedactedThinkingBlock
 
 Bases: `BaseModel`
 
@@ -540,9 +539,14 @@ before extended thinking was enabled.
 
 #### Properties
 
 - `data`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `type`: Literal['redacted_thinking']
+
+#### Methods
+
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 ### class RegistryEvent
 
 Bases: `BaseModel`
 
@@ -571,7 +575,7 @@ Key features:
 
 - `active_llm`: [LLM](#class-llm) | None
 - `llms_for_routing`: dict[str, [LLM](#class-llm)]
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `router_name`: str
 
@@ -631,7 +635,6 @@ Bases: `BaseContent`
 
 #### Properties
 
-- `cache_prompt`: bool
 - `enable_truncation`: bool
 - `model_config`: ClassVar[ConfigDict] = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 
@@ -657,8 +660,12 @@ and passed back to the API for tool use scenarios.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `signature`: str | None
 - `thinking`: str
-- `type`: Literal['thinking']
\ No newline at end of file
+- `type`: Literal['thinking']
+
+#### Methods
+
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
diff --git a/sdk/api-reference/openhands.sdk.tool.mdx b/sdk/api-reference/openhands.sdk.tool.mdx
index 1e6234ac..62b85a29 100644
--- a/sdk/api-reference/openhands.sdk.tool.mdx
+++ b/sdk/api-reference/openhands.sdk.tool.mdx
@@ -13,7 +13,7 @@ Base schema for input action.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `visualize`: Text
   Return Rich Text representation of this action.
@@ -47,9 +47,8 @@ Tool for signaling the completion of a task or conversation.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
-- `name`: ClassVar[str] = 'finish'
 
 #### Methods
 
@@ -65,6 +64,8 @@ Create FinishTool instance.
 
 * Raises:
   `ValueError` – If any parameters are provided.
 
+#### name = 'finish'
+
 ### class Observation
 
 Bases: `Schema`, `ABC`
 
 Base schema for output observation.
 
 #### Properties
 
 - `ERROR_MESSAGE_HEADER`: ClassVar[str] = '[An error occurred during execution.]\n'
 - `content`: list[TextContent | ImageContent]
 - `is_error`: bool
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `text`: str
   Extract all text content from the observation.
@@ -113,9 +114,8 @@ Tool for logging thoughts without making changes.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
+- `model_config`: = (configuration object)
   Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
-- `name`: ClassVar[str] = 'think'
 
 #### Methods
 
@@ -131,6 +131,8 @@ Create ThinkTool instance.
 
 * Raises:
   `ValueError` – If any parameters are provided.
 
+#### name = 'think'
+
 ### class Tool
 
 Bases: `BaseModel`
 
@@ -142,13 +144,15 @@ This is only used in agent-sdk for type schema for server use.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `name`: str
 - `params`: dict[str, Any]
 
 #### Methods
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### classmethod validate_name()
 
 Validate that name is not empty.
diff --git a/sdk/api-reference/openhands.sdk.workspace.mdx b/sdk/api-reference/openhands.sdk.workspace.mdx
index a1427c87..48066655 100644
--- a/sdk/api-reference/openhands.sdk.workspace.mdx
+++ b/sdk/api-reference/openhands.sdk.workspace.mdx
@@ -25,8 +25,6 @@ support the context manager protocol for safe resource management.
 
 #### Properties
 
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `working_dir`: Annotated[str, BeforeValidator(func=_convert_path_to_str, json_schema_input_type=PydanticUndefined), FieldInfo(annotation=NoneType, required=True, description='The working directory for agent operations and tool execution. Accepts both string paths and Path objects. Path objects are automatically converted to strings.')]
 
 #### Methods
 
@@ -101,6 +99,10 @@ Get the git diff for the file at the path given.
 
 * Raises:
   `Exception` – If path is not a git repository or getting diff failed
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### pause()
 
 Pause the workspace to conserve resources.
 
@@ -132,11 +134,16 @@ Result of executing a command in the workspace.
 
 - `command`: str
 - `exit_code`: int
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `stderr`: str
 - `stdout`: str
 - `timeout_occurred`: bool
+
+#### Methods
+
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 ### class FileOperationResult
 
 Bases: `BaseModel`
 
@@ -149,10 +156,15 @@ Result of a file upload or download operation.
 
 - `destination_path`: str
 - `error`: str | None
 - `file_size`: int | None
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
 - `source_path`: str
 - `success`: bool
+
+#### Methods
+
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 ### class LocalWorkspace
 
 Bases: [`BaseWorkspace`](#class-baseworkspace)
 
@@ -172,13 +184,6 @@ should operate directly on the host system.
 
 ...     content = workspace.read_file("README.md")
 ```
 
-
-#### Properties
-
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
-- `working_dir`: Annotated[str, BeforeValidator(_convert_path_to_str), Field(description='The working directory for agent operations and tool execution. Accepts both string paths and Path objects. Path objects are automatically converted to strings.')]
-
 #### Methods
 
 #### __init__()
 
@@ -263,6 +268,10 @@ Get the git diff for the file at the path given.
 
 * Raises:
   `Exception` – If path is not a git repository or getting diff failed
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### pause()
 
 Pause the workspace (no-op for local workspaces).
 
@@ -306,12 +315,7 @@ as it provides better isolation and security.
 
   Check if the remote workspace is alive by querying the health endpoint.
   * Returns: True if the health endpoint returns a successful response, False otherwise.
-- `api_key`: str | None
 - `client`: Client
-- `host`: str
-- `model_config`: ClassVar[ConfigDict] = (configuration object)
-  Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
-- `working_dir`: str
 
 #### Methods
 
@@ -385,6 +389,10 @@ Get the git diff for the file at the path given.
 
 * Raises:
   `Exception` – If path is not a git repository or getting diff failed
 
+#### model_config = (configuration object)
+
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
 #### model_post_init()
 
 Override this method to perform additional initialization after __init__ and model_construct.
diff --git a/sdk/guides/critic.mdx b/sdk/guides/critic.mdx
new file mode 100644
index 00000000..c0098fbf
--- /dev/null
+++ b/sdk/guides/critic.mdx
@@ -0,0 +1,179 @@
+---
+title: Critic (Experimental)
+description: Real-time evaluation of agent actions using an LLM-based critic model.
+---
+
+<Warning>
+**This feature is highly experimental** and subject to change.
+The API, configuration, and behavior may evolve significantly based on feedback and testing.
+</Warning>
+
+<Note>
+The critic model is hosted by the OpenHands LLM Provider and is currently free to use. This example is available on GitHub: [examples/01_standalone_sdk/34_critic_example.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/01_standalone_sdk/34_critic_example.py)
+</Note>
+
+## What is a Critic?
+
+A **critic** is an evaluator that analyzes agent actions and conversation history to predict the quality or success probability of agent decisions. The critic runs alongside the agent and provides:
+
+- **Quality scores**: Probability scores between 0.0 and 1.0 indicating predicted success
+- **Real-time feedback**: Scores computed during agent execution, not just at completion
+
+You can use critic scores to build automated workflows, such as triggering the agent to reflect on and fix its previous solution when the critic indicates poor task performance.
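+
+As a minimal sketch of that idea (not part of the packaged example; it assumes an `agent` already configured with a critic as in the Quick Start below, and the 0.5 cutoff simply mirrors the `success` property described under Understanding Critic Results):
+
+```python
+from openhands.sdk import ActionEvent, Conversation, Event, MessageEvent
+
+latest_score: float | None = None
+
+def track_critic(event: Event) -> None:
+    """Remember the most recent critic score seen on the event stream."""
+    global latest_score
+    if isinstance(event, (ActionEvent, MessageEvent)) and event.critic_result is not None:
+        latest_score = event.critic_result.score
+
+conversation = Conversation(agent=agent, callbacks=[track_critic])
+conversation.send_message("Fix the failing test in tests/test_app.py.")
+conversation.run()
+
+# One round of self-review if the critic is pessimistic about the outcome
+if latest_score is not None and latest_score < 0.5:
+    conversation.send_message(
+        "The critic flagged the previous solution. Review it and fix any issues."
+    )
+    conversation.run()
+```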
+ """ + base_url = llm.base_url + api_key = llm.api_key + if base_url is None or api_key is None: + return None + + # Match: llm-proxy.{env}.all-hands.dev (e.g., staging, prod, eval) + pattern = r"^https?://llm-proxy\.[^./]+\.all-hands\.dev" + if not re.match(pattern, base_url): + return None + + return APIBasedCritic( + server_url=f"{base_url.rstrip('/')}/vllm", + api_key=api_key, + model_name="critic", + ) + + +llm_api_key = get_required_env("LLM_API_KEY") + +llm = LLM( + model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), + api_key=llm_api_key, + base_url=os.getenv("LLM_BASE_URL", None), +) + +# Try auto-configuration for All-Hands proxy, fall back to explicit env vars +critic = get_default_critic(llm) +if critic is None: + critic = APIBasedCritic( + server_url=get_required_env("CRITIC_SERVER_URL"), + api_key=get_required_env("CRITIC_API_KEY"), + model_name=get_required_env("CRITIC_MODEL_NAME"), + ) + + +# Configure agent with critic +agent = Agent( + llm=llm, + tools=[ + Tool(name=TerminalTool.name), + Tool(name=FileEditorTool.name), + Tool(name=TaskTrackerTool.name), + ], + # Add critic to evaluate agent actions + critic=critic, +) + +cwd = os.getcwd() +conversation = Conversation(agent=agent, workspace=cwd) + +conversation.send_message( + "Create a file called GREETING.txt with a friendly greeting message." +) +conversation.run() + +print("\nAll done! Check the output above for 'Critic Score' in the visualizer.") +``` + +```bash Running the Example +uv run python examples/01_standalone_sdk/34_critic_example.py +``` + +## Understanding Critic Results + +Critic evaluations produce scores and feedback: + +- **`score`**: Float between 0.0 and 1.0 representing predicted success probability +- **`message`**: Optional feedback with detailed probabilities +- **`success`**: Boolean property (True if score >= 0.5) + +Results are automatically displayed in the conversation visualizer: + +![Critic results in SDK visualizer](./screenshots/critic-sdk-visualizer.png) + +### Accessing Results Programmatically + +```python +from openhands.sdk import Event, ActionEvent, MessageEvent + +def callback(event: Event): + if isinstance(event, (ActionEvent, MessageEvent)): + if event.critic_result is not None: + print(f"Critic score: {event.critic_result.score:.3f}") + print(f"Success: {event.critic_result.success}") + +conversation = Conversation(agent=agent, callbacks=[callback]) +``` + +## Troubleshooting + +### Critic Evaluations Not Appearing + +- Verify the critic is properly configured and passed to the Agent +- Ensure you're using the OpenHands LLM Provider (`llm-proxy.*.all-hands.dev`) + +### API Authentication Errors + +- Verify `LLM_API_KEY` is set correctly +- Check that the API key has not expired + +## Next Steps + +- **[Observability](/sdk/guides/observability)** - Monitor and log agent behavior +- **[Metrics](/sdk/guides/metrics)** - Collect performance metrics +- **[Stuck Detector](/sdk/guides/agent-stuck-detector)** - Detect unproductive agent patterns diff --git a/sdk/guides/hooks.mdx b/sdk/guides/hooks.mdx index eaee0390..620cd38c 100644 --- a/sdk/guides/hooks.mdx +++ b/sdk/guides/hooks.mdx @@ -37,7 +37,7 @@ from pathlib import Path from pydantic import SecretStr from openhands.sdk import LLM, Conversation -from openhands.sdk.hooks import HookConfig +from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher from openhands.tools.preset.default import get_default_agent @@ -66,60 +66,65 @@ with tempfile.TemporaryDirectory() as tmpdir: log_file = workspace / 
"tool_usage.log" summary_file = workspace / "summary.txt" - # Configure ALL hook types in one config - hook_config = HookConfig.from_dict( - { - "hooks": { - "PreToolUse": [ - { - "matcher": "terminal", - "hooks": [ - { - "type": "command", - "command": str(SCRIPT_DIR / "block_dangerous.sh"), - "timeout": 10, - } - ], - } + # Configure hooks using the typed approach (recommended) + # This provides better type safety and IDE support + hook_config = HookConfig( + pre_tool_use=[ + HookMatcher( + matcher="terminal", + hooks=[ + HookDefinition( + command=str(SCRIPT_DIR / "block_dangerous.sh"), + timeout=10, + ) ], - "PostToolUse": [ - { - "matcher": "*", - "hooks": [ - { - "type": "command", - "command": f"LOG_FILE={log_file} " - f"{SCRIPT_DIR / 'log_tools.sh'}", - "timeout": 5, - } - ], - } + ) + ], + post_tool_use=[ + HookMatcher( + matcher="*", + hooks=[ + HookDefinition( + command=(f"LOG_FILE={log_file} {SCRIPT_DIR / 'log_tools.sh'}"), + timeout=5, + ) ], - "UserPromptSubmit": [ - { - "hooks": [ - { - "type": "command", - "command": str(SCRIPT_DIR / "inject_git_context.sh"), - } - ], - } + ) + ], + user_prompt_submit=[ + HookMatcher( + hooks=[ + HookDefinition( + command=str(SCRIPT_DIR / "inject_git_context.sh"), + ) ], - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": f"SUMMARY_FILE={summary_file} " - f"{SCRIPT_DIR / 'require_summary.sh'}", - } - ], - } + ) + ], + stop=[ + HookMatcher( + hooks=[ + HookDefinition( + command=( + f"SUMMARY_FILE={summary_file} " + f"{SCRIPT_DIR / 'require_summary.sh'}" + ), + ) ], - } - } + ) + ], ) + # Alternative: You can also use .from_dict() for loading from JSON config files + # Example with a single hook matcher: + # hook_config = HookConfig.from_dict({ + # "hooks": { + # "PreToolUse": [{ + # "matcher": "terminal", + # "hooks": [{"command": "path/to/script.sh", "timeout": 10}] + # }] + # } + # }) + agent = get_default_agent(llm=llm) conversation = Conversation( agent=agent, diff --git a/sdk/guides/plugins.mdx b/sdk/guides/plugins.mdx index 8f8287d5..b38531e8 100644 --- a/sdk/guides/plugins.mdx +++ b/sdk/guides/plugins.mdx @@ -91,10 +91,23 @@ for skill in plugin.skills: print(f" Triggers: {skill.trigger}") # Hooks -print(f"\nHooks: {'Configured' if plugin.hooks else 'None'}") -if plugin.hooks: - for event_type, matchers in plugin.hooks.hooks.items(): - print(f" - {event_type}: {len(matchers)} matcher(s)") +hook_config = plugin.hooks +has_hooks = hook_config is not None and not hook_config.is_empty() +print(f"\nHooks: {'Configured' if has_hooks else 'None'}") +if has_hooks: + assert hook_config is not None + if hook_config.pre_tool_use: + print(f" - PreToolUse: {len(hook_config.pre_tool_use)} matcher(s)") + if hook_config.post_tool_use: + print(f" - PostToolUse: {len(hook_config.post_tool_use)} matcher(s)") + if hook_config.user_prompt_submit: + print(f" - UserPromptSubmit: {len(hook_config.user_prompt_submit)} matcher(s)") + if hook_config.session_start: + print(f" - SessionStart: {len(hook_config.session_start)} matcher(s)") + if hook_config.session_end: + print(f" - SessionEnd: {len(hook_config.session_end)} matcher(s)") + if hook_config.stop: + print(f" - Stop: {len(hook_config.stop)} matcher(s)") # MCP Config print(f"\nMCP Config: {'Configured' if plugin.mcp_config else 'None'}") @@ -138,6 +151,7 @@ if not api_key: print("Skipping agent demo (LLM_API_KEY not set)") print("\nTo run the full demo, set the LLM_API_KEY environment variable:") print(" export LLM_API_KEY=your-api-key") + print("EXAMPLE_COST: 0") sys.exit(0) # Configure LLM 
@@ -138,6 +151,7 @@ if not api_key:
     print("Skipping agent demo (LLM_API_KEY not set)")
     print("\nTo run the full demo, set the LLM_API_KEY environment variable:")
     print("  export LLM_API_KEY=your-api-key")
+    print("EXAMPLE_COST: 0")
     sys.exit(0)
 
 # Configure LLM
@@ -146,6 +160,7 @@ llm = LLM(
     usage_id="plugin-demo",
     model=model,
     api_key=SecretStr(api_key),
+    base_url=os.getenv("LLM_BASE_URL"),
 )
 
 # Create agent context with plugin skills
@@ -212,6 +227,7 @@ with tempfile.TemporaryDirectory() as tmpdir:
         print("No hook log file found (hooks may not have executed)")
 
     print(f"\nTotal cost: ${llm.metrics.accumulated_cost:.4f}")
+    print(f"EXAMPLE_COST: {llm.metrics.accumulated_cost:.4f}")
 ```
 
 ```bash Running the Example
diff --git a/sdk/guides/screenshots/critic-sdk-visualizer.png b/sdk/guides/screenshots/critic-sdk-visualizer.png
new file mode 100644
index 00000000..b8a7473c
Binary files /dev/null and b/sdk/guides/screenshots/critic-sdk-visualizer.png differ
diff --git a/sdk/guides/skill.mdx b/sdk/guides/skill.mdx
index 6ac5ec04..8b9ff62f 100644
--- a/sdk/guides/skill.mdx
+++ b/sdk/guides/skill.mdx
@@ -378,6 +378,7 @@ when triggered, plus the agent can proactively read them anytime.
 """
 
 import os
+import sys
 from pathlib import Path
 
 from pydantic import SecretStr
@@ -392,108 +393,102 @@ from openhands.tools.file_editor import FileEditorTool
 from openhands.tools.terminal import TerminalTool
 
 
-def main():
-    # Get the directory containing this script
-    script_dir = Path(__file__).parent
-    example_skills_dir = script_dir / "example_skills"
-
-    # =========================================================================
-    # Part 1: Loading Skills from a Directory
-    # =========================================================================
-    print("=" * 80)
-    print("Part 1: Loading Skills from a Directory")
-    print("=" * 80)
-
-    print(f"Loading skills from: {example_skills_dir}")
-
-    # Discover resources in the skill directory
-    skill_subdir = example_skills_dir / "rot13-encryption"
-    resources = discover_skill_resources(skill_subdir)
-    print("\nDiscovered resources in rot13-encryption/:")
-    print(f"  - scripts: {resources.scripts}")
-    print(f"  - references: {resources.references}")
-    print(f"  - assets: {resources.assets}")
-
-    # Load skills from the directory
-    repo_skills, knowledge_skills, agent_skills = load_skills_from_dir(
-        example_skills_dir
-    )
-
-    print("\nLoaded skills from directory:")
-    print(f"  - Repo skills: {list(repo_skills.keys())}")
-    print(f"  - Knowledge skills: {list(knowledge_skills.keys())}")
-    print(f"  - Agent skills (SKILL.md): {list(agent_skills.keys())}")
-
-    # Access the loaded skill and show all AgentSkills standard fields
-    if agent_skills:
-        skill_name = list(agent_skills.keys())[0]
-        loaded_skill = agent_skills[skill_name]
-        print(f"\nDetails for '{skill_name}' (AgentSkills standard fields):")
-        print(f"  - Name: {loaded_skill.name}")
-        desc = loaded_skill.description or ""
-        print(f"  - Description: {desc[:70]}...")
-        print(f"  - License: {loaded_skill.license}")
-        print(f"  - Compatibility: {loaded_skill.compatibility}")
-        print(f"  - Metadata: {loaded_skill.metadata}")
-        if loaded_skill.resources:
-            print("  - Resources:")
-            print(f"    - Scripts: {loaded_skill.resources.scripts}")
-            print(f"    - References: {loaded_skill.resources.references}")
-            print(f"    - Assets: {loaded_skill.resources.assets}")
-            print(f"    - Skill root: {loaded_skill.resources.skill_root}")
-
-    # =========================================================================
-    # Part 2: Using Skills with an Agent
-    # =========================================================================
-    print("\n" + "=" * 80)
-    print("Part 2: Using Skills with an Agent")
-    print("=" * 80)
-
-    # Check for API key
-    api_key = os.getenv("LLM_API_KEY")
-    if not api_key:
-        print("Skipping agent demo (LLM_API_KEY not set)")
-        print("\nTo run the full demo, set the LLM_API_KEY environment variable:")
-        print("  export LLM_API_KEY=your-api-key")
-        return
-
-    # Configure LLM
-    model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
-    llm = LLM(
-        usage_id="skills-demo",
-        model=model,
-        api_key=SecretStr(api_key),
-        base_url=os.getenv("LLM_BASE_URL"),
-    )
-
-    # Create agent context with loaded skills
-    agent_context = AgentContext(
-        skills=list(agent_skills.values()),
-        # Disable public skills for this demo to keep output focused
-        load_public_skills=False,
-    )
-
-    # Create agent with tools so it can read skill resources
-    tools = [
-        Tool(name=TerminalTool.name),
-        Tool(name=FileEditorTool.name),
-    ]
-    agent = Agent(llm=llm, tools=tools, agent_context=agent_context)
+# Get the directory containing this script
+script_dir = Path(__file__).parent
+example_skills_dir = script_dir / "example_skills"
+
+# =========================================================================
+# Part 1: Loading Skills from a Directory
+# =========================================================================
+print("=" * 80)
+print("Part 1: Loading Skills from a Directory")
+print("=" * 80)
+
+print(f"Loading skills from: {example_skills_dir}")
+
+# Discover resources in the skill directory
+skill_subdir = example_skills_dir / "rot13-encryption"
+resources = discover_skill_resources(skill_subdir)
+print("\nDiscovered resources in rot13-encryption/:")
+print(f"  - scripts: {resources.scripts}")
+print(f"  - references: {resources.references}")
+print(f"  - assets: {resources.assets}")
+
+# Load skills from the directory
+repo_skills, knowledge_skills, agent_skills = load_skills_from_dir(example_skills_dir)
+
+print("\nLoaded skills from directory:")
+print(f"  - Repo skills: {list(repo_skills.keys())}")
+print(f"  - Knowledge skills: {list(knowledge_skills.keys())}")
+print(f"  - Agent skills (SKILL.md): {list(agent_skills.keys())}")
+
+# Access the loaded skill and show all AgentSkills standard fields
+if agent_skills:
+    skill_name = next(iter(agent_skills))
+    loaded_skill = agent_skills[skill_name]
+    print(f"\nDetails for '{skill_name}' (AgentSkills standard fields):")
+    print(f"  - Name: {loaded_skill.name}")
+    desc = loaded_skill.description or ""
+    print(f"  - Description: {desc[:70]}...")
+    print(f"  - License: {loaded_skill.license}")
+    print(f"  - Compatibility: {loaded_skill.compatibility}")
+    print(f"  - Metadata: {loaded_skill.metadata}")
+    if loaded_skill.resources:
+        print("  - Resources:")
+        print(f"    - Scripts: {loaded_skill.resources.scripts}")
+        print(f"    - References: {loaded_skill.resources.references}")
+        print(f"    - Assets: {loaded_skill.resources.assets}")
+        print(f"    - Skill root: {loaded_skill.resources.skill_root}")
+
+# =========================================================================
+# Part 2: Using Skills with an Agent
+# =========================================================================
+print("\n" + "=" * 80)
+print("Part 2: Using Skills with an Agent")
+print("=" * 80)
+
+# Check for API key
+api_key = os.getenv("LLM_API_KEY")
+if not api_key:
+    print("Skipping agent demo (LLM_API_KEY not set)")
+    print("\nTo run the full demo, set the LLM_API_KEY environment variable:")
+    print("  export LLM_API_KEY=your-api-key")
+    sys.exit(0)
+
+# Configure LLM
+model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
+llm = LLM(
+    usage_id="skills-demo",
+    model=model,
+    api_key=SecretStr(api_key),
+    base_url=os.getenv("LLM_BASE_URL"),
+)
 
-    # Create conversation
-    conversation = Conversation(agent=agent, workspace=os.getcwd())
+# Create agent context with loaded skills
+agent_context = AgentContext(
+    skills=list(agent_skills.values()),
+    # Disable public skills for this demo to keep output focused
+    load_public_skills=False,
+)
 
-    # Test the skill (triggered by "encrypt" keyword)
-    # The skill provides instructions and a script for ROT13 encryption
-    print("\nSending message with 'encrypt' keyword to trigger skill...")
-    conversation.send_message("Encrypt the message 'hello world'.")
-    conversation.run()
+# Create agent with tools so it can read skill resources
+tools = [
+    Tool(name=TerminalTool.name),
+    Tool(name=FileEditorTool.name),
+]
+agent = Agent(llm=llm, tools=tools, agent_context=agent_context)
 
-    print(f"\nTotal cost: ${llm.metrics.accumulated_cost:.4f}")
+# Create conversation
+conversation = Conversation(agent=agent, workspace=os.getcwd())
 
+# Test the skill (triggered by "encrypt" keyword)
+# The skill provides instructions and a script for ROT13 encryption
+print("\nSending message with 'encrypt' keyword to trigger skill...")
+conversation.send_message("Encrypt the message 'hello world'.")
+conversation.run()
 
-if __name__ == "__main__":
-    main()
+print(f"\nTotal cost: ${llm.metrics.accumulated_cost:.4f}")
+print(f"EXAMPLE_COST: {llm.metrics.accumulated_cost:.4f}")
 ```
 
 ```bash Running the Example