diff --git a/src/askui/chat/migrations/shared/assistants/seeds.py b/src/askui/chat/migrations/shared/assistants/seeds.py index 4ccf194b..210f2b40 100644 --- a/src/askui/chat/migrations/shared/assistants/seeds.py +++ b/src/askui/chat/migrations/shared/assistants/seeds.py @@ -24,22 +24,22 @@ """ ), tools=[ - "computer_disconnect", - "computer_connect", - "computer_mouse_click", - "computer_get_mouse_position", - "computer_keyboard_pressed", - "computer_keyboard_release", - "computer_keyboard_tap", - "computer_list_displays", - "computer_mouse_hold_down", - "computer_mouse_release", - "computer_mouse_scroll", - "computer_move_mouse", - "computer_retrieve_active_display", - "computer_screenshot", - "computer_set_active_display", - "computer_type", + "disconnect", + "connect", + "mouse_click", + "get_mouse_position", + "keyboard_pressed", + "keyboard_release", + "keyboard_tap", + "list_displays", + "mouse_hold_down", + "mouse_release", + "mouse_scroll", + "move_mouse", + "retrieve_active_display", + "screenshot", + "set_active_display", + "type", ], ) @@ -124,22 +124,22 @@ """ ), tools=[ - "android_screenshot_tool", - "android_tap_tool", - "android_type_tool", - "android_drag_and_drop_tool", - "android_key_event_tool", - "android_swipe_tool", - "android_key_combination_tool", - "android_shell_tool", - "android_connect_tool", - "android_get_connected_devices_serial_numbers_tool", - "android_get_connected_displays_infos_tool", - "android_get_current_connected_device_infos_tool", - "android_get_connected_device_display_infos_tool", - "android_select_device_by_serial_number_tool", - "android_select_display_by_unique_id_tool", - "android_setup_helper", + "screenshot_tool", + "tap_tool", + "type_tool", + "drag_and_drop_tool", + "key_event_tool", + "swipe_tool", + "key_combination_tool", + "shell_tool", + "connect_tool", + "get_connected_devices_serial_numbers_tool", + "get_connected_displays_infos_tool", + "get_current_connected_device_infos_tool", + 
"get_connected_device_display_infos_tool", + "select_device_by_serial_number_tool", + "select_display_by_unique_id_tool", + "setup_helper", ], ) diff --git a/src/askui/models/shared/tools.py b/src/askui/models/shared/tools.py index c1bcc8f1..f26aaf5a 100644 --- a/src/askui/models/shared/tools.py +++ b/src/askui/models/shared/tools.py @@ -1,5 +1,7 @@ import logging +import re import types +import uuid from abc import ABC, abstractmethod from datetime import timedelta from functools import wraps @@ -19,7 +21,7 @@ from fastmcp.utilities.types import Image as FastMcpImage from mcp import Tool as McpTool from PIL import Image -from pydantic import BaseModel, Field +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from typing_extensions import Self from askui.models.shared.agent_message_param import ( @@ -161,7 +163,11 @@ def _create_tool_result_block_param_for_playwright_error( class Tool(BaseModel, ABC): - name: str = Field(description="Name of the tool") + model_config = ConfigDict( + validate_by_alias=True, + ) + + base_name: str = Field(alias="name", description="Name of the tool") description: str = Field(description="Description of what the tool does") input_schema: InputSchema = Field( default_factory=_default_input_schema, @@ -171,12 +177,32 @@ class Tool(BaseModel, ABC): description="Tags required for the tool", default=[] ) + _unique_id: str = PrivateAttr(default_factory=lambda: str(uuid.uuid4())) + @abstractmethod def __call__(self, *args: Any, **kwargs: Any) -> ToolCallResult: """Executes the tool with the given arguments.""" error_msg = "Tool subclasses must implement __call__ method" raise NotImplementedError(error_msg) + @property + def name(self) -> str: + """Returns the unique name for this tool instance.""" + name_parts = [self.base_name] + if len(self.required_tags) > 0: + name_parts.append(f"tags_{'_'.join(self.required_tags)}") + name_parts.append(self._unique_id) + name = "_".join(name_parts) + # Ensure name matches pattern 
^[a-zA-Z0-9_-]+$ + name = re.sub(r"[^a-zA-Z0-9_-]", "_", name) + # Ensure name is not longer than 64 characters + return name[:64] + + @name.setter + def name(self, value: str) -> None: + """Sets the base name of the tool.""" + self.base_name = value + def to_params( self, ) -> BetaToolUnionParam: diff --git a/src/askui/tools/android/tools.py b/src/askui/tools/android/tools.py index 82f4851b..48fab49f 100644 --- a/src/askui/tools/android/tools.py +++ b/src/askui/tools/android/tools.py @@ -16,7 +16,7 @@ class AndroidScreenshotTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: super().__init__( - name="android_screenshot_tool", + name="screenshot_tool", description=( """ Takes a screenshot of the currently active window. @@ -45,7 +45,7 @@ class AndroidTapTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: super().__init__( - name="android_tap_tool", + name="tap_tool", description=( """ Performs a tap (touch) gesture at the given (x, y) coordinates on the @@ -111,7 +111,7 @@ class AndroidTypeTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: super().__init__( - name="android_type_tool", + name="type_tool", description=( """ Types the given text on the Android device screen. @@ -148,7 +148,7 @@ class AndroidDragAndDropTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: super().__init__( - name="android_drag_and_drop_tool", + name="drag_and_drop_tool", description=( """ Performs a drag and drop gesture on the Android device screen. @@ -201,7 +201,7 @@ def __call__(self, x1: int, y1: int, x2: int, y2: int, duration: int = 1000) -> class AndroidKeyTapEventTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: super().__init__( - name="android_key_event_tool", + name="key_event_tool", description=( """ Performs a key press on the android device. 
@@ -238,7 +238,7 @@ class AndroidSwipeTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: super().__init__( - name="android_swipe_tool", + name="swipe_tool", description=( """ Performs a swipe gesture on the Android device screen, similar to @@ -312,7 +312,7 @@ class AndroidKeyCombinationTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: super().__init__( - name="android_key_combination_tool", + name="key_combination_tool", description=( """ Performs a combination of key presses on the Android device, similar to @@ -368,7 +368,7 @@ class AndroidShellTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: super().__init__( - name="android_shell_tool", + name="shell_tool", description=( """ Executes a shell command directly on the Android device through ADB. @@ -411,7 +411,7 @@ class AndroidGetConnectedDevicesSerialNumbersTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None): super().__init__( - name="android_get_connected_devices_serial_numbers_tool", + name="get_connected_devices_serial_numbers_tool", description="Can be used to get all connected devices serial numbers.", agent_os=agent_os, ) @@ -429,7 +429,7 @@ class AndroidGetConnectedDisplaysInfosTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None): super().__init__( - name="android_get_connected_device_display_infos_tool", + name="get_connected_device_display_infos_tool", description="Can be used to get all connected displays infos for the " "current selected device.", agent_os=agent_os, @@ -449,7 +449,7 @@ class AndroidGetCurrentConnectedDeviceInfosTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None): super().__init__( - name="android_get_current_connected_device_infos_tool", + name="get_current_connected_device_infos_tool", description=""" Can be used to get the current 
selected device and selected display infos. """, @@ -474,7 +474,7 @@ class AndroidSelectDeviceBySerialNumberTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None): super().__init__( - name="android_select_device_by_serial_number_tool", + name="select_device_by_serial_number_tool", description="Can be used to select a device by its serial number.", input_schema={ "type": "object", @@ -502,7 +502,7 @@ class AndroidSelectDisplayByUniqueIDTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None): super().__init__( - name="android_select_display_by_unique_id_tool", + name="select_display_by_unique_id_tool", description="Can be used to select a display by its unique ID.", input_schema={ "type": "object", @@ -530,7 +530,7 @@ class AndroidConnectTool(AndroidBaseTool): def __init__(self, agent_os: AndroidAgentOsFacade | None = None): super().__init__( - name="android_connect_tool", + name="connect_tool", description="""Can be used to connect the adb client to the server. Needs to select a device after connecting the adb client. """, diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index 01c09237..11030cda 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -218,17 +218,20 @@ def connect(self) -> None: self._start_execution() self.set_display(self._display) + def _get_stub(self) -> controller_v1.ControllerAPIStub: + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized. Call `connect()` first." + ) + return self._stub + def _run_recorder_action( self, acion_class_id: controller_v1_pbs.ActionClassID, action_parameters: controller_v1_pbs.ActionParameters, ) -> controller_v1_pbs.Response_RunRecordedAction: time.sleep(self._pre_action_wait) - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." 
- ) response: controller_v1_pbs.Response_RunRecordedAction = ( - self._stub.RunRecordedAction( + self._get_stub().RunRecordedAction( controller_v1_pbs.Request_RunRecordedAction( sessionInfo=self._session_info, actionClassID=acion_class_id, @@ -240,10 +243,7 @@ def _run_recorder_action( time.sleep((response.requiredMilliseconds / 1000)) num_retries = 0 for _ in range(self._max_retries): - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) - poll_response: controller_v1_pbs.Response_Poll = self._stub.Poll( + poll_response: controller_v1_pbs.Response_Poll = self._get_stub().Poll( controller_v1_pbs.Request_Poll( sessionInfo=self._session_info, pollEventID=controller_v1_pbs.PollEventID.PollEventID_ActionFinished, @@ -312,10 +312,7 @@ def __exit__( self.disconnect() def _start_session(self) -> None: - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) - response = self._stub.StartSession( + response = self._get_stub().StartSession( controller_v1_pbs.Request_StartSession( sessionGUID=self._session_guid, immediateExecution=True ) @@ -323,26 +320,17 @@ def _start_session(self) -> None: self._session_info = response.sessionInfo def _stop_session(self) -> None: - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) - self._stub.EndSession( + self._get_stub().EndSession( controller_v1_pbs.Request_EndSession(sessionInfo=self._session_info) ) def _start_execution(self) -> None: - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) - self._stub.StartExecution( + self._get_stub().StartExecution( controller_v1_pbs.Request_StartExecution(sessionInfo=self._session_info) ) def _stop_execution(self) -> None: - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. 
Call Connect first." - ) - self._stub.StopExecution( + self._get_stub().StopExecution( controller_v1_pbs.Request_StopExecution(sessionInfo=self._session_info) ) @@ -360,10 +348,7 @@ def screenshot(self, report: bool = True) -> Image.Image: Image.Image: A PIL Image object containing the screenshot. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) - screenResponse = self._stub.CaptureScreen( + screenResponse = self._get_stub().CaptureScreen( controller_v1_pbs.Request_CaptureScreen( sessionInfo=self._session_info, captureParameters=controller_v1_pbs.CaptureParameters( @@ -651,10 +636,7 @@ def set_display(self, display: int = 1) -> None: This can be either a real display ID or a virtual display ID. Defaults to `1`. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) - self._stub.SetActiveDisplay( + self._get_stub().SetActiveDisplay( controller_v1_pbs.Request_SetActiveDisplay(displayID=display) ) self._display = display @@ -714,14 +696,11 @@ def list_displays( Returns: DisplaysListResponse """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", "list_displays()") response: controller_v1_pbs.Response_GetDisplayInformation = ( - self._stub.GetDisplayInformation(controller_v1_pbs.Request_Void()) + self._get_stub().GetDisplayInformation(controller_v1_pbs.Request_Void()) ) response_dict = MessageToDict( @@ -751,14 +730,15 @@ def get_process_list( controller_v1_pbs.Response_GetProcessList: Process list response containing: - processes: List of ProcessInfo objects """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." 
- ) self._reporter.add_message("AgentOS", f"get_process_list({get_extended_info})") - response: controller_v1_pbs.Response_GetProcessList = self._stub.GetProcessList( - controller_v1_pbs.Request_GetProcessList(getExtendedInfo=get_extended_info) + response: controller_v1_pbs.Response_GetProcessList = ( + self._get_stub().GetProcessList( + controller_v1_pbs.Request_GetProcessList( + getExtendedInfo=get_extended_info + ) + ) ) self._reporter.add_message( "AgentOS", f"get_process_list({get_extended_info}) -> {response}" @@ -780,14 +760,13 @@ def get_window_list( controller_v1_pbs.Response_GetWindowList: Window list response containing: - windows: List of WindowInfo objects with ID and name """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"get_window_list({process_id})") - response: controller_v1_pbs.Response_GetWindowList = self._stub.GetWindowList( - controller_v1_pbs.Request_GetWindowList(processID=process_id) + response: controller_v1_pbs.Response_GetWindowList = ( + self._get_stub().GetWindowList( + controller_v1_pbs.Request_GetWindowList(processID=process_id) + ) ) self._reporter.add_message( @@ -808,14 +787,11 @@ def get_automation_target_list( Automation target list response: - targets: List of AutomationTarget objects """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", "get_automation_target_list()") response: controller_v1_pbs.Response_GetAutomationTargetList = ( - self._stub.GetAutomationTargetList(controller_v1_pbs.Request_Void()) + self._get_stub().GetAutomationTargetList(controller_v1_pbs.Request_Void()) ) self._reporter.add_message( "AgentOS", f"get_automation_target_list() -> {response}" @@ -831,13 +807,10 @@ def set_mouse_delay(self, delay_ms: int) -> None: Args: delay_ms (int): The delay in milliseconds to set for mouse actions. 
""" - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"set_mouse_delay({delay_ms})") - self._stub.SetMouseDelay( + self._get_stub().SetMouseDelay( controller_v1_pbs.Request_SetMouseDelay( sessionInfo=self._session_info, delayInMilliseconds=delay_ms ) @@ -851,13 +824,10 @@ def set_keyboard_delay(self, delay_ms: int) -> None: Args: delay_ms (int): The delay in milliseconds to set for keyboard actions. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"set_keyboard_delay({delay_ms})") - self._stub.SetKeyboardDelay( + self._get_stub().SetKeyboardDelay( controller_v1_pbs.Request_SetKeyboardDelay( sessionInfo=self._session_info, delayInMilliseconds=delay_ms ) @@ -880,9 +850,6 @@ def set_active_window(self, process_id: int, window_id: int) -> int: AskUiControllerError: If display length is not increased after adding the window. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message( "AgentOS", f"set_active_window({process_id}, {window_id})" @@ -890,7 +857,7 @@ def set_active_window(self, process_id: int, window_id: int) -> int: display_length_before_adding_window = len(self.list_displays().data) - self._stub.SetActiveWindow( + self._get_stub().SetActiveWindow( controller_v1_pbs.Request_SetActiveWindow( processID=process_id, windowID=window_id ) @@ -913,15 +880,12 @@ def set_active_automation_target(self, target_id: int) -> None: Args: target_id (int): The ID of the automation target to set as active. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." 
- ) self._reporter.add_message( "AgentOS", f"set_active_automation_target({target_id})" ) - self._stub.SetActiveAutomationTarget( + self._get_stub().SetActiveAutomationTarget( controller_v1_pbs.Request_SetActiveAutomationTarget(ID=target_id) ) @@ -944,9 +908,6 @@ def schedule_batched_action( controller_v1_pbs.Response_ScheduleBatchedAction: Response containing the scheduled action ID. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message( "AgentOS", @@ -954,7 +915,7 @@ def schedule_batched_action( ) response: controller_v1_pbs.Response_ScheduleBatchedAction = ( - self._stub.ScheduleBatchedAction( + self._get_stub().ScheduleBatchedAction( controller_v1_pbs.Request_ScheduleBatchedAction( sessionInfo=self._session_info, actionClassID=action_class_id, @@ -970,13 +931,10 @@ def start_batch_run(self) -> None: """ Start executing batched actions. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", "start_batch_run()") - self._stub.StartBatchRun( + self._get_stub().StartBatchRun( controller_v1_pbs.Request_StartBatchRun(sessionInfo=self._session_info) ) @@ -985,13 +943,10 @@ def stop_batch_run(self) -> None: """ Stop executing batched actions. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", "stop_batch_run()") - self._stub.StopBatchRun( + self._get_stub().StopBatchRun( controller_v1_pbs.Request_StopBatchRun(sessionInfo=self._session_info) ) @@ -1004,16 +959,13 @@ def get_action_count(self) -> controller_v1_pbs.Response_GetActionCount: controller_v1_pbs.Response_GetActionCount: Response containing the action count. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." 
- ) - self._reporter.add_message("AgentOS", "get_action_count()") - - response: controller_v1_pbs.Response_GetActionCount = self._stub.GetActionCount( - controller_v1_pbs.Request_GetActionCount(sessionInfo=self._session_info) + response: controller_v1_pbs.Response_GetActionCount = ( + self._get_stub().GetActionCount( + controller_v1_pbs.Request_GetActionCount(sessionInfo=self._session_info) + ) ) - + self._reporter.add_message("AgentOS", f"get_action_count() -> {response}") return response @telemetry.record_call() @@ -1030,13 +982,10 @@ def get_action(self, action_index: int) -> controller_v1_pbs.Response_GetAction: - actionClassID: The action class ID - actionParameters: The action parameters """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"get_action({action_index})") - response: controller_v1_pbs.Response_GetAction = self._stub.GetAction( + response: controller_v1_pbs.Response_GetAction = self._get_stub().GetAction( controller_v1_pbs.Request_GetAction( sessionInfo=self._session_info, actionIndex=action_index ) @@ -1052,13 +1001,10 @@ def remove_action(self, action_id: int) -> None: Args: action_id (int): The ID of the action to remove. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"remove_action({action_id})") - self._stub.RemoveAction( + self._get_stub().RemoveAction( controller_v1_pbs.Request_RemoveAction( sessionInfo=self._session_info, actionID=action_id ) @@ -1069,13 +1015,10 @@ def remove_all_actions(self) -> None: """ Clear all recorded or batched actions. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." 
- ) self._reporter.add_message("AgentOS", "remove_all_actions()") - self._stub.RemoveAllActions( + self._get_stub().RemoveAllActions( controller_v1_pbs.Request_RemoveAllActions(sessionInfo=self._session_info) ) @@ -1094,9 +1037,6 @@ def _send_command(self, command: Command) -> AskUIAgentOSSendResponseSchema: AskUiControllerInvalidCommandError: If the command fails schema validation on the server side. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) header = Header(authentication=Guid(root=self._session_guid)) message = Message(header=header, command=command) @@ -1106,7 +1046,7 @@ def _send_command(self, command: Command) -> AskUIAgentOSSendResponseSchema: request_str = request.model_dump_json(exclude_none=True, by_alias=True) try: - response: controller_v1_pbs.Response_Send = self._stub.Send( + response: controller_v1_pbs.Response_Send = self._get_stub().Send( controller_v1_pbs.Request_Send(message=request_str) ) except grpc.RpcError as e: @@ -1125,9 +1065,6 @@ def get_mouse_position(self) -> Coordinate: Returns: Coordinate: Response containing the result of the mouse position change. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", "get_mouse_position()") res = self._send_command(GetMousePositionCommand()) coordinate = Coordinate( @@ -1146,9 +1083,6 @@ def set_mouse_position(self, x: int, y: int) -> None: x (int): The horizontal coordinate (in pixels) to set the cursor to. y (int): The vertical coordinate (in pixels) to set the cursor to. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." 
- ) location = Location(x=Length(root=x), y=Length(root=y)) command = SetMousePositionCommand(parameters=[location]) self._reporter.add_message("AgentOS", f"set_mouse_position({x},{y})") @@ -1165,9 +1099,6 @@ def render_quad(self, style: RenderObjectStyle) -> int: Returns: int: Object ID. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"render_quad({style})") command = AddRenderObjectCommand(parameters=["Quad", style]) res = self._send_command(command) @@ -1185,9 +1116,6 @@ def render_line(self, style: RenderObjectStyle, points: list[Coordinate]) -> int Returns: int: Object ID. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"render_line({style}, {points})") command = AddRenderObjectCommand(parameters=["Line", style, points]) res = self._send_command(command) @@ -1205,9 +1133,6 @@ def render_image(self, style: RenderObjectStyle, image_data: str) -> int: Returns: int: Object ID. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"render_image({style}, [image_data])") image = RenderImage(root=image_data) command = AddRenderObjectCommand(parameters=["Image", style, image]) @@ -1227,9 +1152,6 @@ def render_text(self, style: RenderObjectStyle, content: str) -> int: Returns: int: Object ID. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"render_text({style}, {content})") text = RenderText(root=content) command = AddRenderObjectCommand(parameters=["Text", style, text]) @@ -1248,9 +1170,6 @@ def update_render_object(self, object_id: int, style: RenderObjectStyle) -> None Returns: int: Object ID. 
""" - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message( "AgentOS", f"update_render_object({object_id}, {style})" ) @@ -1266,9 +1185,6 @@ def delete_render_object(self, object_id: int) -> None: Args: object_id (RenderObjectId): The ID of the render object to delete. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", f"delete_render_object({object_id})") render_object_id = RenderObjectId(root=object_id) command = DeleteRenderObjectCommand(parameters=[render_object_id]) @@ -1279,9 +1195,6 @@ def clear_render_objects(self) -> None: """ Clear all render objects from the display. """ - assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( - "Stub is not initialized. Call Connect first." - ) self._reporter.add_message("AgentOS", "clear_render_objects()") command = ClearRenderObjectsCommand() self._send_command(command) diff --git a/src/askui/tools/askui/askui_controller_settings.py b/src/askui/tools/askui/askui_controller_settings.py index 6ad23769..9a2d5cec 100644 --- a/src/askui/tools/askui/askui_controller_settings.py +++ b/src/askui/tools/askui/askui_controller_settings.py @@ -2,9 +2,8 @@ import sys from functools import cached_property -from pydantic import BaseModel, Field, field_validator, model_validator +from pydantic import BaseModel, Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict -from typing_extensions import Self class RemoteDeviceController(BaseModel): @@ -93,22 +92,6 @@ def validate_controller_args(cls, value: str) -> str: return value - @model_validator(mode="after") - def validate_either_component_registry_or_installation_directory_is_set( - self, - ) -> "Self": - if ( - self.component_registry_file is None - and self.installation_directory is None - and self.controller_path_setting is None - ): - 
error_msg = ( - "Either ASKUI_COMPONENT_REGISTRY_FILE, ASKUI_INSTALLATION_DIRECTORY, " - "or ASKUI_CONTROLLER_PATH environment variable must be set" - ) - raise ValueError(error_msg) - return self - def _find_remote_device_controller_by_installation_directory( self, ) -> pathlib.Path | None: @@ -177,11 +160,14 @@ def controller_path(self) -> pathlib.Path: or self._find_remote_device_controller_by_component_registry_file() or self._find_remote_device_controller_by_installation_directory() ) - assert result is not None, ( - "No AskUI Remote Device Controller found. Please set the " - "ASKUI_COMPONENT_REGISTRY_FILE, ASKUI_INSTALLATION_DIRECTORY, or " - "ASKUI_CONTROLLER_PATH environment variable." - ) + if result is None: + error_msg = ( + "No AskUI Remote Device Controller found. Please set the " + "ASKUI_COMPONENT_REGISTRY_FILE, ASKUI_INSTALLATION_DIRECTORY, or " + "ASKUI_CONTROLLER_PATH environment variable." + ) + raise ValueError(error_msg) + if not result.is_file(): error_msg = ( "AskUIRemoteDeviceController executable does not exist under " diff --git a/src/askui/tools/computer/connect_tool.py b/src/askui/tools/computer/connect_tool.py index b707d827..8fddb45a 100644 --- a/src/askui/tools/computer/connect_tool.py +++ b/src/askui/tools/computer/connect_tool.py @@ -7,7 +7,7 @@ class ComputerConnectTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_connect", + name="connect", description=( "Connect to the agent OS controller to enable computer control. 
" "Useful for establishing an initial connection or reconnecting " diff --git a/src/askui/tools/computer/disconnect_tool.py b/src/askui/tools/computer/disconnect_tool.py index e6d60e0d..1e8b83a4 100644 --- a/src/askui/tools/computer/disconnect_tool.py +++ b/src/askui/tools/computer/disconnect_tool.py @@ -7,7 +7,7 @@ class ComputerDisconnectTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_disconnect", + name="disconnect", description=( "Disconnect from the agent OS controller. " "Needs to be used once you are done with the task and want to stop" diff --git a/src/askui/tools/computer/get_mouse_position_tool.py b/src/askui/tools/computer/get_mouse_position_tool.py index bf1b6b82..829769a7 100644 --- a/src/askui/tools/computer/get_mouse_position_tool.py +++ b/src/askui/tools/computer/get_mouse_position_tool.py @@ -7,7 +7,7 @@ class ComputerGetMousePositionTool(ComputerBaseTool): def __init__(self, agent_os: ComputerAgentOsFacade | None = None) -> None: super().__init__( - name="computer_get_mouse_position", + name="get_mouse_position", description="Get the current mouse position.", agent_os=agent_os, required_tags=[ToolTags.SCALED_AGENT_OS.value], diff --git a/src/askui/tools/computer/get_system_info_tool.py b/src/askui/tools/computer/get_system_info_tool.py index c1d0a0d2..7f68c07d 100644 --- a/src/askui/tools/computer/get_system_info_tool.py +++ b/src/askui/tools/computer/get_system_info_tool.py @@ -15,7 +15,7 @@ class ComputerGetSystemInfoTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_get_system_info_tool", + name="get_system_info_tool", description=""" Get the system information. This tool returns the system information as a JSON object. 
diff --git a/src/askui/tools/computer/keyboard_pressed_tool.py b/src/askui/tools/computer/keyboard_pressed_tool.py index 84fe9fea..0a82595e 100644 --- a/src/askui/tools/computer/keyboard_pressed_tool.py +++ b/src/askui/tools/computer/keyboard_pressed_tool.py @@ -9,7 +9,7 @@ class ComputerKeyboardPressedTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_keyboard_pressed", + name="keyboard_pressed", description="Press and hold a keyboard key.", input_schema={ "type": "object", diff --git a/src/askui/tools/computer/keyboard_release_tool.py b/src/askui/tools/computer/keyboard_release_tool.py index 9399fc0b..49a338f4 100644 --- a/src/askui/tools/computer/keyboard_release_tool.py +++ b/src/askui/tools/computer/keyboard_release_tool.py @@ -9,7 +9,7 @@ class ComputerKeyboardReleaseTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_keyboard_release", + name="keyboard_release", description="Release a keyboard key.", input_schema={ "type": "object", diff --git a/src/askui/tools/computer/keyboard_tap_tool.py b/src/askui/tools/computer/keyboard_tap_tool.py index c944af7d..de5233e2 100644 --- a/src/askui/tools/computer/keyboard_tap_tool.py +++ b/src/askui/tools/computer/keyboard_tap_tool.py @@ -9,7 +9,7 @@ class ComputerKeyboardTapTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_keyboard_tap", + name="keyboard_tap", description="Tap (press and release) a keyboard key.", input_schema={ "type": "object", diff --git a/src/askui/tools/computer/list_displays_tool.py b/src/askui/tools/computer/list_displays_tool.py index 293cc15f..8ce94177 100644 --- a/src/askui/tools/computer/list_displays_tool.py +++ b/src/askui/tools/computer/list_displays_tool.py @@ -5,7 +5,7 @@ class ComputerListDisplaysTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: 
super().__init__( - name="computer_list_displays", + name="list_displays", description=""" List all the available displays on the computer. """, diff --git a/src/askui/tools/computer/mouse_click_tool.py b/src/askui/tools/computer/mouse_click_tool.py index e2f9b101..ee07c654 100644 --- a/src/askui/tools/computer/mouse_click_tool.py +++ b/src/askui/tools/computer/mouse_click_tool.py @@ -9,7 +9,7 @@ class ComputerMouseClickTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_mouse_click", + name="mouse_click", description="Click and release the mouse button at the current position.", input_schema={ "type": "object", diff --git a/src/askui/tools/computer/mouse_hold_down_tool.py b/src/askui/tools/computer/mouse_hold_down_tool.py index b96e7f9d..b3923cd6 100644 --- a/src/askui/tools/computer/mouse_hold_down_tool.py +++ b/src/askui/tools/computer/mouse_hold_down_tool.py @@ -9,7 +9,7 @@ class ComputerMouseHoldDownTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_mouse_hold_down", + name="mouse_hold_down", description="Hold down the mouse button at the current position.", input_schema={ "type": "object", diff --git a/src/askui/tools/computer/mouse_release_tool.py b/src/askui/tools/computer/mouse_release_tool.py index c8bffa86..2e8e5bb0 100644 --- a/src/askui/tools/computer/mouse_release_tool.py +++ b/src/askui/tools/computer/mouse_release_tool.py @@ -9,7 +9,7 @@ class ComputerMouseReleaseTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_mouse_release", + name="mouse_release", description="Release the mouse button at the current position.", input_schema={ "type": "object", diff --git a/src/askui/tools/computer/mouse_scroll_tool.py b/src/askui/tools/computer/mouse_scroll_tool.py index 13e1c8f9..a02645d5 100644 --- a/src/askui/tools/computer/mouse_scroll_tool.py +++ 
b/src/askui/tools/computer/mouse_scroll_tool.py @@ -7,7 +7,7 @@ class ComputerMouseScrollTool(ComputerBaseTool): def __init__(self, agent_os: ComputerAgentOsFacade | None = None) -> None: super().__init__( - name="computer_mouse_scroll", + name="mouse_scroll", description="Scroll the mouse wheel at the current position.", input_schema={ "type": "object", diff --git a/src/askui/tools/computer/move_mouse_tool.py b/src/askui/tools/computer/move_mouse_tool.py index 0f014040..dabcae50 100644 --- a/src/askui/tools/computer/move_mouse_tool.py +++ b/src/askui/tools/computer/move_mouse_tool.py @@ -7,7 +7,7 @@ class ComputerMoveMouseTool(ComputerBaseTool): def __init__(self, agent_os: ComputerAgentOsFacade | None = None) -> None: super().__init__( - name="computer_move_mouse", + name="move_mouse", description="Move the mouse to a specific position.", input_schema={ "type": "object", diff --git a/src/askui/tools/computer/retrieve_active_display_tool.py b/src/askui/tools/computer/retrieve_active_display_tool.py index 44a0a89e..00f22977 100644 --- a/src/askui/tools/computer/retrieve_active_display_tool.py +++ b/src/askui/tools/computer/retrieve_active_display_tool.py @@ -5,7 +5,7 @@ class ComputerRetrieveActiveDisplayTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_retrieve_active_display", + name="retrieve_active_display", description=""" Retrieve the currently active display on the computer. The display is used to take screenshots and perform actions. 
diff --git a/src/askui/tools/computer/screenshot_tool.py b/src/askui/tools/computer/screenshot_tool.py index 27caa501..30f8278b 100644 --- a/src/askui/tools/computer/screenshot_tool.py +++ b/src/askui/tools/computer/screenshot_tool.py @@ -9,7 +9,7 @@ class ComputerScreenshotTool(ComputerBaseTool): def __init__(self, agent_os: ComputerAgentOsFacade | None = None) -> None: super().__init__( - name="computer_screenshot", + name="screenshot", description="Take a screenshot of the current screen.", agent_os=agent_os, required_tags=[ToolTags.SCALED_AGENT_OS.value], diff --git a/src/askui/tools/computer/set_active_display_tool.py b/src/askui/tools/computer/set_active_display_tool.py index a2d24f78..22e3e710 100644 --- a/src/askui/tools/computer/set_active_display_tool.py +++ b/src/askui/tools/computer/set_active_display_tool.py @@ -5,7 +5,7 @@ class ComputerSetActiveDisplayTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_set_active_display", + name="set_active_display", description=""" Set the display screen from which screenshots are taken and on which actions are performed. 
diff --git a/src/askui/tools/computer/type_tool.py b/src/askui/tools/computer/type_tool.py index 8ebc586a..9c874f41 100644 --- a/src/askui/tools/computer/type_tool.py +++ b/src/askui/tools/computer/type_tool.py @@ -7,7 +7,7 @@ class ComputerTypeTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_type", + name="type", description="Type text on the computer.", input_schema={ "type": "object", diff --git a/src/askui/tools/store/android/save_screenshot_tool.py b/src/askui/tools/store/android/save_screenshot_tool.py index d3c06ffd..9edc587f 100644 --- a/src/askui/tools/store/android/save_screenshot_tool.py +++ b/src/askui/tools/store/android/save_screenshot_tool.py @@ -41,7 +41,7 @@ class AndroidSaveScreenshotTool(AndroidBaseTool): def __init__(self, base_dir: str) -> None: super().__init__( - name="android_save_screenshot_tool", + name="save_screenshot_tool", description=( "Saves a screenshot of the currently connected Android device screen " "to disk as a PNG image file. The screenshot is captured from the " diff --git a/src/askui/tools/store/computer/experimental/window_management/add_window_as_virtual_display.py b/src/askui/tools/store/computer/experimental/window_management/add_window_as_virtual_display.py index 771a7890..7750b333 100644 --- a/src/askui/tools/store/computer/experimental/window_management/add_window_as_virtual_display.py +++ b/src/askui/tools/store/computer/experimental/window_management/add_window_as_virtual_display.py @@ -11,7 +11,7 @@ class ComputerAddWindowAsVirtualDisplayTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_add_window_as_virtual_display_tool", + name="add_window_as_virtual_display_tool", description=""" Converts a specific window into a virtual display and assigns it a unique display ID. 
This tool is essential for automating windows that diff --git a/src/askui/tools/store/computer/experimental/window_management/list_process.py b/src/askui/tools/store/computer/experimental/window_management/list_process.py index f41da109..aebd8b17 100644 --- a/src/askui/tools/store/computer/experimental/window_management/list_process.py +++ b/src/askui/tools/store/computer/experimental/window_management/list_process.py @@ -11,7 +11,7 @@ class ComputerListProcessTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_list_process_tool", + name="list_process_tool", description=""" Lists all running processes on the computer that have at least one window. This tool is used as the first step in window management workflows to diff --git a/src/askui/tools/store/computer/experimental/window_management/list_process_windows.py b/src/askui/tools/store/computer/experimental/window_management/list_process_windows.py index b990e6e7..850c8dcc 100644 --- a/src/askui/tools/store/computer/experimental/window_management/list_process_windows.py +++ b/src/askui/tools/store/computer/experimental/window_management/list_process_windows.py @@ -11,7 +11,7 @@ class ComputerListProcessWindowsTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_list_process_windows_tool", + name="list_process_windows_tool", description=""" Lists all windows belonging to a specific process identified by its process ID. 
This tool is used after list_process_tool to discover all diff --git a/src/askui/tools/store/computer/experimental/window_management/set_process_in_focus.py b/src/askui/tools/store/computer/experimental/window_management/set_process_in_focus.py index 436158ec..2e27550f 100644 --- a/src/askui/tools/store/computer/experimental/window_management/set_process_in_focus.py +++ b/src/askui/tools/store/computer/experimental/window_management/set_process_in_focus.py @@ -11,7 +11,7 @@ class ComputerSetProcessInFocusTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_set_process_in_focus_tool", + name="set_process_in_focus_tool", description=""" Brings a process into focus. When you use this tool, it activates the specified process and brings it to the foreground. The process itself diff --git a/src/askui/tools/store/computer/experimental/window_management/set_window_in_focus.py b/src/askui/tools/store/computer/experimental/window_management/set_window_in_focus.py index 8b810425..e597a78c 100644 --- a/src/askui/tools/store/computer/experimental/window_management/set_window_in_focus.py +++ b/src/askui/tools/store/computer/experimental/window_management/set_window_in_focus.py @@ -11,7 +11,7 @@ class ComputerSetWindowInFocusTool(ComputerBaseTool): def __init__(self, agent_os: AgentOs | None = None) -> None: super().__init__( - name="computer_set_window_in_focus_tool", + name="set_window_in_focus_tool", description=""" Brings a specific window to the foreground and sets it as the active focused window. 
This tool is used to switch focus to a particular diff --git a/src/askui/tools/store/computer/save_screenshot_tool.py b/src/askui/tools/store/computer/save_screenshot_tool.py index 0218fc74..fd3f3f7c 100644 --- a/src/askui/tools/store/computer/save_screenshot_tool.py +++ b/src/askui/tools/store/computer/save_screenshot_tool.py @@ -41,7 +41,7 @@ class ComputerSaveScreenshotTool(ComputerBaseTool): def __init__(self, base_dir: str) -> None: super().__init__( - name="computer_save_screenshot_tool", + name="save_screenshot_tool", description=( "Saves a screenshot of the currently active computer screen " "to disk as a PNG image file. The screenshot is captured from the " diff --git a/src/askui/tools/store/universal/__init__.py b/src/askui/tools/store/universal/__init__.py index 8135dd5e..ee1e05ef 100644 --- a/src/askui/tools/store/universal/__init__.py +++ b/src/askui/tools/store/universal/__init__.py @@ -5,6 +5,7 @@ """ from .list_files_tool import ListFilesTool +from .load_image_tool import LoadImageTool from .print_to_console import PrintToConsoleTool from .read_from_file_tool import ReadFromFileTool from .write_to_file_tool import WriteToFileTool @@ -14,4 +15,5 @@ "PrintToConsoleTool", "ReadFromFileTool", "WriteToFileTool", + "LoadImageTool", ] diff --git a/src/askui/tools/store/universal/list_files_tool.py b/src/askui/tools/store/universal/list_files_tool.py index c973baa4..e2d93aa0 100644 --- a/src/askui/tools/store/universal/list_files_tool.py +++ b/src/askui/tools/store/universal/list_files_tool.py @@ -13,7 +13,7 @@ class ListFilesTool(Tool): directories during execution. Args: - base_dir (str): The base directory path where file listing will start. + base_dir (str | Path): The base directory path where file listing will start. All directory paths will be relative to this directory. 
Example: @@ -29,12 +29,15 @@ class ListFilesTool(Tool): ``` """ - def __init__(self, base_dir: str) -> None: + def __init__(self, base_dir: str | Path) -> None: + if not isinstance(base_dir, Path): + base_dir = Path(base_dir) + absolute = base_dir.absolute() super().__init__( name="list_files_tool", description=( "Lists files and directories in a directory on the filesystem. The " - f"base directory is set to '{base_dir}' during tool initialization. " + f"base directory is set to '{absolute}' during tool initialization. " "All directory paths are relative to this base directory. Use this " "tool to explore the filesystem structure, discover available files " "and directories, or navigate the directory tree during execution." @@ -46,11 +49,11 @@ def __init__(self, base_dir: str) -> None: "type": "string", "description": ( "The relative path of the directory to list. The path is " - f"relative to the base directory '{base_dir}' specified " + f"relative to the base directory '{absolute}' specified " "during tool initialization. For example, if " "directory_path is 'output', the directory will be listed " - f"from '{base_dir}/output'. If not specified or empty, " - f"lists the base directory '{base_dir}' itself." + f"from '{absolute}/output'. If not specified or empty, " + f"lists the base directory '{absolute}' itself." 
), }, "recursive": { @@ -66,7 +69,7 @@ def __init__(self, base_dir: str) -> None: "required": [], }, ) - self._base_dir = Path(base_dir) + self._base_dir = base_dir def __call__(self, directory_path: str = "", recursive: bool = False) -> str: """ diff --git a/src/askui/tools/store/universal/load_image_tool.py b/src/askui/tools/store/universal/load_image_tool.py new file mode 100644 index 00000000..5a0512e9 --- /dev/null +++ b/src/askui/tools/store/universal/load_image_tool.py @@ -0,0 +1,124 @@ +from pathlib import Path +from typing import Tuple + +from PIL import Image + +from askui.models.shared.tools import Tool +from askui.utils.image_utils import scale_image_to_fit + + +class LoadImageTool(Tool): + """ + Tool for loading images from a directory on the filesystem. + + This tool allows the agent to load and process images from the filesystem, + making them available for analysis, comparison, or visual inspection. It + supports common image formats (PNG, JPEG, BMP, GIF, etc.) and automatically + scales images to fit within a target size for efficient processing. This is + useful for tasks like analyzing screenshots, comparing visual elements, + verifying image content, or providing visual context during execution. + + Args: + base_dir (str | Path): The base directory path where images will be loaded + from. All image paths will be relative to this directory. 
+ + Example: + ```python + from askui import VisionAgent + from askui.tools.store.universal import LoadImageTool + + with VisionAgent() as agent: + agent.act( + "Describe the logo image called 'logo.png'", + tools=[LoadImageTool(base_dir="images")] + ) + ``` + + Example: + ```python + from askui import VisionAgent + from askui.tools.store.universal import LoadImageTool + + with VisionAgent( + act_tools=[LoadImageTool(base_dir="images")] + ) as agent: + agent.act("Describe the logo image called 'logo.png'") + ``` + """ + + def __init__(self, base_dir: str | Path) -> None: + if not isinstance(base_dir, Path): + base_dir = Path(base_dir) + absolute = base_dir.absolute() + super().__init__( + name="load_image_tool", + description=( + "Loads an image from the filesystem and returns it for analysis or " + f"processing. The base directory is set to '{absolute}' during tool " + "initialization. All image paths are relative to this base directory. " + "Supports common image formats (PNG, JPEG, BMP, GIF, etc.). Images are " + "automatically scaled to fit within a target size for efficient " + "processing. Use this tool to analyze screenshots, compare visual " + "elements, verify image content, inspect UI elements, or provide " + "visual context during execution." + ), + input_schema={ + "type": "object", + "properties": { + "image_path": { + "type": "string", + "description": ( + "The relative path of the image file to load. The path is " + f"relative to the base directory '{absolute}' specified " + "during tool initialization. For example, if image_path " + "is 'screenshots/login.png', the image will be loaded from " + f"'{absolute}/screenshots/login.png'." + ), + }, + }, + "required": [ + "image_path", + ], + }, + ) + self._base_dir = base_dir + self._target_size = (1024, 768) + + def __call__(self, image_path: str = "") -> Tuple[str, Image.Image]: + """ + Load an image from the specified path and return it for processing. 
+ + The image is automatically scaled to fit within the target size (1024x768) + while preserving aspect ratio, ensuring efficient processing without + losing important visual details. + + Args: + image_path (str): The relative path of the image file to load, relative + to the base directory specified during tool initialization. + + Returns: + Tuple[str, Image.Image]: A tuple containing a confirmation message + indicating the image was successfully loaded (including the full + absolute path) and the loaded PIL Image object respectively. + + Raises: + FileNotFoundError: If the image file does not exist at the specified path. + FileExistsError: If the path exists but is not a file (e.g., a directory). + """ + absolute_image_path = self._base_dir / image_path + + if not absolute_image_path.exists(): + error_msg = f"Image not found: {absolute_image_path}" + raise FileNotFoundError(error_msg) + + if not absolute_image_path.is_file(): + error_msg = f"Path is not a file: {absolute_image_path}" + raise FileExistsError(error_msg) + + image = Image.open(absolute_image_path) + image = scale_image_to_fit(image, target_size=self._target_size) + + return ( + f"Image was successfully loaded from {absolute_image_path}", + image, + ) diff --git a/src/askui/tools/store/universal/read_from_file_tool.py b/src/askui/tools/store/universal/read_from_file_tool.py index 014bb7a6..f6cb4f11 100644 --- a/src/askui/tools/store/universal/read_from_file_tool.py +++ b/src/askui/tools/store/universal/read_from_file_tool.py @@ -12,9 +12,13 @@ class ReadFromFileTool(Tool): accessing any text-based data stored on the filesystem during execution. Args: - base_dir (str): The base directory path where files will be read from. + base_dir (str | Path): The base directory path where files will be read from. All file paths will be relative to this directory. + encodings (list[str]): The list of encodings to try to read the file. + If not provided, the default encodings will be used. 
The default encodings + are "utf-8" and "latin-1". + Example: ```python from askui import VisionAgent @@ -28,12 +32,19 @@ class ReadFromFileTool(Tool): ``` """ - def __init__(self, base_dir: str) -> None: + def __init__( + self, + base_dir: str | Path, + encodings: list[str] | None = None, + ) -> None: + if not isinstance(base_dir, Path): + base_dir = Path(base_dir) + absolute = base_dir.absolute() super().__init__( name="read_from_file_tool", description=( "Reads text content from a file on the filesystem. The base directory " - f"is set to '{base_dir}' during tool initialization. All file paths are" + f"is set to '{absolute}' during tool initialization. All file paths are" " relative to this base directory. Use this tool to load configuration " "files, read data files, process logs, or access any text-based data " "stored on the filesystem during execution." @@ -45,17 +56,18 @@ def __init__(self, base_dir: str) -> None: "type": "string", "description": ( "The relative path of the file to read from. The path is " - f"relative to the base directory '{base_dir}' specified " + f"relative to the base directory '{absolute}' specified " "during tool initialization. For example, if file_path " "is 'config/settings.txt', the file will be read from " - f"'{base_dir}/config/settings.txt'." + f"'{absolute}/config/settings.txt'." ), }, }, "required": ["file_path"], }, ) - self._base_dir = Path(base_dir) + self._base_dir = base_dir + self._encodings = encodings or ["utf-8", "latin-1"] def __call__(self, file_path: str) -> str: """ @@ -72,6 +84,7 @@ def __call__(self, file_path: str) -> str: Raises: FileNotFoundError: If the file does not exist. OSError: If the file cannot be read due to filesystem errors. + RuntimeError: If the file cannot be read due to encoding errors. 
""" absolute_file_path = self._base_dir / file_path @@ -83,5 +96,18 @@ def __call__(self, file_path: str) -> str: error_msg = f"Path is not a file: {absolute_file_path}" raise ValueError(error_msg) - content = absolute_file_path.read_text(encoding="utf-8") + content = None + for encoding in self._encodings: + try: + content = absolute_file_path.read_text(encoding=encoding) + break + except UnicodeDecodeError: + continue + + if not content: + error_msg = ( + f"Failed to read file {absolute_file_path} with any" + f" of the encodings: {', '.join(self._encodings)}" + ) + raise RuntimeError(error_msg) return f"Content of {absolute_file_path}:\n\n{content}" diff --git a/src/askui/tools/store/universal/write_to_file_tool.py b/src/askui/tools/store/universal/write_to_file_tool.py index f0b58b13..cef8c1ea 100644 --- a/src/askui/tools/store/universal/write_to_file_tool.py +++ b/src/askui/tools/store/universal/write_to_file_tool.py @@ -13,7 +13,7 @@ class WriteToFileTool(Tool): creating reports, or storing any text-based data during execution. Args: - base_dir (str): The base directory path where files will be written. + base_dir (str | Path): The base directory path where files will be written. All file paths will be relative to this directory. The base directory will be created if it doesn't exist. @@ -41,12 +41,16 @@ class WriteToFileTool(Tool): ``` """ - def __init__(self, base_dir: str) -> None: + def __init__(self, base_dir: str | Path) -> None: + if not isinstance(base_dir, Path): + base_dir = Path(base_dir) + absolute = base_dir.absolute() super().__init__( name="write_to_file_tool", description=( "Writes text content to a file on the filesystem. The file path is " - "relative to the base directory specified during tool initialization. " + f"relative to the base directory '{absolute}' specified during tool " + "initialization. " "The directory structure will be created automatically if it doesn't " "exist. 
You can choose to overwrite existing files or append to them " "by setting the append parameter. Use this tool to save results, " @@ -59,13 +63,11 @@ def __init__(self, base_dir: str) -> None: "file_path": { "type": "string", "description": ( - "The relative path of the file where content should be " - "written. The path is relative to the base directory " - "specified during tool initialization. For example, if " - "base_dir is '/output' and file_path is " - "'results/data.txt', the file will be written to " - "'/output/results/data.txt'. Subdirectories will be " - "created automatically if needed." + "The relative path of the file to write to. The path is " + f"relative to the base directory '{absolute}' specified " + "during tool initialization. For example, if file_path " + "is 'config/settings.txt', the file will be written to " + f"'{absolute}/config/settings.txt'." ), }, "content": { @@ -92,8 +94,7 @@ def __init__(self, base_dir: str) -> None: "required": ["file_path", "content", "append"], }, ) - self._base_dir = Path(base_dir) - self._base_dir.mkdir(parents=True, exist_ok=True) + self._base_dir = base_dir def __call__(self, file_path: str, content: str, append: bool) -> str: """ diff --git a/tests/e2e/tools/askui/test_askui_controller.py b/tests/e2e/tools/askui/test_askui_controller.py index 8148a716..bca9e591 100644 --- a/tests/e2e/tools/askui/test_askui_controller.py +++ b/tests/e2e/tools/askui/test_askui_controller.py @@ -182,7 +182,7 @@ def test_operations_before_connect() -> None: client = AskUiControllerClient(reporter=CompositeReporter(), display=1) with pytest.raises( - AssertionError, match="Stub is not initialized. Call Connect first." + AssertionError, match="Stub is not initialized. Call `connect()` first." 
): client.screenshot() diff --git a/tests/unit/tools/askui/test_askui_controller_settings.py b/tests/unit/tools/askui/test_askui_controller_settings.py index a1f7a852..983e2cbe 100644 --- a/tests/unit/tools/askui/test_askui_controller_settings.py +++ b/tests/unit/tools/askui/test_askui_controller_settings.py @@ -102,9 +102,10 @@ def test_no_environment_variables_raises_error(self) -> None: """Test that ValueError is raised when no environment variables are set.""" with patch.dict("os.environ", {}, clear=True): with pytest.raises( - ValueError, match="Either ASKUI_COMPONENT_REGISTRY_FILE" + ValueError, match="No AskUI Remote Device Controller found. Please set" ): - AskUiControllerSettings() + settings = AskUiControllerSettings() + _ = settings.controller_path def test_build_controller_path_windows(self) -> None: """Test _build_controller_path for Windows platform.""" @@ -393,7 +394,7 @@ def test_assertion_error_when_no_controller_found(self) -> None: patch.object(settings, "controller_path_setting", None), ): with pytest.raises( - AssertionError, match="No AskUI Remote Device Controller found" + ValueError, match="No AskUI Remote Device Controller found" ): _ = settings.controller_path diff --git a/tests/unit/tools/test_caching_tools.py b/tests/unit/tools/test_caching_tools.py index a4404114..f1f622c4 100644 --- a/tests/unit/tools/test_caching_tools.py +++ b/tests/unit/tools/test_caching_tools.py @@ -83,7 +83,7 @@ def test_retrieve_cached_test_executions_respects_custom_format() -> None: def test_execute_cached_execution_initializes_without_toolbox() -> None: """Test that ExecuteCachedExecution can be initialized without toolbox.""" tool = ExecuteCachedTrajectory() - assert tool.name == "execute_cached_executions_tool" + assert tool.name.startswith("execute_cached_executions_tool") def test_execute_cached_execution_raises_error_without_toolbox() -> None: