diff --git a/.cursor/App.notepad b/.cursor/App.notepad new file mode 100644 index 0000000..2a07843 --- /dev/null +++ b/.cursor/App.notepad @@ -0,0 +1,39 @@ +These files represent the entire application + +Agent.exe/ +├── .cursor/ # Directory for cursor-related files +│ ├── @instructions.md # Instructions for the development process +│ ├── @current_status.md # Current status of the project +│ └── @plan.md # Project plan and roadmap +├── .erb/ # Electron React Boilerplate configurations +│ └── configs/ # Webpack and other build configurations +│ └── @webpack.config.main.dev.ts # Webpack config for main process development +├── assets/ # Static assets like images and icons +├── release/ # Build output directory +│ └── app/ # Application package directory +│ └── @package.json # Package configuration for the built app +├── src/ # Source code directory +│ ├── main/ # Main process code +│ │ ├── @main.ts # Entry point for the main process +│ │ ├── @menu.ts # Application menu configuration +│ │ ├── @preload.ts # Preload script for renderer process +│ │ └── store/ # State management for main process +│ │ ├── @create.ts # Store creation and configuration +│ │ ├── @runAgent.ts # Logic for running the AI agent +│ │ └── @types.ts # TypeScript type definitions +│ └── renderer/ # Renderer process code +│ ├── @App.tsx # Main React component +│ ├── @index.ejs # HTML template for the app +│ ├── @index.tsx # Entry point for the renderer process +│ ├── @RunHistory.tsx # Component for displaying run history +│ ├── @global.d.ts # Global type declarations +│ └── hooks/ # Custom React hooks +│ └── @useStore.ts # Hook for accessing the Zustand store +├── .editorconfig # Editor configuration for consistent coding styles +├── .env # Environment variables (not tracked in git) +├── .eslintignore # Files to be ignored by ESLint +├── .eslintrc # ESLint configuration +├── .gitignore # Git ignore file +├── @package.json # Node.js dependencies and scripts +├── @README.md # Project documentation and 
overview +└── tsconfig.json # TypeScript configuration \ No newline at end of file diff --git a/.cursor/current_status.md b/.cursor/current_status.md new file mode 100644 index 0000000..2ff16d9 --- /dev/null +++ b/.cursor/current_status.md @@ -0,0 +1,182 @@ +# Current Status + +## Iteration 1: Project Evaluation + +1. Completed initial project structure analysis +2. Identified key components and their purposes: + - Main process (`src/main/main.ts`) + - Renderer process (`src/renderer/App.tsx`) + - State management (`src/main/store/create.ts`) + - AI agent logic (`src/main/store/runAgent.ts`) +3. Reviewed existing features: + - Direct computer control by Claude 3.5 Sonnet + - Real-time execution of AI-generated commands + - Simple Electron-based user interface + - Support for MacOS (with theoretical support for Windows and Linux) + - Primary display interaction + - Emergency stop functionality +4. Identified areas for enhancement as per the project plan: + - Persistent settings + - System prompt configuration + - Enhanced logging with images + - Plugin system + - Window state persistence + - Session management + - Configurable tools system +5. Next steps: + - Begin implementation of persistent settings + - Set up development environment for new features + - Create a more detailed roadmap for feature implementation + +## Iteration 2: Persistent Settings Implementation + +1. Installed `electron-store` library for efficient local storage +2. Created `src/main/settings.ts` to manage application settings +3. Modified `src/main/store/create.ts` to use persistent settings +4. Updated `src/renderer/App.tsx` to utilize persistent settings +5. Implemented saving and loading of `instructions` and `fullyAuto` settings +6. Next steps: + - Test the persistent settings functionality + - Move on to the next enhancement: System Prompt Configuration + +## Iteration 3: System Prompt Configuration + +1. Updated `src/main/store/types.ts` to include systemPrompt in AppState +2. 
Modified `src/main/settings.ts` to handle systemPrompt persistence +3. Updated `src/main/store/create.ts` to include systemPrompt in the store +4. Created new `src/renderer/SystemPrompt.tsx` component for editing system prompts +5. Updated `src/renderer/App.tsx` to include the SystemPrompt component as a tab +6. Implemented a resizable textarea for editing the system prompt +7. Added an "Apply Default Prompt" button to easily set a predefined system prompt +8. Modified `src/main/store/runAgent.ts` to incorporate systemPrompt in AI interactions +9. Tested the system prompt configuration functionality +10. Next steps: + - Move on to the next enhancement: Enhanced Logging with Images + +## Iteration 4: Error Logging Enhancement + +1. Updated `src/main/store/runAgent.ts` to import and use `electron-log` +2. Modified `promptForAction` function to log errors from the API call +3. Enhanced `runAgent` function to provide more detailed error messages to the user +4. Implemented error logging for both API calls and general execution errors +5. Added logging of the message stack sent to the API when errors occur +6. Included logging of the full conversation history when errors happen in the main loop +7. Corrected an oversight in the `promptForAction` function to ensure the correct return format is maintained +8. Next steps: + - Test the error logging functionality + - Investigate and fix the issue with images remaining in the message history + - Move on to the next enhancement: Enhanced Logging with Images + +## Iteration 5: Enhanced Logging with Images + +1. Updated `src/renderer/RunHistory.tsx` to display screenshots sent to the LLM API +2. Modified the component to handle both assistant messages and user messages containing images +3. Implemented image rendering using base64-encoded data +4. Ensured proper scrolling behavior for the run history +5. Tested the image display functionality +6. 
Next steps: + - Test the enhanced logging with images feature + - Move on to the next enhancement: Plugin System + +## Iteration 6: User Feedback and Continuous Input System + +1. Designed the continuous input data structure +2. Updated `src/main/store/types.ts` to include new types for user input +3. Modified `src/main/store/create.ts` to handle continuous user input in the store +4. Created a new `ContinuousInput` component in `src/renderer/ContinuousInput.tsx` +5. Updated `src/renderer/App.tsx` to include the new ContinuousInput component +6. Modified `src/main/store/runAgent.ts` to check for and incorporate new user inputs +7. Implemented the mechanism to add new user inputs to the message stack +8. Integrated the new input system with the existing run history +9. Fixed issues related to incorrect message roles and API compatibility: + - Updated `runAgent` function to filter out 'tool' role messages before sending to API + - Ensured tool use and results are included as part of 'assistant' messages + - Updated `AppMessage` type to only allow 'user' and 'assistant' roles +10. Corrected the return format in the `promptForAction` function to match expected `BetaMessageParam` type +11. Enhanced error handling and logging throughout the application +12. Next steps: + - Conduct thorough testing of the continuous input functionality + - Ensure the AI can properly respond to and incorporate new instructions + - Refine error handling and user feedback mechanisms + - Move on to the next enhancement: Plugin System + +## Iteration 7: Refinement and Testing + +1. Conduct comprehensive testing of all implemented features: + - Persistent settings + - System prompt configuration + - Enhanced logging with images + - Continuous input system +2. Identify and fix any remaining bugs or issues +3. Optimize performance and user experience +4. Update documentation to reflect recent changes and new features +5. 
Next steps: + - Complete testing and refinement phase + - Prepare for the implementation of the Plugin System + - Review and update project roadmap based on progress and any new requirements + +## Iteration 8: Window State and Instructions Box Persistence + +1. Implemented window state (size, position) persistence using electron-store +2. Added instructions box height persistence using localStorage +3. Updated `src/main/main.ts` to handle window state saving and restoring +4. Modified `src/renderer/App.tsx` to manage instructions box height persistence +5. Resolved issues with store access in the main process +6. Tested window state persistence across multiple sessions +7. Verified instructions box height persistence +8. Next steps: + - Address any remaining issues or inconsistencies + - Move on to the next planned enhancement: Plugin System + +## Iteration 9: Instructions Input Box Expansion + +1. Modified `src/renderer/App.tsx` to make the instructions input box expand to 100% flex height +2. Updated the textarea component to use `flex: 1` instead of a fixed height +3. Removed manual resizing functionality for the instructions box +4. Adjusted the layout to accommodate the expanded instructions box +5. Next steps: + - Test the expanded instructions input box functionality + - Ensure proper layout and scrolling behavior with the new design + - Address any potential conflicts with other components (e.g., run history, error messages) + - Continue with the implementation of the Plugin System + +## Iteration 10: Plugin System Implementation + +1. Created `src/main/pluginManager.ts` to handle plugin loading and management +2. Implemented plugin loading functionality in the main process +3. Added IPC handlers in `src/main/main.ts` for plugin-related operations +4. Created `src/renderer/Plugins.tsx` component for displaying and managing plugins +5. Updated `src/renderer/App.tsx` to include the new Plugins component as a tab +6. 
Implemented plugin installation functionality +7. Added ability to load and display installed plugins +8. Resolved issue with duplicate plugin display after app restart: + - Updated `pluginManager.ts` to clear existing plugins before loading + - Simplified `Plugins.tsx` to directly use the plugin list from the main process +9. Tested plugin loading, installation, and display functionality +10. Next steps: + - Implement plugin execution functionality + - Add ability to enable/disable plugins + - Create a more robust plugin API for developers + - Enhance error handling and user feedback for plugin operations + - Consider adding a plugin marketplace or repository feature + - Continue testing and refining the Plugin System + +## Iteration 11: Service Registry Implementation + +1. Created `src/main/serviceRegistry.ts` to manage and provide access to various services +2. Implemented `ServiceRegistry` class with methods to register, get, and retrieve all services +3. Updated `src/main/main.ts` to use the new service registry: + - Registered computerVision and anthropic services + - Added logging for registered services + - Implemented an IPC handler to get services from the renderer process +4. Modified `src/main/store/anthropic.ts` to register itself with the service registry +5. Updated type definitions in `serviceRegistry.ts` to include `AnthropicService` +6. Removed manual registration of anthropic service from `main.ts` +7. Tested service registration and logging functionality +8. 
Next steps: + - Ensure all services are properly registered and accessible + - Implement usage of registered services throughout the application + - Consider adding more services to the registry as needed + - Update renderer process to utilize the new service registry system + - Continue refining and testing the service registry implementation + - Move on to the next planned enhancement or address any remaining issues diff --git a/.cursor/errors_resolutions.md b/.cursor/errors_resolutions.md new file mode 100644 index 0000000..5c2ae47 --- /dev/null +++ b/.cursor/errors_resolutions.md @@ -0,0 +1,91 @@ +# Errors and Resolutions + +**Top Mistakes to Avoid:** + +1. **Always maintain the correct return format in API response handling:** Ensure that the `promptForAction` function always returns an object with `content` and `role` properties, matching the expected `BetaMessageParam` type. +2. **Use only allowed message roles:** When working with the Anthropic API, ensure that only 'user' and 'assistant' roles are used in the conversation history. +3. **Be cautious of plugin loading and display logic:** Ensure that plugins are loaded only once and displayed correctly in the UI. + +# Error # 1... + +# Error # 2: Incorrect Return Format in promptForAction + +**Error Description:** +The `promptForAction` function was inadvertently modified to return the raw API response instead of the expected `BetaMessageParam` format. + +**Resolution:** +Restored the correct return statement in the `promptForAction` function: +```typescript +return { content: message.content, role: message.role }; +``` + +**Lessons Learned:** +- Always double-check the return types and formats when modifying functions, especially those interfacing with external APIs. +- Maintain consistency with the defined types and interfaces throughout the codebase. +- When making changes, ensure that all parts of the function, including the return statement, are updated accordingly. 
+ +# Error # 3: Unexpected 'tool' Role in Message History + +**Error Description:** +The Anthropic API returned an error stating "Unexpected role 'tool'. Allowed roles are 'user' or 'assistant'." This occurred because the conversation history included messages with a 'tool' role, which is not supported by the current version of the API. + +**Resolution:** +1. Updated the `runAgent` function to filter out any messages with 'tool' role before sending to `promptForAction`: +```typescript +const filteredHistory = getState().runHistory.filter(msg => msg.role === 'user' || msg.role === 'assistant'); +const message = await promptForAction(filteredHistory, getState().systemPrompt); +``` +2. Ensured that tool use and tool results are included as part of 'assistant' messages in the conversation history. +3. Updated the `AppMessage` type in `types.ts` to only allow 'user' and 'assistant' roles: +```typescript +export type AppMessage = { + role: 'user' | 'assistant'; + content: string | BetaMessageParam['content']; +}; +``` + +**Lessons Learned:** +- Stay up-to-date with API documentation and requirements, as they may change over time. +- Implement proper type checking and validation to ensure that only allowed message roles are used in the conversation history. +- When working with external APIs, always validate the data structure before sending requests to avoid unexpected errors. + +# Error # 4: Duplicate Plugin Display in UI + +**Error Description:** +After restarting the application, installed plugins were being displayed twice in the UI, despite only being installed once. + +**Resolution:** +1. Updated the `pluginManager.ts` file to clear existing plugins before loading: +```typescript +loadPlugins() { + console.log(`Attempting to load plugins from: ${this.pluginsDir}`); + this.plugins = []; // Clear existing plugins before loading + // ... rest of the loading logic +} +``` +2. 
Simplified the `Plugins.tsx` component to directly use the plugin list received from the main process: +```typescript +export function Plugins() { + const [plugins, setPlugins] = useState<Plugin[]>([]); + + useEffect(() => { + const removeListener = window.electron.ipcRenderer.on('get-plugins-response', (installedPlugins) => { + setPlugins(installedPlugins as Plugin[]); + }); + + window.electron.ipcRenderer.sendMessage('get-plugins'); + + return () => { + removeListener(); + }; + }, []); + + // ... rest of the component +} +``` + +**Lessons Learned:** +- Be cautious of state management in both the main process and renderer process, especially when dealing with persistent data like plugins. +- Implement clear loading and resetting mechanisms for data that persists across app restarts. +- Use console logging strategically to track the flow of data and identify points of duplication or unexpected behavior. +- Always test the application's behavior after a restart to catch issues related to persistent data or state management. diff --git a/.cursor/instructions.md b/.cursor/instructions.md new file mode 100644 index 0000000..82d4c91 --- /dev/null +++ b/.cursor/instructions.md @@ -0,0 +1,13 @@ +Begin building out the project defined in @plan.md +# Every iteration, be sure to update the @current_status.md document to capture our place in the development cycle +# When errors are encountered, document them and their resolution in @errors_resolutions.md. If multiple attempts to fix take place, be sure to collect all attempts, the new reasoning, and ultimately the final resolution. + +# NOTE: pay close attention to directory structure, @plan.md is located in the project root. Any bootstrapping that takes place should understand where root is, and where any client / server-side applications are. 
+ +# IMPORTANT: record achievements in @current_status.md +# IMPORTANT: @current_status.md is append only +# IMPORTANT: @errors_resolutions.md is append only +# IMPORTANT: @plan.md is read only +# IMPORTANT: @instructions.md is read only + +# IMPORTANT: every iteration we should be able to run our application and have it work based on the stage of the project. Deliver Value every iteration. diff --git a/.cursor/plan.md b/.cursor/plan.md new file mode 100644 index 0000000..3d22a42 --- /dev/null +++ b/.cursor/plan.md @@ -0,0 +1,222 @@ +# Agent.exe Project Plan + +## Project Overview + +Agent.exe is an Electron-based application that allows Claude 3.5 Sonnet, an AI model, to control a user's local computer directly. The project aims to showcase and utilize Claude's computer use capabilities in a lightweight, user-friendly interface. + +## Project Directory + +``` +Agent.exe/ +├── .cursor/ # Directory for cursor-related files +│ ├── instructions.md # Instructions for the development process +│ ├── current_status.md # Current status of the project +│ └── plan.md # Project plan and roadmap +├── .erb/ # Electron React Boilerplate configurations +│ └── configs/ # Webpack and other build configurations +│ └── webpack.config.main.dev.ts # Webpack config for main process development +├── assets/ # Static assets like images and icons +├── release/ # Build output directory +│ └── app/ # Application package directory +│ └── package.json # Package configuration for the built app +├── src/ # Source code directory +│ ├── main/ # Main process code +│ │ ├── main.ts # Entry point for the main process +│ │ ├── preload.ts # Preload script for renderer process +│ │ ├── settings.ts # Manage application settings +│ │ └── store/ # State management for main process +│ │ ├── create.ts # Store creation and configuration +│ │ ├── runAgent.ts # Logic for running the AI agent +│ │ └── types.ts # TypeScript type definitions +│ └── renderer/ # Renderer process code +│ ├── App.tsx # Main React 
component +│ ├── index.ejs # HTML template for the app +│ ├── index.tsx # Entry point for the renderer process +│ ├── RunHistory.tsx # Component for displaying run history +│ ├── SystemPrompt.tsx # Component for system prompt configuration +│ ├── FeedbackRequest.tsx # Component for handling user feedback requests +│ ├── ContinuousInput.tsx # Component for continuous user input +│ ├── global.d.ts # Global type declarations +│ └── hooks/ # Custom React hooks +│ └── useStore.ts # Hook for accessing the Zustand store +├── .editorconfig # Editor configuration for consistent coding styles +├── .env # Environment variables (not tracked in git) +├── .eslintignore # Files to be ignored by ESLint +├── .eslintrc # ESLint configuration +├── .gitignore # Git ignore file +├── package.json # Node.js dependencies and scripts +├── README.md # Project documentation and overview +└── tsconfig.json # TypeScript configuration +``` + +## Current Features + +1. Direct computer control by Claude 3.5 Sonnet +2. Real-time execution of AI-generated commands +3. Simple Electron-based user interface +4. Support for MacOS (with theoretical support for Windows and Linux) +5. Primary display interaction +6. Emergency stop functionality + +## Planned Enhancements + +### ✅ 1. Persistent Settings + +- ✅ Implement a settings storage system that saves user preferences across application restarts +- ✅ Utilize Electron's `electron-store` or a similar library for efficient local storage + +### ✅ 2. System Prompt Configuration + +- ✅ Add a new tab or section in the UI for users to input and edit system prompts +- ✅ Store and load system prompts as part of the persistent settings + +### ✅ 3. Enhanced Logging with Images + +- ✅ Modify the `RunHistory` component to include image support +- ✅ Capture and display screenshots of actions performed by the AI +- ✅ Implement an image storage and retrieval system + +### 4. 
Plugin System + +- ✅ Design and implement a plugin architecture to extend the application's functionality +- ✅ Create a plugin loader and manager +- ✅ Develop a standardized plugin API for third-party developers +- Implement a central registry for all services and plugins: + - Core services (e.g., Computer use service, Basic LLM service) + - Plugins +- Allow plugins to register with the central registry and specify: + - Services they provide + - Services they depend on + - Hook points they utilize (Pre-execution, Post-execution, LLM call interception) +- Enable plugins to define and register tools for LLM use: + - Plugins can specify which parts of their interface are available as tools + - Tools are registered with unique names for LLM invocation + - LLM responses can request to use these tools by name +- Implement a tool execution system: + - Process LLM requests to use specific tools + - Route tool requests to the appropriate plugin + - Handle tool execution results and incorporate them into the LLM context +- Develop an adapter system for tool compatibility: + - Allow tools to be exposed in standard API formats (e.g., OpenAI, Anthropic) + - Create adapters to translate between our internal tool format and standard formats + - Enable easy integration with various LLM providers and their function-calling capabilities +- Enable plugins to access other registered plugins and services through the central registry +- Implement a plugin discovery and dependency resolution system + +Example plugins: +1. Screenshot Analyzer (Pre-execution / Computer Use): + - Applies region flags to screenshots + - Performs local image classification before sending to LLM + - Provides a tool for on-demand image analysis (compatible with OpenAI function format) +2. Voice Input (Pre-execution): + - Converts voice commands to text input + - Offers a tool for transcribing audio files (adaptable to Anthropic tool format) +3. 
Voice Output (Post-execution): + - Converts LLM text responses to speech + - Provides a text-to-speech tool for selective vocalization (compatible with multiple API formats) +4. LLM Proxy Impersonator (LLM call interceptor): + - Impersonates various LLM APIs (OpenAI, Claude, OpenRouter, etc.) + - Allows monitoring and manipulation of message payloads + - Enables custom tool use or RAG implementations + - Offers tools for switching between different LLM providers (adaptable to various API formats) + +### ✅ 5. Window State Persistence + +- ✅ Save and restore the application window's size and position between restarts +- ✅ Utilize Electron's `electron-window-state` or a custom implementation + +### 6. Session Management + +- Implement functionality to save the current message stack as a "session" +- Create a session loading mechanism +- Design a user interface for managing (saving, loading, deleting) sessions + +### 7. Configurable Tools System + +- Develop a tool configuration interface in the settings +- Implement a tool management system that can launch, close, and manage tool state +- Create a screenshot masking system to focus on the active tool +- Pass tool context to the AI model + +### ✅ 8. User Feedback and Continuous Input System + +- ✅ Implement an always-present input box for users to provide additional instructions +- ✅ Create a mechanism for the AI to incorporate new instructions into its ongoing task +- ✅ Integrate the continuous input system with the existing run history + +## Implementation Plan + +### ✅ 1. Persistent Settings + +1. ✅ Install `electron-store` or a similar library +2. ✅ Create a `settings.ts` file to manage application settings +3. ✅ Modify the main process to load and save settings +4. ✅ Update the renderer process to reflect and modify settings + +### ✅ 2. System Prompt Configuration + +1. ✅ Add a new React component for system prompt input +2. ✅ Modify the store to include system prompt state +3. 
✅ Update the AI interaction logic to incorporate the system prompt + +### ✅ 3. Enhanced Logging with Images + +1. ✅ Modify the `RunHistory` component to support image display +2. ✅ Update the `runAgent` function to capture screenshots after each action +3. ✅ Implement an image storage system using the file system or a lightweight database + +### 4. Plugin System + +1. ✅ Design the plugin API and architecture +2. ✅ Create a plugin loader in the main process +3. ✅ Implement a plugin manager to handle plugin lifecycle +4. ✅ Develop a UI for enabling/disabling plugins +5. Implement a central registry for services and plugins +6. Enhance the plugin API to allow registration of services, dependencies, and hook points +7. Develop a system for plugins to define and register tools for LLM use +8. Implement a tool execution system to handle LLM requests for tool use +9. Create an adapter system for tool compatibility with standard API formats: + - Develop adapters for OpenAI function format + - Implement adapters for Anthropic tool format + - Design a flexible adapter interface for future API formats +10. Create a service and tool discovery mechanism for plugins +11. Update the `runAgent` function to incorporate plugin hooks, services, and tool use +12. Implement example plugins with various service integrations and tools, showcasing API format compatibility +13. Enhance error handling and logging for plugin and tool operations +14. Create comprehensive documentation for plugin developers, including: + - API reference + - Hook system usage + - Service interaction guidelines + - Tool definition and registration process + - Guide on adapting tools to standard API formats + - Example plugin and tool implementations with API format adaptations + +### ✅ 5. Window State Persistence + +1. ✅ Install `electron-window-state` or implement a custom solution +2. ✅ Modify the main process to save and restore window state +3. 
✅ Update the `createWindow` function to use the saved state + +### 6. Session Management + +1. Design the session data structure +2. Implement save and load functions for sessions +3. Create a UI for session management (list, save, load, delete) +4. Modify the store to handle session state + +### 7. Configurable Tools System + +1. Design the tool configuration data structure +2. Create a UI for tool configuration in the settings +3. Implement a tool management system in the main process +4. Modify the `runAgent` function to handle tool context and management +5. Develop a screenshot masking system for active tools + +### ✅ 8. User Feedback and Continuous Input System + +1. ✅ Design the continuous input data structure and integration with the existing message stack +2. ✅ Create a persistent UI component for the input box in the main application window +3. ✅ Implement a mechanism in the main process to handle new user inputs +4. ✅ Modify the `runAgent` function to check for and incorporate new user inputs during execution +5. ✅ Update the renderer process to display the input box and send new instructions to the main process +6. ✅ Integrate the new input system with the existing run history display diff --git a/README.md b/README.md index 8f99c84..65b9dbe 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,34 @@ +# Agent.exe: Claude's Computer Control Interface + +[📊 View Project Progress Report](progress_report.md) +[🗺️ View Project Plan](.cursor/plan.md) + Presenting **Agent.exe**: the easiest way to let Claude's new [computer use](https://www.anthropic.com/news/3-5-models-and-computer-use) capabilities take over your computer! buy pizza https://github.com/user-attachments/assets/2a371241-bc43-46d4-896e-256b3adc388d +## Recent Updates + +We've made significant improvements to Agent.exe. 
Here are some of the latest updates: + +![Update 1: System Prompt Configuration](update1.png) + +We've added a new "System Prompt" tab that allows users to customize the instructions given to Claude. This gives you more control over how Claude interacts with your computer. + +![Update 2: Continuous Input and Run History](update2.png) + +The main interface now includes a continuous input box for providing additional instructions to Claude during a run. We've also improved the run history display to show Claude's actions and reasoning more clearly. + +![Update 3: Plugin System](update3.png) + +We're excited to introduce our new plugin system! This feature allows users to extend Agent.exe's functionality with custom plugins. You can now install, manage, and utilize various plugins to enhance Claude's capabilities. The new Plugins tab provides an easy-to-use interface for managing your installed plugins. + +Check out our [Sample Plugin](src/plugins/SamplePlugin.js) for an example of how to create your own plugins! + +## ORIGINAL PROJECT 👇 + ### Motivation I wanted to see how good Claude's new [computer use](https://www.anthropic.com/news/3-5-models-and-computer-use) APIs were, and the default project they provided felt too heavyweight. This is a simple Electron app that lets Claude 3.5 Sonnet control your local computer directly. I was planning on adding a "semi-auto" mode where the user has to confirm each action before it executes, but each step is so slow I found that wasn't necessary and if the model is getting confused you can easily just hit the "stop" button to end the run. @@ -35,3 +60,5 @@ I wanted to see how good Claude's new [computer use](https://www.anthropic.com/n ### Roadmap - I literally wrote this in 6 hours, probably isn't going anywhere. But I will review PRs and merge them if they seem cool. 
+ +## Shannon Code Fork diff --git a/dev1.png b/dev1.png new file mode 100644 index 0000000..b342b04 Binary files /dev/null and b/dev1.png differ diff --git a/progress_report.md b/progress_report.md new file mode 100644 index 0000000..40f13b7 --- /dev/null +++ b/progress_report.md @@ -0,0 +1,64 @@ +# Agent.exe Progress Report + +## Current Status + +As of the latest update, Agent.exe has made significant progress in implementing planned features and enhancements. Here's a summary of the current status: + +### Completed Features + +1. **Persistent Settings** + - Implemented using electron-store + - Settings for instructions, fullyAuto, and systemPrompt are now persisted + +2. **System Prompt Configuration** + - Added a new tab for system prompt configuration + - Implemented saving and loading of system prompts + +3. **Enhanced Logging with Images** + - Modified RunHistory component to display screenshots + - Implemented image rendering using base64-encoded data + +4. **User Feedback and Continuous Input System** + - Implemented ContinuousInput component + - Integrated new input system with existing run history + - Added mechanism to incorporate new user inputs during agent execution + +5. **Window State Persistence** + - Implemented window state (size, position) persistence using electron-store + - Added instructions box height persistence using localStorage + +### Pending Features + +6. **Session Management** + - Not started + +7. **Configurable Tools System** + - Not started + +8. **Plugin System** + - Not started + +## Next Steps + +1. Implement the remaining planned enhancements: + - Session Management + - Configurable Tools System + - Plugin System +2. Conduct thorough testing of all implemented features +3. Optimize performance and user experience +4. 
/**
 * Contract for the service that captures what is currently on screen.
 */
export interface ComputerVisionService {
  /**
   * Captures a screenshot.
   *
   * @returns The screenshot as a base64-encoded PNG string.
   * @throws Error when the capturer reports no screen source at all.
   */
  getScreenshot: () => Promise<string>;
}

/**
 * Screenshot service backed by Electron's `desktopCapturer`.
 */
export const computerVisionService: ComputerVisionService = {
  getScreenshot: async (): Promise<string> => {
    const primaryDisplay = screen.getPrimaryDisplay();
    const { width, height } = primaryDisplay.size;

    // Request a thumbnail at the primary display's size so the capture is
    // not downscaled to the capturer's default thumbnail size.
    const sources = await desktopCapturer.getSources({
      types: ['screen'],
      thumbnailSize: { width, height },
    });

    // Prefer the source that belongs to the primary display. `display_id`
    // may be an empty string on some platforms, so fall back to the first
    // source (the previous behaviour) when no match is found.
    const primarySource =
      sources.find(
        (source) => source.display_id === String(primaryDisplay.id),
      ) ?? sources[0];

    if (!primarySource) {
      throw new Error('No display found for screenshot');
    }

    return primarySource.thumbnail.toPNG().toString('base64');
  },
};
using webpack. This gives us some performance wins. */ -import { app, BrowserWindow, ipcMain, shell, screen } from 'electron'; +import { app, BrowserWindow, ipcMain, shell, screen, dialog } from 'electron'; import log from 'electron-log'; import { autoUpdater } from 'electron-updater'; import path from 'path'; @@ -16,6 +16,12 @@ import { mainZustandBridge } from 'zutron/main'; import MenuBuilder from './menu'; import { store } from './store/create'; import { resolveHtmlPath } from './util'; +import Store from 'electron-store'; +import fs from 'fs'; +import { pluginManager } from './pluginManager'; +import { serviceRegistry } from './serviceRegistry'; +import { computerVisionService } from './computerVision'; +import { anthropicService } from './store/anthropic'; // Add this import class AppUpdater { constructor() { @@ -58,6 +64,35 @@ const installExtensions = async () => { .catch(console.log); }; +// Add this function to load plugins +async function loadPlugins() { + const pluginsDir = path.join(app.getAppPath(), 'src', 'plugins'); + console.log(`Attempting to load plugins from: ${pluginsDir}`); + + if (!fs.existsSync(pluginsDir)) { + console.log(`Plugins directory does not exist: ${pluginsDir}`); + return; + } + + const pluginFiles = fs.readdirSync(pluginsDir); + + for (const file of pluginFiles) { + if (file.endsWith('.js')) { + const pluginPath = path.join(pluginsDir, file); + try { + const PluginClass = require(pluginPath); + const plugin = new PluginClass(); + if (typeof plugin.initialize === 'function') { + plugin.initialize(); + } + console.log(`Loaded plugin: ${file}`); + } catch (error) { + console.error(`Error loading plugin ${file}:`, error); + } + } + } +} + const createWindow = async () => { if (isDebug) { await installExtensions(); @@ -75,26 +110,45 @@ const createWindow = async () => { const primaryDisplay = screen.getPrimaryDisplay(); const { width, height } = primaryDisplay.workAreaSize; - mainWindow = new BrowserWindow({ - show: false, + const 
electronStore = new Store(); + + interface WindowState { + width: number; + height: number; + x?: number; + y?: number; + } + + const windowState = electronStore.get('windowState', { width: 350, height: 600, - x: width - 350, // Position from right edge - y: 0, // Position from top edge (changed from: y: height - 500) - frame: false, // Remove default frame - transparent: true, // Optional: enables transparency - alwaysOnTop: true, // Keep window on top + x: undefined, + y: undefined, + }) as WindowState; + + mainWindow = new BrowserWindow({ + show: false, + width: windowState.width, + height: windowState.height, + x: windowState.x, + y: windowState.y, + frame: false, + transparent: true, + alwaysOnTop: true, icon: getAssetPath('icon.png'), webPreferences: { preload: app.isPackaged ? path.join(__dirname, 'preload.js') : path.join(__dirname, '../../.erb/dll/preload.js'), + contextIsolation: true, + nodeIntegration: false, + sandbox: false, }, }); mainWindow.loadURL(resolveHtmlPath('index.html')); - mainWindow.on('ready-to-show', () => { + mainWindow.webContents.on('did-finish-load', () => { if (!mainWindow) { throw new Error('"mainWindow" is not defined'); } @@ -103,6 +157,9 @@ const createWindow = async () => { } else { mainWindow.show(); } + + // Load plugins after the window content has loaded + pluginManager.loadPlugins(); }); mainWindow.on('closed', () => { @@ -144,6 +201,43 @@ const createWindow = async () => { ipcMain.handle('close-window', () => { mainWindow?.close(); }); + + ['resized', 'moved'].forEach((event) => { + mainWindow?.on(event as any, () => { + if (mainWindow && !mainWindow.isMaximized()) { + const bounds = mainWindow.getBounds(); + electronStore.set('windowState', bounds); + } + }); + }); + + // Add these handlers after the existing IPC handlers + ipcMain.on('get-plugins', (event) => { + const plugins = pluginManager.getPlugins(); + event.reply('get-plugins-response', plugins); + }); + + ipcMain.on('install-plugin', async (event) => { + const 
result = await dialog.showOpenDialog(mainWindow!, { + properties: ['openFile'], + filters: [{ name: 'JavaScript', extensions: ['js'] }], + }); + + if (!result.canceled && result.filePaths.length > 0) { + const filePath = result.filePaths[0]; + pluginManager.installPlugin(filePath); + event.reply('install-plugin-response', true); + } else { + event.reply('install-plugin-response', false); + } + }); + + // Add this new IPC handler for services + ipcMain.on('get-services', (event) => { + const services = serviceRegistry.getServices(); + log.info(`Sending services to renderer. Count: ${services.length}`); + event.reply('get-services-response', services); + }); }; /** @@ -161,11 +255,24 @@ app.on('window-all-closed', () => { app .whenReady() .then(() => { + // Register services + serviceRegistry.register('computerVision', computerVisionService); + serviceRegistry.register('anthropic', anthropicService); // Add this line + log.info('Services registered in whenReady'); + createWindow(); + pluginManager.loadPlugins(); app.on('activate', () => { - // On macOS it's common to re-create a window in the app when the - // dock icon is clicked and there are no other windows open. 
if (mainWindow === null) createWindow(); }); + + // Add this to log the registered services and their functions + const registeredServices = serviceRegistry.getAll(); + log.info('Registered services:', registeredServices); + + // Add an IPC handler to get services + ipcMain.handle('get-services', () => { + return serviceRegistry.getAll(); + }); }) .catch(console.log); diff --git a/src/main/pluginManager.ts b/src/main/pluginManager.ts new file mode 100644 index 0000000..bae1153 --- /dev/null +++ b/src/main/pluginManager.ts @@ -0,0 +1,60 @@ +import fs from 'fs'; +import path from 'path'; +import { app } from 'electron'; + +interface Plugin { + name: string; + initialize: () => void; +} + +class PluginManager { + private plugins: Plugin[] = []; + private pluginsDir: string; + + constructor() { + this.pluginsDir = path.join(app.getPath('userData'), 'plugins'); + if (!fs.existsSync(this.pluginsDir)) { + fs.mkdirSync(this.pluginsDir, { recursive: true }); + } + } + + loadPlugins() { + console.log(`Attempting to load plugins from: ${this.pluginsDir}`); + this.plugins = []; // Clear existing plugins before loading + + const pluginFiles = fs.readdirSync(this.pluginsDir); + + for (const file of pluginFiles) { + if (file.endsWith('.js')) { + const pluginPath = path.join(this.pluginsDir, file); + try { + const pluginContent = fs.readFileSync(pluginPath, 'utf-8'); + const PluginModule = eval(`(function(module, exports) { ${pluginContent} \n return module.exports; })(Object.create(null), {})`); + const plugin: Plugin = new PluginModule(); + this.plugins.push(plugin); + if (typeof plugin.initialize === 'function') { + plugin.initialize(); + } + console.log(`Loaded plugin: ${file}`); + } catch (error) { + console.error(`Error loading plugin ${file}:`, error); + } + } + } + } + + getPlugins(): Plugin[] { + return this.plugins; + } + + installPlugin(sourcePath: string) { + const fileName = path.basename(sourcePath); + const destPath = path.join(this.pluginsDir, fileName); + 
fs.copyFileSync(sourcePath, destPath); + console.log(`Installed plugin: ${fileName}`); + // Reload plugins after installation + this.loadPlugins(); + } +} + +export const pluginManager = new PluginManager(); diff --git a/src/main/preload.ts b/src/main/preload.ts index 300af24..725bffe 100644 --- a/src/main/preload.ts +++ b/src/main/preload.ts @@ -1,11 +1,11 @@ // Disable no-unused-vars, broken for spread args /* eslint no-unused-vars: off */ -import { contextBridge, ipcRenderer } from 'electron'; +import { contextBridge, ipcRenderer, IpcRendererEvent } from 'electron'; import { preloadZustandBridge } from 'zutron/preload'; import type { AppState } from './store/types'; -export type Channels = 'ipc-example'; +export type Channels = 'ipc-example' | 'get-plugins' | 'get-plugins-response' | 'install-plugin' | 'install-plugin-response'; const electronHandler = { ipcRenderer: { @@ -24,6 +24,9 @@ const electronHandler = { once(channel: Channels, func: (...args: unknown[]) => void) { ipcRenderer.once(channel, (_event, ...args) => func(...args)); }, + invoke(channel: string, ...args: unknown[]) { + return ipcRenderer.invoke(channel, ...args); + }, }, // Add window controls windowControls: { diff --git a/src/main/serviceRegistry.ts b/src/main/serviceRegistry.ts new file mode 100644 index 0000000..cb466fa --- /dev/null +++ b/src/main/serviceRegistry.ts @@ -0,0 +1,39 @@ +import log from 'electron-log'; + +type ServiceType = any; // We'll use a generic type since specific interfaces are now in their own files + +interface ServiceInfo { + name: string; + functions: string[]; +} + +class ServiceRegistry { + private services: Map = new Map(); + + register(name: string, service: ServiceType) { + this.services.set(name, service); + const functions = this.getFunctions(service); + log.info(`Registered service: ${name} with functions: ${functions.join(', ')}`); + } + + private getFunctions(service: ServiceType): string[] { + if (typeof service !== 'object' || service === null) { + 
return []; + } + + return Object.keys(service).filter(key => typeof service[key] === 'function'); + } + + get(name: string): T | undefined { + return this.services.get(name) as T | undefined; + } + + getAll(): ServiceInfo[] { + return Array.from(this.services.entries()).map(([name, service]) => ({ + name, + functions: this.getFunctions(service) + })); + } +} + +export const serviceRegistry = new ServiceRegistry(); diff --git a/src/main/settings.ts b/src/main/settings.ts new file mode 100644 index 0000000..d673de0 --- /dev/null +++ b/src/main/settings.ts @@ -0,0 +1,34 @@ +import Store from 'electron-store'; +import { AppState } from './store/types'; + +interface SettingsSchema { + instructions: string | null; + fullyAuto: boolean; + systemPrompt: string; +} + +const store = new Store({ + defaults: { + instructions: null, + fullyAuto: true, + systemPrompt: '', + }, +}); + +export const loadSettings = (): Partial => ({ + instructions: store.get('instructions'), + fullyAuto: store.get('fullyAuto'), + systemPrompt: store.get('systemPrompt'), +}); + +export const saveSettings = (state: Partial) => { + if (state.instructions !== undefined) { + store.set('instructions', state.instructions); + } + if (state.fullyAuto !== undefined) { + store.set('fullyAuto', state.fullyAuto); + } + if (state.systemPrompt !== undefined) { + store.set('systemPrompt', state.systemPrompt); + } +}; diff --git a/src/main/store/anthropic.ts b/src/main/store/anthropic.ts index 981d68d..d8cf593 100644 --- a/src/main/store/anthropic.ts +++ b/src/main/store/anthropic.ts @@ -1,9 +1,83 @@ import Anthropic from '@anthropic-ai/sdk'; +import { BetaMessageParam } from '@anthropic-ai/sdk/resources/beta/messages/messages'; import dotenv from 'dotenv'; +import { serviceRegistry } from '../serviceRegistry'; dotenv.config(); -export const anthropic = new Anthropic({ +export interface AnthropicService { + createVisionMessage: ( + systemPrompt: string, + messages: BetaMessageParam[], + tools: any[], + 
getAiScaledScreenDimensions: () => { width: number; height: number } + ) => Promise; + createMessage: ( + systemPrompt: string, + messages: BetaMessageParam[] + ) => Promise; +} + +const anthropic = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY, - baseURL: process.env.ANTHROPIC_BASE_URL || 'https://api.anthropic.com', }); + +export const anthropicService: AnthropicService = { + createVisionMessage: async ( + systemPrompt: string, + messages: BetaMessageParam[], + tools: any[], + getAiScaledScreenDimensions: () => { width: number; height: number } + ) => { + return await anthropic.beta.messages.create({ + model: 'claude-3-5-sonnet-20241022', + max_tokens: 1024, + system: systemPrompt, + messages: messages, + tools: [ + { + type: 'computer_20241022', + name: 'computer', + display_width_px: getAiScaledScreenDimensions().width, + display_height_px: getAiScaledScreenDimensions().height, + display_number: 1, + }, + { + name: 'finish_run', + description: + 'Call this function when you have achieved the goal of the task.', + input_schema: { + type: 'object', + properties: { + success: { + type: 'boolean', + description: 'Whether the task was successful', + }, + error: { + type: 'string', + description: 'The error message if the task was not successful', + }, + }, + required: ['success'], + }, + }, + ...tools, + ], + betas: ['computer-use-2024-10-22'], + }); + }, + createMessage: async ( + systemPrompt: string, + messages: BetaMessageParam[] + ) => { + return await anthropic.beta.messages.create({ + model: 'claude-3-5-sonnet-20241022', + max_tokens: 1024, + system: systemPrompt, + messages: messages, + }); + }, +}; + +// Register the anthropic service with the service registry +serviceRegistry.register('anthropic', anthropicService); diff --git a/src/main/store/create.ts b/src/main/store/create.ts index 768c8fb..4ee2846 100644 --- a/src/main/store/create.ts +++ b/src/main/store/create.ts @@ -2,21 +2,50 @@ import { createStore } from 'zustand/vanilla'; import { 
createDispatch } from 'zutron/main'; import { AppState } from './types'; import { runAgent } from './runAgent'; +import { loadSettings, saveSettings } from '../settings'; +import log from 'electron-log'; + +const initialSettings = loadSettings(); export const store = createStore((set, get) => ({ - instructions: 'find flights from seattle to sf for next tuesday to thursday', - fullyAuto: true, // renamed and changed default to true + instructions: initialSettings.instructions ?? '', + fullyAuto: initialSettings.fullyAuto ?? true, + systemPrompt: initialSettings.systemPrompt ?? '', running: false, error: null, runHistory: [], RUN_AGENT: async () => runAgent(set, get), STOP_RUN: () => set({ running: false }), - SET_INSTRUCTIONS: (instructions) => set({ instructions }), + SET_INSTRUCTIONS: (instructions) => { + set({ instructions }); + saveSettings({ instructions }); + }, SET_FULLY_AUTO: (fullyAuto) => { - // renamed from SET_HUMAN_SUPERVISED - set({ fullyAuto: fullyAuto ?? true }); // changed default to true + set({ fullyAuto }); + saveSettings({ fullyAuto }); + }, + SET_SYSTEM_PROMPT: (systemPrompt) => { + set({ systemPrompt }); + saveSettings({ systemPrompt }); }, CLEAR_HISTORY: () => set({ runHistory: [] }), + feedbackRequest: null, + SET_FEEDBACK_REQUEST: (request) => set({ feedbackRequest: request }), + SUBMIT_FEEDBACK: (response) => { + // Handle the feedback response + }, + userInput: null, + ADD_USER_INPUT: (input) => { + log.info('Store: ADD_USER_INPUT action received:', input); + const newInput = { id: Date.now().toString(), role: 'user', content: input }; + set({ userInput: newInput }); + log.info('Store: Updated state after ADD_USER_INPUT:', get()); + // Check if the agent is not running, and if so, start it + if (!get().running) { + log.info('Store: Agent not running, restarting...'); + runAgent(set, get); + } + }, })); export const dispatch = createDispatch(store); diff --git a/src/main/store/runAgent.ts b/src/main/store/runAgent.ts index 
e45d65e..18a3587 100644 --- a/src/main/store/runAgent.ts +++ b/src/main/store/runAgent.ts @@ -5,9 +5,11 @@ import { import { Button, Key, keyboard, mouse, Point } from '@nut-tree-fork/nut-js'; // import { createCanvas, loadImage } from 'canvas'; import { desktopCapturer, screen } from 'electron'; -import { anthropic } from './anthropic'; +import { serviceRegistry } from '../serviceRegistry'; import { AppState, NextAction } from './types'; import { extractAction } from './extractAction'; +import log from 'electron-log'; +import { store } from './create'; // Import the store const MAX_STEPS = 50; @@ -78,17 +80,23 @@ const mapFromAiSpace = (x: number, y: number) => { const promptForAction = async ( runHistory: BetaMessageParam[], + systemPrompt: string ): Promise => { - // Strip images from all but the last message + // Strip images from all but the last message and remove unnecessary fields const historyWithoutImages = runHistory.map((msg, index) => { - if (index === runHistory.length - 1) return msg; // Keep the last message intact + if (index === runHistory.length - 1) { + // Keep the last message intact, but remove unnecessary fields + const { role, content } = msg; + return { role, content }; + } if (Array.isArray(msg.content)) { return { - ...msg, + role: msg.role, content: msg.content.map((item) => { if (item.type === 'tool_result' && typeof item.content !== 'string') { return { - ...item, + type: item.type, + tool_use_id: item.tool_use_id, content: item.content?.filter((c) => c.type !== 'image'), }; } @@ -96,47 +104,28 @@ const promptForAction = async ( }), }; } - return msg; + // For text messages, just keep role and content + return { role: msg.role, content: msg.content }; }); - const message = await anthropic.beta.messages.create({ - model: 'claude-3-5-sonnet-20241022', - max_tokens: 1024, - tools: [ - { - type: 'computer_20241022', - name: 'computer', - display_width_px: getAiScaledScreenDimensions().width, - display_height_px: 
getAiScaledScreenDimensions().height, - display_number: 1, - }, - { - name: 'finish_run', - description: - 'Call this function when you have achieved the goal of the task.', - input_schema: { - type: 'object', - properties: { - success: { - type: 'boolean', - description: 'Whether the task was successful', - }, - error: { - type: 'string', - description: 'The error message if the task was not successful', - }, - }, - required: ['success'], - }, - }, - ], - system: `The user will ask you to perform a task and you should use their computer to do so. After each step, take a screenshot and carefully evaluate if you have achieved the right outcome. Explicitly show your thinking: "I have evaluated step X..." If not correct, try again. Only when you confirm a step was executed correctly should you move on to the next one. Note that you have to click into the browser address bar before typing a URL. You should always call a tool! Always return a tool call. Remember call the finish_run tool when you have achieved the goal of the task. Do not explain you have finished the task, just call the tool. 
Use keyboard shortcuts to navigate whenever possible.`, - // tool_choice: { type: 'any' }, - messages: historyWithoutImages, - betas: ['computer-use-2024-10-22'], - }); + try { + const llmService = serviceRegistry.get('anthropic'); + if (!llmService) { + throw new Error('LLM service not found in registry'); + } - return { content: message.content, role: message.role }; + const message = await llmService.createVisionMessage( + systemPrompt, + historyWithoutImages, + [], + getAiScaledScreenDimensions + ); + return message; + } catch (error) { + log.error('Error in promptForAction:', error); + log.error('Message stack:', JSON.stringify(historyWithoutImages, null, 2)); + throw error; + } }; export const performAction = async (action: NextAction) => { @@ -196,21 +185,18 @@ export const performAction = async (action: NextAction) => { }; export const runAgent = async ( - setState: (state: AppState) => void, - getState: () => AppState, + setState: (state: Partial) => void, + getState: () => AppState ) => { setState({ - ...getState(), running: true, runHistory: [{ role: 'user', content: getState().instructions ?? 
'' }], error: null, }); while (getState().running) { - // Add this check at the start of the loop if (getState().runHistory.length >= MAX_STEPS * 2) { setState({ - ...getState(), error: 'Maximum steps exceeded', running: false, }); @@ -218,9 +204,8 @@ export const runAgent = async ( } try { - const message = await promptForAction(getState().runHistory); + const message = await promptForAction(getState().runHistory, getState().systemPrompt); setState({ - ...getState(), runHistory: [...getState().runHistory, message], }); const { action, reasoning, toolId } = extractAction( @@ -231,14 +216,12 @@ export const runAgent = async ( if (action.type === 'error') { setState({ - ...getState(), error: action.message, running: false, }); break; } else if (action.type === 'finish') { setState({ - ...getState(), running: false, }); break; @@ -246,7 +229,7 @@ export const runAgent = async ( if (!getState().running) { break; } - performAction(action); + await performAction(action); await new Promise((resolve) => setTimeout(resolve, 500)); if (!getState().running) { @@ -254,7 +237,6 @@ export const runAgent = async ( } setState({ - ...getState(), runHistory: [ ...getState().runHistory, { @@ -282,11 +264,27 @@ export const runAgent = async ( }, ], }); + + // Check for user input + if (getState().userInput) { + const userInput = getState().userInput; + setState({ userInput: null }); + // Add the user input to the message stack + setState({ + runHistory: [...getState().runHistory, { role: 'user', content: userInput?.content ?? 
'' }], + }); + } } catch (error: unknown) { + log.error('Error in runAgent:', error); + log.error('Full message stack:', JSON.stringify(getState().runHistory, null, 2)); + let errorMessage = 'An unknown error occurred'; + if (error instanceof Error) { + errorMessage = error.message; + } else if (typeof error === 'object' && error !== null && 'message' in error) { + errorMessage = String(error.message); + } setState({ - ...getState(), - error: - error instanceof Error ? error.message : 'An unknown error occurred', + error: `Error: ${errorMessage}. Please try again or check the logs for more details.`, running: false, }); break; diff --git a/src/main/store/types.ts b/src/main/store/types.ts index e05897a..42935cd 100644 --- a/src/main/store/types.ts +++ b/src/main/store/types.ts @@ -14,17 +14,43 @@ export type NextAction = | { type: 'finish' } | { type: 'error'; message: string }; +export type FeedbackRequest = { + id: string; + question: string; +}; + +export type FeedbackResponse = { + id: string; + answer: string; +}; + +export type UserInput = { + id: string; + content: string; +}; + +export type AppMessage = { + role: 'user' | 'assistant'; + content: string | BetaMessageParam['content']; +}; + export type AppState = { instructions: string | null; fullyAuto: boolean; running: boolean; error: string | null; - - runHistory: BetaMessageParam[]; + runHistory: AppMessage[]; + systemPrompt: string; + feedbackRequest: FeedbackRequest | null; + userInput: UserInput | null; RUN_AGENT: () => void; STOP_RUN: () => void; SET_INSTRUCTIONS: (instructions: string) => void; SET_FULLY_AUTO: (fullyAuto: boolean) => void; + SET_SYSTEM_PROMPT: (systemPrompt: string) => void; CLEAR_HISTORY: () => void; + SET_FEEDBACK_REQUEST: (request: FeedbackRequest | null) => void; + SUBMIT_FEEDBACK: (response: FeedbackResponse) => void; + ADD_USER_INPUT: (input: string) => void; }; diff --git a/src/plugins/SamplePlugin.js b/src/plugins/SamplePlugin.js new file mode 100644 index 0000000..edaec5f 
--- /dev/null +++ b/src/plugins/SamplePlugin.js @@ -0,0 +1,13 @@ +class SamplePlugin { + constructor() { + this.name = 'Sample Plugin'; + } + + initialize() { + console.log(` : loaded and initialized. 🎉🎊🥳`); + } + + // Add any other methods or properties as needed +} + +module.exports = SamplePlugin; diff --git a/src/renderer/App.tsx b/src/renderer/App.tsx index a981f9d..f675604 100644 --- a/src/renderer/App.tsx +++ b/src/renderer/App.tsx @@ -1,4 +1,4 @@ -import React from 'react'; +import React, { useState, useEffect, useRef } from 'react'; import { Box, Button, @@ -8,6 +8,11 @@ import { Link, Switch, VStack, + Tabs, + TabList, + TabPanels, + Tab, + TabPanel, extendTheme, Spinner, useToast, @@ -18,6 +23,10 @@ import { Route, MemoryRouter as Router, Routes } from 'react-router-dom'; import { useDispatch } from 'zutron'; import { useStore } from './hooks/useStore'; import { RunHistory } from './RunHistory'; +import { SystemPrompt } from './SystemPrompt'; +import { FeedbackRequest } from './FeedbackRequest'; +import { ContinuousInput } from './ContinuousInput'; +import { Plugins } from './Plugins'; function Main() { const dispatch = useDispatch(window.zutron); @@ -27,26 +36,65 @@ function Main() { running, error, runHistory, + feedbackRequest, } = useStore(); - // Add local state for instructions const [localInstructions, setLocalInstructions] = React.useState( savedInstructions ?? 
'', ); - const toast = useToast(); // Add toast hook + const toast = useToast(); + const [activeTab, setActiveTab] = useState(0); + const [instructionsHeight, setInstructionsHeight] = useState(48); + const instructionsRef = useRef(null); + + useEffect(() => { + // Load saved instructions height + const savedHeight = localStorage.getItem('instructionsHeight'); + if (savedHeight) { + setInstructionsHeight(parseInt(savedHeight, 10)); + } + }, []); + + useEffect(() => { + if (error) { + toast({ + title: "Error", + description: error, + status: "error", + duration: 5000, + isClosable: true, + position: "top", + }); + } + }, [error, toast]); const startRun = () => { - // Update Zustand state before starting the run dispatch({ type: 'SET_INSTRUCTIONS', payload: localInstructions }); dispatch({ type: 'RUN_AGENT', payload: null }); }; + const clearHistory = () => { + dispatch({ type: 'CLEAR_HISTORY', payload: null }); + }; + const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === 'Enter' && !e.metaKey && !e.shiftKey) { + if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); startRun(); } }; + const handleFullyAutoChange = (e: React.ChangeEvent) => { + const newValue = e.target.checked; + dispatch({ type: 'SET_FULLY_AUTO', payload: newValue }); + }; + + const handleInstructionsResize = (e: React.ChangeEvent) => { + const height = e.target.scrollHeight; + setInstructionsHeight(height); + localStorage.setItem('instructionsHeight', height.toString()); + e.target.style.height = `${height}px`; + }; + return ( {/* Title heading no longer needs drag property since parent is draggable */} @@ -107,86 +155,86 @@ function Main() { pt={16} sx={{ '& > *': { - // Make all direct children non-draggable '-webkit-app-region': 'no-drag', }, }} > - ) => { - setLocalInstructions(e.target.value); - // Auto-adjust height - e.target.style.height = 'auto'; - e.target.style.height = `${e.target.scrollHeight}px`; - }} - onKeyDown={handleKeyDown} - /> + + + Main + System Prompt + 
Plugins + + + + + + ) => { + setLocalInstructions(e.target.value); + handleInstructionsResize(e); + }} + onKeyDown={handleKeyDown} + /> + + + + + + {feedbackRequest && ( + dispatch({ type: 'SUBMIT_FEEDBACK', payload: response })} + /> + )} + + + + + + + + + + + + + { - toast({ - description: - "Whoops, automatic mode isn't actually implemented yet. 😬", - status: 'info', - duration: 3000, - isClosable: true, - }); - }} + onChange={handleFullyAutoChange} /> Full Auto {running && } - {!running && runHistory.length > 0 && ( - - )} + - - {/* Add error display */} - {error && ( - - {error} - - )} - - {/* RunHistory component */} - - - ); diff --git a/src/renderer/ContinuousInput.tsx b/src/renderer/ContinuousInput.tsx new file mode 100644 index 0000000..78fdd1f --- /dev/null +++ b/src/renderer/ContinuousInput.tsx @@ -0,0 +1,28 @@ +import React, { useState } from 'react'; +import { Box, Button, Input, HStack } from '@chakra-ui/react'; +import { useDispatch } from 'zutron'; + +export function ContinuousInput() { + const [input, setInput] = useState(''); + const dispatch = useDispatch(window.zutron); + + const handleSubmit = () => { + if (input.trim()) { + dispatch({ type: 'ADD_USER_INPUT', payload: input }); + setInput(''); + } + }; + + return ( + + + setInput(e.target.value)} + placeholder="Enter additional instructions..." 
+ /> + + + + ); +} diff --git a/src/renderer/FeedbackRequest.tsx b/src/renderer/FeedbackRequest.tsx new file mode 100644 index 0000000..3c71de7 --- /dev/null +++ b/src/renderer/FeedbackRequest.tsx @@ -0,0 +1,26 @@ +import React, { useState } from 'react'; +import { Box, Button, Input, Text, VStack } from '@chakra-ui/react'; +import { FeedbackRequest as FeedbackRequestType, FeedbackResponse } from '../main/store/types'; + +type Props = { + request: FeedbackRequestType; + onSubmit: (response: FeedbackResponse) => void; +}; + +export function FeedbackRequest({ request, onSubmit }: Props) { + const [answer, setAnswer] = useState(''); + + const handleSubmit = () => { + onSubmit({ id: request.id, answer }); + }; + + return ( + + + {request.question} + setAnswer(e.target.value)} /> + + + + ); +} diff --git a/src/renderer/Plugins.tsx b/src/renderer/Plugins.tsx new file mode 100644 index 0000000..9963df1 --- /dev/null +++ b/src/renderer/Plugins.tsx @@ -0,0 +1,91 @@ +import React, { useEffect, useState } from 'react'; +import { Box, VStack, Text, Heading, Button, Divider } from '@chakra-ui/react'; + +interface Plugin { + name: string; +} + +interface Service { + name: string; + functions: string[]; +} + +export function Plugins() { + const [plugins, setPlugins] = useState([]); + const [services, setServices] = useState>([]); + + const fetchPlugins = () => { + window.electron.ipcRenderer.sendMessage('get-plugins'); + }; + + const fetchServices = async () => { + try { + const fetchedServices = await window.electron.ipcRenderer.invoke('get-services'); + setServices(fetchedServices); + } catch (error) { + console.error('Error fetching services:', error); + } + }; + + useEffect(() => { + fetchPlugins(); + fetchServices(); + + const removePluginListener = window.electron.ipcRenderer.on('get-plugins-response', (installedPlugins) => { + console.log('Received plugins:', installedPlugins); + setPlugins(installedPlugins as Plugin[]); + }); + + const removeServiceListener = 
window.electron.ipcRenderer.on('get-services-response', (registeredServices) => { + console.log('Received services:', registeredServices); + setServices(registeredServices as Service[]); + }); + + return () => { + removePluginListener(); + removeServiceListener(); + }; + }, []); + + const handleInstallPlugin = () => { + window.electron.ipcRenderer.sendMessage('install-plugin'); + }; + + useEffect(() => { + const removeListener = window.electron.ipcRenderer.on('install-plugin-response', (success) => { + if (success) { + fetchPlugins(); + } + }); + + return () => { + removeListener(); + }; + }, []); + + return ( + + Installed Plugins + {plugins.map((plugin, index) => ( + + {plugin.name} + + ))} + + + + + Registered Services + {services.map((service, index) => ( + + {service.name} +
    + {service.functions.map((func) => ( +
  • {func}
  • + ))} +
+
+ ))} +
+ ); +} diff --git a/src/renderer/RunHistory.tsx b/src/renderer/RunHistory.tsx index 82b63e2..81a3cfe 100644 --- a/src/renderer/RunHistory.tsx +++ b/src/renderer/RunHistory.tsx @@ -1,4 +1,4 @@ -import { Box } from '@chakra-ui/react'; +import { Box, Image } from '@chakra-ui/react'; import { useEffect } from 'react'; import { useStore } from './hooks/useStore'; import { extractAction } from '../main/store/extractAction'; @@ -7,8 +7,7 @@ export function RunHistory() { const { runHistory } = useStore(); const messages = runHistory - .filter((m) => m.role === 'assistant') - .map((m) => extractAction(m)); + .filter((m) => m.role === 'assistant' || (m.role === 'user' && Array.isArray(m.content))); useEffect(() => { const element = document.getElementById('run-history'); @@ -31,18 +30,39 @@ export function RunHistory() { p={4} overflow="auto" > - {messages.map((action, index) => { - const { type, ...params } = action.action; - return ( - - - {action.reasoning} + {messages.map((message, index) => { + if (message.role === 'assistant') { + const action = extractAction(message); + return ( + + + {action.reasoning} + + + {action.action.type}({JSON.stringify(action.action)}) + - - {type}({params ? 
JSON.stringify(params) : ''}) - - - ); + ); + } else if (Array.isArray(message.content)) { + const imageContent = message.content.find( + (item) => item.type === 'tool_result' && Array.isArray(item.content) + ); + if (imageContent && Array.isArray((imageContent as any).content)) { + const imageItem = (imageContent as any).content.find((item: any) => item.type === 'image'); + if (imageItem && imageItem.source && imageItem.source.type === 'base64') { + return ( + + Screenshot + + ); + } + } + } + return null; })} ); diff --git a/src/renderer/SystemPrompt.tsx b/src/renderer/SystemPrompt.tsx new file mode 100644 index 0000000..5fc98d8 --- /dev/null +++ b/src/renderer/SystemPrompt.tsx @@ -0,0 +1,47 @@ +import React, { useState } from 'react'; +import { + Box, + Button, + Textarea, + VStack, +} from '@chakra-ui/react'; +import { useDispatch } from 'zutron'; +import { useStore } from './hooks/useStore'; + +const DEFAULT_SYSTEM_PROMPT = `The user will ask you to perform a task and you should use their computer to do so. After each step, take a screenshot and carefully evaluate if you have achieved the right outcome. Explicitly show your thinking: "I have evaluated step X..." If not correct, try again. Only when you confirm a step was executed correctly should you move on to the next one. Note that you have to click into the browser address bar before typing a URL. You should always call a tool! Always return a tool call. Remember call the finish_run tool when you have achieved the goal of the task. Do not explain you have finished the task, just call the tool. 
Use keyboard shortcuts to navigate whenever possible.`; + +export function SystemPrompt() { + const dispatch = useDispatch(window.zutron); + const { systemPrompt } = useStore(); + const [localSystemPrompt, setLocalSystemPrompt] = useState(systemPrompt); + + const handleSave = () => { + dispatch({ type: 'SET_SYSTEM_PROMPT', payload: localSystemPrompt }); + }; + + const applyDefaultPrompt = () => { + setLocalSystemPrompt(DEFAULT_SYSTEM_PROMPT); + }; + + return ( + +