diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 00000000..fbb319f5 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,41 @@ + + +# AIOpsLab Project Instructions + +This is the AIOpsLab project - a holistic framework for designing, developing, and evaluating autonomous AIOps agents. + +## Project Context +- **Framework**: AIOpsLab is designed for building reproducible, standardized, interoperable and scalable benchmarks for AIOps agents +- **Language**: Python 3.11+ with Poetry for dependency management +- **Key Features**: + - Deploy microservice cloud environments + - Inject faults for testing + - Generate workloads + - Export telemetry data + - Orchestrate components + - Provide interfaces for agent interaction and evaluation + +## Code Style Guidelines +- Follow Python PEP 8 standards +- Use type hints where appropriate +- Maintain consistent docstring format +- Use the existing project structure and patterns + +## Key Components +- `aiopslab/`: Core framework code +- `aiopslab/generators/`: Fault injection and workload generation +- `aiopslab/observer/`: Monitoring and telemetry +- `aiopslab/orchestrator/`: Main orchestration logic +- `aiopslab/service/`: Service management utilities +- `clients/`: AI/ML client implementations +- `tests/`: Test suites + +## Dependencies +- Uses Poetry for dependency management +- Requires Python >= 3.11, < 3.13 +- Key dependencies include Kubernetes, OpenAI, Pydantic, Rich, Prometheus API client + +## Development Notes +- This is an active research project for AIOps agent evaluation +- Focus on maintaining compatibility with the existing benchmark suite +- When adding new features, consider the impact on reproducibility and scalability diff --git a/.gitignore b/.gitignore index 71e2721f..116e2db5 100644 --- a/.gitignore +++ b/.gitignore @@ -161,169 +161,6 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea 
folder. #.idea/ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. 
-# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/latest/usage/project/#working-with-version-control -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ - # Visual Studio Code .vscode/ @@ -360,3 +197,29 @@ aiopslab/observer/prometheus/prometheus/Chart.lock # Ignore customized config files aiopslab/config.yml scripts/ansible/inventory.yml + +# Additional security protections +# Environment files +.env* +*.env +env.local +.env.local +.env.*.local + +# API keys and secrets +*api_key* +*secret* +*token* +*.pem +*.key +*.crt + +# Log files that might contain sensitive data +*_output.log +flash_output.log +debug.log +*.log + +# Temporary files that might contain secrets +temp_*.txt +*_temp.txt diff --git a/HOW_TO_RUN.md b/HOW_TO_RUN.md new file mode 100644 index 00000000..2bc2f19f --- /dev/null +++ b/HOW_TO_RUN.md @@ -0,0 +1,190 @@ +# πŸš€ AIOpsLab - How to Run the Project + +## Prerequisites Setup + +### 1. Install Python 3.11+ (REQUIRED) + +**Method 1: From Python.org (Recommended)** +1. Visit [python.org/downloads](https://www.python.org/downloads/) +2. Download Python 3.11+ for Windows +3. Run installer and **IMPORTANT: Check "Add Python to PATH"** +4. Verify installation: Open new terminal and run `python --version` + +**Method 2: Using Chocolatey** +```powershell +# Install Chocolatey first if you don't have it +Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) + +# Install Python +choco install python --version=3.11.0 +``` + +**Method 3: Using Windows Package Manager** +```powershell +winget install Python.Python.3.11 +``` + +### 2. 
Install Poetry (Dependency Manager) + +After Python is installed, install Poetry: + +```powershell +# Method 1: Official installer (Recommended) +(Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python - + +# Method 2: Using pip +pip install poetry + +# Method 3: Using pipx (if you have it) +pipx install poetry +``` + +**Add Poetry to PATH:** +- Add `%APPDATA%\Python\Scripts` to your PATH environment variable +- Or restart your terminal/VS Code + +## Project Setup & Running + +### Step 1: Verify Prerequisites +```powershell +# Check Python version (should be 3.11+) +python --version + +# Check Poetry +poetry --version +``` + +### Step 2: Configure Poetry Environment +```powershell +# Set Poetry to use Python 3.11 +poetry env use python3.11 + +# Or if you have a specific Python path +poetry env use C:\Python311\python.exe +``` + +### Step 3: Install Dependencies +```powershell +# Install all project dependencies +poetry install +``` + +### Step 4: Create Configuration +```powershell +# Copy example config +copy aiopslab\config.yml.example aiopslab\config.yml +``` + +### Step 5: Set Environment Variables (Optional) +```powershell +# For OpenAI API (if you want to use AI features) +$env:OPENAI_API_KEY = "your-openai-api-key-here" + +# Make it permanent (optional) +[System.Environment]::SetEnvironmentVariable("OPENAI_API_KEY", "your-key-here", "User") +``` + +## Running the Project + +### Method 1: Using Poetry (Recommended) +```powershell +# Activate the virtual environment +poetry shell + +# Run the CLI +poetry run python cli.py +``` + +### Method 2: Using VS Code Tasks +1. Press `Ctrl+Shift+P` +2. Type "Tasks: Run Task" +3. Select "Run AIOpsLab CLI" + +### Method 3: Using VS Code Debug +1. Press `F5` to start debugging +2. 
Choose "Python: AIOpsLab CLI" + +## Available Commands + +Once the CLI is running, you can use these commands: + +``` +# Start a problem +start + +# List available problems +# (The problems are defined in aiopslab/orchestrator/problems/registry.py) + +# Exit the application +exit +``` + +## Running Tests +```powershell +# Run all tests +poetry run python -m pytest tests/ -v + +# Or use VS Code task +# Ctrl+Shift+P β†’ Tasks: Run Task β†’ Run Tests +``` + +## Code Formatting and Type Checking +```powershell +# Format code +poetry run black . + +# Type checking +poetry run pyright +``` + +## Troubleshooting + +### Python Not Found +- Ensure Python 3.11+ is installed and in PATH +- Try `python --version` and `py --version` +- Restart your terminal/VS Code after installation + +### Poetry Not Found +- Ensure Poetry is installed and in PATH +- Try `poetry --version` +- Add `%APPDATA%\Python\Scripts` to PATH + +### Dependencies Issues +```powershell +# Clear cache and reinstall +poetry cache clear --all pypi +poetry install --no-cache +``` + +### Virtual Environment Issues +```powershell +# Remove and recreate environment +poetry env remove python +poetry env use python3.11 +poetry install +``` + +## What the Project Does + +AIOpsLab is a framework for: +- **Testing AIOps agents** in simulated environments +- **Fault injection** in microservices +- **Workload generation** for testing +- **Telemetry collection** and analysis +- **Kubernetes orchestration** for cloud environments + +The CLI provides an interactive interface to: +- Start problem scenarios +- Interact with simulated environments +- Test AI agents for operations tasks +- Evaluate agent performance + +## Next Steps + +1. **Start with local setup** (no Azure needed) +2. **Use kind for Kubernetes** (local cluster) +3. **Set qualitative_eval: false** in config (no LLM calls) +4. **Explore the problems** in `aiopslab/orchestrator/problems/` +5. 
**Check the tutorial** in `TutorialSetup.md` for Kubernetes setup + +Remember: You can run AIOpsLab completely locally without any cloud resources! diff --git a/README.md b/README.md index cdf87afd..c7abf8a4 100644 --- a/README.md +++ b/README.md @@ -27,9 +27,7 @@ Moreover, AIOpsLab provides a built-in benchmark suite with a set of problems to

πŸ“¦ Installation

### Requirements -- Python >= 3.11 -- [Helm](https://helm.sh/) -- Additional requirements depend on the deployment option selected, which is explained in the next section + Recommended installation: ```bash @@ -119,23 +117,11 @@ The clients will automatically load API keys from your .env file. You can check the running status of the cluster using [k9s](https://k9scli.io/) or other cluster monitoring tools conveniently. -To browse your logged `session_id` values in the W&B app as a table: - -1. Make sure you have W&B installed and configured. -2. Set the USE_WANDB environment variable: - ```bash - # Add to your .env file - echo "USE_WANDB=true" >> .env - ``` -3. In the W&B web UI, open any run and click Tables β†’ Add Query Panel. -4. In the key field, type `runs.summary` and click `Run`, then you will see the results displayed in a table format.

βš™οΈ Usage

AIOpsLab can be used in the following ways: -- [Onboard your agent to AIOpsLab](#how-to-onboard-your-agent-to-aiopslab) -- [Add new applications to AIOpsLab](#how-to-add-new-applications-to-aiopslab) -- [Add new problems to AIOpsLab](#how-to-add-new-problems-to-aiopslab) +For detailed instructions on developing and testing agents, see [clients/README_AGENTS.md](./clients/README_AGENTS.md). ### Running agents remotely You can run AIOpsLab on a remote machine with larger computational resources. This section guides you through setting up and using AIOpsLab remotely. diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 00000000..a5fd84d8 --- /dev/null +++ b/SETUP.md @@ -0,0 +1,81 @@ +# AIOpsLab Setup Guide + +## Prerequisites Installation + +### For Windows: + +1. **Install Python 3.11+**: + - Download from [python.org](https://www.python.org/downloads/) + - Or use Windows Store: `winget install Python.Python.3.11` + - Or use Chocolatey: `choco install python --version=3.11.0` + +2. **Install Poetry**: + ```powershell + # Method 1: Official installer + (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python - + + # Method 2: Using pip + pip install poetry + + # Method 3: Using pipx (recommended) + pip install pipx + pipx install poetry + ``` + +3. **Add Poetry to PATH**: + - Add `%APPDATA%\Python\Scripts` to your PATH environment variable + - Or restart your terminal/VS Code + +## Project Setup + +1. **Configure Poetry to use Python 3.11**: + ```bash + poetry env use python3.11 + ``` + +2. **Install project dependencies**: + ```bash + poetry install + ``` + +3. **Activate the virtual environment**: + ```bash + poetry shell + ``` + +## Usage + +### Running the CLI +```bash +poetry run python cli.py +``` + +### Running Tests +```bash +poetry run python -m pytest tests/ -v +``` + +### Code Formatting +```bash +poetry run black . 
+``` + +### Type Checking +```bash +poetry run pyright +``` + +## VS Code Integration + +The workspace is configured with: +- **Tasks**: Use `Ctrl+Shift+P` β†’ "Tasks: Run Task" to run predefined tasks +- **Debug**: Use `F5` to start debugging the CLI or current file +- **Extensions**: Python, Black Formatter, Pylint, and Kubernetes tools are installed + +## Next Steps + +1. Install Python 3.11+ and Poetry following the instructions above +2. Run `poetry install` to set up the project +3. Review the `README.md` for detailed project information +4. Check `TutorialSetup.md` for additional setup instructions +5. Explore the `aiopslab/` directory for core functionality diff --git a/aiopslab/orchestrator/evaluators/qualitative.py b/aiopslab/orchestrator/evaluators/qualitative.py index 0df404b6..0efe859d 100644 --- a/aiopslab/orchestrator/evaluators/qualitative.py +++ b/aiopslab/orchestrator/evaluators/qualitative.py @@ -74,11 +74,26 @@ def inference(self, payload: list[dict[str, str]]) -> list[str]: if cache_result is not None: return cache_result - client = OpenAI(api_key=os.getenv("OPENAI_KEY")) + # Check if using Azure OpenAI + if os.getenv("OPENAI_API_TYPE") == "azure": + from openai import AzureOpenAI + azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + if not azure_endpoint: + raise ValueError("AZURE_OPENAI_ENDPOINT environment variable is required for Azure OpenAI") + client = AzureOpenAI( + api_key=os.getenv("OPENAI_API_KEY"), + api_version=os.getenv("OPENAI_API_VERSION", "2023-12-01-preview"), + azure_endpoint=azure_endpoint + ) + model_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "gpt-4") + else: + client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + model_name = "gpt-4-turbo-2024-04-09" + try: response = client.chat.completions.create( messages=payload, # type: ignore - model="gpt-4-turbo-2024-04-09", + model=model_name, max_tokens=1024, temperature=0.0, top_p=0.95, diff --git a/clients/README_AGENTS.md b/clients/README_AGENTS.md new file mode 100644 
index 00000000..e67de7ee --- /dev/null +++ b/clients/README_AGENTS.md @@ -0,0 +1,308 @@ +# Custom AIOps Agents for AIOpsLab + +This directory contains custom agents designed for the AIOpsLab framework. These agents demonstrate different approaches to solving AIOps problems with varying levels of complexity and capabilities. + +## Available Agents + +### 1. Custom Agent (`custom_agent.py`) +**Advanced agent with comprehensive problem-solving capabilities** + +**Features:** +- Systematic problem analysis framework +- Multi-step reasoning with hypothesis validation +- Telemetry data interpretation +- Iterative solution refinement +- Detailed analysis tracking and reporting + +**Best for:** +- Complex problems requiring deep analysis +- Scenarios where you need detailed reasoning traces +- Problems with multiple potential root causes +- When you want comprehensive documentation of the solution process + +**Usage:** +```python +from clients.custom_agent import CustomAgent + +agent = CustomAgent() +orchestrator.register_agent(agent, name="custom-aiops-agent") +``` + +### 2. Simple Agent (`simple_agent.py`) +**Streamlined agent focused on efficiency** + +**Features:** +- Clear, step-by-step problem analysis +- Systematic troubleshooting approach +- Effective solution implementation +- Minimal overhead with focused reasoning + +**Best for:** +- Straightforward problems with clear symptoms +- When you need quick, effective solutions +- Learning and understanding the AIOpsLab framework +- Production environments where efficiency is key + +**Usage:** +```python +from clients.simple_agent import SimpleAgent + +agent = SimpleAgent() +orchestrator.register_agent(agent, name="simple-aiops-agent") +``` + +## Getting Started + +### Prerequisites +1. Ensure AIOpsLab is properly set up and running +2. Configure your OpenAI API key for the LLM backend +3. Have a Kubernetes cluster available (local or remote) + +### Quick Start + +1. 
**Test the Simple Agent:** + ```bash + cd c:\workspace + python clients\test_agents.py --agent simple + ``` + +2. **Test the Custom Agent:** + ```bash + python clients\test_agents.py --agent custom --steps 15 + ``` + +3. **List Available Problems:** + ```bash + python clients\test_agents.py --list-problems + ``` + +4. **Test with a specific problem:** + ```bash + python clients\test_agents.py --agent custom --problem misconfig_app_hotel_res-mitigation-1 --steps 12 + ``` + +### Configuration + +Edit `clients/configs/agent_config.yml` to customize agent behavior: + +```yaml +agent_config: + custom_agent: + max_steps: 15 + analysis_depth: "comprehensive" + simple_agent: + max_steps: 10 + analysis_depth: "focused" +``` + +## Agent Architecture + +Both agents follow the standard AIOpsLab agent interface: + +```python +class Agent: + def init_context(self, problem_desc: str, instructions: str, apis: str): + """Initialize agent with problem context and available APIs""" + pass + + async def get_action(self, input_data: str) -> str: + """Process input and return the next action""" + pass +``` + +### Key Components + +1. **Problem Context Initialization** + - Parse problem description and instructions + - Categorize available APIs (telemetry, shell, submit) + - Set up the reasoning framework + +2. **Action Generation** + - Process environmental input + - Apply reasoning methodology + - Generate structured responses + +3. **Response Formatting** + - Follow consistent output patterns + - Provide clear reasoning traces + - Include actionable next steps + +## Customization + +### Creating Your Own Agent + +1. **Inherit from base patterns:** + ```python + class MyAgent: + def __init__(self): + self.history = [] + self.llm = GPT4Turbo() + + def init_context(self, problem_desc, instructions, apis): + # Your initialization logic + pass + + async def get_action(self, input_data): + # Your action generation logic + pass + ``` + +2. 
**Add custom reasoning:** + - Implement domain-specific analysis + - Add specialized API handling + - Create custom response formats + +3. **Register with orchestrator:** + ```python + orchestrator.register_agent(agent, name="my-custom-agent") + ``` + +### Extending Existing Agents + +You can extend the provided agents by: + +1. **Overriding methods:** + ```python + class EnhancedCustomAgent(CustomAgent): + def _process_input(self, input_data): + # Add custom input processing + return super()._process_input(input_data) + ``` + +2. **Adding new capabilities:** + ```python + class SpecializedAgent(SimpleAgent): + def __init__(self): + super().__init__() + self.specialized_tools = [] + + def add_specialized_analysis(self, data): + # Your specialized logic + pass + ``` + +## API Reference + +### Available APIs in AIOpsLab + +**Telemetry APIs:** +- Prometheus metrics queries +- Log analysis +- Trace data access + +**Shell API:** +- Execute commands in the target environment +- File system operations +- Service management + +**Submit API:** +- Submit solution for evaluation +- Provide final analysis report + +### Response Format + +Agents should structure their responses as: + +``` +Thought: +Action: +``` + +For the custom agent, use the enhanced format: + +``` +Analysis: +Hypothesis: +Action: +Rationale: +Expected_Outcome: +``` + +## Testing and Validation + +### Running Tests + +1. **Unit Tests:** + ```bash + python -m pytest tests/ -v + ``` + +2. **Integration Tests:** + ```bash + python clients\test_agents.py --agent simple --problem test_problem + ``` + +3. **Performance Tests:** + ```bash + python clients\test_agents.py --agent custom --steps 20 + ``` + +### Debugging + +Enable detailed logging by setting: +```python +import logging +logging.basicConfig(level=logging.DEBUG) +``` + +Monitor agent behavior: +```python +# For custom agent +summary = agent.get_analysis_summary() +print(f"Steps taken: {summary['total_steps']}") +``` + +## Best Practices + +1. 
**Problem Analysis** + - Always start with understanding the problem scope + - Gather baseline data before making changes + - Validate hypotheses with evidence + +2. **Solution Implementation** + - Make incremental changes + - Test each change before proceeding + - Document your reasoning + +3. **Error Handling** + - Gracefully handle API failures + - Provide meaningful error messages + - Implement retry mechanisms + +4. **Performance** + - Minimize unnecessary API calls + - Cache frequently accessed data + - Use appropriate timeouts + +## Troubleshooting + +### Common Issues + +1. **Agent not responding:** + - Check OpenAI API key configuration + - Verify network connectivity + - Review error logs + +2. **API errors:** + - Ensure Kubernetes cluster is accessible + - Check API permissions + - Verify service endpoints + +3. **Performance issues:** + - Reduce max_steps if needed + - Optimize API usage + - Check resource constraints + +## Contributing + +To contribute new agents or improvements: + +1. Fork the repository +2. Create a new agent following the established patterns +3. Add comprehensive tests +4. Update documentation +5. Submit a pull request + +## License + +This code is licensed under the MIT License. See LICENSE.txt for details. diff --git a/clients/custom_agent.py b/clients/custom_agent.py new file mode 100644 index 00000000..33c9517b --- /dev/null +++ b/clients/custom_agent.py @@ -0,0 +1,279 @@ +"""Custom AIOps Agent for AIOpsLab. 
+ +This agent implements a comprehensive approach to AIOps problem solving with: +- Systematic problem analysis +- Multi-step reasoning +- Telemetry data analysis +- Iterative solution refinement +""" + +import asyncio +import json +import re +from typing import Dict, List, Any, Optional + +from aiopslab.orchestrator import Orchestrator +from clients.utils.llm import GPT4Turbo +from clients.utils.templates import DOCS + + +class CustomAgent: + """A comprehensive AIOps agent with advanced reasoning capabilities.""" + + def __init__(self): + self.history = [] + self.llm = GPT4Turbo() + self.problem_context = {} + self.analysis_steps = [] + self.solution_attempts = [] + self.current_step = 0 + self.max_analysis_steps = 10 + + def init_context(self, problem_desc: str, instructions: str, apis: str): + """Initialize the context for the agent.""" + + # Store problem context + self.problem_context = { + 'description': problem_desc, + 'instructions': instructions, + 'apis': apis + } + + # Categorize APIs + self.shell_api = self._filter_dict(apis, lambda k, _: "exec_shell" in k) + self.submit_api = self._filter_dict(apis, lambda k, _: "submit" in k) + self.telemetry_apis = self._filter_dict( + apis, lambda k, _: "exec_shell" not in k and "submit" not in k + ) + + # Create enhanced system message + self.system_message = self._create_enhanced_system_message( + problem_desc, instructions, apis + ) + + # Initialize conversation + self.history.append({"role": "system", "content": self.system_message}) + self.history.append({"role": "user", "content": self._create_initial_task_message()}) + + def _create_enhanced_system_message(self, problem_desc: str, instructions: str, apis: str) -> str: + """Create an enhanced system message with structured approach.""" + + stringify_apis = lambda apis: "\n\n".join([f"{k}\n{v}" for k, v in apis.items()]) + + return f""" +{problem_desc} + +You are an advanced AIOps agent with the following capabilities: +1. 
Systematic problem analysis and diagnosis +2. Telemetry data interpretation +3. Root cause analysis +4. Solution implementation and validation +5. Iterative refinement + +ANALYSIS FRAMEWORK: +1. **Problem Understanding**: Thoroughly analyze the problem description +2. **Data Collection**: Gather relevant telemetry and system data +3. **Pattern Recognition**: Identify anomalies and patterns +4. **Hypothesis Formation**: Develop potential root causes +5. **Solution Design**: Create targeted mitigation strategies +6. **Implementation**: Execute solutions systematically +7. **Validation**: Verify effectiveness and monitor results + +AVAILABLE APIS: + +Telemetry APIs: +{stringify_apis(self.telemetry_apis)} + +Shell API: +{stringify_apis(self.shell_api)} + +Submit API: +{stringify_apis(self.submit_api)} + +RESPONSE FORMAT: +Always respond with structured reasoning: + +Analysis: +Hypothesis: +Action: +Rationale: +Expected_Outcome: + +IMPORTANT GUIDELINES: +- Take a methodical approach, don't jump to conclusions +- Always validate your hypotheses with data +- Consider multiple potential root causes +- Monitor the impact of your actions +- Be prepared to adapt your approach based on results +""" + + def _create_initial_task_message(self) -> str: + """Create the initial task message with structured approach.""" + return f""" +{self.problem_context['instructions']} + +Begin by conducting a systematic analysis of the problem: + +1. Start with understanding the problem scope and impact +2. Gather baseline telemetry data +3. Identify key metrics and potential anomalies +4. Form initial hypotheses about root causes +5. Design and implement targeted solutions +6. Validate results and iterate if needed + +Remember to follow the structured response format and provide clear reasoning for each step. +""" + + async def get_action(self, input_data: str) -> str: + """ + Main interface method for the agent. 
+ + Args: + input_data (str): Input from the orchestrator/environment + + Returns: + str: The agent's response/action + """ + + # Increment step counter + self.current_step += 1 + + # Add input to history + self.history.append({"role": "user", "content": input_data}) + + # Enhanced input processing + processed_input = self._process_input(input_data) + + # Generate response with context awareness + response = await self._generate_contextual_response(processed_input) + + # Add response to history + self.history.append({"role": "assistant", "content": response}) + + # Store analysis step + self.analysis_steps.append({ + 'step': self.current_step, + 'input': input_data, + 'response': response, + 'timestamp': asyncio.get_event_loop().time() + }) + + return response + + def _process_input(self, input_data: str) -> str: + """Process and enhance the input with context.""" + + # Extract any error messages or important data + error_patterns = [ + r'Error: (.+)', + r'Failed: (.+)', + r'Exception: (.+)', + r'Warning: (.+)' + ] + + extracted_info = [] + for pattern in error_patterns: + matches = re.findall(pattern, input_data, re.IGNORECASE) + extracted_info.extend(matches) + + # Add context about current step + context = f""" +Current Step: {self.current_step}/{self.max_analysis_steps} +Previous Actions: {len(self.analysis_steps)} analysis steps completed + +Input Data: +{input_data} +""" + + if extracted_info: + context += f""" +Extracted Key Information: +{chr(10).join(f"- {info}" for info in extracted_info)} +""" + + return context + + async def _generate_contextual_response(self, processed_input: str) -> str: + """Generate a response with enhanced context awareness.""" + + # Add context about current analysis state + context_prompt = f""" +{processed_input} + +Analysis Progress: +- Current step: {self.current_step} +- Previous hypotheses: {len(self.solution_attempts)} attempts +- Available APIs: {len(self.telemetry_apis)} telemetry + shell + submit + +Based on the current 
state and input, provide your structured response following the format: +Analysis: +Hypothesis: +Action: +Rationale: +Expected_Outcome: +""" + + # Get LLM response + temp_history = self.history.copy() + temp_history.append({"role": "user", "content": context_prompt}) + + response = self.llm.run(temp_history) + + return response[0] if isinstance(response, list) else response + + def _filter_dict(self, dictionary: Dict, filter_func) -> Dict: + """Filter dictionary based on a function.""" + return {k: v for k, v in dictionary.items() if filter_func(k, v)} + + def get_analysis_summary(self) -> Dict[str, Any]: + """Get a summary of the analysis performed.""" + return { + 'total_steps': self.current_step, + 'analysis_steps': self.analysis_steps, + 'solution_attempts': self.solution_attempts, + 'problem_context': self.problem_context + } + + def reset_analysis(self): + """Reset the analysis state for a new problem.""" + self.analysis_steps = [] + self.solution_attempts = [] + self.current_step = 0 + self.history = [] + self.problem_context = {} + + +# Example usage and testing +if __name__ == "__main__": + async def test_agent(): + """Test the custom agent with a sample problem.""" + + agent = CustomAgent() + orchestrator = Orchestrator() + orchestrator.register_agent(agent, name="custom-aiops-agent") + + # Test with a sample problem + try: + pid = "misconfig_app_hotel_res-mitigation-1" + problem_desc, instructions, apis = orchestrator.init_problem(pid) + agent.init_context(problem_desc, instructions, apis) + + print(f"Initialized agent with problem: {pid}") + print(f"Problem Description: {problem_desc[:200]}...") + print(f"Available APIs: {len(apis)}") + + # Start the problem solving process + await orchestrator.start_problem(max_steps=15) + + # Get analysis summary + summary = agent.get_analysis_summary() + print(f"\nAnalysis Summary:") + print(f"Total Steps: {summary['total_steps']}") + print(f"Analysis Steps: {len(summary['analysis_steps'])}") + + except Exception as 
e: + print(f"Error during testing: {e}") + print("This is expected if the full AIOpsLab environment is not set up.") + + # Run the test + asyncio.run(test_agent()) diff --git a/clients/simple_agent.py b/clients/simple_agent.py new file mode 100644 index 00000000..a1e0d6f7 --- /dev/null +++ b/clients/simple_agent.py @@ -0,0 +1,136 @@ +"""Simple AIOps Agent for AIOpsLab. + +A streamlined agent that focuses on: +- Clear problem analysis +- Systematic troubleshooting +- Effective solution implementation +""" + +import asyncio +import json +from typing import Dict, Any + +from aiopslab.orchestrator import Orchestrator +from clients.utils.llm import GPT4Turbo +from clients.utils.templates import DOCS + + +class SimpleAgent: + """A simple, effective AIOps agent.""" + + def __init__(self): + self.history = [] + self.llm = GPT4Turbo() + self.step_count = 0 + + def init_context(self, problem_desc: str, instructions: str, apis: str): + """Initialize the context for the agent.""" + + # Categorize APIs + self.shell_api = self._filter_dict(apis, lambda k, _: "exec_shell" in k) + self.submit_api = self._filter_dict(apis, lambda k, _: "submit" in k) + self.telemetry_apis = self._filter_dict( + apis, lambda k, _: "exec_shell" not in k and "submit" not in k + ) + + # Create system message using the standard template + stringify_apis = lambda apis: "\n\n".join([f"{k}\n{v}" for k, v in apis.items()]) + + self.system_message = DOCS.format( + prob_desc=problem_desc, + telemetry_apis=stringify_apis(self.telemetry_apis), + shell_api=stringify_apis(self.shell_api), + submit_api=stringify_apis(self.submit_api), + ) + + # Add enhanced instructions + enhanced_instructions = f""" +{instructions} + +PROBLEM-SOLVING APPROACH: +1. First, understand the problem by analyzing the description and any immediate symptoms +2. Gather telemetry data to understand the current system state +3. Identify potential root causes based on the data +4. Implement targeted solutions step by step +5. 
Validate that your solution addresses the root cause +6. Submit your solution when confident it resolves the issue + +IMPORTANT GUIDELINES: +- Be systematic in your approach +- Always explain your reasoning +- Don't rush to solutions without understanding the problem +- Use telemetry data to guide your decisions +- Test your solutions before submitting +""" + + # Initialize conversation + self.history.append({"role": "system", "content": self.system_message}) + self.history.append({"role": "user", "content": enhanced_instructions}) + + async def get_action(self, input_data: str) -> str: + """ + Main interface method for the agent. + + Args: + input_data (str): Input from the orchestrator/environment + + Returns: + str: The agent's response/action + """ + + self.step_count += 1 + + # Add step context to input + contextual_input = f""" +Step {self.step_count}: +{input_data} + +Remember to: +- Think through the problem step by step +- Use "Thought:" to explain your reasoning +- Use "Action:" to specify your next action +- Be specific about what you're trying to achieve +""" + + # Add to history + self.history.append({"role": "user", "content": contextual_input}) + + # Get LLM response + response = self.llm.run(self.history) + result = response[0] if isinstance(response, list) else response + + # Add response to history + self.history.append({"role": "assistant", "content": result}) + + return result + + def _filter_dict(self, dictionary: Dict, filter_func) -> Dict: + """Filter dictionary based on a function.""" + return {k: v for k, v in dictionary.items() if filter_func(k, v)} + + +# Example usage +if __name__ == "__main__": + async def test_simple_agent(): + """Test the simple agent.""" + + agent = SimpleAgent() + orchestrator = Orchestrator() + orchestrator.register_agent(agent, name="simple-aiops-agent") + + try: + pid = "misconfig_app_hotel_res-mitigation-1" + problem_desc, instructions, apis = orchestrator.init_problem(pid) + agent.init_context(problem_desc, 
instructions, apis) + + print(f"Simple agent initialized with problem: {pid}") + + # Start the problem solving process + await orchestrator.start_problem(max_steps=10) + + except Exception as e: + print(f"Error during testing: {e}") + print("This is expected if the full AIOpsLab environment is not set up.") + + # Run the test + asyncio.run(test_simple_agent()) diff --git a/clients/test_agents.py b/clients/test_agents.py new file mode 100644 index 00000000..af44ac31 --- /dev/null +++ b/clients/test_agents.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +"""Test script for custom AIOps agents. + +This script helps you test your custom agents with different problems +and configurations in the AIOpsLab environment. +""" + +import asyncio +import sys +import argparse +from pathlib import Path + +# Add the project root to the path +sys.path.append(str(Path(__file__).parent.parent)) + +from aiopslab.orchestrator import Orchestrator +from clients.custom_agent import CustomAgent +from clients.simple_agent import SimpleAgent + + +class AgentTester: + """Test runner for AIOps agents.""" + + def __init__(self): + self.orchestrator = Orchestrator() + self.agents = { + 'custom': CustomAgent, + 'simple': SimpleAgent + } + + async def test_agent(self, agent_type: str, problem_id: str, max_steps: int = 10): + """Test a specific agent with a problem.""" + + if agent_type not in self.agents: + print(f"Unknown agent type: {agent_type}") + print(f"Available agents: {list(self.agents.keys())}") + return + + print(f"Testing {agent_type} agent with problem: {problem_id}") + print("-" * 50) + + # Create agent instance + agent = self.agents[agent_type]() + + # Register with orchestrator + self.orchestrator.register_agent(agent, name=f"{agent_type}-test-agent") + + try: + # Initialize problem + problem_desc, instructions, apis = self.orchestrator.init_problem(problem_id) + + print(f"Problem Description: {problem_desc[:200]}...") + print(f"Available APIs: {len(apis)}") + print(f"Max Steps: 
{max_steps}") + print("-" * 50) + + # Initialize agent context + agent.init_context(problem_desc, instructions, apis) + + # Start problem solving + await self.orchestrator.start_problem(max_steps=max_steps) + + # Print summary if available + if hasattr(agent, 'get_analysis_summary'): + summary = agent.get_analysis_summary() + print(f"\nAnalysis Summary:") + print(f"Total Steps: {summary['total_steps']}") + print(f"Analysis Steps: {len(summary['analysis_steps'])}") + + print("\nAgent test completed successfully!") + + except Exception as e: + print(f"Error during agent test: {e}") + import traceback + traceback.print_exc() + + def list_problems(self): + """List available problems.""" + try: + problems = self.orchestrator.probs.list_problems() + print("Available problems:") + for problem in problems: + print(f" - {problem}") + except Exception as e: + print(f"Error listing problems: {e}") + + def list_agents(self): + """List available agents.""" + print("Available agents:") + for agent_name, agent_class in self.agents.items(): + print(f" - {agent_name}: {agent_class.__doc__ or 'No description'}") + + +async def main(): + """Main function with command line interface.""" + + parser = argparse.ArgumentParser(description='Test AIOps agents') + parser.add_argument('--agent', '-a', choices=['custom', 'simple'], + default='simple', help='Agent type to test') + parser.add_argument('--problem', '-p', + default='misconfig_app_hotel_res-mitigation-1', + help='Problem ID to test') + parser.add_argument('--steps', '-s', type=int, default=10, + help='Maximum number of steps') + parser.add_argument('--list-problems', action='store_true', + help='List available problems') + parser.add_argument('--list-agents', action='store_true', + help='List available agents') + + args = parser.parse_args() + + tester = AgentTester() + + if args.list_problems: + tester.list_problems() + return + + if args.list_agents: + tester.list_agents() + return + + # Test the agent + await 
tester.test_agent(args.agent, args.problem, args.steps) + + +if __name__ == "__main__": + # Example usage without arguments + if len(sys.argv) == 1: + print("AIOps Agent Tester") + print("=" * 30) + print("\nUsage examples:") + print(" python test_agents.py --agent simple --problem misconfig_app_hotel_res-mitigation-1") + print(" python test_agents.py --agent custom --steps 15") + print(" python test_agents.py --list-problems") + print(" python test_agents.py --list-agents") + print("\nRunning with default settings (simple agent)...") + print("-" * 50) + + try: + asyncio.run(main()) + except KeyboardInterrupt: + print("\nTest interrupted by user") + except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() diff --git a/clients/utils/llm.py b/clients/utils/llm.py index 6632a80a..68a891c8 100644 --- a/clients/utils/llm.py +++ b/clients/utils/llm.py @@ -156,197 +156,7 @@ def inference(self, payload: list[dict[str, str]]) -> list[str]: if cache_result is not None: return cache_result - client = OpenAI(api_key=os.getenv("DEEPSEEK_API_KEY"), - base_url="https://api.deepseek.com") - try: - response = client.chat.completions.create( - messages=payload, # type: ignore - model="deepseek-reasoner", - max_tokens=1024, - stop=[], - ) - - except Exception as e: - print(f"Exception: {repr(e)}") - raise e - - return [c.message.content for c in response.choices] # type: ignore - - def run(self, payload: list[dict[str, str]]) -> list[str]: - response = self.inference(payload) - if self.cache is not None: - self.cache.add_to_cache(payload, response) - self.cache.save_cache() - return response - - -class QwenClient: - """Abstraction for Qwen's model. 
Some Qwen models only support streaming output.""" - - def __init__(self): - self.cache = Cache() - - def inference(self, payload: list[dict[str, str]]) -> list[str]: - if self.cache is not None: - cache_result = self.cache.get_from_cache(payload) - if cache_result is not None: - return cache_result - client = OpenAI(api_key=os.getenv("DASHSCOPE_API_KEY"), - base_url="https://dashscope.aliyuncs.com/compatible-mode/v1") - try: - # TODO: Add constraints for the input context length - response = client.chat.completions.create( - messages=payload, # type: ignore - model="qwq-32b", - max_tokens=1024, - n=1, - timeout=60, - stop=[], - stream=True - ) - except Exception as e: - print(f"Exception: {repr(e)}") - raise e - - reasoning_content = "" - answer_content = "" - is_answering = False - - for chunk in response: - if not chunk.choices: - print("\nUsage:") - print(chunk.usage) - else: - delta = chunk.choices[0].delta - if hasattr(delta, 'reasoning_content') and delta.reasoning_content != None: - reasoning_content += delta.reasoning_content - else: - if delta.content != "" and is_answering is False: - is_answering = True - answer_content += delta.content - - return [answer_content] - - def run(self, payload: list[dict[str, str]]) -> list[str]: - response = self.inference(payload) - if self.cache is not None: - self.cache.add_to_cache(payload, response) - self.cache.save_cache() - return response - - -class vLLMClient: - """Abstraction for local LLM models.""" - - def __init__(self, - model="Qwen/Qwen2.5-Coder-3B-Instruct", - repetition_penalty=1.0, - temperature=1.0, - top_p=0.95, - max_tokens=1024): - self.cache = Cache() - self.model = model - self.repetition_penalty = repetition_penalty - self.temperature = temperature - self.top_p = top_p - self.max_tokens = max_tokens - - def inference(self, payload: list[dict[str, str]]) -> list[str]: - if self.cache is not None: - cache_result = self.cache.get_from_cache(payload) - if cache_result is not None: - return 
cache_result - - client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1") - try: - response = client.chat.completions.create( - messages=payload, # type: ignore - model=self.model, - max_tokens=self.max_tokens, - temperature=self.temperature, - top_p=self.top_p, - frequency_penalty=0.0, - presence_penalty=0.0, - n=1, - timeout=60, - stop=[], - ) - except Exception as e: - print(f"Exception: {repr(e)}") - raise e - - return [c.message.content for c in response.choices] # type: ignore - - def run(self, payload: list[dict[str, str]]) -> list[str]: - response = self.inference(payload) - if self.cache is not None: - self.cache.add_to_cache(payload, response) - self.cache.save_cache() - return response - - -class OpenRouterClient: - """Abstraction for OpenRouter API with support for multiple models.""" - - def __init__(self, model="anthropic/claude-3.5-sonnet"): - self.cache = Cache() - self.model = model - - def inference(self, payload: list[dict[str, str]]) -> list[str]: - if self.cache is not None: - cache_result = self.cache.get_from_cache(payload) - if cache_result is not None: - return cache_result - - client = OpenAI( - api_key=os.getenv("OPENROUTER_API_KEY"), - base_url="https://openrouter.ai/api/v1" - ) - try: - response = client.chat.completions.create( - messages=payload, # type: ignore - model=self.model, - max_tokens=1024, - temperature=0.5, - top_p=0.95, - frequency_penalty=0.0, - presence_penalty=0.0, - n=1, - timeout=60, - stop=[], - ) - except Exception as e: - print(f"Exception: {repr(e)}") - raise e - - return [c.message.content for c in response.choices] # type: ignore - - def run(self, payload: list[dict[str, str]]) -> list[str]: - response = self.inference(payload) - if self.cache is not None: - self.cache.add_to_cache(payload, response) - self.cache.save_cache() - return response - - -class LLaMAClient: - """Abstraction for Meta's LLaMA-3 model.""" - - def __init__(self): - self.cache = Cache() - - def inference(self, payload: 
list[dict[str, str]]) -> list[str]: - if self.cache is not None: - cache_result = self.cache.get_from_cache(payload) - if cache_result is not None: - return cache_result - - client = Groq(api_key=os.getenv("GROQ_API_KEY")) - try: - response = client.chat.completions.create( - messages=payload, - model="llama-3.1-8b-instant", max_tokens=1024, temperature=0.5, top_p=0.95, diff --git a/fix-storage-classes.yaml b/fix-storage-classes.yaml new file mode 100644 index 00000000..0bdd2505 --- /dev/null +++ b/fix-storage-classes.yaml @@ -0,0 +1,47 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: geo-storage +provisioner: openebs.io/local +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Delete +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: profile-storage +provisioner: openebs.io/local +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Delete +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: rate-storage +provisioner: openebs.io/local +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Delete +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: recommendation-storage +provisioner: openebs.io/local +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Delete +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: reservation-storage +provisioner: openebs.io/local +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Delete +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: user-storage +provisioner: openebs.io/local +volumeBindingMode: WaitForFirstConsumer +reclaimPolicy: Delete diff --git a/kind.exe b/kind.exe new file mode 100644 index 00000000..4fcf7b2b Binary files /dev/null and b/kind.exe differ diff --git a/kubectl.exe b/kubectl.exe new file mode 100644 index 00000000..006a33c6 Binary files /dev/null and b/kubectl.exe differ diff --git a/quick-setup.ps1 b/quick-setup.ps1 new file mode 100644 index 
00000000..42581b39 --- /dev/null +++ b/quick-setup.ps1 @@ -0,0 +1,74 @@ +#!/usr/bin/env powershell +# Quick Setup Script for AIOpsLab on Windows + +Write-Host "πŸš€ AIOpsLab Quick Setup" -ForegroundColor Green +Write-Host "========================" -ForegroundColor Green + +# Check if Python is installed +Write-Host "`n1. Checking Python installation..." -ForegroundColor Yellow +try { + $pythonVersion = python --version 2>&1 + if ($pythonVersion -match "Python 3\.1\d+") { + Write-Host "βœ… Python found: $pythonVersion" -ForegroundColor Green + } else { + Write-Host "❌ Python 3.11+ required. Current: $pythonVersion" -ForegroundColor Red + Write-Host "Please install Python 3.11+ from https://python.org/downloads/" -ForegroundColor Yellow + exit 1 + } +} catch { + Write-Host "❌ Python not found. Please install Python 3.11+ from https://python.org/downloads/" -ForegroundColor Red + exit 1 +} + +# Check if Poetry is installed +Write-Host "`n2. Checking Poetry installation..." -ForegroundColor Yellow +try { + $poetryVersion = poetry --version 2>&1 + Write-Host "βœ… Poetry found: $poetryVersion" -ForegroundColor Green +} catch { + Write-Host "❌ Poetry not found. Installing Poetry..." -ForegroundColor Red + Write-Host "Installing Poetry via pip..." -ForegroundColor Yellow + python -m pip install poetry + + # Verify installation + try { + $poetryVersion = poetry --version 2>&1 + Write-Host "βœ… Poetry installed: $poetryVersion" -ForegroundColor Green + } catch { + Write-Host "❌ Poetry installation failed. Please install manually." -ForegroundColor Red + exit 1 + } +} + +# Configure Poetry environment +Write-Host "`n3. Configuring Poetry environment..." -ForegroundColor Yellow +poetry env use python + +# Install dependencies +Write-Host "`n4. Installing project dependencies..." -ForegroundColor Yellow +poetry install + +# Create config file +Write-Host "`n5. Creating configuration file..." 
-ForegroundColor Yellow +if (-Not (Test-Path "aiopslab\config.yml")) { + Copy-Item "aiopslab\config.yml.example" "aiopslab\config.yml" + Write-Host "βœ… Configuration file created" -ForegroundColor Green +} else { + Write-Host "βœ… Configuration file already exists" -ForegroundColor Green +} + +# Setup complete +Write-Host "`nπŸŽ‰ Setup Complete!" -ForegroundColor Green +Write-Host "==================" -ForegroundColor Green +Write-Host "" +Write-Host "To run AIOpsLab:" -ForegroundColor Cyan +Write-Host " poetry run python cli.py" -ForegroundColor White +Write-Host "" +Write-Host "Or activate the environment first:" -ForegroundColor Cyan +Write-Host " poetry shell" -ForegroundColor White +Write-Host " python cli.py" -ForegroundColor White +Write-Host "" +Write-Host "To run tests:" -ForegroundColor Cyan +Write-Host " poetry run python -m pytest tests/ -v" -ForegroundColor White +Write-Host "" +Write-Host "Happy coding! 🎯" -ForegroundColor Green diff --git a/test_azure_openai.py b/test_azure_openai.py new file mode 100644 index 00000000..dd04d616 --- /dev/null +++ b/test_azure_openai.py @@ -0,0 +1,43 @@ +import os +from openai import AzureOpenAI +import traceback + +# Load environment variables +env_vars = {} +with open('.env', 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, value = line.split('=', 1) + env_vars[key] = value.strip('"') + +for key, value in env_vars.items(): + os.environ[key] = value + +try: + print('Testing Azure OpenAI configuration...') + print(f'API Key set: {bool(os.getenv("OPENAI_API_KEY"))}') + print(f'Endpoint: {os.getenv("AZURE_OPENAI_ENDPOINT")}') + print(f'API Version: {os.getenv("OPENAI_API_VERSION")}') + print(f'Deployment: {os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")}') + + client = AzureOpenAI( + api_key=os.getenv('OPENAI_API_KEY'), + api_version=os.getenv('OPENAI_API_VERSION', '2023-12-01-preview'), + azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT') + ) + + print('Client 
created successfully. Testing API call...') + + response = client.chat.completions.create( + model=os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME', 'gpt-4'), + messages=[{'role': 'user', 'content': 'Hello, just testing the connection.'}], + max_tokens=10 + ) + + print('SUCCESS! Response:', response.choices[0].message.content) + +except Exception as e: + print(f'ERROR: {str(e)}') + print('Full traceback:') + traceback.print_exc() diff --git a/test_flash_single.py b/test_flash_single.py new file mode 100644 index 00000000..2cd464fc --- /dev/null +++ b/test_flash_single.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# Modified Flash agent to run a single scenario for testing + +import asyncio +from typing import List, Dict, Tuple, Any +from pydantic import BaseModel +from clients.utils.llm import GPT4Turbo +from aiopslab.orchestrator import Orchestrator + +# Import the FlashAgent class from the original file +import sys +sys.path.append('.') +from clients.flash import FlashAgent + +if __name__ == "__main__": + # Test with just one scenario instead of 12 + pid = "k8s_target_port-misconfig-detection-2" # Single scenario for testing + + print(f"Running Flash agent with scenario: {pid}") + + flash_agent = FlashAgent() + orchestrator = Orchestrator() + + orchestrator.register_agent(flash_agent, name="flash") + + problem_desc, instructions, apis = orchestrator.init_problem(pid) + flash_agent.init_context(problem_desc, instructions, apis) + + print("Starting Flash agent scenario...") + asyncio.run(orchestrator.start_problem(max_steps=20)) + print("Flash agent scenario completed!") diff --git a/tests/clients/test_agents.py b/tests/clients/test_agents.py new file mode 100644 index 00000000..839ac8cb --- /dev/null +++ b/tests/clients/test_agents.py @@ -0,0 +1 @@ +// ...existing code... 
diff --git a/tests/clients/test_azure_openai.py b/tests/clients/test_azure_openai.py
new file mode 100644
index 00000000..839ac8cb
--- /dev/null
+++ b/tests/clients/test_azure_openai.py
@@ -0,0 +1 @@
+# ...existing code...
diff --git a/tests/clients/test_flash_single.py b/tests/clients/test_flash_single.py
new file mode 100644
index 00000000..839ac8cb
--- /dev/null
+++ b/tests/clients/test_flash_single.py
@@ -0,0 +1 @@
+# ...existing code...