diff --git a/aiopslab-runs/.dockerignore b/aiopslab-runs/.dockerignore new file mode 100644 index 00000000..3b46b1bd --- /dev/null +++ b/aiopslab-runs/.dockerignore @@ -0,0 +1,76 @@ +# Git and version control +.git +.gitignore + +# Node.js +node_modules +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Environment files (should be mounted or configured at runtime) +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +# Development tools +.vscode +.idea +*.swp +*.swo + +# OS generated files +.DS_Store +.DS_Store? +._* +Thumbs.db + +# Documentation and non-essential files +README.md +*.md +docs/ + +# Test files +test/ +tests/ +__tests__/ +*.test.js +*.spec.js + +# Coverage and build artifacts +coverage/ +dist/ +build/ + +# Logs +logs/ +*.log + +# Runtime data +pids/ +*.pid +*.seed +*.pid.lock + +# Optional cache directories +.npm +.yarn-integrity +.cache + +# Temporary files +tmp/ +temp/ +.tmp/ + +# Docker files (not needed inside container) +Dockerfile* +.dockerignore +docker-compose*.yml + +# CI/CD files +.github/ +.gitlab-ci.yml +.travis.yml +Jenkinsfile diff --git a/aiopslab-runs/.env.example b/aiopslab-runs/.env.example new file mode 100644 index 00000000..366c4e0d --- /dev/null +++ b/aiopslab-runs/.env.example @@ -0,0 +1,25 @@ +# AIOpsLab Viewer Server Configuration +# Copy this file to .env and modify as needed + +# Server Configuration +PORT=3000 +HTTPS_PORT=3443 +HOST=0.0.0.0 + +# Runs Configuration +RUNS_PATH=./runs + +# Database Configuration +DATABASE_PATH=./runs.db + + +# Security Configuration +RATE_LIMIT_WINDOW_MS=900000 +RATE_LIMIT_MAX_REQUESTS=100 + +# Development/Production Mode +NODE_ENV=development + +# Logging +LOG_LEVEL=info +LOG_FORMAT=combined diff --git a/aiopslab-runs/.gitignore b/aiopslab-runs/.gitignore new file mode 100644 index 00000000..720a2e42 --- /dev/null +++ b/aiopslab-runs/.gitignore @@ -0,0 +1,156 @@ +# Node.js dependencies +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* 
+package-lock.json + +# Environment configuration (keep .env.example for reference) +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +runs/ +*.db + +# SSL certificates and keys (regenerate with npm run generate-certs) +ssl/ +*.key +*.crt +*.pem +*.p12 +*.pfx + +# NOTE: We keep the following for the project: +# - runs/ directory and all run data (log files, evaluation files) +# - runs.db database file with run metadata +# This allows full project reproduction with actual data + +# Log files +logs/ +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# Runtime data +pids/ +*.pid +*.seed +*.pid.lock + +# Coverage directory used by tools like istanbul +coverage/ +*.lcov + +# nyc test coverage +.nyc_output + +# Dependency directories +jspm_packages/ + +# Optional npm cache directory +.npm + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env +.env.test + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +public + +# Storybook build outputs +.out +.storybook-out + +# Temporary folders +tmp/ +temp/ + +# Editor directories and files +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS generated files +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Backup files +*.bak +*.backup +*.old + +# Runtime and cache +.cache/ +.temp/ +.tmp/ + +# ESLint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* diff --git a/aiopslab-runs/Dockerfile b/aiopslab-runs/Dockerfile new file mode 100644 index 00000000..1a8a6a13 --- /dev/null +++ b/aiopslab-runs/Dockerfile @@ -0,0 +1,37 @@ +# Use Node.js 18 Alpine for smaller image size +FROM infyartifactory.jfrog.io/docker-local/node:18.12.1-alpine3.15 + +# Set working directory +WORKDIR /app + +# Install OpenSSL for certificate generation +RUN apk add --no-cache openssl + +# Copy package files first for better Docker layer caching +COPY package*.json ./ + +# Install dependencies +RUN npm ci --only=production + +# Copy application source code +COPY . . + +# Create necessary directories with proper permissions +RUN mkdir -p /app/ssl /app/runs && \ + chown -R node:node /app + +# Generate SSL certificates (self-signed for development) +RUN npm run generate-certs + +# Switch to non-root user for security +USER node + +# Expose both HTTP and HTTPS ports +EXPOSE 3000 3443 + +# Health check endpoint +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD node -e "require('http').get('http://localhost:3000/health', (res) => { process.exit(res.statusCode === 200 ? 
0 : 1) })" + +# Start the server +CMD ["npm", "start"] diff --git a/aiopslab-runs/README.md b/aiopslab-runs/README.md new file mode 100644 index 00000000..89dfe2aa --- /dev/null +++ b/aiopslab-runs/README.md @@ -0,0 +1,969 @@ +# AIOpsLab Viewer - Secure Server + +A secure HTTP/HTTPS server for serving the AIOpsLab session analysis viewer with proper security headers, CORS configuration, and static file serving. + +## Quick Start + +### 🐳 **Docker (Recommended)** + +```bash +### 🐳 **Docker (Recommended)** + +```bash +# Clone and start with Docker +git clone +cd aiopslab-runs +docker-compose up -d + +# Access at http://localhost:3000 or https://localhost:3443 +``` + +### ☸️ **Kubernetes (Production)** + +```bash +# Deploy to Kubernetes with Helm +git clone +cd aiopslab-runs +./scripts/k8s-deploy.sh + +# Or manually with Helm +helm install aiopslab-viewer ./helm/aiopslab-viewer \ + --namespace aiopslab \ + --create-namespace \ + --values ./helm/aiopslab-viewer/values-dev.yaml +``` + +### πŸ”§ **Manual Setup** + +```bash +# Clone and start manually +git clone +cd aiopslab-runs +npm install +cp .env.example .env # Modify as needed +npm start +``` + + +## Features + +- πŸ”’ **Secure HTTPS** support with self-signed certificates for development +- πŸ›‘οΈ **Security Headers** using Helmet.js (CSP, XSS protection, etc.) 
+- 🌐 **CORS** configured for local development +- πŸ“Š **Rate Limiting** to prevent abuse +- πŸ“ **Static File Serving** with proper MIME types +- πŸ” **API Endpoints** for dynamic content +- πŸ“ **Request Logging** for debugging +- ⚑ **Health Check** endpoint +- πŸ’Ύ **Embedded Database** with SQLite for persistent caching +- πŸ”„ **Smart Caching** - only reanalyzes when files change +- πŸ“ˆ **Analytics** - database statistics and performance metrics + +## Architecture Overview + +The AIOpsLab Viewer uses a **database-first approach** for optimal scalability and performance: + +### πŸ—„οΈ **Database-Driven Design** +- **SQLite Database**: Stores all run metadata, metrics, and analysis results +- **No Real-time Scanning**: API endpoints query the database directly, not the filesystem +- **Manual Import**: New runs are imported via explicit scan operations (`POST /api/runs/scan`) +- **Persistent Storage**: Run data persists even if files are moved or deleted + +### πŸ“ **Filesystem Usage** +- **File Serving Only**: Filesystem is used only for serving actual log and evaluation files +- **Static Assets**: Log files, markdown evaluations served via `/runs//` +- **No Directory Traversal**: No real-time directory scanning during API requests + +### ⚑ **Performance Benefits** +- **Fast API Responses**: Database queries are much faster than filesystem scans +- **Scalable**: Handles hundreds of runs without performance degradation +- **Reduced I/O**: Eliminates filesystem operations during normal browsing +- **Caching**: Database acts as an intelligent cache layer + +### πŸ”„ **Data Flow** +1. **Import**: Manual scan (`πŸ” Scan Files` button) analyzes filesystem and populates database +2. **Browse**: UI loads run list from database (`GET /api/runs`) +3. **View**: Individual files served directly from filesystem (`GET /runs//`) +4. **Reanalyze**: Force re-analysis of specific run (`POST /api/runs//reanalyze`) + +## Quick Start + +### 1. 
Install Dependencies + +```bash +npm install +``` + +For HTTPS support, generate self-signed certificates: + +```bash +``` + +### 3. Start the Server + +```bash +npm start +``` + +The server will start on: +- **HTTP**: http://localhost:3000 +- **HTTPS**: https://localhost:3443 (if certificates are available) + +## Configuration + +### Environment Variables + +Create a `.env` file (copy from `.env.example`) to configure the server: + +```bash +# Server Configuration +PORT=3000 # HTTP port +HTTPS_PORT=3443 # HTTPS port (optional) +HOST=0.0.0.0 # Host to bind to + +# Runs Configuration +RUNS_PATH=./runs # Directory containing AIOpsLab run folders + + +# Security Configuration +RATE_LIMIT_WINDOW_MS=900000 # Rate limit window (15 minutes) +RATE_LIMIT_MAX_REQUESTS=100 # Max requests per window + +# Development/Production Mode +NODE_ENV=development +``` + +### Runs Directory Structure + +The server expects AIOpsLab runs to be organized in the configured runs directory: + +``` +runs/ +β”œβ”€β”€ 20250715-57fff059/ +β”‚ β”œβ”€β”€ log.txt +β”‚ β”œβ”€β”€ copilot.md +β”‚ β”œβ”€β”€ perplexity.md +β”‚ └── ... +β”œβ”€β”€ 20250714-abcdef123/ +β”‚ β”œβ”€β”€ log.txt +β”‚ └── ... +└── ... +``` + +**Important**: Run folders must follow the naming pattern `YYYYMMDD-` to be detected by the system. 
+ +### Custom Configuration + +```bash +# Custom ports +HTTP_PORT=8080 HTTPS_PORT=8443 npm start + +# Bind to specific interface +HOST=127.0.0.1 npm start + +# Development with auto-restart +npm run dev +``` + +## API Endpoints + +### πŸ“Š **Database-First Endpoints** + +#### GET /api/runs +**Fast database query** - Returns JSON list of runs from database (no filesystem scanning): +```json +[ + { + "id": "20250715-57fff059", + "created": "2025-07-15T10:30:00.000Z", + "modified": "2025-07-15T10:35:00.000Z", + "hasLogFile": true, + "evaluationFiles": ["copilot.md", "perplexity.md"], + "evaluationCount": 2, + "status": "partial", + "duration": 368.99, + "issues": 1, + "reasoning_judgement": "6/10", + "detectionAccuracy": "Invalid Format", + "steps": 20, + "inputTokens": 75033, + "outputTokens": 464, + "reasoningScore": 6, + "namespace": "test-social-network" + } +] +``` + +#### POST /api/runs +**Create new run record** - Creates a new run with auto-generated ID: +```bash +curl -X POST http://localhost:3000/api/runs \ + -H "Content-Type: application/json" \ + -d '{"namespace":"my-test-env"}' +``` +Response: +```json +{ + "success": true, + "runId": "20250716-auo58obr", + "message": "Run 20250716-auo58obr created successfully", + "run": { + "id": "20250716-auo58obr", + "created_at": "2025-07-16T03:25:31.525Z", + "status": "created", + "namespace": "my-test-env", + "hasLogFile": false, + "evaluationFiles": [], + "evaluationCount": 0 + } +} +``` + +#### POST /api/runs/scan +**Manual filesystem import** - Scans filesystem and imports/updates runs in database: +```bash +curl -X POST http://localhost:3000/api/runs/scan +``` +Response: +```json +{ + "success": true, + "message": "Scanned and imported 3 runs", + "runs": [ + {"id": "20250715-57fff059", "status": "imported"}, + {"id": "20250714-abcdef12", "status": "imported"} + ] +} +``` + +### πŸ“€ **File Upload Endpoints** + +#### POST /api/runs/:runId/log +**Upload log file** - Upload log.txt for a specific run: +```bash 
+curl -X POST http://localhost:3000/api/runs/20250716-auo58obr/log \ + -F "logFile=@session-data.txt" +``` +**Note**: File will be automatically saved as `log.txt` regardless of original filename. + +Response: +```json +{ + "success": true, + "message": "Log file uploaded successfully for run 20250716-auo58obr", + "filename": "log.txt", + "originalName": "session-data.txt", + "analysis": { + "status": "success", + "duration": 15.5, + "detectionAccuracy": "High", + "steps": 12, + "inputTokens": 150, + "outputTokens": 300, + "reasoningScore": 85 + } +} +``` + +#### POST /api/runs/:runId/evaluation +**Upload evaluation file** - Upload evaluation .md file for a specific run: +```bash +# Upload with evaluator name in filename +curl -X POST http://localhost:3000/api/runs/20250716-auo58obr/evaluation \ + -F "evaluationFile=@github-copilot-analysis.md" + +# Upload with specific evaluator name (auto-detected from filename) +curl -X POST http://localhost:3000/api/runs/20250716-auo58obr/evaluation \ + -F "evaluationFile=@gpt-4.md" +``` + +**Supported Evaluators**: `copilot`, `perplexity`, `claude-sonnet`, `gpt-4`, `gemini`, `claude`, `openai` + +**File Naming**: Files are automatically renamed based on detected evaluator: +- `github-copilot-eval.md` β†’ `copilot.md` +- `perplexity-analysis.md` β†’ `perplexity.md` +- `gpt-4.md` β†’ `gpt-4.md` + +Response: +```json +{ + "success": true, + "message": "Evaluation file uploaded successfully for run 20250716-auo58obr", + "filename": "copilot.md", + "originalName": "github-copilot-analysis.md", + "evaluator": "copilot", + "evaluationFiles": ["copilot.md"], + "evaluationCount": 1, + "analysis": { + "averageReasoningJudgement": "8/10", + "issueCount": 2 + } +} +``` + +#### GET /api/stats +**Database statistics** - Returns aggregated run statistics: +```json +{ + "totalRuns": 15, + "successRuns": 8, + "partialRuns": 5, + "failedRuns": 2, + "avgDuration": 284, + "avgReasoningScore": 7 +} +``` + +#### DELETE /api/runs/:runId +**Remove 
from database** - Deletes run from database (filesystem files remain): +```bash +curl -X DELETE http://localhost:3000/api/runs/20250715-57fff059 +``` + +### πŸ”„ **Analysis Endpoints** + +#### POST /api/runs/:runId/reanalyze +**Force re-analysis** - Re-analyzes specific run and updates database: +```bash +curl -X POST http://localhost:3000/api/runs/20250715-57fff059/reanalyze +``` + +### πŸ“ **File Serving Endpoints** + +#### GET /runs/:runId/:filename +**Direct file access** - Serves individual run files: +- `/runs/20250715-57fff059/log.txt` - Raw log file +- `/runs/20250715-57fff059/copilot.md` - Evaluation file + +### 🌐 **Web Interface Endpoints** + +#### GET / +Main dashboard with database-driven run list and scan functionality + +#### GET /viewer.html +Session viewer with URL parameters: +- `?run=20250715-57fff059` - View specific run + +#### GET /admin +Admin panel for database management and system administration + +## Intelligent Status Assessment + +The server automatically analyzes each run to determine its status based on multiple factors: + +### Status Categories + +1. **🟒 Success** - Run completed successfully with good results + - Reasoning score β‰₯ 7/10 + - Root cause identified or solution provided + - Minimal API/connectivity errors (< 10) + - Duration reasonable (< 10 minutes) + +2. **🟑 Partial Success** - Run completed but with limitations + - Reasoning score 5-6/10 + - Some progress made but incomplete resolution + - Moderate API/connectivity issues (< 20 errors) + - Session duration reasonable + +3. 
**πŸ”΄ Failed** - Run failed to complete or provide useful results + - Fatal errors or termination + - Excessive API/connectivity errors (> 50) + - Very low reasoning score (< 5) + - No meaningful progress + +### Analysis Metrics + +The system extracts and analyzes: + +- **Duration**: Time to detection (TTD) from logs +- **Error Count**: API failures, connection issues, and other errors +- **Steps Taken**: Number of diagnostic actions performed +- **Token Usage**: Input/output tokens for AI interactions +- **Reasoning Score**: Average score from evaluation files +- **Detection Accuracy**: Whether anomalies were correctly identified +- **Issues Found**: Count of problems identified in evaluations +- **Namespace**: Kubernetes namespace being analyzed + +### Data Sources + +Status assessment uses: +1. **Log Analysis**: Extracts metrics, errors, and session flow +2. **Evaluation Files**: Averages reasoning judgements from AI service evaluations +3. **File Structure**: Checks for completeness and file availability +4. **Timing Analysis**: Calculates session duration and efficiency + +### Reasoning Logic + +The status assessment follows this decision tree: + +#### 1. **Critical Failure Check** +``` +IF log file missing OR fatal errors OR >50 errors + β†’ Status = "failed" +``` + +#### 2. **API/Infrastructure Issues** +``` +Count API errors (Azure OpenAI 404s, connection refused, etc.) +- Heavy API issues: >20 errors +- Moderate API issues: 10-20 errors +- Light API issues: <10 errors +``` + +#### 3. **Success Indicators** +``` +Check for resolution evidence: +- Log contains "root cause" OR "solution" OR "resolved" +- Reasoning score β‰₯ 8/10 +- Detection accuracy is valid format +``` + +#### 4. 
**Status Decision Matrix** +``` +IF (has_resolution AND api_errors < 10 AND reasoning_score β‰₯ 7) + β†’ Status = "success" + +ELSE IF (reasoning_score β‰₯ 5 OR (duration > 0 AND duration < 600 AND api_errors < 20)) + β†’ Status = "partial" + +ELSE + β†’ Status = "failed" +``` + +#### 5. **Metric Extraction Process** + +**From Log Files:** +- **Duration**: Extract TTD (Time to Detection) from results JSON +- **Steps**: Count diagnostic actions (exec_shell, get_logs, etc.) +- **Errors**: Find ERROR:, Connection refused, Failed to patterns +- **Namespace**: Extract from kubectl commands or environment setup +- **Metrics**: Parse Results JSON block for detection accuracy, tokens, etc. + +**From Evaluation Files:** +- **Reasoning Score**: Extract from "Reasoning Score: X" or "Overall Score: X" +- **Issues**: Count mentions of "issue", "problem", "error", "weakness" +- **Average Reasoning Judgement**: Calculate mean across all evaluation files + +#### 6. **Example Assessment** + +For a typical run: +``` +Log Analysis: +βœ“ Duration: 368.99 seconds (from TTD) +βœ“ Steps: 20 (counted exec_shell, get_logs calls) +⚠ API Errors: 15 (Azure OpenAI 404 errors) +βœ“ Namespace: test-social-network +⚠ Detection Accuracy: "Invalid Format" + +Evaluation Analysis: +βœ“ Average Reasoning Judgement: 6/10 (from copilot.md, perplexity.md) +⚠ Issues Found: 10 (weakness mentions in evaluations) + +Decision Process: +1. Not critical failure βœ“ +2. Moderate API issues (15 errors) +3. No clear resolution evidence +4. Reasoning score = 6 (β‰₯ 5) βœ“ +5. Duration < 600 seconds βœ“ +β†’ Result: "partial" status +``` + +This logic ensures consistent, objective assessment based on actual session data rather than manual classification. 
+ +### GET /health +Health check endpoint: +```json +{ + "status": "healthy", + "timestamp": "2025-07-15T10:30:00.000Z", + "uptime": 123.45 +} +``` + +### GET /api/stats +Database and system statistics: +```json +{ + "database": { + "totalRuns": 25, + "successRuns": 8, + "partialRuns": 12, + "failedRuns": 5, + "avgDuration": 287, + "avgReasoningScore": 6 + }, + "system": { + "uptime": 3600, + "memory": {...}, + "timestamp": "2025-07-15T10:30:00.000Z" + } +} +``` + +### POST /api/runs/:runId/reanalyze +Force reanalysis of a specific run (ignores cache): +```bash +curl -X POST http://localhost:3000/api/runs/20250715-57fff059/reanalyze +``` + +### POST /api/cleanup +Clean up old runs from database: +```bash +# Via JSON body +curl -X POST http://localhost:3000/api/cleanup \ + -H "Content-Type: application/json" \ + -d '{"days": 30}' + +# Via query parameter +curl -X POST "http://localhost:3000/api/cleanup?days=30" +``` + +## Database & Caching + +### Embedded SQLite Database + +The server uses SQLite for persistent storage of analysis results: + +- **File**: `runs.db` (created automatically) +- **Purpose**: Cache computed metrics to avoid reanalysis on every request +- **Schema**: Stores all run metadata, status, reasoning judgements, and analysis results + +### Smart Caching Logic + +1. **File Change Detection**: Uses SHA-256 hash of file modification times and sizes +2. **Conditional Analysis**: Only reanalyzes when files change or run not in database +3. **Performance**: Reduces API response time from ~500ms to ~50ms for cached runs +4. 
**Scalability**: Handles thousands of runs efficiently + +### Cache Invalidation + +Cache is automatically invalidated when: +- Log file is modified +- Any evaluation file (*.md) is modified +- New evaluation files are added +- Files are deleted from run directory + +### Database Schema + +```sql +CREATE TABLE runs ( + id TEXT PRIMARY KEY, -- Run ID (e.g., 20250715-57fff059) + created_at TEXT NOT NULL, -- Run creation timestamp + modified_at TEXT NOT NULL, -- Last file modification + file_hash TEXT NOT NULL, -- Hash for change detection + status TEXT NOT NULL, -- success|partial|failed + duration REAL DEFAULT 0, -- Time to detection (seconds) + issues INTEGER DEFAULT 0, -- Number of issues found + reasoning_judgement TEXT DEFAULT 'N/A', -- Overall reasoning judgement (e.g., "6/10") + detection_accuracy TEXT, -- Detection accuracy result + steps INTEGER DEFAULT 0, -- Diagnostic steps taken + input_tokens INTEGER DEFAULT 0, -- AI input tokens + output_tokens INTEGER DEFAULT 0, -- AI output tokens + reasoning_score INTEGER DEFAULT 0, -- Reasoning quality score + namespace TEXT DEFAULT 'unknown', -- Kubernetes namespace + has_log_file BOOLEAN DEFAULT 0, -- Log file availability + evaluation_files TEXT DEFAULT '[]', -- JSON array of eval files + evaluation_count INTEGER DEFAULT 0, -- Number of evaluations + errors TEXT DEFAULT '[]', -- JSON array of errors + last_analyzed_at TEXT NOT NULL -- Last analysis timestamp +); +``` + +## Admin Panel + +### Overview + +The admin panel provides a web-based interface for database management and system administration. Access it at `/admin` once the server is running. 
+ +### Features + +#### πŸ“Š **Database Statistics** +- Real-time metrics display (total runs, status breakdown) +- Average duration and reasoning judgements +- System uptime and memory usage +- Refresh functionality for live updates + +#### 🧹 **Database Cleanup** +- Remove runs older than specified days +- Configurable retention period (1-365 days) +- Confirmation dialogs for destructive operations +- Real-time feedback on cleanup results + +#### πŸ“‹ **Run Management** +- Interactive list of all runs with status badges +- Individual run reanalysis (bypass cache) +- Bulk operations for selected runs +- Status-based filtering (success/partial/failed) +- Checkbox selection for batch operations + +#### πŸ”„ **Bulk Operations** +- **Reanalyze All**: Force reanalysis of entire database +- **Reanalyze Selected**: Process only checked runs +- **Export Database**: Download runs as JSON file +- **Smart Progress**: Real-time feedback during bulk operations + +### Admin Panel Interface + +``` +πŸ› οΈ AIOpsLab Admin Panel +β”œβ”€β”€ Database Statistics (live metrics) +β”œβ”€β”€ Database Cleanup (retention management) +└── Run Management + β”œβ”€β”€ Status filtering + β”œβ”€β”€ Individual run actions + β”œβ”€β”€ Bulk selection + └── Export functionality +``` + +### Security Considerations + +The admin panel currently has **no authentication** and should only be used in: +- Local development environments +- Trusted internal networks +- Behind proper authentication proxy in production + +For production deployments, consider: +- Adding basic auth or OAuth integration +- Restricting admin routes by IP/network +- Using environment-based feature flags + +**Note**: The server includes a Content Security Policy (CSP) that allows inline scripts and event handlers for the admin panel functionality. This is configured via the `scriptSrcAttr: ["'unsafe-inline'"]` directive. + +### Usage Examples + +#### Clean Up Old Runs +1. Navigate to `/admin` +2. 
Set retention period (e.g., 30 days) +3. Click "Cleanup Database" +4. Confirm the operation + +#### Bulk Reanalysis +1. Go to Run Management section +2. Filter by status if needed +3. Select runs using checkboxes +4. Click "Reanalyze Selected" +5. Monitor progress via alerts + +#### Export Data +1. Click "Export Database" button +2. JSON file downloads automatically +3. Contains all run data and metrics + +## Security Features + +### Content Security Policy (CSP) +- Restricts resource loading to same-origin +- Allows inline styles and scripts (required for the viewer) +- Blocks object and frame embedding + +### Security Headers +- `X-Content-Type-Options: nosniff` +- `X-Frame-Options: DENY` +- `X-XSS-Protection: 1; mode=block` +- `Referrer-Policy: strict-origin-when-cross-origin` + +### Rate Limiting +- 100 requests per 15 minutes per IP +- Configurable via express-rate-limit + +### CORS Configuration +- Allows localhost and local network access +- Supports credentials for authenticated requests +- Configurable origin validation + +## File Structure + +``` +aiopslab-runs/ +β”œβ”€β”€ server.js # Main server file +β”œβ”€β”€ database.js # SQLite database module +β”œβ”€β”€ runs.db # SQLite database (auto-created) +β”œβ”€β”€ package.json # Node.js dependencies +β”œβ”€β”€ index.html # Main dashboard +β”œβ”€β”€ viewer.html # Session viewer +β”œβ”€β”€ scripts/ +β”‚ └── +β”œβ”€β”€ +β”‚ β”œβ”€β”€ server.key # Private key +β”‚ └── server.crt # Certificate +└── [run-directories]/ # Session data + β”œβ”€β”€ log.txt # Session logs + β”œβ”€β”€ copilot.md # AI evaluations + └── ... +``` + +## Development + +### Auto-restart Development Server + +```bash +npm run dev +``` + +Uses nodemon to automatically restart the server when files change. + +### Security Audit + +```bash +npm run security-check +``` + +### Adding New Evaluation Files + +The server automatically discovers evaluation files in run directories. 
Supported files: +- `copilot.md` +- `perplexity.md` +- `claude-sonnet.md` +- `gpt-4.md` +- `gemini.md` + +## Production Deployment + +### Using Kubernetes (Enterprise) + +For production Kubernetes deployments with persistent storage and high availability: + +#### Quick Start with Helm + +```bash +# Clone the repository +git clone +cd aiopslab-runs + +# Deploy to Kubernetes +./scripts/k8s-deploy.sh -e prod -t v1.0.0 + +# Or deploy manually +helm install aiopslab-viewer ./helm/aiopslab-viewer \ + --namespace aiopslab-prod \ + --create-namespace \ + --values ./helm/aiopslab-viewer/values-prod.yaml \ + --set image.tag=v1.0.0 +``` + +#### Features Included + +- βœ… **Persistent Volumes**: Separate PVCs for database and runs data +- βœ… **High Availability**: Pod anti-affinity and autoscaling +- βœ… **Security**: Network policies, non-root containers, security contexts +- βœ… **Ingress**: TLS termination with cert-manager integration +- βœ… **Monitoring**: Health checks, readiness/liveness probes +- βœ… **Storage**: Configurable storage classes and sizes + +#### Storage Configuration + +```yaml +# Production storage example +persistence: + database: + enabled: true + storageClass: "fast-ssd" + size: 5Gi + runs: + enabled: true + storageClass: "fast-ssd" + size: 50Gi +``` + +See [k8s-deployment.md](./k8s-deployment.md) for comprehensive Kubernetes deployment documentation. + +### Using Docker (Recommended) + +#### Quick Start with Docker Compose + +The easiest way to deploy is using Docker Compose: + +```bash +# Clone the repository +git clone +cd aiopslab-runs + +# Start the application +docker-compose up -d + +# View logs +docker-compose logs -f aiopslab-viewer + +# Stop the application +docker-compose down +``` + +The application will be available at: +- **HTTP**: http://localhost:3000 +- **HTTPS**: https://localhost:3443 + +#### Building the Docker Image + +```bash +# Build the image +docker build -t aiopslab-viewer . 
+ +# Run the container +docker run -d \ + --name aiopslab-viewer \ + -p 3000:3000 \ + -p 3443:3443 \ + -v $(pwd)/runs:/app/runs \ + -v $(pwd)/runs.db:/app/runs.db \ + -e NODE_ENV=production \ + aiopslab-viewer +``` + +#### Production Deployment with Nginx + +For production, use the included Docker Compose configuration with Nginx: + +```bash +# Start with Nginx reverse proxy +docker-compose --profile production up -d + +# This starts: +# - AIOpsLab Viewer on internal network +# - Nginx reverse proxy on ports 80/443 +# - SSL termination and rate limiting +``` + +The production setup includes: +- βœ… **Rate limiting** for API and static files +- βœ… **Security headers** (HSTS, CSP, etc.) +- βœ… **Gzip compression** for better performance +- βœ… **Static file caching** for faster load times +- βœ… **Health checks** for container orchestration + +#### Environment Variables for Docker + +```bash +# Create production environment file +cat > .env.production << EOF +NODE_ENV=production +PORT=3000 +HTTPS_PORT=3443 +HOST=0.0.0.0 +RUNS_PATH=/app/runs +DATABASE_PATH=/app/runs.db +RATE_LIMIT_WINDOW_MS=900000 +RATE_LIMIT_MAX_REQUESTS=100 +LOG_LEVEL=info +EOF + +# Use in Docker Compose +docker-compose --env-file .env.production up -d +``` + +### Using PM2 + +For traditional server deployments: + +```bash +# Install PM2 +npm install -g pm2 + +# Start server with PM2 +pm2 start server.js --name aiopslab-viewer + +# Monitor +pm2 monit + +# Auto-restart on reboot +pm2 startup +pm2 save +``` + +### Manual Docker Build + +If you prefer to build manually: + +```dockerfile +FROM node:18-alpine + + +WORKDIR /app + +# Copy package files and install dependencies +COPY package*.json ./ +RUN npm ci --only=production + +# Copy application code +COPY . . 
+ +# Create directories and generate certificates +RUN mkdir -p /app/runs && \ + chown -R node:node /app + +USER node + +EXPOSE 3000 3443 + +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD node -e "require('http').get('http://localhost:3000/health', (res) => { process.exit(res.statusCode === 200 ? 0 : 1) })" + +CMD ["npm", "start"] +``` + +### Reverse Proxy (Manual Nginx) + +If setting up Nginx manually: + +```nginx +server { + listen 80; + server_name your-domain.com; + return 301 https://$host$request_uri; +} + +server { + listen 443 ssl http2; + server_name your-domain.com; + + ssl_certificate /path/to/your/certificate.crt; + ssl_certificate_key /path/to/your/private.key; + + # Security headers + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + add_header X-XSS-Protection "1; mode=block" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + + location / { + proxy_pass http://localhost:3000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} +``` + +## Troubleshooting + +### SSL Certificate Issues + +If you see SSL errors: + +1. Regenerate certificates: + ```bash + rm -rf ssl/ + ``` + +2. Accept the self-signed certificate in your browser +3. For production, use Let's Encrypt or proper CA-signed certificates + +### CORS Errors + +If you get CORS errors: + +1. Check that your origin is in the allowed list +2. Ensure you're accessing via `localhost` or allowed IP ranges +3. For production, update the CORS configuration + +### File Serving Issues + +If files aren't loading: + +1. Check file permissions +2. Verify the file structure matches the expected layout +3. Check the server logs for 404 errors + +## License + +MIT License - see package.json for details. 
diff --git a/aiopslab-runs/admin.html b/aiopslab-runs/admin.html new file mode 100644 index 00000000..869288b3 --- /dev/null +++ b/aiopslab-runs/admin.html @@ -0,0 +1,1147 @@ + + + + + + AIOpsLab Admin - Database Management + + + + +
+
+
+
+

AIOpsLab Admin Panel

+
Database management and system administration
+
+ +
+
+ +
+
+ + +
+

πŸ“‹ Run Management

+ + +
+

πŸ”„ Bulk Operations

+
+
+ 0 runs selected + + +
+
+ + + +
+
+
+ + +
+

πŸ” Search & Filter

+
+
+ + +
+
+ + +
+
+
+ +
+
+

Loading runs...

+
+ +
+ + +
+

🧹 Database Cleanup

+
+

Clean Old Runs

+

Remove runs older than specified days to free up space.

+
+ + +
+ +
+ +
+

Bulk Operations

+ + +
+ +
+

Dangerous Operations

+

These operations cannot be undone!

+ +
+
+ + +
+ + +
+ + + + diff --git a/aiopslab-runs/database.js b/aiopslab-runs/database.js new file mode 100644 index 00000000..2c3bf359 --- /dev/null +++ b/aiopslab-runs/database.js @@ -0,0 +1,286 @@ +const sqlite3 = require('sqlite3').verbose(); +const fs = require('fs'); +const path = require('path'); +const crypto = require('crypto'); + +class RunDatabase { + constructor(dbPath = './runs.db') { + this.dbPath = dbPath; + this.db = null; + this.init(); + } + + init() { + this.db = new sqlite3.Database(this.dbPath, sqlite3.OPEN_READWRITE | sqlite3.OPEN_CREATE, (err) => { + if (err) { + console.error('Error opening database:', err.message); + } else { + console.log('πŸ“Š Connected to SQLite database'); + this.createTables(); + } + }); + } + + createTables() { + // First, create the table with current schema + const schema = ` + CREATE TABLE IF NOT EXISTS runs ( + id TEXT PRIMARY KEY, + created_at TEXT NOT NULL, + modified_at TEXT NOT NULL, + file_hash TEXT NOT NULL, + status TEXT NOT NULL, + duration REAL DEFAULT 0, + reasoning_judgement TEXT DEFAULT 'N/A', + detection_accuracy TEXT DEFAULT 'Unknown', + steps INTEGER DEFAULT 0, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0, + reasoning_score INTEGER DEFAULT 0, + agent_name TEXT DEFAULT 'unknown', + application_name TEXT DEFAULT 'unknown', + has_log_file BOOLEAN DEFAULT 0, + evaluation_files TEXT DEFAULT '[]', + evaluation_count INTEGER DEFAULT 0, + last_analyzed_at TEXT NOT NULL, + UNIQUE(id) + ); + + CREATE INDEX IF NOT EXISTS idx_runs_created_at ON runs(created_at); + CREATE INDEX IF NOT EXISTS idx_runs_status ON runs(status); + CREATE INDEX IF NOT EXISTS idx_runs_modified_at ON runs(modified_at); + `; + + this.db.exec(schema, (err) => { + if (err) { + console.error('Error creating tables:', err.message); + } else { + console.log('βœ… Database tables initialized'); + + // Ensure reasoning_judgement column exists for existing databases + this.db.run("ALTER TABLE runs ADD COLUMN reasoning_judgement TEXT 
DEFAULT 'N/A'", (alterErr) => { + if (alterErr && !alterErr.message.includes('duplicate column name')) { + console.error('Error adding reasoning_judgement column:', alterErr.message); + } else if (!alterErr) { + console.log('βœ… Added reasoning_judgement column to existing database'); + } + }); + } + }); + } + + // Calculate hash of run directory files for change detection + calculateRunHash(runPath) { + const hash = crypto.createHash('sha256'); + + try { + // Always include log.txt + const logFile = path.join(runPath, 'log.txt'); + if (fs.existsSync(logFile)) { + const stats = fs.statSync(logFile); + hash.update(`log.txt:${stats.mtime.toISOString()}:${stats.size}`); + } + + // Dynamically discover all .md files in the run directory + const files = fs.readdirSync(runPath).filter(file => file.endsWith('.md')); + + // Sort files for consistent hashing + files.sort(); + + for (const file of files) { + const filePath = path.join(runPath, file); + if (fs.existsSync(filePath)) { + const stats = fs.statSync(filePath); + hash.update(`${file}:${stats.mtime.toISOString()}:${stats.size}`); + } + } + + return hash.digest('hex'); + } catch (error) { + console.warn('Error calculating hash for', runPath, ':', error.message); + return crypto.randomBytes(16).toString('hex'); + } + } + + // Get run from database + async getRun(runId) { + return new Promise((resolve, reject) => { + const query = 'SELECT * FROM runs WHERE id = ?'; + this.db.get(query, [runId], (err, row) => { + if (err) { + reject(err); + } else if (!row) { + resolve(null); + } else { + // Parse JSON fields + const parsedRow = { + ...row, + evaluation_files: JSON.parse(row.evaluation_files || '[]'), + has_log_file: Boolean(row.has_log_file) + }; + resolve(parsedRow); + } + }); + }); + } + + // Insert or update run data + async upsertRun(runData) { + return new Promise((resolve, reject) => { + const query = ` + INSERT OR REPLACE INTO runs ( + id, created_at, modified_at, file_hash, status, duration, 
reasoning_judgement, + detection_accuracy, steps, input_tokens, output_tokens, reasoning_score, + agent_name, application_name, has_log_file, evaluation_files, evaluation_count, + last_analyzed_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `; + + const params = [ + runData.id, + runData.created_at, + runData.modified_at, + runData.file_hash, + runData.status, + runData.duration, + runData.reasoning_judgement, + runData.detection_accuracy, + runData.steps, + runData.input_tokens, + runData.output_tokens, + runData.reasoning_score, + runData.agent_name || 'unknown', + runData.application_name || 'unknown', + runData.has_log_file ? 1 : 0, + JSON.stringify(runData.evaluation_files || []), + runData.evaluation_count || 0, + new Date().toISOString() + ]; + + this.db.run(query, params, function(err) { + if (err) { + reject(err); + } else { + resolve({ changes: this.changes, lastID: this.lastID }); + } + }); + }); + } + + // Get all runs ordered by creation date + async getAllRuns() { + return new Promise((resolve, reject) => { + const query = 'SELECT * FROM runs ORDER BY created_at DESC'; + this.db.all(query, [], (err, rows) => { + if (err) { + reject(err); + } else { + // Parse JSON fields for all rows + const parsedRows = rows.map(row => ({ + ...row, + evaluation_files: JSON.parse(row.evaluation_files || '[]'), + has_log_file: Boolean(row.has_log_file) + })); + resolve(parsedRows); + } + }); + }); + } + + // Check if run needs reanalysis (file changes or doesn't exist in DB) + async needsReanalysis(runId, runPath) { + try { + const existingRun = await this.getRun(runId); + if (!existingRun) { + return true; // Run not in database + } + + const currentHash = this.calculateRunHash(runPath); + return existingRun.file_hash !== currentHash; + } catch (error) { + console.warn('Error checking if run needs reanalysis:', error.message); + return true; // Default to reanalysis on error + } + } + + // Get database statistics + async getStats() { + return 
new Promise((resolve, reject) => { + const queries = [ + 'SELECT COUNT(*) as total_runs FROM runs', + 'SELECT COUNT(*) as success_runs FROM runs WHERE status = "success"', + 'SELECT COUNT(*) as partial_runs FROM runs WHERE status = "partial"', + 'SELECT COUNT(*) as failed_runs FROM runs WHERE status = "failed"', + 'SELECT AVG(duration) as avg_duration FROM runs WHERE duration > 0', + 'SELECT AVG(reasoning_score) as avg_reasoning_score FROM runs WHERE reasoning_score > 0' + ]; + + Promise.all(queries.map(query => + new Promise((res, rej) => { + this.db.get(query, [], (err, row) => { + if (err) rej(err); + else res(row); + }); + }) + )).then(results => { + resolve({ + totalRuns: results[0].total_runs, + successRuns: results[1].success_runs, + partialRuns: results[2].partial_runs, + failedRuns: results[3].failed_runs, + avgDuration: Math.round(results[4].avg_duration || 0), + avgReasoningScore: Math.round(results[5].avg_reasoning_score || 0) + }); + }).catch(reject); + }); + } + + // Clean up old runs (optional) + async cleanupOldRuns(daysOld = 30) { + return new Promise((resolve, reject) => { + const cutoffDate = new Date(); + cutoffDate.setDate(cutoffDate.getDate() - daysOld); + const cutoffISO = cutoffDate.toISOString(); + + const query = 'DELETE FROM runs WHERE created_at < ?'; + this.db.run(query, [cutoffISO], function(err) { + if (err) { + reject(err); + } else { + console.log(`🧹 Cleaned up ${this.changes} old runs`); + resolve(this.changes); + } + }); + }); + } + + // Delete a run from the database + async deleteRun(runId) { + return new Promise((resolve, reject) => { + const query = 'DELETE FROM runs WHERE id = ?'; + this.db.run(query, [runId], function(err) { + if (err) { + reject(err); + } else { + console.log(`πŸ—‘οΈ Deleted run ${runId} from database (${this.changes} rows affected)`); + resolve(this.changes); + } + }); + }); + } + + // Close database connection + close() { + if (this.db) { + this.db.close((err) => { + if (err) { + console.error('Error 
closing database:', err.message); + } else { + console.log('πŸ“Š Database connection closed'); + } + }); + } + } +} + +module.exports = RunDatabase; diff --git a/aiopslab-runs/docker-compose.yml b/aiopslab-runs/docker-compose.yml new file mode 100644 index 00000000..439fb900 --- /dev/null +++ b/aiopslab-runs/docker-compose.yml @@ -0,0 +1,52 @@ +version: '3.8' + +services: + aiopslab-viewer: + build: . + container_name: aiopslab-viewer + ports: + - "3000:3000" # HTTP + - "3443:3443" # HTTPS + environment: + - NODE_ENV=production + - PORT=3000 + - HOST=0.0.0.0 + - RUNS_PATH=/app/runs + - DATABASE_PATH=/app/runs.db + - RATE_LIMIT_WINDOW_MS=900000 + - RATE_LIMIT_MAX_REQUESTS=100 + - LOG_LEVEL=info + volumes: + # Mount runs directory for persistent data + - ./runs:/app/runs + # Mount database for persistent storage + - ./runs.db:/app/runs.db + # Mount environment file (optional) + - ./.env:/app/.env:ro + restart: unless-stopped + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/health', (res) => { process.exit(res.statusCode === 200 ? 
0 : 1) })"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + # Optional: Nginx reverse proxy for production + nginx: + image: nginx:alpine + container_name: aiopslab-nginx + ports: + - "80:80" + - "443:443" + volumes: + - ./nginx.conf:/etc/nginx/nginx.conf:ro + - ./ssl:/etc/ssl/certs:ro + depends_on: + - aiopslab-viewer + restart: unless-stopped + profiles: + - production + +networks: + default: + name: aiopslab-network diff --git a/aiopslab-runs/helm/aiopslab-viewer/Chart.yaml b/aiopslab-runs/helm/aiopslab-viewer/Chart.yaml new file mode 100644 index 00000000..7fa1a1bb --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/Chart.yaml @@ -0,0 +1,21 @@ +apiVersion: v2 +name: aiopslab-viewer +description: A Helm chart for AIOpsLab Viewer - Secure server for AIOpsLab session analysis +type: application +version: 1.0.0 +appVersion: "1.0.0" +keywords: + - aiopslab + - kubernetes + - analysis + - viewer + - monitoring +home: https://github.com/your-org/aiopslab-runs +sources: + - https://github.com/your-org/aiopslab-runs +maintainers: + - name: AIOpsLab Team + email: team@aiopslab.com +annotations: + category: Analytics +dependencies: [] diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/_helpers.tpl b/aiopslab-runs/helm/aiopslab-viewer/templates/_helpers.tpl new file mode 100644 index 00000000..5e180efc --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "aiopslab-viewer.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "aiopslab-viewer.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "aiopslab-viewer.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "aiopslab-viewer.labels" -}} +helm.sh/chart: {{ include "aiopslab-viewer.chart" . }} +{{ include "aiopslab-viewer.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "aiopslab-viewer.selectorLabels" -}} +app.kubernetes.io/name: {{ include "aiopslab-viewer.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "aiopslab-viewer.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "aiopslab-viewer.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/configmap.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/configmap.yaml new file mode 100644 index 00000000..f838b2ed --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.configMap.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "aiopslab-viewer.fullname" . }} + labels: + {{- include "aiopslab-viewer.labels" . 
| nindent 4 }} +data: + {{- with .Values.configMap.data }} + {{- toYaml . | nindent 2 }} + {{- end }} +{{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/deployment.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/deployment.yaml new file mode 100644 index 00000000..be499c7f --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/deployment.yaml @@ -0,0 +1,131 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "aiopslab-viewer.fullname" . }} + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "aiopslab-viewer.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "aiopslab-viewer.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "aiopslab-viewer.serviceAccountName" . 
}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.http.targetPort }} + protocol: TCP + - name: https + containerPort: {{ .Values.service.https.targetPort }} + protocol: TCP + env: + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.livenessProbe.httpGet.path }} + port: {{ .Values.livenessProbe.httpGet.port }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + {{- end }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.readinessProbe.httpGet.path }} + port: {{ .Values.readinessProbe.httpGet.port }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + {{- if .Values.persistence.database.enabled }} + - name: database-storage + mountPath: {{ .Values.persistence.database.mountPath }} + {{- end }} + {{- if .Values.persistence.runs.enabled }} + - name: runs-storage + mountPath: {{ .Values.persistence.runs.mountPath }} + {{- end 
}} + {{- if .Values.ssl.enabled }} + - name: ssl-certs + mountPath: /app/ssl + readOnly: {{ if .Values.ssl.existingSecret }}true{{ else }}false{{ end }} + {{- end }} + {{- if .Values.configMap.enabled }} + - name: config + mountPath: /app/config + readOnly: true + {{- end }} + volumes: + {{- if .Values.persistence.database.enabled }} + - name: database-storage + persistentVolumeClaim: + claimName: {{ include "aiopslab-viewer.fullname" . }}-database + {{- end }} + {{- if .Values.persistence.runs.enabled }} + - name: runs-storage + persistentVolumeClaim: + claimName: {{ include "aiopslab-viewer.fullname" . }}-runs + {{- end }} + {{- if .Values.ssl.enabled }} + - name: ssl-certs + {{- if .Values.ssl.existingSecret }} + secret: + secretName: {{ .Values.ssl.existingSecret }} + defaultMode: 0600 + {{- else }} + emptyDir: {} + {{- end }} + {{- end }} + {{- if .Values.configMap.enabled }} + - name: config + configMap: + name: {{ include "aiopslab-viewer.fullname" . }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/hpa.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/hpa.yaml new file mode 100644 index 00000000..e60d5feb --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "aiopslab-viewer.fullname" . }} + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "aiopslab-viewer.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/ingress.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/ingress.yaml new file mode 100644 index 00000000..fac40087 --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/ingress.yaml @@ -0,0 +1,59 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "aiopslab-viewer.fullname" . -}} +{{- $svcPort := .Values.service.http.port -}} +{{- if and .Values.ingress.className (not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class")) }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/networkpolicy.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/networkpolicy.yaml new file mode 100644 index 00000000..c7e804c8 --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/networkpolicy.yaml @@ -0,0 +1,27 @@ +{{- if .Values.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "aiopslab-viewer.fullname" . }} + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "aiopslab-viewer.selectorLabels" . 
| nindent 6 }} + policyTypes: + {{- if .Values.networkPolicy.ingress }} + - Ingress + {{- end }} + {{- if .Values.networkPolicy.egress }} + - Egress + {{- end }} + {{- if .Values.networkPolicy.ingress }} + ingress: + {{- toYaml .Values.networkPolicy.ingress | nindent 4 }} + {{- end }} + {{- if .Values.networkPolicy.egress }} + egress: + {{- toYaml .Values.networkPolicy.egress | nindent 4 }} + {{- end }} +{{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/pvc.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/pvc.yaml new file mode 100644 index 00000000..210ef8c5 --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/pvc.yaml @@ -0,0 +1,53 @@ +{{- if .Values.persistence.database.enabled }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "aiopslab-viewer.fullname" . }}-database + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} + component: database + {{- with .Values.persistence.database.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + accessModes: + - {{ .Values.persistence.database.accessMode }} + resources: + requests: + storage: {{ .Values.persistence.database.size }} + {{- if .Values.persistence.database.storageClass }} + {{- if (eq "-" .Values.persistence.database.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: {{ .Values.persistence.database.storageClass }} + {{- end }} + {{- end }} +--- +{{- end }} +{{- if .Values.persistence.runs.enabled }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "aiopslab-viewer.fullname" . }}-runs + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} + component: runs + {{- with .Values.persistence.runs.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + accessModes: + - {{ .Values.persistence.runs.accessMode }} + resources: + requests: + storage: {{ .Values.persistence.runs.size }} + {{- if .Values.persistence.runs.storageClass }} + {{- if (eq "-" .Values.persistence.runs.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: {{ .Values.persistence.runs.storageClass }} + {{- end }} + {{- end }} +{{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/secret.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/secret.yaml new file mode 100644 index 00000000..7684b65e --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/secret.yaml @@ -0,0 +1,15 @@ +{{- if .Values.secrets.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "aiopslab-viewer.fullname" . }} + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} +type: Opaque +data: + {{- with .Values.secrets.data }} + {{- range $key, $value := . }} + {{ $key }}: {{ $value | b64enc }} + {{- end }} + {{- end }} +{{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/service.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/service.yaml new file mode 100644 index 00000000..c1f73bf2 --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "aiopslab-viewer.fullname" . }} + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.http.port }} + targetPort: {{ .Values.service.http.targetPort }} + protocol: TCP + name: http + - port: {{ .Values.service.https.port }} + targetPort: {{ .Values.service.https.targetPort }} + protocol: TCP + name: https + selector: + {{- include "aiopslab-viewer.selectorLabels" . 
| nindent 4 }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/templates/serviceaccount.yaml b/aiopslab-runs/helm/aiopslab-viewer/templates/serviceaccount.yaml new file mode 100644 index 00000000..810fb813 --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "aiopslab-viewer.serviceAccountName" . }} + labels: + {{- include "aiopslab-viewer.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: false +{{- end }} diff --git a/aiopslab-runs/helm/aiopslab-viewer/values-dev.yaml b/aiopslab-runs/helm/aiopslab-viewer/values-dev.yaml new file mode 100644 index 00000000..b5d5ea14 --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/values-dev.yaml @@ -0,0 +1,73 @@ +# Development values for aiopslab-viewer +# This file contains development-specific overrides + +# Enable development features +env: + NODE_ENV: development + LOG_LEVEL: debug + +# Smaller resource requirements for development +resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 250m + memory: 256Mi + +# Enable ingress for local development +ingress: + enabled: true + className: "nginx" + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / + nginx.ingress.kubernetes.io/backend-protocol: "HTTP" + nginx.ingress.kubernetes.io/ssl-redirect: "false" + hosts: + - host: aiopslab-viewer.local + paths: + - path: / + pathType: Prefix + +# Smaller storage for development +persistence: + database: + size: 500Mi + runs: + size: 2Gi + +# Disable autoscaling in development +autoscaling: + enabled: false + +# Faster health checks for development +healthCheck: + initialDelaySeconds: 10 + periodSeconds: 5 + +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 3 + +livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 15 + +# Enable 
network policy for security testing +networkPolicy: + enabled: true + ingress: + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + - namespaceSelector: + matchLabels: + name: default + ports: + - protocol: TCP + port: 3000 + - protocol: TCP + port: 3443 + egress: + - {} # Allow all egress for development diff --git a/aiopslab-runs/helm/aiopslab-viewer/values-prod.yaml b/aiopslab-runs/helm/aiopslab-viewer/values-prod.yaml new file mode 100644 index 00000000..ac8b084f --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/values-prod.yaml @@ -0,0 +1,146 @@ +# Production values for aiopslab-viewer +# This file contains production-specific overrides + +# Production configuration +env: + NODE_ENV: production + LOG_LEVEL: info + +# Higher resource requirements for production +resources: + limits: + cpu: 2000m + memory: 2Gi + requests: + cpu: 1000m + memory: 1Gi + +# Enable autoscaling for production +autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 + +# Production ingress with TLS +ingress: + enabled: true + className: "nginx" + annotations: + nginx.ingress.kubernetes.io/backend-protocol: "HTTP" + nginx.ingress.kubernetes.io/rate-limit: "100" + nginx.ingress.kubernetes.io/rate-limit-window: "1m" + hosts: + - host: aiopslab-viewer.yourdomain.com + paths: + - path: / + pathType: Prefix + tls: + - secretName: aiopslab-viewer-tls + hosts: + - aiopslab-viewer.yourdomain.com + +# Larger storage for production +persistence: + database: + size: 5Gi + storageClass: "fast-ssd" + runs: + size: 50Gi + storageClass: "fast-ssd" + +# Production-grade health checks +healthCheck: + initialDelaySeconds: 60 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 5 + +readinessProbe: + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + +livenessProbe: + initialDelaySeconds: 120 + periodSeconds: 60 + timeoutSeconds: 15 + failureThreshold: 3 
+ +# Security configurations +podSecurityContext: + fsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + +# Enable network policies for production security +networkPolicy: + enabled: true + ingress: + # Allow ingress from nginx ingress controller + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + ports: + - protocol: TCP + port: 3000 + # Allow internal cluster communication + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: aiopslab-viewer + ports: + - protocol: TCP + port: 3000 + - protocol: TCP + port: 3443 + egress: + # Allow DNS resolution + - to: [] + ports: + - protocol: UDP + port: 53 + # Allow HTTPS for external APIs if needed + - to: [] + ports: + - protocol: TCP + port: 443 + +# Pod anti-affinity for high availability +affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - aiopslab-viewer + topologyKey: kubernetes.io/hostname + +# Tolerations for production nodes +tolerations: + - key: "node-role.kubernetes.io/production" + operator: "Equal" + value: "true" + effect: "NoSchedule" + +# Node selector for production nodes +nodeSelector: + node-role.kubernetes.io/production: "true" diff --git a/aiopslab-runs/helm/aiopslab-viewer/values.yaml b/aiopslab-runs/helm/aiopslab-viewer/values.yaml new file mode 100644 index 00000000..132cabf9 --- /dev/null +++ b/aiopslab-runs/helm/aiopslab-viewer/values.yaml @@ -0,0 +1,189 @@ +# Default values for aiopslab-viewer +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +replicaCount: 1 + +image: + repository: aiopslab-viewer + pullPolicy: IfNotPresent + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +# Service configuration +service: + type: ClusterIP + http: + port: 3000 + targetPort: 3000 + https: + port: 3443 + targetPort: 3443 + +# Ingress configuration +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + # cert-manager.io/cluster-issuer: "letsencrypt-prod" + hosts: + - host: aiopslab-viewer.local + paths: + - path: / + pathType: Prefix + tls: [] + # - secretName: aiopslab-viewer-tls + # hosts: + # - aiopslab-viewer.local + +# Resource limits and requests +resources: + limits: + cpu: 1000m + memory: 1Gi + requests: + cpu: 500m + memory: 512Mi + +# Autoscaling configuration +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 5 + targetCPUUtilizationPercentage: 80 + targetMemoryUtilizationPercentage: 80 + +# Node selection +nodeSelector: {} +tolerations: [] +affinity: {} + +# Service account +serviceAccount: + create: true + annotations: {} + name: "" + +# Pod security context +podSecurityContext: + fsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + +# Container security context +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 + +# Environment variables +env: + NODE_ENV: production + PORT: "3000" + HTTPS_PORT: "3443" + HOST: "0.0.0.0" + RUNS_PATH: "/app/runs" + DATABASE_PATH: "/app/data/runs.db" + SSL_KEY_PATH: "/app/ssl/server.key" + SSL_CERT_PATH: "/app/ssl/server.crt" + RATE_LIMIT_WINDOW_MS: "900000" + RATE_LIMIT_MAX_REQUESTS: "100" + LOG_LEVEL: "info" + LOG_FORMAT: "combined" + +# Persistent Volume Claims +persistence: + # Database PVC + database: + enabled: true + storageClass: "" + accessMode: ReadWriteOnce + size: 1Gi + mountPath: /app/data + annotations: {} + + # Runs data PVC + runs: 
+ enabled: true + storageClass: "" + accessMode: ReadWriteOnce + size: 10Gi + mountPath: /app/runs + annotations: {} + +# SSL Certificate configuration +ssl: + # Enable SSL certificate generation + enabled: true + # Use existing secret instead of generating certificates + existingSecret: "" + # Certificate and key if not using existing secret + certificate: "" + privateKey: "" + +# Health checks +healthCheck: + enabled: true + httpGet: + path: /health + port: 3000 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + +# Readiness probe +readinessProbe: + enabled: true + httpGet: + path: /health + port: 3000 + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + successThreshold: 1 + failureThreshold: 3 + +# Liveness probe +livenessProbe: + enabled: true + httpGet: + path: /health + port: 3000 + initialDelaySeconds: 60 + periodSeconds: 30 + timeoutSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + +# Pod annotations +podAnnotations: {} + +# Pod labels +podLabels: {} + +# ConfigMap for additional configuration +configMap: + enabled: false + data: {} + +# Secrets for sensitive data +secrets: + enabled: false + data: {} + +# Network policies +networkPolicy: + enabled: false + ingress: [] + egress: [] diff --git a/aiopslab-runs/index.html b/aiopslab-runs/index.html new file mode 100644 index 00000000..667507cc --- /dev/null +++ b/aiopslab-runs/index.html @@ -0,0 +1,1873 @@ + + + + + + AIOpsLab Runs + + + + +
+
+
+
+

AIOpsLab Runs

+
+
+ + +
+
+

Monitor and analyze your AI-powered diagnostic sessions

+
+ +
+
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + + +
+
+
+
+ +
+
+

Scanning for AIOpsLab runs...

+
+ + + + + + + + +
+ + + + + + + diff --git a/aiopslab-runs/k8s-deployment.md b/aiopslab-runs/k8s-deployment.md new file mode 100644 index 00000000..92d31f48 --- /dev/null +++ b/aiopslab-runs/k8s-deployment.md @@ -0,0 +1,455 @@ +# AIOpsLab Viewer - Kubernetes Deployment Guide + +This guide covers deploying AIOpsLab Viewer to Kubernetes using Helm charts with persistent volumes for both database and runs data. + +## πŸ“‹ Prerequisites + +- Kubernetes cluster (1.19+) +- Helm 3.0+ +- kubectl configured to access your cluster +- Docker (for building custom images) + +## πŸš€ Quick Start + +### 1. Deploy to Development Environment + +```bash +# Clone the repository +git clone +cd aiopslab-runs + +# Deploy using the deployment script +./scripts/k8s-deploy.sh + +# Or deploy manually with Helm +helm install aiopslab-viewer ./helm/aiopslab-viewer \ + --namespace aiopslab \ + --create-namespace \ + --values ./helm/aiopslab-viewer/values-dev.yaml +``` + +### 2. Access the Application + +```bash +# Port forward to access locally +kubectl port-forward -n aiopslab svc/aiopslab-viewer 3000:3000 + +# Visit http://localhost:3000 +``` + +## πŸ—οΈ Helm Chart Structure + +``` +helm/aiopslab-viewer/ +β”œβ”€β”€ Chart.yaml # Chart metadata +β”œβ”€β”€ values.yaml # Default configuration +β”œβ”€β”€ values-dev.yaml # Development overrides +β”œβ”€β”€ values-prod.yaml # Production overrides +└── templates/ + β”œβ”€β”€ _helpers.tpl # Template helpers + β”œβ”€β”€ deployment.yaml # Main application deployment + β”œβ”€β”€ service.yaml # Kubernetes service + β”œβ”€β”€ pvc.yaml # Persistent Volume Claims + β”œβ”€β”€ serviceaccount.yaml # Service account + β”œβ”€β”€ ingress.yaml # Ingress configuration + β”œβ”€β”€ hpa.yaml # Horizontal Pod Autoscaler + β”œβ”€β”€ configmap.yaml # ConfigMap (optional) + β”œβ”€β”€ secret.yaml # Secrets (optional) + └── networkpolicy.yaml # Network policies (optional) +``` + +## πŸ’Ύ Persistent Storage + +The Helm chart creates two Persistent Volume Claims: + +### Database PVC +- 
**Name**: `{release-name}-database` +- **Mount Path**: `/app/data` +- **Default Size**: 1Gi (dev) / 5Gi (prod) +- **Purpose**: Stores SQLite database (`runs.db`) + +### Runs Data PVC +- **Name**: `{release-name}-runs` +- **Mount Path**: `/app/runs` +- **Default Size**: 10Gi (dev) / 50Gi (prod) +- **Purpose**: Stores AIOpsLab run data (logs, evaluations) + +### Storage Class Configuration + +```yaml +# values.yaml +persistence: + database: + enabled: true + storageClass: "fast-ssd" # Use your preferred storage class + size: 5Gi + runs: + enabled: true + storageClass: "fast-ssd" + size: 50Gi +``` + +## πŸ”§ Configuration Options + +### Environment Variables + +All server configuration can be customized via Helm values: + +```yaml +# values.yaml +env: + NODE_ENV: production + PORT: "3000" + HTTPS_PORT: "3443" + RUNS_PATH: "/app/runs" + DATABASE_PATH: "/app/data/runs.db" + RATE_LIMIT_WINDOW_MS: "900000" + RATE_LIMIT_MAX_REQUESTS: "100" + LOG_LEVEL: "info" +``` + +### Resource Limits + +```yaml +# values.yaml +resources: + limits: + cpu: 1000m + memory: 1Gi + requests: + cpu: 500m + memory: 512Mi +``` + +### Autoscaling + +```yaml +# values.yaml +autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 +``` + +## 🌐 Ingress Configuration + +### Development (HTTP) + +```yaml +# values-dev.yaml +ingress: + enabled: true + className: "nginx" + hosts: + - host: aiopslab-viewer.local + paths: + - path: / + pathType: Prefix +``` + +### Production (HTTPS with TLS) + +```yaml +# values-prod.yaml +ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + hosts: + - host: aiopslab-viewer.yourdomain.com + paths: + - path: / + pathType: Prefix + tls: + - secretName: aiopslab-viewer-tls + hosts: + - aiopslab-viewer.yourdomain.com +``` + +## πŸ”’ Security Features + +### Pod Security Context + 
+```yaml +podSecurityContext: + fsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: true + runAsUser: 1000 +``` + +### Network Policies + +```yaml +networkPolicy: + enabled: true + ingress: + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + ports: + - protocol: TCP + port: 3000 + egress: + - to: [] + ports: + - protocol: UDP + port: 53 # DNS +``` + +## πŸ“ Deployment Commands + +### Install New Deployment + +```bash +# Development +helm install aiopslab-viewer ./helm/aiopslab-viewer \ + --namespace aiopslab \ + --create-namespace \ + --values ./helm/aiopslab-viewer/values-dev.yaml + +# Production +helm install aiopslab-viewer ./helm/aiopslab-viewer \ + --namespace aiopslab-prod \ + --create-namespace \ + --values ./helm/aiopslab-viewer/values-prod.yaml \ + --set image.tag=v1.0.0 +``` + +### Upgrade Existing Deployment + +```bash +# Upgrade with new image +helm upgrade aiopslab-viewer ./helm/aiopslab-viewer \ + --namespace aiopslab \ + --values ./helm/aiopslab-viewer/values-dev.yaml \ + --set image.tag=v1.1.0 + +# Upgrade configuration only +helm upgrade aiopslab-viewer ./helm/aiopslab-viewer \ + --namespace aiopslab \ + --values ./helm/aiopslab-viewer/values-dev.yaml \ + --reuse-values +``` + +### Rollback Deployment + +```bash +# View rollback history +helm history aiopslab-viewer -n aiopslab + +# Rollback to previous version +helm rollback aiopslab-viewer -n aiopslab + +# Rollback to specific revision +helm rollback aiopslab-viewer 2 -n aiopslab +``` + +### Uninstall Deployment + +```bash +# Uninstall but keep PVCs +helm uninstall aiopslab-viewer -n aiopslab + +# Delete PVCs manually if needed +kubectl delete pvc aiopslab-viewer-database -n aiopslab +kubectl delete pvc aiopslab-viewer-runs -n aiopslab +``` + +## πŸ” Monitoring and Debugging + +### Check Pod Status + +```bash +kubectl get pods -n aiopslab -l 
app.kubernetes.io/name=aiopslab-viewer +``` + +### View Logs + +```bash +kubectl logs -n aiopslab -l app.kubernetes.io/name=aiopslab-viewer -f +``` + +### Check Storage + +```bash +kubectl get pvc -n aiopslab +kubectl describe pvc aiopslab-viewer-database -n aiopslab +``` + +### Port Forward for Local Access + +```bash +kubectl port-forward -n aiopslab svc/aiopslab-viewer 3000:3000 +``` + +### Execute Commands in Pod + +```bash +kubectl exec -it -n aiopslab deploy/aiopslab-viewer -- /bin/sh +``` + +## πŸŽ›οΈ Customization Examples + +### Custom Storage Classes + +```yaml +# values-custom.yaml +persistence: + database: + storageClass: "ssd-retain" + size: 10Gi + runs: + storageClass: "nfs-shared" + size: 100Gi +``` + +### Multiple Environments + +```bash +# Staging environment +helm install aiopslab-staging ./helm/aiopslab-viewer \ + --namespace aiopslab-staging \ + --create-namespace \ + --values ./helm/aiopslab-viewer/values-dev.yaml \ + --set env.NODE_ENV=staging \ + --set ingress.hosts[0].host=aiopslab-staging.company.com +``` + +### Custom SSL Certificates + +```yaml +# values-custom-ssl.yaml +ssl: + enabled: true + existingSecret: "custom-ssl-cert" +``` + +```bash +# Create SSL secret +kubectl create secret tls custom-ssl-cert \ + --cert=path/to/cert.crt \ + --key=path/to/private.key \ + -n aiopslab +``` + +## πŸ”§ Troubleshooting + +### Common Issues + +#### 1. PVC Pending State + +```bash +# Check storage class +kubectl get storageclass + +# Check PVC events +kubectl describe pvc aiopslab-viewer-database -n aiopslab +``` + +#### 2. Pod CrashLoopBackOff + +```bash +# Check pod logs +kubectl logs -n aiopslab -l app.kubernetes.io/name=aiopslab-viewer --previous + +# Check pod description +kubectl describe pod -n aiopslab -l app.kubernetes.io/name=aiopslab-viewer +``` + +#### 3. 
Ingress Not Working + +```bash +# Check ingress controller +kubectl get pods -n ingress-nginx + +# Check ingress resource +kubectl describe ingress aiopslab-viewer -n aiopslab +``` + +### Performance Tuning + +#### Resource Optimization + +```yaml +# For high-traffic environments +resources: + limits: + cpu: 2000m + memory: 4Gi + requests: + cpu: 1000m + memory: 2Gi + +autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 20 + targetCPUUtilizationPercentage: 60 +``` + +#### Storage Performance + +```yaml +# Use high-performance storage +persistence: + database: + storageClass: "premium-ssd" + runs: + storageClass: "premium-ssd" +``` + +## πŸ“Š Production Considerations + +### High Availability + +```yaml +# Anti-affinity rules +affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - aiopslab-viewer + topologyKey: kubernetes.io/hostname +``` + +### Backup Strategy + +```bash +# Backup database PVC +kubectl exec -n aiopslab deploy/aiopslab-viewer -- tar czf - /app/data | \ + kubectl exec -i backup-pod -- tar xzf - -C /backup/$(date +%Y%m%d) + +# Backup runs data +kubectl exec -n aiopslab deploy/aiopslab-viewer -- tar czf - /app/runs | \ + kubectl exec -i backup-pod -- tar xzf - -C /backup/$(date +%Y%m%d) +``` + +### Monitoring Integration + +```yaml +# Prometheus annotations +podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "3000" + prometheus.io/path: "/metrics" +``` + +This comprehensive Kubernetes deployment setup provides enterprise-grade capabilities for running AIOpsLab Viewer in production environments with persistent data storage, security, and scalability features. 
diff --git a/aiopslab-runs/nginx.conf b/aiopslab-runs/nginx.conf new file mode 100644 index 00000000..7f10475c --- /dev/null +++ b/aiopslab-runs/nginx.conf @@ -0,0 +1,106 @@ +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Logging + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + error_log /var/log/nginx/error.log; + + # Gzip compression + gzip on; + gzip_vary on; + gzip_min_length 10240; + gzip_proxied expired no-cache no-store private must-revalidate max-age=0; + gzip_types + text/plain + text/css + text/xml + text/javascript + application/javascript + application/xml+rss + application/json; + + # Rate limiting + limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s; + limit_req_zone $binary_remote_addr zone=static:10m rate=30r/s; + + upstream aiopslab-backend { + server aiopslab-viewer:3000; + } + + # HTTP server (redirect to HTTPS) + # HTTP server + server { + listen 80; + server_name _; + + # Security headers + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + add_header X-XSS-Protection "1; mode=block" always; + add_header Referrer-Policy "strict-origin-when-cross-origin" always; + + # API endpoints with rate limiting + location /api/ { + limit_req zone=api burst=20 nodelay; + proxy_pass http://aiopslab-backend; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_connect_timeout 30s; + proxy_send_timeout 30s; + proxy_read_timeout 30s; + } + + # Health check endpoint + location /health { + limit_req zone=api burst=5 nodelay; + proxy_pass http://aiopslab-backend; + proxy_set_header Host $host; + proxy_set_header X-Real-IP 
$remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Static files and web interface + location / { + limit_req zone=static burst=50 nodelay; + proxy_pass http://aiopslab-backend; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Caching for static assets + location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg)$ { + proxy_pass http://aiopslab-backend; + proxy_set_header Host $host; + expires 1y; + add_header Cache-Control "public, immutable"; + } + } + + # Run files serving with caching + location /runs/ { + limit_req zone=static burst=30 nodelay; + proxy_pass http://aiopslab-backend; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Cache log and evaluation files + expires 1h; + add_header Cache-Control "public"; + } + } +} diff --git a/aiopslab-runs/package.json b/aiopslab-runs/package.json new file mode 100644 index 00000000..fd897555 --- /dev/null +++ b/aiopslab-runs/package.json @@ -0,0 +1,46 @@ +{ + "name": "aiopslab-viewer", + "version": "1.0.0", + "description": "Server for AIOpsLab session analysis viewer", + "main": "server.js", + "scripts": { + "start": "node server.js", + "dev": "nodemon server.js", + "install-deps": "npm install", + "security-check": "npm audit" + }, + "keywords": [ + "aiopslab", + "kubernetes", + "analysis", + "viewer", + "server" + ], + "author": "AIOpsLab Team", + "license": "MIT", + "dependencies": { + "cors": "^2.8.5", + "crypto": "^1.0.1", + "dotenv": "^17.2.0", + "express": "^4.18.2", + "express-rate-limit": "^7.1.5", + "helmet": "^7.1.0", + "multer": "^2.0.1", + "sqlite3": "^5.1.6" + }, + "devDependencies": { + "nodemon": "^3.0.2" + }, + "engines": { + "node": ">=16.0.0", + "npm": 
">=8.0.0" + }, + "repository": { + "type": "git", + "url": "https://github.com/aiopslab/viewer" + }, + "bugs": { + "url": "https://github.com/aiopslab/viewer/issues" + }, + "homepage": "https://github.com/aiopslab/viewer#readme" +} diff --git a/aiopslab-runs/scripts/k8s-deploy.sh b/aiopslab-runs/scripts/k8s-deploy.sh new file mode 100755 index 00000000..9c0a3371 --- /dev/null +++ b/aiopslab-runs/scripts/k8s-deploy.sh @@ -0,0 +1,218 @@ +#!/bin/bash + +# AIOpsLab Viewer - Kubernetes Deployment Script +# This script helps deploy AIOpsLab Viewer to Kubernetes using Helm + +set -e + +# Default values +NAMESPACE="aiopslab" +RELEASE_NAME="aiopslab-viewer" +ENVIRONMENT="dev" +HELM_CHART_PATH="./helm/aiopslab-viewer" +IMAGE_TAG="latest" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Print colored output +print_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +print_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Show usage +usage() { + cat << EOF +Usage: $0 [OPTIONS] + +Deploy AIOpsLab Viewer to Kubernetes using Helm + +OPTIONS: + -n, --namespace NAMESPACE Kubernetes namespace (default: aiopslab) + -r, --release RELEASE_NAME Helm release name (default: aiopslab-viewer) + -e, --environment ENV Environment: dev|prod (default: dev) + -t, --tag IMAGE_TAG Docker image tag (default: latest) + -u, --upgrade Upgrade existing deployment + -d, --dry-run Perform a dry run + -h, --help Show this help message + +EXAMPLES: + $0 # Deploy to dev environment + $0 -e prod -t v1.0.0 # Deploy to production with specific tag + $0 -u -e prod # Upgrade production deployment + $0 -d -e prod # Dry run for production + +EOF +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -n|--namespace) + NAMESPACE="$2" + shift 2 + ;; + -r|--release) + RELEASE_NAME="$2" + shift 2 + ;; + -e|--environment) + ENVIRONMENT="$2" + shift 2 + ;; + -t|--tag) + 
IMAGE_TAG="$2"
+            shift 2
+            ;;
+        -u|--upgrade)
+            UPGRADE=true
+            shift
+            ;;
+        -d|--dry-run)
+            DRY_RUN=true
+            shift
+            ;;
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        *)
+            print_error "Unknown option $1"
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+# Validate environment
+if [[ ! "$ENVIRONMENT" =~ ^(dev|prod)$ ]]; then
+    print_error "Environment must be 'dev' or 'prod'"
+    exit 1
+fi
+
+# Check if required tools are installed.
+# kubectl and helm are hard requirements; docker is only needed for local
+# image builds, so its absence is a warning rather than an error.
+check_requirements() {
+    print_info "Checking requirements..."
+
+    if ! command -v kubectl &> /dev/null; then
+        print_error "kubectl is not installed"
+        exit 1
+    fi
+
+    if ! command -v helm &> /dev/null; then
+        print_error "helm is not installed"
+        exit 1
+    fi
+
+    if ! command -v docker &> /dev/null; then
+        print_warn "docker is not installed - image building will not be available"
+    fi
+
+    print_info "Requirements check passed"
+}
+
+# Create namespace if it doesn't exist (dry-run + apply makes this idempotent)
+create_namespace() {
+    print_info "Ensuring namespace '$NAMESPACE' exists..."
+    kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
+}
+
+# Build Docker image if requested (BUILD_IMAGE is only set externally;
+# NOTE(review): no CLI flag currently sets it - confirm intended)
+build_image() {
+    if [[ -n "$BUILD_IMAGE" ]]; then
+        print_info "Building Docker image..."
+        docker build -t "aiopslab-viewer:$IMAGE_TAG" .
+        print_info "Image built successfully"
+    fi
+}
+
+# Deploy using Helm: assembles a single `helm install|upgrade` command from
+# the parsed options and executes it.
+deploy() {
+    local values_file="${HELM_CHART_PATH}/values-${ENVIRONMENT}.yaml"
+    local action="install"
+
+    if [[ "$UPGRADE" == "true" ]]; then
+        action="upgrade"
+    fi
+
+    print_info "Deploying AIOpsLab Viewer..."
+    print_info "Namespace: $NAMESPACE"
+    print_info "Release: $RELEASE_NAME"
+    print_info "Environment: $ENVIRONMENT"
+    print_info "Image Tag: $IMAGE_TAG"
+
+    # Check if values file exists; fall back to the chart defaults if not
+    if [[ ! -f "$values_file" ]]; then
+        print_warn "Values file $values_file not found, using default values"
+        values_file="${HELM_CHART_PATH}/values.yaml"
+    fi
+
+    # Prepare Helm command
+    local cmd="helm $action $RELEASE_NAME $HELM_CHART_PATH"
+    cmd="$cmd --namespace $NAMESPACE"
+    cmd="$cmd --values $values_file"
+    cmd="$cmd --set image.tag=$IMAGE_TAG"
+    cmd="$cmd --wait --timeout=300s"
+
+    # BUGFIX: --dry-run is a flag of the `helm install`/`helm upgrade`
+    # subcommands, not a global flag; the previous form
+    # (`helm --dry-run install ...`) fails with "unknown flag: --dry-run".
+    if [[ "$DRY_RUN" == "true" ]]; then
+        print_info "Performing dry run..."
+        cmd="$cmd --dry-run"
+    fi
+
+    if [[ "$action" == "install" ]]; then
+        cmd="$cmd --create-namespace"
+    fi
+
+    print_info "Executing: $cmd"
+    # Quote the eval argument so the assembled command line is not re-split
+    # a second time by the shell.
+    eval "$cmd"
+
+    if [[ "$DRY_RUN" != "true" ]]; then
+        print_info "Deployment completed successfully!"
+
+        # Show deployment status
+        print_info "Checking deployment status..."
+        kubectl get pods -n "$NAMESPACE" -l "app.kubernetes.io/name=aiopslab-viewer"
+
+        # Show service information
+        print_info "Service information:"
+        kubectl get svc -n "$NAMESPACE" -l "app.kubernetes.io/name=aiopslab-viewer"
+
+        # Show ingress if enabled
+        if kubectl get ingress -n "$NAMESPACE" "$RELEASE_NAME" &> /dev/null; then
+            print_info "Ingress information:"
+            kubectl get ingress -n "$NAMESPACE" "$RELEASE_NAME"
+        fi
+
+        # Show access instructions
+        print_info "Access instructions:"
+        echo "  kubectl port-forward -n $NAMESPACE svc/$RELEASE_NAME 3000:3000"
+        echo "  Then visit: http://localhost:3000"
+    fi
+}
+
+# Main execution
+main() {
+    print_info "Starting AIOpsLab Viewer deployment..."
+
+    check_requirements
+    create_namespace
+    build_image
+    deploy
+
+    print_info "Deployment script completed!"
+} + +# Run main function +main diff --git a/aiopslab-runs/server.js b/aiopslab-runs/server.js new file mode 100644 index 00000000..0c942445 --- /dev/null +++ b/aiopslab-runs/server.js @@ -0,0 +1,1191 @@ +const express = require('express'); +const http = require('http'); +const fs = require('fs'); +const path = require('path'); +const helmet = require('helmet'); +const cors = require('cors'); +const multer = require('multer'); +const RunDatabase = require('./database'); + +// Load environment variables +require('dotenv').config(); + +const app = express(); + +// Configuration +const RUNS_PATH = process.env.RUNS_PATH || './runs'; +const PORT = process.env.PORT || 3000; + +// Ensure runs directory exists +const runsDir = path.resolve(__dirname, RUNS_PATH); +if (!fs.existsSync(runsDir)) { + fs.mkdirSync(runsDir, { recursive: true }); + console.log(`πŸ“ Created runs directory: ${runsDir}`); +} + +// Initialize database +const runDB = new RunDatabase(); + +// Helper function to format duration to 2 decimal places +function formatDuration(duration) { + if (typeof duration === 'number' && !isNaN(duration)) { + return Math.round(duration * 100) / 100; // Round to 2 decimal places + } + return duration; +} + +// Configure multer for file uploads +const storage = multer.diskStorage({ + destination: function (req, file, cb) { + const runId = req.params.runId; + const runPath = path.join(runsDir, runId); + + // Ensure run directory exists + if (!fs.existsSync(runPath)) { + fs.mkdirSync(runPath, { recursive: true }); + } + + cb(null, runPath); + }, + filename: function (req, file, cb) { + // Determine final filename based on endpoint + let finalFilename; + + if (req.route.path.includes('/log')) { + // Force log files to be named log.txt + finalFilename = 'log.txt'; + } else if (req.route.path.includes('/evaluation')) { + // For evaluation files, use a temporary name - final naming will be handled in the endpoint + finalFilename = `temp_${Date.now()}_${file.originalname}`; + } 
else { + finalFilename = file.originalname; + } + + cb(null, finalFilename); + } +}); + + + +const upload = multer({ + storage: storage, + limits: { + fileSize: 10 * 1024 * 1024, // 10MB limit + files: 1 + }, + fileFilter: function (req, file, cb) { + // Allow text files and markdown files + if (file.mimetype === 'text/plain' || + file.mimetype === 'text/markdown' || + file.originalname.endsWith('.txt') || + file.originalname.endsWith('.md') || + file.originalname.endsWith('.log')) { + cb(null, true); + } else { + cb(new Error('Invalid file type. Only .txt, .md, and .log files are allowed.')); + } + } +}); + +// Security middleware +app.use(helmet({ + contentSecurityPolicy: { + directives: { + defaultSrc: ["'self'"], + styleSrc: ["'self'", "'unsafe-inline'"], + scriptSrc: ["'self'", "'unsafe-inline'"], + scriptSrcAttr: ["'unsafe-inline'"], // Allow inline event handlers + imgSrc: ["'self'", "data:", "https:"], + fontSrc: ["'self'", "https:", "data:"], + connectSrc: ["'self'"], + mediaSrc: ["'self'"], + objectSrc: ["'none'"], + childSrc: ["'self'"], + frameSrc: ["'none'"], + workerSrc: ["'self'"], + frameAncestors: ["'none'"], + formAction: ["'self'"], + upgradeInsecureRequests: [] + } + }, + crossOriginEmbedderPolicy: false +})); + +// CORS configuration +app.use(cors({ + origin: function (origin, callback) { + // Allow requests with no origin (like mobile apps or curl requests) + if (!origin) return callback(null, true); + + // Allow localhost and local network requests + const allowedOrigins = [ + /^https?:\/\/localhost(:\d+)?$/, + /^https?:\/\/127\.0\.0\.1(:\d+)?$/, + /^https?:\/\/0\.0\.0\.0(:\d+)?$/, + /^https?:\/\/192\.168\.\d+\.\d+(:\d+)?$/, + /^https?:\/\/10\.\d+\.\d+\.\d+(:\d+)?$/ + ]; + + if (allowedOrigins.some(pattern => pattern.test(origin))) { + return callback(null, true); + } + + return callback(new Error('Not allowed by CORS')); + }, + credentials: true +})); + +// Custom security headers +app.use((req, res, next) => { + 
res.setHeader('X-Content-Type-Options', 'nosniff'); + res.setHeader('X-Frame-Options', 'DENY'); + res.setHeader('X-XSS-Protection', '1; mode=block'); + res.setHeader('Referrer-Policy', 'strict-origin-when-cross-origin'); + res.setHeader('Permissions-Policy', 'geolocation=(), microphone=(), camera=()'); + next(); +}); + +// JSON body parsing middleware +app.use(express.json()); +app.use(express.urlencoded({ extended: true })); + +// Logging middleware +app.use((req, res, next) => { + const timestamp = new Date().toISOString(); + console.log(`[${timestamp}] ${req.method} ${req.url} - ${req.ip}`); + next(); +}); + +// Rate limiting for security +const rateLimit = require('express-rate-limit'); +const limiter = rateLimit({ + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100, // limit each IP to 100 requests per windowMs + message: 'Too many requests from this IP, please try again later.', + standardHeaders: true, + legacyHeaders: false, +}); +app.use(limiter); + +// Serve static files with proper MIME types +app.use(express.static(__dirname, { + setHeaders: (res, path) => { + if (path.endsWith('.html')) { + res.setHeader('Content-Type', 'text/html; charset=utf-8'); + } else if (path.endsWith('.css')) { + res.setHeader('Content-Type', 'text/css; charset=utf-8'); + } else if (path.endsWith('.js')) { + res.setHeader('Content-Type', 'application/javascript; charset=utf-8'); + } else if (path.endsWith('.json')) { + res.setHeader('Content-Type', 'application/json; charset=utf-8'); + } else if (path.endsWith('.md')) { + res.setHeader('Content-Type', 'text/markdown; charset=utf-8'); + } else if (path.endsWith('.txt')) { + res.setHeader('Content-Type', 'text/plain; charset=utf-8'); + } + + // Cache control for static assets + if (path.match(/\.(css|js|png|jpg|jpeg|gif|ico|svg)$/)) { + res.setHeader('Cache-Control', 'public, max-age=86400'); // 1 day + } else { + res.setHeader('Cache-Control', 'no-cache'); + } + } +})); + +// Serve run files from the runs directory 
+app.use('/runs', express.static(runsDir, { + setHeaders: (res, path) => { + if (path.endsWith('.md')) { + res.setHeader('Content-Type', 'text/markdown; charset=utf-8'); + } else if (path.endsWith('.txt')) { + res.setHeader('Content-Type', 'text/plain; charset=utf-8'); + } else if (path.endsWith('.log')) { + res.setHeader('Content-Type', 'text/plain; charset=utf-8'); + } + res.setHeader('Cache-Control', 'no-cache'); + } +})); + +// API endpoint to list runs from database (no filesystem scanning) +app.get('/api/runs', async (req, res) => { + try { + console.log('πŸ“Š Loading runs from database...'); + const runs = await runDB.getAllRuns(); + + // Transform database format to API format + const formattedRuns = runs.map(runData => ({ + id: runData.id, + created: runData.created_at, + modified: runData.modified_at, + hasLogFile: runData.has_log_file, + evaluationFiles: runData.evaluation_files, + evaluationCount: runData.evaluation_count, + status: runData.status, + duration: formatDuration(runData.duration), + reasoning_judgement: runData.reasoning_judgement, + detectionAccuracy: runData.detection_accuracy, + steps: runData.steps, + inputTokens: runData.input_tokens, + outputTokens: runData.output_tokens, + reasoningScore: runData.reasoning_score, + agentName: runData.agent_name, + applicationName: runData.application_name, + lastAnalyzed: runData.last_analyzed_at + })); + + console.log(`πŸ’Ύ Loaded ${formattedRuns.length} runs from database`); + res.json(formattedRuns); + } catch (error) { + console.error('Error loading runs from database:', error); + res.status(500).json({ error: 'Failed to load runs from database' }); + } +}); + +// Get individual run data +app.get('/api/runs/:runId', async (req, res) => { + try { + const { runId } = req.params; + console.log(`πŸ” Getting run data for: ${runId}`); + + const runData = await runDB.getRun(runId); + if (!runData) { + return res.status(404).json({ error: 'Run not found' }); + } + + // Transform database format to API 
format + const formattedRun = { + id: runData.id, + created: runData.created_at, + modified: runData.modified_at, + hasLogFile: runData.has_log_file, + evaluationFiles: runData.evaluation_files, + evaluationCount: runData.evaluation_count, + status: runData.status, + duration: formatDuration(runData.duration), + reasoning_judgement: runData.reasoning_judgement, + detectionAccuracy: runData.detection_accuracy, + steps: runData.steps, + inputTokens: runData.input_tokens, + outputTokens: runData.output_tokens, + reasoningScore: runData.reasoning_score, + agentName: runData.agent_name, + applicationName: runData.application_name, + lastAnalyzed: runData.last_analyzed_at, + evaluation_files: runData.evaluation_files // Also provide snake_case for viewer.html + }; + + res.json(formattedRun); + } catch (error) { + console.error(`Error getting run data for ${req.params.runId}:`, error); + res.status(500).json({ error: 'Failed to get run data' }); + } +}); + +// Analyze run data to determine status and extract metrics +function analyzeRunData(runPath, runId) { + const analysis = { + status: 'unknown', + duration: 0, + reasoning_judgement: 'N/A', + detectionAccuracy: 'Unknown', + steps: 0, + inputTokens: 0, + outputTokens: 0, + reasoningScore: 0, + agentName: 'unknown', + applicationName: 'unknown' + }; + + try { + // Analyze log file + const logPath = path.join(runPath, 'log.txt'); + if (fs.existsSync(logPath)) { + const logContent = fs.readFileSync(logPath, 'utf8'); + // Extract metrics from log + const metricsMatch = logContent.match(/Results:\s*(\{[\s\S]*?\})\s*(?=\n==|\n[A-Z]|$)/); + if (metricsMatch) { + let metricsStr = metricsMatch[1]; + + try { + // More robust parsing for Python dict format with complex string values + const metrics = {}; + + // Extract Detection Accuracy (can be quoted string) + const detectionMatch = metricsStr.match(/'Detection[_ ]Accuracy':\s*'([^']+)'/); + if (detectionMatch) metrics.Detection_Accuracy = detectionMatch[1]; + + // Extract 
numeric fields + const ttdMatch = metricsStr.match(/'TTD':\s*([\d.]+)/); + if (ttdMatch) metrics.TTD = parseFloat(ttdMatch[1]); + + const stepsMatch = metricsStr.match(/'steps':\s*(\d+)/); + if (stepsMatch) metrics.steps = parseInt(stepsMatch[1]); + + const inTokensMatch = metricsStr.match(/'in_tokens':\s*(\d+)/); + if (inTokensMatch) metrics.in_tokens = parseInt(inTokensMatch[1]); + + const outTokensMatch = metricsStr.match(/'out_tokens':\s*(\d+)/); + if (outTokensMatch) metrics.out_tokens = parseInt(outTokensMatch[1]); + + const reasoningScoreMatch = metricsStr.match(/'reasoning_score':\s*(\d+)/); + if (reasoningScoreMatch) metrics.reasoning_score = parseInt(reasoningScoreMatch[1]); + + // Extract reasoning_judgement (complex string with nested quotes) + // Find the start and end of the reasoning_judgement value + const reasoningJudgementMatch = metricsStr.match(/'reasoning_judgement':\s*'([\s\S]*?)'(?=,\s*'[^']*':|$)/); + if (reasoningJudgementMatch) { + // Clean up the extracted string by handling escaped quotes + let reasoningText = reasoningJudgementMatch[1]; + // Unescape single quotes within the string + reasoningText = reasoningText.replace(/\\'/g, "'"); + metrics.reasoning_judgement = reasoningText; + } + + console.log(`Parsed metrics for ${runId}:`, { + Detection_Accuracy: metrics.Detection_Accuracy, + TTD: metrics.TTD, + steps: metrics.steps, + in_tokens: metrics.in_tokens, + out_tokens: metrics.out_tokens, + reasoning_score: metrics.reasoning_score, + reasoning_judgement_length: metrics.reasoning_judgement ? 
metrics.reasoning_judgement.length : 0 + }); + + analysis.detectionAccuracy = metrics.Detection_Accuracy || 'Unknown'; + analysis.duration = metrics.TTD || 0; + analysis.steps = metrics.steps || 0; + analysis.inputTokens = metrics.in_tokens || 0; + analysis.outputTokens = metrics.out_tokens || 0; + analysis.reasoningScore = metrics.reasoning_score || 0; + analysis.reasoning_judgement = metrics.reasoning_judgement || 'N/A'; + + } catch (parseError) { + console.error(`Failed to parse metrics for ${runId}:`, parseError.message); + console.error(`Raw metrics string (first 500 chars):`, metricsStr.substring(0, 500)); + // Leave metrics with default values when parsing fails + } + } + } + + // Determine status based on analysis + analysis.status = determineRunStatus(analysis, runPath); + + } catch (error) { + console.warn(`Error analyzing run ${runId}:`, error.message); + } + + return analysis; +} + + +// Generate unique filename for evaluation files with numbering if needed +function generateUniqueEvaluationFilename(runPath, baseName) { + let filename = `${baseName}.md`; + let counter = 1; + + // Check if file already exists + while (fs.existsSync(path.join(runPath, filename))) { + filename = `${baseName}-${counter}.md`; + counter++; + } + + return filename; +} + +// Dynamically discover evaluation files in a run directory +function discoverEvaluationFiles(runPath) { + const evaluationFiles = []; + + try { + if (fs.existsSync(runPath)) { + const files = fs.readdirSync(runPath); + for (const file of files) { + if (file.endsWith('.md') && file !== 'README.md') { + const filePath = path.join(runPath, file); + const stats = fs.statSync(filePath); + if (stats.isFile()) { + evaluationFiles.push(file); + } + } + } + } + } catch (error) { + console.warn(`Error discovering evaluation files in ${runPath}:`, error.message); + } + + return evaluationFiles; +} + + +// Determine run status based on analysis +function determineRunStatus(analysis, runPath) { + const logPath = 
path.join(runPath, 'log.txt'); + + // Check if log file exists + if (!fs.existsSync(logPath)) { + return 'failed'; + } + + const logContent = fs.readFileSync(logPath, 'utf8'); + + // Check for critical failures + if (logContent.includes('Fatal error') || + logContent.includes('Terminated')) { + return 'failed'; + } + + // Check for successful resolution indicators + const hasResolution = logContent.includes('root cause') || + logContent.includes('solution') || + logContent.includes('resolved') || + analysis.reasoningScore >= 8; + + // Determine status based on multiple factors + if (hasResolution && analysis.reasoningScore >= 7) { + return 'success'; + } else if (analysis.reasoningScore >= 5 || + (analysis.duration > 0 && analysis.duration < 600)) { + return 'partial'; + } else { + return 'failed'; + } +} + +// Health check endpoint +app.get('/health', (req, res) => { + res.json({ + status: 'healthy', + timestamp: new Date().toISOString(), + uptime: process.uptime() + }); +}); + +// Database statistics endpoint +app.get('/api/stats', async (req, res) => { + try { + const stats = await runDB.getStats(); + res.json(stats); + } catch (error) { + console.error('Error getting database statistics:', error); + res.status(500).json({ error: 'Failed to get database statistics' }); + } +}); + +// API endpoint to delete a run completely (database record and filesystem files) +app.delete('/api/runs/:runId', async (req, res) => { + try { + const { runId } = req.params; + const { filesOnly = false } = req.query; // Optional parameter to delete only files + + console.log(`πŸ—‘οΈ Deleting run: ${runId} (files: ${!filesOnly ? 'yes' : 'no'}, database: ${!filesOnly ? 
'yes' : 'no'})`); + + const runPath = path.join(runsDir, runId); + let deletedFiles = []; + let deletedFromDatabase = false; + + // Check if run exists in database + const existingRun = await runDB.getRun(runId); + if (!existingRun) { + return res.status(404).json({ error: 'Run not found in database' }); + } + + // Delete filesystem files and directory + if (fs.existsSync(runPath)) { + try { + // Get list of files before deletion for reporting + const files = fs.readdirSync(runPath); + deletedFiles = files; + + // Remove all files in the directory + for (const file of files) { + const filePath = path.join(runPath, file); + fs.unlinkSync(filePath); + console.log(`πŸ—‚οΈ Deleted file: ${file}`); + } + + // Remove the directory itself + fs.rmdirSync(runPath); + console.log(`πŸ“ Deleted directory: ${runPath}`); + } catch (fileError) { + console.error(`Error deleting files for run ${runId}:`, fileError); + return res.status(500).json({ + error: 'Failed to delete run files', + details: fileError.message + }); + } + } + + // Delete from database (unless filesOnly is true) + if (!filesOnly) { + try { + await runDB.deleteRun(runId); + deletedFromDatabase = true; + console.log(`πŸ’Ύ Deleted run from database: ${runId}`); + } catch (dbError) { + console.error(`Error deleting run from database:`, dbError); + return res.status(500).json({ + error: 'Files deleted but failed to remove database record', + details: dbError.message + }); + } + } + + res.json({ + success: true, + message: `Run ${runId} deleted successfully`, + deleted: { + files: deletedFiles, + filesCount: deletedFiles.length, + directory: !fs.existsSync(runPath), + database: deletedFromDatabase + } + }); + } catch (error) { + console.error('Error deleting run:', error); + res.status(500).json({ error: 'Failed to delete run' }); + } +}); + +// Force reanalysis of a specific run +app.post('/api/runs/:runId/reanalyze', async (req, res) => { + try { + const { runId } = req.params; + const runPath = path.join(runsDir, 
runId); + + if (!fs.existsSync(runPath)) { + return res.status(404).json({ error: 'Run not found' }); + } + + console.log(`πŸ”„ Force reanalyzing run: ${runId}`); + + const stats = fs.statSync(runPath); + const hasLogFile = fs.existsSync(path.join(runPath, 'log.txt')); + const evaluationFiles = discoverEvaluationFiles(runPath); + + const runAnalysis = analyzeRunData(runPath, runId); + + // Get existing run data to preserve manually set fields + const existingRun = await runDB.getRun(runId); + + const runData = { + id: runId, + created_at: stats.birthtime.toISOString(), + modified_at: stats.mtime.toISOString(), + file_hash: runDB.calculateRunHash(runPath), + has_log_file: hasLogFile, + evaluation_files: evaluationFiles, + evaluation_count: evaluationFiles.length, + // Map camelCase to snake_case for database + status: runAnalysis.status, + duration: runAnalysis.duration, + issues: runAnalysis.issues, + reasoning_judgement: runAnalysis.reasoning_judgement, + detection_accuracy: runAnalysis.detectionAccuracy, + steps: runAnalysis.steps, + input_tokens: runAnalysis.inputTokens, + output_tokens: runAnalysis.outputTokens, + reasoning_score: runAnalysis.reasoningScore, + // Preserve existing agent_name and application_name if they exist and are not 'unknown' + agent_name: (existingRun?.agent_name && existingRun.agent_name !== 'unknown') + ? existingRun.agent_name + : runAnalysis.agentName, + application_name: (existingRun?.application_name && existingRun.application_name !== 'unknown') + ? 
existingRun.application_name + : runAnalysis.applicationName, + namespace: runAnalysis.namespace, + errors: runAnalysis.errors + }; + + await runDB.upsertRun(runData); + + res.json({ + message: 'Run reanalyzed successfully', + runData: { + id: runData.id, + status: runData.status, + duration: formatDuration(runData.duration), + reasoning_judgement: runData.reasoning_judgement, + lastAnalyzed: new Date().toISOString() + } + }); + } catch (error) { + console.error('Error reanalyzing run:', error); + res.status(500).json({ error: 'Failed to reanalyze run' }); + } +}); + +// Database cleanup endpoint +app.post('/api/cleanup', async (req, res) => { + try { + const daysOld = parseInt(req.query.days || req.body.days || 30); + const deletedCount = await runDB.cleanupOldRuns(daysOld); + res.json({ + message: `Cleaned up ${deletedCount} old runs`, + deletedCount + }); + } catch (error) { + console.error('Error cleaning up database:', error); + res.status(500).json({ error: 'Failed to cleanup database' }); + } +}); + +// Delete all records endpoint (dangerous operation) +app.post('/api/delete-all', async (req, res) => { + try { + const { confirm } = req.body; + + if (confirm !== 'DELETE_ALL_RECORDS') { + return res.status(400).json({ + error: 'Missing confirmation', + message: 'You must send {"confirm": "DELETE_ALL_RECORDS"} to perform this operation' + }); + } + + console.log('πŸ—‘οΈ DANGER: Deleting ALL database records'); + + // Delete all records from the database + const deletedCount = await new Promise((resolve, reject) => { + runDB.db.run('DELETE FROM runs', function(err) { + if (err) { + reject(err); + } else { + console.log(`🧹 Deleted ALL ${this.changes} records from database`); + resolve(this.changes); + } + }); + }); + + res.json({ + message: `Deleted all ${deletedCount} records from database`, + deletedCount + }); + } catch (error) { + console.error('Error deleting all records:', error); + res.status(500).json({ error: 'Failed to delete all records' }); + } +}); 
+ +// API endpoint to scan filesystem and import/update runs in database +app.post('/api/runs/scan', async (req, res) => { + try { + console.log('πŸ” Scanning filesystem for runs...'); + const importedRuns = []; + const entries = fs.readdirSync(runsDir, { withFileTypes: true }); + + for (const entry of entries) { + if (entry.isDirectory() && entry.name.match(/^\d{8}-[a-f0-9]+$/)) { + const runPath = path.join(runsDir, entry.name); + const stats = fs.statSync(runPath); + + console.log(`πŸ“ Processing run: ${entry.name}`); + + // Check for required files + const hasLogFile = fs.existsSync(path.join(runPath, 'log.txt')); + const evaluationFiles = discoverEvaluationFiles(runPath); + + // Analyze run data for intelligent status assessment + const runAnalysis = analyzeRunData(runPath, entry.name); + + // Get existing run data to preserve manually set fields + const existingRun = await runDB.getRun(entry.name); + + // Prepare data for database + const runData = { + id: entry.name, + created_at: stats.birthtime.toISOString(), + modified_at: stats.mtime.toISOString(), + file_hash: runDB.calculateRunHash(runPath), + has_log_file: hasLogFile, + evaluation_files: evaluationFiles, + evaluation_count: evaluationFiles.length, + status: runAnalysis.status, + duration: runAnalysis.duration, + issues: runAnalysis.issues, + reasoning_judgement: runAnalysis.reasoning_judgement, + detection_accuracy: runAnalysis.detectionAccuracy, + steps: runAnalysis.steps, + input_tokens: runAnalysis.inputTokens, + output_tokens: runAnalysis.outputTokens, + reasoning_score: runAnalysis.reasoningScore, + // Preserve existing agent_name and application_name if they exist and are not 'unknown' + agent_name: (existingRun?.agent_name && existingRun.agent_name !== 'unknown') + ? existingRun.agent_name + : runAnalysis.agentName, + application_name: (existingRun?.application_name && existingRun.application_name !== 'unknown') + ? 
existingRun.application_name + : runAnalysis.applicationName, + namespace: runAnalysis.namespace, + errors: runAnalysis.errors + }; + + // Save to database + await runDB.upsertRun(runData); + importedRuns.push({ + id: entry.name, + status: 'imported' + }); + + console.log(`βœ… Imported run: ${entry.name}`); + } + } + + console.log(`πŸŽ‰ Scan complete. Imported ${importedRuns.length} runs.`); + res.json({ + success: true, + message: `Scanned and imported ${importedRuns.length} runs`, + runs: importedRuns + }); + } catch (error) { + console.error('Error scanning and importing runs:', error); + res.status(500).json({ error: 'Failed to scan and import runs' }); + } +}); + +// API endpoint to create a new run record +app.post('/api/runs', async (req, res) => { + try { + const { runId, agentName, applicationName, status = 'unknown', duration = 0, score = 'N/A' } = req.body; + + if (!runId) { + return res.status(400).json({ error: 'runId is required' }); + } + + if (!agentName || !applicationName) { + return res.status(400).json({ error: 'agentName and applicationName are required' }); + } + + // Validate runId format (should be like 20250715-57fff059) + if (!runId.match(/^\d{8}-[a-f0-9]+$/)) { + return res.status(400).json({ error: 'Invalid runId format. Expected format: YYYYMMDD-hash' }); + } + + const runPath = path.join(runsDir, runId); + + // Create run directory if it doesn't exist + if (!fs.existsSync(runPath)) { + fs.mkdirSync(runPath, { recursive: true }); + } + + const stats = fs.existsSync(runPath) ? fs.statSync(runPath) : null; + const hasLogFile = fs.existsSync(path.join(runPath, 'log.txt')); + + const runData = { + id: runId, + created_at: stats ? 
stats.birthtime.toISOString() : new Date().toISOString(), + modified_at: new Date().toISOString(), + file_hash: runDB.calculateRunHash(runPath), + has_log_file: hasLogFile, + evaluation_files: [], + evaluation_count: 0, + status, + duration, + issues: 0, + score, + detection_accuracy: 'Unknown', + steps: 0, + input_tokens: 0, + output_tokens: 0, + reasoning_score: 0, + namespace: applicationName, // Use applicationName as namespace + agent_name: agentName, // Store agent name + application_name: applicationName, // Store application name + errors: [] + }; + + await runDB.upsertRun(runData); + + console.log(`πŸ“ Created new run record: ${runId}`); + res.json({ + success: true, + message: `Run ${runId} created successfully`, + runData: { + id: runData.id, + status: runData.status, + created: runData.created_at + } + }); + } catch (error) { + console.error('Error creating run record:', error); + res.status(500).json({ error: 'Failed to create run record' }); + } +}); + +// API endpoint to delete multiple runs at once +app.post('/api/runs/delete-batch', async (req, res) => { + try { + const { runIds, deleteFiles = true } = req.body; + + if (!Array.isArray(runIds) || runIds.length === 0) { + return res.status(400).json({ error: 'runIds must be a non-empty array' }); + } + + console.log(`πŸ—‘οΈ Batch deleting ${runIds.length} runs (files: ${deleteFiles})`); + + const results = { + successful: [], + failed: [], + totalDeleted: 0, + totalFailed: 0 + }; + + for (const runId of runIds) { + try { + const runPath = path.join(runsDir, runId); + let deletedFiles = []; + + // Check if run exists in database + const existingRun = await runDB.getRun(runId); + if (!existingRun) { + results.failed.push({ + runId, + error: 'Run not found in database' + }); + continue; + } + + // Delete filesystem files if requested + if (deleteFiles && fs.existsSync(runPath)) { + const files = fs.readdirSync(runPath); + deletedFiles = files; + + // Remove all files in the directory + for (const file 
of files) { + const filePath = path.join(runPath, file); + fs.unlinkSync(filePath); + } + + // Remove the directory itself + fs.rmdirSync(runPath); + } + + // Delete from database + await runDB.deleteRun(runId); + + results.successful.push({ + runId, + deletedFiles: deletedFiles.length, + deletedFromDatabase: true + }); + + results.totalDeleted++; + console.log(`βœ… Deleted run: ${runId}`); + + } catch (error) { + console.error(`Error deleting run ${runId}:`, error); + results.failed.push({ + runId, + error: error.message + }); + results.totalFailed++; + } + } + + console.log(`πŸŽ‰ Batch deletion complete. Success: ${results.totalDeleted}, Failed: ${results.totalFailed}`); + + res.json({ + success: true, + message: `Batch deletion completed. ${results.totalDeleted} runs deleted, ${results.totalFailed} failed.`, + results + }); + + } catch (error) { + console.error('Error in batch deletion:', error); + res.status(500).json({ error: 'Failed to perform batch deletion' }); + } +}); + +// API endpoint to upload log file for a run +app.post('/api/runs/:runId/log', upload.single('logFile'), async (req, res) => { + try { + const runId = req.params.runId; + + if (!req.file) { + return res.status(400).json({ error: 'No log file provided' }); + } + + const runPath = path.join(runsDir, runId); + + // Verify run exists in database + const existingRun = await runDB.getRun(runId); + if (!existingRun) { + return res.status(404).json({ error: 'Run not found' }); + } + + // Analyze the uploaded log file + const runAnalysis = analyzeRunData(runPath, runId); + + // Update run data with log file analysis + const updatedData = { + ...existingRun, + modified_at: new Date().toISOString(), + file_hash: runDB.calculateRunHash(runPath), + has_log_file: true, + status: runAnalysis.status, + duration: runAnalysis.duration, + issues: runAnalysis.issues, + reasoning_judgement: runAnalysis.reasoning_judgement, + detection_accuracy: runAnalysis.detectionAccuracy, + steps: runAnalysis.steps, + 
input_tokens: runAnalysis.inputTokens, + output_tokens: runAnalysis.outputTokens, + reasoning_score: runAnalysis.reasoningScore, + // Preserve existing agent_name and application_name if they exist and are not 'unknown' + agent_name: (existingRun.agent_name && existingRun.agent_name !== 'unknown') + ? existingRun.agent_name + : runAnalysis.agentName, + application_name: (existingRun.application_name && existingRun.application_name !== 'unknown') + ? existingRun.application_name + : runAnalysis.applicationName, + namespace: runAnalysis.namespace, + errors: runAnalysis.errors + }; + + await runDB.upsertRun(updatedData); + + console.log(`πŸ“„ Log file uploaded for run: ${runId} (saved as log.txt)`); + res.json({ + success: true, + message: `Log file uploaded successfully for run ${runId}`, + filename: 'log.txt', + originalName: req.file.originalname, + analysis: runAnalysis + }); + } catch (error) { + console.error('Error uploading log file:', error); + res.status(500).json({ error: 'Failed to upload log file' }); + } +}); + +// API endpoint to upload evaluation file for a run +app.post('/api/runs/:runId/evaluation', upload.single('evaluationFile'), async (req, res) => { + try { + const runId = req.params.runId; + const targetName = req.body.targetName || 'Eval'; // Default to 'Eval' if not provided + + if (!req.file) { + return res.status(400).json({ error: 'No evaluation file provided' }); + } + + if (!req.file.originalname.endsWith('.md')) { + return res.status(400).json({ error: 'Evaluation file must be a .md file' }); + } + + const runPath = path.join(runsDir, runId); + + // Verify run exists in database + const existingRun = await runDB.getRun(runId); + if (!existingRun) { + return res.status(404).json({ error: 'Run not found' }); + } + + // Determine the final filename with numbering if needed + const finalFilename = generateUniqueEvaluationFilename(runPath, targetName); + + // Move the uploaded file to the correct location with the new name + const tempPath = 
req.file.path; + const finalPath = path.join(runPath, finalFilename); + + try { + fs.renameSync(tempPath, finalPath); + } catch (moveError) { + console.error('Error moving uploaded file:', moveError); + return res.status(500).json({ error: 'Failed to save evaluation file' }); + } + + // Update evaluation files list + const currentEvaluationFiles = Array.isArray(existingRun.evaluation_files) + ? existingRun.evaluation_files + : JSON.parse(existingRun.evaluation_files || '[]'); + + if (!currentEvaluationFiles.includes(finalFilename)) { + currentEvaluationFiles.push(finalFilename); + } + + // Update run data + const updatedData = { + ...existingRun, + modified_at: new Date().toISOString(), + evaluation_files: currentEvaluationFiles, + evaluation_count: currentEvaluationFiles.length + }; + + // Update status if needed + if (updatedData.has_log_file && updatedData.evaluation_count > 0) { + updatedData.status = determineRunStatus(updatedData, runPath); + } + + await runDB.upsertRun(updatedData); + + console.log(`πŸ“ Evaluation file uploaded for run: ${runId} (saved as ${finalFilename})`); + res.json({ + success: true, + message: `Evaluation file uploaded successfully for run ${runId}`, + filename: finalFilename, + originalName: req.file.originalname, + targetName: targetName, + evaluationFiles: currentEvaluationFiles, + evaluationCount: currentEvaluationFiles.length + }); + } catch (error) { + console.error('Error uploading evaluation file:', error); + res.status(500).json({ error: 'Failed to upload evaluation file' }); + } +}); + +// API endpoint to update a run record +app.put('/api/runs/:runId', async (req, res) => { + try { + const { runId } = req.params; + const updateData = req.body; + + console.log(`πŸ“ Updating run ${runId}`); + + // Get existing run to preserve unchanged fields + const existingRun = await runDB.getRun(runId); + if (!existingRun) { + return res.status(404).json({ error: 'Run not found' }); + } + + // Merge existing data with updates + const 
updatedRun = { + ...existingRun, + ...updateData, + id: runId, // Ensure ID cannot be changed + modified_at: new Date().toISOString(), + last_analyzed_at: new Date().toISOString() + }; + + // Update in database + await runDB.upsertRun(updatedRun); + + res.json({ + message: 'Run updated successfully', + run: updatedRun + }); + + } catch (error) { + console.error('Error updating run:', error); + res.status(500).json({ error: 'Failed to update run' }); + } +}); + +// API endpoint for bulk reanalysis +app.post('/api/runs/bulk-reanalyze', async (req, res) => { + try { + const { runIds, reanalyzeAll = false } = req.body; + + let targetRunIds = []; + + if (reanalyzeAll) { + // Get all run IDs from database + const allRuns = await runDB.getAllRuns(); + targetRunIds = allRuns.map(run => run.id); + } else { + if (!Array.isArray(runIds) || runIds.length === 0) { + return res.status(400).json({ error: 'runIds must be a non-empty array when reanalyzeAll is false' }); + } + targetRunIds = runIds; + } + + console.log(`πŸ”„ Starting bulk reanalysis of ${targetRunIds.length} runs`); + + const results = { + successful: [], + failed: [], + totalProcessed: targetRunIds.length, + successCount: 0, + failedCount: 0 + }; + + // Process each run + for (const runId of targetRunIds) { + try { + const runPath = path.join(runsDir, runId); + + // Check if run directory exists + if (!fs.existsSync(runPath)) { + results.failed.push({ + runId, + error: 'Run directory not found' + }); + results.failedCount++; + continue; + } + + // Trigger reanalysis + const analysisResult = await analyzeRun(runId, runPath); + + if (analysisResult) { + results.successful.push(runId); + results.successCount++; + } else { + results.failed.push({ + runId, + error: 'Analysis returned no result' + }); + results.failedCount++; + } + + } catch (error) { + results.failed.push({ + runId, + error: error.message + }); + results.failedCount++; + } + } + + console.log(`βœ… Bulk reanalysis complete: ${results.successCount} 
successful, ${results.failedCount} failed`); + + res.json({ + message: `Bulk reanalysis complete`, + results + }); + + } catch (error) { + console.error('Error in bulk reanalysis:', error); + res.status(500).json({ error: 'Failed to perform bulk reanalysis' }); + } +}); + +// Admin route +app.get('/admin', (req, res) => { + res.sendFile(path.join(__dirname, 'admin.html')); +}); + +// Main viewer route +app.get('/', (req, res) => { + res.sendFile(path.join(__dirname, 'index.html')); +}); + +// Start the server +const server = app.listen(PORT, () => { + console.log(`πŸš€ AIOpsLab server running on port ${PORT}`); + console.log(`πŸ“Š Dashboard: http://localhost:${PORT}`); + console.log(`πŸ”§ Admin Panel: http://localhost:${PORT}/admin`); + console.log(`πŸ‘οΈ Viewer: http://localhost:${PORT}/viewer.html`); +}); + +// Graceful shutdown +process.on('SIGTERM', () => { + console.log('SIGTERM signal received: closing HTTP server'); + server.close(() => { + console.log('HTTP server closed'); + if (runDB) { + runDB.close(); + } + }); +}); + +process.on('SIGINT', () => { + console.log('SIGINT signal received: closing HTTP server'); + server.close(() => { + console.log('HTTP server closed'); + if (runDB) { + runDB.close(); + } + }); +}); + + diff --git a/aiopslab-runs/test-log.txt b/aiopslab-runs/test-log.txt new file mode 100644 index 00000000..9ea0f434 --- /dev/null +++ b/aiopslab-runs/test-log.txt @@ -0,0 +1,14 @@ +Test log file with agent patterns +python3 clients/some-other-agent.py +Namespace different-app +Using agent: different-agent +Application: different-application + +Results: { + "Detection_Accuracy": "High", + "TTD": 15.5, + "steps": 8, + "in_tokens": 1500, + "out_tokens": 800, + "reasoning_score": 9 +} diff --git a/aiopslab-runs/viewer.html b/aiopslab-runs/viewer.html new file mode 100644 index 00000000..b9f98644 --- /dev/null +++ b/aiopslab-runs/viewer.html @@ -0,0 +1,1843 @@ + + + + + + AIOpsLab Session Analysis + + + + +
+

AIOpsLab Session Analysis

+
Loading session details...
+ + +
+ +
+
+
+
+ + Raw Log File + + + +
+
+
+
+
Loading log content...
+
+
+ Scroll to view full log +
+ + +
+
+
+
+ +
+
+
+ + AI Evaluations + +
+
+
+
+
+
+
+
+
+
+ + + + + + + + + +