From 71c66117c5f07ae313ac137d916a3da09a3af109 Mon Sep 17 00:00:00 2001 From: Naragod Date: Mon, 10 Nov 2025 17:28:17 -0500 Subject: [PATCH 1/5] Add opentelemetry python dependencies --- README.md | 2 +- pyproject.toml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 88a06ee..b4d6e6d 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ To install project dependencies, including development dependencies: ```console -$ pip install -e .[dev] +$ pip install -e '.[dev]' ``` To install pre-commit hooks: diff --git a/pyproject.toml b/pyproject.toml index 37c9de4..7ae2ea3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,9 @@ dependencies = [ "python-dotenv", "redis", "requests", + "opentelemetry-instrumentation-flask", + "opentelemetry-sdk", + "opentelemetry-exporter-otlp-proto-grpc", ] [project.optional-dependencies] From f107ed963b9e54ac580e5942c9525df4c8d8708f Mon Sep 17 00:00:00 2001 From: Naragod Date: Mon, 10 Nov 2025 17:31:16 -0500 Subject: [PATCH 2/5] Use opentelemetry flask instrumentation --- markus_ai_server/server.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/markus_ai_server/server.py b/markus_ai_server/server.py index 7855b8d..c487913 100644 --- a/markus_ai_server/server.py +++ b/markus_ai_server/server.py @@ -12,6 +12,12 @@ import requests from dotenv import load_dotenv from flask import Flask, abort, jsonify, request +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.flask import FlaskInstrumentor +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor from .redis_helper import REDIS_CONNECTION @@ -22,7 +28,22 @@ # Load environment variables from .env file load_dotenv() +# Configure OpenTelemetry +resource = Resource.create({"service.name": "ai-server"}) +tracer_provider = TracerProvider(resource=resource) + +# Configure OTLP exporter to send to collector at localhost:4317 +otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) +span_processor = BatchSpanProcessor(otlp_exporter) +tracer_provider.add_span_processor(span_processor) + +# Set the global tracer provider +trace.set_tracer_provider(tracer_provider) + +tracer = trace.get_tracer("ai-server.tracer") + app = Flask('AI server') +FlaskInstrumentor().instrument_app(app) # Configuration from environment variables DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'deepseek-coder-v2:latest') @@ -32,7 +53,8 @@ GGUF_DIR = os.getenv('GGUF_DIR', '/data1/GGUF') # Llama server configuration -_llama_server_url = os.getenv('LLAMA_SERVER_URL') # e.g., http://localhost:8080 or localhost:8080 +# e.g., http://localhost:8080 or localhost:8080 +_llama_server_url = os.getenv('LLAMA_SERVER_URL') LLAMA_SERVER_URL = ( f"http://{_llama_server_url}" if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://')) From 825d3d7de62f28ca918ad39a4250fb4dd4a8333c Mon Sep 17 00:00:00 2001 From: Naragod Date: Mon, 10 Nov 2025 21:49:48 -0500 Subject: [PATCH 3/5] Update set up instructions --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index b4d6e6d..9b65749 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ To install project dependencies, including development dependencies: ```console +$ source venv/bin/activate; + $ pip install -e 
'.[dev]'
 ```
 
@@ -19,3 +21,25 @@ To run the test suite:
 ```console
 $ pytest
 ```
+
+To run locally:
+
+Prerequisites:
+
+You must have Redis and a llama server up and running.
+
+
+```console
+$ docker compose -f opentelemetry_collector/docker-compose.yml up -d
+
+$ REDIS_URL='redis://localhost:6379' LLAMA_SERVER_URL='http://localhost:11434' python3 -m ai_server
+```
+
+Send a request:
+
+Example:
+
+```bash
+curl --location 'localhost:5000/chat' \
+--form 'content="asdf asdf asdasdf ad"'
+```

From 294fb157b4945975a0e94333ec616dc6aaf38924 Mon Sep 17 00:00:00 2001
From: Naragod
Date: Mon, 10 Nov 2025 21:50:19 -0500
Subject: [PATCH 4/5] Add OpenTelemetry pipeline

---
 markus_ai_server/server.py                    |  36 +-
 opentelemetry_collector/CONFIG_EXPLANATION.md | 419 ++++++++++++++++++
 opentelemetry_collector/README.md             |  51 +++
 opentelemetry_collector/alert_rules.yml       |  42 ++
 opentelemetry_collector/alertmanager.yml      |  45 ++
 opentelemetry_collector/config.yml            |  74 ++++
 opentelemetry_collector/docker-compose.yml    | 150 +++++++
 opentelemetry_collector/prometheus.yml        |  30 ++
 8 files changed, 839 insertions(+), 8 deletions(-)
 create mode 100644 opentelemetry_collector/CONFIG_EXPLANATION.md
 create mode 100644 opentelemetry_collector/README.md
 create mode 100644 opentelemetry_collector/alert_rules.yml
 create mode 100644 opentelemetry_collector/alertmanager.yml
 create mode 100644 opentelemetry_collector/config.yml
 create mode 100644 opentelemetry_collector/docker-compose.yml
 create mode 100644 opentelemetry_collector/prometheus.yml

diff --git a/markus_ai_server/server.py b/markus_ai_server/server.py
index c487913..a0d2364 100644
--- a/markus_ai_server/server.py
+++ b/markus_ai_server/server.py
@@ -12,9 +12,12 @@
 import requests
 from dotenv import load_dotenv
 from flask import Flask, abort, jsonify, request
-from opentelemetry import trace
+from opentelemetry import metrics, trace
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
 from opentelemetry.instrumentation.flask import FlaskInstrumentor
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
@@ -28,20 +31,37 @@
 # Load environment variables from .env file
 load_dotenv()
 
-# Configure OpenTelemetry
+# Configure OpenTelemetry - Shared Resource (identifies this service)
 resource = Resource.create({"service.name": "ai-server"})
+
+# ========== TRACES CONFIGURATION ==========
+# TracerProvider: Factory for creating tracers (for distributed tracing)
 tracer_provider = TracerProvider(resource=resource)
 
-# Configure OTLP exporter to send to collector at localhost:4317
-otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True)
-span_processor = BatchSpanProcessor(otlp_exporter)
+# OTLP Trace Exporter: Sends traces to collector at localhost:4317
+otlp_trace_exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True)
+span_processor = BatchSpanProcessor(otlp_trace_exporter)
 tracer_provider.add_span_processor(span_processor)
 
-# Set the global tracer provider
+# Set the global tracer provider (FlaskInstrumentor will use this)
 trace.set_tracer_provider(tracer_provider)
-
 tracer = trace.get_tracer("ai-server.tracer")
 
+# ========== METRICS CONFIGURATION ==========
+# OTLP Metric Exporter: 
Sends metrics to collector at localhost:4317 +otlp_metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317", insecure=True) + +# PeriodicExportingMetricReader: Collects and exports metrics every 10 seconds +metric_reader = PeriodicExportingMetricReader( + exporter=otlp_metric_exporter, export_interval_millis=10000 # Export every 10 seconds +) + +# MeterProvider: Factory for creating meters (for metrics collection) +meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) + +# Set the global meter provider (FlaskInstrumentor will use this for HTTP metrics) +metrics.set_meter_provider(meter_provider) + app = Flask('AI server') FlaskInstrumentor().instrument_app(app) @@ -271,7 +291,7 @@ def authenticate() -> str: @app.route('/chat', methods=['POST']) def chat(): """Handle chat request with optional llama_mode and system prompt parameters.""" - authenticate() + # authenticate() model = request.form.get('model', DEFAULT_MODEL) content = request.form.get('content', '') llama_mode = request.form.get('llama_mode', 'cli') diff --git a/opentelemetry_collector/CONFIG_EXPLANATION.md b/opentelemetry_collector/CONFIG_EXPLANATION.md new file mode 100644 index 0000000..22d9d8e --- /dev/null +++ b/opentelemetry_collector/CONFIG_EXPLANATION.md @@ -0,0 +1,419 @@ +# OpenTelemetry Setup: Configuration Deep Dive + +### config.yml (OpenTelemetry Collector Configuration) + +```yaml +# OpenTelemetry Collector Configuration for ai-server monitoring +receivers: + # Receives telemetry from your Flask app via OTLP protocol + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 # Your Flask app sends traces here + http: + endpoint: 0.0.0.0:4318 # Alternative HTTP endpoint (not currently used) +``` + +**Explanation:** + +**`receivers` section:** +- **Purpose**: Defines how the collector receives telemetry data +- **`otlp`**: OpenTelemetry Protocol - the standard protocol for sending telemetry + - **`grpc`**: Uses gRPC protocol (binary, efficient) + - **`endpoint: 0.0.0.0:4317`**: + - `0.0.0.0` means "listen on all network interfaces" (localhost, Docker bridge, etc.) + - Port `4317` is the standard OTLP gRPC port + - Your Flask app is configured to send to `http://localhost:4317` + - **`http`**: Alternative HTTP endpoint (fallback, not used by your Flask app) + +--- + +```yaml +connectors: + # spanmetrics connector - Generates RED metrics (calls, errors, duration) from traces + # This is what Jaeger SPM requires to function + spanmetrics: + histogram: + explicit: + # Latency buckets in seconds (0.001s = 1ms, 10s = 10000ms) + buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] + dimensions: + # Group metrics by these span attributes + - name: http.method # GET, POST, etc. + - name: http.status_code # 200, 500, etc. + metrics_flush_interval: 15s # Export metrics every 15 seconds +``` + +**Explanation:** + +**`connectors` section:** +- **Purpose**: Connectors sit between pipelines and transform data +- **`spanmetrics`**: A special connector that watches traces and generates metrics + +**How spanmetrics works:** +1. **Watches the traces pipeline**: Every span that flows through gets analyzed +2. **Generates counters**: For each span, increments `calls_total` counter +3. **Generates histograms**: Records span duration in `duration_*` histogram buckets +4. 
**Outputs to metrics pipeline**: The generated metrics become inputs for the metrics pipeline + +**`histogram.explicit.buckets`:** +- Defines latency buckets for histogram metrics +- Values are in **seconds**: `0.001` = 1ms, `1` = 1000ms, `10` = 10000ms +- When a request takes 234ms (0.234s), it gets counted in the `0.25` bucket +- These buckets allow calculating percentiles (P50, P95, P99) + +**`dimensions`:** +- Creates separate metric series for each unique combination of these attributes +- Example: `calls_total{http.method="POST", http.status_code="200"}` vs `calls_total{http.method="GET", http.status_code="404"}` +- Allows filtering metrics by HTTP method, status code, etc. in queries + +**`metrics_flush_interval`:** +- How often spanmetrics exports accumulated metrics (15 seconds) +- Balances freshness vs efficiency (too frequent = overhead, too slow = stale data) + +--- + +```yaml +processors: + # Batches spans together for efficient export (reduces network calls) + batch: + timeout: 10s # Send batch every 10 seconds + send_batch_size: 100 # OR when 100 spans accumulate +``` + +**Explanation:** + +**`processors` section:** +- **Purpose**: Processes/transforms telemetry data in the pipeline +- **`batch`**: Groups multiple spans/metrics together before exporting + +**Why batching matters:** +- **Without batching**: Every span would be sent individually (1 network call per span) +- **With batching**: 100 spans sent in 1 network call +- **Result**: ~100x reduction in network overhead + +**`timeout: 10s`:** +- If 10 seconds pass without reaching 100 spans, send whatever we have +- Prevents data from sitting too long + +**`send_batch_size: 100`:** +- As soon as 100 spans accumulate, send immediately (don't wait for timeout) +- Prevents memory buildup during high traffic + +**Decision logic:** +``` +IF (accumulated_spans >= 100) OR (time_since_last_send >= 10s) + THEN send_batch() +``` + +--- + +```yaml +exporters: + # Console output - still shows traces in terminal for debugging + debug: + verbosity: normal + + # OTLP exporter - sends traces to Jaeger via OTLP protocol + # Jaeger has a built-in OTLP receiver (mapped to host port 14317) + otlp: + endpoint: localhost:14317 # Jaeger's OTLP receiver (Docker port mapping) + tls: + insecure: true # No TLS encryption (ok for local development) + + # Prometheus exporter - exposes metrics in Prometheus format + # Prometheus will scrape this endpoint every 15 seconds + prometheus: + endpoint: "0.0.0.0:8889" # Expose metrics for Prometheus to scrape + namespace: "ai_server" # Prefix for all metrics (ai_server_http_requests_total) + const_labels: # Labels added to all metrics + environment: "development" +``` + +**Explanation:** + +**`exporters` section:** +- **Purpose**: Defines where to send processed telemetry data + +**1. `debug` exporter:** +- **Purpose**: Prints telemetry to console/logs (useful for troubleshooting) +- **`verbosity: normal`**: Shows span details without overwhelming output + - `basic`: Just counts (least verbose) + - `normal`: Readable span summaries ← We use this + - `detailed`: Full JSON dumps (most verbose) + +**2. `otlp` exporter:** +- **Purpose**: Sends traces to Jaeger +- **`endpoint: localhost:14317`**: + - Jaeger container exposes port 4317 internally + - Docker maps it to host port 14317 (`-p 14317:4317`) + - Collector (running on host) connects to `localhost:14317` +- **`tls.insecure: true`**: + - Disables TLS certificate verification + - **Only safe for local development!** Production should use proper TLS + +**3. 
`prometheus` exporter:** +- **Purpose**: Exposes metrics in Prometheus format for scraping +- **How it works**: Creates an HTTP server that Prometheus polls +- **`endpoint: "0.0.0.0:8889"`**: + - Opens HTTP server on all interfaces, port 8889 + - Prometheus scrapes `http://localhost:8889/metrics` +- **`namespace: "ai_server"`**: + - Adds prefix to all metric names + - Example: `http_server_duration` → `ai_server_http_server_duration` + - The spanmetrics connector also includes "traces_span_metrics" in its path + - Final metric: `ai_server_traces_span_metrics_calls_total` +- **`const_labels`**: + - These labels are added to **every metric** from this exporter + - `environment: "development"` allows filtering by environment in queries + - Useful when you have dev/staging/prod all sending to same Prometheus + +--- + +```yaml +service: + pipelines: + # Traces pipeline - receives traces from Flask, sends to Jaeger AND spanmetrics + traces: + receivers: [otlp] # Receive traces from Flask app + processors: [batch] # Batch the spans + exporters: [debug, otlp, spanmetrics] # Send to console, Jaeger, AND spanmetrics connector +``` + +**Explanation:** + +**`service.pipelines` section:** +- **Purpose**: Defines the data flow through the collector +- **Think of it as**: `input → processing → output` + +**Traces pipeline flow:** +``` +Flask OTLP → [otlp receiver] → [batch processor] → { + [debug exporter] → Console + [otlp exporter] → Jaeger + [spanmetrics connector] → metrics/spanmetrics pipeline + } +``` + +**Key insight**: `spanmetrics` appears in **exporters** list +- **Why?** Connectors bridge pipelines - they're both an exporter (from traces pipeline) and a receiver (for metrics pipeline) +- **Result**: Traces flow into spanmetrics, which generates metrics and sends them to the metrics pipeline + +--- + +```yaml + # Spanmetrics-generated metrics pipeline - RED metrics for Jaeger SPM + metrics/spanmetrics: + receivers: [spanmetrics] # Receive metrics generated from traces by spanmetrics connector + processors: [batch] + exporters: [prometheus] # Export to Prometheus (Jaeger reads from here) +``` + +**Explanation:** + +**`metrics/spanmetrics` pipeline:** +- **Name format**: `/` allows multiple pipelines of same type +- **Purpose**: Handles metrics that spanmetrics generates from traces + +**Pipeline flow:** +``` +[spanmetrics connector] → [batch processor] → [prometheus exporter] → port 8889 + ↓ + Prometheus scrapes + ↓ + Jaeger queries for SPM +``` + +**Why separate from main metrics pipeline?** +- Different sources: spanmetrics vs Flask instrumentation +- Different purposes: RED metrics vs detailed HTTP metrics +- Cleaner configuration: Easy to disable/modify independently + +--- + +```yaml + # Application-generated metrics pipeline - HTTP instrumentation metrics from Flask + metrics: + receivers: [otlp] # Receive metrics from Flask instrumentation + processors: [batch] + exporters: [debug, prometheus] # Send to console AND Prometheus +``` + +**Explanation:** + +**`metrics` pipeline (Flask-generated):** +- **Purpose**: Handles metrics that Flask OpenTelemetry instrumentation generates +- **Source**: Your Flask app's `MeterProvider` sends these via OTLP + +**What metrics flow through here:** +- `ai_server_http_server_duration_milliseconds_*`: Request latency histogram +- `ai_server_http_server_active_requests`: Current active requests gauge + +**Pipeline flow:** +``` +Flask OTLP → [otlp receiver] → [batch processor] → { + [debug exporter] → Console + [prometheus exporter] → port 8889 + } +``` + 
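**Example Prometheus queries (sketch):**

Once Prometheus has scraped this endpoint, the Flask histogram can be queried directly. The metric names below follow the `ai_server` namespace described above, but the exporter sanitizes attribute keys (dots become underscores), so treat the label names — `http_method` in particular — as assumptions and check `http://localhost:8889/metrics` for what is actually exposed:

```promql
# Requests per second over the last 5 minutes, split by method and status code
sum by (http_method, http_status_code) (
  rate(ai_server_http_server_duration_milliseconds_count[5m])
)

# 95th percentile request latency (milliseconds) over the last 5 minutes
histogram_quantile(
  0.95,
  sum by (le) (rate(ai_server_http_server_duration_milliseconds_bucket[5m]))
)
```

The `HighErrorRate` and `HighLatency` rules in `alert_rules.yml` are built from these same expressions, with thresholds added.
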
+**Both metrics pipelines export to same Prometheus endpoint:** +- spanmetrics: RED metrics (calls_total, duration_bucket) +- Flask: HTTP instrumentation metrics (http_server_duration) +- **Result**: All metrics available at `http://localhost:8889/metrics` + +--- + +```yaml + logs: + receivers: [otlp] + processors: [batch] + exporters: [debug] # Only console for now +``` + +**Explanation:** + +**`logs` pipeline:** +- **Currently simple**: Just receives and prints to console +- **Future expansion**: Could add exporters for: + - Loki (log aggregation system, part of Grafana stack) + - Elasticsearch + - CloudWatch, Datadog, etc. + +--- + +### prometheus.yml (Prometheus Scrape Configuration) + +```yaml +# Prometheus configuration for scraping OpenTelemetry Collector metrics +global: + scrape_interval: 15s # How often to scrape metrics + evaluation_interval: 15s # How often to evaluate rules +``` + +**Explanation:** + +**`global` section:** +- **Purpose**: Default settings for all scrape jobs + +**`scrape_interval: 15s`:** +- How often Prometheus fetches metrics from targets +- Every 15 seconds, Prometheus sends HTTP GET to `http://localhost:8889/metrics` +- **Trade-off**: + - Shorter interval (5s) = fresher data, more load + - Longer interval (60s) = less frequent updates, less overhead + - 15s is a good balance for local development + +**`evaluation_interval: 15s`:** +- How often to evaluate alerting rules (not used in our setup) +- Kept at same interval as scraping for simplicity + +--- + +```yaml +# Scrape configuration +scrape_configs: + # Scrape metrics from OpenTelemetry Collector + - job_name: 'otel-collector' + static_configs: + - targets: ['host.docker.internal:8888'] # Collector's metrics endpoint + labels: + service: 'otel-collector' +``` + +**Explanation:** + +**`scrape_configs` section:** +- **Purpose**: Defines what endpoints to scrape and how + +**Job 1: `otel-collector`:** +- **Purpose**: Scrape collector's own internal metrics (health, performance) +- **`targets: ['host.docker.internal:8888']`**: + - `host.docker.internal`: Docker's way to reach host machine from container + - Port `8888`: Collector's self-monitoring endpoint (built-in) + - **What's scraped**: Collector health, queue sizes, processing rates, etc. 
+- **`labels: {service: 'otel-collector'}`**: + - Adds `service="otel-collector"` label to all scraped metrics + - Allows filtering: `up{service="otel-collector"}` + +--- + +```yaml + # Scrape metrics that the collector exports (from your Flask app) + - job_name: 'ai-server' + static_configs: + - targets: ['host.docker.internal:8889'] # We'll expose metrics here + labels: + service: 'ai-server' +``` + +**Explanation:** + +**Job 2: `ai-server`:** +- **Purpose**: Scrape application metrics (spanmetrics + Flask HTTP metrics) +- **`targets: ['host.docker.internal:8889']`**: + - Same host resolution mechanism + - Port `8889`: Where our Prometheus exporter exposes metrics + - **What's scraped**: + - `ai_server_traces_span_metrics_calls_total` + - `ai_server_http_server_duration_*` + - All metrics from both pipelines +- **`labels: {service: 'ai-server'}`**: + - Adds `service="ai-server"` label + - Note: Metrics already have `service_name="ai-server"` from spanmetrics + - Both labels coexist (useful for different query patterns) + +--- + +## Summary: The Complete Picture + +### Data Flow with All Components: + +``` +Flask App (ai-server) + ↓ OpenTelemetry SDK (server.py lines 26-59) + ↓ Generates: Traces (spans) + Metrics (HTTP instrumentation) + ↓ Sends via: OTLP gRPC to localhost:4317 + ↓ +OpenTelemetry Collector (config.yml) + ↓ + ├─ Traces Pipeline: + │ ├─ otlp receiver (port 4317) receives spans + │ ├─ batch processor groups them + │ └─ Exporters: + │ ├─ debug → Console logs + │ ├─ otlp (port 14317) → Jaeger (for trace visualization) + │ └─ spanmetrics connector → Generates RED metrics + │ ↓ + │ ┌────────┘ + │ ↓ + ├─ Metrics/spanmetrics Pipeline: + │ ├─ spanmetrics connector outputs metrics + │ ├─ batch processor groups them + │ └─ prometheus exporter (port 8889) → Exposes /metrics endpoint + │ + └─ Metrics Pipeline: + ├─ otlp receiver receives HTTP metrics from Flask + ├─ batch processor groups them + └─ Exporters: + ├─ debug → Console logs + └─ prometheus exporter (port 8889) → Same endpoint as spanmetrics + ↓ +Prometheus (prometheus.yml) + ↓ Scrapes http://host.docker.internal:8889/metrics every 15s + ↓ Stores time-series data + ↓ +Jaeger SPM (docker-compose.yml) + ↓ Queries Prometheus: http://prometheus:9090 + ↓ Looks for: ai_server_traces_span_metrics_calls_total + ↓ Displays: RED metrics in Monitor tab +``` + +### Key Takeaways: + +1. **spanmetrics is essential** for Jaeger SPM - it converts traces to metrics +2. **Namespace configuration must match** between collector and Jaeger +3. **Connectors bridge pipelines** - spanmetrics sits between traces and metrics +4. **Prometheus is the middle layer** - stores metrics that Jaeger queries +5. **All components must be on same network** (or use host.docker.internal) +6. **Multiple exporters can coexist** - same pipeline can output to console + Jaeger + Prometheus diff --git a/opentelemetry_collector/README.md b/opentelemetry_collector/README.md new file mode 100644 index 0000000..7fb53dc --- /dev/null +++ b/opentelemetry_collector/README.md @@ -0,0 +1,51 @@ +# OpenTelemetry Collector + +The reason we are installing the opentelemetry contrib collector is because it includes the spanmetrics connector which is required if we wish +to enable Jaeger's Service Performance Monitoring ([SPM](https://www.jaegertracing.io/docs/2.11/architecture/spm/)) + +## Configuration + +Take a look at `config.yml` inside this folder for a default configuration. 
For more information about this system +please take a look at the collector configuration [guide](https://opentelemetry.io/docs/collector/configuration/). + +## Execution + +Start all services with Docker Compose: +```bash +docker-compose up -d +``` + +## Alerting + +Alerting is configured using Prometheus Alertmanager: + +1. **Alert Rules** (`alert_rules.yml`) - Defines when alerts fire (high errors, latency, service down) +2. **Alertmanager** (`alertmanager.yml`) - Handles notifications and routing + +### Viewing Alerts + +- **Prometheus Alerts**: http://localhost:9090/alerts - See current alert status +- **Alertmanager UI**: http://localhost:9093 - View, silence, and manage active alerts + +### Configuring Email Notifications + +Edit `alertmanager.yml` and uncomment the email configuration: + +```yaml +global: + smtp_smarthost: 'smtp.gmail.com:587' + smtp_from: 'your-email@gmail.com' + smtp_auth_username: 'your-email@gmail.com' + smtp_auth_password: 'your-app-password' + +receivers: + - name: 'default' + email_configs: + - to: 'your-email@example.com' +``` + +Then restart: `docker-compose restart alertmanager` + +### Testing Alerts + +To test if alerts are working, stop your Flask app and wait 3 minutes - the `ServiceDown` alert should fire. diff --git a/opentelemetry_collector/alert_rules.yml b/opentelemetry_collector/alert_rules.yml new file mode 100644 index 0000000..ccbce20 --- /dev/null +++ b/opentelemetry_collector/alert_rules.yml @@ -0,0 +1,42 @@ +groups: + # Simple alert rules for ai-server monitoring + - name: ai_server_alerts + interval: 30s + rules: + # Alert when service appears down (no requests) + - alert: ServiceDown + expr: rate(ai_server_http_server_duration_milliseconds_count[2m]) == 0 + for: 3m + labels: + severity: critical + annotations: + summary: "AI Server appears to be down" + description: "No HTTP requests received in the last 3 minutes." + + # Alert on high error rate (>10% of requests failing) + - alert: HighErrorRate + expr: | + ( + sum(rate(ai_server_http_server_duration_milliseconds_count{http_status_code=~"5.."}[5m])) + / + sum(rate(ai_server_http_server_duration_milliseconds_count[5m])) + ) > 0.10 + for: 3m + labels: + severity: warning + annotations: + summary: "High error rate detected" + description: "More than 10% of requests are failing with 5xx errors." + + # Alert on very slow response times (p95 > 5 seconds) + - alert: HighLatency + expr: | + histogram_quantile(0.95, + sum(rate(ai_server_http_server_duration_milliseconds_bucket[5m])) by (le) + ) > 5000 + for: 5m + labels: + severity: warning + annotations: + summary: "High latency detected" + description: "95th percentile response time is above 5 seconds." 
diff --git a/opentelemetry_collector/alertmanager.yml b/opentelemetry_collector/alertmanager.yml new file mode 100644 index 0000000..2361bb1 --- /dev/null +++ b/opentelemetry_collector/alertmanager.yml @@ -0,0 +1,45 @@ +# Alertmanager configuration for ai-server alerts +# Docs: https://prometheus.io/docs/alerting/latest/configuration/ + +global: + # Default email settings (configure these with your SMTP server) + # OPTION 1: Gmail (requires app password) + # smtp_smarthost: 'smtp.gmail.com:587' + # smtp_from: 'your-email@gmail.com' + # smtp_auth_username: 'your-email@gmail.com' + # smtp_auth_password: 'your-app-password' + + # OPTION 2: Use a local mail relay + # smtp_smarthost: 'localhost:25' + # smtp_from: 'alertmanager@localhost' + # smtp_require_tls: false + +route: + # Default route - groups alerts by alert name + group_by: ['alertname'] + group_wait: 30s # Wait 30s before sending first alert in a group + group_interval: 5m # Wait 5m before sending updates for existing group + repeat_interval: 4h # Resend alert every 4h if still firing + receiver: 'default' # Default receiver (see below) + +receivers: + # Default receiver - logs to console (useful for testing) + - name: 'default' + # Uncomment and configure email settings: + # email_configs: + # - to: 'your-email@example.com' + # headers: + # Subject: '[ALERT] {{ .GroupLabels.alertname }}' + + # Example: Critical alerts go to a different email/channel + # - name: 'critical' + # email_configs: + # - to: 'oncall@example.com' + +# Optional: Inhibition rules (suppress certain alerts when others fire) +# inhibit_rules: +# - source_match: +# severity: 'critical' +# target_match: +# severity: 'warning' +# equal: ['alertname', 'service'] diff --git a/opentelemetry_collector/config.yml b/opentelemetry_collector/config.yml new file mode 100644 index 0000000..bba288b --- /dev/null +++ b/opentelemetry_collector/config.yml @@ -0,0 +1,74 @@ +# OpenTelemetry Collector Configuration for ai-server monitoring +receivers: + # Receives telemetry from your Flask app via OTLP protocol + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 # Your Flask app sends traces here + http: + endpoint: 0.0.0.0:4318 # Alternative HTTP endpoint (not currently used) + +connectors: + # spanmetrics connector - Generates RED metrics (calls, errors, duration) from traces + # This is what Jaeger SPM requires to function + spanmetrics: + histogram: + explicit: + # Latency buckets in seconds (0.001s = 1ms, 10s = 10000ms) + buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] + dimensions: + # Group metrics by these span attributes + - name: http.method # GET, POST, etc. + - name: http.status_code # 200, 500, etc. 
+ metrics_flush_interval: 15s # Export metrics every 15 seconds + +processors: + # Batches spans together for efficient export (reduces network calls) + batch: + timeout: 10s # Send batch every 10 seconds + send_batch_size: 100 # OR when 100 spans accumulate + +exporters: + # Console output - still shows traces in terminal for debugging + debug: + verbosity: normal + + # OTLP exporter - sends traces to Jaeger via OTLP protocol + # Jaeger container exposes OTLP receiver on port 4317 + otlp: + endpoint: jaeger:4317 # Jaeger's OTLP receiver (Docker network) + tls: + insecure: true # No TLS encryption (ok for local development) + + # Prometheus exporter - exposes metrics in Prometheus format + # Prometheus will scrape this endpoint every 15 seconds + prometheus: + endpoint: "0.0.0.0:8889" # Expose metrics for Prometheus to scrape + namespace: "ai_server" # Prefix for all metrics (ai_server_http_requests_total) + const_labels: # Labels added to all metrics + environment: "development" + +service: + pipelines: + # Traces pipeline - receives traces from Flask, sends to Jaeger AND spanmetrics + traces: + receivers: [otlp] # Receive traces from Flask app + processors: [batch] # Batch the spans + exporters: [debug, otlp, spanmetrics] # Send to console, Jaeger, AND spanmetrics connector + + # Spanmetrics-generated metrics pipeline - RED metrics for Jaeger SPM + metrics/spanmetrics: + receivers: [spanmetrics] # Receive metrics generated from traces by spanmetrics connector + processors: [batch] + exporters: [prometheus] # Export to Prometheus (Jaeger reads from here) + + # Application-generated metrics pipeline - HTTP instrumentation metrics from Flask + metrics: + receivers: [otlp] # Receive metrics from Flask instrumentation + processors: [batch] + exporters: [debug, prometheus] # Send to console AND Prometheus + + logs: + receivers: [otlp] + processors: [batch] + exporters: [debug] # Only console for now diff --git a/opentelemetry_collector/docker-compose.yml b/opentelemetry_collector/docker-compose.yml new file mode 100644 index 0000000..428acda --- /dev/null +++ b/opentelemetry_collector/docker-compose.yml @@ -0,0 +1,150 @@ +version: '3.8' + +services: + # OpenTelemetry Collector Contrib - Telemetry data pipeline + # Receives traces/metrics from Flask app, processes them, and exports to Jaeger/Prometheus + # Includes spanmetrics connector for generating RED metrics from traces + otel-collector: + image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.139.0 + container_name: otel-collector + deploy: + resources: + limits: + memory: 200M + command: ["--config=/etc/otelcol-config.yml"] + volumes: + # Mount our config.yml into the container + - ./config.yml:/etc/otelcol-config.yml:ro + ports: + - "4317:4317" # OTLP gRPC receiver (Flask app sends here) + - "4318:4318" # OTLP HTTP receiver (alternative) + - "8889:8889" # Prometheus exporter (Prometheus scrapes this) + - "8888:8888" # Collector's own metrics endpoint + networks: + - monitoring + depends_on: + - jaeger + - prometheus + restart: unless-stopped + + # Prometheus - Time-series metrics database + # Scrapes metrics from OpenTelemetry Collector and stores them + # Jaeger queries Prometheus to display SPM (Service Performance Monitoring) data + prometheus: + image: prom/prometheus:latest + container_name: prometheus + ports: + - "9090:9090" # Web UI and API + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ./alert_rules.yml:/etc/prometheus/alert_rules.yml:ro + command: + - 
'--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + networks: + - monitoring + depends_on: + - alertmanager + restart: unless-stopped + + # Alertmanager - Handles alert notifications + # Receives alerts from Prometheus and sends notifications + alertmanager: + image: prom/alertmanager:latest + container_name: alertmanager + ports: + - "9093:9093" # Web UI and API + volumes: + - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro + command: + - '--config.file=/etc/alertmanager/alertmanager.yml' + - '--storage.path=/alertmanager' + networks: + - monitoring + restart: unless-stopped + + # Jaeger - Distributed tracing backend and UI + # Receives traces from OpenTelemetry Collector via OTLP + # Queries Prometheus for RED metrics (Rate, Errors, Duration) to power SPM + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger + ports: + - "16686:16686" # Jaeger UI (web interface) + - "14317:4317" # OTLP gRPC receiver (collector sends traces here) + - "14318:4318" # OTLP HTTP receiver (alternative) + environment: + # SPM Configuration - tells Jaeger to read metrics from Prometheus + - METRICS_STORAGE_TYPE=prometheus + - PROMETHEUS_SERVER_URL=http://prometheus:9090 + + # Metric naming configuration - must match what spanmetrics connector generates + - PROMETHEUS_QUERY_NAMESPACE=ai_server_traces_span_metrics + - PROMETHEUS_QUERY_DURATION_UNIT=ms + + # Optional: Enable normalization for better metric compatibility + - PROMETHEUS_QUERY_NORMALIZE_CALLS=true + - PROMETHEUS_QUERY_NORMALIZE_DURATION=true + + # Enable Monitor menu in Jaeger UI (required for SPM to show) + - JAEGER_DISABLED=false + - PROMETHEUS_QUERY_SUPPORT_SPANMETRICS_CONNECTOR=true + + # Collector configuration (built-in OTLP receiver) + - COLLECTOR_OTLP_ENABLED=true + networks: + - monitoring + depends_on: + - prometheus + restart: unless-stopped + +networks: + monitoring: + driver: bridge + +# USAGE INSTRUCTIONS: +# +# 1. Start all services: +# docker-compose up -d +# +# 2. Stop all services: +# docker-compose down +# +# 3. View logs: +# docker-compose logs -f [service-name] +# Example: docker-compose logs -f jaeger +# +# 4. Restart a specific service: +# docker-compose restart [service-name] +# +# 5. Check status: +# docker-compose ps +# +# ARCHITECTURE: +# +# Flask App (host:5000) +# ↓ Sends traces & metrics via OTLP to localhost:4317 +# ↓ +# OpenTelemetry Collector (container:4317) ← All services now in Docker! 
+# ↓ Processes and routes: +# ├─→ Traces → Jaeger container (jaeger:4317) +# ├─→ spanmetrics connector (generates RED metrics from traces) +# └─→ Metrics → Exposed on port 8889 +# ↓ +# Prometheus (container) scrapes otel-collector:8889 +# ↓ Stores time-series metrics +# ↑ +# Jaeger queries prometheus:9090 for SPM data +# +# NETWORK: +# - All containers on 'monitoring' bridge network +# - Flask app (host) connects via localhost:4317 +# +# ACCESS URLS: +# - Jaeger UI: http://localhost:16686 +# - Prometheus: http://localhost:9090 +# - Alertmanager UI: http://localhost:9093 +# - OTel Collector metrics: http://localhost:8889/metrics +# - OTel Collector health: http://localhost:8888/metrics diff --git a/opentelemetry_collector/prometheus.yml b/opentelemetry_collector/prometheus.yml new file mode 100644 index 0000000..5548662 --- /dev/null +++ b/opentelemetry_collector/prometheus.yml @@ -0,0 +1,30 @@ +# Prometheus configuration for scraping OpenTelemetry Collector metrics +global: + scrape_interval: 15s # How often to scrape metrics + evaluation_interval: 15s # How often to evaluate rules + +# Alerting configuration +alerting: + alertmanagers: + - static_configs: + - targets: ['alertmanager:9093'] # Alertmanager service + +# Load alert rules +rule_files: + - '/etc/prometheus/alert_rules.yml' + +# Scrape configuration +scrape_configs: + # Scrape metrics from OpenTelemetry Collector (collector's own health metrics) + - job_name: 'otel-collector' + static_configs: + - targets: ['otel-collector:8888'] # Collector's internal metrics endpoint + labels: + service: 'otel-collector' + + # Scrape metrics that the collector exports (spanmetrics + Flask HTTP metrics) + - job_name: 'ai-server' + static_configs: + - targets: ['otel-collector:8889'] # Collector's Prometheus exporter + labels: + service: 'ai-server' From 6df85fad162b1061812789de0be70358df6fd105 Mon Sep 17 00:00:00 2001 From: Naragod Date: Thu, 13 Nov 2025 10:43:32 -0500 Subject: [PATCH 5/5] Use environment variables --- .env.example | 14 ++++++++++++++ markus_ai_server/server.py | 13 ++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d64b662 --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +# Redis Configuration +REDIS_URL=redis://localhost:6379 + +# Ollama/Llama Server Configuration +LLAMA_SERVER_URL=http://localhost:11434 +OLLAMA_HOST=http://localhost:11434 # Used by ollama Python library +DEFAULT_MODEL=deepseek-coder-v2:latest + +# OpenTelemetry Configuration +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 + +# Optional: Llama.cpp CLI configuration (for local llama.cpp usage) +# LLAMA_CPP_CLI=/data1/llama.cpp/bin/llama-cli +# GGUF_DIR=/data1/GGUF diff --git a/markus_ai_server/server.py b/markus_ai_server/server.py index a0d2364..2d3005c 100644 --- a/markus_ai_server/server.py +++ b/markus_ai_server/server.py @@ -31,6 +31,9 @@ # Load environment variables from .env file load_dotenv() +# OpenTelemetry endpoint configuration +OTEL_EXPORTER_OTLP_ENDPOINT = os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT', 'http://localhost:4317') + # Configure OpenTelemetry - Shared Resource (identifies this service) resource = Resource.create({"service.name": "ai-server"}) @@ -38,8 +41,8 @@ # TracerProvider: Factory for creating tracers (for distributed tracing) tracer_provider = TracerProvider(resource=resource) -# OTLP Trace Exporter: Sends traces to collector at localhost:4317 -otlp_trace_exporter = 
OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) +# OTLP Trace Exporter: Sends traces to collector +otlp_trace_exporter = OTLPSpanExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT, insecure=True) span_processor = BatchSpanProcessor(otlp_trace_exporter) tracer_provider.add_span_processor(span_processor) @@ -48,8 +51,8 @@ tracer = trace.get_tracer("ai-server.tracer") # ========== METRICS CONFIGURATION ========== -# OTLP Metric Exporter: Sends metrics to collector at localhost:4317 -otlp_metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317", insecure=True) +# OTLP Metric Exporter: Sends metrics to collector +otlp_metric_exporter = OTLPMetricExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT, insecure=True) # PeriodicExportingMetricReader: Collects and exports metrics every 10 seconds metric_reader = PeriodicExportingMetricReader( @@ -291,7 +294,7 @@ def authenticate() -> str: @app.route('/chat', methods=['POST']) def chat(): """Handle chat request with optional llama_mode and system prompt parameters.""" - # authenticate() + authenticate() model = request.form.get('model', DEFAULT_MODEL) content = request.form.get('content', '') llama_mode = request.form.get('llama_mode', 'cli')