From 71c66117c5f07ae313ac137d916a3da09a3af109 Mon Sep 17 00:00:00 2001 From: Naragod Date: Mon, 10 Nov 2025 17:28:17 -0500 Subject: [PATCH 1/5] Add opentelemetry python dependencies --- README.md | 2 +- pyproject.toml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 88a06ee..b4d6e6d 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ To install project dependencies, including development dependencies: ```console -$ pip install -e .[dev] +$ pip install -e '.[dev]' ``` To install pre-commit hooks: diff --git a/pyproject.toml b/pyproject.toml index 37c9de4..7ae2ea3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,9 @@ dependencies = [ "python-dotenv", "redis", "requests", + "opentelemetry-instrumentation-flask", + "opentelemetry-sdk", + "opentelemetry-exporter-otlp-proto-grpc", ] [project.optional-dependencies] From f107ed963b9e54ac580e5942c9525df4c8d8708f Mon Sep 17 00:00:00 2001 From: Naragod Date: Mon, 10 Nov 2025 17:31:16 -0500 Subject: [PATCH 2/5] Use opentelemetry flask instrumentation --- markus_ai_server/server.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/markus_ai_server/server.py b/markus_ai_server/server.py index 7855b8d..c487913 100644 --- a/markus_ai_server/server.py +++ b/markus_ai_server/server.py @@ -12,6 +12,12 @@ import requests from dotenv import load_dotenv from flask import Flask, abort, jsonify, request +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.flask import FlaskInstrumentor +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor from .redis_helper import REDIS_CONNECTION @@ -22,7 +28,22 @@ # Load environment variables from .env file load_dotenv() +# Configure OpenTelemetry +resource = Resource.create({"service.name": "ai-server"}) +tracer_provider = TracerProvider(resource=resource) + +# Configure OTLP exporter to send to collector at localhost:4317 +otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) +span_processor = BatchSpanProcessor(otlp_exporter) +tracer_provider.add_span_processor(span_processor) + +# Set the global tracer provider +trace.set_tracer_provider(tracer_provider) + +tracer = trace.get_tracer("ai-server.tracer") + app = Flask('AI server') +FlaskInstrumentor().instrument_app(app) # Configuration from environment variables DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'deepseek-coder-v2:latest') @@ -32,7 +53,8 @@ GGUF_DIR = os.getenv('GGUF_DIR', '/data1/GGUF') # Llama server configuration -_llama_server_url = os.getenv('LLAMA_SERVER_URL') # e.g., http://localhost:8080 or localhost:8080 +# e.g., http://localhost:8080 or localhost:8080 +_llama_server_url = os.getenv('LLAMA_SERVER_URL') LLAMA_SERVER_URL = ( f"http://{_llama_server_url}" if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://')) From 825d3d7de62f28ca918ad39a4250fb4dd4a8333c Mon Sep 17 00:00:00 2001 From: Naragod Date: Mon, 10 Nov 2025 21:49:48 -0500 Subject: [PATCH 3/5] Update set up instructions --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index b4d6e6d..9b65749 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ To install project dependencies, including development dependencies: ```console +$ source venv/bin/activate; + $ pip install -e 
'.[dev]'
 ```
 
@@ -19,3 +21,25 @@ To run the test suite:
 ```console
 $ pytest
 ```
+
+To run locally:
+
+Prerequisites:
+
+You must have Redis and a llama server up and running.
+
+
+```console
+$ docker compose -f opentelemetry_collector/docker-compose.yml up -d
+
+$ REDIS_URL='redis://localhost:6379' LLAMA_SERVER_URL='http://localhost:11434' python3 -m ai_server
+```
+
+Send a request:
+
+Example:
+
+```bash
+curl --location 'localhost:5000/chat' \
+--form 'content="asdf asdf asdasdf ad"'
+```

From 294fb157b4945975a0e94333ec616dc6aaf38924 Mon Sep 17 00:00:00 2001
From: Naragod
Date: Mon, 10 Nov 2025 21:50:19 -0500
Subject: [PATCH 4/5] Add OpenTelemetry pipeline

---
 markus_ai_server/server.py                    |  36 +-
 opentelemetry_collector/CONFIG_EXPLANATION.md | 419 ++++++++++++++++++
 opentelemetry_collector/README.md             |  51 +++
 opentelemetry_collector/alert_rules.yml       |  42 ++
 opentelemetry_collector/alertmanager.yml      |  45 ++
 opentelemetry_collector/config.yml            |  74 ++++
 opentelemetry_collector/docker-compose.yml    | 150 +++++++
 opentelemetry_collector/prometheus.yml        |  30 ++
 8 files changed, 839 insertions(+), 8 deletions(-)
 create mode 100644 opentelemetry_collector/CONFIG_EXPLANATION.md
 create mode 100644 opentelemetry_collector/README.md
 create mode 100644 opentelemetry_collector/alert_rules.yml
 create mode 100644 opentelemetry_collector/alertmanager.yml
 create mode 100644 opentelemetry_collector/config.yml
 create mode 100644 opentelemetry_collector/docker-compose.yml
 create mode 100644 opentelemetry_collector/prometheus.yml

diff --git a/markus_ai_server/server.py b/markus_ai_server/server.py
index c487913..a0d2364 100644
--- a/markus_ai_server/server.py
+++ b/markus_ai_server/server.py
@@ -12,9 +12,12 @@
 import requests
 from dotenv import load_dotenv
 from flask import Flask, abort, jsonify, request
-from opentelemetry import trace
+from opentelemetry import metrics, trace
+from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
 from opentelemetry.instrumentation.flask import FlaskInstrumentor
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
@@ -28,20 +31,37 @@
 # Load environment variables from .env file
 load_dotenv()
 
-# Configure OpenTelemetry
+# Configure OpenTelemetry - Shared Resource (identifies this service)
 resource = Resource.create({"service.name": "ai-server"})
+
+# ========== TRACES CONFIGURATION ==========
+# TracerProvider: Factory for creating tracers (for distributed tracing)
 tracer_provider = TracerProvider(resource=resource)
 
-# Configure OTLP exporter to send to collector at localhost:4317
-otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True)
-span_processor = BatchSpanProcessor(otlp_exporter)
+# OTLP Trace Exporter: Sends traces to collector at localhost:4317
+otlp_trace_exporter = OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True)
+span_processor = BatchSpanProcessor(otlp_trace_exporter)
 tracer_provider.add_span_processor(span_processor)
 
-# Set the global tracer provider
+# Set the global tracer provider (FlaskInstrumentor will use this)
 trace.set_tracer_provider(tracer_provider)
-
 tracer = trace.get_tracer("ai-server.tracer")
 
+# ========== METRICS CONFIGURATION ==========
+# OTLP Metric Exporter: 
Sends metrics to collector at localhost:4317 +otlp_metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317", insecure=True) + +# PeriodicExportingMetricReader: Collects and exports metrics every 10 seconds +metric_reader = PeriodicExportingMetricReader( + exporter=otlp_metric_exporter, export_interval_millis=10000 # Export every 10 seconds +) + +# MeterProvider: Factory for creating meters (for metrics collection) +meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) + +# Set the global meter provider (FlaskInstrumentor will use this for HTTP metrics) +metrics.set_meter_provider(meter_provider) + app = Flask('AI server') FlaskInstrumentor().instrument_app(app) @@ -271,7 +291,7 @@ def authenticate() -> str: @app.route('/chat', methods=['POST']) def chat(): """Handle chat request with optional llama_mode and system prompt parameters.""" - authenticate() + # authenticate() model = request.form.get('model', DEFAULT_MODEL) content = request.form.get('content', '') llama_mode = request.form.get('llama_mode', 'cli') diff --git a/opentelemetry_collector/CONFIG_EXPLANATION.md b/opentelemetry_collector/CONFIG_EXPLANATION.md new file mode 100644 index 0000000..22d9d8e --- /dev/null +++ b/opentelemetry_collector/CONFIG_EXPLANATION.md @@ -0,0 +1,419 @@ +# OpenTelemetry Setup: Configuration Deep Dive + +### config.yml (OpenTelemetry Collector Configuration) + +```yaml +# OpenTelemetry Collector Configuration for ai-server monitoring +receivers: + # Receives telemetry from your Flask app via OTLP protocol + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 # Your Flask app sends traces here + http: + endpoint: 0.0.0.0:4318 # Alternative HTTP endpoint (not currently used) +``` + +**Explanation:** + +**`receivers` section:** +- **Purpose**: Defines how the collector receives telemetry data +- **`otlp`**: OpenTelemetry Protocol - the standard protocol for sending telemetry + - **`grpc`**: Uses gRPC protocol (binary, efficient) + - **`endpoint: 0.0.0.0:4317`**: + - `0.0.0.0` means "listen on all network interfaces" (localhost, Docker bridge, etc.) + - Port `4317` is the standard OTLP gRPC port + - Your Flask app is configured to send to `http://localhost:4317` + - **`http`**: Alternative HTTP endpoint (fallback, not used by your Flask app) + +--- + +```yaml +connectors: + # spanmetrics connector - Generates RED metrics (calls, errors, duration) from traces + # This is what Jaeger SPM requires to function + spanmetrics: + histogram: + explicit: + # Latency buckets in seconds (0.001s = 1ms, 10s = 10000ms) + buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] + dimensions: + # Group metrics by these span attributes + - name: http.method # GET, POST, etc. + - name: http.status_code # 200, 500, etc. + metrics_flush_interval: 15s # Export metrics every 15 seconds +``` + +**Explanation:** + +**`connectors` section:** +- **Purpose**: Connectors sit between pipelines and transform data +- **`spanmetrics`**: A special connector that watches traces and generates metrics + +**How spanmetrics works:** +1. **Watches the traces pipeline**: Every span that flows through gets analyzed +2. **Generates counters**: For each span, increments `calls_total` counter +3. **Generates histograms**: Records span duration in `duration_*` histogram buckets +4. 
**Outputs to metrics pipeline**: The generated metrics become inputs for the metrics pipeline + +**`histogram.explicit.buckets`:** +- Defines latency buckets for histogram metrics +- Values are in **seconds**: `0.001` = 1ms, `1` = 1000ms, `10` = 10000ms +- When a request takes 234ms (0.234s), it gets counted in the `0.25` bucket +- These buckets allow calculating percentiles (P50, P95, P99) + +**`dimensions`:** +- Creates separate metric series for each unique combination of these attributes +- Example: `calls_total{http.method="POST", http.status_code="200"}` vs `calls_total{http.method="GET", http.status_code="404"}` +- Allows filtering metrics by HTTP method, status code, etc. in queries + +**`metrics_flush_interval`:** +- How often spanmetrics exports accumulated metrics (15 seconds) +- Balances freshness vs efficiency (too frequent = overhead, too slow = stale data) + +--- + +```yaml +processors: + # Batches spans together for efficient export (reduces network calls) + batch: + timeout: 10s # Send batch every 10 seconds + send_batch_size: 100 # OR when 100 spans accumulate +``` + +**Explanation:** + +**`processors` section:** +- **Purpose**: Processes/transforms telemetry data in the pipeline +- **`batch`**: Groups multiple spans/metrics together before exporting + +**Why batching matters:** +- **Without batching**: Every span would be sent individually (1 network call per span) +- **With batching**: 100 spans sent in 1 network call +- **Result**: ~100x reduction in network overhead + +**`timeout: 10s`:** +- If 10 seconds pass without reaching 100 spans, send whatever we have +- Prevents data from sitting too long + +**`send_batch_size: 100`:** +- As soon as 100 spans accumulate, send immediately (don't wait for timeout) +- Prevents memory buildup during high traffic + +**Decision logic:** +``` +IF (accumulated_spans >= 100) OR (time_since_last_send >= 10s) + THEN send_batch() +``` + +--- + +```yaml +exporters: + # Console output - still shows traces in terminal for debugging + debug: + verbosity: normal + + # OTLP exporter - sends traces to Jaeger via OTLP protocol + # Jaeger has a built-in OTLP receiver (mapped to host port 14317) + otlp: + endpoint: localhost:14317 # Jaeger's OTLP receiver (Docker port mapping) + tls: + insecure: true # No TLS encryption (ok for local development) + + # Prometheus exporter - exposes metrics in Prometheus format + # Prometheus will scrape this endpoint every 15 seconds + prometheus: + endpoint: "0.0.0.0:8889" # Expose metrics for Prometheus to scrape + namespace: "ai_server" # Prefix for all metrics (ai_server_http_requests_total) + const_labels: # Labels added to all metrics + environment: "development" +``` + +**Explanation:** + +**`exporters` section:** +- **Purpose**: Defines where to send processed telemetry data + +**1. `debug` exporter:** +- **Purpose**: Prints telemetry to console/logs (useful for troubleshooting) +- **`verbosity: normal`**: Shows span details without overwhelming output + - `basic`: Just counts (least verbose) + - `normal`: Readable span summaries ← We use this + - `detailed`: Full JSON dumps (most verbose) + +**2. `otlp` exporter:** +- **Purpose**: Sends traces to Jaeger +- **`endpoint: localhost:14317`**: + - Jaeger container exposes port 4317 internally + - Docker maps it to host port 14317 (`-p 14317:4317`) + - Collector (running on host) connects to `localhost:14317` +- **`tls.insecure: true`**: + - Disables TLS certificate verification + - **Only safe for local development!** Production should use proper TLS + +**3. 
`prometheus` exporter:** +- **Purpose**: Exposes metrics in Prometheus format for scraping +- **How it works**: Creates an HTTP server that Prometheus polls +- **`endpoint: "0.0.0.0:8889"`**: + - Opens HTTP server on all interfaces, port 8889 + - Prometheus scrapes `http://localhost:8889/metrics` +- **`namespace: "ai_server"`**: + - Adds prefix to all metric names + - Example: `http_server_duration` → `ai_server_http_server_duration` + - The spanmetrics connector also includes "traces_span_metrics" in its path + - Final metric: `ai_server_traces_span_metrics_calls_total` +- **`const_labels`**: + - These labels are added to **every metric** from this exporter + - `environment: "development"` allows filtering by environment in queries + - Useful when you have dev/staging/prod all sending to same Prometheus + +--- + +```yaml +service: + pipelines: + # Traces pipeline - receives traces from Flask, sends to Jaeger AND spanmetrics + traces: + receivers: [otlp] # Receive traces from Flask app + processors: [batch] # Batch the spans + exporters: [debug, otlp, spanmetrics] # Send to console, Jaeger, AND spanmetrics connector +``` + +**Explanation:** + +**`service.pipelines` section:** +- **Purpose**: Defines the data flow through the collector +- **Think of it as**: `input → processing → output` + +**Traces pipeline flow:** +``` +Flask OTLP → [otlp receiver] → [batch processor] → { + [debug exporter] → Console + [otlp exporter] → Jaeger + [spanmetrics connector] → metrics/spanmetrics pipeline + } +``` + +**Key insight**: `spanmetrics` appears in **exporters** list +- **Why?** Connectors bridge pipelines - they're both an exporter (from traces pipeline) and a receiver (for metrics pipeline) +- **Result**: Traces flow into spanmetrics, which generates metrics and sends them to the metrics pipeline + +--- + +```yaml + # Spanmetrics-generated metrics pipeline - RED metrics for Jaeger SPM + metrics/spanmetrics: + receivers: [spanmetrics] # Receive metrics generated from traces by spanmetrics connector + processors: [batch] + exporters: [prometheus] # Export to Prometheus (Jaeger reads from here) +``` + +**Explanation:** + +**`metrics/spanmetrics` pipeline:** +- **Name format**: `/` allows multiple pipelines of same type +- **Purpose**: Handles metrics that spanmetrics generates from traces + +**Pipeline flow:** +``` +[spanmetrics connector] → [batch processor] → [prometheus exporter] → port 8889 + ↓ + Prometheus scrapes + ↓ + Jaeger queries for SPM +``` + +**Why separate from main metrics pipeline?** +- Different sources: spanmetrics vs Flask instrumentation +- Different purposes: RED metrics vs detailed HTTP metrics +- Cleaner configuration: Easy to disable/modify independently + +--- + +```yaml + # Application-generated metrics pipeline - HTTP instrumentation metrics from Flask + metrics: + receivers: [otlp] # Receive metrics from Flask instrumentation + processors: [batch] + exporters: [debug, prometheus] # Send to console AND Prometheus +``` + +**Explanation:** + +**`metrics` pipeline (Flask-generated):** +- **Purpose**: Handles metrics that Flask OpenTelemetry instrumentation generates +- **Source**: Your Flask app's `MeterProvider` sends these via OTLP + +**What metrics flow through here:** +- `ai_server_http_server_duration_milliseconds_*`: Request latency histogram +- `ai_server_http_server_active_requests`: Current active requests gauge + +**Pipeline flow:** +``` +Flask OTLP → [otlp receiver] → [batch processor] → { + [debug exporter] → Console + [prometheus exporter] → port 8889 + } +``` + 
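**Example Prometheus queries (sketch):**

Once Prometheus has scraped this endpoint, the Flask histogram can be queried directly. The metric names below follow the `ai_server` namespace described above, but the exporter sanitizes attribute keys (dots become underscores), so treat the label names — `http_method` in particular — as assumptions and check `http://localhost:8889/metrics` for what is actually exposed:

```promql
# Requests per second over the last 5 minutes, split by method and status code
sum by (http_method, http_status_code) (
  rate(ai_server_http_server_duration_milliseconds_count[5m])
)

# 95th percentile request latency (milliseconds) over the last 5 minutes
histogram_quantile(
  0.95,
  sum by (le) (rate(ai_server_http_server_duration_milliseconds_bucket[5m]))
)
```

The `HighErrorRate` and `HighLatency` rules in `alert_rules.yml` are built from these same expressions, with thresholds added.
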
+**Both metrics pipelines export to same Prometheus endpoint:** +- spanmetrics: RED metrics (calls_total, duration_bucket) +- Flask: HTTP instrumentation metrics (http_server_duration) +- **Result**: All metrics available at `http://localhost:8889/metrics` + +--- + +```yaml + logs: + receivers: [otlp] + processors: [batch] + exporters: [debug] # Only console for now +``` + +**Explanation:** + +**`logs` pipeline:** +- **Currently simple**: Just receives and prints to console +- **Future expansion**: Could add exporters for: + - Loki (log aggregation system, part of Grafana stack) + - Elasticsearch + - CloudWatch, Datadog, etc. + +--- + +### prometheus.yml (Prometheus Scrape Configuration) + +```yaml +# Prometheus configuration for scraping OpenTelemetry Collector metrics +global: + scrape_interval: 15s # How often to scrape metrics + evaluation_interval: 15s # How often to evaluate rules +``` + +**Explanation:** + +**`global` section:** +- **Purpose**: Default settings for all scrape jobs + +**`scrape_interval: 15s`:** +- How often Prometheus fetches metrics from targets +- Every 15 seconds, Prometheus sends HTTP GET to `http://localhost:8889/metrics` +- **Trade-off**: + - Shorter interval (5s) = fresher data, more load + - Longer interval (60s) = less frequent updates, less overhead + - 15s is a good balance for local development + +**`evaluation_interval: 15s`:** +- How often to evaluate alerting rules (not used in our setup) +- Kept at same interval as scraping for simplicity + +--- + +```yaml +# Scrape configuration +scrape_configs: + # Scrape metrics from OpenTelemetry Collector + - job_name: 'otel-collector' + static_configs: + - targets: ['host.docker.internal:8888'] # Collector's metrics endpoint + labels: + service: 'otel-collector' +``` + +**Explanation:** + +**`scrape_configs` section:** +- **Purpose**: Defines what endpoints to scrape and how + +**Job 1: `otel-collector`:** +- **Purpose**: Scrape collector's own internal metrics (health, performance) +- **`targets: ['host.docker.internal:8888']`**: + - `host.docker.internal`: Docker's way to reach host machine from container + - Port `8888`: Collector's self-monitoring endpoint (built-in) + - **What's scraped**: Collector health, queue sizes, processing rates, etc. 
+- **`labels: {service: 'otel-collector'}`**: + - Adds `service="otel-collector"` label to all scraped metrics + - Allows filtering: `up{service="otel-collector"}` + +--- + +```yaml + # Scrape metrics that the collector exports (from your Flask app) + - job_name: 'ai-server' + static_configs: + - targets: ['host.docker.internal:8889'] # We'll expose metrics here + labels: + service: 'ai-server' +``` + +**Explanation:** + +**Job 2: `ai-server`:** +- **Purpose**: Scrape application metrics (spanmetrics + Flask HTTP metrics) +- **`targets: ['host.docker.internal:8889']`**: + - Same host resolution mechanism + - Port `8889`: Where our Prometheus exporter exposes metrics + - **What's scraped**: + - `ai_server_traces_span_metrics_calls_total` + - `ai_server_http_server_duration_*` + - All metrics from both pipelines +- **`labels: {service: 'ai-server'}`**: + - Adds `service="ai-server"` label + - Note: Metrics already have `service_name="ai-server"` from spanmetrics + - Both labels coexist (useful for different query patterns) + +--- + +## Summary: The Complete Picture + +### Data Flow with All Components: + +``` +Flask App (ai-server) + ↓ OpenTelemetry SDK (server.py lines 26-59) + ↓ Generates: Traces (spans) + Metrics (HTTP instrumentation) + ↓ Sends via: OTLP gRPC to localhost:4317 + ↓ +OpenTelemetry Collector (config.yml) + ↓ + ├─ Traces Pipeline: + │ ├─ otlp receiver (port 4317) receives spans + │ ├─ batch processor groups them + │ └─ Exporters: + │ ├─ debug → Console logs + │ ├─ otlp (port 14317) → Jaeger (for trace visualization) + │ └─ spanmetrics connector → Generates RED metrics + │ ↓ + │ ┌────────┘ + │ ↓ + ├─ Metrics/spanmetrics Pipeline: + │ ├─ spanmetrics connector outputs metrics + │ ├─ batch processor groups them + │ └─ prometheus exporter (port 8889) → Exposes /metrics endpoint + │ + └─ Metrics Pipeline: + ├─ otlp receiver receives HTTP metrics from Flask + ├─ batch processor groups them + └─ Exporters: + ├─ debug → Console logs + └─ prometheus exporter (port 8889) → Same endpoint as spanmetrics + ↓ +Prometheus (prometheus.yml) + ↓ Scrapes http://host.docker.internal:8889/metrics every 15s + ↓ Stores time-series data + ↓ +Jaeger SPM (docker-compose.yml) + ↓ Queries Prometheus: http://prometheus:9090 + ↓ Looks for: ai_server_traces_span_metrics_calls_total + ↓ Displays: RED metrics in Monitor tab +``` + +### Key Takeaways: + +1. **spanmetrics is essential** for Jaeger SPM - it converts traces to metrics +2. **Namespace configuration must match** between collector and Jaeger +3. **Connectors bridge pipelines** - spanmetrics sits between traces and metrics +4. **Prometheus is the middle layer** - stores metrics that Jaeger queries +5. **All components must be on same network** (or use host.docker.internal) +6. **Multiple exporters can coexist** - same pipeline can output to console + Jaeger + Prometheus diff --git a/opentelemetry_collector/README.md b/opentelemetry_collector/README.md new file mode 100644 index 0000000..7fb53dc --- /dev/null +++ b/opentelemetry_collector/README.md @@ -0,0 +1,51 @@ +# OpenTelemetry Collector + +The reason we are installing the opentelemetry contrib collector is because it includes the spanmetrics connector which is required if we wish +to enable Jaeger's Service Performance Monitoring ([SPM](https://www.jaegertracing.io/docs/2.11/architecture/spm/)) + +## Configuration + +Take a look at `config.yml` inside this folder for a default configuration. 
For more information about this system +please take a look at the collector configuration [guide](https://opentelemetry.io/docs/collector/configuration/). + +## Execution + +Start all services with Docker Compose: +```bash +docker-compose up -d +``` + +## Alerting + +Alerting is configured using Prometheus Alertmanager: + +1. **Alert Rules** (`alert_rules.yml`) - Defines when alerts fire (high errors, latency, service down) +2. **Alertmanager** (`alertmanager.yml`) - Handles notifications and routing + +### Viewing Alerts + +- **Prometheus Alerts**: http://localhost:9090/alerts - See current alert status +- **Alertmanager UI**: http://localhost:9093 - View, silence, and manage active alerts + +### Configuring Email Notifications + +Edit `alertmanager.yml` and uncomment the email configuration: + +```yaml +global: + smtp_smarthost: 'smtp.gmail.com:587' + smtp_from: 'your-email@gmail.com' + smtp_auth_username: 'your-email@gmail.com' + smtp_auth_password: 'your-app-password' + +receivers: + - name: 'default' + email_configs: + - to: 'your-email@example.com' +``` + +Then restart: `docker-compose restart alertmanager` + +### Testing Alerts + +To test if alerts are working, stop your Flask app and wait 3 minutes - the `ServiceDown` alert should fire. diff --git a/opentelemetry_collector/alert_rules.yml b/opentelemetry_collector/alert_rules.yml new file mode 100644 index 0000000..ccbce20 --- /dev/null +++ b/opentelemetry_collector/alert_rules.yml @@ -0,0 +1,42 @@ +groups: + # Simple alert rules for ai-server monitoring + - name: ai_server_alerts + interval: 30s + rules: + # Alert when service appears down (no requests) + - alert: ServiceDown + expr: rate(ai_server_http_server_duration_milliseconds_count[2m]) == 0 + for: 3m + labels: + severity: critical + annotations: + summary: "AI Server appears to be down" + description: "No HTTP requests received in the last 3 minutes." + + # Alert on high error rate (>10% of requests failing) + - alert: HighErrorRate + expr: | + ( + sum(rate(ai_server_http_server_duration_milliseconds_count{http_status_code=~"5.."}[5m])) + / + sum(rate(ai_server_http_server_duration_milliseconds_count[5m])) + ) > 0.10 + for: 3m + labels: + severity: warning + annotations: + summary: "High error rate detected" + description: "More than 10% of requests are failing with 5xx errors." + + # Alert on very slow response times (p95 > 5 seconds) + - alert: HighLatency + expr: | + histogram_quantile(0.95, + sum(rate(ai_server_http_server_duration_milliseconds_bucket[5m])) by (le) + ) > 5000 + for: 5m + labels: + severity: warning + annotations: + summary: "High latency detected" + description: "95th percentile response time is above 5 seconds." 
diff --git a/opentelemetry_collector/alertmanager.yml b/opentelemetry_collector/alertmanager.yml new file mode 100644 index 0000000..2361bb1 --- /dev/null +++ b/opentelemetry_collector/alertmanager.yml @@ -0,0 +1,45 @@ +# Alertmanager configuration for ai-server alerts +# Docs: https://prometheus.io/docs/alerting/latest/configuration/ + +global: + # Default email settings (configure these with your SMTP server) + # OPTION 1: Gmail (requires app password) + # smtp_smarthost: 'smtp.gmail.com:587' + # smtp_from: 'your-email@gmail.com' + # smtp_auth_username: 'your-email@gmail.com' + # smtp_auth_password: 'your-app-password' + + # OPTION 2: Use a local mail relay + # smtp_smarthost: 'localhost:25' + # smtp_from: 'alertmanager@localhost' + # smtp_require_tls: false + +route: + # Default route - groups alerts by alert name + group_by: ['alertname'] + group_wait: 30s # Wait 30s before sending first alert in a group + group_interval: 5m # Wait 5m before sending updates for existing group + repeat_interval: 4h # Resend alert every 4h if still firing + receiver: 'default' # Default receiver (see below) + +receivers: + # Default receiver - logs to console (useful for testing) + - name: 'default' + # Uncomment and configure email settings: + # email_configs: + # - to: 'your-email@example.com' + # headers: + # Subject: '[ALERT] {{ .GroupLabels.alertname }}' + + # Example: Critical alerts go to a different email/channel + # - name: 'critical' + # email_configs: + # - to: 'oncall@example.com' + +# Optional: Inhibition rules (suppress certain alerts when others fire) +# inhibit_rules: +# - source_match: +# severity: 'critical' +# target_match: +# severity: 'warning' +# equal: ['alertname', 'service'] diff --git a/opentelemetry_collector/config.yml b/opentelemetry_collector/config.yml new file mode 100644 index 0000000..bba288b --- /dev/null +++ b/opentelemetry_collector/config.yml @@ -0,0 +1,74 @@ +# OpenTelemetry Collector Configuration for ai-server monitoring +receivers: + # Receives telemetry from your Flask app via OTLP protocol + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 # Your Flask app sends traces here + http: + endpoint: 0.0.0.0:4318 # Alternative HTTP endpoint (not currently used) + +connectors: + # spanmetrics connector - Generates RED metrics (calls, errors, duration) from traces + # This is what Jaeger SPM requires to function + spanmetrics: + histogram: + explicit: + # Latency buckets in seconds (0.001s = 1ms, 10s = 10000ms) + buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] + dimensions: + # Group metrics by these span attributes + - name: http.method # GET, POST, etc. + - name: http.status_code # 200, 500, etc. 
+ metrics_flush_interval: 15s # Export metrics every 15 seconds + +processors: + # Batches spans together for efficient export (reduces network calls) + batch: + timeout: 10s # Send batch every 10 seconds + send_batch_size: 100 # OR when 100 spans accumulate + +exporters: + # Console output - still shows traces in terminal for debugging + debug: + verbosity: normal + + # OTLP exporter - sends traces to Jaeger via OTLP protocol + # Jaeger container exposes OTLP receiver on port 4317 + otlp: + endpoint: jaeger:4317 # Jaeger's OTLP receiver (Docker network) + tls: + insecure: true # No TLS encryption (ok for local development) + + # Prometheus exporter - exposes metrics in Prometheus format + # Prometheus will scrape this endpoint every 15 seconds + prometheus: + endpoint: "0.0.0.0:8889" # Expose metrics for Prometheus to scrape + namespace: "ai_server" # Prefix for all metrics (ai_server_http_requests_total) + const_labels: # Labels added to all metrics + environment: "development" + +service: + pipelines: + # Traces pipeline - receives traces from Flask, sends to Jaeger AND spanmetrics + traces: + receivers: [otlp] # Receive traces from Flask app + processors: [batch] # Batch the spans + exporters: [debug, otlp, spanmetrics] # Send to console, Jaeger, AND spanmetrics connector + + # Spanmetrics-generated metrics pipeline - RED metrics for Jaeger SPM + metrics/spanmetrics: + receivers: [spanmetrics] # Receive metrics generated from traces by spanmetrics connector + processors: [batch] + exporters: [prometheus] # Export to Prometheus (Jaeger reads from here) + + # Application-generated metrics pipeline - HTTP instrumentation metrics from Flask + metrics: + receivers: [otlp] # Receive metrics from Flask instrumentation + processors: [batch] + exporters: [debug, prometheus] # Send to console AND Prometheus + + logs: + receivers: [otlp] + processors: [batch] + exporters: [debug] # Only console for now diff --git a/opentelemetry_collector/docker-compose.yml b/opentelemetry_collector/docker-compose.yml new file mode 100644 index 0000000..428acda --- /dev/null +++ b/opentelemetry_collector/docker-compose.yml @@ -0,0 +1,150 @@ +version: '3.8' + +services: + # OpenTelemetry Collector Contrib - Telemetry data pipeline + # Receives traces/metrics from Flask app, processes them, and exports to Jaeger/Prometheus + # Includes spanmetrics connector for generating RED metrics from traces + otel-collector: + image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.139.0 + container_name: otel-collector + deploy: + resources: + limits: + memory: 200M + command: ["--config=/etc/otelcol-config.yml"] + volumes: + # Mount our config.yml into the container + - ./config.yml:/etc/otelcol-config.yml:ro + ports: + - "4317:4317" # OTLP gRPC receiver (Flask app sends here) + - "4318:4318" # OTLP HTTP receiver (alternative) + - "8889:8889" # Prometheus exporter (Prometheus scrapes this) + - "8888:8888" # Collector's own metrics endpoint + networks: + - monitoring + depends_on: + - jaeger + - prometheus + restart: unless-stopped + + # Prometheus - Time-series metrics database + # Scrapes metrics from OpenTelemetry Collector and stores them + # Jaeger queries Prometheus to display SPM (Service Performance Monitoring) data + prometheus: + image: prom/prometheus:latest + container_name: prometheus + ports: + - "9090:9090" # Web UI and API + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ./alert_rules.yml:/etc/prometheus/alert_rules.yml:ro + command: + - 
'--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + networks: + - monitoring + depends_on: + - alertmanager + restart: unless-stopped + + # Alertmanager - Handles alert notifications + # Receives alerts from Prometheus and sends notifications + alertmanager: + image: prom/alertmanager:latest + container_name: alertmanager + ports: + - "9093:9093" # Web UI and API + volumes: + - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro + command: + - '--config.file=/etc/alertmanager/alertmanager.yml' + - '--storage.path=/alertmanager' + networks: + - monitoring + restart: unless-stopped + + # Jaeger - Distributed tracing backend and UI + # Receives traces from OpenTelemetry Collector via OTLP + # Queries Prometheus for RED metrics (Rate, Errors, Duration) to power SPM + jaeger: + image: jaegertracing/all-in-one:latest + container_name: jaeger + ports: + - "16686:16686" # Jaeger UI (web interface) + - "14317:4317" # OTLP gRPC receiver (collector sends traces here) + - "14318:4318" # OTLP HTTP receiver (alternative) + environment: + # SPM Configuration - tells Jaeger to read metrics from Prometheus + - METRICS_STORAGE_TYPE=prometheus + - PROMETHEUS_SERVER_URL=http://prometheus:9090 + + # Metric naming configuration - must match what spanmetrics connector generates + - PROMETHEUS_QUERY_NAMESPACE=ai_server_traces_span_metrics + - PROMETHEUS_QUERY_DURATION_UNIT=ms + + # Optional: Enable normalization for better metric compatibility + - PROMETHEUS_QUERY_NORMALIZE_CALLS=true + - PROMETHEUS_QUERY_NORMALIZE_DURATION=true + + # Enable Monitor menu in Jaeger UI (required for SPM to show) + - JAEGER_DISABLED=false + - PROMETHEUS_QUERY_SUPPORT_SPANMETRICS_CONNECTOR=true + + # Collector configuration (built-in OTLP receiver) + - COLLECTOR_OTLP_ENABLED=true + networks: + - monitoring + depends_on: + - prometheus + restart: unless-stopped + +networks: + monitoring: + driver: bridge + +# USAGE INSTRUCTIONS: +# +# 1. Start all services: +# docker-compose up -d +# +# 2. Stop all services: +# docker-compose down +# +# 3. View logs: +# docker-compose logs -f [service-name] +# Example: docker-compose logs -f jaeger +# +# 4. Restart a specific service: +# docker-compose restart [service-name] +# +# 5. Check status: +# docker-compose ps +# +# ARCHITECTURE: +# +# Flask App (host:5000) +# ↓ Sends traces & metrics via OTLP to localhost:4317 +# ↓ +# OpenTelemetry Collector (container:4317) ← All services now in Docker! 
+# ↓ Processes and routes: +# ├─→ Traces → Jaeger container (jaeger:4317) +# ├─→ spanmetrics connector (generates RED metrics from traces) +# └─→ Metrics → Exposed on port 8889 +# ↓ +# Prometheus (container) scrapes otel-collector:8889 +# ↓ Stores time-series metrics +# ↑ +# Jaeger queries prometheus:9090 for SPM data +# +# NETWORK: +# - All containers on 'monitoring' bridge network +# - Flask app (host) connects via localhost:4317 +# +# ACCESS URLS: +# - Jaeger UI: http://localhost:16686 +# - Prometheus: http://localhost:9090 +# - Alertmanager UI: http://localhost:9093 +# - OTel Collector metrics: http://localhost:8889/metrics +# - OTel Collector health: http://localhost:8888/metrics diff --git a/opentelemetry_collector/prometheus.yml b/opentelemetry_collector/prometheus.yml new file mode 100644 index 0000000..5548662 --- /dev/null +++ b/opentelemetry_collector/prometheus.yml @@ -0,0 +1,30 @@ +# Prometheus configuration for scraping OpenTelemetry Collector metrics +global: + scrape_interval: 15s # How often to scrape metrics + evaluation_interval: 15s # How often to evaluate rules + +# Alerting configuration +alerting: + alertmanagers: + - static_configs: + - targets: ['alertmanager:9093'] # Alertmanager service + +# Load alert rules +rule_files: + - '/etc/prometheus/alert_rules.yml' + +# Scrape configuration +scrape_configs: + # Scrape metrics from OpenTelemetry Collector (collector's own health metrics) + - job_name: 'otel-collector' + static_configs: + - targets: ['otel-collector:8888'] # Collector's internal metrics endpoint + labels: + service: 'otel-collector' + + # Scrape metrics that the collector exports (spanmetrics + Flask HTTP metrics) + - job_name: 'ai-server' + static_configs: + - targets: ['otel-collector:8889'] # Collector's Prometheus exporter + labels: + service: 'ai-server' From 6df85fad162b1061812789de0be70358df6fd105 Mon Sep 17 00:00:00 2001 From: Naragod Date: Thu, 13 Nov 2025 10:43:32 -0500 Subject: [PATCH 5/5] Use environment variables --- .env.example | 14 ++++++++++++++ markus_ai_server/server.py | 13 ++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d64b662 --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +# Redis Configuration +REDIS_URL=redis://localhost:6379 + +# Ollama/Llama Server Configuration +LLAMA_SERVER_URL=http://localhost:11434 +OLLAMA_HOST=http://localhost:11434 # Used by ollama Python library +DEFAULT_MODEL=deepseek-coder-v2:latest + +# OpenTelemetry Configuration +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 + +# Optional: Llama.cpp CLI configuration (for local llama.cpp usage) +# LLAMA_CPP_CLI=/data1/llama.cpp/bin/llama-cli +# GGUF_DIR=/data1/GGUF diff --git a/markus_ai_server/server.py b/markus_ai_server/server.py index a0d2364..2d3005c 100644 --- a/markus_ai_server/server.py +++ b/markus_ai_server/server.py @@ -31,6 +31,9 @@ # Load environment variables from .env file load_dotenv() +# OpenTelemetry endpoint configuration +OTEL_EXPORTER_OTLP_ENDPOINT = os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT', 'http://localhost:4317') + # Configure OpenTelemetry - Shared Resource (identifies this service) resource = Resource.create({"service.name": "ai-server"}) @@ -38,8 +41,8 @@ # TracerProvider: Factory for creating tracers (for distributed tracing) tracer_provider = TracerProvider(resource=resource) -# OTLP Trace Exporter: Sends traces to collector at localhost:4317 -otlp_trace_exporter = 
OTLPSpanExporter(endpoint="http://localhost:4317", insecure=True) +# OTLP Trace Exporter: Sends traces to collector +otlp_trace_exporter = OTLPSpanExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT, insecure=True) span_processor = BatchSpanProcessor(otlp_trace_exporter) tracer_provider.add_span_processor(span_processor) @@ -48,8 +51,8 @@ tracer = trace.get_tracer("ai-server.tracer") # ========== METRICS CONFIGURATION ========== -# OTLP Metric Exporter: Sends metrics to collector at localhost:4317 -otlp_metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317", insecure=True) +# OTLP Metric Exporter: Sends metrics to collector +otlp_metric_exporter = OTLPMetricExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT, insecure=True) # PeriodicExportingMetricReader: Collects and exports metrics every 10 seconds metric_reader = PeriodicExportingMetricReader( @@ -291,7 +294,7 @@ def authenticate() -> str: @app.route('/chat', methods=['POST']) def chat(): """Handle chat request with optional llama_mode and system prompt parameters.""" - # authenticate() + authenticate() model = request.form.get('model', DEFAULT_MODEL) content = request.form.get('content', '') llama_mode = request.form.get('llama_mode', 'cli')