From 053c22e61f2a9827602aac88bfe9ba5c63545cb3 Mon Sep 17 00:00:00 2001 From: blublinsky Date: Thu, 15 Jan 2026 11:14:21 +0000 Subject: [PATCH] implementation of the mcp-auth endpoint --- README.md | 30 +- dev-tools/mcp-mock-server/Dockerfile | 15 + docker-compose-library.yaml | 25 + docker-compose.yaml | 19 + docs/ARCHITECTURE.md | 573 ++++++++++++++++++ docs/auth.md | 2 + src/app/endpoints/mcp_auth.py | 91 +++ src/app/routers.py | 2 + src/models/responses.py | 38 ++ tests/e2e/configs/run-ci.yaml | 3 + tests/e2e/configuration/README.md | 32 +- .../library-mode/lightspeed-stack.yaml | 17 + .../server-mode/lightspeed-stack.yaml | 17 + tests/e2e/features/info.feature | 8 + tests/e2e/features/steps/info.py | 61 ++ tests/unit/app/endpoints/test_mcp_auth.py | 339 +++++++++++ tests/unit/app/test_routers.py | 7 +- 17 files changed, 1275 insertions(+), 4 deletions(-) create mode 100644 dev-tools/mcp-mock-server/Dockerfile create mode 100644 docs/ARCHITECTURE.md create mode 100644 src/app/endpoints/mcp_auth.py create mode 100644 tests/unit/app/endpoints/test_mcp_auth.py diff --git a/README.md b/README.md index 4729b8363..dedf8ee78 100644 --- a/README.md +++ b/README.md @@ -390,7 +390,7 @@ mcp_servers: Authorization: "kubernetes" # Uses user's k8s token from request auth ``` -The user's Kubernetes token is extracted from the incoming request's `Authorization` header and forwarded to the MCP server. +**Note:** Kubernetes token-based MCP authorization only works when Lightspeed Core Stack is configured with Kubernetes authentication (`authentication.k8s`). For any other authentication types, MCP servers configured with `Authorization: "kubernetes"` are removed from the available MCP servers list. ##### 3. 
Client-Provided Tokens (For Per-User Authentication) @@ -418,6 +418,34 @@ curl -X POST "http://localhost:8080/v1/query" \ **Structure**: `MCP-HEADERS: {"": {"": "", ...}, ...}` +##### Client-Authenticated MCP Servers Discovery + +To help clients determine which MCP servers require client-provided tokens, use the **MCP Client Auth Options** endpoint: + +```bash +GET /v1/mcp-auth/client-options +``` + +**Response:** +```json +{ + "servers": [ + { + "name": "user-specific-service", + "client_auth_headers": ["Authorization", "X-User-Token"] + }, + { + "name": "github-integration", + "client_auth_headers": ["Authorization"] + } + ] +} +``` + +This endpoint returns only MCP servers configured with `authorization_headers: "client"`, along with the specific header names that need to be provided via `MCP-HEADERS`. Servers using file-based or Kubernetes authentication are not included in this response. + +**Use case:** Clients can call this endpoint at startup or before making requests to discover which servers they can authenticate with using their own tokens. + ##### Combining Authentication Methods You can mix and match authentication methods across different MCP servers, and even combine multiple methods for a single server: diff --git a/dev-tools/mcp-mock-server/Dockerfile b/dev-tools/mcp-mock-server/Dockerfile new file mode 100644 index 000000000..dcd00faa8 --- /dev/null +++ b/dev-tools/mcp-mock-server/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install curl for health checks +RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/* + +# Copy the mock server script +COPY dev-tools/mcp-mock-server/server.py . 
+ +# Expose HTTP port (we'll only use HTTP in Docker for simplicity) +EXPOSE 3000 + +# Run the mock server (HTTP only on port 3000) +CMD ["python", "server.py", "3000"] diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index f38fb1952..a3513404c 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -1,4 +1,21 @@ services: + # Mock MCP server for testing + mcp-mock-server: + build: + context: . + dockerfile: dev-tools/mcp-mock-server/Dockerfile + container_name: mcp-mock-server + ports: + - "3000:3000" + networks: + - lightspeednet + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/"] + interval: 5s + timeout: 3s + retries: 3 + start_period: 5s + # Lightspeed Stack with embedded llama-stack (library mode) lightspeed-stack: build: @@ -8,6 +25,11 @@ services: container_name: lightspeed-stack ports: - "8080:8080" + depends_on: + mcp-mock-server: + condition: service_healthy + networks: + - lightspeednet volumes: # Mount both config files - lightspeed-stack.yaml should have library mode enabled - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z @@ -51,3 +73,6 @@ services: retries: 3 # how many times to retry before marking as unhealthy start_period: 15s # time to wait before starting checks (increased for library initialization) +networks: + lightspeednet: + driver: bridge \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 86c7dc4dd..98d5dbdc2 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,4 +1,21 @@ services: + # Mock MCP server for testing + mcp-mock-server: + build: + context: . 
+ dockerfile: dev-tools/mcp-mock-server/Dockerfile + container_name: mcp-mock-server + ports: + - "3000:3000" + networks: + - lightspeednet + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/"] + interval: 5s + timeout: 3s + retries: 3 + start_period: 5s + # Red Hat llama-stack distribution with FAISS llama-stack: build: @@ -69,6 +86,8 @@ services: depends_on: llama-stack: condition: service_healthy + mcp-mock-server: + condition: service_healthy networks: - lightspeednet healthcheck: diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 000000000..69a036961 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,573 @@ +# Lightspeed Core Stack - Architecture Overview + +**Version:** 1.0 +**Last Updated:** January 2026 +**Status:** Living Document + +--- + +## Table of Contents + +- [1. Introduction](#1-introduction) +- [2. Core Components](#2-core-components) +- [3. Request Processing Pipeline](#3-request-processing-pipeline) +- [4. Database Architecture](#4-database-architecture) +- [5. API Endpoints](#5-api-endpoints) +- [6. Deployment & Operations](#6-deployment--operations) +- [Appendix](#appendix) + +--- + +## 1. Introduction + +### 1.1 What is Lightspeed Core Stack? + +**Lightspeed Core Stack (LCORE)** is an enterprise-grade middleware service that provides a robust layer between client applications and AI Large Language Model (LLM) backends. It adds essential enterprise features such as authentication, authorization, quota management, caching, and observability to LLM interactions. + +LCore is built on **Llama Stack** - Meta's open-source framework that provides standardized APIs for building LLM applications. Llama Stack offers a unified interface for models, RAG (vector stores), tools, and safety (shields) across different providers. LCore communicates with Llama Stack to orchestrate all LLM operations. 
+ +To enhance LLM responses, LCore leverages **RAG (Retrieval-Augmented Generation)**, which retrieves relevant context from vector databases before generating answers. Llama Stack manages the vector stores, and LCore queries them to inject relevant documentation, knowledge bases, or previous conversations into the LLM prompt. + +### 1.2 Key Features + +- **Multi-Provider Support**: Works with multiple LLM providers (Ollama, OpenAI, Watsonx, etc.) +- **Enterprise Security**: Authentication, authorization (RBAC), and secure credential management +- **Resource Management**: Token-based quota limits and usage tracking +- **Conversation Management**: Multi-turn conversations with history and caching +- **RAG Integration**: Retrieval-Augmented Generation for context-aware responses +- **Tool Orchestration**: Model Context Protocol (MCP) server integration +- **Observability**: Prometheus metrics, structured logging, and health checks +- **Agent-to-Agent**: A2A protocol support for multi-agent collaboration + +### 1.3 System Overview + +``` +┌─────────────────────────────────────────────────────────┐ +│ Client Applications │ +│ (Web UI, CLI, VS Code Extension, Mobile Apps, etc.) 
│ +└────────────────────┬────────────────────────────────────┘ + │ REST/A2A/JSON-RPC + ▼ +┌─────────────────────────────────────────────────────────┐ +│ LCore (This Service) │ +│ ┌───────────────────────────────────────────────────┐ │ +│ │ Enterprise Layer │ │ +│ │ • Authentication & Authorization (RBAC) │ │ +│ │ • Quota & Rate Limiting │ │ +│ │ • Caching & Conversation Management │ │ +│ │ • Metrics & Observability │ │ +│ └───────────────────────────────────────────────────┘ │ +│ ▼ │ +│ ┌───────────────────────────────────────────────────┐ │ +│ │ Request Processing │ │ +│ │ • LLM Orchestration (via Llama Stack) │ │ +│ │ • Tool Integration (MCP servers) │ │ +│ │ • RAG & Context Management │ │ +│ └───────────────────────────────────────────────────┘ │ +│ ▼ │ +│ ┌───────────────────────────────────────────────────┐ │ +│ │ Storage Layer │ │ +│ │ • 4 Separate Databases │ │ +│ │ (User, Cache, Quota, A2A State) │ │ +│ └───────────────────────────────────────────────────┘ │ +└────────────────────┬────────────────────────────────────┘ + │ + ▼ + ┌──────────────────┐ + │ Llama Stack │ + │ (LLM Backend) │ + │ │ + │ • Models & LLMs │ + │ • RAG Stores │ + │ • Shields │ + └────────┬─────────┘ + │ (manages & invokes) + ▼ + ┌──────────────────┐ + │ MCP Servers │ + │ (Remote HTTP) │ + └──────────────────┘ +``` + +--- + +## 2. Core Components + +This section describes the major functional components that make up LCore. Each component handles a specific aspect of the system's operation. + +### 2.1 Entry Points & Lifecycle Management + +**Primary Files:** `lightspeed_stack.py`, `app/main.py`, `app/routers.py` + +**Purpose:** Orchestrates application startup, shutdown, and request routing. 
+ +**Key Responsibilities:** +- **CLI Entry Point**: Parse command-line arguments and route to operations (serve, config generation, config dump) +- **FastAPI Application**: Initialize the web framework with OpenAPI documentation +- **Middleware Stack**: Set up Cross-Origin Resource Sharing (CORS), metrics tracking, and global exception handling +- **Lifecycle Management**: + - **Startup**: Load configuration, initialize Llama Stack client, load MCP server configuration and register all defined servers with Llama Stack to build the tools list, establish database connections + - **Shutdown**: Clean up A2A storage resources (database connections and other resources are cleaned up automatically by Python's context managers) +- **Router Registration**: Mount all endpoint routers (query, conversation, model info, auth, metrics, A2A, feedback, admin, mcp_auth) + +**Note:** All configured MCP servers must be running and accessible at startup time for LCore to initialize successfully. + +--- + +### 2.2 Configuration System (`configuration.py`, `models/config.py`) + +**Purpose:** Load, validate, and provide access to service configuration from YAML files + +**Configuration Files:** + +LCore requires two main configuration files: + +1. **LCore Configuration** (`lightspeed-stack.yaml`): + - Service settings (host, port, logging, CORS) + - Authentication and authorization methods + - Database connections (user DB, cache, quota, A2A) + - MCP server endpoints and credentials + - Quota limits and schedules + - User data collection preferences + - Default models and system prompts + +2. 
**Llama Stack Configuration** (`run.yaml`): + - Required for both library and server modes + - Defines LLM providers, models, RAG stores, shields + - See [Llama Stack documentation](https://llama-stack.readthedocs.io/) for details + +**Configuration Validation:** +- Pydantic models validate configuration structure at startup +- Invalid configurations prevent service from starting +- Type checking ensures correct data types +- MCP authorization headers validated against authentication method + +--- + +### 2.3 Authentication System (`authentication/`) + +**Purpose:** Verify the identity of incoming requests + +**Authentication Providers:** + +| Provider | Use Case | Token Handling | +|----------|----------|----------------| +| **No Auth** | Development, testing | No token (empty string) | +| **No Auth + Token** | Testing with token passthrough | Bearer token passed through | +| **Kubernetes** | K8s service accounts | K8s service account token validated and forwarded | +| **Red Hat SSO** | Red Hat environments | X-RH-Identity header (no separate token) | +| **API Key** | API key authentication | API key from Authorization header | +| **JWK/JWT** | JWT tokens | JWT validated and forwarded | + +**Authentication Result (AuthTuple):** + +All authentication modules return a standardized 4-tuple: `(user_id, username, roles, token)` +- `user_id` (str): Unique user identifier +- `username` (str): Human-readable username +- `roles` (list[str]): User roles for authorization checks +- `token` (str): Original auth token extracted from request, forwarded to Llama Stack and backend services + +**Note:** LCore does not generate tokens - it extracts the client's original token from the request (typically `Authorization` header) and forwards it to backend services. 
+ +--- + +### 2.4 Authorization System (`authorization/`) + +**Purpose:** Enforce role-based access control (RBAC) on actions + +**Key Components:** + +**`middleware.py`:** +- `@authorize(action: Action)` decorator +- Attaches to endpoint functions +- Raises HTTPException(403) if unauthorized + +**`resolvers.py`:** +- Maps roles to allowed actions +- Configurable via optional `authorization.yaml` file (defaults to built-in mappings if not provided) +- Supports action inheritance and wildcards + +**Authorization Actions:** + +The system defines 30+ actions that can be authorized. Examples (see `docs/auth.md` for complete list): + +**Query Actions:** +- `QUERY` - Execute non-streaming queries +- `STREAMING_QUERY` - Execute streaming queries + +**Conversation Management:** +- `LIST_CONVERSATIONS` - List user's conversations +- `GET_CONVERSATION` - Get conversation details +- `DELETE_CONVERSATION` - Delete conversations + +**Administrative Actions:** +- `ADMIN` - Administrative operations +- `FEEDBACK` - Submit user feedback + +**Agent-to-Agent Protocol:** +- `A2A_JSONRPC` - A2A protocol access + +**Metadata Operations:** +- `LIST_MODELS`, `LIST_SHIELDS`, `LIST_TOOLS`, `LIST_PROVIDERS` + +**How Authorization Works:** +1. Each endpoint is decorated with required action (e.g., `@authorize(Action.QUERY)`) +2. User's roles are extracted from AuthTuple +3. Authorization module checks if any user role has permission for the action +4. 
Request proceeds if authorized, returns 403 Forbidden if not + +--- + +### 2.5 Llama Stack Client (`client.py`) + +**Purpose:** Communicate with the Llama Stack backend service for LLM operations + +**Llama Stack APIs Used:** +- **Models**: List available LLM models +- **Responses**: Generate LLM responses (OpenAI-compatible) +- **Conversations**: Manage conversation history +- **Shields**: List and apply guardrails (content filtering, safety checks) +- **Vector Stores**: Access RAG databases for context injection +- **Toolgroups**: Register MCP servers as tools + +--- + +### 2.6 Quota Management (`quota/`) + +**Purpose:** Enforce token usage limits and track consumption + +**Components:** + +**Quota Limiters:** +- **`user_quota_limiter.py`**: Per-user token limits +- **`cluster_quota_limiter.py`**: Cluster-wide limits (shared across all users) +- **`revokable_quota_limiter.py`**: Base implementation with revoke capability + +**Token Usage Tracking:** +- **`token_usage_history.py`**: Historical usage per (user, provider, model) +- Supports analytics and billing + +**Background Jobs:** +- **`runners/quota_scheduler.py`**: Background thread that periodically resets/increases quotas based on configured periods (e.g., daily, weekly, monthly) +- Runs SQL UPDATE statements to modify quota limits when time periods expire +- Maintains database connection and reconnects on failures + +**Quota Enforcement Flow:** + +1. **Before LLM Call:** + - Check if user has available quota + - Raises `QuotaExceedError` if quota exhausted + - Request is blocked with 429 status code + +2. **After LLM Call:** + - Count input and output tokens from LLM response + - Record token usage in Token Usage History table + - Consume tokens from user's quota + - Update quota counters + +3. 
**On Error:** + - If LLM call fails, no tokens are consumed + - Quota remains unchanged + - User can retry the request + +--- + +### 2.7 Caching System (`cache/`) + +**Purpose:** Store full conversation transcripts for retrieval, debugging, and compliance + +When an LLM response is received, the system creates a `CacheEntry` containing the query, response, referenced documents (RAG URLs), provider/model metadata, and timestamps. This entry is stored in the cache database for conversation retrieval (`GET /conversations/{id}`), debugging, analytics, and compliance auditing. + +**Implementations:** + +- **PostgreSQL** (`postgres_cache.py`) - Production multi-worker deployments with persistent database storage +- **SQLite** (`sqlite_cache.py`) - Single-worker or development environments with file-based storage +- **In-Memory** (`in_memory_cache.py`) - Testing and ephemeral use cases with no persistence +- **No-Op** (`noop_cache.py`) - Disables caching entirely + +--- + +### 2.8 Metrics System (`metrics/`) + +**Purpose:** Export Prometheus-compatible metrics for observability and monitoring + +**Metric Categories:** + +**API Metrics:** +- Total API calls by endpoint and status code +- Response duration histograms +- Request rates and latencies + +**LLM Metrics:** +- Total LLM calls by provider and model +- LLM call failures and error rates +- LLM call duration +- Token usage (input/output tokens per model) + +**Quota Metrics:** +- Quota limits and available quota by subject type +- Quota consumption rates + +**Shield Metrics:** +- Guardrail violations by shield type + +**Metrics Endpoint:** +- Exposed at `GET /metrics` in Prometheus format +- Can be scraped by Prometheus or compatible monitoring systems + +--- + +### 2.9 MCP Integration (`utils/mcp_*`) + +**Purpose:** Enable LLMs to call external tools via Model Context Protocol (MCP) servers + +MCP servers are remote HTTP services that expose tools/capabilities to LLMs (e.g., Kubernetes management, web search, 
databases, custom business logic). + +**How It Works:** + +1. **Configuration:** MCP servers are defined in the config file with name, URL, and authorization headers +2. **Registration at Startup:** LCore tells Llama Stack about each MCP server by calling `toolgroups.register()` - this makes the MCP server's tools available in Llama Stack's tool registry +3. **Query Processing:** When processing a query, LCore determines which tools to make available to the LLM and finalizes authorization headers (e.g., merging client-provided tokens with configured headers) +4. **Tool Execution:** When the LLM calls a tool, Llama Stack routes the request to the appropriate MCP server URL with the finalized authorization headers + +**Authorization:** +- Supports tokens from files, environment variables, or direct values +- Special `"kubernetes"` value uses K8s service account token (validated at startup) +- Special `"client"` value allows client-provided authentication via MCP-HEADERS (see below) +- Startup validation ensures `"kubernetes"` only used with K8s authentication + +**Client-Provided Authentication (MCP-HEADERS):** + +Clients can provide their own authentication tokens for specific MCP servers using the `MCP-HEADERS` request header. This is used when MCP servers are configured with `"client"` as the authorization value. + +Use `GET /v1/mcp-auth/client-options` to discover which servers accept client authentication and what header names they expect. + +**Limitations:** +- Only remote HTTP/HTTPS endpoints supported + +--- + +### 2.10 A2A Protocol Support (`app/endpoints/a2a.py`, `a2a_storage/`) + +**Purpose:** Enable external AI agents to call LCore as an A2A-compatible agent + +External agents interact with LCore through a multi-step process: + +1. **Discovery:** The agent calls `GET /.well-known/agent.json` to retrieve LCore's capabilities, skills, and supported modes +2. 
**Message Exchange:** The agent sends messages via `POST /a2a` using JSON-RPC 2.0 format (e.g., `message/send` method) with a `context_id` to identify the conversation +3. **Context Mapping:** The A2A context store maps the external agent's `context_id` to LCore's internal `conversation_id`, enabling multi-turn conversations (storage: PostgreSQL, SQLite, or in-memory) +4. **Query Processing:** LCore processes the message through its standard query pipeline (including LLM calls via Llama Stack) and returns the response to the external agent + +External A2A requests go through LCore's standard authentication system (K8s, RH Identity, API Key, etc.). + +--- + +## 3. Request Processing Pipeline + +This section illustrates how requests flow through LCore from initial receipt to final response. + +### 3.1 Complete Pipeline Overview + +Every API request flows through this standardized pipeline: + +1. **FastAPI Routing** - Match URL path, parse parameters +2. **Middleware Layer** - CORS validation, metrics timer, exception handling +3. **Authentication** - Extract and validate auth token, return AuthTuple(user_id, username, roles, token) +4. **Authorization** - Check user roles against required action permissions +5. **Endpoint Handler** - Execute business logic (see concrete example below) +6. **Middleware Response** - Update metrics, log response + +**Concrete Example:** + +Here's how a real query flows through the system: + +**User Query:** "How do I scale pods in Kubernetes?" + +**Step-by-Step Processing:** + +1. **Request Arrives** - `POST /v2/query` with query text and optional conversation_id +2. **Authentication** - Validate JWT token, extract user_id="user123", roles=["developer"] +3. **Authorization** - Check if "developer" role has QUERY action permission ✅ +4. **Quota Check** - User has 50,000 tokens available ✅ +5. **Model Selection** - Use configured default model (e.g., `meta-llama/Llama-3.1-8B-Instruct`) +6. 
**Context Building** - Retrieve conversation history, query RAG vector stores for relevant docs, determine available MCP tools +7. **Llama Stack Call** - Send complete request with system prompt, RAG context, MCP tools, and shields +8. **LLM Processing** - Llama Stack generates response, may invoke MCP tools, returns token counts +9. **Post-Processing** - Apply shields, generate conversation summary if new +10. **Store Results** - Save to Cache DB, User DB, consume quota, update metrics +11. **Return Response** - Complete LLM response with referenced documents, token usage, and remaining quota + +**Key Takeaways:** +- RAG enhances responses with relevant documentation +- MCP tools allow LLM to interact with real systems +- Every step is tracked for quotas, metrics, and caching +- The entire flow typically completes in 1-3 seconds + +--- + +### 3.2 Error Handling + +**Exception Types and HTTP Status Codes:** + +- **HTTPException (FastAPI)** - 401 Unauthorized, 403 Forbidden, 404 Not Found, 429 Too Many Requests, 500 Internal Server Error +- **QuotaExceedError** - Converted to HTTP 429 +- **APIConnectionError** (Llama Stack client) - Converted to HTTP 503 Service Unavailable +- **SQLAlchemyError** (Database) - Converted to HTTP 500 + +--- + +## 4. Database Architecture + +LCore uses a multi-database strategy to optimize for different data access patterns and lifecycles. + +### 4.1 Multi-Database Strategy + +The system uses **4 separate databases** for different purposes: + +| Database | Purpose | Technology | Size | +|----------|---------|------------|------| +| **User DB** | Conversation metadata | SQLAlchemy ORM | Small | +| **Cache DB** | Full conversation transcripts | psycopg2/sqlite3 | Large | +| **Quota DB** | Token usage and limits | psycopg2/sqlite3 | Medium | +| **A2A DB** | Agent-to-agent protocol state | SQLAlchemy async | Small | + +### 4.2 Why Separate Databases? 
+ +Each database has different lifecycles, access patterns, and scaling needs: + +- **User DB**: Long-term storage, frequent small operations, never deleted +- **Cache DB**: Medium-term storage, large reads/writes, can be purged for compliance +- **Quota DB**: Periodic resets, very frequent updates, scales with API call frequency +- **A2A DB**: Ephemeral storage, async operations, cleared after agent sessions + +--- + +## 5. API Endpoints + +This section documents the REST API endpoints exposed by LCore for client interactions. + +### 5.1 Core Query Endpoints + +**Non-Streaming Query:** +- `POST /v2/query` (Responses API, recommended) +- `POST /query` (Agent API, deprecated) +- Returns complete LLM response with referenced documents, token usage, and quota info + +**Streaming Query:** +- `POST /v2/streaming_query` (Responses API, recommended) +- `POST /streaming_query` (Agent API, deprecated) +- Returns LLM response as Server-Sent Events (SSE) stream +- Events: start, token, metadata, end + +--- + +### 5.2 Conversation Management + +**List Conversations:** `GET /conversations` +- Returns list of user's conversations with metadata + +**Get Conversation:** `GET /conversations/{conversation_id}` +- Returns full conversation history with all messages + +**Delete Conversation:** `DELETE /conversations/{conversation_id}` +- Deletes conversation and associated data + +--- + +### 5.3 Information Endpoints + +**List Models:** `GET /models` +- Returns available LLM models + +**List Providers:** `GET /providers` +- Returns configured LLM providers + +**List Tools:** `GET /tools` +- Returns available tools (RAG, MCP servers) + +**Discover MCP Client Auth Options:** `GET /v1/mcp-auth/client-options` +- Returns MCP servers that accept client-provided authentication tokens +- Includes header names that need to be provided via MCP-HEADERS + +**List Shields:** `GET /shields` +- Returns available guardrails + +**List RAG Databases:** `GET /rags` +- Returns configured vector stores + 
+--- + +### 5.4 Administrative Endpoints + +**Health Check:** `GET /health` +- Basic health status + +**Readiness Check:** `GET /readiness` +- Checks configuration, Llama Stack, and database connections + +**Metrics:** `GET /metrics` +- Prometheus-compatible metrics + +**Feedback:** `POST /feedback` +- Submit user feedback (stored as JSON files) + +--- + +### 5.5 A2A Protocol Endpoints + +**Agent Card Discovery:** `GET /.well-known/agent.json` +- Returns agent capabilities (A2A protocol standard) + +**A2A JSON-RPC:** `POST /a2a` +- Agent-to-agent communication endpoint +- Standard JSON-RPC 2.0 format + +--- + +## 6. Deployment & Operations + +LCore supports two deployment modes, each suited for different operational requirements. + +### 6.1 Deployment Modes + +**Library Mode:** +- Llama Stack runs embedded within LCore process +- No separate Llama Stack service needed +- Direct library calls (no HTTP overhead) +- Lower latency for LLM operations +- Simpler deployment (single process) +- Best for: Development, single-node deployments, environments with limited operational complexity + +**Server Mode:** +- LCore and Llama Stack run as two separate processes +- HTTP communication between LCore and Llama Stack +- Independent scaling of each component +- Better resource isolation +- Easier to update/restart components independently +- In Kubernetes: can run as separate pods or as two containers in the same pod (sidecar) + - **Separate pods**: More isolation, can scale independently + - **Same pod (sidecar)**: Lower latency (localhost communication), atomic deployment +- Best for: Production, multi-node deployments, when LCore and Llama Stack have different scaling needs + +--- + +## Appendix + +### A. Configuration Examples + +See the `examples/` directory in the repository root for complete configuration examples. + +--- + +### B. 
Related Documentation + +- [A2A Protocol](./a2a_protocol.md) - Agent-to-Agent communication protocol +- [Authentication & Authorization](./auth.md) - Detailed auth configuration +- [Configuration Guide](./config.md) - Configuration system details +- [Deployment Guide](./deployment_guide.md) - Deployment patterns and best practices +- [RAG Guide](./rag_guide.md) - RAG configuration and usage +- [OpenAPI Specification](./openapi.md) - Complete API reference + +--- + +**End of Architecture Overview** diff --git a/docs/auth.md b/docs/auth.md index 31b18ad35..0c7905b9a 100644 --- a/docs/auth.md +++ b/docs/auth.md @@ -352,6 +352,8 @@ authorization: - `info` - Access the `/` endpoint, `/info` endpoint, `/readiness` endpoint, and `/liveness` endpoint - `get_config` - Access the `/config` endpoint - `get_models` - Access the `/models` endpoint +- `get_tools` - Access the `/tools` and `/mcp-auth/client-options` endpoints +- `get_shields` - Access the `/shields` endpoint - `list_providers` - Access the `/providers` endpoint - `get_provider` - Access the `/providers/{provider_id}` endpoint - `get_metrics` - Access the `/metrics` endpoint diff --git a/src/app/endpoints/mcp_auth.py b/src/app/endpoints/mcp_auth.py new file mode 100644 index 000000000..c87d3298c --- /dev/null +++ b/src/app/endpoints/mcp_auth.py @@ -0,0 +1,91 @@ +"""Handler for REST API calls related to MCP server authentication.""" + +import logging +from typing import Annotated, Any + +from fastapi import APIRouter, Depends, Request + +from authentication import get_auth_dependency +from authentication.interface import AuthTuple +from authorization.middleware import authorize +from configuration import configuration +from models.config import Action +from models.responses import ( + ForbiddenResponse, + InternalServerErrorResponse, + MCPClientAuthOptionsResponse, + MCPServerAuthInfo, + UnauthorizedResponse, +) +from utils.endpoints import check_configuration_loaded + +logger = logging.getLogger(__name__) +router 
= APIRouter(prefix="/mcp-auth", tags=["mcp-auth"]) + + +mcp_auth_responses: dict[int | str, dict[str, Any]] = { + 200: MCPClientAuthOptionsResponse.openapi_response(), + 401: UnauthorizedResponse.openapi_response( + examples=["missing header", "missing token"] + ), + 403: ForbiddenResponse.openapi_response(examples=["endpoint"]), + 500: InternalServerErrorResponse.openapi_response(examples=["configuration"]), +} + + +@router.get("/client-options", responses=mcp_auth_responses) +@authorize( + Action.GET_TOOLS +) # Uses GET_TOOLS: discovering client auth is related to tool discovery +async def get_mcp_client_auth_options( + request: Request, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], +) -> MCPClientAuthOptionsResponse: + """ + Get MCP servers that accept client-provided authorization. + + Returns a list of MCP servers configured to accept client-provided + authorization tokens, along with the header names where clients + should provide these tokens. + + This endpoint helps clients discover which MCP servers they can + authenticate with using their own tokens. + + Args: + request: The incoming HTTP request (used by middleware). + auth: Authentication tuple from the auth dependency (used by middleware). + + Returns: + MCPClientAuthOptionsResponse: List of MCP servers and their + accepted client authentication headers. 
+ """ + # Used only by the middleware + _ = auth + + # Nothing interesting in the request + _ = request + + check_configuration_loaded(configuration) + + servers_info = [] + + for mcp_server in configuration.mcp_servers: + if not mcp_server.authorization_headers: + continue + + # Find headers with "client" value + client_headers = [ + header_name + for header_name, header_value in mcp_server.authorization_headers.items() + if header_value.strip() == "client" + ] + + if client_headers: + servers_info.append( + MCPServerAuthInfo( + name=mcp_server.name, + client_auth_headers=client_headers, + ) + ) + + return MCPClientAuthOptionsResponse(servers=servers_info) diff --git a/src/app/routers.py b/src/app/routers.py index 7e9037aaa..f3be1e3c4 100644 --- a/src/app/routers.py +++ b/src/app/routers.py @@ -18,6 +18,7 @@ conversations_v3, metrics, tools, + mcp_auth, # V2 endpoints for Response API support query_v2, # RHEL Lightspeed rlsapi v1 compatibility @@ -38,6 +39,7 @@ def include_routers(app: FastAPI) -> None: app.include_router(info.router, prefix="/v1") app.include_router(models.router, prefix="/v1") app.include_router(tools.router, prefix="/v1") + app.include_router(mcp_auth.router, prefix="/v1") app.include_router(shields.router, prefix="/v1") app.include_router(providers.router, prefix="/v1") app.include_router(rags.router, prefix="/v1") diff --git a/src/models/responses.py b/src/models/responses.py index bf4da2698..5e672975c 100644 --- a/src/models/responses.py +++ b/src/models/responses.py @@ -127,6 +127,44 @@ class ToolsResponse(AbstractSuccessfulResponse): } +class MCPServerAuthInfo(BaseModel): + """Information about MCP server client authentication options.""" + + name: str = Field(..., description="MCP server name") + client_auth_headers: list[str] = Field( + ..., + description="List of authentication header names for client-provided tokens", + ) + + +class MCPClientAuthOptionsResponse(AbstractSuccessfulResponse): + """Response containing MCP servers that 
accept client-provided authorization.""" + + servers: list[MCPServerAuthInfo] = Field( + default_factory=list, + description="List of MCP servers that accept client-provided authorization", + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "servers": [ + { + "name": "github", + "client_auth_headers": ["Authorization"], + }, + { + "name": "gitlab", + "client_auth_headers": ["Authorization", "X-API-Key"], + }, + ] + } + ] + } + } + + class ShieldsResponse(AbstractSuccessfulResponse): """Model representing a response to shields request.""" diff --git a/tests/e2e/configs/run-ci.yaml b/tests/e2e/configs/run-ci.yaml index 04c45978b..2c2da44fc 100644 --- a/tests/e2e/configs/run-ci.yaml +++ b/tests/e2e/configs/run-ci.yaml @@ -65,6 +65,9 @@ providers: - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: # Define the storage backend for RAG persistence: diff --git a/tests/e2e/configuration/README.md b/tests/e2e/configuration/README.md index db6f5a44f..c9d40aac8 100644 --- a/tests/e2e/configuration/README.md +++ b/tests/e2e/configuration/README.md @@ -1,2 +1,32 @@ -# List of source files stored in `tests/e2e/configuration` directory +# E2E Test Configuration Files +This directory contains configuration files used for end-to-end testing of Lightspeed Core. + +## Directory Structure + +- `server-mode/` - Configurations for testing when LCore connects to a separate Llama Stack service +- `library-mode/` - Configurations for testing when LCore embeds Llama Stack as a library + +## Common Configuration Features + +### Default Configurations (`lightspeed-stack.yaml`) + +Both server-mode and library-mode default configurations include: + +1. 
**MCP Servers** - Used for testing MCP-related endpoints: + - `github-api` - Uses client-provided auth (Authorization header) + - `gitlab-api` - Uses client-provided auth (X-API-Token header) + - `k8s-service` - Uses kubernetes auth (not client-provided; note: not currently defined in these config files) + - `public-api` - No authentication (not client-provided) + + These servers test the `/v1/mcp-auth/client-options` endpoint, which should return only servers accepting client-provided authentication (`github-api` and `gitlab-api`). + +2. **Authentication** - Set to `noop` for most tests + +3. **User Data Collection** - Enabled for feedback and transcripts testing + +### Special-Purpose Configurations + +- `lightspeed-stack-auth-noop-token.yaml` - For authorization testing +- `lightspeed-stack-invalid-feedback-storage.yaml` - For negative feedback testing +- `lightspeed-stack-no-cache.yaml` - For cache-disabled scenarios diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml index 47257bfb1..118b917c5 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml @@ -17,3 +17,20 @@ user_data_collection: transcripts_storage: "/tmp/data/transcripts" authentication: module: "noop" +mcp_servers: + # Mock server with client-provided auth - should appear in mcp-auth/client-options response + - name: "github-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Mock server with client-provided auth (different header) - should appear in response + - name: "gitlab-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + X-API-Token: "client" + # Mock server with no auth - should NOT appear in response + - name: "public-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" \ No newline at end of file diff --git 
a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml index cc699ba89..1dbef61cf 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml @@ -18,3 +18,20 @@ user_data_collection: transcripts_storage: "/tmp/data/transcripts" authentication: module: "noop" +mcp_servers: + # Mock server with client-provided auth - should appear in mcp-auth/client-options response + - name: "github-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + Authorization: "client" + # Mock server with client-provided auth (different header) - should appear in response + - name: "gitlab-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" + authorization_headers: + X-API-Token: "client" + # Mock server with no auth - should NOT appear in response + - name: "public-api" + provider_id: "model-context-protocol" + url: "http://mcp-mock-server:3000" \ No newline at end of file diff --git a/tests/e2e/features/info.feature b/tests/e2e/features/info.feature index df7e30ded..af6848b85 100644 --- a/tests/e2e/features/info.feature +++ b/tests/e2e/features/info.feature @@ -127,3 +127,11 @@ Feature: Info tests When I access endpoint "metrics" using HTTP GET method Then The status code of the response is 200 And The body of the response contains ls_provider_model_configuration + + Scenario: Check if MCP client auth options endpoint is working + Given The system is in default state + When I access REST API endpoint "mcp-auth/client-options" using HTTP GET method + Then The status code of the response is 200 + And The body of the response has proper client auth options structure + And The response contains server "github-api" with client auth header "Authorization" + And The response contains server "gitlab-api" with client auth header "X-API-Token" \ No newline at end of file diff 
--git a/tests/e2e/features/steps/info.py b/tests/e2e/features/steps/info.py index b4ec37afc..3c6a3473b 100644 --- a/tests/e2e/features/steps/info.py +++ b/tests/e2e/features/steps/info.py @@ -178,3 +178,64 @@ def check_tool_structure(context: Context, provider_name: str) -> None: assert ( provider_tool["type"] == expected_json["type"] ), f"type should be {expected_json["type"]}" + + +@then("The body of the response has proper client auth options structure") +def check_client_auth_options_structure(context: Context) -> None: + """Check that the MCP client auth options response has the correct structure.""" + response_json = context.response.json() + assert response_json is not None, "Response is not valid JSON" + + assert "servers" in response_json, "Response missing 'servers' field" + servers = response_json["servers"] + assert isinstance(servers, list), "'servers' should be a list" + + # Verify structure of each server entry + for server in servers: + assert "name" in server, "Server missing 'name' field" + assert isinstance(server["name"], str), "Server 'name' should be a string" + + assert ( + "client_auth_headers" in server + ), "Server missing 'client_auth_headers' field" + assert isinstance( + server["client_auth_headers"], list + ), "'client_auth_headers' should be a list" + assert ( + len(server["client_auth_headers"]) > 0 + ), "'client_auth_headers' should not be empty" + + # Validate all headers are strings + for header in server["client_auth_headers"]: + assert isinstance( + header, str + ), f"Header should be a string, but got {type(header)}" + + +@then( + 'The response contains server "{server_name}" with client auth header "{header_name}"' +) +def check_server_with_header( + context: Context, server_name: str, header_name: str +) -> None: + """Check that a specific server with a specific header is present in the response.""" + response_json = context.response.json() + assert response_json is not None, "Response is not valid JSON" + + servers = 
response_json.get("servers", []) + + # Find the server by name + found_server = None + for server in servers: + if server.get("name") == server_name: + found_server = server + break + + assert found_server is not None, f"Server '{server_name}' not found in response" + + # Check that the header is in the client_auth_headers list + headers = found_server.get("client_auth_headers", []) + assert header_name in headers, ( + f"Header '{header_name}' not found in server '{server_name}'. " + f"Found headers: {headers}" + ) diff --git a/tests/unit/app/endpoints/test_mcp_auth.py b/tests/unit/app/endpoints/test_mcp_auth.py new file mode 100644 index 000000000..6df771501 --- /dev/null +++ b/tests/unit/app/endpoints/test_mcp_auth.py @@ -0,0 +1,339 @@ +# pylint: disable=protected-access +# pyright: reportCallIssue=false + +"""Unit tests for MCP auth endpoint.""" + +import pytest +from pytest_mock import MockerFixture + +# Import the function directly to bypass decorators +from app.endpoints import mcp_auth +from authentication.interface import AuthTuple +from configuration import AppConfig +from models.config import ( + Configuration, + LlamaStackConfiguration, + ModelContextProtocolServer, + ServiceConfiguration, + UserDataCollection, +) +from models.responses import MCPClientAuthOptionsResponse + +# Shared mock auth tuple with 4 fields as expected by the application +MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token") + + +@pytest.fixture +def mock_configuration_with_client_auth() -> Configuration: + """Create a mock configuration with MCP servers that have client auth.""" + return Configuration( # type: ignore[call-arg] + name="test", + service=ServiceConfiguration(), # type: ignore[call-arg] + llama_stack=LlamaStackConfiguration(url="http://localhost:8321"), # type: ignore[call-arg] + user_data_collection=UserDataCollection(feedback_enabled=False), # type: ignore[call-arg] + mcp_servers=[ + ModelContextProtocolServer( + name="github", + 
provider_id="model-context-protocol", + url="http://github-mcp:8080", + authorization_headers={"Authorization": "client"}, + ), + ModelContextProtocolServer( + name="gitlab", + provider_id="model-context-protocol", + url="http://gitlab-mcp:8080", + authorization_headers={ + "Authorization": "client", + "X-API-Key": "client", + }, + ), + ], + ) # type: ignore[call-arg] + + +@pytest.fixture +def mock_configuration_mixed_auth() -> Configuration: + """Create a mock configuration with mixed auth types.""" + return Configuration( # type: ignore[call-arg] + name="test", + service=ServiceConfiguration(), # type: ignore[call-arg] + llama_stack=LlamaStackConfiguration(url="http://localhost:8321"), # type: ignore[call-arg] + user_data_collection=UserDataCollection(feedback_enabled=False), # type: ignore[call-arg] + mcp_servers=[ + ModelContextProtocolServer( + name="github", + provider_id="model-context-protocol", + url="http://github-mcp:8080", + authorization_headers={"Authorization": "client"}, + ), + ModelContextProtocolServer( + name="k8s_mgmt", + provider_id="model-context-protocol", + url="http://k8s-mcp:8080", + authorization_headers={"Authorization": "kubernetes"}, + ), + ModelContextProtocolServer( + name="public_server", + provider_id="model-context-protocol", + url="http://public-mcp:8080", + # No authorization headers + ), + ], + ) # type: ignore[call-arg] + + +@pytest.fixture +def mock_configuration_no_client_auth() -> Configuration: + """Create a mock configuration with no client auth servers.""" + return Configuration( # type: ignore[call-arg] + name="test", + service=ServiceConfiguration(), # type: ignore[call-arg] + llama_stack=LlamaStackConfiguration(url="http://localhost:8321"), # type: ignore[call-arg] + user_data_collection=UserDataCollection(feedback_enabled=False), # type: ignore[call-arg] + mcp_servers=[ + ModelContextProtocolServer( + name="k8s_mgmt", + provider_id="model-context-protocol", + url="http://k8s-mcp:8080", + 
authorization_headers={"Authorization": "kubernetes"}, + ), + ModelContextProtocolServer( + name="public_server", + provider_id="model-context-protocol", + url="http://public-mcp:8080", + # No authorization headers + ), + ], + ) # type: ignore[call-arg] + + +@pytest.mark.asyncio +async def test_get_mcp_client_auth_options_success( + mocker: MockerFixture, + mock_configuration_with_client_auth: Configuration, # pylint: disable=redefined-outer-name +) -> None: + """Test successful retrieval of MCP servers with client auth options.""" + # Mock configuration - wrap in AppConfig + app_config = AppConfig() + app_config._configuration = mock_configuration_with_client_auth + mocker.patch("app.endpoints.mcp_auth.configuration", app_config) + + # Mock authorization decorator to bypass it + mocker.patch("app.endpoints.mcp_auth.authorize", lambda action: lambda func: func) + + # Mock request and auth + mock_request = mocker.Mock() + mock_auth = MOCK_AUTH + + # Call the endpoint + response = await mcp_auth.get_mcp_client_auth_options.__wrapped__( # type: ignore + mock_request, mock_auth + ) + + # Verify response + assert isinstance(response, MCPClientAuthOptionsResponse) + assert len(response.servers) == 2 + + # Verify github server + github = next(s for s in response.servers if s.name == "github") + assert github.client_auth_headers == ["Authorization"] + + # Verify gitlab server + gitlab = next(s for s in response.servers if s.name == "gitlab") + assert set(gitlab.client_auth_headers) == {"Authorization", "X-API-Key"} + + +@pytest.mark.asyncio +async def test_get_mcp_client_auth_options_mixed_auth( + mocker: MockerFixture, + mock_configuration_mixed_auth: Configuration, # pylint: disable=redefined-outer-name +) -> None: + """Test retrieval with mixed auth types - should only return client auth servers.""" + # Mock configuration - wrap in AppConfig + app_config = AppConfig() + app_config._configuration = mock_configuration_mixed_auth + 
mocker.patch("app.endpoints.mcp_auth.configuration", app_config) + + # Mock authorization decorator to bypass it + mocker.patch("app.endpoints.mcp_auth.authorize", lambda action: lambda func: func) + + # Mock request and auth + mock_request = mocker.Mock() + mock_auth = MOCK_AUTH + + # Call the endpoint + response = await mcp_auth.get_mcp_client_auth_options.__wrapped__( # type: ignore + mock_request, mock_auth + ) + + # Verify response - should only have github server, not k8s_mgmt or public_server + assert isinstance(response, MCPClientAuthOptionsResponse) + assert len(response.servers) == 1 + + assert response.servers[0].name == "github" + assert response.servers[0].client_auth_headers == ["Authorization"] + + # Verify k8s_mgmt and public_server are not in the response + assert not any(s.name == "k8s_mgmt" for s in response.servers) + assert not any(s.name == "public_server" for s in response.servers) + + +@pytest.mark.asyncio +async def test_get_mcp_client_auth_options_no_client_auth( + mocker: MockerFixture, + mock_configuration_no_client_auth: Configuration, # pylint: disable=redefined-outer-name +) -> None: + """Test retrieval when no servers have client auth - should return empty list.""" + # Mock configuration - wrap in AppConfig + app_config = AppConfig() + app_config._configuration = mock_configuration_no_client_auth + mocker.patch("app.endpoints.mcp_auth.configuration", app_config) + + # Mock authorization decorator to bypass it + mocker.patch("app.endpoints.mcp_auth.authorize", lambda action: lambda func: func) + + # Mock request and auth + mock_request = mocker.Mock() + mock_auth = MOCK_AUTH + + # Call the endpoint + response = await mcp_auth.get_mcp_client_auth_options.__wrapped__( # type: ignore + mock_request, mock_auth + ) + + # Verify response - should be empty + assert isinstance(response, MCPClientAuthOptionsResponse) + assert len(response.servers) == 0 + + +@pytest.mark.asyncio +async def test_get_mcp_client_auth_options_empty_config( + 
mocker: MockerFixture, +) -> None: + """Test retrieval when no MCP servers are configured.""" + # Mock configuration with no MCP servers - wrap in AppConfig + mock_config = Configuration( # type: ignore[call-arg] + name="test", + service=ServiceConfiguration(), # type: ignore[call-arg] + llama_stack=LlamaStackConfiguration(url="http://localhost:8321"), # type: ignore[call-arg] + user_data_collection=UserDataCollection(feedback_enabled=False), # type: ignore[call-arg] + mcp_servers=[], + ) # type: ignore[call-arg] + app_config = AppConfig() + app_config._configuration = mock_config + mocker.patch("app.endpoints.mcp_auth.configuration", app_config) + + # Mock authorization decorator to bypass it + mocker.patch("app.endpoints.mcp_auth.authorize", lambda action: lambda func: func) + + # Mock request and auth + mock_request = mocker.Mock() + mock_auth = MOCK_AUTH + + # Call the endpoint + response = await mcp_auth.get_mcp_client_auth_options.__wrapped__( # type: ignore + mock_request, mock_auth + ) + + # Verify response - should be empty + assert isinstance(response, MCPClientAuthOptionsResponse) + assert len(response.servers) == 0 + + +@pytest.mark.asyncio +async def test_get_mcp_client_auth_options_whitespace_handling( + mocker: MockerFixture, +) -> None: + """Test that whitespace in authorization header values is handled correctly.""" + # Mock configuration with whitespace in values - wrap in AppConfig + mock_config = Configuration( # type: ignore[call-arg] + name="test", + service=ServiceConfiguration(), # type: ignore[call-arg] + llama_stack=LlamaStackConfiguration(url="http://localhost:8321"), # type: ignore[call-arg] + user_data_collection=UserDataCollection(feedback_enabled=False), # type: ignore[call-arg] + mcp_servers=[ + ModelContextProtocolServer( + name="server1", + provider_id="model-context-protocol", + url="http://server1:8080", + authorization_headers={"Authorization": " client "}, # With spaces + ), + ModelContextProtocolServer( + name="server2", + 
provider_id="model-context-protocol", + url="http://server2:8080", + authorization_headers={"Authorization": "kubernetes "}, # With spaces + ), + ], + ) # type: ignore[call-arg] + app_config = AppConfig() + app_config._configuration = mock_config + mocker.patch("app.endpoints.mcp_auth.configuration", app_config) + + # Mock authorization decorator to bypass it + mocker.patch("app.endpoints.mcp_auth.authorize", lambda action: lambda func: func) + + # Mock request and auth + mock_request = mocker.Mock() + mock_auth = MOCK_AUTH + + # Call the endpoint + response = await mcp_auth.get_mcp_client_auth_options.__wrapped__( # type: ignore + mock_request, mock_auth + ) + + # Verify response - should have server1 (with spaces around "client") + assert isinstance(response, MCPClientAuthOptionsResponse) + assert len(response.servers) == 1 + assert response.servers[0].name == "server1" + + +@pytest.mark.asyncio +async def test_get_mcp_client_auth_options_multiple_headers_single_server( + mocker: MockerFixture, +) -> None: + """Test server with multiple client auth headers.""" + # Mock configuration - wrap in AppConfig + mock_config = Configuration( # type: ignore[call-arg] + name="test", + service=ServiceConfiguration(), # type: ignore[call-arg] + llama_stack=LlamaStackConfiguration(url="http://localhost:8321"), # type: ignore[call-arg] + user_data_collection=UserDataCollection(feedback_enabled=False), # type: ignore[call-arg] + mcp_servers=[ + ModelContextProtocolServer( + name="multi_auth", + provider_id="model-context-protocol", + url="http://multi:8080", + authorization_headers={ + "Authorization": "client", + "X-API-Key": "client", + "X-Custom-Token": "client", + }, + ), + ], + ) # type: ignore[call-arg] + app_config = AppConfig() + app_config._configuration = mock_config + mocker.patch("app.endpoints.mcp_auth.configuration", app_config) + + # Mock authorization decorator to bypass it + mocker.patch("app.endpoints.mcp_auth.authorize", lambda action: lambda func: func) + + # 
Mock request and auth + mock_request = mocker.Mock() + mock_auth = MOCK_AUTH + + # Call the endpoint + response = await mcp_auth.get_mcp_client_auth_options.__wrapped__( # type: ignore + mock_request, mock_auth + ) + + # Verify response + assert isinstance(response, MCPClientAuthOptionsResponse) + assert len(response.servers) == 1 + assert response.servers[0].name == "multi_auth" + assert set(response.servers[0].client_auth_headers) == { + "Authorization", + "X-API-Key", + "X-Custom-Token", + } diff --git a/tests/unit/app/test_routers.py b/tests/unit/app/test_routers.py index 535dc92e5..0da8c15f1 100644 --- a/tests/unit/app/test_routers.py +++ b/tests/unit/app/test_routers.py @@ -23,6 +23,7 @@ authorized, metrics, tools, + mcp_auth, rlsapi_v1, a2a, ) # noqa:E402 @@ -105,11 +106,12 @@ def test_include_routers() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 18 + assert len(app.routers) == 19 assert root.router in app.get_routers() assert info.router in app.get_routers() assert models.router in app.get_routers() assert tools.router in app.get_routers() + assert mcp_auth.router in app.get_routers() assert shields.router in app.get_routers() assert providers.router in app.get_routers() # assert query.router in app.get_routers() @@ -142,11 +144,12 @@ def test_check_prefixes() -> None: include_routers(app) # are all routers added? - assert len(app.routers) == 18 + assert len(app.routers) == 19 assert app.get_router_prefix(root.router) == "" assert app.get_router_prefix(info.router) == "/v1" assert app.get_router_prefix(models.router) == "/v1" assert app.get_router_prefix(tools.router) == "/v1" + assert app.get_router_prefix(mcp_auth.router) == "/v1" assert app.get_router_prefix(shields.router) == "/v1" assert app.get_router_prefix(providers.router) == "/v1" assert app.get_router_prefix(rags.router) == "/v1"