4 changes: 2 additions & 2 deletions .github/workflows/e2e_tests.yaml
@@ -60,7 +60,7 @@ jobs:
cp "${CONFIG_FILE}" lightspeed-stack.yaml
echo "✅ Configuration loaded successfully"
- name: Select and configure run.yaml
env:
CONFIG_ENVIRONMENT: ${{ matrix.environment || 'ci' }}
@@ -100,7 +100,7 @@ jobs:
echo "=== Configuration Summary ==="
echo "Deployment mode: ${{ matrix.mode }}"
echo "Environment: ${{ matrix.environment }}"
echo "Source config: tests/e2e/configs/run-ci.yaml"
echo "Source config: tests/e2e/configs/run-${{ matrix.environment }}.yaml"
echo ""
echo "=== Configuration Preview ==="
echo "Providers: $(grep -c "provider_id:" run.yaml)"
49 changes: 21 additions & 28 deletions .github/workflows/e2e_tests_providers.yaml
@@ -52,6 +52,21 @@ jobs:
echo "=== Recent commits ==="
git log --oneline -5

- name: Add Azure Entra ID config block to all test configs
> **Author comment:** The same set of LCS config files is shared across all the test jobs, so the Entra ID config block is added at runtime only in the relevant (Azure) test jobs.

if: matrix.environment == 'azure'
run: |
echo "Adding azure_entra_id configuration block to all test configs..."
for config in tests/e2e/configuration/*/lightspeed-stack*.yaml; do
if [ -f "$config" ]; then
echo "" >> "$config"
echo "azure_entra_id:" >> "$config"
echo " tenant_id: \${env.TENANT_ID}" >> "$config"
echo " client_id: \${env.CLIENT_ID}" >> "$config"
echo " client_secret: \${env.CLIENT_SECRET}" >> "$config"
echo "✅ Added to: $config"
fi
done

- name: Load lightspeed-stack.yaml configuration
run: |
MODE="${{ matrix.mode }}"
@@ -66,32 +81,6 @@

cp "${CONFIG_FILE}" lightspeed-stack.yaml
echo "✅ Configuration loaded successfully"

- name: Get Azure API key (access token)
if: matrix.environment == 'azure'
id: azure_token
env:
CLIENT_ID: ${{ secrets.CLIENT_ID }}
CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
TENANT_ID: ${{ secrets.TENANT_ID }}
run: |
echo "Requesting Azure API token..."
RESPONSE=$(curl -s -X POST \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "client_id=$CLIENT_ID&scope=https://cognitiveservices.azure.com/.default&client_secret=$CLIENT_SECRET&grant_type=client_credentials" \
"https://login.microsoftonline.com/$TENANT_ID/oauth2/v2.0/token")

echo "Response received. Extracting access_token..."
ACCESS_TOKEN=$(echo "$RESPONSE" | jq -r '.access_token')

if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" == "null" ]; then
echo "❌ Failed to obtain Azure access token. Response:"
echo "$RESPONSE"
exit 1
fi

echo "✅ Successfully obtained Azure access token."
echo "AZURE_API_KEY=$ACCESS_TOKEN" >> $GITHUB_ENV

- name: Save VertexAI service account key to file
if: matrix.environment == 'vertexai'
@@ -198,7 +187,9 @@ jobs:
if: matrix.mode == 'server'
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_API_KEY: ${{ env.AZURE_API_KEY }}
TENANT_ID: ${{ secrets.TENANT_ID }}
CLIENT_ID: ${{ secrets.CLIENT_ID }}
CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }}
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
@@ -227,7 +218,9 @@ jobs:
if: matrix.mode == 'library'
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_API_KEY: ${{ env.AZURE_API_KEY }}
TENANT_ID: ${{ secrets.TENANT_ID }}
CLIENT_ID: ${{ secrets.CLIENT_ID }}
CLIENT_SECRET: ${{ secrets.CLIENT_SECRET }}
VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }}
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
11 changes: 10 additions & 1 deletion Makefile
@@ -10,8 +10,17 @@ PYTHON_REGISTRY = pypi
TORCH_VERSION := 2.7.1


# Default configuration files (override with: make run CONFIG=myconfig.yaml)
CONFIG ?= lightspeed-stack.yaml
LLAMA_STACK_CONFIG ?= run.yaml

run: ## Run the service locally
uv run src/lightspeed_stack.py
uv run src/lightspeed_stack.py -c $(CONFIG)

run-llama-stack: ## Start Llama Stack with enriched config (for local service mode)
uv run src/llama_stack_configuration.py -c $(CONFIG) -i $(LLAMA_STACK_CONFIG) -o $(LLAMA_STACK_CONFIG) && \
AZURE_API_KEY=$$(grep '^AZURE_API_KEY=' .env | cut -d'=' -f2-) \
uv run llama stack run $(LLAMA_STACK_CONFIG)

test-unit: ## Run the unit tests
@echo "Running unit tests..."
4 changes: 2 additions & 2 deletions README.md
@@ -195,8 +195,8 @@ __Note__: Support for individual models is dependent on the specific inference p
| RHOAI (vLLM)| meta-llama/Llama-3.2-1B-Instruct | Yes | remote::vllm | [1](tests/e2e-prow/rhoai/configs/run.yaml) |
| RHAIIS (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhaiis.yaml) |
| RHEL AI (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhelai.yaml) |
| Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) |
| Azure | o1, o1-mini | No | remote::azure | |
| Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-4o-mini, o3-mini, o4-mini, o1 | Yes | remote::azure | [1](examples/azure-run.yaml) |
| Azure | gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o1-mini | No or limited | remote::azure | |
| VertexAI | google/gemini-2.0-flash, google/gemini-2.5-flash, google/gemini-2.5-pro [^1] | Yes | remote::vertexai | [1](examples/vertexai-run.yaml) |
| WatsonX | meta-llama/llama-3-3-70b-instruct | Yes | remote::watsonx | [1](examples/watsonx-run.yaml) |

7 changes: 5 additions & 2 deletions docker-compose-library.yaml
@@ -14,13 +14,16 @@ services:
- ./run.yaml:/app-root/run.yaml:Z
- ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
environment:
# LLM Provider API Keys
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
# OpenAI
- OPENAI_API_KEY=${OPENAI_API_KEY}
- E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL:-gpt-4o-mini}
# Azure
- AZURE_API_KEY=${AZURE_API_KEY:-}
# Azure Entra ID credentials (AZURE_API_KEY is obtained dynamically in Python)
- TENANT_ID=${TENANT_ID:-}
- CLIENT_ID=${CLIENT_ID:-}
- CLIENT_SECRET=${CLIENT_SECRET:-}
# RHAIIS
- RHAIIS_URL=${RHAIIS_URL:-}
- RHAIIS_API_KEY=${RHAIIS_API_KEY:-}
14 changes: 10 additions & 4 deletions docker-compose.yaml
@@ -11,14 +11,17 @@ services:
volumes:
- ./run.yaml:/opt/app-root/run.yaml:Z
- ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
- ./lightspeed-stack.yaml:/opt/app-root/lightspeed-stack.yaml:z
environment:
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
# OpenAI
- OPENAI_API_KEY=${OPENAI_API_KEY}
- E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL:-gpt-4o-mini}
# Azure
- AZURE_API_KEY=${AZURE_API_KEY}
# Azure Entra ID credentials (AZURE_API_KEY is passed via provider_data at request time)
- TENANT_ID=${TENANT_ID:-}
- CLIENT_ID=${CLIENT_ID:-}
- CLIENT_SECRET=${CLIENT_SECRET:-}
# RHAIIS
- RHAIIS_URL=${RHAIIS_URL}
- RHAIIS_API_KEY=${RHAIIS_API_KEY}
@@ -55,10 +58,13 @@ services:
ports:
- "8080:8080"
volumes:
- ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z
- ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:z
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- AZURE_API_KEY=${AZURE_API_KEY}
# Azure Entra ID credentials (AZURE_API_KEY is obtained dynamically)
- TENANT_ID=${TENANT_ID:-}
- CLIENT_ID=${CLIENT_ID:-}
- CLIENT_SECRET=${CLIENT_SECRET:-}
depends_on:
llama-stack:
condition: service_healthy
109 changes: 109 additions & 0 deletions docs/providers.md
@@ -65,6 +65,115 @@ Red Hat providers:
| RHAIIS (vllm) | 3.2.3 (on RHEL 9.20250429.0.4) | remote | `openai` | ✅ |
| RHEL AI (vllm) | 1.5.2 | remote | `openai` | ✅ |

### Azure Provider - Entra ID Authentication Guide

Lightspeed Core supports secure authentication with Microsoft Entra ID (formerly Azure Active Directory) for the Azure inference provider. This lets you connect to Azure OpenAI without static API keys by authenticating through your organization's Azure identity.

#### Lightspeed Core Configuration Requirements

To enable Entra ID authentication, the `azure_entra_id` block must be included in your LCS configuration. The `tenant_id`, `client_id`, and `client_secret` attributes are required:

| Attribute | Required | Description |
|-----------|----------|-------------|
| `tenant_id` | Yes | Azure AD tenant ID |
| `client_id` | Yes | Application (client) ID |
| `client_secret` | Yes | Client secret value |
| `scope` | No | Token scope (default: `https://cognitiveservices.azure.com/.default`) |

Example LCS configuration section:

```yaml
azure_entra_id:
tenant_id: ${env.TENANT_ID}
client_id: ${env.CLIENT_ID}
client_secret: ${env.CLIENT_SECRET}
# scope: "https://cognitiveservices.azure.com/.default" # optional, this is the default
```

#### Llama Stack Configuration Requirements

Because Lightspeed builds on top of Llama Stack, certain configuration fields are required to satisfy the base Llama Stack schema. The config block for the Azure inference provider **must** include `api_key`, `api_base`, and `api_version` — Llama Stack will fail to start if any of these are missing.

**Important:** The `api_key` field must be set to `${env.AZURE_API_KEY}` exactly as shown below. This is not optional — Lightspeed uses this specific environment variable name as a placeholder for injection of the Entra ID access token. Using a different variable name will break the authentication flow.

```yaml
inference:
- provider_id: azure
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY} # Must be exactly this - placeholder for Entra ID token
api_base: ${env.AZURE_API_BASE}
api_version: 2025-01-01-preview
```

**How it works:** At startup, Lightspeed acquires an Entra ID access token and stores it in the `AZURE_API_KEY` environment variable. When Llama Stack initializes, it reads the config, substitutes `${env.AZURE_API_KEY}` with the token value, and uses it to authenticate with Azure OpenAI. Llama Stack also calls `models.list()` during initialization to validate provider connectivity, which is why the token must be available before client initialization.
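
The token exchange itself is a standard OAuth2 client-credentials call against the Microsoft identity platform (the same call this PR's CI previously made with `curl`). A minimal sketch of that step, assuming the default scope; the helper name `fetch_entra_id_token` is illustrative, not Lightspeed's actual implementation:

```python
import os

import requests


def fetch_entra_id_token(
    tenant_id: str,
    client_id: str,
    client_secret: str,
    scope: str = "https://cognitiveservices.azure.com/.default",
) -> str:
    """Acquire an access token via the OAuth2 client-credentials flow."""
    url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    response = requests.post(
        url,
        data={
            "client_id": client_id,
            "client_secret": client_secret,
            "scope": scope,
            "grant_type": "client_credentials",
        },
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["access_token"]


# Store the token where Llama Stack's ${env.AZURE_API_KEY} substitution will find it.
os.environ["AZURE_API_KEY"] = fetch_entra_id_token(
    os.environ["TENANT_ID"], os.environ["CLIENT_ID"], os.environ["CLIENT_SECRET"]
)
```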

#### Access Token Lifecycle and Management

**Library mode startup:**
1. Lightspeed reads your Entra ID configuration
2. Acquires an initial access token from Microsoft Entra ID
3. Stores the token in the `AZURE_API_KEY` environment variable
4. **Then** initializes the Llama Stack library client

This ordering is critical because Llama Stack calls `models.list()` during initialization to validate provider connectivity. If the token is not set before client initialization, Azure requests will fail with authentication errors.
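
A sketch of this ordering in library mode, reusing `fetch_entra_id_token` from the sketch above. The `LlamaStackAsLibraryClient` import path varies across Llama Stack versions, so treat the import as illustrative:

```python
import os

# Steps 1-3: read the Entra ID settings and export the token BEFORE any client exists.
os.environ["AZURE_API_KEY"] = fetch_entra_id_token(
    os.environ["TENANT_ID"], os.environ["CLIENT_ID"], os.environ["CLIENT_SECRET"]
)

# Step 4: only now initialize the embedded client; its startup models.list()
# call substitutes ${env.AZURE_API_KEY} from the environment and reaches Azure.
from llama_stack.core.library_client import LlamaStackAsLibraryClient  # path varies by version

client = LlamaStackAsLibraryClient("run.yaml")
client.initialize()
```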

**Service mode startup:**

When running Llama Stack as a separate service, Lightspeed runs a pre-startup script that:
1. Reads the Entra ID configuration
2. Acquires an initial access token
3. Writes the token to the `AZURE_API_KEY` environment variable
4. **Then** the Llama Stack service starts

This initial token is used solely for the `models.list()` validation call during Llama Stack startup. After startup, Lightspeed manages token refresh independently and passes fresh tokens via request headers.

**During inference requests:**
1. Before each request, Lightspeed checks if the token has expired
2. If expired, a new token is automatically acquired and the environment variable is updated
3. For library mode: the Llama Stack client is reloaded to pick up the new token
4. For service mode: the token is passed via `X-LlamaStack-Provider-Data` request headers
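
A sketch of the refresh check and the service-mode header from the steps above, assuming a five-minute safety margin; the class name and the provider-data key (`azure_api_key`) are assumptions for illustration:

```python
import json
import os
import time


class AzureTokenManager:
    """Hypothetical pre-request refresh logic mirroring the steps above."""

    SAFETY_MARGIN_S = 300  # refresh five minutes before nominal expiry

    def __init__(self) -> None:
        self._token: str | None = None
        self._expires_at: float = 0.0

    def get_token(self) -> str:
        if self._token is None or time.time() >= self._expires_at - self.SAFETY_MARGIN_S:
            # Re-run the client-credentials call from the earlier sketch.
            self._token = fetch_entra_id_token(
                os.environ["TENANT_ID"], os.environ["CLIENT_ID"], os.environ["CLIENT_SECRET"]
            )
            self._expires_at = time.time() + 3600  # Entra ID tokens typically last ~1 hour
            os.environ["AZURE_API_KEY"] = self._token  # env var stays the single source of truth
        return self._token


manager = AzureTokenManager()

# Service mode: forward a fresh token on each request via provider data.
headers = {"X-LlamaStack-Provider-Data": json.dumps({"azure_api_key": manager.get_token()})}
```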

**Token security:**
- Access tokens are wrapped in `SecretStr` to prevent accidental logging
- Tokens are stored only in the `AZURE_API_KEY` environment variable (single source of truth)
- Each Uvicorn worker maintains its own token lifecycle independently
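
As a minimal illustration of the `SecretStr` masking mentioned above (the token value is a placeholder):

```python
from pydantic import SecretStr

token = SecretStr("eyJ0eXAiOiJKV1QiLCJhbGciOi...")  # placeholder access token

print(token)                     # ********** - masked in str()/repr() and logs
print(token.get_secret_value())  # explicit unwrap required to use the real value
```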

**Token validity:**
- Access tokens are typically valid for 1 hour
- Lightspeed refreshes tokens proactively before expiration (with a safety margin)
- Token refresh happens automatically in the background without manual intervention

#### Local Deployment Examples

**Prerequisites:** Export the required Azure Entra ID environment variables in your terminal(s):

```bash
export TENANT_ID="your-tenant-id"
export CLIENT_ID="your-client-id"
export CLIENT_SECRET="your-client-secret"
```

**Library mode** (Llama Stack embedded in Lightspeed):

```bash
# From project root
make run CONFIG=examples/lightspeed-stack-azure-entraid-lib.yaml
```

**Service mode** (Llama Stack as separate service):

```bash
# Terminal 1: Start Llama Stack service with Azure Entra ID config
make run-llama-stack CONFIG=examples/lightspeed-stack-azure-entraid-service.yaml LLAMA_STACK_CONFIG=examples/azure-run.yaml

# Terminal 2: Start Lightspeed (after Llama Stack is ready)
make run CONFIG=examples/lightspeed-stack-azure-entraid-service.yaml
```

**Note:** The `make run-llama-stack` command accepts two variables:
- `CONFIG` - Lightspeed configuration file (default: `lightspeed-stack.yaml`)
- `LLAMA_STACK_CONFIG` - Llama Stack configuration file to enrich and run (default: `run.yaml`)

---
