9 changes: 4 additions & 5 deletions .tekton/lightspeed-stack-pull-request.yaml
@@ -49,14 +49,13 @@ spec:
"type": "pip",
"path": ".",
"requirements_files": [
"requirements.x86_64.txt",
"requirements.aarch64.txt",
"requirements.hermetic.txt",
"requirements.torch.txt"
"requirements.hashes.wheel.txt",
"requirements.hashes.source.txt",
"requirements.hermetic.txt"
],
"requirements_build_files": ["requirements-build.txt"],
"binary": {
"packages": "torch,faiss-cpu,uv,pip,jiter,tiktoken,numpy,scipy,pandas,pillow,scikit_learn,pyarrow,cryptography,fastuuid,hf_xet,maturin,pydantic_core,rpds_py,safetensors,tokenizers",
"packages": "accelerate,aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,autoevals,cachetools,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,durationpy,faiss-cpu,fire,frozenlist,fsspec,googleapis-common-protos,greenlet,grpcio,h11,hf-xet,httpcore,httpx,httpx-sse,huggingface-hub,idna,jinja2,jiter,joblib,jsonschema-specifications,llama-stack-client,lxml,markdown-it-py,markupsafe,mdurl,mpmath,multidict,networkx,nltk,numpy,oauthlib,opentelemetry-api,opentelemetry-exporter-otlp,opentelemetry-exporter-otlp-proto-common,opentelemetry-exporter-otlp-proto-grpc,opentelemetry-exporter-otlp-proto-http,opentelemetry-instrumentation,opentelemetry-proto,opentelemetry-sdk,opentelemetry-semantic-conventions,packaging,pandas,pillow,ply,polyleven,prompt-toolkit,propcache,proto-plus,psycopg2-binary,pyaml,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pydantic-settings,pygments,pyjwt,python-dateutil,python-dotenv,python-multipart,pytz,pyyaml,referencing,regex,requests,requests-oauthlib,rich,rpds-py,rsa,safetensors,scikit-learn,scipy,sentence-transformers,setuptools,six,sniffio,sqlalchemy,starlette,sympy,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,triton,typing-extensions,typing-inspection,tzdata,urllib3,wcwidth,websocket-client,wrapt,xxhash,yarl,zipp,uv,pip,maturin",
⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Extract and verify the packages list is sorted
packages=$(grep -o '"packages": "[^"]*"' .tekton/lightspeed-stack-pull-request.yaml | sed 's/"packages": "//;s/"$//')
echo "$packages" | tr ',' '\n' | sort -c && echo "Packages are sorted" || echo "Packages are NOT sorted"
echo "Total packages: $(echo "$packages" | tr ',' '\n' | wc -l)"

Repository: lightspeed-core/lightspeed-stack

Length of output: 162


🏁 Script executed:

# Check the file exists and examine its structure
ls -la .tekton/lightspeed-stack-pull-request.yaml && wc -l .tekton/lightspeed-stack-pull-request.yaml

Repository: lightspeed-core/lightspeed-stack

Length of output: 215


🏁 Script executed:

# Look at the context around line 58
sed -n '55,65p' .tekton/lightspeed-stack-pull-request.yaml | cat -n

Repository: lightspeed-core/lightspeed-stack

Length of output: 1788


Fix the generator script to produce sorted package output.

The packages list (121 items) is not sorted: `uv` appears at position 119 but belongs earlier alphabetically. The long line itself is acceptable for auto-generated content, but the unsorted state indicates the `konflux_requirements.sh` generator is not producing deterministic output, which makes diffs harder to review. Update the generator to sort packages alphabetically.

🤖 Prompt for AI Agents
In `@.tekton/lightspeed-stack-pull-request.yaml` at line 58, the generated packages list is unsorted (e.g., `uv` near the end) because the generator script `konflux_requirements.sh` does not sort its output. Update the generator to sort package names before joining them (for example, pipe the package lines through `sort -u` or use an equivalent language-level sort) so the `packages` string is emitted in stable alphabetical order. Ensure duplicates are removed, and adjust the code path that builds the comma-separated `packages` value to use the sorted list.
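A minimal sketch of the sort-and-join step the generator could use (the variable names here are illustrative assumptions, not the script's actual ones):

```shell
#!/bin/bash
# Hypothetical package names, one per line, in the unordered form
# a generator might collect them (duplicates included)
packages_raw="torch
uv
numpy
torch"

# Sort alphabetically, drop duplicates, then join with commas
packages_sorted=$(printf '%s\n' "$packages_raw" | sort -u | paste -sd, -)

echo "$packages_sorted"   # numpy,torch,uv
```

Because `sort -u` is deterministic, regenerating the file yields an identical `packages` string whenever the package set is unchanged, which keeps diffs minimal.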

"os": "linux",
"arch": "x86_64,aarch64",
"py_version": "312"
9 changes: 4 additions & 5 deletions .tekton/lightspeed-stack-push.yaml
@@ -41,14 +41,13 @@ spec:
"type": "pip",
"path": ".",
"requirements_files": [
"requirements.x86_64.txt",
"requirements.aarch64.txt",
"requirements.hermetic.txt",
"requirements.torch.txt"
"requirements.hashes.wheel.txt",
"requirements.hashes.source.txt",
"requirements.hermetic.txt"
],
"requirements_build_files": ["requirements-build.txt"],
"binary": {
"packages": "torch,faiss-cpu,uv,pip,jiter,tiktoken,numpy,scipy,pandas,pillow,scikit_learn,pyarrow,cryptography,fastuuid,hf_xet,maturin,pydantic_core,rpds_py,safetensors,tokenizers",
"packages": "accelerate,aiohappyeyeballs,aiohttp,aiosignal,aiosqlite,annotated-doc,annotated-types,anyio,asyncpg,attrs,autoevals,cachetools,cffi,charset-normalizer,chevron,click,cryptography,datasets,dill,distro,dnspython,durationpy,faiss-cpu,fire,frozenlist,fsspec,googleapis-common-protos,greenlet,grpcio,h11,hf-xet,httpcore,httpx,httpx-sse,huggingface-hub,idna,jinja2,jiter,joblib,jsonschema-specifications,llama-stack-client,lxml,markdown-it-py,markupsafe,mdurl,mpmath,multidict,networkx,nltk,numpy,oauthlib,opentelemetry-api,opentelemetry-exporter-otlp,opentelemetry-exporter-otlp-proto-common,opentelemetry-exporter-otlp-proto-grpc,opentelemetry-exporter-otlp-proto-http,opentelemetry-instrumentation,opentelemetry-proto,opentelemetry-sdk,opentelemetry-semantic-conventions,packaging,pandas,pillow,ply,polyleven,prompt-toolkit,propcache,proto-plus,psycopg2-binary,pyaml,pyarrow,pyasn1,pyasn1-modules,pycparser,pydantic,pydantic-core,pydantic-settings,pygments,pyjwt,python-dateutil,python-dotenv,python-multipart,pytz,pyyaml,referencing,regex,requests,requests-oauthlib,rich,rpds-py,rsa,safetensors,scikit-learn,scipy,sentence-transformers,setuptools,six,sniffio,sqlalchemy,starlette,sympy,threadpoolctl,tiktoken,tokenizers,torch,tqdm,transformers,triton,typing-extensions,typing-inspection,tzdata,urllib3,wcwidth,websocket-client,wrapt,xxhash,yarl,zipp,uv,pip,maturin",
"os": "linux",
"arch": "x86_64,aarch64",
"py_version": "312"
10 changes: 7 additions & 3 deletions Containerfile
@@ -5,17 +5,20 @@ ARG APP_ROOT=/app-root
ARG LSC_SOURCE_DIR=.

# UV_PYTHON_DOWNLOADS=0 : Disable Python interpreter downloads and use the system interpreter.
# MATURIN_NO_INSTALL_RUST=1 : Disable installation of Rust dependencies by Maturin.
ENV UV_COMPILE_BYTECODE=0 \
UV_LINK_MODE=copy \
UV_PYTHON_DOWNLOADS=0
UV_PYTHON_DOWNLOADS=0 \
MATURIN_NO_INSTALL_RUST=1

WORKDIR /app-root

USER root

# Install gcc - required by polyleven python package on aarch64
# (dependency of autoevals, no pre-built binary wheels for linux on aarch64)
RUN dnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs gcc
# cmake and cargo are required by fastuuid, maturin
RUN dnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs gcc cmake cargo

# Install uv package manager
RUN pip3.12 install "uv>=0.8.15"
@@ -34,7 +37,8 @@ RUN if [ -f /cachi2/cachi2.env ]; then \
. /cachi2/cachi2.env && \
uv venv --seed --no-index --find-links ${PIP_FIND_LINKS} && \
. .venv/bin/activate && \
pip install --no-index --find-links ${PIP_FIND_LINKS} -r requirements.$(uname -m).txt -r requirements.torch.txt; \
pip install --no-cache-dir --ignore-installed --no-index --find-links ${PIP_FIND_LINKS} --no-deps -r requirements.hashes.wheel.txt -r requirements.hashes.source.txt && \
pip check; \
else \
uv sync --locked --no-dev --group llslibdev; \
fi
11 changes: 2 additions & 9 deletions Makefile
@@ -7,7 +7,7 @@ PATH_TO_PLANTUML := ~/bin
PYTHON_REGISTRY = pypi

# PyTorch version
TORCH_VERSION := 2.7.1
TORCH_VERSION := 2.9.0


run: ## Run the service locally
@@ -111,14 +111,7 @@ upload-distribution-archives: ## Upload distribution archives into Python regist
uv run python -m twine upload --repository ${PYTHON_REGISTRY} dist/*

konflux-requirements: ## generate hermetic requirements.*.txt file for konflux build
uv pip compile pyproject.toml -o requirements.x86_64.txt --generate-hashes --group llslibdev --python-platform x86_64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
uv pip compile pyproject.toml -o requirements.aarch64.txt --generate-hashes --group llslibdev --python-platform aarch64-unknown-linux-gnu --torch-backend cpu --python-version 3.12 --refresh
./scripts/remove_torch_deps.sh requirements.x86_64.txt
./scripts/remove_torch_deps.sh requirements.aarch64.txt
echo "torch==${TORCH_VERSION}" | uv pip compile - -o requirements.torch.txt --generate-hashes --python-version 3.12 --torch-backend cpu --emit-index-url --no-deps --index-url https://download.pytorch.org/whl/cpu --refresh
uv run pybuild-deps compile --output-file=requirements-build.txt \
<(grep -v "^faiss-cpu" requirements.hermetic.txt) \
<(grep -Eo "^[a-zA-Z0-9][-a-zA-Z0-9._]*==[^ ]+" requirements.x86_64.txt | grep -v "^faiss-cpu")
./scripts/konflux_requirements.sh
Comment on lines 113 to +114

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# First, check if the script exists
if [ -f scripts/konflux_requirements.sh ]; then
    echo "=== Checking for error handling in scripts/konflux_requirements.sh ==="
    echo ""
    echo "--- First 30 lines of the script ---"
    head -30 scripts/konflux_requirements.sh
    echo ""
    echo "--- Searching for set directives ---"
    grep -n "^set -" scripts/konflux_requirements.sh || echo "No 'set -' directives found at line start"
else
    echo "ERROR: scripts/konflux_requirements.sh not found"
    ls -la scripts/ 2>/dev/null || echo "scripts directory not found"
fi

Repository: lightspeed-core/lightspeed-stack

Length of output: 1325


Add error handling directives to scripts/konflux_requirements.sh.

The script is missing `set -e` and `set -o pipefail` at the top. These directives are critical because the script generates requirements files that feed directly into the Tekton pipeline build. Add the following lines right after the shebang:

#!/bin/bash
set -e
set -o pipefail

This ensures the script exits immediately on any command failure or pipe error, preventing partial/corrupted output files from progressing to the build pipeline.
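To illustrate what `set -o pipefail` changes, here is a generic shell sketch (none of this is from the repo's script):

```shell
#!/bin/bash
# Without pipefail, a pipeline's exit status is that of its LAST command,
# so a failure mid-pipeline is silently swallowed.
false | cat
echo "without pipefail: $?"   # prints 0

# With pipefail, the pipeline fails if ANY component fails.
set -o pipefail
false | cat
echo "with pipefail: $?"      # prints 1
```

Combined with `set -e`, a failed pipeline terminates the script immediately instead of letting later steps run against incomplete intermediate output.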

🤖 Prompt for AI Agents
In `@Makefile` around lines 113-114, the generator script `scripts/konflux_requirements.sh` (invoked by the `konflux-requirements` target) lacks strict error handling. Add shell strictness flags right after the shebang, i.e. `set -e` and `set -o pipefail`, so the script exits immediately on any command or pipeline failure and avoids producing partial or corrupted requirements files for the Tekton build.


help: ## Show this help screen
@echo 'Usage: make <OPTIONS> ... <TARGETS>'
148 changes: 80 additions & 68 deletions README.md
@@ -16,85 +16,93 @@ The service includes comprehensive user data collection capabilities for various
<!-- vim-markdown-toc GFM -->

* [lightspeed-stack](#lightspeed-stack)
* [About The Project](#about-the-project)
* [About The Project](#about-the-project)
* [Architecture](#architecture)
* [Prerequisites](#prerequisites)
* [Installation](#installation)
* [Run LCS locally](#run-lcs-locally)
* [Configuration](#configuration)
* [LLM Compatibility](#llm-compatibility)
* [Set LLM provider and model](#set-llm-provider-and-model)
* [Selecting provider and model](#selecting-provider-and-model)
* [Provider and model selection in REST API request](#provider-and-model-selection-in-rest-api-request)
* [Default provider and model](#default-provider-and-model)
* [Supported providers](#supported-providers)
* [Integration with Llama Stack](#integration-with-llama-stack)
* [Llama Stack as separate server](#llama-stack-as-separate-server)
* [MCP Server and Tool Configuration](#mcp-server-and-tool-configuration)
* [Configuring MCP Servers](#configuring-mcp-servers)
* [Configuring MCP Headers](#configuring-mcp-headers)
* [Llama Stack project and configuration](#llama-stack-project-and-configuration)
* [Check connection to Llama Stack](#check-connection-to-llama-stack)
* [Llama Stack as client library](#llama-stack-as-client-library)
* [Llama Stack version check](#llama-stack-version-check)
* [User data collection](#user-data-collection)
* [System prompt](#system-prompt)
* [System Prompt Path](#system-prompt-path)
* [System Prompt Literal](#system-prompt-literal)
* [Custom Profile](#custom-profile)
* [Control model/provider overrides via authorization](#control-modelprovider-overrides-via-authorization)
* [Safety Shields](#safety-shields)
* [Authentication](#authentication)
* [CORS](#cors)
* [Default values](#default-values)
* [Allow credentials](#allow-credentials)
* [LLM Compatibility](#llm-compatibility)
* [Set LLM provider and model](#set-llm-provider-and-model)
* [Selecting provider and model](#selecting-provider-and-model)
* [Provider and model selection in REST API request](#provider-and-model-selection-in-rest-api-request)
* [Default provider and model](#default-provider-and-model)
* [Supported providers](#supported-providers)
* [Integration with Llama Stack](#integration-with-llama-stack)
* [Llama Stack as separate server](#llama-stack-as-separate-server)
* [MCP Server and Tool Configuration](#mcp-server-and-tool-configuration)
* [Configuring MCP Servers](#configuring-mcp-servers)
* [Configuring MCP Server Authentication](#configuring-mcp-server-authentication)
* [1. Static Tokens from Files (Recommended for Service Credentials)](#1-static-tokens-from-files-recommended-for-service-credentials)
* [2. Kubernetes Service Account Tokens (For K8s Deployments)](#2-kubernetes-service-account-tokens-for-k8s-deployments)
* [3. Client-Provided Tokens (For Per-User Authentication)](#3-client-provided-tokens-for-per-user-authentication)
* [Combining Authentication Methods](#combining-authentication-methods)
* [Authentication Method Comparison](#authentication-method-comparison)
* [Important: Automatic Server Skipping](#important-automatic-server-skipping)
* [Llama Stack project and configuration](#llama-stack-project-and-configuration)
* [Check connection to Llama Stack](#check-connection-to-llama-stack)
* [Llama Stack as client library](#llama-stack-as-client-library)
* [Llama Stack version check](#llama-stack-version-check)
* [User data collection](#user-data-collection)
* [System prompt](#system-prompt)
* [System Prompt Path](#system-prompt-path)
* [System Prompt Literal](#system-prompt-literal)
* [Custom Profile](#custom-profile)
* [Control model/provider overrides via authorization](#control-modelprovider-overrides-via-authorization)
* [Safety Shields](#safety-shields)
* [Authentication](#authentication)
* [CORS](#cors)
* [Default values](#default-values)
* [Allow credentials](#allow-credentials)
* [RAG Configuration](#rag-configuration)
* [Example configurations for inference](#example-configurations-for-inference)
* [Example configurations for inference](#example-configurations-for-inference)
* [Usage](#usage)
* [Make targets](#make-targets)
* [Running Linux container image](#running-linux-container-image)
* [Building Container Images](#building-container-images)
* [Llama-Stack as Separate Service (Server Mode)](#llama-stack-as-separate-service-server-mode)
* [macOS (arm64)](#macos-arm64)
* [Llama-Stack as Library (Library Mode)](#llama-stack-as-library-library-mode)
* [macOS](#macos)
* [Verify it's running properly](#verify-its-running-properly)
* [Custom Container Image](#custom-container-image)
* [Make targets](#make-targets)
* [Running Linux container image](#running-linux-container-image)
* [Building Container Images](#building-container-images)
* [Llama-Stack as Separate Service (Server Mode)](#llama-stack-as-separate-service-server-mode)
* [macOS (arm64)](#macos-arm64)
* [Llama-Stack as Library (Library Mode)](#llama-stack-as-library-library-mode)
* [macOS](#macos)
* [Verify it's running properly](#verify-its-running-properly)
* [Custom Container Image](#custom-container-image)
* [Endpoints](#endpoints)
* [OpenAPI specification](#openapi-specification)
* [Readiness Endpoint](#readiness-endpoint)
* [Liveness Endpoint](#liveness-endpoint)
* [OpenAPI specification](#openapi-specification)
* [Readiness Endpoint](#readiness-endpoint)
* [Liveness Endpoint](#liveness-endpoint)
* [Database structure](#database-structure)
* [Publish the service as Python package on PyPI](#publish-the-service-as-python-package-on-pypi)
* [Generate distribution archives to be uploaded into Python registry](#generate-distribution-archives-to-be-uploaded-into-python-registry)
* [Upload distribution archives into selected Python registry](#upload-distribution-archives-into-selected-python-registry)
* [Packages on PyPI and Test PyPI](#packages-on-pypi-and-test-pypi)
* [Generate distribution archives to be uploaded into Python registry](#generate-distribution-archives-to-be-uploaded-into-python-registry)
* [Upload distribution archives into selected Python registry](#upload-distribution-archives-into-selected-python-registry)
* [Packages on PyPI and Test PyPI](#packages-on-pypi-and-test-pypi)
* [Contributing](#contributing)
* [Testing](#testing)
* [License](#license)
* [Additional tools](#additional-tools)
* [Utility to generate OpenAPI schema](#utility-to-generate-openapi-schema)
* [Path](#path)
* [Usage](#usage-1)
* [Makefile target to generate OpenAPI specification](#makefile-target-to-generate-openapi-specification)
* [Utility to generate documentation from source code](#utility-to-generate-documentation-from-source-code)
* [Path](#path-1)
* [Usage](#usage-2)
* [Utility to generate OpenAPI schema](#utility-to-generate-openapi-schema)
* [Path](#path)
* [Usage](#usage-1)
* [Makefile target to generate OpenAPI specification](#makefile-target-to-generate-openapi-specification)
* [Utility to generate documentation from source code](#utility-to-generate-documentation-from-source-code)
* [Path](#path-1)
* [Usage](#usage-2)
* [Data Export Integration](#data-export-integration)
* [Quick Integration](#quick-integration)
* [Documentation](#documentation)
* [Quick Integration](#quick-integration)
* [Documentation](#documentation)
* [Project structure](#project-structure)
* [Configuration classes](#configuration-classes)
* [REST API](#rest-api)
* [Sequence diagrams](#sequence-diagrams)
* [Query endpoint REST API handler](#query-endpoint-rest-api-handler)
* [Streaming query endpoint REST API handler](#streaming-query-endpoint-rest-api-handler)
* [Versioning](#versioning)
* [Configuration classes](#configuration-classes)
* [REST API](#rest-api)
* [Sequence diagrams](#sequence-diagrams)
* [Query endpoint REST API handler](#query-endpoint-rest-api-handler)
* [Streaming query endpoint REST API handler](#streaming-query-endpoint-rest-api-handler)
* [Versioning](#versioning)
* [Development Tools](#development-tools)
* [MCP Mock Server](#mcp-mock-server)
* [Konflux](#konflux)
* [Updating Dependencies for Hermetic Builds](#updating-dependencies-for-hermetic-builds)
* [When to Update Dependency Files](#when-to-update-dependency-files)
* [Updating Python Dependencies](#updating-python-dependencies)
* [Updating RPM Dependencies](#updating-rpm-dependencies)
* [Updating Dependencies for Hermetic Builds](#updating-dependencies-for-hermetic-builds)
* [When to Update Dependency Files](#when-to-update-dependency-files)
* [Updating Python Dependencies](#updating-python-dependencies)
* [Updating RPM Dependencies](#updating-rpm-dependencies)

<!-- vim-markdown-toc -->

@@ -421,13 +429,13 @@ mcp_servers:
url: "http://weather-api:8080"
authorization_headers:
X-API-Key: "/var/secrets/weather-api-key"

# Kubernetes auth for internal services
- name: "internal-db"
url: "http://db-mcp.cluster.local:8080"
authorization_headers:
Authorization: "kubernetes"

# Mixed: static API key + per-user token
- name: "multi-tenant-service"
url: "http://multi-tenant:8080"
@@ -1176,10 +1184,14 @@ Update these files when you:
make konflux-requirements
```

This generates three platform-specific requirements files:
- `requirements.x86_64.txt` - x86_64 packages (excludes torch)
- `requirements.aarch64.txt` - ARM64 packages (excludes torch)
- `requirements.torch.txt` - CPU variant of torch (shared by both platforms)
This compiles Python dependencies from `pyproject.toml` using `uv`, splits packages by their source index (PyPI vs Red Hat's internal registry), and generates hermetic requirements files with pinned versions and hashes for Konflux builds.

**Files produced:**
- `requirements.hashes.source.txt` – PyPI packages with hashes
- `requirements.hashes.wheel.txt` – Red Hat registry packages with hashes
- `requirements-build.txt` – Build-time dependencies for source packages

The script also updates the Tekton pipeline configurations (`.tekton/lightspeed-stack-*.yaml`) with the list of pre-built wheel packages.
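A quick way to sanity-check the generated files is to confirm every pinned requirement carries a matching `--hash` option, since hermetic Konflux builds reject unhashed entries. This is a generic sketch: the sample file content is made up, and real entries carry full sha256 digests.

```shell
#!/bin/bash
# Illustrative sample of a generated, hash-pinned requirements file
# (hash values are placeholders, not real digests)
cat > /tmp/sample-reqs.txt <<'EOF'
numpy==2.3.5 \
    --hash=sha256:deadbeef
torch==2.9.0 \
    --hash=sha256:cafebabe
EOF

# Every "name==version" requirement line should be paired with a --hash line
reqs=$(grep -Ec '^[A-Za-z0-9_.-]+==' /tmp/sample-reqs.txt)
hashes=$(grep -c -- '--hash=sha256:' /tmp/sample-reqs.txt)
echo "requirements: $reqs, hashes: $hashes"   # requirements: 2, hashes: 2
[ "$reqs" -eq "$hashes" ] && echo "all pinned entries are hashed"
```

The same check could run in CI against `requirements.hashes.wheel.txt` and `requirements.hashes.source.txt` after regeneration, catching a generator regression before it reaches a build.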

### Updating RPM Dependencies

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -165,11 +165,11 @@ llslibdev = [
"opentelemetry-exporter-otlp>=1.34.1",
# API tool_runtime: inline::rag-runtime
"transformers>=4.34.0",
"numpy==2.2.6",
"numpy==2.3.5",
# API tool_runtime: remote::model-context-protocol
"mcp>=1.9.4",
# API post_training: inline::huggingface
"torch==2.7.1",
"torch==2.9.0",
"trl>=0.18.2",
"peft>=0.15.2",
# Other