diff --git a/docs/auth.md b/docs/auth.md index ec55b8603..dc54480b8 100644 --- a/docs/auth.md +++ b/docs/auth.md @@ -142,6 +142,22 @@ authentication: - Extracts user ID and username from configurable JWT claims - Returns default credentials (guest-like) if no `Authorization` header present (guest access) +### API Key Token (`api-key-token`) + +Authentication that checks that the configured API key is presented as a Bearer token + +**Configuration:** +```yaml + module: "api-key-token" + api_key_config: + api_key: "some-api-key" +``` + +**Behavior:** +- Extracts bearer token from the `Authorization` header +- Same user ID and username handling as `noop` +- Token is validated against the API key given in the configuration and is passed through for downstream use + ## Authorization System Authorization is controlled through role-based access control using two resolver types. diff --git a/docs/openapi.json b/docs/openapi.json index 27134327b..5d52a19ed 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -4360,6 +4360,19 @@ }, "components": { "schemas": { + "APIKeyTokenConfiguration": { + "properties": { + "api_key": { + "type": "string", + "title": "Api Key", + "default": "some-api-key" + } + }, + "additionalProperties": false, + "type": "object", + "title": "APIKeyTokenConfiguration", + "description": "API Key Token configuration." + }, "AccessRule": { "properties": { "role": { @@ -4514,6 +4527,16 @@ } ] }, + "api_key_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/APIKeyTokenConfiguration" + }, + { + "type": "null" + } + ] + }, "rh_identity_config": { "anyOf": [ { @@ -4670,14 +4693,16 @@ "type": "string" }, "type": "array", - "title": "Allow Origins", + "title": "Allow origins", + "description": "A list of origins allowed for cross-origin requests. An origin is the combination of protocol (http, https), domain (myapp.com, localhost, localhost.tiangolo.com), and port (80, 443, 8080). 
Use ['*'] to allow all origins.", "default": [ "*" ] }, "allow_credentials": { "type": "boolean", - "title": "Allow Credentials", + "title": "Allow credentials", + "description": "Indicate that cookies should be supported for cross-origin requests", "default": false }, "allow_methods": { @@ -4685,7 +4710,8 @@ "type": "string" }, "type": "array", - "title": "Allow Methods", + "title": "Allow methods", + "description": "A list of HTTP methods that should be allowed for cross-origin requests. You can use ['*'] to allow all standard methods.", "default": [ "*" ] @@ -4695,7 +4721,8 @@ "type": "string" }, "type": "array", - "title": "Allow Headers", + "title": "Allow headers", + "description": "A list of HTTP request headers that should be supported for cross-origin requests. You can use ['*'] to allow all headers. The Accept, Accept-Language, Content-Language and Content-Type headers are always allowed for simple CORS requests.", "default": [ "*" ] @@ -4704,35 +4731,47 @@ "additionalProperties": false, "type": "object", "title": "CORSConfiguration", - "description": "CORS configuration." + "description": "CORS configuration.\n\nCORS or 'Cross-Origin Resource Sharing' refers to the situations when a\nfrontend running in a browser has JavaScript code that communicates with a\nbackend, and the backend is in a different 'origin' than the frontend.\n\nUseful resources:\n\n - [CORS in FastAPI](https://fastapi.tiangolo.com/tutorial/cors/)\n - [Wikipedia article](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing)\n - [What is CORS?](https://dev.to/akshay_chauhan/what-is-cors-explained-8f1)" }, "Configuration": { "properties": { "name": { "type": "string", - "title": "Name" + "title": "Service name", + "description": "Name of the service. That value will be used in REST API endpoints." 
}, "service": { - "$ref": "#/components/schemas/ServiceConfiguration" + "$ref": "#/components/schemas/ServiceConfiguration", + "title": "Service configuration", + "description": "This section contains Lightspeed Core Stack service configuration." }, "llama_stack": { - "$ref": "#/components/schemas/LlamaStackConfiguration" + "$ref": "#/components/schemas/LlamaStackConfiguration", + "title": "Llama Stack configuration", + "description": "This section contains Llama Stack configuration. Lightspeed Core Stack service can call Llama Stack in library mode or in server mode." }, "user_data_collection": { - "$ref": "#/components/schemas/UserDataCollection" + "$ref": "#/components/schemas/UserDataCollection", + "title": "User data collection configuration", + "description": "This section contains configuration for the subsystem that collects user data (transcription history and feedback)." }, "database": { - "$ref": "#/components/schemas/DatabaseConfiguration" + "$ref": "#/components/schemas/DatabaseConfiguration", + "title": "Database Configuration", + "description": "Configuration for database to store conversation IDs and other runtime data" }, "mcp_servers": { "items": { "$ref": "#/components/schemas/ModelContextProtocolServer" }, "type": "array", - "title": "Mcp Servers" + "title": "Model Context Protocol Server and tools configuration", + "description": "MCP (Model Context Protocol) servers provide tools and capabilities to the AI agents. These are configured in this section. Only MCP servers defined in the lightspeed-stack.yaml configuration are available to the agents. Tools configured in the llama-stack run.yaml are not accessible to lightspeed-core agents." 
}, "authentication": { - "$ref": "#/components/schemas/AuthenticationConfiguration" + "$ref": "#/components/schemas/AuthenticationConfiguration", + "title": "Authentication configuration", + "description": "Authentication configuration" }, "authorization": { "anyOf": [ @@ -4742,7 +4781,9 @@ { "type": "null" } - ] + ], + "title": "Authorization configuration", + "description": "Lightspeed Core Stack implements a modular authentication and authorization system with multiple authentication methods. Authorization is configurable through role-based access control. Authentication is handled through selectable modules configured via the module field in the authentication configuration." }, "customization": { "anyOf": [ @@ -4752,23 +4793,31 @@ { "type": "null" } - ] + ], + "title": "Custom profile configuration", + "description": "It is possible to customize Lightspeed Core Stack via this section. System prompt can be customized and also different parts of the service can be replaced by custom Python modules." }, "inference": { - "$ref": "#/components/schemas/InferenceConfiguration" + "$ref": "#/components/schemas/InferenceConfiguration", + "title": "Inference configuration", + "description": "One LLM provider and one of its models can be selected as the defaults. When no provider+model pair is specified in REST API calls (query endpoints), the default provider and model are used." }, "conversation_cache": { - "$ref": "#/components/schemas/ConversationHistoryConfiguration" + "$ref": "#/components/schemas/ConversationHistoryConfiguration", + "title": "Conversation history configuration" }, "byok_rag": { "items": { "$ref": "#/components/schemas/ByokRag" }, "type": "array", - "title": "Byok Rag" + "title": "BYOK RAG configuration", + "description": "BYOK RAG configuration. 
This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file" }, "quota_handlers": { - "$ref": "#/components/schemas/QuotaHandlersConfiguration" + "$ref": "#/components/schemas/QuotaHandlersConfiguration", + "title": "Quota handlers", + "description": "Quota handlers configuration" } }, "additionalProperties": false, @@ -5116,7 +5165,7 @@ "additionalProperties": false, "type": "object", "title": "ConversationHistoryConfiguration", - "description": "Conversation cache configuration." + "description": "Conversation history configuration." }, "ConversationResponse": { "properties": { @@ -5395,7 +5444,9 @@ { "type": "null" } - ] + ], + "title": "SQLite configuration", + "description": "SQLite database configuration" }, "postgres": { "anyOf": [ @@ -5405,7 +5456,9 @@ { "type": "null" } - ] + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" } }, "additionalProperties": false, @@ -5705,7 +5758,8 @@ "max_entries": { "type": "integer", "exclusiveMinimum": 0.0, - "title": "Max Entries" + "title": "Max entries", + "description": "Maximum number of entries stored in the in-memory cache" } }, "additionalProperties": false, @@ -5917,25 +5971,31 @@ "properties": { "jsonpath": { "type": "string", - "title": "Jsonpath" + "title": "JSON path", + "description": "JSONPath expression to evaluate against the JWT payload" }, "operator": { - "$ref": "#/components/schemas/JsonPathOperator" + "$ref": "#/components/schemas/JsonPathOperator", + "title": "Operator", + "description": "JSON path comparison operator" }, "negate": { "type": "boolean", - "title": "Negate", + "title": "Negate rule", + "description": "If set to true, the meaning of the rule is negated", "default": false }, "value": { - "title": "Value" + "title": "Value", + "description": "Value to compare against" }, "roles": { "items": { "type": "string" }, "type": "array", - "title": "Roles" + "title": "List of roles", + "description": "Roles to be 
assigned if the rule matches" } }, "additionalProperties": false, @@ -5984,7 +6044,8 @@ "type": "null" } ], - "title": "Url" + "title": "Llama Stack URL", + "description": "URL to Llama Stack service; used when library mode is disabled" }, "api_key": { "anyOf": [ @@ -5997,7 +6058,8 @@ "type": "null" } ], - "title": "Api Key" + "title": "API key", + "description": "API key to access Llama Stack service" }, "use_as_library_client": { "anyOf": [ @@ -6008,7 +6070,8 @@ "type": "null" } ], - "title": "Use As Library Client" + "title": "Use as library", + "description": "When set to true Llama Stack will be used in library mode, not in server mode (default)" }, "library_client_config_path": { "anyOf": [ @@ -6019,28 +6082,32 @@ "type": "null" } ], - "title": "Library Client Config Path" + "title": "Llama Stack configuration path", + "description": "Path to configuration file used when Llama Stack is run in library mode" } }, "additionalProperties": false, "type": "object", "title": "LlamaStackConfiguration", - "description": "Llama stack configuration." 
+ "description": "Llama stack configuration.\n\nLlama Stack is a comprehensive system that provides a uniform set of tools\nfor building, scaling, and deploying generative AI applications, enabling\ndevelopers to create, integrate, and orchestrate multiple AI services and\ncapabilities into an adaptable setup.\n\nUseful resources:\n\n - [Llama Stack](https://www.llama.com/products/llama-stack/)\n - [Python Llama Stack client](https://github.com/llamastack/llama-stack-client-python)\n - [Build AI Applications with Llama Stack](https://llamastack.github.io/)" }, "ModelContextProtocolServer": { "properties": { "name": { "type": "string", - "title": "Name" + "title": "MCP name", + "description": "MCP server name that must be unique" }, "provider_id": { "type": "string", - "title": "Provider Id", + "title": "Provider ID", + "description": "MCP provider identification", "default": "model-context-protocol" }, "url": { "type": "string", - "title": "Url" + "title": "MCP server URL", + "description": "URL of the MCP server" } }, "additionalProperties": false, @@ -6050,7 +6117,7 @@ "url" ], "title": "ModelContextProtocolServer", - "description": "model context protocol server configuration." + "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and\ncapabilities to the AI agents. These are configured by this structure.\nOnly MCP servers defined in the lightspeed-stack.yaml configuration are\navailable to the agents. 
Tools configured in the llama-stack run.yaml\nare not accessible to lightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)" }, "ModelsResponse": { "properties": { @@ -6138,27 +6205,32 @@ "properties": { "host": { "type": "string", - "title": "Host", + "title": "Hostname", + "description": "Database server host or socket directory", "default": "localhost" }, "port": { "type": "integer", "exclusiveMinimum": 0.0, "title": "Port", + "description": "Database server port", "default": 5432 }, "db": { "type": "string", - "title": "Db" + "title": "Database name", + "description": "Database name to connect to" }, "user": { "type": "string", - "title": "User" + "title": "User name", + "description": "Database user name used to authenticate" }, "password": { "type": "string", "format": "password", "title": "Password", + "description": "Password used to authenticate", "writeOnly": true }, "namespace": { @@ -6170,17 +6242,20 @@ "type": "null" } ], - "title": "Namespace", + "title": "Name space", + "description": "Database namespace", "default": "lightspeed-stack" }, "ssl_mode": { "type": "string", - "title": "Ssl Mode", + "title": "SSL mode", + "description": "SSL mode", "default": "prefer" }, "gss_encmode": { "type": "string", - "title": "Gss Encmode", + "title": "GSS encmode", + "description": "This option determines whether or with what priority a secure GSS TCP/IP connection will be negotiated with the server.", "default": "prefer" }, "ca_cert_path": { @@ -6193,7 +6268,8 @@ "type": "null" } ], - "title": "Ca Cert Path" + "title": "CA certificate path", + "description": "Path to CA certificate" } }, "additionalProperties": false, @@ -6204,7 +6280,7 @@ "password" ], "title": "PostgreSQLDatabaseConfiguration", - "description": "PostgreSQL database configuration." 
+ "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing information about\nconversation IDs. It can also be leveraged to store conversation history and information\nabout quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)" }, "ProviderHealthStatus": { "properties": { @@ -6813,7 +6889,9 @@ { "type": "null" } - ] + ], + "title": "SQLite configuration", + "description": "SQLite database configuration" }, "postgres": { "anyOf": [ @@ -6823,28 +6901,34 @@ { "type": "null" } - ] + ], + "title": "PostgreSQL configuration", + "description": "PostgreSQL database configuration" }, "limiters": { "items": { "$ref": "#/components/schemas/QuotaLimiterConfiguration" }, "type": "array", - "title": "Limiters" + "title": "Quota limiters", + "description": "Quota limiters configuration" }, "scheduler": { - "$ref": "#/components/schemas/QuotaSchedulerConfiguration" + "$ref": "#/components/schemas/QuotaSchedulerConfiguration", + "title": "Quota scheduler", + "description": "Quota scheduler configuration" }, "enable_token_history": { "type": "boolean", - "title": "Enable Token History", + "title": "Enable token history", + "description": "Enables storing information about token usage history", "default": false } }, "additionalProperties": false, "type": "object", "title": "QuotaHandlersConfiguration", - "description": "Quota limiter configuration." + "description": "Quota limiter configuration.\n\nIt is possible to limit quota usage per user or per service or services\n(that typically run in one cluster). Each limit is configured as a separate\n_quota limiter_. 
It can be of type `user_limiter` or `cluster_limiter`\n(which is a name that makes sense in an OpenShift deployment)." }, "QuotaLimiterConfiguration": { "properties": { @@ -6854,25 +6938,30 @@ "user_limiter", "cluster_limiter" ], - "title": "Type" + "title": "Quota limiter type", + "description": "Quota limiter type, either user_limiter or cluster_limiter" }, "name": { "type": "string", - "title": "Name" + "title": "Quota limiter name", + "description": "Human readable quota limiter name" }, "initial_quota": { "type": "integer", "minimum": 0.0, - "title": "Initial Quota" + "title": "Initial quota", + "description": "Quota set at beginning of the period" }, "quota_increase": { "type": "integer", "minimum": 0.0, - "title": "Quota Increase" + "title": "Quota increase", + "description": "Delta value used to increase quota when period is reached" }, "period": { "type": "string", - "title": "Period" + "title": "Period", + "description": "Period specified in human readable form" } }, "additionalProperties": false, @@ -6885,7 +6974,7 @@ "period" ], "title": "QuotaLimiterConfiguration", - "description": "Configuration for one quota limiter." + "description": "Configuration for one quota limiter.\n\nThere are three configuration options for each limiter:\n\n1. ``period`` is specified in a human-readable form, see\n https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT\n for all possible options. When the end of the period is reached, the\n quota is reset or increased.\n2. ``initial_quota`` is the value set at the beginning of the period.\n3. ``quota_increase`` is the value (if specified) used to increase the\n quota when the period is reached.\n\nThere are two basic use cases:\n\n1. When the quota needs to be reset to a specific value periodically (for\n example on a weekly or monthly basis), set ``initial_quota`` to the\n required value.\n2. 
When the quota needs to be increased by a specific value periodically\n (for example on a daily basis), set ``quota_increase``." }, "QuotaSchedulerConfiguration": { "properties": { @@ -6893,9 +6982,11 @@ "type": "integer", "exclusiveMinimum": 0.0, "title": "Period", + "description": "Quota scheduler period specified in seconds", "default": 1 } }, + "additionalProperties": false, "type": "object", "title": "QuotaSchedulerConfiguration", "description": "Quota scheduler configuration." @@ -7201,46 +7292,56 @@ "host": { "type": "string", "title": "Host", + "description": "Service hostname", "default": "localhost" }, "port": { "type": "integer", "exclusiveMinimum": 0.0, "title": "Port", + "description": "Service port", "default": 8080 }, "auth_enabled": { "type": "boolean", - "title": "Auth Enabled", + "title": "Authentication enabled", + "description": "Enables the authentication subsystem", "default": false }, "workers": { "type": "integer", "exclusiveMinimum": 0.0, - "title": "Workers", + "title": "Number of workers", + "description": "Number of Uvicorn worker processes to start", "default": 1 }, "color_log": { "type": "boolean", - "title": "Color Log", + "title": "Color log", + "description": "Enables colorized logging", "default": true }, "access_log": { "type": "boolean", - "title": "Access Log", + "title": "Access log", + "description": "Enables logging of all access information", "default": true }, "tls_config": { - "$ref": "#/components/schemas/TLSConfiguration" + "$ref": "#/components/schemas/TLSConfiguration", + "title": "TLS configuration", + "description": "Transport Layer Security configuration for HTTPS support" }, "cors": { - "$ref": "#/components/schemas/CORSConfiguration" + "$ref": "#/components/schemas/CORSConfiguration", + "title": "CORS configuration", + "description": "Cross-Origin Resource Sharing configuration for cross-domain requests" } }, "additionalProperties": false, "type": "object", "title": "ServiceConfiguration", - "description": 
"Service configuration." + "description": "Service configuration.\n\nLightspeed Core Stack is a REST API service that accepts requests\non a specified hostname and port. It is also possible to enable\nauthentication and specify the number of Uvicorn workers. When more\nworkers are specified, the service can handle requests concurrently." }, "ServiceUnavailableResponse": { "properties": { @@ -7384,7 +7485,7 @@ "additionalProperties": false, "type": "object", "title": "TLSConfiguration", - "description": "TLS configuration.\n\nSee also:\n- https://fastapi.tiangolo.com/deployment/https/\n- https://en.wikipedia.org/wiki/Transport_Layer_Security" + "description": "TLS configuration.\n\nTransport Layer Security (TLS) is a cryptographic protocol designed to\nprovide communications security over a computer network, such as the\nInternet. The protocol is widely used in applications such as email,\ninstant messaging, and voice over IP, but its use in securing HTTPS remains\nthe most publicly visible.\n\nUseful resources:\n\n - [FastAPI HTTPS Deployment](https://fastapi.tiangolo.com/deployment/https/)\n - [Transport Layer Security Overview](https://en.wikipedia.org/wiki/Transport_Layer_Security)\n - [What is TLS](https://www.ssltrust.eu/learning/ssl/transport-layer-security-tls)" }, "ToolCall": { "properties": { diff --git a/docs/output.md b/docs/output.md index a21840c66..f55ea48ef 100644 --- a/docs/output.md +++ b/docs/output.md @@ -3868,6 +3868,17 @@ Examples +## APIKeyTokenConfiguration + + +API Key Token configuration. + + +| Field | Type | Description | +|-------|------|-------------| +| api_key | string | | + + ## AccessRule @@ -3929,6 +3940,7 @@ Authentication configuration. | k8s_cluster_api | | | | k8s_ca_cert_path | | | | jwk_config | | | +| api_key_config | | | | rh_identity_config | | | @@ -3994,13 +4006,23 @@ BYOK RAG configuration. CORS configuration. 
+CORS or 'Cross-Origin Resource Sharing' refers to the situations when a +frontend running in a browser has JavaScript code that communicates with a +backend, and the backend is in a different 'origin' than the frontend. + +Useful resources: + + - [CORS in FastAPI](https://fastapi.tiangolo.com/tutorial/cors/) + - [Wikipedia article](https://en.wikipedia.org/wiki/Cross-origin_resource_sharing) + - [What is CORS?](https://dev.to/akshay_chauhan/what-is-cors-explained-8f1) + | Field | Type | Description | |-------|------|-------------| -| allow_origins | array | | -| allow_credentials | boolean | | -| allow_methods | array | | -| allow_headers | array | | +| allow_origins | array | A list of origins allowed for cross-origin requests. An origin is the combination of protocol (http, https), domain (myapp.com, localhost, localhost.tiangolo.com), and port (80, 443, 8080). Use ['*'] to allow all origins. | +| allow_credentials | boolean | Indicate that cookies should be supported for cross-origin requests | +| allow_methods | array | A list of HTTP methods that should be allowed for cross-origin requests. You can use ['*'] to allow all standard methods. | +| allow_headers | array | A list of HTTP request headers that should be supported for cross-origin requests. You can use ['*'] to allow all headers. The Accept, Accept-Language, Content-Language and Content-Type headers are always allowed for simple CORS requests. | ## Configuration @@ -4011,19 +4033,19 @@ Global service configuration. | Field | Type | Description | |-------|------|-------------| -| name | string | | -| service | | | -| llama_stack | | | -| user_data_collection | | | -| database | | | -| mcp_servers | array | | -| authentication | | | -| authorization | | | -| customization | | | -| inference | | | +| name | string | Name of the service. That value will be used in REST API endpoints. | +| service | | This section contains Lightspeed Core Stack service configuration. 
| +| llama_stack | | This section contains Llama Stack configuration. Lightspeed Core Stack service can call Llama Stack in library mode or in server mode. | +| user_data_collection | | This section contains configuration for the subsystem that collects user data (transcription history and feedback). | +| database | | Configuration for database to store conversation IDs and other runtime data | +| mcp_servers | array | MCP (Model Context Protocol) servers provide tools and capabilities to the AI agents. These are configured in this section. Only MCP servers defined in the lightspeed-stack.yaml configuration are available to the agents. Tools configured in the llama-stack run.yaml are not accessible to lightspeed-core agents. | +| authentication | | Authentication configuration | +| authorization | | Lightspeed Core Stack implements a modular authentication and authorization system with multiple authentication methods. Authorization is configurable through role-based access control. Authentication is handled through selectable modules configured via the module field in the authentication configuration. | +| customization | | It is possible to customize Lightspeed Core Stack via this section. System prompt can be customized and also different parts of the service can be replaced by custom Python modules. | +| inference | | One LLM provider and one of its models can be selected as the defaults. When no provider+model pair is specified in REST API calls (query endpoints), the default provider and model are used. | | conversation_cache | | | -| byok_rag | array | | -| quota_handlers | | | +| byok_rag | array | BYOK RAG configuration. This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file | +| quota_handlers | | Quota handlers configuration | ## ConfigurationResponse @@ -4102,7 +4124,7 @@ Attributes: ## ConversationHistoryConfiguration -Conversation cache configuration. +Conversation history configuration. 
| Field | Type | Description | @@ -4231,8 +4253,8 @@ Database configuration. | Field | Type | Description | |-------|------|-------------| -| sqlite | | | -| postgres | | | +| sqlite | | SQLite database configuration | +| postgres | | PostgreSQL database configuration | ## DetailModel @@ -4373,7 +4395,7 @@ In-memory cache configuration. | Field | Type | Description | |-------|------|-------------| -| max_entries | integer | | +| max_entries | integer | Maximum number of entries stored in the in-memory cache | ## InferenceConfiguration @@ -4459,11 +4481,11 @@ Rule for extracting roles from JWT claims. | Field | Type | Description | |-------|------|-------------| -| jsonpath | string | | -| operator | | | -| negate | boolean | | -| value | | | -| roles | array | | +| jsonpath | string | JSONPath expression to evaluate against the JWT payload | +| operator | | JSON path comparison operator | +| negate | boolean | If set to true, the meaning of the rule is negated | +| value | | Value to compare against | +| roles | array | Roles to be assigned if the rule matches | ## LivenessResponse @@ -4485,26 +4507,49 @@ Attributes: Llama stack configuration. +Llama Stack is a comprehensive system that provides a uniform set of tools +for building, scaling, and deploying generative AI applications, enabling +developers to create, integrate, and orchestrate multiple AI services and +capabilities into an adaptable setup. 
+ +Useful resources: + + - [Llama Stack](https://www.llama.com/products/llama-stack/) + - [Python Llama Stack client](https://github.com/llamastack/llama-stack-client-python) + - [Build AI Applications with Llama Stack](https://llamastack.github.io/) + | Field | Type | Description | |-------|------|-------------| -| url | | | -| api_key | | | -| use_as_library_client | | | -| library_client_config_path | | | +| url | | URL to Llama Stack service; used when library mode is disabled | +| api_key | | API key to access Llama Stack service | +| use_as_library_client | | When set to true Llama Stack will be used in library mode, not in server mode (default) | +| library_client_config_path | | Path to configuration file used when Llama Stack is run in library mode | ## ModelContextProtocolServer -model context protocol server configuration. +Model context protocol server configuration. + +MCP (Model Context Protocol) servers provide tools and +capabilities to the AI agents. These are configured by this structure. +Only MCP servers defined in the lightspeed-stack.yaml configuration are +available to the agents. Tools configured in the llama-stack run.yaml +are not accessible to lightspeed-core agents. + +Useful resources: + +- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro) +- [MCP FAQs](https://modelcontextprotocol.io/faqs) +- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol) | Field | Type | Description | |-------|------|-------------| -| name | string | | -| provider_id | string | | -| url | string | | +| name | string | MCP server name that must be unique | +| provider_id | string | MCP provider identification | +| url | string | URL of the MCP server | ## ModelsResponse @@ -4535,18 +4580,28 @@ Model representing a response to models request. PostgreSQL database configuration. +PostgreSQL database is used by Lightspeed Core Stack service for storing information about +conversation IDs. 
It can also be leveraged to store conversation history and information +about quota usage. + +Useful resources: + +- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html) +- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/) +- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/) + | Field | Type | Description | |-------|------|-------------| -| host | string | | -| port | integer | | -| db | string | | -| user | string | | -| password | string | | -| namespace | | | -| ssl_mode | string | | -| gss_encmode | string | | -| ca_cert_path | | | +| host | string | Database server host or socket directory | +| port | integer | Database server port | +| db | string | Database name to connect to | +| user | string | Database user name used to authenticate | +| password | string | Password used to authenticate | +| namespace | | Database namespace | +| ssl_mode | string | SSL mode | +| gss_encmode | string | This option determines whether or with what priority a secure GSS TCP/IP connection will be negotiated with the server. | +| ca_cert_path | | Path to CA certificate | ## ProviderHealthStatus @@ -4675,14 +4730,19 @@ Attributes: Quota limiter configuration. +It is possible to limit quota usage per user or per service or services +(that typically run in one cluster). Each limit is configured as a separate +_quota limiter_. It can be of type `user_limiter` or `cluster_limiter` +(which is a name that makes sense in an OpenShift deployment). 
+ | Field | Type | Description | |-------|------|-------------| -| sqlite | | | -| postgres | | | -| limiters | array | | -| scheduler | | | -| enable_token_history | boolean | | +| sqlite | | SQLite database configuration | +| postgres | | PostgreSQL database configuration | +| limiters | array | Quota limiters configuration | +| scheduler | | Quota scheduler configuration | +| enable_token_history | boolean | Enables storing information about token usage history | ## QuotaLimiterConfiguration @@ -4690,14 +4750,32 @@ Quota limiter configuration. Configuration for one quota limiter. +There are three configuration options for each limiter: + +1. ``period`` is specified in a human-readable form, see + https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT + for all possible options. When the end of the period is reached, the + quota is reset or increased. +2. ``initial_quota`` is the value set at the beginning of the period. +3. ``quota_increase`` is the value (if specified) used to increase the + quota when the period is reached. + +There are two basic use cases: + +1. When the quota needs to be reset to a specific value periodically (for + example on a weekly or monthly basis), set ``initial_quota`` to the + required value. +2. When the quota needs to be increased by a specific value periodically + (for example on a daily basis), set ``quota_increase``. 
+ | Field | Type | Description | |-------|------|-------------| -| type | string | | -| name | string | | -| initial_quota | integer | | -| quota_increase | integer | | -| period | string | | +| type | string | Quota limiter type, either user_limiter or cluster_limiter | +| name | string | Human readable quota limiter name | +| initial_quota | integer | Quota set at beginning of the period | +| quota_increase | integer | Delta value used to increase quota when period is reached | +| period | string | Period specified in human readable form | ## QuotaSchedulerConfiguration @@ -4708,7 +4786,7 @@ Quota scheduler configuration. | Field | Type | Description | |-------|------|-------------| -| period | integer | | +| period | integer | Quota scheduler period specified in seconds | ## RAGChunk @@ -4814,17 +4892,22 @@ SQLite database configuration. Service configuration. +Lightspeed Core Stack is a REST API service that accepts requests +on a specified hostname and port. It is also possible to enable +authentication and specify the number of Uvicorn workers. When more +workers are specified, the service can handle requests concurrently. + | Field | Type | Description | |-------|------|-------------| -| host | string | | -| port | integer | | -| auth_enabled | boolean | | -| workers | integer | | -| color_log | boolean | | -| access_log | boolean | | -| tls_config | | | -| cors | | | +| host | string | Service hostname | +| port | integer | Service port | +| auth_enabled | boolean | Enables the authentication subsystem | +| workers | integer | Number of Uvicorn worker processes to start | +| color_log | boolean | Enables colorized logging | +| access_log | boolean | Enables logging of all access information | +| tls_config | | Transport Layer Security configuration for HTTPS support | +| cors | | Cross-Origin Resource Sharing configuration for cross-domain requests | ## ServiceUnavailableResponse @@ -4871,9 +4954,17 @@ Attributes: TLS configuration. 
-See also: -- https://fastapi.tiangolo.com/deployment/https/ -- https://en.wikipedia.org/wiki/Transport_Layer_Security +Transport Layer Security (TLS) is a cryptographic protocol designed to +provide communications security over a computer network, such as the +Internet. The protocol is widely used in applications such as email, +instant messaging, and voice over IP, but its use in securing HTTPS remains +the most publicly visible. + +Useful resources: + + - [FastAPI HTTPS Deployment](https://fastapi.tiangolo.com/deployment/https/) + - [Transport Layer Security Overview](https://en.wikipedia.org/wiki/Transport_Layer_Security) + - [What is TLS](https://www.ssltrust.eu/learning/ssl/transport-layer-security-tls) | Field | Type | Description | diff --git a/examples/lightspeed-stack-api-key-auth.yaml b/examples/lightspeed-stack-api-key-auth.yaml new file mode 100644 index 000000000..29efd001c --- /dev/null +++ b/examples/lightspeed-stack-api-key-auth.yaml @@ -0,0 +1,16 @@ +name: API Key Token Authentication Example +service: + host: localhost + port: 8080 + auth_enabled: true + workers: 1 + color_log: true + access_log: true +llama_stack: + use_as_library_client: false + url: http://localhost:8321 +authentication: + module: "api-key-token" + api_key_config: + api_key: "some-api-key" + diff --git a/src/authentication/README.md b/src/authentication/README.md index d76702548..d0c330a3b 100644 --- a/src/authentication/README.md +++ b/src/authentication/README.md @@ -3,6 +3,9 @@ ## [__init__.py](__init__.py) This package contains authentication code and modules. +## [api_key_token.py](api_key_token.py) +Authentication flow for FastAPI endpoints with a provided API key. + ## [interface.py](interface.py) Abstract base class for all authentication method implementations. 
diff --git a/src/authentication/__init__.py b/src/authentication/__init__.py index ecf9b32dc..16b00111b 100644 --- a/src/authentication/__init__.py +++ b/src/authentication/__init__.py @@ -4,7 +4,14 @@ import os import constants -from authentication import jwk_token, k8s, noop, noop_with_token, rh_identity +from authentication import ( + jwk_token, + k8s, + noop, + noop_with_token, + rh_identity, + api_key_token, +) from authentication.interface import AuthInterface from configuration import LogicError, configuration @@ -54,6 +61,11 @@ def get_auth_dependency( required_entitlements=rh_identity_config.required_entitlements, virtual_path=virtual_path, ) + case constants.AUTH_MOD_APIKEY_TOKEN: + return api_key_token.APIKeyTokenAuthDependency( + config=configuration.authentication_configuration.api_key_configuration, + virtual_path=virtual_path, + ) case _: err_msg = f"Unsupported authentication module '{module}'" logger.error(err_msg) diff --git a/src/authentication/api_key_token.py b/src/authentication/api_key_token.py new file mode 100644 index 000000000..79909349d --- /dev/null +++ b/src/authentication/api_key_token.py @@ -0,0 +1,74 @@ +"""Authentication flow for FastAPI endpoints with a provided API key. + +Behavior: +- Reads a user token from request headers via `authentication.utils.extract_user_token` and verifies +that the value equals the API key given in the configuration. +- Returns a tuple: (DEFAULT_USER_UID, DEFAULT_USER_NAME, skip_userid_check, user_token). 
+""" + +import secrets + +from fastapi import Request, HTTPException, status + +from constants import ( + DEFAULT_USER_NAME, + DEFAULT_VIRTUAL_PATH, + DEFAULT_USER_UID, +) +from authentication.interface import AuthInterface +from authentication.utils import extract_user_token +from log import get_logger +from models.config import APIKeyTokenConfiguration + +logger = get_logger(__name__) + + +class APIKeyTokenAuthDependency( + AuthInterface +): # pylint: disable=too-few-public-methods + """FastAPI dependency for API key token authentication. + + Validates bearer tokens against a configured API key and returns + user authentication information for authorized requests. + """ + + def __init__( + self, config: APIKeyTokenConfiguration, virtual_path: str = DEFAULT_VIRTUAL_PATH + ) -> None: + """Initialize the API key token authentication dependency. + + Args: + config: The API key token configuration containing the API key. + virtual_path: The virtual path for the service (default: DEFAULT_VIRTUAL_PATH). + """ + self.virtual_path: str = virtual_path + self.config: APIKeyTokenConfiguration = config + self.skip_userid_check = True + + async def __call__(self, request: Request) -> tuple[str, str, bool, str]: + """Validate FastAPI Requests for authentication and authorization. + + Args: + request: The FastAPI request object. + + Returns: + A tuple containing (user_uid, username, skip_userid_check, user_token) + if authentication succeeds. + + Raises: + HTTPException: If the bearer token is missing or + doesn't match the configured API key (HTTP 401). + """ + # try to extract user token from request + user_token = extract_user_token(request.headers) + + # API Key validation. 
Constant-time compare on bytes; str form rejects non-ASCII + if not secrets.compare_digest( + user_token.encode(), self.config.api_key.get_secret_value().encode() + ): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid API Key", + ) + + return DEFAULT_USER_UID, DEFAULT_USER_NAME, self.skip_userid_check, user_token diff --git a/src/authorization/middleware.py b/src/authorization/middleware.py index 8cb27e02e..8c914d5b6 100644 --- a/src/authorization/middleware.py +++ b/src/authorization/middleware.py @@ -37,6 +37,7 @@ def get_authorization_resolvers() -> Tuple[RolesResolver, AccessResolver]: constants.AUTH_MOD_NOOP | constants.AUTH_MOD_K8S | constants.AUTH_MOD_NOOP_WITH_TOKEN + | constants.AUTH_MOD_APIKEY_TOKEN ): return ( NoopRolesResolver(), diff --git a/src/constants.py b/src/constants.py index 681e29693..82ea14151 100644 --- a/src/constants.py +++ b/src/constants.py @@ -99,6 +99,7 @@ AUTH_MOD_K8S = "k8s" AUTH_MOD_NOOP = "noop" AUTH_MOD_NOOP_WITH_TOKEN = "noop-with-token" +AUTH_MOD_APIKEY_TOKEN = "api-key-token" AUTH_MOD_JWK_TOKEN = "jwk-token" AUTH_MOD_RH_IDENTITY = "rh-identity" # Supported authentication modules @@ -108,6 +109,7 @@ AUTH_MOD_NOOP, AUTH_MOD_NOOP_WITH_TOKEN, AUTH_MOD_JWK_TOKEN, + AUTH_MOD_APIKEY_TOKEN, AUTH_MOD_RH_IDENTITY, } ) diff --git a/src/models/config.py b/src/models/config.py index ae4946cb7..0eec2d2d0 100644 --- a/src/models/config.py +++ b/src/models/config.py @@ -700,6 +700,21 @@ class RHIdentityConfiguration(ConfigurationBase): required_entitlements: Optional[list[str]] = None +class APIKeyTokenConfiguration(ConfigurationBase): + """API Key Token configuration.""" + + # Use SecretStr to prevent accidental exposure in logs or error messages. 
+ api_key: SecretStr = Field( + min_length=1, + title="API key", + json_schema_extra={ + "format": "password", + "writeOnly": True, + "examples": ["some-api-key"], + }, + ) + + class AuthenticationConfiguration(ConfigurationBase): """Authentication configuration.""" @@ -708,6 +723,7 @@ class AuthenticationConfiguration(ConfigurationBase): k8s_cluster_api: Optional[AnyHttpUrl] = None k8s_ca_cert_path: Optional[FilePath] = None jwk_config: Optional[JwkConfiguration] = None + api_key_config: Optional[APIKeyTokenConfiguration] = None rh_identity_config: Optional[RHIdentityConfiguration] = None @model_validator(mode="after") @@ -733,6 +749,17 @@ def check_authentication_model(self) -> Self: "when using RH Identity authentication" ) + if self.module == constants.AUTH_MOD_APIKEY_TOKEN: + if self.api_key_config is None: + raise ValueError( + "API Key configuration section must be specified " + "when using API Key token authentication" + ) + if self.api_key_config.api_key.get_secret_value() is None: + raise ValueError( + "api_key parameter must be specified when using API_KEY token authentication" + ) + return self @property @@ -757,6 +784,17 @@ def rh_identity_configuration(self) -> RHIdentityConfiguration: raise ValueError("RH Identity configuration should not be None") return self.rh_identity_config + @property + def api_key_configuration(self) -> APIKeyTokenConfiguration: + """Return API_KEY configuration if the module is API_KEY token.""" + if self.module != constants.AUTH_MOD_APIKEY_TOKEN: + raise ValueError( + "API Key configuration is only available for API Key token authentication module" + ) + if self.api_key_config is None: + raise ValueError("API Key configuration should not be None") + return self.api_key_config + @dataclass class CustomProfile: diff --git a/src/utils/README.md b/src/utils/README.md index 1b7583522..f4431b83f 100644 --- a/src/utils/README.md +++ b/src/utils/README.md @@ -24,6 +24,9 @@ MCP headers handling. 
## [quota.py](quota.py) Quota handling helper functions. +## [responses.py](responses.py) +Utility functions for processing Responses API output. + ## [shields.py](shields.py) Utility functions for working with Llama Stack shields. diff --git a/tests/unit/authentication/README.md b/tests/unit/authentication/README.md index 5fef8ffa9..7daf60637 100644 --- a/tests/unit/authentication/README.md +++ b/tests/unit/authentication/README.md @@ -3,6 +3,9 @@ ## [__init__.py](__init__.py) Authentication unit tests package. +## [test_api_key_token.py](test_api_key_token.py) +Unit tests for functions defined in authentication/api_key_token.py + ## [test_auth.py](test_auth.py) Unit tests for functions defined in authentication/__init__.py diff --git a/tests/unit/authentication/test_api_key_token.py b/tests/unit/authentication/test_api_key_token.py new file mode 100644 index 000000000..7fd577dd3 --- /dev/null +++ b/tests/unit/authentication/test_api_key_token.py @@ -0,0 +1,122 @@ +# pylint: disable=redefined-outer-name + +"""Unit tests for functions defined in authentication/api_key_token.py""" + +from fastapi import Request, HTTPException +import pytest +from pydantic import SecretStr + +from authentication.api_key_token import APIKeyTokenAuthDependency +from constants import DEFAULT_USER_NAME, DEFAULT_USER_UID +from models.config import APIKeyTokenConfiguration + + +@pytest.fixture +def default_api_key_token_configuration() -> APIKeyTokenConfiguration: + """Default APIKeyTokenConfiguration for testing.""" + return APIKeyTokenConfiguration(api_key=SecretStr("some-test-api-key")) + + +async def test_api_key_with_token_auth_dependency( + default_api_key_token_configuration: APIKeyTokenConfiguration, +) -> None: + """Test the APIKeyTokenAuthDependency class with default user ID.""" + dependency = APIKeyTokenAuthDependency(default_api_key_token_configuration) + + request = Request( + scope={ + "type": "http", + "query_string": b"", + "headers": [ + (b"authorization", b"Bearer 
some-test-api-key"), + ], + }, + ) + + # Call the dependency + user_id, username, skip_userid_check, user_token = await dependency(request) + + # Assert the expected values + assert user_id == DEFAULT_USER_UID + assert username == DEFAULT_USER_NAME + assert skip_userid_check is True + assert user_token == default_api_key_token_configuration.api_key.get_secret_value() + + +async def test_api_key_with_token_auth_dependency_no_token( + default_api_key_token_configuration: APIKeyTokenConfiguration, +) -> None: + """ + Test that the check for the Authorization header is in place. + + Test that APIKeyTokenAuthDependency raises an HTTPException when no + Authorization header is present in the request. + + Asserts that the exception has a status code of 401 and the detail cause + "No Authorization header found". + """ + dependency = APIKeyTokenAuthDependency(default_api_key_token_configuration) + + # Create a mock request without an Authorization header + request = Request( + scope={ + "type": "http", + "query_string": b"", + "headers": [], + }, + ) + + # Assert that an HTTPException is raised when no Authorization header is found + with pytest.raises(HTTPException) as exc_info: + await dependency(request) + + assert exc_info.value.status_code == 401 + assert exc_info.value.detail["cause"] == "No Authorization header found" + + +async def test_api_key_with_token_auth_dependency_no_bearer( + default_api_key_token_configuration: APIKeyTokenConfiguration, +) -> None: + """Test APIKeyTokenAuthDependency with a non-Bearer Authorization header.""" + dependency = APIKeyTokenAuthDependency(default_api_key_token_configuration) + + # Create a mock request whose Authorization header is not a Bearer token + request = Request( + scope={ + "type": "http", + "query_string": b"", + "headers": [(b"authorization", b"NotBearer anything")], + }, + ) + + # Assert that an HTTPException is raised when no bearer token is found + with pytest.raises(HTTPException) as exc_info: + await dependency(request) + + assert exc_info.value.status_code == 401 + assert 
exc_info.value.detail["cause"] == "No token found in Authorization header" + + +async def test_api_key_with_token_auth_dependency_invalid( + default_api_key_token_configuration: APIKeyTokenConfiguration, +) -> None: + """Test the APIKeyTokenAuthDependency class with default user ID, + where token's value is not the one from configuration.""" + dependency = APIKeyTokenAuthDependency(default_api_key_token_configuration) + + request = Request( + scope={ + "type": "http", + "query_string": b"", + "headers": [ + (b"authorization", b"Bearer another-test-api-key"), + ], + }, + ) + + # Assert that an HTTPException is raised when the API key is invalid. + with pytest.raises(HTTPException) as exc_info: + await dependency(request) + + assert exc_info.value.status_code == 401 + assert exc_info.value.detail == "Invalid API Key" diff --git a/tests/unit/models/config/test_authentication_configuration.py b/tests/unit/models/config/test_authentication_configuration.py index 3534ccdcf..7d89f7644 100644 --- a/tests/unit/models/config/test_authentication_configuration.py +++ b/tests/unit/models/config/test_authentication_configuration.py @@ -4,7 +4,7 @@ import pytest -from pydantic import ValidationError +from pydantic import ValidationError, SecretStr from models.config import ( AuthenticationConfiguration, @@ -14,6 +14,7 @@ LlamaStackConfiguration, ServiceConfiguration, UserDataCollection, + APIKeyTokenConfiguration, ) from constants import ( @@ -21,6 +22,7 @@ AUTH_MOD_K8S, AUTH_MOD_JWK_TOKEN, AUTH_MOD_RH_IDENTITY, + AUTH_MOD_APIKEY_TOKEN, ) @@ -349,3 +351,43 @@ def test_authentication_configuration_in_config_jwktoken() -> None: assert cfg.authentication.skip_tls_verification is True assert cfg.authentication.k8s_ca_cert_path == Path("tests/configuration/server.crt") assert cfg.authentication.k8s_cluster_api is None + + +def test_authentication_configuration_api_token() -> None: + """Test the AuthenticationConfiguration with API Token.""" + + auth_config = 
AuthenticationConfiguration( + module=AUTH_MOD_APIKEY_TOKEN, + skip_tls_verification=False, + k8s_ca_cert_path=None, + k8s_cluster_api=None, + api_key_config=APIKeyTokenConfiguration(api_key=SecretStr("my-api-key")), + ) + assert auth_config is not None + assert auth_config.module == AUTH_MOD_APIKEY_TOKEN + assert auth_config.skip_tls_verification is False + assert auth_config.k8s_ca_cert_path is None + assert auth_config.k8s_cluster_api is None + + assert auth_config.api_key_config is not None + assert auth_config.api_key_configuration is auth_config.api_key_config + assert auth_config.api_key_configuration.api_key is not None + assert ( + auth_config.api_key_configuration.api_key is auth_config.api_key_config.api_key + ) + + +def test_authentication_configuration_api_key_but_insufficient_config() -> None: + """Test AuthenticationConfiguration with API Token module but no api_key_config.""" + + with pytest.raises( + ValidationError, + match="API Key configuration section must be " + "specified when using API Key token authentication", + ): + AuthenticationConfiguration( + module=AUTH_MOD_APIKEY_TOKEN, + skip_tls_verification=False, + k8s_ca_cert_path=None, + k8s_cluster_api=None, + ) diff --git a/tests/unit/models/config/test_dump_configuration.py b/tests/unit/models/config/test_dump_configuration.py index fce7cdc0c..38177a8a7 100644 --- a/tests/unit/models/config/test_dump_configuration.py +++ b/tests/unit/models/config/test_dump_configuration.py @@ -149,6 +149,7 @@ def test_dump_configuration(tmp_path: Path) -> None: "k8s_ca_cert_path": None, "k8s_cluster_api": None, "jwk_config": None, + "api_key_config": None, "rh_identity_config": None, }, "customization": None, @@ -448,6 +449,7 @@ def test_dump_configuration_with_quota_limiters(tmp_path: Path) -> None: "k8s_ca_cert_path": None, "k8s_cluster_api": None, "jwk_config": None, + "api_key_config": None, "rh_identity_config": None, }, "customization": None, @@ -632,6 +634,7 @@ def test_dump_configuration_byok(tmp_path: Path) -> None: 
"k8s_ca_cert_path": None, "k8s_cluster_api": None, "jwk_config": None, + "api_key_config": None, "rh_identity_config": None, }, "customization": None, diff --git a/tests/unit/utils/README.md b/tests/unit/utils/README.md index 12f2d5bd5..4b785131d 100644 --- a/tests/unit/utils/README.md +++ b/tests/unit/utils/README.md @@ -24,6 +24,9 @@ Unit tests for utility function to check Llama Stack version. ## [test_mcp_headers.py](test_mcp_headers.py) Unit tests for MCP headers utility functions. +## [test_responses.py](test_responses.py) +Unit tests for utils/responses.py functions. + ## [test_suid.py](test_suid.py) Unit tests for functions defined in utils.suid module.