From 8d2a65764759bba9e612edcfa0821626e765af2b Mon Sep 17 00:00:00 2001 From: Pavel Tisnovsky Date: Sun, 16 Nov 2025 10:25:52 +0100 Subject: [PATCH] LCORE-946: updated REST API documentation --- docs/openapi.json | 88 ++++++++++++++++++++++++++++++++++++++++++++++- docs/openapi.md | 46 +++++++++++++++++++++---- docs/output.md | 46 +++++++++++++++++++++---- 3 files changed, 165 insertions(+), 15 deletions(-) diff --git a/docs/openapi.json b/docs/openapi.json index 82ada88a9..c4f4ed005 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -411,7 +411,7 @@ "streaming_query" ], "summary": "Streaming Query Endpoint Handler", - "description": "Handle request to the /streaming_query endpoint.\n\nThis endpoint receives a query request, authenticates the user,\nselects the appropriate model and provider, and streams\nincremental response events from the Llama Stack backend to the\nclient. Events include start, token updates, tool calls, turn\ncompletions, errors, and end-of-stream metadata. Optionally\nstores the conversation transcript if enabled in configuration.\n\nReturns:\n StreamingResponse: An HTTP streaming response yielding\n SSE-formatted events for the query lifecycle.\n\nRaises:\n HTTPException: Returns HTTP 500 if unable to connect to the\n Llama Stack server.", + "description": "Handle request to the /streaming_query endpoint using Agent API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Agent API specific retrieve_response and response generator functions.\n\nReturns:\n StreamingResponse: An HTTP streaming response yielding\n SSE-formatted events for the query lifecycle.\n\nRaises:\n HTTPException: Returns HTTP 500 if unable to connect to the\n Llama Stack server.", "operationId": "streaming_query_endpoint_handler_v1_streaming_query_post", "requestBody": { "content": { @@ -1306,6 +1306,92 @@ } } }, + "/v2/streaming_query": { + "post": { + "tags": [ + "streaming_query_v2" + ], + "summary": "Streaming Query Endpoint Handler V2", + "description": "Handle request to the /streaming_query endpoint using Responses API.\n\nThis is a wrapper around streaming_query_endpoint_handler_base that provides\nthe Responses API specific retrieve_response and response generator functions.\n\nReturns:\n StreamingResponse: An HTTP streaming response yielding\n SSE-formatted events for the query lifecycle.\n\nRaises:\n HTTPException: Returns HTTP 500 if unable to connect to the\n Llama Stack server.", + "operationId": "streaming_query_endpoint_handler_v2_v2_streaming_query_post", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/QueryRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Streaming response with Server-Sent Events", + "content": { + "application/json": { + "schema": { + "type": "string", + "example": "data: {\"event\": \"start\", \"data\": {\"conversation_id\": \"123e4567-e89b-12d3-a456-426614174000\"}}\n\ndata: {\"event\": \"token\", \"data\": {\"id\": 0, \"token\": \"Hello\"}}\n\ndata: {\"event\": \"end\", \"data\": {\"referenced_documents\": [], \"truncated\": null, \"input_tokens\": 0, \"output_tokens\": 0}, \"available_quotas\": {}}\n\n" + } + }, + "text/plain": { + "schema": { + "type": "string", + "example": "Hello world!\n\n---\n\nReference: https://example.com/doc" + } + } + } + }, + "400": { + "description": "Missing or invalid credentials provided by client", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + } + } + } + }, + "401": { + "description": "Unauthorized: Invalid or missing Bearer token for k8s auth", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UnauthorizedResponse" + } + } + } + }, + "403": { + "description": "User is not authorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ForbiddenResponse" + } + } + } + }, + "500": { + "description": "Internal Server Error", + "detail": { + "response": "Unable to connect to Llama Stack", + "cause": "Connection error." + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, "/readiness": { "get": { "tags": [ diff --git a/docs/openapi.md b/docs/openapi.md index b4b41c5c6..32efedce3 100644 --- a/docs/openapi.md +++ b/docs/openapi.md @@ -227,14 +227,10 @@ Returns: > **Streaming Query Endpoint Handler** -Handle request to the /streaming_query endpoint. +Handle request to the /streaming_query endpoint using Agent API. -This endpoint receives a query request, authenticates the user, -selects the appropriate model and provider, and streams -incremental response events from the Llama Stack backend to the -client. Events include start, token updates, tool calls, turn -completions, errors, and end-of-stream metadata. Optionally -stores the conversation transcript if enabled in configuration. +This is a wrapper around streaming_query_endpoint_handler_base that provides +the Agent API specific retrieve_response and response generator functions. Returns: StreamingResponse: An HTTP streaming response yielding @@ -587,6 +583,42 @@ Returns: | 429 | The quota has been exceeded | [QuotaExceededResponse](#quotaexceededresponse) | | 500 | Internal Server Error | | | 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) | +## POST `/v2/streaming_query` + +> **Streaming Query Endpoint Handler V2** + +Handle request to the /streaming_query endpoint using Responses API. + +This is a wrapper around streaming_query_endpoint_handler_base that provides +the Responses API specific retrieve_response and response generator functions. + +Returns: + StreamingResponse: An HTTP streaming response yielding + SSE-formatted events for the query lifecycle. + +Raises: + HTTPException: Returns HTTP 500 if unable to connect to the + Llama Stack server. + + + + + +### 📦 Request Body + +[QueryRequest](#queryrequest) + +### ✅ Responses + +| Status Code | Description | Component | +|-------------|-------------|-----------| +| 200 | Streaming response with Server-Sent Events | string +string | +| 400 | Missing or invalid credentials provided by client | [UnauthorizedResponse](#unauthorizedresponse) | +| 401 | Unauthorized: Invalid or missing Bearer token for k8s auth | [UnauthorizedResponse](#unauthorizedresponse) | +| 403 | User is not authorized | [ForbiddenResponse](#forbiddenresponse) | +| 500 | Internal Server Error | | +| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) | ## GET `/readiness` > **Readiness Probe Get Method** diff --git a/docs/output.md b/docs/output.md index b4b41c5c6..32efedce3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -227,14 +227,10 @@ Returns: > **Streaming Query Endpoint Handler** -Handle request to the /streaming_query endpoint. +Handle request to the /streaming_query endpoint using Agent API. -This endpoint receives a query request, authenticates the user, -selects the appropriate model and provider, and streams -incremental response events from the Llama Stack backend to the -client. Events include start, token updates, tool calls, turn -completions, errors, and end-of-stream metadata. Optionally -stores the conversation transcript if enabled in configuration. +This is a wrapper around streaming_query_endpoint_handler_base that provides +the Agent API specific retrieve_response and response generator functions. Returns: StreamingResponse: An HTTP streaming response yielding @@ -587,6 +583,42 @@ Returns: | 429 | The quota has been exceeded | [QuotaExceededResponse](#quotaexceededresponse) | | 500 | Internal Server Error | | | 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) | +## POST `/v2/streaming_query` + +> **Streaming Query Endpoint Handler V2** + +Handle request to the /streaming_query endpoint using Responses API. + +This is a wrapper around streaming_query_endpoint_handler_base that provides +the Responses API specific retrieve_response and response generator functions. + +Returns: + StreamingResponse: An HTTP streaming response yielding + SSE-formatted events for the query lifecycle. + +Raises: + HTTPException: Returns HTTP 500 if unable to connect to the + Llama Stack server. + + + + + +### 📦 Request Body + +[QueryRequest](#queryrequest) + +### ✅ Responses + +| Status Code | Description | Component | +|-------------|-------------|-----------| +| 200 | Streaming response with Server-Sent Events | string +string | +| 400 | Missing or invalid credentials provided by client | [UnauthorizedResponse](#unauthorizedresponse) | +| 401 | Unauthorized: Invalid or missing Bearer token for k8s auth | [UnauthorizedResponse](#unauthorizedresponse) | +| 403 | User is not authorized | [ForbiddenResponse](#forbiddenresponse) | +| 500 | Internal Server Error | | +| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) | ## GET `/readiness` > **Readiness Probe Get Method**