diff --git a/docs/openapi.json b/docs/openapi.json index dc7dfbb63..4f4176b8b 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -152,7 +152,13 @@ } }, "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "LLM answer" + "response": "LLM answer", + "referenced_documents": [ + { + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html", + "doc_title": "Operator Lifecycle Manager (OLM)" + } + ] }, "400": { "description": "Missing or invalid credentials provided by client", @@ -1155,6 +1161,9 @@ }, "inference": { "$ref": "#/components/schemas/InferenceConfiguration" + }, + "conversation_cache": { + "$ref": "#/components/schemas/ConversationCacheConfiguration" } }, "additionalProperties": false, @@ -1168,6 +1177,61 @@ "title": "Configuration", "description": "Global service configuration." }, + "ConversationCacheConfiguration": { + "properties": { + "type": { + "anyOf": [ + { + "type": "string", + "enum": [ + "noop", + "memory", + "sqlite", + "postgres" + ] + }, + { + "type": "null" + } + ], + "title": "Type" + }, + "memory": { + "anyOf": [ + { + "$ref": "#/components/schemas/InMemoryCacheConfig" + }, + { + "type": "null" + } + ] + }, + "sqlite": { + "anyOf": [ + { + "$ref": "#/components/schemas/SQLiteDatabaseConfiguration" + }, + { + "type": "null" + } + ] + }, + "postgres": { + "anyOf": [ + { + "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "type": "object", + "title": "ConversationCacheConfiguration", + "description": "Conversation cache configuration." 
+ }, "ConversationDeleteResponse": { "properties": { "conversation_id": { @@ -1751,6 +1815,22 @@ "type": "object", "title": "HTTPValidationError" }, + "InMemoryCacheConfig": { + "properties": { + "max_entries": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Max Entries" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "max_entries" + ], + "title": "InMemoryCacheConfig", + "description": "In-memory cache configuration." + }, "InferenceConfiguration": { "properties": { "default_model": { @@ -2372,6 +2452,45 @@ "examples": [ "Kubernetes is an open-source container orchestration system for automating ..." ] + }, + "rag_chunks": { + "items": { + "$ref": "#/components/schemas/RAGChunk" + }, + "type": "array", + "title": "Rag Chunks", + "default": [] + }, + "tool_calls": { + "anyOf": [ + { + "items": { + "$ref": "#/components/schemas/ToolCall" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Tool Calls", + "description": "List of tool calls made during response generation" + }, + "referenced_documents": { + "items": { + "$ref": "#/components/schemas/ReferencedDocument" + }, + "type": "array", + "title": "Referenced Documents", + "description": "List of documents referenced in generating the response", + "examples": [ + [ + { + "doc_title": "Operator Lifecycle Manager (OLM)", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" + } + ] + ] } }, "type": "object", @@ -2379,14 +2498,77 @@ "response" ], "title": "QueryResponse", - "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.", + "description": "Model representing LLM response to a query.\n\nAttributes:\n conversation_id: The optional conversation ID (UUID).\n response: The response.\n rag_chunks: List of RAG chunks used to generate the response.\n referenced_documents: The URLs and titles for the documents used to 
generate the response.\n tool_calls: List of tool calls made during response generation.\n TODO: truncated: Whether conversation history was truncated.\n TODO: input_tokens: Number of tokens sent to LLM.\n TODO: output_tokens: Number of tokens received from LLM.\n TODO: available_quotas: Quota available as measured by all configured quota limiters.\n TODO: tool_results: List of tool results.", "examples": [ { "conversation_id": "123e4567-e89b-12d3-a456-426614174000", - "response": "Operator Lifecycle Manager (OLM) helps users install..." + "rag_chunks": [ + { + "content": "OLM is a component of the Operator Framework toolkit...", + "score": 0.95, + "source": "kubernetes-docs/operators.md" + } + ], + "referenced_documents": [ + { + "doc_title": "Operator Lifecycle Manager (OLM)", + "doc_url": "https://docs.openshift.com/container-platform/4.15/operators/olm/index.html" + } + ], + "response": "Operator Lifecycle Manager (OLM) helps users install...", + "tool_calls": [ + { + "arguments": { + "query": "operator lifecycle manager" + }, + "result": { + "chunks_found": 5 + }, + "tool_name": "knowledge_search" + } + ] } ] }, + "RAGChunk": { + "properties": { + "content": { + "type": "string", + "title": "Content", + "description": "The content of the chunk" + }, + "source": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Source", + "description": "Source document or URL" + }, + "score": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "null" + } + ], + "title": "Score", + "description": "Relevance score" + } + }, + "type": "object", + "required": [ + "content" + ], + "title": "RAGChunk", + "description": "Model representing a RAG chunk used in the response." 
+ }, "ReadinessResponse": { "properties": { "ready": { @@ -2432,6 +2614,35 @@ } ] }, + "ReferencedDocument": { + "properties": { + "doc_url": { + "anyOf": [ + { + "type": "string", + "minLength": 1, + "format": "uri" + }, + { + "type": "null" + } + ], + "title": "Doc Url", + "description": "URL of the referenced document" + }, + "doc_title": { + "type": "string", + "title": "Doc Title", + "description": "Title of the referenced document" + } + }, + "type": "object", + "required": [ + "doc_title" + ], + "title": "ReferencedDocument", + "description": "Model representing a document referenced in generating a response.\n\nAttributes:\n doc_url: URL of the referenced doc.\n doc_title: Title of the referenced doc." + }, "SQLiteDatabaseConfiguration": { "properties": { "db_path": { @@ -2565,6 +2776,41 @@ "title": "TLSConfiguration", "description": "TLS configuration." }, + "ToolCall": { + "properties": { + "tool_name": { + "type": "string", + "title": "Tool Name", + "description": "Name of the tool called" + }, + "arguments": { + "additionalProperties": true, + "type": "object", + "title": "Arguments", + "description": "Arguments passed to the tool" + }, + "result": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Result", + "description": "Result from the tool" + } + }, + "type": "object", + "required": [ + "tool_name", + "arguments" + ], + "title": "ToolCall", + "description": "Model representing a tool call made during response generation." + }, "UnauthorizedResponse": { "properties": { "detail": { diff --git a/docs/openapi.md b/docs/openapi.md index 6ef13f8d7..c68fcd985 100644 --- a/docs/openapi.md +++ b/docs/openapi.md @@ -562,6 +562,21 @@ Global service configuration. | authorization | | | | customization | | | | inference | | | +| conversation_cache | | | + + +## ConversationCacheConfiguration + + +Conversation cache configuration. 
+ + +| Field | Type | Description | +|-------|------|-------------| +| type | | | +| memory | | | +| sqlite | | | +| postgres | | | ## ConversationDeleteResponse @@ -881,6 +896,17 @@ Model representing response for forbidden access. | detail | array | | +## InMemoryCacheConfig + + +In-memory cache configuration. + + +| Field | Type | Description | +|-------|------|-------------| +| max_entries | integer | | + + ## InferenceConfiguration @@ -1102,12 +1128,36 @@ Model representing LLM response to a query. Attributes: conversation_id: The optional conversation ID (UUID). response: The response. + rag_chunks: List of RAG chunks used to generate the response. + referenced_documents: The URLs and titles for the documents used to generate the response. + tool_calls: List of tool calls made during response generation. + TODO: truncated: Whether conversation history was truncated. + TODO: input_tokens: Number of tokens sent to LLM. + TODO: output_tokens: Number of tokens received from LLM. + TODO: available_quotas: Quota available as measured by all configured quota limiters. + TODO: tool_results: List of tool results. | Field | Type | Description | |-------|------|-------------| | conversation_id | | The optional conversation ID (UUID) | | response | string | Response from LLM | +| rag_chunks | array | | +| tool_calls | | List of tool calls made during response generation | +| referenced_documents | array | List of documents referenced in generating the response | + + +## RAGChunk + + +Model representing a RAG chunk used in the response. + + +| Field | Type | Description | +|-------|------|-------------| +| content | string | The content of the chunk | +| source | | Source document or URL | +| score | | Relevance score | ## ReadinessResponse @@ -1143,6 +1193,22 @@ Example: | providers | array | List of unhealthy providers in case of readiness failure. | +## ReferencedDocument + + +Model representing a document referenced in generating a response. 
+ +Attributes: + doc_url: URL of the referenced doc. + doc_title: Title of the referenced doc. + + +| Field | Type | Description | +|-------|------|-------------| +| doc_url | | URL of the referenced document | +| doc_title | string | Title of the referenced document | + + ## SQLiteDatabaseConfiguration @@ -1209,6 +1275,19 @@ TLS configuration. | tls_key_password | | | +## ToolCall + + +Model representing a tool call made during response generation. + + +| Field | Type | Description | +|-------|------|-------------| +| tool_name | string | Name of the tool called | +| arguments | object | Arguments passed to the tool | +| result | | Result from the tool | + + ## UnauthorizedResponse diff --git a/docs/output.md b/docs/output.md index 6ef13f8d7..c68fcd985 100644 --- a/docs/output.md +++ b/docs/output.md @@ -562,6 +562,21 @@ Global service configuration. | authorization | | | | customization | | | | inference | | | +| conversation_cache | | | + + +## ConversationCacheConfiguration + + +Conversation cache configuration. + + +| Field | Type | Description | +|-------|------|-------------| +| type | | | +| memory | | | +| sqlite | | | +| postgres | | | ## ConversationDeleteResponse @@ -881,6 +896,17 @@ Model representing response for forbidden access. | detail | array | | +## InMemoryCacheConfig + + +In-memory cache configuration. + + +| Field | Type | Description | +|-------|------|-------------| +| max_entries | integer | | + + ## InferenceConfiguration @@ -1102,12 +1128,36 @@ Model representing LLM response to a query. Attributes: conversation_id: The optional conversation ID (UUID). response: The response. + rag_chunks: List of RAG chunks used to generate the response. + referenced_documents: The URLs and titles for the documents used to generate the response. + tool_calls: List of tool calls made during response generation. + TODO: truncated: Whether conversation history was truncated. + TODO: input_tokens: Number of tokens sent to LLM. 
+ TODO: output_tokens: Number of tokens received from LLM. + TODO: available_quotas: Quota available as measured by all configured quota limiters. + TODO: tool_results: List of tool results. | Field | Type | Description | |-------|------|-------------| | conversation_id | | The optional conversation ID (UUID) | | response | string | Response from LLM | +| rag_chunks | array | | +| tool_calls | | List of tool calls made during response generation | +| referenced_documents | array | List of documents referenced in generating the response | + + +## RAGChunk + + +Model representing a RAG chunk used in the response. + + +| Field | Type | Description | +|-------|------|-------------| +| content | string | The content of the chunk | +| source | | Source document or URL | +| score | | Relevance score | ## ReadinessResponse @@ -1143,6 +1193,22 @@ Example: | providers | array | List of unhealthy providers in case of readiness failure. | +## ReferencedDocument + + +Model representing a document referenced in generating a response. + +Attributes: + doc_url: URL of the referenced doc. + doc_title: Title of the referenced doc. + + +| Field | Type | Description | +|-------|------|-------------| +| doc_url | | URL of the referenced document | +| doc_title | string | Title of the referenced document | + + ## SQLiteDatabaseConfiguration @@ -1209,6 +1275,19 @@ TLS configuration. | tls_key_password | | | +## ToolCall + + +Model representing a tool call made during response generation. + + +| Field | Type | Description | +|-------|------|-------------| +| tool_name | string | Name of the tool called | +| arguments | object | Arguments passed to the tool | +| result | | Result from the tool | + + ## UnauthorizedResponse