From c33f951496c267c04466d732551f41cea7809de3 Mon Sep 17 00:00:00 2001
From: erangi-ar <111747955+erangi-ar@users.noreply.github.com>
Date: Wed, 26 Nov 2025 17:06:54 +0530
Subject: [PATCH] Rag 149 - Show chunk context in Test LLM Connection Page (#173)

* partially completes prompt refiner
* integrate prompt refiner with llm_config_module
* fixed ruff lint issues
* complete prompt refiner, chunk retriever and reranker
* remove unnecessary comments
* updated .gitignore
* Remove data_sets from tracking
* update .gitignore file
* complete vault setup and response generator
* remove ignore comment
* removed old modules
* fixed merge conflicts
* Vault Authentication token handling (#154) (#70)
* partially completes prompt refiner
* integrate prompt refiner with llm_config_module
* fixed ruff lint issues
* complete prompt refiner, chunk retriever and reranker
* remove unnecessary comments
* updated .gitignore
* Remove data_sets from tracking
* update .gitignore file
* complete vault setup and response generator
* remove ignore comment
* removed old modules
* fixed merge conflicts
* added initial setup for the vector indexer
* initial llm orchestration service update with context generation
* added new endpoints
* vector indexer with contextual retrieval
* fixed requested changes
* fixed issue
* initial diff identifier setup
* uncomment docker compose file
* added test endpoint for orchestrate service
* fixed ruff linting issue
* Rag 103 budget related schema changes (#41)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* rename commonUtils
---------
* Rag 93 update connection status (#47)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* rename commonUtils
* Implement LLM connection status update functionality with API integration and UI enhancements
---------
* Rag 99 production llm connections logic (#46)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* rename commonUtils
* Add production connection retrieval and update related components
* Implement LLM connection environment update and enhance connection management logic
---------
* Rag 119 endpoint to update used budget (#42)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* Add functionality to update used budget for LLM connections with validation and response handling
* Implement budget threshold checks and connection deactivation logic in update process
* resolve pr comments
---------
* Rag 113 warning and termination banners (#43)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* Add budget status check and update BudgetBanner component
* rename commonUtils
* resolve pr comments
---------
* rag-105-reset-used-budget-cron-job (#44)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* Add cron job to reset used budget
* rename commonUtils
* resolve pr comments
* Remove trailing slash from vault/agent-out in .gitignore
---------
* Rag 101 budget check functionality (#45)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* rename commonUtils
* budget check functionality
---------
* gui running on 3003 issue fixed
* gui running on 3003 issue fixed (#50)
* added get-configuration.sqpl and updated llmconnections.ts
* Add SQL query to retrieve configuration values
* Hashicorp key saving (#51)
* gui running on 3003 issue fixed
* Add SQL query to retrieve configuration values
---------
* Remove REACT_APP_NOTIFICATION_NODE_URL variable

  Removed REACT_APP_NOTIFICATION_NODE_URL environment variable.

* added initial diff identifier functionality
* test phase1
* Refactor inference and connection handling in YAML and TypeScript files
* fixes (#52)
* gui running on 3003 issue fixed
* Add SQL query to retrieve configuration values
* Refactor inference and connection handling in YAML and TypeScript files
---------
* Add entry point script for Vector Indexer with command line interface
* fix (#53)
* gui running on 3003 issue fixed
* Add SQL query to retrieve configuration values
* Refactor inference and connection handling in YAML and TypeScript files
* Add entry point script for Vector Indexer with command line interface
---------
* diff fixes
* uncomment llm orchestration service in docker compose file
* complete vector indexer
* Add YAML configurations and scripts for managing vault secrets
* Add vault secret management functions and endpoints for LLM connections
* Add Test Production LLM page with messaging functionality and styles
* fixed issue
* fixed merge conflicts
* fixed issue
* fixed issue
* updated with requested changes
* fixed test UI endpoint request/response schema issue
* fixed DVC path issue
* added DSPy optimization
* filters fixed
* refactor: restructure llm_connections table for improved configuration and tracking
* feat: enhance LLM connection handling with AWS and Azure embedding credentials
* fixed issues
* refactor: remove redundant Azure and AWS credential assignments in vault secret functions
* fixed issue
* initial vault setup script
* complete vault authentication handling
* review requested change fix
* fixed issues according to the pr review
* fixed issues in the docker compose file relevant to the pr review
---------
Co-authored-by: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com>
Co-authored-by: erangi-ar

* Add context section with collapsible display for inference results
* chunks integration
* testing
* security improvements
* fix guardrail issue
* fix review comments
* fixed issue
* remove optimized modules
* remove unnecessary file
* fix typo
* fixed review
* source metadata rename and optimize input guardrail flow
* optimized components
* remove unnecessary files
* fixed ruff format issue
* fixed requested changes
* fixed ruff format issue
* tested and improved chunk retrieval quality and performance
* complete backend logic to show chunks in test UI
* hide inference result loading state in UI
* resolve pr comments
---------
Co-authored-by: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com>
Co-authored-by: nuwangeek
Co-authored-by: erangi-ar
---
 GUI/src/pages/TestModel/TestLLM.scss | 38 +++++++++++++++++++++++++
 GUI/src/pages/TestModel/index.tsx    | 42 ++++++++++++++++++++++------
 GUI/src/services/inference.ts        |  4 +++
 src/llm_orchestration_service.py     | 31 ++++++++++++++++++++
 src/llm_orchestration_service_api.py | 14 ++++++++--
 src/models/request_models.py         | 12 +++++++-
 6 files changed, 130 insertions(+), 11 deletions(-)

diff --git a/GUI/src/pages/TestModel/TestLLM.scss b/GUI/src/pages/TestModel/TestLLM.scss
index 2dd2b4e..833690d 100644
--- a/GUI/src/pages/TestModel/TestLLM.scss
+++ b/GUI/src/pages/TestModel/TestLLM.scss
@@ -41,6 +41,44 @@
     line-height: 1.5;
     color: #555;
   }
+
+  .context-section {
+    margin-top: 20px;
+
+    .context-list {
+      display: flex;
+      flex-direction: column;
+      gap: 12px;
+      margin-top: 8px;
+    }
+
+    .context-item {
+      padding: 12px;
+      background-color: #ffffff;
+      border: 1px solid #e0e0e0;
+      border-radius: 6px;
+      box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+
+      .context-rank {
+        margin-bottom: 8px;
+        padding-bottom: 4px;
+        border-bottom: 1px solid #f0f0f0;
+
+        strong {
+          color: #2563eb;
+          font-size: 0.875rem;
+          font-weight: 600;
+        }
+      }
+
+      .context-content {
+        color: #374151;
+        line-height: 1.5;
+        font-size: 0.9rem;
+        white-space: pre-wrap;
+      }
+    }
+  }
 }
 
 .testModalList {
diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx
index 4b16522..b6e66e7 100644
--- a/GUI/src/pages/TestModel/index.tsx
+++ b/GUI/src/pages/TestModel/index.tsx
@@ -1,5 +1,5 @@
 import { useMutation, useQuery } from '@tanstack/react-query';
-import { Button, FormSelect, FormTextarea } from 'components';
+import { Button, FormSelect, FormTextarea, Collapsible } from 'components';
 import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner';
 import { FC, useState } from 'react';
 import { useTranslation } from 'react-i18next';
@@ -19,6 +19,9 @@ const TestLLM: FC = () => {
     text: '',
   });
 
+  // Sort context by rank
+  const sortedContext = inferenceResult?.chunks?.toSorted((a, b) => a.rank - b.rank) ?? [];
+
   // Fetch LLM connections for dropdown - using the working legacy endpoint for now
   const { data: connections, isLoading: isLoadingConnections } = useQuery({
     queryKey: llmConnectionsQueryKeys.list({
@@ -99,7 +102,7 @@ const TestLLM: FC = () => {
         onSelectionChange={(selection) => {
           handleChange('connectionId', selection?.value as string);
         }}
-        value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
+        value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
         defaultValue={testLLM?.connectionId ?? undefined}
       />
@@ -126,15 +129,38 @@
 
       {/* Inference Result */}
-      {inferenceResult && (
+      {inferenceResult && !inferenceMutation.isLoading && (
-        <div>
-          <strong>{t('testModels.responseLabel') || 'Response:'}</strong>
-          <p>{inferenceResult.content}</p>
+        <div>
+          <strong>Response:</strong>
+          <p>{inferenceResult.content}</p>
+
+
+          {/* Context Section */}
+          {
+            sortedContext && sortedContext?.length > 0 && (
+              <div className="context-section">
+                <Collapsible title="Context">
+                  <div className="context-list">
+                    {sortedContext?.map((contextItem, index) => (
+                      <div className="context-item" key={index}>
+                        <div className="context-rank">
+                          <strong>Rank {contextItem.rank}</strong>
+                        </div>
+                        <div className="context-content">
+                          {contextItem.chunkRetrieved}
+                        </div>
+                      </div>
+                    ))}
+                  </div>
+                </Collapsible>
+              </div>
+            )
+          }
+        </div>
-        </div>
       )}
 
       {/* Error State */}
diff --git a/GUI/src/services/inference.ts b/GUI/src/services/inference.ts
index 691522c..44baf69 100644
--- a/GUI/src/services/inference.ts
+++ b/GUI/src/services/inference.ts
@@ -25,6 +25,10 @@ export interface InferenceResponse {
     llmServiceActive: boolean;
     questionOutOfLlmScope: boolean;
     content: string;
+    chunks?: {
+      rank: number,
+      chunkRetrieved: string
+    }[]
   };
 }
 
diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py
index 26c4b7d..a7de4c6 100644
--- a/src/llm_orchestration_service.py
+++ b/src/llm_orchestration_service.py
@@ -18,6 +18,7 @@
     PromptRefinerOutput,
     ContextGenerationRequest,
     TestOrchestrationResponse,
+    ChunkInfo,
 )
 from prompt_refine_manager.prompt_refiner import PromptRefinerAgent
 from src.response_generator.response_generate import ResponseGeneratorAgent
@@ -922,6 +923,7 @@ def handle_input_guardrails(
                 questionOutOfLLMScope=False,
                 inputGuardFailed=True,
                 content=INPUT_GUARDRAIL_VIOLATION_MESSAGE,
+                chunks=None,
             )
         else:
             return OrchestrationResponse(
@@ -1606,6 +1608,31 @@ def _initialize_response_generator(
             logger.error(f"Failed to initialize response generator: {str(e)}")
             raise
 
+    @staticmethod
+    def _format_chunks_for_test_response(
+        relevant_chunks: Optional[List[Dict[str, Union[str, float, Dict[str, Any]]]]],
+    ) -> Optional[List[ChunkInfo]]:
+        """
+        Format retrieved chunks for test response.
+
+        Args:
+            relevant_chunks: List of retrieved chunks with metadata
+
+        Returns:
+            List of ChunkInfo objects with rank and content, or None if no chunks
+        """
+        if not relevant_chunks:
+            return None
+
+        formatted_chunks = []
+        for rank, chunk in enumerate(relevant_chunks, start=1):
+            # Extract text content - prefer "text" key, fallback to "content"
+            chunk_text = chunk.get("text", chunk.get("content", ""))
+            if isinstance(chunk_text, str) and chunk_text.strip():
+                formatted_chunks.append(ChunkInfo(rank=rank, chunkRetrieved=chunk_text))
+
+        return formatted_chunks if formatted_chunks else None
+
     @observe(name="generate_rag_response", as_type="generation")
     def _generate_rag_response(
         self,
@@ -1639,6 +1666,7 @@
                 questionOutOfLLMScope=False,
                 inputGuardFailed=False,
                 content=TECHNICAL_ISSUE_MESSAGE,
+                chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
             return OrchestrationResponse(
@@ -1706,6 +1734,7 @@
                 questionOutOfLLMScope=True,
                 inputGuardFailed=False,
                 content=OUT_OF_SCOPE_MESSAGE,
+                chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
             return OrchestrationResponse(
@@ -1725,6 +1754,7 @@
                 questionOutOfLLMScope=False,
                 inputGuardFailed=False,
                 content=answer,
+                chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
             return OrchestrationResponse(
@@ -1765,6 +1795,7 @@
                 questionOutOfLLMScope=False,
                 inputGuardFailed=False,
                 content=TECHNICAL_ISSUE_MESSAGE,
+                chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
             return OrchestrationResponse(
diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py
index df2fa21..b58eac9 100644
--- a/src/llm_orchestration_service_api.py
+++ b/src/llm_orchestration_service_api.py
@@ -332,7 +332,9 @@ def test_orchestrate_llm_request(
         conversationHistory=[],
         url="test-context",
         environment=request.environment,
-        connection_id=str(request.connectionId),
+        connection_id=str(request.connectionId)
+        if request.connectionId is not None
+        else None,
     )
 
     logger.info(f"This is full request constructed for testing: {full_request}")
{full_request}") @@ -340,12 +342,20 @@ def test_orchestrate_llm_request( # Process the request using the same logic response = orchestration_service.process_orchestration_request(full_request) - # Convert to TestOrchestrationResponse (exclude chatId) + # If response is already TestOrchestrationResponse (when environment is testing), return it directly + if isinstance(response, TestOrchestrationResponse): + logger.info( + f"Successfully processed test request for environment: {request.environment}" + ) + return response + + # Convert to TestOrchestrationResponse (exclude chatId) for other cases test_response = TestOrchestrationResponse( llmServiceActive=response.llmServiceActive, questionOutOfLLMScope=response.questionOutOfLLMScope, inputGuardFailed=response.inputGuardFailed, content=response.content, + chunks=None, # OrchestrationResponse doesn't have chunks ) logger.info( diff --git a/src/models/request_models.py b/src/models/request_models.py index e31eec4..2239425 100644 --- a/src/models/request_models.py +++ b/src/models/request_models.py @@ -230,10 +230,17 @@ class TestOrchestrationRequest(BaseModel): ..., description="Environment context" ) connectionId: Optional[int] = Field( - ..., description="Optional connection identifier" + None, description="Optional connection identifier" ) +class ChunkInfo(BaseModel): + """Model for chunk information in test response.""" + + rank: int = Field(..., description="Rank of the retrieved chunk") + chunkRetrieved: str = Field(..., description="Content of the retrieved chunk") + + class TestOrchestrationResponse(BaseModel): """Model for test orchestration response (without chatId).""" @@ -245,3 +252,6 @@ class TestOrchestrationResponse(BaseModel): ..., description="Whether input guard validation failed" ) content: str = Field(..., description="Response content with citations") + chunks: Optional[List[ChunkInfo]] = Field( + default=None, description="Retrieved chunks with rank and content" + )