From c33f951496c267c04466d732551f41cea7809de3 Mon Sep 17 00:00:00 2001
From: erangi-ar <111747955+erangi-ar@users.noreply.github.com>
Date: Wed, 26 Nov 2025 17:06:54 +0530
Subject: [PATCH] Rag 149 - Show chunk context in Test LLM Connection Page (#173)

* partially completes prompt refiner
* integrate prompt refiner with llm_config_module
* fixed ruff lint issues
* complete prompt refiner, chunk retriever and reranker
* remove unnecessary comments
* updated .gitignore
* Remove data_sets from tracking
* update .gitignore file
* complete vault setup and response generator
* remove ignore comment
* removed old modules
* fixed merge conflicts
* Vault Authentication token handling (#154) (#70)
* partially completes prompt refiner
* integrate prompt refiner with llm_config_module
* fixed ruff lint issues
* complete prompt refiner, chunk retriever and reranker
* remove unnecessary comments
* updated .gitignore
* Remove data_sets from tracking
* update .gitignore file
* complete vault setup and response generator
* remove ignore comment
* removed old modules
* fixed merge conflicts
* added initial setup for the vector indexer
* initial llm orchestration service update with context generation
* added new endpoints
* vector indexer with contextual retrieval
* fixed requested changes
* fixed issue
* initial diff identifier setup
* uncomment docker compose file
* added test endpoint for orchestrate service
* fixed ruff linting issue
* Rag 103 budget related schema changes (#41)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* rename commonUtils
---------
* Rag 93 update connection status (#47)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* rename commonUtils
* Implement LLM connection status update functionality with API integration and UI enhancements
---------
* Rag 99 production llm connections logic (#46)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* rename commonUtils
* Add production connection retrieval and update related components
* Implement LLM connection environment update and enhance connection management logic
---------
* Rag 119 endpoint to update used budget (#42)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* Add functionality to update used budget for LLM connections with validation and response handling
* Implement budget threshold checks and connection deactivation logic in update process
* resolve pr comments
---------
* Rag 113 warning and termination banners (#43)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* Add budget status check and update BudgetBanner component
* rename commonUtils
* resolve pr comments
---------
* rag-105-reset-used-budget-cron-job (#44)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* Add cron job to reset used budget
* rename commonUtils
* resolve pr comments
* Remove trailing slash from vault/agent-out in .gitignore
---------
* Rag 101 budget check functionality (#45)
* Refactor llm_connections table: update budget tracking fields and reorder columns
* Add budget threshold fields and logic to LLM connection management
* Enhance budget management: update budget status logic, adjust thresholds, and improve form handling for LLM connections
* resolve pr comments & refactoring
* rename commonUtils
* budget check functionality
---------
* gui running on 3003 issue fixed
* gui running on 3003 issue fixed (#50)
* added get-configuration.sqpl and updated llmconnections.ts
* Add SQL query to retrieve configuration values
* Hashicorp key saving (#51)
* gui running on 3003 issue fixed
* Add SQL query to retrieve configuration values
---------
* Remove REACT_APP_NOTIFICATION_NODE_URL variable

  Removed REACT_APP_NOTIFICATION_NODE_URL environment variable.

* added initial diff identifier functionality
* test phase1
* Refactor inference and connection handling in YAML and TypeScript files
* fixes (#52)
* gui running on 3003 issue fixed
* Add SQL query to retrieve configuration values
* Refactor inference and connection handling in YAML and TypeScript files
---------
* Add entry point script for Vector Indexer with command line interface
* fix (#53)
* gui running on 3003 issue fixed
* Add SQL query to retrieve configuration values
* Refactor inference and connection handling in YAML and TypeScript files
* Add entry point script for Vector Indexer with command line interface
---------
* diff fixes
* uncomment llm orchestration service in docker compose file
* complete vector indexer
* Add YAML configurations and scripts for managing vault secrets
* Add vault secret management functions and endpoints for LLM connections
* Add Test Production LLM page with messaging functionality and styles
* fixed issue
* fixed merge conflicts
* fixed issue
* fixed issue
* updated with requested changes
* fixed test UI endpoint request/response schema issue
* fixed DVC path issue
* added DSPy optimization
* filters fixed
* refactor: restructure llm_connections table for improved configuration and tracking
* feat: enhance LLM connection handling with AWS and Azure embedding credentials
* fixed issues
* refactor: remove redundant Azure and AWS credential assignments in vault secret functions
* fixed issue
* initial vault setup script
* complete vault authentication handling
* review requested change fix
* fixed issues according to the pr review
* fixed issues in the docker compose file relevant to the pr review
---------
Co-authored-by: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com>
Co-authored-by: erangi-ar

* Add context section with collapsible display for inference results
* chunks integration
* testing
* security improvements
* fix guardrail issue
* fix review comments
* fixed issue
* remove optimized modules
* remove unnecessary file
* fix typo
* fixed review
* source metadata rename and optimize input guardrail flow
* optimized components
* remove unnecessary files
* fixed ruff format issue
* fixed requested changes
* fixed ruff format issue
* tested and improved chunk retrieval quality and performance
* complete backend logic to show chunks in test UI
* hide inference result loading state in UI
* resolve pr comments
---------
Co-authored-by: Charith Nuwan Bimsara <59943919+nuwangeek@users.noreply.github.com>
Co-authored-by: nuwangeek
Co-authored-by: erangi-ar
---
 GUI/src/pages/TestModel/TestLLM.scss | 38 +++++++++++++++++++++++++
 GUI/src/pages/TestModel/index.tsx    | 42 ++++++++++++++++++++++------
 GUI/src/services/inference.ts        |  4 +++
 src/llm_orchestration_service.py     | 31 ++++++++++++++++++++
 src/llm_orchestration_service_api.py | 14 ++++++++--
 src/models/request_models.py         | 12 +++++++-
 6 files changed, 130 insertions(+), 11 deletions(-)

diff --git a/GUI/src/pages/TestModel/TestLLM.scss b/GUI/src/pages/TestModel/TestLLM.scss
index 2dd2b4e..833690d 100644
--- a/GUI/src/pages/TestModel/TestLLM.scss
+++ b/GUI/src/pages/TestModel/TestLLM.scss
@@ -41,6 +41,44 @@
     line-height: 1.5;
     color: #555;
   }
+
+  .context-section {
+    margin-top: 20px;
+
+    .context-list {
+      display: flex;
+      flex-direction: column;
+      gap: 12px;
+      margin-top: 8px;
+    }
+
+    .context-item {
+      padding: 12px;
+      background-color: #ffffff;
+      border: 1px solid #e0e0e0;
+      border-radius: 6px;
+      box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+
+      .context-rank {
+        margin-bottom: 8px;
+        padding-bottom: 4px;
+        border-bottom: 1px solid #f0f0f0;
+
+        strong {
+          color: #2563eb;
+          font-size: 0.875rem;
+          font-weight: 600;
+        }
+      }
+
+      .context-content {
+        color: #374151;
+        line-height: 1.5;
+        font-size: 0.9rem;
+        white-space: pre-wrap;
+      }
+    }
+  }
 }
 
 .testModalList {
diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx
index 4b16522..b6e66e7 100644
--- a/GUI/src/pages/TestModel/index.tsx
+++ b/GUI/src/pages/TestModel/index.tsx
@@ -1,5 +1,5 @@
 import { useMutation, useQuery } from '@tanstack/react-query';
-import { Button, FormSelect, FormTextarea } from 'components';
+import { Button, FormSelect, FormTextarea, Collapsible } from 'components';
 import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner';
 import { FC, useState } from 'react';
 import { useTranslation } from 'react-i18next';
@@ -19,6 +19,9 @@ const TestLLM: FC = () => {
     text: '',
   });
 
+  // Sort context by rank
+  const sortedContext = inferenceResult?.chunks?.toSorted((a, b) => a.rank - b.rank) ?? [];
+
   // Fetch LLM connections for dropdown - using the working legacy endpoint for now
   const { data: connections, isLoading: isLoadingConnections } = useQuery({
     queryKey: llmConnectionsQueryKeys.list({
@@ -99,7 +102,7 @@ const TestLLM: FC = () => {
         onSelectionChange={(selection) => {
           handleChange('connectionId', selection?.value as string);
         }}
-        value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
+        value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
         defaultValue={testLLM?.connectionId ?? undefined}
       />
@@ -126,15 +129,38 @@
 
       {/* Inference Result */}
-      {inferenceResult && (
+      {inferenceResult && !inferenceMutation.isLoading && (
-        <div>
-          <strong>{t('testModels.responseLabel') || 'Response:'}</strong>
-          <p>{inferenceResult.content}</p>
+        <div>
+          <strong>Response:</strong>
+          <p>{inferenceResult.content}</p>
+
+
+          {/* Context Section */}
+          {
+            sortedContext && sortedContext?.length > 0 && (
+              <div className="context-section">
+                <Collapsible title="Context">
+                  <div className="context-list">
+                    {sortedContext?.map((contextItem, index) => (
+                      <div className="context-item" key={index}>
+                        <div className="context-rank">
+                          <strong>Rank {contextItem.rank}</strong>
+                        </div>
+                        <div className="context-content">
+                          {contextItem.chunkRetrieved}
+                        </div>
+                      </div>
+                    ))}
+                  </div>
+                </Collapsible>
+              </div>
+            )
+          }
+        </div>
-        </div>
       )}
 
       {/* Error State */}
diff --git a/GUI/src/services/inference.ts b/GUI/src/services/inference.ts
index 691522c..44baf69 100644
--- a/GUI/src/services/inference.ts
+++ b/GUI/src/services/inference.ts
@@ -25,6 +25,10 @@ export interface InferenceResponse {
     llmServiceActive: boolean;
     questionOutOfLlmScope: boolean;
     content: string;
+    chunks?: {
+      rank: number,
+      chunkRetrieved: string
+    }[]
   };
 }
 
diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py
index 26c4b7d..a7de4c6 100644
--- a/src/llm_orchestration_service.py
+++ b/src/llm_orchestration_service.py
@@ -18,6 +18,7 @@
     PromptRefinerOutput,
     ContextGenerationRequest,
     TestOrchestrationResponse,
+    ChunkInfo,
 )
 from prompt_refine_manager.prompt_refiner import PromptRefinerAgent
 from src.response_generator.response_generate import ResponseGeneratorAgent
@@ -922,6 +923,7 @@ def handle_input_guardrails(
                 questionOutOfLLMScope=False,
                 inputGuardFailed=True,
                 content=INPUT_GUARDRAIL_VIOLATION_MESSAGE,
+                chunks=None,
             )
         else:
             return OrchestrationResponse(
@@ -1606,6 +1608,31 @@ def _initialize_response_generator(
             logger.error(f"Failed to initialize response generator: {str(e)}")
             raise
 
+    @staticmethod
+    def _format_chunks_for_test_response(
+        relevant_chunks: Optional[List[Dict[str, Union[str, float, Dict[str, Any]]]]],
+    ) -> Optional[List[ChunkInfo]]:
+        """
+        Format retrieved chunks for test response.
+
+        Args:
+            relevant_chunks: List of retrieved chunks with metadata
+
+        Returns:
+            List of ChunkInfo objects with rank and content, or None if no chunks
+        """
+        if not relevant_chunks:
+            return None
+
+        formatted_chunks = []
+        for rank, chunk in enumerate(relevant_chunks, start=1):
+            # Extract text content - prefer "text" key, fallback to "content"
+            chunk_text = chunk.get("text", chunk.get("content", ""))
+            if isinstance(chunk_text, str) and chunk_text.strip():
+                formatted_chunks.append(ChunkInfo(rank=rank, chunkRetrieved=chunk_text))
+
+        return formatted_chunks if formatted_chunks else None
+
     @observe(name="generate_rag_response", as_type="generation")
     def _generate_rag_response(
         self,
@@ -1639,6 +1666,7 @@
                 questionOutOfLLMScope=False,
                 inputGuardFailed=False,
                 content=TECHNICAL_ISSUE_MESSAGE,
+                chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
             return OrchestrationResponse(
@@ -1706,6 +1734,7 @@
                 questionOutOfLLMScope=True,
                 inputGuardFailed=False,
                 content=OUT_OF_SCOPE_MESSAGE,
+                chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
             return OrchestrationResponse(
@@ -1725,6 +1754,7 @@
                 questionOutOfLLMScope=False,
                 inputGuardFailed=False,
                 content=answer,
+                chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
             return OrchestrationResponse(
@@ -1765,6 +1795,7 @@
                 questionOutOfLLMScope=False,
                 inputGuardFailed=False,
                 content=TECHNICAL_ISSUE_MESSAGE,
+                chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
             return OrchestrationResponse(
diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py
index df2fa21..b58eac9 100644
--- a/src/llm_orchestration_service_api.py
+++ b/src/llm_orchestration_service_api.py
@@ -332,7 +332,9 @@ def test_orchestrate_llm_request(
         conversationHistory=[],
         url="test-context",
         environment=request.environment,
-        connection_id=str(request.connectionId),
+        connection_id=str(request.connectionId)
+        if request.connectionId is not None
+        else None,
     )
 
     logger.info(f"This is full request constructed for testing: {full_request}")
{full_request}") @@ -340,12 +342,20 @@ def test_orchestrate_llm_request( # Process the request using the same logic response = orchestration_service.process_orchestration_request(full_request) - # Convert to TestOrchestrationResponse (exclude chatId) + # If response is already TestOrchestrationResponse (when environment is testing), return it directly + if isinstance(response, TestOrchestrationResponse): + logger.info( + f"Successfully processed test request for environment: {request.environment}" + ) + return response + + # Convert to TestOrchestrationResponse (exclude chatId) for other cases test_response = TestOrchestrationResponse( llmServiceActive=response.llmServiceActive, questionOutOfLLMScope=response.questionOutOfLLMScope, inputGuardFailed=response.inputGuardFailed, content=response.content, + chunks=None, # OrchestrationResponse doesn't have chunks ) logger.info( diff --git a/src/models/request_models.py b/src/models/request_models.py index e31eec4..2239425 100644 --- a/src/models/request_models.py +++ b/src/models/request_models.py @@ -230,10 +230,17 @@ class TestOrchestrationRequest(BaseModel): ..., description="Environment context" ) connectionId: Optional[int] = Field( - ..., description="Optional connection identifier" + None, description="Optional connection identifier" ) +class ChunkInfo(BaseModel): + """Model for chunk information in test response.""" + + rank: int = Field(..., description="Rank of the retrieved chunk") + chunkRetrieved: str = Field(..., description="Content of the retrieved chunk") + + class TestOrchestrationResponse(BaseModel): """Model for test orchestration response (without chatId).""" @@ -245,3 +252,6 @@ class TestOrchestrationResponse(BaseModel): ..., description="Whether input guard validation failed" ) content: str = Field(..., description="Response content with citations") + chunks: Optional[List[ChunkInfo]] = Field( + default=None, description="Retrieved chunks with rank and content" + )