38 changes: 38 additions & 0 deletions GUI/src/pages/TestModel/TestLLM.scss
@@ -41,6 +41,44 @@
line-height: 1.5;
color: #555;
}

.context-section {
margin-top: 20px;

.context-list {
display: flex;
flex-direction: column;
gap: 12px;
margin-top: 8px;
}

.context-item {
padding: 12px;
background-color: #ffffff;
border: 1px solid #e0e0e0;
border-radius: 6px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);

.context-rank {
margin-bottom: 8px;
padding-bottom: 4px;
border-bottom: 1px solid #f0f0f0;

strong {
color: #2563eb;
font-size: 0.875rem;
font-weight: 600;
}
}

.context-content {
color: #374151;
line-height: 1.5;
font-size: 0.9rem;
white-space: pre-wrap;
}
}
}
}

.testModalList {
42 changes: 34 additions & 8 deletions GUI/src/pages/TestModel/index.tsx
@@ -1,5 +1,5 @@
import { useMutation, useQuery } from '@tanstack/react-query';
import { Button, FormSelect, FormTextarea } from 'components';
import { Button, FormSelect, FormTextarea, Collapsible } from 'components';
import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner';
import { FC, useState } from 'react';
import { useTranslation } from 'react-i18next';
@@ -19,6 +19,9 @@ const TestLLM: FC = () => {
text: '',
});

// Sort context by rank
const sortedContext = inferenceResult?.chunks?.toSorted((a, b) => a.rank - b.rank) ?? [];

// Fetch LLM connections for dropdown - using the working legacy endpoint for now
const { data: connections, isLoading: isLoadingConnections } = useQuery({
queryKey: llmConnectionsQueryKeys.list({
@@ -99,7 +102,7 @@
onSelectionChange={(selection) => {
handleChange('connectionId', selection?.value as string);
}}
value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
defaultValue={testLLM?.connectionId ?? undefined}
/>
</div>
@@ -126,15 +129,38 @@

{/* Inference Result */}

{inferenceResult && (
{inferenceResult && !inferenceMutation.isLoading && (
<div className="inference-results-container">
<div className="result-item">
<strong>{t('testModels.responseLabel') || 'Response:'}</strong>
<div className="response-content">
{inferenceResult.content}
<div className="result-item">
<strong>Response:</strong>
<div className="response-content">
{inferenceResult.content}
</div>
</div>

{/* Context Section */}
{
sortedContext && sortedContext?.length > 0 && (
<div className="context-section">
<Collapsible title={`Context (${sortedContext?.length} chunks)`} defaultOpen={false}>
<div className="context-list">
{sortedContext?.map((contextItem, index) => (
<div key={index} className="context-item">
<div className="context-rank">
<strong>Rank {contextItem.rank}</strong>
</div>
<div className="context-content">
{contextItem.chunkRetrieved}
</div>
</div>
))}
</div>
</Collapsible>
</div>
)
}

</div>
</div>
)}

{/* Error State */}
4 changes: 4 additions & 0 deletions GUI/src/services/inference.ts
@@ -25,6 +25,10 @@ export interface InferenceResponse {
llmServiceActive: boolean;
questionOutOfLlmScope: boolean;
content: string;
chunks?: {
rank: number,
chunkRetrieved: string
}[]
};
}

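The new optional `chunks` field mirrors the `ChunkInfo` model added on the Python side further down in this PR. As a point of comparison, a hypothetical payload sketched in Python — only the key names come from this diff, the text values are illustrative:

# Hypothetical chunks payload matching the new optional field.
chunks = [
    {"rank": 2, "chunkRetrieved": "Second most relevant passage."},
    {"rank": 1, "chunkRetrieved": "Most relevant passage."},
]

# The GUI renders these in ascending rank order (toSorted in index.tsx above);
# the same ordering expressed in Python:
sorted_chunks = sorted(chunks, key=lambda c: c["rank"])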
31 changes: 31 additions & 0 deletions src/llm_orchestration_service.py
@@ -18,6 +18,7 @@
PromptRefinerOutput,
ContextGenerationRequest,
TestOrchestrationResponse,
ChunkInfo,
)
from prompt_refine_manager.prompt_refiner import PromptRefinerAgent
from src.response_generator.response_generate import ResponseGeneratorAgent
@@ -922,6 +923,7 @@ def handle_input_guardrails(
questionOutOfLLMScope=False,
inputGuardFailed=True,
content=INPUT_GUARDRAIL_VIOLATION_MESSAGE,
chunks=None,
)
else:
return OrchestrationResponse(
@@ -1606,6 +1608,31 @@ def _initialize_response_generator(
logger.error(f"Failed to initialize response generator: {str(e)}")
raise

@staticmethod
def _format_chunks_for_test_response(
relevant_chunks: Optional[List[Dict[str, Union[str, float, Dict[str, Any]]]]],
) -> Optional[List[ChunkInfo]]:
"""
Format retrieved chunks for test response.

Args:
relevant_chunks: List of retrieved chunks with metadata

Returns:
List of ChunkInfo objects with rank and content, or None if no chunks
"""
if not relevant_chunks:
return None

formatted_chunks = []
for rank, chunk in enumerate(relevant_chunks, start=1):
# Extract text content - prefer "text" key, fallback to "content"
chunk_text = chunk.get("text", chunk.get("content", ""))
if isinstance(chunk_text, str) and chunk_text.strip():
formatted_chunks.append(ChunkInfo(rank=rank, chunkRetrieved=chunk_text))

return formatted_chunks if formatted_chunks else None

@observe(name="generate_rag_response", as_type="generation")
def _generate_rag_response(
self,
@@ -1639,6 +1666,7 @@ def _generate_rag_response(
questionOutOfLLMScope=False,
inputGuardFailed=False,
content=TECHNICAL_ISSUE_MESSAGE,
chunks=self._format_chunks_for_test_response(relevant_chunks),
)
else:
return OrchestrationResponse(
@@ -1706,6 +1734,7 @@
questionOutOfLLMScope=True,
inputGuardFailed=False,
content=OUT_OF_SCOPE_MESSAGE,
chunks=self._format_chunks_for_test_response(relevant_chunks),
)
else:
return OrchestrationResponse(
@@ -1725,6 +1754,7 @@
questionOutOfLLMScope=False,
inputGuardFailed=False,
content=answer,
chunks=self._format_chunks_for_test_response(relevant_chunks),
)
else:
return OrchestrationResponse(
@@ -1765,6 +1795,7 @@
questionOutOfLLMScope=False,
inputGuardFailed=False,
content=TECHNICAL_ISSUE_MESSAGE,
chunks=self._format_chunks_for_test_response(relevant_chunks),
)
else:
return OrchestrationResponse(
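The new _format_chunks_for_test_response helper assigns ranks from the retriever's original ordering and silently drops entries without usable text. A minimal sketch of that behaviour — the input dicts are hypothetical retriever output, and because the enclosing class is not named in this hunk, `service` below is a stand-in for an instance of it:

# Hypothetical retriever output; only the "text"/"content" keys and the
# 1-based ranking come from the method above.
relevant_chunks = [
    {"text": "First retrieved passage.", "score": 0.91},
    {"content": "Passage stored under 'content' instead of 'text'.", "score": 0.74},
    {"text": "   "},  # whitespace-only text is dropped; its rank (3) is skipped, not reassigned
]

chunks = service._format_chunks_for_test_response(relevant_chunks)
# chunks == [
#     ChunkInfo(rank=1, chunkRetrieved="First retrieved passage."),
#     ChunkInfo(rank=2, chunkRetrieved="Passage stored under 'content' instead of 'text'."),
# ]
# An empty or None input returns None, so the chunks field serialises as null.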
14 changes: 12 additions & 2 deletions src/llm_orchestration_service_api.py
@@ -332,20 +332,30 @@ def test_orchestrate_llm_request(
conversationHistory=[],
url="test-context",
environment=request.environment,
connection_id=str(request.connectionId),
connection_id=str(request.connectionId)
if request.connectionId is not None
else None,
)

logger.info(f"This is full request constructed for testing: {full_request}")

# Process the request using the same logic
response = orchestration_service.process_orchestration_request(full_request)

# Convert to TestOrchestrationResponse (exclude chatId)
# If response is already TestOrchestrationResponse (when environment is testing), return it directly
if isinstance(response, TestOrchestrationResponse):
logger.info(
f"Successfully processed test request for environment: {request.environment}"
)
return response

# Convert to TestOrchestrationResponse (exclude chatId) for other cases
test_response = TestOrchestrationResponse(
llmServiceActive=response.llmServiceActive,
questionOutOfLLMScope=response.questionOutOfLLMScope,
inputGuardFailed=response.inputGuardFailed,
content=response.content,
chunks=None, # OrchestrationResponse doesn't have chunks
)

logger.info(
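One detail behind the connection_id change: str(None) produces the string "None", so the explicit guard is what keeps a missing connectionId genuinely absent. A minimal illustration:

connection_id = None
str(connection_id)                                          # -> "None" (a truthy string)
str(connection_id) if connection_id is not None else None   # -> None (value stays absent)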
12 changes: 11 additions & 1 deletion src/models/request_models.py
@@ -230,10 +230,17 @@ class TestOrchestrationRequest(BaseModel):
..., description="Environment context"
)
connectionId: Optional[int] = Field(
..., description="Optional connection identifier"
None, description="Optional connection identifier"
)


class ChunkInfo(BaseModel):
"""Model for chunk information in test response."""

rank: int = Field(..., description="Rank of the retrieved chunk")
chunkRetrieved: str = Field(..., description="Content of the retrieved chunk")


class TestOrchestrationResponse(BaseModel):
"""Model for test orchestration response (without chatId)."""

@@ -245,3 +252,6 @@ class TestOrchestrationResponse(BaseModel):
..., description="Whether input guard validation failed"
)
content: str = Field(..., description="Response content with citations")
chunks: Optional[List[ChunkInfo]] = Field(
default=None, description="Retrieved chunks with rank and content"
)
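To make the wire format concrete, a minimal sketch that builds the new models and serialises them — the import path is an assumption, only the field names come from this diff, and the example assumes Pydantic v2:

from src.models.request_models import ChunkInfo, TestOrchestrationResponse  # import path assumed

response = TestOrchestrationResponse(
    llmServiceActive=True,
    questionOutOfLLMScope=False,
    inputGuardFailed=False,
    content="Answer with citations [1].",
    chunks=[
        ChunkInfo(rank=1, chunkRetrieved="Most relevant passage."),
        ChunkInfo(rank=2, chunkRetrieved="Next passage."),
    ],
)

# Pydantic v2; on v1 use response.json() instead.
print(response.model_dump_json(indent=2))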