38 changes: 38 additions & 0 deletions GUI/src/pages/TestModel/TestLLM.scss
@@ -41,6 +41,44 @@
line-height: 1.5;
color: #555;
}

.context-section {
margin-top: 20px;

.context-list {
display: flex;
flex-direction: column;
gap: 12px;
margin-top: 8px;
}

.context-item {
padding: 12px;
background-color: #ffffff;
border: 1px solid #e0e0e0;
border-radius: 6px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);

.context-rank {
margin-bottom: 8px;
padding-bottom: 4px;
border-bottom: 1px solid #f0f0f0;

strong {
color: #2563eb;
font-size: 0.875rem;
font-weight: 600;
}
}

.context-content {
color: #374151;
line-height: 1.5;
font-size: 0.9rem;
white-space: pre-wrap;
}
}
}
}

.testModalList {
42 changes: 34 additions & 8 deletions GUI/src/pages/TestModel/index.tsx
@@ -1,5 +1,5 @@
import { useMutation, useQuery } from '@tanstack/react-query';
import { Button, FormSelect, FormTextarea } from 'components';
import { Button, FormSelect, FormTextarea, Collapsible } from 'components';
import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner';
import { FC, useState } from 'react';
import { useTranslation } from 'react-i18next';
@@ -19,6 +19,9 @@ const TestLLM: FC = () => {
text: '',
});

// Sort context by rank
const sortedContext = inferenceResult?.chunks?.toSorted((a, b) => a.rank - b.rank) ?? [];

// Fetch LLM connections for dropdown - using the working legacy endpoint for now
const { data: connections, isLoading: isLoadingConnections } = useQuery({
queryKey: llmConnectionsQueryKeys.list({
@@ -99,7 +102,7 @@
onSelectionChange={(selection) => {
handleChange('connectionId', selection?.value as string);
}}
value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
value={testLLM?.connectionId === null ? t('testModels.connectionNotExist') || 'Connection does not exist' : undefined}
defaultValue={testLLM?.connectionId ?? undefined}
/>
</div>
@@ -126,15 +129,38 @@

{/* Inference Result */}

{inferenceResult && (
{inferenceResult && !inferenceMutation.isLoading && (
<div className="inference-results-container">
<div className="result-item">
<strong>{t('testModels.responseLabel') || 'Response:'}</strong>
<div className="response-content">
{inferenceResult.content}
<div className="result-item">
<strong>Response:</strong>
<div className="response-content">
{inferenceResult.content}
</div>
</div>

{/* Context Section */}
{
sortedContext && sortedContext?.length > 0 && (
<div className="context-section">
<Collapsible title={`Context (${sortedContext?.length} chunks)`} defaultOpen={false}>
<div className="context-list">
{sortedContext?.map((contextItem, index) => (
<div key={index} className="context-item">
<div className="context-rank">
<strong>Rank {contextItem.rank}</strong>
</div>
<div className="context-content">
{contextItem.chunkRetrieved}
</div>
</div>
))}
</div>
</Collapsible>
</div>
)
}

</div>
</div>
)}

{/* Error State */}
4 changes: 4 additions & 0 deletions GUI/src/services/inference.ts
@@ -25,6 +25,10 @@ export interface InferenceResponse {
llmServiceActive: boolean;
questionOutOfLlmScope: boolean;
content: string;
chunks?: {
rank: number,
chunkRetrieved: string
}[]
};
}

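The new optional `chunks` field mirrors the `ChunkInfo` model added on the Python side further down in this PR. As a point of comparison, a hypothetical payload sketched in Python — only the key names come from this diff, the text values are illustrative:

# Hypothetical chunks payload matching the new optional field.
chunks = [
    {"rank": 2, "chunkRetrieved": "Second most relevant passage."},
    {"rank": 1, "chunkRetrieved": "Most relevant passage."},
]

# The GUI renders these in ascending rank order (toSorted in index.tsx above);
# the same ordering expressed in Python:
sorted_chunks = sorted(chunks, key=lambda c: c["rank"])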
31 changes: 31 additions & 0 deletions src/llm_orchestration_service.py
@@ -18,6 +18,7 @@
PromptRefinerOutput,
ContextGenerationRequest,
TestOrchestrationResponse,
ChunkInfo,
)
from prompt_refine_manager.prompt_refiner import PromptRefinerAgent
from src.response_generator.response_generate import ResponseGeneratorAgent
@@ -922,6 +923,7 @@ def handle_input_guardrails(
questionOutOfLLMScope=False,
inputGuardFailed=True,
content=INPUT_GUARDRAIL_VIOLATION_MESSAGE,
chunks=None,
)
else:
return OrchestrationResponse(
@@ -1606,6 +1608,31 @@ def _initialize_response_generator(
logger.error(f"Failed to initialize response generator: {str(e)}")
raise

@staticmethod
def _format_chunks_for_test_response(
relevant_chunks: Optional[List[Dict[str, Union[str, float, Dict[str, Any]]]]],
) -> Optional[List[ChunkInfo]]:
"""
Format retrieved chunks for test response.

Args:
relevant_chunks: List of retrieved chunks with metadata

Returns:
List of ChunkInfo objects with rank and content, or None if no chunks
"""
if not relevant_chunks:
return None

formatted_chunks = []
for rank, chunk in enumerate(relevant_chunks, start=1):
# Extract text content - prefer "text" key, fallback to "content"
chunk_text = chunk.get("text", chunk.get("content", ""))
if isinstance(chunk_text, str) and chunk_text.strip():
formatted_chunks.append(ChunkInfo(rank=rank, chunkRetrieved=chunk_text))

return formatted_chunks if formatted_chunks else None

@observe(name="generate_rag_response", as_type="generation")
def _generate_rag_response(
self,
@@ -1639,6 +1666,7 @@ def _generate_rag_response(
questionOutOfLLMScope=False,
inputGuardFailed=False,
content=TECHNICAL_ISSUE_MESSAGE,
chunks=self._format_chunks_for_test_response(relevant_chunks),
)
else:
return OrchestrationResponse(
@@ -1706,6 +1734,7 @@
questionOutOfLLMScope=True,
inputGuardFailed=False,
content=OUT_OF_SCOPE_MESSAGE,
chunks=self._format_chunks_for_test_response(relevant_chunks),
)
else:
return OrchestrationResponse(
@@ -1725,6 +1754,7 @@
questionOutOfLLMScope=False,
inputGuardFailed=False,
content=answer,
chunks=self._format_chunks_for_test_response(relevant_chunks),
)
else:
return OrchestrationResponse(
@@ -1765,6 +1795,7 @@
questionOutOfLLMScope=False,
inputGuardFailed=False,
content=TECHNICAL_ISSUE_MESSAGE,
chunks=self._format_chunks_for_test_response(relevant_chunks),
)
else:
return OrchestrationResponse(
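The new _format_chunks_for_test_response helper assigns ranks from the retriever's original ordering and silently drops entries without usable text. A minimal sketch of that behaviour — the input dicts are hypothetical retriever output, and because the enclosing class is not named in this hunk, `service` below is a stand-in for an instance of it:

# Hypothetical retriever output; only the "text"/"content" keys and the
# 1-based ranking come from the method above.
relevant_chunks = [
    {"text": "First retrieved passage.", "score": 0.91},
    {"content": "Passage stored under 'content' instead of 'text'.", "score": 0.74},
    {"text": "   "},  # whitespace-only text is dropped; its rank (3) is skipped, not reassigned
]

chunks = service._format_chunks_for_test_response(relevant_chunks)
# chunks == [
#     ChunkInfo(rank=1, chunkRetrieved="First retrieved passage."),
#     ChunkInfo(rank=2, chunkRetrieved="Passage stored under 'content' instead of 'text'."),
# ]
# An empty or None input returns None, so the chunks field serialises as null.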
14 changes: 12 additions & 2 deletions src/llm_orchestration_service_api.py
@@ -332,20 +332,30 @@ def test_orchestrate_llm_request(
conversationHistory=[],
url="test-context",
environment=request.environment,
connection_id=str(request.connectionId),
connection_id=str(request.connectionId)
if request.connectionId is not None
else None,
)

logger.info(f"This is full request constructed for testing: {full_request}")

# Process the request using the same logic
response = orchestration_service.process_orchestration_request(full_request)

# Convert to TestOrchestrationResponse (exclude chatId)
# If response is already TestOrchestrationResponse (when environment is testing), return it directly
if isinstance(response, TestOrchestrationResponse):
logger.info(
f"Successfully processed test request for environment: {request.environment}"
)
return response

# Convert to TestOrchestrationResponse (exclude chatId) for other cases
test_response = TestOrchestrationResponse(
llmServiceActive=response.llmServiceActive,
questionOutOfLLMScope=response.questionOutOfLLMScope,
inputGuardFailed=response.inputGuardFailed,
content=response.content,
chunks=None, # OrchestrationResponse doesn't have chunks
)

logger.info(
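One detail behind the connection_id change: str(None) produces the string "None", so the explicit guard is what keeps a missing connectionId genuinely absent. A minimal illustration:

connection_id = None
str(connection_id)                                          # -> "None" (a truthy string)
str(connection_id) if connection_id is not None else None   # -> None (value stays absent)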
12 changes: 11 additions & 1 deletion src/models/request_models.py
@@ -230,10 +230,17 @@ class TestOrchestrationRequest(BaseModel):
..., description="Environment context"
)
connectionId: Optional[int] = Field(
..., description="Optional connection identifier"
None, description="Optional connection identifier"
)


class ChunkInfo(BaseModel):
"""Model for chunk information in test response."""

rank: int = Field(..., description="Rank of the retrieved chunk")
chunkRetrieved: str = Field(..., description="Content of the retrieved chunk")


class TestOrchestrationResponse(BaseModel):
"""Model for test orchestration response (without chatId)."""

@@ -245,3 +252,6 @@ class TestOrchestrationResponse(BaseModel):
..., description="Whether input guard validation failed"
)
content: str = Field(..., description="Response content with citations")
chunks: Optional[List[ChunkInfo]] = Field(
default=None, description="Retrieved chunks with rank and content"
)
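To make the wire format concrete, a minimal sketch that builds the new models and serialises them — the import path is an assumption, only the field names come from this diff, and the example assumes Pydantic v2:

from src.models.request_models import ChunkInfo, TestOrchestrationResponse  # import path assumed

response = TestOrchestrationResponse(
    llmServiceActive=True,
    questionOutOfLLMScope=False,
    inputGuardFailed=False,
    content="Answer with citations [1].",
    chunks=[
        ChunkInfo(rank=1, chunkRetrieved="Most relevant passage."),
        ChunkInfo(rank=2, chunkRetrieved="Next passage."),
    ],
)

# Pydantic v2; on v1 use response.json() instead.
print(response.model_dump_json(indent=2))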