43 changes: 36 additions & 7 deletions GUI/src/pages/TestProductionLLM/index.tsx
@@ -64,7 +64,38 @@ const TestProductionLLM: FC = () => {
       url: 'https://test-url.example.com',
     };
 
-    const response = await productionInference(request);
+    let response;
+    let attemptCount = 0;
+    const maxAttempts = 2;
+
+    // Retry logic
+    while (attemptCount < maxAttempts) {
+      try {
+        attemptCount++;
+        console.log(`Production Inference Attempt ${attemptCount}/${maxAttempts}`);
+        response = await productionInference(request);
+
+        // If we get a successful response, break out of retry loop
+        if (!response.status || response.status < 400) {
+          break;
+        }
+
+        // If first attempt failed with error status, retry once more
+        if (attemptCount < maxAttempts && response.status >= 400) {
+          console.log('Retrying due to error status...');
+          continue;
+        }
+      } catch (err) {
+        // If first attempt threw an error, retry once more
+        if (attemptCount < maxAttempts) {
+          console.log('Retrying due to exception...');
+          continue;
+        }
+        throw err; // Re-throw on final attempt
+      }
+    }
+
+    console.log('Production Inference Response:', response);
 
     // Create bot response message
     let botContent = '';
@@ -76,7 +107,7 @@ const TestProductionLLM: FC = () => {
         botMessageType = 'error';
       } else {
         // Success response
-        botContent = response.content || 'Response received successfully.';
+        botContent = response?.response?.content || 'Response received successfully.';
 
         if (response.questionOutOfLlmScope) {
           botContent += ' (Note: This question appears to be outside the LLM scope)';
@@ -95,10 +126,8 @@ const TestProductionLLM: FC = () => {
       // Show toast notification
       toast.open({
         type: botMessageType,
-        title: botMessageType === 'success' ? t('responseReceived') : t('errorOccurred'),
-        message: botMessageType === 'success'
-          ? t('successMessage')
-          : t('errorMessage'),
+        title: t('errorOccurred'),
+        message: t('errorMessage'),
       });
 
     } catch (error) {
@@ -215,4 +244,4 @@ const TestProductionLLM: FC = () => {
   );
 };
 
-export default TestProductionLLM;
\ No newline at end of file
+export default TestProductionLLM;
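
The retry loop above is inlined in the submit handler. For comparison, here is a minimal sketch of the same two-attempt pattern factored into a reusable helper; `withRetry` is an illustrative name, not part of this PR, and the response shape (an optional numeric `status` field) is assumed from the diff:

// Sketch only: `withRetry` is hypothetical and not part of this PR.
async function withRetry<T extends { status?: number }>(
  fn: () => Promise<T>,
  maxAttempts = 2,
): Promise<T> {
  let lastError: unknown;
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const response = await fn();
      // A missing status or any status below 400 counts as success.
      if (!response.status || response.status < 400) {
        return response;
      }
      // Mirror the diff: on the final attempt, return the failed response
      // instead of throwing, so the caller's error-message branch still runs.
      if (attempt === maxAttempts) {
        return response;
      }
    } catch (err) {
      lastError = err;
      // Re-throw only once the final attempt has failed.
      if (attempt === maxAttempts) {
        throw lastError;
      }
    }
  }
  throw lastError; // Unreachable for maxAttempts >= 1.
}

// Usage at the call site shown in the diff:
// const response = await withRetry(() => productionInference(request));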
6 changes: 1 addition & 5 deletions pyproject.toml
@@ -25,13 +25,9 @@ dependencies = [
     "uvicorn>=0.35.0",
     "qdrant-client>=1.15.1",
     "rank-bm25>=0.2.2",
-    "rerankers[transformers]>=0.10.0",
-    "deepeval>=3.6.0",
-    "pytest-json-report>=1.5.0",
-    "deepteam>=0.2.5",
     "anthropic>=0.69.0",
     "nemoguardrails>=0.16.0",
-    "rerankers[transformers]>=0.10.0",
+    "rerankers>=0.10.0",
     "deepeval>=3.6.0",
     "pytest-json-report>=1.5.0",
     "deepteam>=0.2.5",
179 changes: 0 additions & 179 deletions run_vector_indexer.py

This file was deleted.

41 changes: 31 additions & 10 deletions src/guardrails/nemo_rails_adapter.py
@@ -12,7 +12,6 @@
 from loguru import logger
 
 from src.guardrails.dspy_nemo_adapter import DSPyNeMoLLM
-from src.guardrails.rails_config import RAILS_CONFIG_PATH
 from src.llm_orchestrator_config.llm_manager import LLMManager
 from src.utils.cost_utils import get_lm_usage_since
 
@@ -76,6 +75,7 @@ def _register_custom_provider(self) -> None:
     def _ensure_initialized(self) -> None:
         """
         Lazy initialization of NeMo Rails with DSPy LLM.
+        Supports loading optimized guardrails configuration.
 
         Raises:
             RuntimeError: If initialization fails
@@ -95,21 +95,42 @@ def _ensure_initialized(self) -> None:
         # Step 2: Register custom LLM provider
         self._register_custom_provider()
 
-        # Step 3: Load rails configuration from YAML file
+        # Step 3: Load rails configuration (optimized or base)
         try:
-            if not RAILS_CONFIG_PATH.exists():
+            from src.guardrails.optimized_guardrails_loader import (
+                get_guardrails_loader,
+            )
+
+            # Try to load optimized config
+            guardrails_loader = get_guardrails_loader()
+            config_path, metadata = guardrails_loader.get_optimized_config_path()
+
+            if not config_path.exists():
                 raise FileNotFoundError(
-                    f"Rails config file not found: {RAILS_CONFIG_PATH}"
+                    f"Rails config file not found: {config_path}"
                 )
 
-            rails_config = RailsConfig.from_path(str(RAILS_CONFIG_PATH))
-            logger.info(f"Loaded rails config from: {RAILS_CONFIG_PATH}")
+            rails_config = RailsConfig.from_path(str(config_path))
+
+            # Log which config is being used
+            if metadata.get("optimized", False):
+                logger.info(
+                    f"Loaded OPTIMIZED guardrails config "
+                    f"(version: {metadata.get('version', 'unknown')})"
+                )
+                metrics = metadata.get("metrics", {})
+                if metrics:
+                    logger.info(
+                        f"  Optimization metrics: "
+                        f"weighted_accuracy={metrics.get('weighted_accuracy', 'N/A')}"
+                    )
+            else:
+                logger.info(f"Loaded BASE guardrails config from: {config_path}")
+
         except Exception as yaml_error:
-            logger.error(
-                f"Failed to load Rails YAML configuration: {str(yaml_error)}"
-            )
+            logger.error(f"Failed to load Rails configuration: {str(yaml_error)}")
             raise RuntimeError(
-                f"Rails YAML configuration error: {str(yaml_error)}"
+                f"Rails configuration error: {str(yaml_error)}"
             ) from yaml_error
 
         # Step 4: Initialize LLMRails with custom DSPy LLM
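
The adapter now resolves its configuration through get_guardrails_loader().get_optimized_config_path(), which is expected to return a (config_path, metadata) pair. The loader module itself is not shown in this PR; the sketch below illustrates only the assumed contract, with hypothetical directory and file names:

# Sketch only: the real src/guardrails/optimized_guardrails_loader.py is not
# part of this diff; paths and metadata keys here are assumptions.
import json
from pathlib import Path

BASE_CONFIG_DIR = Path("config/rails")                  # hypothetical
OPTIMIZED_CONFIG_DIR = Path("config/rails_optimized")   # hypothetical


class GuardrailsLoader:
    def get_optimized_config_path(self) -> tuple[Path, dict]:
        """Return (config_path, metadata); metadata['optimized'] marks which one."""
        meta_file = OPTIMIZED_CONFIG_DIR / "metadata.json"
        if OPTIMIZED_CONFIG_DIR.exists() and meta_file.exists():
            metadata = json.loads(meta_file.read_text())
            metadata["optimized"] = True
            return OPTIMIZED_CONFIG_DIR, metadata
        # Fall back to the base config with minimal metadata.
        return BASE_CONFIG_DIR, {"optimized": False}


_loader: GuardrailsLoader | None = None


def get_guardrails_loader() -> GuardrailsLoader:
    # Module-level singleton, matching the accessor used in the diff.
    global _loader
    if _loader is None:
        _loader = GuardrailsLoader()
    return _loader

Falling back to the base directory with {"optimized": False} keeps the adapter's logging branch well-defined even before any optimized configuration has been produced.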