43 changes: 36 additions & 7 deletions GUI/src/pages/TestProductionLLM/index.tsx
@@ -64,7 +64,38 @@ const TestProductionLLM: FC = () => {
       url: 'https://test-url.example.com',
     };
 
-    const response = await productionInference(request);
+    let response;
+    let attemptCount = 0;
+    const maxAttempts = 2;
+
+    // Retry logic
+    while (attemptCount < maxAttempts) {
+      try {
+        attemptCount++;
+        console.log(`Production Inference Attempt ${attemptCount}/${maxAttempts}`);
+        response = await productionInference(request);
+
+        // If we get a successful response, break out of retry loop
+        if (!response.status || response.status < 400) {
+          break;
+        }
+
+        // If first attempt failed with error status, retry once more
+        if (attemptCount < maxAttempts && response.status >= 400) {
+          console.log('Retrying due to error status...');
+          continue;
+        }
+      } catch (err) {
+        // If first attempt threw an error, retry once more
+        if (attemptCount < maxAttempts) {
+          console.log('Retrying due to exception...');
+          continue;
+        }
+        throw err; // Re-throw on final attempt
+      }
+    }
+
+    console.log('Production Inference Response:', response);
 
     // Create bot response message
     let botContent = '';
@@ -76,7 +107,7 @@ const TestProductionLLM: FC = () => {
         botMessageType = 'error';
       } else {
         // Success response
-        botContent = response.content || 'Response received successfully.';
+        botContent = response?.response?.content || 'Response received successfully.';
 
         if (response.questionOutOfLlmScope) {
           botContent += ' (Note: This question appears to be outside the LLM scope)';
@@ -95,10 +126,8 @@ const TestProductionLLM: FC = () => {
       // Show toast notification
       toast.open({
         type: botMessageType,
-        title: botMessageType === 'success' ? t('responseReceived') : t('errorOccurred'),
-        message: botMessageType === 'success'
-          ? t('successMessage')
-          : t('errorMessage'),
+        title: t('errorOccurred'),
+        message: t('errorMessage'),
       });
 
     } catch (error) {
@@ -215,4 +244,4 @@ const TestProductionLLM: FC = () => {
   );
 };
 
-export default TestProductionLLM;
\ No newline at end of file
+export default TestProductionLLM;
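
The retry loop above is inlined in the submit handler. For comparison, here is a minimal sketch of the same two-attempt pattern factored into a reusable helper; `withRetry` is an illustrative name, not part of this PR, and the response shape (an optional numeric `status` field) is assumed from the diff:

// Sketch only: `withRetry` is hypothetical and not part of this PR.
async function withRetry<T extends { status?: number }>(
  fn: () => Promise<T>,
  maxAttempts = 2,
): Promise<T> {
  let lastError: unknown;
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const response = await fn();
      // A missing status or any status below 400 counts as success.
      if (!response.status || response.status < 400) {
        return response;
      }
      // Mirror the diff: on the final attempt, return the failed response
      // instead of throwing, so the caller's error-message branch still runs.
      if (attempt === maxAttempts) {
        return response;
      }
    } catch (err) {
      lastError = err;
      // Re-throw only once the final attempt has failed.
      if (attempt === maxAttempts) {
        throw lastError;
      }
    }
  }
  throw lastError; // Unreachable for maxAttempts >= 1.
}

// Usage at the call site shown in the diff:
// const response = await withRetry(() => productionInference(request));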
6 changes: 1 addition & 5 deletions pyproject.toml
@@ -25,13 +25,9 @@ dependencies = [
     "uvicorn>=0.35.0",
     "qdrant-client>=1.15.1",
     "rank-bm25>=0.2.2",
-    "rerankers[transformers]>=0.10.0",
-    "deepeval>=3.6.0",
-    "pytest-json-report>=1.5.0",
-    "deepteam>=0.2.5",
     "anthropic>=0.69.0",
     "nemoguardrails>=0.16.0",
-    "rerankers[transformers]>=0.10.0",
+    "rerankers>=0.10.0",
     "deepeval>=3.6.0",
     "pytest-json-report>=1.5.0",
     "deepteam>=0.2.5",
179 changes: 0 additions & 179 deletions run_vector_indexer.py

This file was deleted.

41 changes: 31 additions & 10 deletions src/guardrails/nemo_rails_adapter.py
@@ -12,7 +12,6 @@
 from loguru import logger
 
 from src.guardrails.dspy_nemo_adapter import DSPyNeMoLLM
-from src.guardrails.rails_config import RAILS_CONFIG_PATH
 from src.llm_orchestrator_config.llm_manager import LLMManager
 from src.utils.cost_utils import get_lm_usage_since
 
@@ -76,6 +75,7 @@ def _register_custom_provider(self) -> None:
     def _ensure_initialized(self) -> None:
         """
         Lazy initialization of NeMo Rails with DSPy LLM.
+        Supports loading optimized guardrails configuration.
 
         Raises:
             RuntimeError: If initialization fails
@@ -95,21 +95,42 @@ def _ensure_initialized(self) -> None:
         # Step 2: Register custom LLM provider
         self._register_custom_provider()
 
-        # Step 3: Load rails configuration from YAML file
+        # Step 3: Load rails configuration (optimized or base)
         try:
-            if not RAILS_CONFIG_PATH.exists():
+            from src.guardrails.optimized_guardrails_loader import (
+                get_guardrails_loader,
+            )
+
+            # Try to load optimized config
+            guardrails_loader = get_guardrails_loader()
+            config_path, metadata = guardrails_loader.get_optimized_config_path()
+
+            if not config_path.exists():
                 raise FileNotFoundError(
-                    f"Rails config file not found: {RAILS_CONFIG_PATH}"
+                    f"Rails config file not found: {config_path}"
                 )
 
-            rails_config = RailsConfig.from_path(str(RAILS_CONFIG_PATH))
-            logger.info(f"Loaded rails config from: {RAILS_CONFIG_PATH}")
+            rails_config = RailsConfig.from_path(str(config_path))
+
+            # Log which config is being used
+            if metadata.get("optimized", False):
+                logger.info(
+                    f"Loaded OPTIMIZED guardrails config "
+                    f"(version: {metadata.get('version', 'unknown')})"
+                )
+                metrics = metadata.get("metrics", {})
+                if metrics:
+                    logger.info(
+                        f"  Optimization metrics: "
+                        f"weighted_accuracy={metrics.get('weighted_accuracy', 'N/A')}"
+                    )
+            else:
+                logger.info(f"Loaded BASE guardrails config from: {config_path}")
+
         except Exception as yaml_error:
-            logger.error(
-                f"Failed to load Rails YAML configuration: {str(yaml_error)}"
-            )
+            logger.error(f"Failed to load Rails configuration: {str(yaml_error)}")
             raise RuntimeError(
-                f"Rails YAML configuration error: {str(yaml_error)}"
+                f"Rails configuration error: {str(yaml_error)}"
             ) from yaml_error
 
         # Step 4: Initialize LLMRails with custom DSPy LLM
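
The adapter now resolves its configuration through get_guardrails_loader().get_optimized_config_path(), which is expected to return a (config_path, metadata) pair. The loader module itself is not shown in this PR; the sketch below illustrates only the assumed contract, with hypothetical directory and file names:

# Sketch only: the real src/guardrails/optimized_guardrails_loader.py is not
# part of this diff; paths and metadata keys here are assumptions.
import json
from pathlib import Path

BASE_CONFIG_DIR = Path("config/rails")                  # hypothetical
OPTIMIZED_CONFIG_DIR = Path("config/rails_optimized")   # hypothetical


class GuardrailsLoader:
    def get_optimized_config_path(self) -> tuple[Path, dict]:
        """Return (config_path, metadata); metadata['optimized'] marks which one."""
        meta_file = OPTIMIZED_CONFIG_DIR / "metadata.json"
        if OPTIMIZED_CONFIG_DIR.exists() and meta_file.exists():
            metadata = json.loads(meta_file.read_text())
            metadata["optimized"] = True
            return OPTIMIZED_CONFIG_DIR, metadata
        # Fall back to the base config with minimal metadata.
        return BASE_CONFIG_DIR, {"optimized": False}


_loader: GuardrailsLoader | None = None


def get_guardrails_loader() -> GuardrailsLoader:
    # Module-level singleton, matching the accessor used in the diff.
    global _loader
    if _loader is None:
        _loader = GuardrailsLoader()
    return _loader

Falling back to the base directory with {"optimized": False} keeps the adapter's logging branch well-defined even before any optimized configuration has been produced.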