diff --git a/.gitignore b/.gitignore index 1dde8af..706c607 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ __pycache__/ .env tim-db datasets -logs/ \ No newline at end of file +logs/ +data_sets +vault/agent-out diff --git a/DSL/CronManager/DSL/reset.yml b/DSL/CronManager/DSL/reset.yml new file mode 100644 index 0000000..cef6083 --- /dev/null +++ b/DSL/CronManager/DSL/reset.yml @@ -0,0 +1,5 @@ +budget_reset: + trigger: "0 0 0 1 * ?" # Runs at 00:00 AM 1st day of every month + # trigger: off + type: exec + command: "../app/scripts/budget_reset.sh -s 10" diff --git a/DSL/CronManager/config/config.ini b/DSL/CronManager/config/config.ini new file mode 100644 index 0000000..a154d8e --- /dev/null +++ b/DSL/CronManager/config/config.ini @@ -0,0 +1,3 @@ +[DSL] + +RAG_SEARCH_RESQL=http://resql:8082 diff --git a/DSL/CronManager/script/budget_reset.sh b/DSL/CronManager/script/budget_reset.sh new file mode 100644 index 0000000..406530e --- /dev/null +++ b/DSL/CronManager/script/budget_reset.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# DEFINING ENDPOINTS + +BUDGET_RESET_ENDPOINT=http://ruuter-public:8086/rag-search/llm-connections/cost/reset + +payload=$(cat < item.platform === platform); return platformData ? 
platformData.isConnect : false; +} + +export function isLabelsMismatch(newLabels, correctedLabels, predictedLabels) { + function check(arr, newLabels) { + if ( + Array.isArray(newLabels) && + Array.isArray(arr) && + newLabels.length === arr.length + ) { + for (let label of newLabels) { + if (!arr.includes(label)) { + return true; + } + } + return false; + } else { + return true; + } + } + + const val1 = check(correctedLabels, newLabels); + const val2 = check(predictedLabels, newLabels); + return val1 && val2; +} + +export function getOutlookExpirationDateTime() { + const currentDate = new Date(); + currentDate.setDate(currentDate.getDate() + 3); + const updatedDateISOString = currentDate.toISOString(); + return updatedDateISOString; +} + +export function findDuplicateStopWords(inputArray, existingArray) { + const set1 = new Set(existingArray); + const duplicates = inputArray.filter((item) => set1.has(item)); + const value = JSON.stringify(duplicates); + return value; +} + +export function findNotExistingStopWords(inputArray, existingArray) { + const set1 = new Set(existingArray); + const notExisting = inputArray.filter((item) => !set1.has(item)); + const value = JSON.stringify(notExisting); + return value; +} + +export function getRandomString() { + const randomHexString = randomBytes(32).toString("hex"); + return randomHexString; +} + +export function base64Decrypt(cipher, isObject) { + if (!cipher) { + return JSON.stringify({ + error: true, + message: 'Cipher is missing', + }); + } + + try { + const decodedContent = !isObject ? 
Buffer.from(cipher, 'base64').toString('utf8') : JSON.parse(Buffer.from(cipher, 'base64').toString('utf8')); + const cleanedContent = decodedContent.replace(/\r/g, ''); + return JSON.stringify({ + error: false, + content: cleanedContent + }); + } catch (err) { + return JSON.stringify({ + error: true, + message: 'Base64 Decryption Failed', + }); + } +} + +export function base64Encrypt(content) { + if (!content) { + return { + error: true, + message: 'Content is missing', + } + } + + try { + return JSON.stringify({ + error: false, + cipher: Buffer.from(typeof content === 'string' ? content : JSON.stringify(content)).toString('base64') + }); + } catch (err) { + return JSON.stringify({ + error: true, + message: 'Base64 Encryption Failed', + }); + } +} + +export function jsEscape(str) { + return JSON.stringify(str).slice(1, -1) +} + +export function isValidIntentName(name) { + // Allows letters (any unicode letter), numbers, and underscores + // Matches front-end validation with spaces replaced with underscores + return /^[\p{L}\p{N}_]+$/u.test(name); +} + +export function eq(v1, v2) { + return v1 === v2; +} + +export function getAgencyDataHash(agencyId) { + // Generate a random hash based on agency ID + // Create a consistent but seemingly random hash for each agencyId + const baseHash = agencyId.padEnd(10, agencyId); // Ensure at least 10 chars + let hash = ''; + const chars = 'abcdefghijklmnopqrstuvwxyz0123456789'; + + // Use the agencyId as a seed for pseudo-randomness + for (let i = 0; i < 16; i++) { + // Get character code from the baseHash, or use index if out of bounds + const charCode = i < baseHash.length ? 
baseHash.charCodeAt(i) : i; + // Use the character code to get an index in our chars string + const index = (charCode * 13 + i * 7) % chars.length; + hash += chars[index]; + } + + return hash; +} + +export function getAgencyDataAvailable(agencyId) { + // Use agencyId as a seed for deterministic but seemingly random result + // This ensures the same agencyId always gets the same result in the same session + + // Create a hash from the agencyId + let hashValue = 0; + for (let i = 0; i < agencyId.length; i++) { + hashValue = ((hashValue << 5) - hashValue) + agencyId.charCodeAt(i); + hashValue |= 0; // Convert to 32bit integer + } + + // Add a time component to make it change between sessions + // Use current date (year+month only) so it changes monthly but not every request + const date = new Date(); + const timeComponent = date.getFullYear() * 100 + date.getMonth(); + + // Combine the hash and time component for pseudo-randomness + const combinedValue = hashValue + timeComponent; + + // Return true or false based on even/odd value + return (combinedValue % 2) === 0; +} + +export function json(context) { + return JSON.stringify(context); +} + +/** + * Helper function to check if a value is an array + * @param {any} value - The value to check + * @returns {boolean} - True if value is an array, false otherwise + */ +export function isArray(value) { + return Array.isArray(value); +} + +/** + * Returns an array of agencies that are in centopsAgencies but not in gcAgencies (by agencyId). + * @param {Array} gcAgencies - Array of existing agencies, each with an agencyId property. + * @param {Array} centopsAgencies - Array of agencies from CentOps, each with an agencyId property. + * @returns {Array} Array of new agency objects from centopsAgencies. 
+ */ +export function extractNewAgencies(gcAgencies, centopsAgencies) { + const existingIds = new Set(gcAgencies.map(a => a.agencyId)); + const newAgencies = centopsAgencies.filter(a => !existingIds.has(a.agencyId)); + // return newAgencies; + return JSON.stringify({ + agencies: newAgencies, + }); +} + +/** + * Maps raw chunk rows to client-facing records. + * @param {Array} chunkData - Raw rows carrying agency_id, id, agency_name, question. + * @returns {string} JSON string of the mapped records. + * (Was documented as an S3 download with datasetId/pageNum params — stale.) + */ +export function getSingleChunkData(chunkData) { + const mapped = chunkData?.map(item => ({ + clientId: item.agency_id, + id: item.id, + clientName: item.agency_name, + question: item.question + })); + + return JSON.stringify(mapped); +} + +export function getPaginatedChunkIds(chunks, agencyId, pageNum, pageSize = 5) { + let agencyRecordIndex = 0; // total agency records seen so far + let collected = 0; // agency records collected for this page + let resultChunks = []; + let startIndex = 0; + let foundPage = false; + + for (const chunk of chunks) { + let agencies = JSON.parse(chunk.includedAgencies.value) + + const count = agencies.filter(a => String(a) === String(agencyId)).length; + if (count === 0) continue; + + // If we haven't reached the start of this page, skip these records + if (!foundPage && agencyRecordIndex + count < (pageNum - 1) * pageSize + 1) { + agencyRecordIndex += count; + continue; + } + + // If this is the first chunk of the page, calculate startIndex + if (!foundPage) { + startIndex = (pageNum - 1) * pageSize - agencyRecordIndex; + foundPage = true; + } + + resultChunks.push(chunk.chunkId); + collected += count; + + if (collected >= pageSize) break; + + agencyRecordIndex += count; + } + + return JSON.stringify( + { + chunks: resultChunks, + startIndex: startIndex + } + ); +} + +export function filterDataByAgency(aggregatedData, startIndex, agencyId, pageSize=5) { + + const filtered = aggregatedData.filter(item => String(item.agency_id) 
=== String(agencyId)); + + const paginated = filtered.slice(startIndex, startIndex + pageSize); + + const result = paginated.map(item => ({ + clientId: item.agency_id, + id: item.id, + clientName: item.agency_name, // mapped straight from the source row + question: item.question + })); + return JSON.stringify(result); + +} diff --git a/DSL/DMapper/rag-search/lib/requestLoggerMiddleware.js b/DSL/DMapper/rag-search/lib/requestLoggerMiddleware.js new file mode 100644 index 0000000..727a36f --- /dev/null +++ b/DSL/DMapper/rag-search/lib/requestLoggerMiddleware.js @@ -0,0 +1,30 @@ +/** + * @param res Original Response Object + * @param send Original UNMODIFIED res.send function + * @return A patched res.send which takes the send content, binds it to contentBody on + * the res and then calls the original res.send after restoring it + */ +const resDotSendInterceptor = (res, send) => (content) => { + res.contentBody = content; + res.send = send; + res.send(content); +}; + +export const requestLoggerMiddleware = + ({ logger }) => + (req, res, next) => { + logger( + `Request: {method: ${req.method}, url: ${ + req.url + }, params: ${JSON.stringify(req.params)}, query: ${JSON.stringify( + req.query + )}, body: ${JSON.stringify(req.body)}}` + ); + res.send = resDotSendInterceptor(res, res.send); + res.on("finish", () => { + logger( + `Response: {statusCode: ${res.statusCode}, responseData: ${res.contentBody}}` + ); + }); + next(); + }; diff --git a/DSL/Liquibase/changelog/rag-search-script-v1-llm-connections.sql b/DSL/Liquibase/changelog/rag-search-script-v1-llm-connections.sql index 2b2e958..cfead3d 100644 --- a/DSL/Liquibase/changelog/rag-search-script-v1-llm-connections.sql +++ b/DSL/Liquibase/changelog/rag-search-script-v1-llm-connections.sql @@ -10,14 +10,17 @@ CREATE TABLE llm_connections ( embedding_platform VARCHAR(100) NOT NULL, -- e.g. Azure AI, OpenAI embedding_model VARCHAR(100) NOT NULL, -- e.g. 
Ada-200-1 - -- Budget and Environment + -- Budget and Usage Tracking monthly_budget NUMERIC(12,2) NOT NULL, -- e.g. 1000.00 used_budget NUMERIC(12,2) DEFAULT 0.00, -- e.g. 250.00 - environment VARCHAR(50) NOT NULL, + warn_budget_threshold NUMERIC(5) DEFAULT 80, -- percentage to warn at + stop_budget_threshold NUMERIC(5) DEFAULT 100, -- percentage to stop at + disconnect_on_budget_exceed BOOLEAN DEFAULT TRUE, -- Metadata connection_status VARCHAR(50) DEFAULT 'active', -- active / inactive created_at TIMESTAMP DEFAULT NOW(), + environment VARCHAR(50) NOT NULL, -- Mocked Credentials and Access Info -- Azure diff --git a/DSL/Resql/rag-search/POST/deactivate-llm-connection-budget-exceed.sql b/DSL/Resql/rag-search/POST/deactivate-llm-connection-budget-exceed.sql new file mode 100644 index 0000000..af9da1b --- /dev/null +++ b/DSL/Resql/rag-search/POST/deactivate-llm-connection-budget-exceed.sql @@ -0,0 +1,11 @@ +UPDATE llm_connections +SET + connection_status = 'inactive' +WHERE id = :connection_id +RETURNING + id, + connection_name, + connection_status, + used_budget, + stop_budget_threshold, + disconnect_on_budget_exceed; diff --git a/DSL/Resql/rag-search/POST/get-configuration.sql b/DSL/Resql/rag-search/POST/get-configuration.sql new file mode 100644 index 0000000..f03b322 --- /dev/null +++ b/DSL/Resql/rag-search/POST/get-configuration.sql @@ -0,0 +1,5 @@ +SELECT id, key, value +FROM configuration +WHERE key=:key +AND id IN (SELECT max(id) from configuration GROUP BY key) +AND NOT deleted; diff --git a/DSL/Resql/rag-search/POST/get-llm-connection.sql b/DSL/Resql/rag-search/POST/get-llm-connection.sql index 16e86f6..30fdb93 100644 --- a/DSL/Resql/rag-search/POST/get-llm-connection.sql +++ b/DSL/Resql/rag-search/POST/get-llm-connection.sql @@ -6,6 +6,10 @@ SELECT embedding_platform, embedding_model, monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + used_budget, + disconnect_on_budget_exceed, environment, connection_status, created_at, diff --git 
a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql index 74bd584..419d7bc 100644 --- a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql +++ b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql @@ -6,20 +6,25 @@ SELECT embedding_platform, embedding_model, monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, used_budget, environment, connection_status, created_at, CEIL(COUNT(*) OVER() / :page_size::DECIMAL) AS totalPages, - -- Calculate budget status based on usage percentage + -- Calculate budget status based on usage percentage and configured thresholds CASE - WHEN used_budget IS NULL OR monthly_budget IS NULL OR monthly_budget = 0 THEN 'within_budget' - WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 1.0 THEN 'over_budget' - WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 0.8 THEN 'close_to_exceed' + WHEN used_budget IS NULL OR used_budget = 0 OR (used_budget::DECIMAL / monthly_budget::DECIMAL) < (warn_budget_threshold::DECIMAL / 100.0) THEN 'within_budget' + WHEN stop_budget_threshold != 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (stop_budget_threshold::DECIMAL / 100.0) THEN 'over_budget' + WHEN stop_budget_threshold = 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 1 THEN 'over_budget' + WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (warn_budget_threshold::DECIMAL / 100.0) THEN 'close_to_exceed' ELSE 'within_budget' END AS budget_status FROM llm_connections WHERE connection_status <> 'deleted' + AND environment = 'testing' -- NOTE(review): hard-coded filter makes the :environment bind below dead code and hides production rows; confirm intent + AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform) AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model) AND (:environment IS NULL OR :environment = '' OR environment = :environment) diff --git a/DSL/Resql/rag-search/POST/get-production-connection.sql b/DSL/Resql/rag-search/POST/get-production-connection.sql 
new file mode 100644 index 0000000..eca9f97 --- /dev/null +++ b/DSL/Resql/rag-search/POST/get-production-connection.sql @@ -0,0 +1,25 @@ +SELECT + id, + connection_name, + used_budget, + monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + environment, + connection_status, + created_at, + llm_platform, + llm_model, + embedding_platform, + embedding_model, + CASE + WHEN used_budget IS NULL OR used_budget = 0 OR (used_budget::DECIMAL / monthly_budget::DECIMAL) < (warn_budget_threshold::DECIMAL / 100.0) THEN 'within_budget' + WHEN stop_budget_threshold != 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (stop_budget_threshold::DECIMAL / 100.0) THEN 'over_budget' + WHEN stop_budget_threshold = 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 1 THEN 'over_budget' + WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (warn_budget_threshold::DECIMAL / 100.0) THEN 'close_to_exceed' + ELSE 'within_budget' + END AS budget_status +FROM llm_connections +WHERE environment = 'production' +ORDER BY created_at DESC +LIMIT 1; diff --git a/DSL/Resql/rag-search/POST/insert-llm-connection.sql b/DSL/Resql/rag-search/POST/insert-llm-connection.sql index c16296c..c4d9679 100644 --- a/DSL/Resql/rag-search/POST/insert-llm-connection.sql +++ b/DSL/Resql/rag-search/POST/insert-llm-connection.sql @@ -5,6 +5,9 @@ INSERT INTO llm_connections ( embedding_platform, embedding_model, monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, environment, connection_status, created_at, @@ -21,6 +24,9 @@ INSERT INTO llm_connections ( :embedding_platform, :embedding_model, :monthly_budget, + :warn_budget_threshold, + :stop_budget_threshold, + :disconnect_on_budget_exceed, :environment, :connection_status, :created_at::timestamp with time zone, @@ -32,11 +38,15 @@ INSERT INTO llm_connections ( :embedding_model_api_key ) RETURNING id, + connection_name, llm_platform, llm_model, embedding_platform, embedding_model, 
monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, environment, connection_status, created_at, diff --git a/DSL/Resql/rag-search/POST/reset-llm-connection-used-budget.sql b/DSL/Resql/rag-search/POST/reset-llm-connection-used-budget.sql new file mode 100644 index 0000000..581f0b9 --- /dev/null +++ b/DSL/Resql/rag-search/POST/reset-llm-connection-used-budget.sql @@ -0,0 +1,13 @@ +UPDATE llm_connections +SET + used_budget = 0.00 +WHERE connection_status <> 'deleted' +RETURNING + id, + connection_name, + monthly_budget, + used_budget, + (monthly_budget - used_budget) AS remaining_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed; diff --git a/DSL/Resql/rag-search/POST/update-llm-connection-environment.sql b/DSL/Resql/rag-search/POST/update-llm-connection-environment.sql new file mode 100644 index 0000000..c16b98c --- /dev/null +++ b/DSL/Resql/rag-search/POST/update-llm-connection-environment.sql @@ -0,0 +1,24 @@ +UPDATE llm_connections +SET + environment = :environment +WHERE id = :connection_id +RETURNING + id, + connection_name, + llm_platform, + llm_model, + embedding_platform, + embedding_model, + monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, + environment, + connection_status, + created_at, + deployment_name, + target_uri, + api_key, + secret_key, + access_key, + embedding_model_api_key; diff --git a/DSL/Resql/rag-search/POST/update-llm-connection-status.sql b/DSL/Resql/rag-search/POST/update-llm-connection-status.sql new file mode 100644 index 0000000..463936e --- /dev/null +++ b/DSL/Resql/rag-search/POST/update-llm-connection-status.sql @@ -0,0 +1,23 @@ +UPDATE llm_connections +SET connection_status = :connection_status +WHERE id = :connection_id +RETURNING + id, + connection_name, + llm_platform, + llm_model, + embedding_platform, + embedding_model, + monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + 
disconnect_on_budget_exceed, + environment, + connection_status, + created_at, + deployment_name, + target_uri, + api_key, + secret_key, + access_key, + embedding_model_api_key; diff --git a/DSL/Resql/rag-search/POST/update-llm-connection-used-budget.sql b/DSL/Resql/rag-search/POST/update-llm-connection-used-budget.sql new file mode 100644 index 0000000..ba6cd4d --- /dev/null +++ b/DSL/Resql/rag-search/POST/update-llm-connection-used-budget.sql @@ -0,0 +1,14 @@ +UPDATE llm_connections +SET + used_budget = used_budget + :usage +WHERE id = :connection_id +RETURNING + id, + connection_name, + monthly_budget, + used_budget, + (monthly_budget - used_budget) AS remaining_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, + connection_status; \ No newline at end of file diff --git a/DSL/Resql/rag-search/POST/update-llm-connection.sql b/DSL/Resql/rag-search/POST/update-llm-connection.sql index f3eae2a..a442227 100644 --- a/DSL/Resql/rag-search/POST/update-llm-connection.sql +++ b/DSL/Resql/rag-search/POST/update-llm-connection.sql @@ -6,6 +6,9 @@ SET embedding_platform = :embedding_platform, embedding_model = :embedding_model, monthly_budget = :monthly_budget, + warn_budget_threshold = :warn_budget_threshold, + stop_budget_threshold = :stop_budget_threshold, + disconnect_on_budget_exceed = :disconnect_on_budget_exceed, environment = :environment, -- Azure credentials deployment_name = :deployment_name, @@ -25,6 +28,9 @@ RETURNING embedding_platform, embedding_model, monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, environment, connection_status, created_at, diff --git a/DSL/Ruuter.private/rag-search/GET/llm-connections/cost/check.yml b/DSL/Ruuter.private/rag-search/GET/llm-connections/cost/check.yml new file mode 100644 index 0000000..df51bbb --- /dev/null +++ b/DSL/Ruuter.private/rag-search/GET/llm-connections/cost/check.yml @@ -0,0 +1,57 @@ +declaration: + call: declare + version: 0.1 + 
description: "Check if production LLM connection's used budget has exceeded warn or stop budget thresholds" + method: get + accepts: json + returns: json + namespace: rag-search + +get_production_connection_budget_status: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-production-connection" + body: {} + result: budget_result + next: check_budget_status + +check_budget_status: + switch: + - condition: "${budget_result.response.body.length > 0}" + next: process_budget_status + next: return_no_production_connection + +process_budget_status: + assign: + used_budget: '${budget_result.response.body[0].usedBudget || 0}' + monthly_budget: '${budget_result.response.body[0].monthlyBudget}' + warn_threshold: '${budget_result.response.body[0].warnBudgetThreshold}' + stop_threshold: '${budget_result.response.body[0].stopBudgetThreshold}' + warn_budget_amount: '${(monthly_budget * warn_threshold) / 100}' + stop_budget_amount: '${(monthly_budget * stop_threshold) / 100}' + exceeded_stop_budget: '${stop_budget_amount != 0 && used_budget >= stop_budget_amount}' + exceeded_warn_budget: '${used_budget >= warn_budget_amount}' + within_budget: '${used_budget < warn_budget_amount}' + used_budget_percentage: '${(used_budget * 100) / monthly_budget}' + + next: process_warnings + +process_warnings: + assign: + response: { + data: '${budget_result.response.body[0]}', + used_budget_percentage: '${used_budget_percentage}', + exceeded_stop_budget: '${exceeded_stop_budget}', + exceeded_warn_budget: '${exceeded_warn_budget}', + } + next: return_budget_status + +return_budget_status: + return: ${response} + status: 200 + next: end + +return_no_production_connection: + return: "No production LLM connection found" + status: 404 + next: end \ No newline at end of file diff --git a/DSL/Ruuter.private/rag-search/GET/llm-connections/production.yml b/DSL/Ruuter.private/rag-search/GET/llm-connections/production.yml new file mode 100644 index 0000000..be75219 --- /dev/null +++ 
b/DSL/Ruuter.private/rag-search/GET/llm-connections/production.yml @@ -0,0 +1,19 @@ +declaration: + call: declare + version: 0.1 + description: "Get production LLM connection" + method: get + returns: json + namespace: rag-search + +get_production_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-production-connection" + result: connection_result + next: return_success + +return_success: + return: ${connection_result.response.body} + status: 200 + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/inference/production.yml b/DSL/Ruuter.private/rag-search/POST/inference/production.yml new file mode 100644 index 0000000..5b13570 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/inference/production.yml @@ -0,0 +1,121 @@ +declaration: + call: declare + version: 0.1 + description: "Call LLM orchestration service with budget validation" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: chatId + type: string + description: "Chat ID" + - field: message + type: string + description: "User message" + - field: authorId + type: string + description: "Author ID" + - field: conversationHistory + type: array + description: "Conversation history" + - field: url + type: string + description: "URL reference" + +extract_request_data: + assign: + chatId: ${incoming.body.chatId} + message: ${incoming.body.message} + authorId: ${incoming.body.authorId} + conversationHistory: ${incoming.body.conversationHistory} + url: ${incoming.body.url} + next: get_production_connection + +get_production_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-production-connection" + body: {} + result: production_connection_result + next: validate_production_connection + +validate_production_connection: + switch: + - condition: "${production_connection_result.response.body.length > 0}" + next: extract_connection_id + next: return_no_production_connection + +extract_connection_id: + assign: + 
connection_id: ${Number(production_connection_result.response.body[0].id)} + next: check_budget_status + +check_budget_status: + call: http.post + args: + url: "[#RAG_SEARCH_RUUTER_PRIVATE]/llm-connections/usage/check" + body: + connection_id: ${connection_id} + headers: + cookie: ${incoming.headers.cookie} + result: budget_check_result + next: validate_budget_status + +validate_budget_status: + switch: + - condition: ${budget_check_result.response.body.response.isLLMConnectionDisconnected} + next: assign_disconnected_response + - condition: "${budget_check_result.response.statusCodeValue >= 400}" + next: return_budget_check_error + next: call_orchestrate_endpoint + +call_orchestrate_endpoint: + call: http.post + args: + url: "[#RAG_SEARCH_LLM_ORCHESTRATOR]" + body: + chatId: ${chatId} + message: ${message} + authorId: ${authorId} + conversationHistory: ${conversationHistory} + url: ${url} + environment: "production" + headers: + Content-Type: "application/json" + result: orchestrate_result + next: assign_response + +assign_response: + assign: + response: "${orchestrate_result.response.body}" + next: return_orchestrate_response + +return_orchestrate_response: + return: ${response} + next: end + +assign_disconnected_response: + assign: + disconnected_response: + { + chatId: "${chatId}", + content: "The LLM connection is currently unavailable. Your request couldn’t be processed. 
Please retry shortly.", + status: 400 + } + next: return_connection_disconnected + +return_connection_disconnected: + status: 400 + return: ${disconnected_response} + next: end + +return_budget_check_error: + return: ${budget_check_result.response.body} + next: end + +return_no_production_connection: + status: 404 + return: "No production connection found" + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/inference/test.yml b/DSL/Ruuter.private/rag-search/POST/inference/test.yml new file mode 100644 index 0000000..61a5bd9 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/inference/test.yml @@ -0,0 +1,102 @@ +declaration: + call: declare + version: 0.1 + description: "Call LLM orchestration service with budget validation" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: connectionId + type: string + description: "Connection ID" + - field: message + type: string + description: "User message" + +extract_request_data: + assign: + connectionId: ${incoming.body.connectionId} + message: ${incoming.body.message} + next: get_connection + +get_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-llm-connection" + body: + connection_id: ${connectionId} + result: connection_result + next: validate_connection + +validate_connection: + switch: + - condition: "${connection_result.response.body.length > 0}" + next: check_budget_status + next: return_no_test_connection + +check_budget_status: + call: http.post + args: + url: "[#RAG_SEARCH_RUUTER_PRIVATE]/llm-connections/usage/check" + body: + connection_id: ${connectionId} + headers: + cookie: ${incoming.headers.cookie} + result: budget_check_result + next: validate_budget_status + +validate_budget_status: + switch: + - condition: ${budget_check_result.response.body.response.isLLMConnectionDisconnected} + next: assign_disconnected_response + - condition: "${budget_check_result.response.statusCodeValue >= 400}" + next: return_budget_check_error + 
next: call_orchestrate_endpoint + +call_orchestrate_endpoint: + call: http.post + args: + url: "[#RAG_SEARCH_LLM_ORCHESTRATOR]/test" + body: + connectionId: ${connectionId} + message: ${message} + environment: "test" + headers: + Content-Type: "application/json" + result: orchestrate_result + next: assign_response + +assign_response: + assign: + response: "${orchestrate_result.response.body}" + next: return_orchestrate_response + +return_orchestrate_response: + return: ${response} + next: end + +assign_disconnected_response: + assign: + disconnected_response: + { + connectionId: "${connectionId}", + content: "The LLM connection is currently unavailable. Your request couldn’t be processed. Please retry shortly.", + status: 400 + } + next: return_connection_disconnected + +return_connection_disconnected: + status: 400 + return: ${disconnected_response} + next: end + +return_budget_check_error: + return: ${budget_check_result.response.body} + next: end + +return_no_test_connection: + status: 404 + return: "No test connection found" + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/add.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/add.yml index 512238a..dffe487 100644 --- a/DSL/Ruuter.private/rag-search/POST/llm-connections/add.yml +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/add.yml @@ -26,6 +26,15 @@ declaration: - field: monthly_budget type: number description: "Monthly budget amount" + - field: warn_budget_threshold + type: number + description: "Warn budget threshold percentage" + - field: stop_budget_threshold + type: number + description: "Stop budget threshold percentage" + - field: disconnect_on_budget_exceed + type: boolean + description: "Automatically disconnect when budget threshold is exceeded" - field: deployment_environment type: string description: "Deployment environment (Testing or Production)" @@ -59,6 +68,9 @@ extract_request_data: embedding_platform: ${incoming.body.embedding_platform} 
embedding_model: ${incoming.body.embedding_model} monthly_budget: ${incoming.body.monthly_budget} + warn_budget_threshold: ${incoming.body.warn_budget_threshold || 0} + stop_budget_threshold: ${incoming.body.stop_budget_threshold || 0} + disconnect_on_budget_exceed: ${incoming.body.disconnect_on_budget_exceed || false} deployment_environment: ${incoming.body.deployment_environment} deployment_name: ${incoming.body.deployment_name || ""} target_uri: ${incoming.body.target_uri || ""} @@ -72,9 +84,38 @@ extract_request_data: validate_environment: switch: - condition: ${deployment_environment == "testing" || deployment_environment == "production"} - next: add_llm_connection + next: check_production_environment next: return_invalid_environment +check_production_environment: + switch: + - condition: ${deployment_environment == "production"} + next: get_existing_production_connection + next: add_llm_connection + +get_existing_production_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-production-connection" + result: existing_production_result + next: update_existing_production_to_testing + +update_existing_production_to_testing: + switch: + - condition: ${existing_production_result.response.body && existing_production_result.response.body.length > 0} + next: update_production_connection + next: add_llm_connection + +update_production_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/update-llm-connection-environment" + body: + connection_id: ${existing_production_result.response.body[0].id} + environment: "testing" + result: update_result + next: add_llm_connection + add_llm_connection: call: http.post args: @@ -86,6 +127,9 @@ add_llm_connection: embedding_platform: ${embedding_platform} embedding_model: ${embedding_model} monthly_budget: ${monthly_budget} + warn_budget_threshold: ${warn_budget_threshold} + stop_budget_threshold: ${stop_budget_threshold} + disconnect_on_budget_exceed: ${disconnect_on_budget_exceed} environment: 
${deployment_environment} connection_status: "active" created_at: ${new Date().toISOString()} @@ -96,10 +140,19 @@ add_llm_connection: access_key: ${access_key} embedding_model_api_key: ${embedding_model_api_key} result: connection_result + next: assign_connection_response + +assign_connection_response: + assign: + response: { + id: "${connection_result.response.body[0].id}", + status: 201, + operationSuccess: true + } next: return_success return_success: - return: "LLM connection added successfully" + return: ${response} status: 200 next: end diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/cost/update.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/cost/update.yml new file mode 100644 index 0000000..d0e55c3 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/cost/update.yml @@ -0,0 +1,150 @@ +declaration: + call: declare + version: 0.1 + description: "Update used budget for an LLM connection" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: connection_id + type: number + description: "LLM connection ID" + - field: usage + type: number + description: "Usage amount to add to current used_budget" + +extract_request_data: + assign: + connection_id: ${Number(incoming.body.connection_id)} + usage: ${Number(incoming.body.usage)} + next: validate_request + +validate_request: + switch: + - condition: ${connection_id && usage >= 0} + next: check_connection_exists + next: return_bad_request + +check_connection_exists: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-llm-connection" + body: + connection_id: ${connection_id} + result: existing_connection + next: validate_connection_exists + +validate_connection_exists: + switch: + - condition: "${existing_connection.response.body.length > 0}" + next: update_used_budget + next: return_not_found + +update_used_budget: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/update-llm-connection-used-budget" + body: + 
connection_id: ${connection_id} + usage: ${usage} + result: update_result + next: check_update_status + +check_update_status: + switch: + - condition: ${200 <= update_result.response.statusCodeValue && update_result.response.statusCodeValue < 300} + next: check_budget_threshold + next: return_update_failed + +check_budget_threshold: + assign: + updated_connection: ${update_result.response.body[0]} + disconnect_flag: ${updated_connection.disconnectOnBudgetExceed} + monthly_budget: ${Number(updated_connection.monthlyBudget)} + used_budget: ${Number(updated_connection.usedBudget)} + stop_threshold: ${Number(updated_connection.stopBudgetThreshold)} + threshold_amount: ${(monthly_budget / 100) * stop_threshold} + should_deactivate: ${disconnect_flag && used_budget >= threshold_amount} + next: evaluate_deactivation + +evaluate_deactivation: + switch: + - condition: ${should_deactivate} + next: deactivate_connection + next: format_success_response + +deactivate_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/deactivate-llm-connection-budget-exceed" + body: + connection_id: ${connection_id} + result: deactivate_result + next: check_deactivate_status + +check_deactivate_status: + switch: + - condition: ${200 <= deactivate_result.response.statusCodeValue && deactivate_result.response.statusCodeValue < 300} + next: assignDisconnectResponse + next: format_success_response + +assignDisconnectResponse: + assign: + data_budget_exceeded: { + data: '${deactivate_result.response.body[0]}', + } + next: format_budget_exceeded_response + +format_budget_exceeded_response: + assign: + response_budget_exceeded: { + data: '${data_budget_exceeded.data}', + message: 'Used budget updated successfully. 
Connection deactivated due to budget threshold exceeded.', + budgetExceeded: true, + operationSuccess: true, + statusCode: 200 + } + next: return_budget_exceeded + +format_success_response: + assign: + response_success: { + data: '${update_result.response.body[0]}', + budgetExceeded: false, + message: 'Used budget updated successfully', + operationSuccess: true, + statusCode: 200 + } + next: return_success + +return_budget_exceeded: + return: ${response_budget_exceeded} + status: 200 + next: end + +return_success: + return: ${response_success} + status: 200 + next: end + +return_not_found: + status: 404 + return: "error: connection not found" + next: end + +return_bad_request: + status: 400 + return: "error: connection_id and usage (>= 0) are required" + next: end + +return_update_failed: + status: 500 + return: "error: failed to update used budget" + next: end + +return_unauthorized: + status: 401 + return: "error: unauthorized" + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/edit.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/edit.yml index 417109f..420f3ca 100644 --- a/DSL/Ruuter.private/rag-search/POST/llm-connections/edit.yml +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/edit.yml @@ -29,6 +29,15 @@ declaration: - field: monthly_budget type: number description: "Monthly budget amount" + - field: warn_budget_threshold + type: number + description: "Warn budget threshold percentage" + - field: stop_budget_threshold + type: number + description: "Stop budget threshold percentage" + - field: disconnect_on_budget_exceed + type: boolean + description: "Automatically disconnect when budget threshold is exceeded" - field: deployment_environment type: string description: "Deployment environment (Testing or Production)" @@ -60,6 +69,9 @@ extract_request_data: embedding_platform: ${incoming.body.embedding_platform} embedding_model: ${incoming.body.embedding_model} monthly_budget: ${Number(incoming.body.monthly_budget)} + 
warn_budget_threshold: ${Number(incoming.body.warn_budget_threshold)} + stop_budget_threshold: ${Number(incoming.body.stop_budget_threshold)} + disconnect_on_budget_exceed: ${incoming.body.disconnect_on_budget_exceed} environment: ${incoming.body.deployment_environment} deployment_name: ${incoming.body.deployment_name || ""} target_uri: ${incoming.body.target_uri || ""} @@ -103,6 +115,9 @@ update_llm_connection: embedding_platform: ${embedding_platform} embedding_model: ${embedding_model} monthly_budget: ${monthly_budget} + warn_budget_threshold: ${warn_budget_threshold} + stop_budget_threshold: ${stop_budget_threshold} + disconnect_on_budget_exceed: ${disconnect_on_budget_exceed} environment: ${environment} deployment_name: ${deployment_name} target_uri: ${target_uri} diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/update-status.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/update-status.yml new file mode 100644 index 0000000..953e392 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/update-status.yml @@ -0,0 +1,91 @@ +declaration: + call: declare + version: 0.1 + description: "Update LLM connection status (active/inactive)" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: connection_id + type: number + description: "LLM connection ID" + - field: connection_status + type: string + description: "Connection status (active/inactive)" + +extract_request_data: + assign: + connection_id: ${Number(incoming.body.connection_id)} + connection_status: ${incoming.body.connection_status} + next: validate_request + +validate_request: + switch: + - condition: ${!connection_id || !connection_status} + next: return_bad_request + - condition: ${connection_status !== "active" && connection_status !== "inactive"} + next: return_invalid_status + next: check_connection_exists + +check_connection_exists: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-llm-connection" + body: + 
connection_id: ${connection_id} + result: existing_connection + next: validate_connection_exists + +validate_connection_exists: + switch: + - condition: "${existing_connection.response.body.length > 0}" + next: update_connection_status + next: return_not_found + +update_connection_status: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/update-llm-connection-status" + body: + connection_id: ${connection_id} + connection_status: ${connection_status} + result: update_result + next: check_update_status + +check_update_status: + switch: + - condition: ${200 <= update_result.response.statusCodeValue && update_result.response.statusCodeValue < 300} + next: return_success + next: return_update_failed + +return_success: + return: ${update_result.response.body[0]} + status: 200 + next: end + +return_not_found: + status: 404 + return: "error: connection not found" + next: end + +return_bad_request: + status: 400 + return: "error: connection_id and connection_status are required" + next: end + +return_invalid_status: + status: 400 + return: "error: connection_status must be 'active' or 'inactive'" + next: end + +return_update_failed: + status: 500 + return: "error: failed to update connection status" + next: end + +return_unauthorized: + status: 401 + return: "error: unauthorized" + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/usage/check.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/usage/check.yml new file mode 100644 index 0000000..eabf745 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/usage/check.yml @@ -0,0 +1,99 @@ +declaration: + call: declare + version: 0.1 + description: "Check budget usage and either proceed with orchestration or return budget exceeded response" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: connection_id + type: number + description: "LLM connection ID" + +extract_request_data: + assign: + connection_id: 
${Number(incoming.body.connection_id)} + next: get_connection + +get_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-llm-connection" + body: + connection_id: ${connection_id} + result: connection_result + next: check_connection_exists + +check_connection_exists: + switch: + - condition: "${connection_result.response.body.length > 0}" + next: calculate_budget_threshold + next: return_connection_not_found + +calculate_budget_threshold: + assign: + connection_data: ${connection_result.response.body[0]} + monthly_budget: ${Number(connection_data.monthlyBudget)} + used_budget: ${Number(connection_data.usedBudget)} + stop_budget_threshold: ${Number(connection_data.stopBudgetThreshold)} + disconnect_on_budget_exceed: ${connection_data.disconnectOnBudgetExceed} + budget_threshold_amount: ${monthly_budget / 100 * stop_budget_threshold} + next: check_budget_threshold + +check_budget_threshold: + switch: + - condition: ${used_budget >= budget_threshold_amount} + next: handle_budget_exceeded + next: format_budget_within_threshold + +handle_budget_exceeded: + switch: + - condition: ${disconnect_on_budget_exceed} + next: format_budget_exceeded_disconnected + next: format_budget_exceeded_not_disconnected + +format_budget_within_threshold: + assign: + response_budget_within_threshold: + { + isBudgetExceed: false, + isLLMConnectionDisconnected: false, + } + next: return_budget_within_threshold + +format_budget_exceeded_not_disconnected: + assign: + response_budget_exceeded_not_disconnected: + { + isBudgetExceed: true, + isLLMConnectionDisconnected: false, + } + next: return_budget_exceeded_not_disconnected + +format_budget_exceeded_disconnected: + assign: + response_budget_exceeded_disconnected: + { + isBudgetExceed: true, + isLLMConnectionDisconnected: true, + } + next: return_budget_exceeded_disconnected + +return_budget_within_threshold: + return: ${response_budget_within_threshold} + next: end + +return_budget_exceeded_not_disconnected: + return: 
${response_budget_exceeded_not_disconnected} + next: end + +return_budget_exceeded_disconnected: + return: ${response_budget_exceeded_disconnected} + next: end + +return_connection_not_found: + status: 404 + return: "Connection not found" + next: end diff --git a/DSL/Ruuter.public/rag-search/POST/llm-connections/cost/reset.yml b/DSL/Ruuter.public/rag-search/POST/llm-connections/cost/reset.yml new file mode 100644 index 0000000..cc55fec --- /dev/null +++ b/DSL/Ruuter.public/rag-search/POST/llm-connections/cost/reset.yml @@ -0,0 +1,42 @@ +declaration: + call: declare + version: 0.1 + description: "Reset used budget for all LLM connections to 0" + method: post + accepts: json + returns: json + namespace: rag-search + +reset_used_budget: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/reset-llm-connection-used-budget" + body: {} + result: reset_result + next: check_reset_status + +check_reset_status: + switch: + - condition: ${200 <= reset_result.response.statusCodeValue && reset_result.response.statusCodeValue < 300} + next: format_success_response + next: return_reset_failed + +format_success_response: + assign: + response_success: { + message: 'Used budget reset to 0 successfully for all connections', + totalConnections: '${reset_result.response.body.length}', + operationSuccess: true, + statusCode: 200 + } + next: return_success + +return_success: + return: ${response_success} + status: 200 + next: end + +return_reset_failed: + return: "error: failed to reset used budget" + status: 500 + next: end diff --git a/GUI/.env.development b/GUI/.env.development index 7ff4d8b..39f5e47 100644 --- a/GUI/.env.development +++ b/GUI/.env.development @@ -1,8 +1,7 @@ REACT_APP_RUUTER_API_URL=http://localhost:8086 REACT_APP_RUUTER_PRIVATE_API_URL=http://localhost:8088 -REACT_APP_EXTERNAL_API_URL=http://localhost:8000 REACT_APP_CUSTOMER_SERVICE_LOGIN=http://localhost:3004/et/dev-auth REACT_APP_SERVICE_ID=conversations,settings,monitoring 
-REACT_APP_NOTIFICATION_NODE_URL=http://localhost:4040 -REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:8085 http://localhost:4040; +REACT_APP_NOTIFICATION_NODE_URL=http://localhost:3005 +REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 ws://localhost; REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE \ No newline at end of file diff --git a/GUI/src/components/FormElements/FormInput/index.tsx b/GUI/src/components/FormElements/FormInput/index.tsx index dbf2b95..7d681a6 100644 --- a/GUI/src/components/FormElements/FormInput/index.tsx +++ b/GUI/src/components/FormElements/FormInput/index.tsx @@ -2,7 +2,7 @@ import { forwardRef, InputHTMLAttributes, PropsWithChildren, useId } from 'react import clsx from 'clsx'; import './FormInput.scss'; import { DefaultTFuncReturn } from 'i18next'; -import { formatNumberWithCommas, removeCommasFromNumber } from 'utils/commonUtilts'; +import { formatNumberWithCommas, removeCommasFromNumber } from 'utils/commonUtils'; type InputProps = PropsWithChildren> & { label: string; diff --git a/GUI/src/components/molecules/BudgetBanner/BudgetBanner.scss b/GUI/src/components/molecules/BudgetBanner/BudgetBanner.scss new file mode 100644 index 0000000..6d71a8d --- /dev/null +++ b/GUI/src/components/molecules/BudgetBanner/BudgetBanner.scss @@ -0,0 +1,64 @@ +.budget-banner { + width: 100%; + padding: 20px; + margin-bottom: 20px; + border-radius: 8px; + border: 1px solid; + + &__content { + display: flex; + align-items: center; + + } + + &__message { + font-size: 
16px; + font-weight: 400; + line-height: 1.4; + } + + &__description { + font-size: 14px; + font-weight: 400; + line-height: 1.4; + } + + &--warning { + background-color: #fef3cd; + border-color: #b57c00; + color: #94690d; + + .budget-banner__message { + color: #94690d; + } + } + + &--error { + background-color: #ffeaeb; + border-color: #dc3545; + color: #721c24; + + .budget-banner__message { + color: #721c24; + } + } + + // Responsive design + @media (max-width: 768px) { + padding: 10px 12px; + + &__content { + flex-direction: column; + align-items: flex-start; + gap: 8px; + } + + &__message { + font-size: 13px; + } + } +} + +.m-3 { + margin-top: .75rem !important; +} \ No newline at end of file diff --git a/GUI/src/components/molecules/BudgetBanner/index.tsx b/GUI/src/components/molecules/BudgetBanner/index.tsx new file mode 100644 index 0000000..743900d --- /dev/null +++ b/GUI/src/components/molecules/BudgetBanner/index.tsx @@ -0,0 +1,80 @@ +import React from 'react'; +import { useQuery } from '@tanstack/react-query'; +import { useNavigate } from 'react-router-dom'; +import { checkBudgetStatus, BudgetStatus } from 'services/llmConnections'; +import { llmConnectionsQueryKeys } from 'utils/queryKeys'; +import './BudgetBanner.scss'; +import Button from 'components/Button'; +import { MdOutlineGppMaybe, MdWarning } from 'react-icons/md'; + +const BudgetBanner: React.FC = () => { + const navigate = useNavigate(); + const { data: budgetStatus } = useQuery({ + queryKey: llmConnectionsQueryKeys.budgetStatus(), + queryFn: checkBudgetStatus, + }); + + if (!budgetStatus) { + return null; + } + + const getBannerContent = (status: BudgetStatus) => { + const { used_budget_percentage, exceeded_stop_budget, exceeded_warn_budget, data } = status; + + if (exceeded_stop_budget) { + return { + type: 'error' as const, + message: `Production LLM connection disabled`, + description: `${data?.llmPlatform === "aws" ? "AWS Bedrock" : "Azure OpenAI"} integration has exceeded its budget. 
Update budget to reactivate LLM connection.`, + icon: + }; + } + + if (exceeded_warn_budget) { + return { + type: 'warning' as const, + message: `${used_budget_percentage?.toFixed(1)}% of connection budget is used.`, + description: `${data?.llmPlatform === "aws" ? "AWS Bedrock" : "Azure OpenAI"} integration has used ${used_budget_percentage?.toFixed(1)}% of its budget. Review connection budget to avoid disconnections`, + icon: + + }; + } + + return null; // Don't show banner if within budget + }; + + const bannerContent = getBannerContent(budgetStatus); + + if (!bannerContent) { + return null; + } + + return ( +
+
+ {bannerContent.icon} + + {bannerContent.message} + +
+ + {bannerContent.description} + +

+
+ {budgetStatus.exceeded_warn_budget && !budgetStatus.exceeded_stop_budget ? + ( + + ) : ( + + ) + } +
+ ); +}; + +export default BudgetBanner; diff --git a/GUI/src/components/molecules/LLMConnectionCard/index.tsx b/GUI/src/components/molecules/LLMConnectionCard/index.tsx index 2e06f9a..8d843ff 100644 --- a/GUI/src/components/molecules/LLMConnectionCard/index.tsx +++ b/GUI/src/components/molecules/LLMConnectionCard/index.tsx @@ -1,13 +1,17 @@ -import { FC, PropsWithChildren } from 'react'; +import { FC, PropsWithChildren, useState } from 'react'; import Button from 'components/Button'; import Label from 'components/Label'; import { useDialog } from 'hooks/useDialog'; import './LLMConnectionCard.scss'; import { useTranslation } from 'react-i18next'; -import { formatDate } from 'utils/commonUtilts'; import { useNavigate } from 'react-router-dom'; -import { pl } from 'date-fns/locale'; import { Switch } from 'components/FormElements'; +import { updateLLMConnectionStatus } from 'services/llmConnections'; +import { useToast } from 'hooks/useToast'; +import { ToastTypes } from 'enums/commonEnums'; +import { useMutation, useQueryClient } from '@tanstack/react-query'; +import { llmConnectionsQueryKeys } from 'utils/queryKeys'; +import { AxiosError } from 'axios'; type LLMConnectionCardProps = { llmConnectionId: number | string; @@ -17,6 +21,7 @@ type LLMConnectionCardProps = { isActive?: boolean; deploymentEnv?: string; budgetStatus?: string; + onStatusChange?: (id: number | string, newStatus: boolean) => void; }; const LLMConnectionCard: FC> = ({ @@ -27,19 +32,69 @@ const LLMConnectionCard: FC> = ({ isActive, deploymentEnv, budgetStatus, - + onStatusChange, }) => { const { open, close } = useDialog(); const { t } = useTranslation(); const navigate = useNavigate(); + const toast = useToast(); + const queryClient = useQueryClient(); + + const updateStatusMutation = useMutation({ + mutationFn: ({ id, status }: { id: string | number; status: 'active' | 'inactive' }) => + updateLLMConnectionStatus(id, status), + onSuccess: async (data, variables) => { + // Invalidate queries to 
refresh the data + await queryClient.invalidateQueries({ + queryKey: llmConnectionsQueryKeys.all() + }); + + toast.open({ + type: ToastTypes.SUCCESS, + title: t('toast.success.title'), + message: `Connection ${variables.status === 'active' ? 'activated' : 'deactivated'} successfully`, + }); + + // Call the parent callback to update the list immediately + if (onStatusChange) { + onStatusChange(llmConnectionId, variables.status === 'active'); + } + }, + onError: (error: AxiosError) => { + console.error('Error updating connection status:', error); + toast.open({ + type: ToastTypes.ERROR, + title: t('toast.error.title'), + message: 'Failed to update connection status', + }); + }, + }); + + const handleStatusChange = async (checked: boolean) => { + if (updateStatusMutation.isLoading) return; + + const newStatus = checked ? 'active' : 'inactive'; + updateStatusMutation.mutate({ + id: llmConnectionId, + status: newStatus + }); + }; const renderDeploymentEnv = (deploymentEnvironment: string | undefined) => { - return ( + if (deploymentEnvironment === "testing") { + return ( + + ); + } else if (deploymentEnvironment === "production") { + return ( ); + } }; const renderBudgetStatus = (status: string | undefined) => { @@ -71,8 +126,9 @@ const LLMConnectionCard: FC> = ({

{llmConnectionName}

{}} + checked={isActive ?? false} + onCheckedChange={handleStatusChange} + disabled={updateStatusMutation.isLoading} /> diff --git a/GUI/src/components/molecules/LLMConnectionForm/index.tsx b/GUI/src/components/molecules/LLMConnectionForm/index.tsx index cf0a68b..04557de 100644 --- a/GUI/src/components/molecules/LLMConnectionForm/index.tsx +++ b/GUI/src/components/molecules/LLMConnectionForm/index.tsx @@ -4,17 +4,19 @@ import { useTranslation } from 'react-i18next'; import { useQuery } from '@tanstack/react-query'; import FormInput from 'components/FormElements/FormInput'; import FormSelect from 'components/FormElements/FormSelect'; +import FormCheckbox from 'components/FormElements/FormCheckbox'; import Button from 'components/Button'; import Track from 'components/Track'; -import { - getLLMPlatforms, - getLLMModels, - getEmbeddingPlatforms, +import { + getLLMPlatforms, + getLLMModels, + getEmbeddingPlatforms, getEmbeddingModels, PlatformOption, - ModelOption + ModelOption } from 'services/llmConfigs'; import './LLMConnectionForm.scss'; +import { toOptions } from 'utils/commonUtils'; export type LLMConnectionFormData = { connectionName: string; @@ -23,6 +25,9 @@ export type LLMConnectionFormData = { embeddingModelPlatform: string; embeddingModel: string; monthlyBudget: string; + warnBudget: string; + stopBudget: string; + disconnectOnBudgetExceed: boolean; deploymentEnvironment: string; // AWS Bedrock credentials accessKey?: string; @@ -68,7 +73,10 @@ const LLMConnectionForm: React.FC = ({ embeddingModel: '', embeddingModelApiKey: '', monthlyBudget: '', - deploymentEnvironment: 'testing', + warnBudget: '', + stopBudget: '', + disconnectOnBudgetExceed: false, + deploymentEnvironment: '', // AWS Bedrock credentials accessKey: '', secretKey: '', @@ -84,65 +92,36 @@ const LLMConnectionForm: React.FC = ({ const selectedLLMPlatform = watch('llmPlatform'); const selectedEmbeddingPlatform = watch('embeddingModelPlatform'); + const disconnectOnBudgetExceed = 
watch('disconnectOnBudgetExceed'); // Fetch platform and model options from API const { data: llmPlatformsData = [], isLoading: llmPlatformsLoading, error: llmPlatformsError } = useQuery({ queryKey: ['llm-platforms'], - queryFn: getLLMPlatforms, - retry: 2, - staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: getLLMPlatforms }); const { data: embeddingPlatformsData = [], isLoading: embeddingPlatformsLoading, error: embeddingPlatformsError } = useQuery({ queryKey: ['embedding-platforms'], - queryFn: getEmbeddingPlatforms, - retry: 2, - staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: getEmbeddingPlatforms }); const { data: llmModelsData = [], isLoading: llmModelsLoading, error: llmModelsError } = useQuery({ queryKey: ['llm-models', selectedLLMPlatform], queryFn: () => getLLMModels(selectedLLMPlatform), enabled: !!selectedLLMPlatform, - retry: 2, - staleTime: 2 * 60 * 1000, // 2 minutes }); const { data: embeddingModelsData = [], isLoading: embeddingModelsLoading, error: embeddingModelsError } = useQuery({ queryKey: ['embedding-models', selectedEmbeddingPlatform], queryFn: () => getEmbeddingModels(selectedEmbeddingPlatform), enabled: !!selectedEmbeddingPlatform, - retry: 2, - staleTime: 2 * 60 * 1000, // 2 minutes }); - // Convert API data to option format - const llmPlatformOptions = llmPlatformsData?.map((platform: PlatformOption) => ({ - label: platform.label, - value: platform.value, - })); - - const embeddingPlatformOptions = embeddingPlatformsData?.map((platform: PlatformOption) => ({ - label: platform.label, - value: platform.value, - })); - - const llmModelOptions = llmModelsData?.map((model: ModelOption) => ({ - label: model.label, - value: model.value, - })); - - const embeddingModelOptions = embeddingModelsData?.map((model: ModelOption) => ({ - label: model.label, - value: model.value, - })); - - const [replaceApiKey, setReplaceApiKey] = React.useState(false); - const [replaceSecretKey, setReplaceSecretKey] = React.useState(false); - const 
[replaceAccessKey, setReplaceAccessKey] = React.useState(false); - const [replaceEmbeddingModelApiKey, setReplaceEmbeddingModelApiKey] = React.useState(false); - - // State to track if API key fields should be in replace mode (readonly with replace button) +const llmPlatformOptions = toOptions(llmPlatformsData); +const embeddingPlatformOptions = toOptions(embeddingPlatformsData); +const llmModelOptions = toOptions(llmModelsData); +const embeddingModelOptions = toOptions(embeddingModelsData); + const [apiKeyReplaceMode, setApiKeyReplaceMode] = React.useState(isEditing); const [secretKeyReplaceMode, setSecretKeyReplaceMode] = React.useState(isEditing); const [accessKeyReplaceMode, setAccessKeyReplaceMode] = React.useState(isEditing); @@ -155,17 +134,17 @@ const LLMConnectionForm: React.FC = ({ setValue('targetUri', ''); setValue('apiKey', ''); setValue('llmModel', ''); - + // Reset replace mode states when platform changes setApiKeyReplaceMode(false); setSecretKeyReplaceMode(false); setAccessKeyReplaceMode(false); }; - const resetEmbeddingModelCredentialFields = () => { + const resetEmbeddingModelCredentialFields = () => { setValue('embeddingModelApiKey', ''); setValue('embeddingModel', ''); - + // Reset replace mode state when platform changes setEmbeddingApiKeyReplaceMode(false); }; @@ -266,7 +245,7 @@ const LLMConnectionForm: React.FC = ({ = ({ ); - + default: return (
@@ -340,6 +319,8 @@ const LLMConnectionForm: React.FC = ({ const cleanedData = { ...data, monthlyBudget: data.monthlyBudget.replace(/,/g, ''), + warnBudget: data.warnBudget.replace('%', ''), + stopBudget: data.stopBudget.replace('%', ''), }; onSubmit(cleanedData); }; @@ -379,12 +360,12 @@ const LLMConnectionForm: React.FC = ({ render={({ field }) => ( = ({ render={({ field }) => ( = ({ render={({ field }) => ( = ({ render={({ field }) => ( = ({ />
+
+ + ( + field.onChange(e.target.checked)} + hideLabel={true} + /> + )} + /> +
+ +
+

Warn Budget Threshold

+

You will get a notification when your usage reaches this percentage of your allocated monthly budget.

+ + { + const numericValue = Number(value.replace('%', '')); + + if (numericValue < 1 || numericValue > 100) { + return 'Warn Budget Threshold must be between 1-100%'; + } + return true; + } + }} + render={({ field }) => ( + { + const value = e.target.value.replace(/[^\d]/g, ''); // Remove all non-numeric characters + field.onChange(value); + }} + name={field.name} + onBlur={field.onBlur} + /> + )} + /> +
+ + {disconnectOnBudgetExceed && ( +
+

Disconnect Budget Threshold

+

Your LLM connection will be automatically disconnected and all further requests will be stopped when your usage reaches + this percentage of your monthly budget.

+ + { + if (!disconnectOnBudgetExceed) return true; + + const numericValue = Number(value.replace('%', '')); + const warnValue = Number(formValues.warnBudget?.replace('%', '') || 0); + + if (numericValue < 1 || numericValue > 200) { + return 'Stop Budget Threshold must be between 1-200%'; + } + + if (warnValue > 0 && numericValue <= warnValue) { + return 'Stop Budget Threshold must be greater than Warn Budget Threshold'; + } + + return true; + } + }} + render={({ field }) => ( + { + const value = e.target.value.replace(/[^\d]/g, ''); // Remove all non-numeric characters + field.onChange(value); + }} + name={field.name} + onBlur={field.onBlur} + /> + )} + /> +
+ )} +
{ const { open: openDialog, close: closeDialog } = useDialog(); const queryClient = useQueryClient(); + // Query to check for existing production connection + const { data: existingProductionConnection } = useQuery({ + queryKey: ['production-connection'], + queryFn: getProductionConnection, + }); + const createConnectionMutation = useMutation({ mutationFn: createLLMConnection, onSuccess: async () => { @@ -54,7 +60,41 @@ const CreateLLMConnection = () => { }); const handleSubmit = async (data: LLMConnectionFormData) => { - createConnectionMutation.mutate(data); + const isCreatingProductionConnection = data.deploymentEnvironment === 'production'; + const hasExistingProductionConnection = existingProductionConnection && existingProductionConnection.id; + + if (isCreatingProductionConnection && hasExistingProductionConnection) { + openDialog({ + title: 'Replace Production Connection', + content: ( +
+

A production connection "{existingProductionConnection.connectionName}" already exists.

+

Creating this new production connection will replace the current one. Are you sure you want to proceed?

+
+ ), + footer: ( +
+ + +
+ ), + }); + } else { + createConnectionMutation.mutate(data); + } }; const handleCancel = () => { diff --git a/GUI/src/pages/LLMConnections/ViewLLMConnection.tsx b/GUI/src/pages/LLMConnections/ViewLLMConnection.tsx index af41bd9..28e429f 100644 --- a/GUI/src/pages/LLMConnections/ViewLLMConnection.tsx +++ b/GUI/src/pages/LLMConnections/ViewLLMConnection.tsx @@ -112,7 +112,42 @@ const ViewLLMConnection = () => { }); const handleSubmit = async (data: LLMConnectionFormData) => { - updateConnectionMutation.mutate(data); + const isCurrentlyProduction = connectionData?.environment === 'production'; + const isChangingToTesting = data.deploymentEnvironment === 'testing'; + + if (isCurrentlyProduction && isChangingToTesting) { + openDialog({ + title: 'Confirm Production Environment Change', + content: ( +
+

You are about to change a production connection to testing environment.

+

This will affect the current production setup. Are you sure you want to proceed?

+
+ ), + footer: ( +
+ + +
+ ), + }); + } else { + updateConnectionMutation.mutate(data); + } }; const handleCancel = () => { @@ -122,29 +157,51 @@ const ViewLLMConnection = () => { const handleDelete = () => { - openDialog({ - title: 'Confirm Delete', - content:

Are you sure you want to delete this LLM connection? This action cannot be undone.

, - footer: ( -
+ const isProductionConnection = connectionData?.environment === 'production'; + + if (isProductionConnection) { + openDialog({ + title: 'Cannot Delete Production Connection', + content: ( +
+

This LLM connection is currently set as the production connection and cannot be deleted.

+

To delete this connection, please ensure another connection is set as the production connection.

+
+ ), + footer: ( - -
- ), - }); + ), + }); + } else { + openDialog({ + title: 'Confirm Delete', + content:

Are you sure you want to delete this LLM connection? This action cannot be undone.

, + footer: ( +
+ + +
+ ), + }); + } }; if (isLoading) { @@ -179,6 +236,9 @@ const ViewLLMConnection = () => { embeddingModelPlatform: connectionData.embeddingPlatform, embeddingModel: connectionData.embeddingModel, monthlyBudget: connectionData.monthlyBudget.toString(), + warnBudget: connectionData.warnBudgetThreshold.toString(), + stopBudget: connectionData.disconnectOnBudgetExceed ? connectionData.stopBudgetThreshold.toString() : '0', + disconnectOnBudgetExceed: connectionData.disconnectOnBudgetExceed, deploymentEnvironment: connectionData.environment, // Azure credentials (don't show sensitive data, but include structure) deploymentName: connectionData.deploymentName || '', diff --git a/GUI/src/pages/LLMConnections/index.tsx b/GUI/src/pages/LLMConnections/index.tsx index 0247cc5..6d46024 100644 --- a/GUI/src/pages/LLMConnections/index.tsx +++ b/GUI/src/pages/LLMConnections/index.tsx @@ -2,17 +2,18 @@ import { FC, useEffect, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { Button, FormSelect } from 'components'; import Pagination from 'components/molecules/Pagination'; -import { useQuery } from '@tanstack/react-query'; +import { useQuery, useQueryClient } from '@tanstack/react-query'; import { useNavigate, useSearchParams } from 'react-router-dom'; -import { formattedArray } from 'utils/commonUtilts'; +import { formattedArray } from 'utils/commonUtils'; import DataModelCard from 'components/molecules/LLMConnectionCard'; import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner'; import { ButtonAppearanceTypes } from 'enums/commonEnums'; import NoDataView from 'components/molecules/NoDataView'; +import BudgetBanner from 'components/molecules/BudgetBanner'; import './LLMConnections.scss'; import { platforms, trainingStatuses } from 'config/dataModelsConfig'; import LLMConnectionCard from 'components/molecules/LLMConnectionCard'; -import { fetchLLMConnectionsPaginated, LLMConnectionFilters, LLMConnection } from 
'services/llmConnections'; +import { fetchLLMConnectionsPaginated, LLMConnectionFilters, LLMConnection, getProductionConnection } from 'services/llmConnections'; import { llmConnectionsQueryKeys } from 'utils/queryKeys'; const LLMConnections: FC = () => { @@ -34,6 +35,13 @@ const LLMConnections: FC = () => { queryFn: () => fetchLLMConnectionsPaginated(filters), }); + // Fetch production connection separately + const { data: productionConnection, isLoading: isProductionLoading } = useQuery({ + queryKey: llmConnectionsQueryKeys.production(), + queryFn: getProductionConnection, + }); + + const llmConnections = connectionsResponse; const totalPages = connectionsResponse?.[0]?.totalPages || 1; @@ -47,12 +55,12 @@ const LLMConnections: FC = () => { value: string | number | undefined | { name: string; id: string } ) => { let filterUpdate: Partial = {}; - + if (name === 'sorting') { // Handle sorting format - no conversion needed, use snake_case directly const sortingValue = value as string; const [sortBy, sortOrder] = sortingValue.split(' '); - + filterUpdate = { sortBy: sortBy, sortOrder: sortOrder as 'asc' | 'desc' @@ -65,7 +73,7 @@ const LLMConnections: FC = () => { ...prevFilters, ...filterUpdate, })); - + // Reset to first page when filters change if (name !== 'pageNumber') { setPageIndex(1); @@ -75,23 +83,17 @@ const LLMConnections: FC = () => { // Platform filter options const platformOptions = [ { label: 'All Platforms', value: 'all' }, - { label: 'OpenAI', value: 'openai' }, - { label: 'Anthropic', value: 'anthropic' }, { label: 'Azure OpenAI', value: 'azure' }, - { label: 'Google AI', value: 'google' }, - { label: 'AWS Bedrock', value: 'bedrock' }, - { label: 'Hugging Face', value: 'huggingface' }, + { label: 'AWS Bedrock', value: 'aws' }, ]; // LLM Model filter options - these would ideally come from an API const llmModelOptions = [ { label: 'All Models', value: 'all' }, - { label: 'GPT-4', value: 'gpt-4' }, - { label: 'GPT-4 Turbo', value: 'gpt-4-turbo' }, - { 
label: 'GPT-3.5 Turbo', value: 'gpt-3.5-turbo' }, - { label: 'Claude-3 Sonnet', value: 'claude-3-sonnet' }, - { label: 'Claude-3 Haiku', value: 'claude-3-haiku' }, - { label: 'Gemini Pro', value: 'gemini-pro' }, + { label: 'GPT-4 Mini', value: 'gpt-4o-mini' }, + { label: 'GPT-4o', value: 'gpt-4o' }, + { label: 'Anthropic Claude 3.5 Sonnet', value: 'anthropic-claude-3.5-sonnet' }, + { label: 'Anthropic Claude 3.7 Sonnet', value: 'anthropic-claude-3.7-sonnet' }, ]; // Environment filter options @@ -99,7 +101,6 @@ const LLMConnections: FC = () => { { label: 'All Environments', value: 'all' }, { label: 'Testing', value: 'testing' }, { label: 'Production', value: 'production' }, - { label: 'Development', value: 'development' }, ]; // Sort options - using snake_case format for backend @@ -116,14 +117,13 @@ const LLMConnections: FC = () => { const currentSorting = `${filters.sortBy || 'created_at'} ${filters.sortOrder || 'desc'}`; - // Find featured connection (first active one) - const featuredConnection = llmConnections?.[0]; + // Use production connection as featured connection const otherConnections = llmConnections || []; return (
- {!isModelDataLoading ? ( + {!isModelDataLoading && !isProductionLoading ? (
@@ -202,19 +202,19 @@ const LLMConnections: FC = () => {
- {featuredConnection && ( + {productionConnection && (

Production LLM Connection

@@ -229,8 +229,8 @@ const LLMConnections: FC = () => { { })}
- ) : !featuredConnection ? ( + ) : !productionConnection ? ( ) : null} diff --git a/GUI/src/services/inference.ts b/GUI/src/services/inference.ts index f469251..28de0e7 100644 --- a/GUI/src/services/inference.ts +++ b/GUI/src/services/inference.ts @@ -16,8 +16,8 @@ export interface InferenceResponse { } export async function viewInferenceResult(request: InferenceRequest): Promise { - const { data } = await apiDev.post(inferenceEndpoints.VIEW_INFERENCE_RESULT(), { - llmConnectionId: request.llmConnectionId, + const { data } = await apiDev.post(inferenceEndpoints.VIEW_TEST_INFERENCE_RESULT(), { + connectionId: request.llmConnectionId, message: request.message, }); return data; diff --git a/GUI/src/services/llmConnections.ts b/GUI/src/services/llmConnections.ts index 979da53..96d37e9 100644 --- a/GUI/src/services/llmConnections.ts +++ b/GUI/src/services/llmConnections.ts @@ -1,6 +1,6 @@ import apiDev from './api-dev'; import { llmConnectionsEndpoints } from 'utils/endpoints'; -import { removeCommasFromNumber } from 'utils/commonUtilts'; +import { removeCommasFromNumber } from 'utils/commonUtils'; import { maskSensitiveKey } from 'utils/llmConnectionsUtils'; export interface LLMConnection { @@ -11,8 +11,11 @@ export interface LLMConnection { embeddingPlatform: string; embeddingModel: string; monthlyBudget: number; + warnBudgetThreshold: number; + stopBudgetThreshold: number; + disconnectOnBudgetExceed: boolean; environment: string; - status: string; + connectionStatus: 'active' | 'inactive'; createdAt: string; updatedAt: string; totalPages?: number; @@ -31,7 +34,28 @@ export interface LLMConnection { export interface LLMConnectionsResponse { data: LLMConnection[]; - + +} + +export interface BudgetStatus { + used_budget_percentage: number; + exceeded_stop_budget: boolean; + exceeded_warn_budget: boolean; + data: { + id: number; + connectionName: string; + usedBudget: number; + monthlyBudget: number; + warnBudgetThreshold: number; + stopBudgetThreshold: number; + 
environment: string; + connectionStatus: string; + createdAt: string; + llmPlatform: string; + llmModel: string; + embeddingPlatform: string; + embeddingModel: string; + } } export interface LLMConnectionFilters { @@ -45,8 +69,6 @@ export interface LLMConnectionFilters { environment?: string; status?: string; } - -// Legacy interface for backwards compatibility export interface LegacyLLMConnectionFilters { page: number; pageSize: number; @@ -56,7 +78,6 @@ export interface LegacyLLMConnectionFilters { environment?: string; status?: string; } - export interface LLMConnectionFormData { connectionName: string; llmPlatform: string; @@ -64,6 +85,9 @@ export interface LLMConnectionFormData { embeddingModelPlatform: string; embeddingModel: string; monthlyBudget: string; + warnBudget: string; + stopBudget: string; + disconnectOnBudgetExceed: boolean; deploymentEnvironment: string; // Azure credentials deploymentName?: string; @@ -78,7 +102,7 @@ export interface LLMConnectionFormData { export async function fetchLLMConnectionsPaginated(filters: LLMConnectionFilters): Promise { const queryParams = new URLSearchParams(); - + if (filters.pageNumber) queryParams.append('pageNumber', filters.pageNumber.toString()); if (filters.pageSize) queryParams.append('pageSize', filters.pageSize.toString()); if (filters.sortBy) queryParams.append('sortBy', filters.sortBy); @@ -86,7 +110,7 @@ export async function fetchLLMConnectionsPaginated(filters: LLMConnectionFilters if (filters.llmPlatform) queryParams.append('llmPlatform', filters.llmPlatform); if (filters.llmModel) queryParams.append('llmModel', filters.llmModel); if (filters.environment) queryParams.append('environment', filters.environment); - + const url = `${llmConnectionsEndpoints.FETCH_LLM_CONNECTIONS_PAGINATED()}?${queryParams.toString()}`; const { data } = await apiDev.get(url); return data?.response; @@ -99,6 +123,12 @@ export async function getLLMConnection(id: string | number): Promise { + const { data } = await 
apiDev.get(llmConnectionsEndpoints.GET_PRODUCTION_CONNECTION()); + return data?.response?.[0] || null; +} + + export async function createLLMConnection(connectionData: LLMConnectionFormData): Promise { const { data } = await apiDev.post(llmConnectionsEndpoints.CREATE_LLM_CONNECTION(), { connection_name: connectionData.connectionName, @@ -107,22 +137,25 @@ export async function createLLMConnection(connectionData: LLMConnectionFormData) embedding_platform: connectionData.embeddingModelPlatform, embedding_model: connectionData.embeddingModel, monthly_budget: parseFloat(removeCommasFromNumber(connectionData.monthlyBudget)), + warn_budget_threshold: parseInt(connectionData.warnBudget), + stop_budget_threshold: connectionData.disconnectOnBudgetExceed ? parseInt(connectionData.stopBudget) : 0, + disconnect_on_budget_exceed: connectionData.disconnectOnBudgetExceed, deployment_environment: connectionData.deploymentEnvironment.toLowerCase(), // Azure credentials - deployment_name: connectionData.deploymentName || null, - target_uri: connectionData.targetUri || null, - api_key: maskSensitiveKey(connectionData.apiKey) || null, + deployment_name: connectionData.deploymentName || "", + target_uri: connectionData.targetUri || "", + api_key: maskSensitiveKey(connectionData.apiKey) || "", // AWS Bedrock credentials - secret_key: maskSensitiveKey(connectionData.secretKey) || null, - access_key: maskSensitiveKey(connectionData.accessKey) || null, + secret_key: maskSensitiveKey(connectionData.secretKey) || "", + access_key: maskSensitiveKey(connectionData.accessKey) || "", // Embedding model credentials - embedding_model_api_key: maskSensitiveKey(connectionData.embeddingModelApiKey) || null, + embedding_model_api_key: maskSensitiveKey(connectionData.embeddingModelApiKey) || "", }); return data?.response; } export async function updateLLMConnection( - id: string | number, + id: string | number, connectionData: LLMConnectionFormData ): Promise { const { data } = await 
apiDev.post(llmConnectionsEndpoints.UPDATE_LLM_CONNECTION(), { @@ -133,16 +166,19 @@ export async function updateLLMConnection( embedding_platform: connectionData.embeddingModelPlatform, embedding_model: connectionData.embeddingModel, monthly_budget: parseFloat(removeCommasFromNumber(connectionData.monthlyBudget)), + warn_budget_threshold: parseInt(connectionData.warnBudget), + stop_budget_threshold: connectionData.disconnectOnBudgetExceed ? parseInt(connectionData.stopBudget) : 0, + disconnect_on_budget_exceed: connectionData.disconnectOnBudgetExceed, deployment_environment: connectionData.deploymentEnvironment.toLowerCase(), // Azure credentials - deployment_name: connectionData.deploymentName || null, - target_uri: connectionData.targetUri || null, - api_key: maskSensitiveKey(connectionData.apiKey) || null, + deployment_name: connectionData.deploymentName || "", + target_uri: connectionData.targetUri || "", + api_key: maskSensitiveKey(connectionData.apiKey) || "", // AWS Bedrock credentials - secret_key: maskSensitiveKey(connectionData.secretKey) || null, - access_key: maskSensitiveKey(connectionData.accessKey) || null, + secret_key: maskSensitiveKey(connectionData.secretKey) || "", + access_key: maskSensitiveKey(connectionData.accessKey) || "", // Embedding model credentials - embedding_model_api_key: maskSensitiveKey(connectionData.embeddingModelApiKey) || null, + embedding_model_api_key: maskSensitiveKey(connectionData.embeddingModelApiKey) || "", }); return data?.response; } @@ -152,3 +188,24 @@ export async function deleteLLMConnection(id: string | number): Promise { connection_id: id, }); } + +export async function checkBudgetStatus(): Promise { + try { + const { data } = await apiDev.get(llmConnectionsEndpoints.CHECK_BUDGET_STATUS()); + return data?.response as BudgetStatus; + } catch (error) { + // Return null if no production connection found (404) or other errors + return null; + } +} + +export async function updateLLMConnectionStatus( + id: string | 
number, + status: 'active' | 'inactive' +): Promise { + const { data } = await apiDev.post(llmConnectionsEndpoints.UPDATE_LLM_CONNECTION_STATUS(), { + connection_id: id, + connection_status: status, + }); + return data?.response; +} diff --git a/GUI/src/utils/commonUtilts.ts b/GUI/src/utils/commonUtils.ts similarity index 95% rename from GUI/src/utils/commonUtilts.ts rename to GUI/src/utils/commonUtils.ts index 93f55df..c84e23f 100644 --- a/GUI/src/utils/commonUtilts.ts +++ b/GUI/src/utils/commonUtils.ts @@ -112,3 +112,6 @@ export const formatNumberWithCommas = (value: string | number): string => { export const removeCommasFromNumber = (value: string): string => { return value.replace(/,/g, ''); }; + +export const toOptions = (data?: T[]) => + data?.map(({ label, value }) => ({ label, value })); \ No newline at end of file diff --git a/GUI/src/utils/endpoints.ts b/GUI/src/utils/endpoints.ts index a950369..37d8173 100644 --- a/GUI/src/utils/endpoints.ts +++ b/GUI/src/utils/endpoints.ts @@ -16,11 +16,14 @@ export const authEndpoints = { export const llmConnectionsEndpoints = { FETCH_LLM_CONNECTIONS_PAGINATED: (): string => `/rag-search/llm-connections/list`, GET_LLM_CONNECTION: (): string => `/rag-search/llm-connections/get`, + GET_PRODUCTION_CONNECTION: (): string => `/rag-search/llm-connections/production`, CREATE_LLM_CONNECTION: (): string => `/rag-search/llm-connections/add`, UPDATE_LLM_CONNECTION: (): string => `/rag-search/llm-connections/edit`, + UPDATE_LLM_CONNECTION_STATUS: (): string => `/rag-search/llm-connections/update-status`, DELETE_LLM_CONNECTION: (): string => `/rag-search/llm-connections/delete`, + CHECK_BUDGET_STATUS: (): string => `/rag-search/llm-connections/cost/check`, } export const inferenceEndpoints = { - VIEW_INFERENCE_RESULT: (): string => `/rag-search/inference/results/view`, + VIEW_TEST_INFERENCE_RESULT: (): string => `/rag-search/inference/test`, } diff --git a/GUI/src/utils/queryKeys.ts b/GUI/src/utils/queryKeys.ts index 
b1680d8..e004497 100644 --- a/GUI/src/utils/queryKeys.ts +++ b/GUI/src/utils/queryKeys.ts @@ -29,6 +29,8 @@ export const llmConnectionsQueryKeys = { paginatedList: (filters: LLMConnectionFilters) => [...llmConnectionsQueryKeys.paginatedLists(), filters] as const, details: () => [...llmConnectionsQueryKeys.all(), 'detail'] as const, detail: (id: string | number) => [...llmConnectionsQueryKeys.details(), id] as const, + budgetStatus: () => [...llmConnectionsQueryKeys.all(), 'budget-status'] as const, + production: () => [...llmConnectionsQueryKeys.all(), 'production'] as const, }; export const inferenceQueryKeys = { diff --git a/constants.ini b/constants.ini index 4a68f32..bc09e03 100644 --- a/constants.ini +++ b/constants.ini @@ -6,5 +6,6 @@ RAG_SEARCH_RESQL=http://resql:8082/rag-search RAG_SEARCH_PROJECT_LAYER=rag-search RAG_SEARCH_TIM=http://tim:8085 RAG_SEARCH_CRON_MANAGER=http://cron-manager:9010 +RAG_SEARCH_LLM_ORCHESTRATOR=http://llm-orchestration-service:8100/orchestrate DOMAIN=localhost DB_PASSWORD=dbadmin \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index d8d1224..8fc77ef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,160 +1,173 @@ services: - # ruuter-public: - # container_name: ruuter-public - # image: ruuter - # environment: - # - application.cors.allowedOrigins=http://localhost:8086,http://localhost:3001,http://localhost:3003,http://localhost:3004,http://localhost:8080,http://localhost:8000,http://localhost:8090 - # - application.httpCodesAllowList=200,201,202,204,400,401,403,500 - # - application.internalRequests.allowedIPs=127.0.0.1 - # - application.logging.displayRequestContent=true - # - application.logging.displayResponseContent=true - # - application.logging.printStackTrace=true - # - application.internalRequests.disabled=true - # - server.port=8086 - # volumes: - # - ./DSL/Ruuter.public:/DSL - # - ./constants.ini:/app/constants.ini - # ports: - # - 8086:8086 - # networks: - # - bykstack - # 
cpus: "0.5" - # mem_limit: "512M" + ruuter-public: + container_name: ruuter-public + image: ruuter + environment: + - application.cors.allowedOrigins=http://localhost:8086,http://localhost:3001,http://localhost:3003,http://localhost:3004,http://localhost:8080,http://localhost:8000,http://localhost:8090 + - application.httpCodesAllowList=200,201,202,204,400,401,403,500 + - application.internalRequests.allowedIPs=127.0.0.1 + - application.logging.displayRequestContent=true + - application.logging.displayResponseContent=true + - application.logging.printStackTrace=true + - application.internalRequests.disabled=true + - server.port=8086 + volumes: + - ./DSL/Ruuter.public:/DSL + - ./constants.ini:/app/constants.ini + ports: + - 8086:8086 + networks: + - bykstack + cpus: "0.5" + mem_limit: "512M" - # ruuter-private: - # container_name: ruuter-private - # image: ruuter - # environment: - # - application.cors.allowedOrigins=http://localhost:3001,http://localhost:3003,http://localhost:8088,http://localhost:3002,http://localhost:3004,http://localhost:8000 - # - application.httpCodesAllowList=200,201,202,400,401,403,500 - # - application.internalRequests.allowedIPs=127.0.0.1 - # - application.logging.displayRequestContent=true - # - application.logging.displayResponseContent=true - # - application.logging.printStackTrace=true - # - application.internalRequests.disabled=true - # - server.port=8088 - # volumes: - # - ./DSL/Ruuter.private:/DSL - # - ./constants.ini:/app/constants.ini - # ports: - # - 8088:8088 - # networks: - # - bykstack - # cpus: "0.5" - # mem_limit: "512M" - - # data-mapper: - # container_name: data-mapper - # image: data-mapper - # environment: - # - PORT=3000 - # - CONTENT_FOLDER=/data - # volumes: - # - ./DSL:/data - # - ./DSL/DMapper/rag-search/hbs:/workspace/app/views/rag-search - # - ./DSL/DMapper/rag-search/lib:/workspace/app/lib - # ports: - # - 3000:3000 - # networks: - # - bykstack + ruuter-private: + container_name: ruuter-private + image: ruuter + 
environment: + - application.cors.allowedOrigins=http://localhost:3001,http://localhost:3003,http://localhost:8088,http://localhost:3002,http://localhost:3004,http://localhost:8000 + - application.httpCodesAllowList=200,201,202,400,401,403,500 + - application.internalRequests.allowedIPs=127.0.0.1 + - application.logging.displayRequestContent=true + - application.logging.displayResponseContent=true + - application.logging.printStackTrace=true + - application.internalRequests.disabled=true + - server.port=8088 + volumes: + - ./DSL/Ruuter.private:/DSL + - ./constants.ini:/app/constants.ini + ports: + - 8088:8088 + networks: + - bykstack + cpus: "0.5" + mem_limit: "512M" - # tim: - # container_name: tim - # image: tim - # depends_on: - # tim-postgresql: - # condition: service_started - # environment: - # - SECURITY_ALLOWLIST_JWT=ruuter-private,ruuter-public,data-mapper,resql,tim,tim-postgresql,chat-widget,authentication-layer,127.0.0.1,::1 - # - KEY_PASS=ppjjpp - # ports: - # - 8085:8085 - # networks: - # - bykstack - # extra_hosts: - # - "host.docker.internal:host-gateway" - # cpus: "0.5" - # mem_limit: "512M" + data-mapper: + container_name: data-mapper + image: data-mapper + environment: + - PORT=3000 + - CONTENT_FOLDER=/data + volumes: + - ./DSL:/data + - ./DSL/DMapper/rag-search/hbs:/workspace/app/views/rag-search + - ./DSL/DMapper/rag-search/lib:/workspace/app/lib + ports: + - 3000:3000 + networks: + - bykstack - # tim-postgresql: - # container_name: tim-postgresql - # image: postgres:14.1 - # environment: - # - POSTGRES_USER=tim - # - POSTGRES_PASSWORD=123 - # - POSTGRES_DB=tim - # # - POSTGRES_HOST_AUTH_METHOD=trust - # volumes: - # - ./tim-db:/var/lib/postgresql/data - # ports: - # - 9876:5432 - # networks: - # - bykstack + tim: + container_name: tim + image: tim + depends_on: + tim-postgresql: + condition: service_started + environment: + - 
SECURITY_ALLOWLIST_JWT=ruuter-private,ruuter-public,data-mapper,resql,tim,tim-postgresql,chat-widget,authentication-layer,127.0.0.1,::1 + - KEY_PASS=ppjjpp + ports: + - 8085:8085 + networks: + - bykstack + extra_hosts: + - "host.docker.internal:host-gateway" + cpus: "0.5" + mem_limit: "512M" - # authentication-layer: - # container_name: authentication-layer - # image: authentication-layer - # ports: - # - 3004:3004 - # networks: - # - bykstack + tim-postgresql: + container_name: tim-postgresql + image: postgres:14.1 + environment: + - POSTGRES_USER=tim + - POSTGRES_PASSWORD=123 + - POSTGRES_DB=tim + # - POSTGRES_HOST_AUTH_METHOD=trust + volumes: + - ./tim-db:/var/lib/postgresql/data + ports: + - 9876:5432 + networks: + - bykstack - # resql: - # container_name: resql - # image: resql - # depends_on: - # rag_search_db: - # condition: service_started - # environment: - # - sqlms.datasources.[0].name=byk - # - sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://rag_search_db:5432/rag-search #For LocalDb Use - # # sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://171.22.247.13:5435/byk?sslmode=require - # - sqlms.datasources.[0].username=postgres - # - sqlms.datasources.[0].password=dbadmin - # - logging.level.org.springframework.boot=INFO - # ports: - # - 8082:8082 - # volumes: - # - ./DSL/Resql:/DSL - # - ./shared:/shared - # - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets - # networks: - # - bykstack + authentication-layer: + container_name: authentication-layer + image: authentication-layer + ports: + - 3004:3004 + networks: + - bykstack - # gui: - # container_name: gui - # environment: - # - NODE_ENV=development - # - REACT_APP_RUUTER_API_URL=http://localhost/ruuter-public - # - REACT_APP_RUUTER_PRIVATE_API_URL=http://localhost/ruuter-private - # - REACT_APP_EXTERNAL_API_URL=http://localhost/dataset-gen-service - # - REACT_APP_CUSTOMER_SERVICE_LOGIN=http://localhost/authentication-layer/et/dev-auth - # - 
REACT_APP_NOTIFICATION_NODE_URL=http://localhost/notifications-node - # - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost ws://localhost; - # - DEBUG_ENABLED=true - # - CHOKIDAR_USEPOLLING=true - # - PORT=3001 - # - REACT_APP_SERVICE_ID=conversations,settings,monitoring - # - REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE - # - VITE_HOST=0.0.0.0 - # - VITE_PORT=3001 - # - HOST=0.0.0.0 - # - VITE_ALLOWED_HOSTS=localhost,127.0.0.1 - # - HMR=false - # - FAST_REFRESH=false - # build: - # context: ./GUI - # dockerfile: Dockerfile.dev - # ports: - # - "3003:3001" - # volumes: - # - /app/node_modules - # - ./GUI:/app - # networks: - # - bykstack - # cpus: "0.5" - # mem_limit: "1G" - # restart: unless-stopped + resql: + container_name: resql + image: resql + depends_on: + rag_search_db: + condition: service_started + environment: + - sqlms.datasources.[0].name=byk + - sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://rag_search_db:5432/rag-search #For LocalDb Use + # sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://171.22.247.13:5435/byk?sslmode=require + - sqlms.datasources.[0].username=postgres + - sqlms.datasources.[0].password=dbadmin + - logging.level.org.springframework.boot=INFO + ports: + - 8082:8082 + volumes: + - ./DSL/Resql:/DSL + - ./shared:/shared + - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets + networks: + - bykstack + + cron-manager: + container_name: cron-manager + image: cron-manager-python:latest + volumes: + - ./DSL/CronManager/DSL:/DSL + - ./DSL/CronManager/script:/app/scripts + - cron_data:/app/data + - ./models:/app/models + environment: + - server.port=9010 + ports: + - 9010:8080 + networks: + - bykstack + + gui: + container_name: gui + environment: + - NODE_ENV=development + - REACT_APP_RUUTER_API_URL=http://localhost:8086 + - 
REACT_APP_RUUTER_PRIVATE_API_URL=http://localhost:8088 + - REACT_APP_CUSTOMER_SERVICE_LOGIN=http://localhost:3004/et/dev-auth + - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 ws://localhost; + - DEBUG_ENABLED=true + - CHOKIDAR_USEPOLLING=true + - PORT=3001 + - REACT_APP_SERVICE_ID=conversations,settings,monitoring + - REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE + - VITE_HOST=0.0.0.0 + - VITE_PORT=3001 + - HOST=0.0.0.0 + - VITE_ALLOWED_HOSTS=localhost,127.0.0.1 + - HMR=false + - FAST_REFRESH=false + build: + context: ./GUI + dockerfile: Dockerfile.dev + ports: + - "3003:3001" + volumes: + - /app/node_modules + - ./GUI:/app + networks: + - bykstack + cpus: "0.5" + mem_limit: "1G" + restart: unless-stopped qdrant: image: qdrant/qdrant:v1.15.1 @@ -406,7 +419,7 @@ services: - ./vault/config:/vault/config # contains vault.hcl - ./vault/logs:/vault/logs expose: - - "8200" + - "8200" networks: - bykstack restart: unless-stopped @@ -432,7 +445,6 @@ services: - bykstack restart: unless-stopped - # LLM Orchestration Service llm-orchestration-service: build: @@ -487,8 +499,10 @@ volumes: name: vault-data vault-agent-out: name: vault-agent-out + cron_data: + name: cron_data networks: bykstack: name: bykstack - driver: bridge \ No newline at end of file + driver: bridge diff --git a/run_vector_indexer.py b/run_vector_indexer.py new file mode 100644 index 0000000..b01150f --- /dev/null +++ b/run_vector_indexer.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +""" +Entry point script for Vector Indexer - Contextual Retrieval Pipeline + +This script can be run directly or called by cron jobs for automated processing. 
+ +Usage: + python run_vector_indexer.py [--config CONFIG_PATH] [--health-check] [--dry-run] + +Examples: + # Run with default config + python run_vector_indexer.py + + # Run with custom config + python run_vector_indexer.py --config /path/to/config.yaml + + # Health check only + python run_vector_indexer.py --health-check + + # Dry run (validate without processing) + python run_vector_indexer.py --dry-run +""" + +import argparse +import asyncio +import sys +from pathlib import Path + +# Add src to Python path +sys.path.insert(0, str(Path(__file__).parent / "src")) + +from src.vector_indexer.main_indexer import VectorIndexer + + +async def main(): + """Main entry point with command line argument parsing.""" + + parser = argparse.ArgumentParser( + description="Vector Indexer - Contextual Retrieval Pipeline", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + parser.add_argument( + "--config", + type=str, + default="src/vector_indexer/config/vector_indexer_config.yaml", + help="Path to configuration file (default: src/vector_indexer/config/vector_indexer_config.yaml)", + ) + + parser.add_argument( + "--health-check", action="store_true", help="Run health check only and exit" + ) + + parser.add_argument( + "--dry-run", + action="store_true", + help="Validate configuration and connectivity without processing documents", + ) + + parser.add_argument( + "--verbose", "-v", action="store_true", help="Enable verbose logging" + ) + + parser.add_argument( + "--quiet", "-q", action="store_true", help="Suppress non-error output" + ) + + args = parser.parse_args() + + # Configure logging level based on arguments + log_level = "INFO" + if args.verbose: + log_level = "DEBUG" + elif args.quiet: + log_level = "ERROR" + + try: + # Initialize vector indexer with specified config + indexer = VectorIndexer(config_path=args.config) + + if args.health_check: + # Health check only + print("🔍 Running health check...") + health_ok = await 
indexer.run_health_check() + + if health_ok: + print("✅ Health check passed!") + return 0 + else: + print("❌ Health check failed!") + return 1 + + elif args.dry_run: + # Dry run - validate without processing + print("🧪 Running dry run validation...") + + health_ok = await indexer.run_health_check() + if not health_ok: + print("❌ Validation failed!") + return 1 + + # Discover documents but don't process + documents = indexer.document_loader.discover_all_documents() + print(f"📄 Found {len(documents)} documents ready for processing") + print("✅ Dry run validation passed!") + return 0 + + else: + # Full processing run + print("🚀 Starting Vector Indexer processing...") + + # Health check first + health_ok = await indexer.run_health_check() + if not health_ok: + print("❌ Pre-processing health check failed!") + return 1 + + # Process all documents + stats = await indexer.process_all_documents() + + # Return appropriate exit code + if stats.documents_failed > 0: + print(f"⚠️ Processing completed with {stats.documents_failed} failures") + return 2 # Partial success + else: + print("✅ Processing completed successfully!") + return 0 + + except KeyboardInterrupt: + print("\n⏹️ Processing interrupted by user") + return 130 + except FileNotFoundError as e: + print(f"❌ Configuration file not found: {e}") + return 1 + except Exception as e: + print(f"💥 Fatal error: {e}") + return 1 + + +def cron_entry_point(): + """ + Entry point specifically designed for cron jobs. 
+ + This function: + - Uses minimal output suitable for cron logs + - Returns appropriate exit codes for monitoring + - Handles errors gracefully for automated systems + """ + import logging + + # Configure minimal logging for cron + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - Vector Indexer - %(levelname)s - %(message)s", + ) + + try: + # Run with default configuration + result = asyncio.run(main()) + + if result == 0: + logging.info("Vector indexer completed successfully") + elif result == 2: + logging.warning("Vector indexer completed with some failures") + else: + logging.error("Vector indexer failed") + + return result + + except Exception as e: + logging.error(f"Vector indexer fatal error: {e}") + return 1 + + +if __name__ == "__main__": + # Run the async main function + exit_code = asyncio.run(main()) + sys.exit(exit_code) diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py index dd97fa9..4dfd295 100644 --- a/src/llm_orchestration_service_api.py +++ b/src/llm_orchestration_service_api.py @@ -11,6 +11,8 @@ from models.request_models import ( OrchestrationRequest, OrchestrationResponse, + TestOrchestrationRequest, + TestOrchestrationResponse, EmbeddingRequest, EmbeddingResponse, ContextGenerationRequest, @@ -124,6 +126,88 @@ def orchestrate_llm_request( ) +@app.post( + "/orchestrate/test", + response_model=TestOrchestrationResponse, + status_code=status.HTTP_200_OK, + summary="Process test LLM orchestration request", + description="Processes a simplified test message through the LLM orchestration pipeline", +) +def test_orchestrate_llm_request( + http_request: Request, + request: TestOrchestrationRequest, +) -> TestOrchestrationResponse: + """ + Process test LLM orchestration request with simplified input. 
+ + Args: + http_request: FastAPI Request object for accessing app state + request: TestOrchestrationRequest containing only message, environment, and connection_id + + Returns: + TestOrchestrationResponse: Response with LLM output and status flags (without chatId) + + Raises: + HTTPException: For processing errors + """ + try: + logger.info( + f"Received test orchestration request for environment: {request.environment}" + ) + + # Get the orchestration service from app state + if not hasattr(http_request.app.state, "orchestration_service"): + logger.error("Orchestration service not found in app state") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Service not initialized", + ) + + orchestration_service = http_request.app.state.orchestration_service + if orchestration_service is None: + logger.error("Orchestration service is None") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Service not initialized", + ) + + # Map TestOrchestrationRequest to OrchestrationRequest with defaults + full_request = OrchestrationRequest( + chatId="test-session", + message=request.message, + authorId="test-user", + conversationHistory=[], + url="test-context", + environment=request.environment, + connection_id=request.connection_id, + ) + + # Process the request using the same logic + response = orchestration_service.process_orchestration_request(full_request) + + # Convert to TestOrchestrationResponse (exclude chatId) + test_response = TestOrchestrationResponse( + llmServiceActive=response.llmServiceActive, + questionOutOfLLMScope=response.questionOutOfLLMScope, + inputGuardFailed=response.inputGuardFailed, + content=response.content, + ) + + logger.info( + f"Successfully processed test request for environment: {request.environment}" + ) + return test_response + + except HTTPException: + raise + except Exception as e: + logger.error(f"Unexpected error processing test request: {str(e)}") + raise HTTPException( 
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal server error occurred", + ) + + @app.post( "/embeddings", response_model=EmbeddingResponse, diff --git a/src/models/request_models.py b/src/models/request_models.py index 27152db..c6b9b50 100644 --- a/src/models/request_models.py +++ b/src/models/request_models.py @@ -129,3 +129,31 @@ class EmbeddingErrorResponse(BaseModel): error: str = Field(..., description="Error message") failed_texts: List[str] = Field(..., description="Texts that failed to embed") retry_after: Optional[int] = Field(None, description="Retry after seconds") + + +# Test endpoint models + + +class TestOrchestrationRequest(BaseModel): + """Model for simplified test orchestration request.""" + + message: str = Field(..., description="User's message/query") + environment: Literal["production", "test", "development"] = Field( + ..., description="Environment context" + ) + connection_id: Optional[str] = Field( + None, description="Optional connection identifier" + ) + + +class TestOrchestrationResponse(BaseModel): + """Model for test orchestration response (without chatId).""" + + llmServiceActive: bool = Field(..., description="Whether LLM service is active") + questionOutOfLLMScope: bool = Field( + ..., description="Whether question is out of LLM scope" + ) + inputGuardFailed: bool = Field( + ..., description="Whether input guard validation failed" + ) + content: str = Field(..., description="Response content with citations") diff --git a/vault/agent-out/pidfile b/vault/agent-out/pidfile index c793025..e69de29 100644 --- a/vault/agent-out/pidfile +++ b/vault/agent-out/pidfile @@ -1 +0,0 @@ -7 \ No newline at end of file