4 changes: 3 additions & 1 deletion .gitignore
@@ -6,4 +6,6 @@ __pycache__/
.env
tim-db
datasets
logs/
logs/
data_sets
vault/agent-out
5 changes: 5 additions & 0 deletions DSL/CronManager/DSL/reset.yml
@@ -0,0 +1,5 @@
agency_data_sync:
trigger: "0 0 1 * * ?" # Runs at 00:00 AM 1st day of every month
# trigger: off
type: exec
command: "../app/scripts/budget_reset.sh -s 10"
3 changes: 3 additions & 0 deletions DSL/CronManager/config/config.ini
@@ -0,0 +1,3 @@
[DSL]

RAG_SEARCH_RESQL=http://resql:8082
18 changes: 18 additions & 0 deletions DSL/CronManager/script/budget_reset.sh
@@ -0,0 +1,18 @@
#!/bin/bash

# Define endpoints

BUDGET_RESET_ENDPOINT=http://ruuter-public:8086/rag-search/llm-connections/cost/reset

# The reset endpoint takes no parameters, so send an empty JSON body
payload=$(cat <<EOF
{}
EOF
)

echo "SENDING REQUEST TO RESET MONTHLY USED BUDGET TO 0"
response=$(curl -s -X POST "$BUDGET_RESET_ENDPOINT" \
-H "Content-Type: application/json" \
-d "$payload")

echo "BUDGET RESET SUMMARY:"
echo "$response"
271 changes: 271 additions & 0 deletions DSL/DMapper/rag-search/lib/helpers.js
@@ -0,0 +1,271 @@
import { randomBytes } from "crypto";
import fs from "fs/promises";
import path from "path";

export function getAuthHeader(username, token) {
  const auth = `${username}:${token}`;
  const encodedAuth = Buffer.from(auth).toString("base64");
  return `Basic ${encodedAuth}`;
}

export function mergeLabelData(labels, existing_labels) {
  const mergedArray = [...labels, ...existing_labels];
  const uniqueArray = [...new Set(mergedArray)];
  return { labels: uniqueArray };
}

export function platformStatus(platform, data) {
  const platformData = data.find((item) => item.platform === platform);
  return platformData ? platformData.isConnect : false;
}

export function isLabelsMismatch(newLabels, correctedLabels, predictedLabels) {
  // Returns true when `newLabels` differs from `arr`: different length, any
  // label missing, or either input is not an array.
  function check(arr, newLabels) {
    if (
      Array.isArray(newLabels) &&
      Array.isArray(arr) &&
      newLabels.length === arr.length
    ) {
      for (const label of newLabels) {
        if (!arr.includes(label)) {
          return true;
        }
      }
      return false;
    }
    return true;
  }

  // Mismatch only if newLabels differs from both the corrected and the predicted set.
  const val1 = check(correctedLabels, newLabels);
  const val2 = check(predictedLabels, newLabels);
  return val1 && val2;
}
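
A quick illustration of the mismatch rule, with made-up labels (not from this PR):

// Hypothetical labels for illustration only.
const predicted = ["billing", "contract"];
const corrected = ["billing", "support"];

isLabelsMismatch(["billing", "contract"], corrected, predicted); // false: still matches the predicted set
isLabelsMismatch(["refund"], corrected, predicted);              // true: matches neither set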

export function getOutlookExpirationDateTime() {
  // Expiration timestamp three days from now, as an ISO string.
  const currentDate = new Date();
  currentDate.setDate(currentDate.getDate() + 3);
  return currentDate.toISOString();
}

export function findDuplicateStopWords(inputArray, existingArray) {
  const set1 = new Set(existingArray);
  const duplicates = inputArray.filter((item) => set1.has(item));
  return JSON.stringify(duplicates);
}

export function findNotExistingStopWords(inputArray, existingArray) {
  const set1 = new Set(existingArray);
  const notExisting = inputArray.filter((item) => !set1.has(item));
  return JSON.stringify(notExisting);
}

export function getRandomString() {
  return randomBytes(32).toString("hex");
}

export function base64Decrypt(cipher, isObject) {
  if (!cipher) {
    return JSON.stringify({
      error: true,
      message: 'Cipher is missing',
    });
  }

  try {
    // Decode first, strip carriage returns while the content is still a string,
    // then parse if an object is expected. (Calling .replace() on a parsed
    // object would throw and mask the real content.)
    const decodedContent = Buffer.from(cipher, 'base64').toString('utf8');
    const cleanedContent = decodedContent.replace(/\r/g, '');
    return JSON.stringify({
      error: false,
      content: isObject ? JSON.parse(cleanedContent) : cleanedContent
    });
  } catch (err) {
    return JSON.stringify({
      error: true,
      message: 'Base64 Decryption Failed',
    });
  }
}

export function base64Encrypt(content) {
  if (!content) {
    // Return a JSON string, consistent with every other branch of this helper.
    return JSON.stringify({
      error: true,
      message: 'Content is missing',
    });
  }

  try {
    return JSON.stringify({
      error: false,
      cipher: Buffer.from(typeof content === 'string' ? content : JSON.stringify(content)).toString('base64')
    });
  } catch (err) {
    return JSON.stringify({
      error: true,
      message: 'Base64 Encryption Failed',
    });
  }
}
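
Despite the names, these two helpers perform base64 encoding, not encryption. A round-trip usage sketch (the payload object is invented):

// Round trip: encode an object, then decode it back with isObject = true.
const { cipher } = JSON.parse(base64Encrypt({ greeting: "tere" }));
const decoded = JSON.parse(base64Decrypt(cipher, true));
console.log(decoded.content.greeting); // "tere"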

export function jsEscape(str) {
  return JSON.stringify(str).slice(1, -1);
}

export function isValidIntentName(name) {
  // Allows letters (any unicode letter), numbers, and underscores.
  // Matches front-end validation, with spaces replaced by underscores.
  return /^[\p{L}\p{N}_]+$/u.test(name);
}

export function eq(v1, v2) {
  return v1 === v2;
}

export function getAgencyDataHash(agencyId) {
  // Generate a consistent but seemingly random hash for each agencyId.
  const baseHash = agencyId.padEnd(10, agencyId); // Ensure at least 10 chars
  let hash = '';
  const chars = 'abcdefghijklmnopqrstuvwxyz0123456789';

  // Use the agencyId as a seed for pseudo-randomness.
  for (let i = 0; i < 16; i++) {
    // Take the character code from baseHash, or fall back to the index if out of bounds.
    const charCode = i < baseHash.length ? baseHash.charCodeAt(i) : i;
    // Map the character code to an index into the chars alphabet.
    const index = (charCode * 13 + i * 7) % chars.length;
    hash += chars[index];
  }

  return hash;
}

export function getAgencyDataAvailable(agencyId) {
  // Deterministic pseudo-random flag: the same agencyId always yields the
  // same result within a given calendar month.

  // Build a 32-bit hash from the agencyId.
  let hashValue = 0;
  for (let i = 0; i < agencyId.length; i++) {
    hashValue = ((hashValue << 5) - hashValue) + agencyId.charCodeAt(i);
    hashValue |= 0; // Convert to 32-bit integer
  }

  // Add a time component (year + month) so the result changes monthly,
  // but not on every request.
  const date = new Date();
  const timeComponent = date.getFullYear() * 100 + date.getMonth();

  // Even combined value means available, odd means not.
  return ((hashValue + timeComponent) % 2) === 0;
}
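
Both helpers above are deterministic mocks; a quick check of that property (the agency ID is made up):

// Made-up agency ID; both calls are stable for a given id within a month.
console.log(getAgencyDataHash("agency-001"));      // same 16-char string on every call
console.log(getAgencyDataAvailable("agency-001")); // same boolean for the whole month
console.log(getAgencyDataHash("agency-001") === getAgencyDataHash("agency-001")); // true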

export function json(context) {
  return JSON.stringify(context);
}

/**
 * Helper function to check if a value is an array.
 * @param {any} value - The value to check
 * @returns {boolean} True if value is an array, false otherwise
 */
export function isArray(value) {
  return Array.isArray(value);
}

/**
 * Returns the agencies that are in centopsAgencies but not in gcAgencies (by agencyId).
 * @param {Array} gcAgencies - Array of existing agencies, each with an agencyId property.
 * @param {Array} centopsAgencies - Array of agencies from CentOps, each with an agencyId property.
 * @returns {string} JSON string of the form { agencies: [...] }.
 */
export function extractNewAgencies(gcAgencies, centopsAgencies) {
  const existingIds = new Set(gcAgencies.map(a => a.agencyId));
  const newAgencies = centopsAgencies.filter(a => !existingIds.has(a.agencyId));
  return JSON.stringify({
    agencies: newAgencies,
  });
}
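
Example input and output, with invented agency records:

// Invented records for illustration.
const gcAgencies = [{ agencyId: "a1" }, { agencyId: "a2" }];
const centopsAgencies = [{ agencyId: "a2" }, { agencyId: "a3" }];

extractNewAgencies(gcAgencies, centopsAgencies);
// => '{"agencies":[{"agencyId":"a3"}]}'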

/**
 * Maps raw chunk records to the client-facing shape.
 * @param {Array} chunkData - Records with agency_id, id, agency_name, and question fields.
 * @returns {string} JSON string of the mapped records.
 */
export function getSingleChunkData(chunkData) {
  const mapped = chunkData?.map(item => ({
    clientId: item.agency_id,
    id: item.id,
    clientName: item.agency_name,
    question: item.question
  }));

  return JSON.stringify(mapped);
}

export function getPaginatedChunkIds(chunks, agencyId, pageNum, pageSize = 5) {
  let agencyRecordIndex = 0; // total agency records seen so far
  let collected = 0;         // agency records collected for this page
  const resultChunks = [];
  let startIndex = 0;
  let foundPage = false;

  for (const chunk of chunks) {
    const agencies = JSON.parse(chunk.includedAgencies.value);

    const count = agencies.filter(a => String(a) === String(agencyId)).length;
    if (count === 0) continue;

    // If we haven't reached the start of this page, skip these records.
    if (!foundPage && agencyRecordIndex + count < (pageNum - 1) * pageSize + 1) {
      agencyRecordIndex += count;
      continue;
    }

    // If this is the first chunk of the page, calculate startIndex.
    if (!foundPage) {
      startIndex = (pageNum - 1) * pageSize - agencyRecordIndex;
      foundPage = true;
    }

    resultChunks.push(chunk.chunkId);
    collected += count;

    if (collected >= pageSize) break;

    agencyRecordIndex += count;
  }

  return JSON.stringify({
    chunks: resultChunks,
    startIndex: startIndex
  });
}
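
A worked example may help here. The chunk records are invented, but their shape (chunkId, includedAgencies.value as a JSON array string with one agency id per record) follows the code above:

// Invented chunks: includedAgencies.value lists one agency id per record.
const chunks = [
  { chunkId: "c1", includedAgencies: { value: '["a1","a2","a1"]' } }, // 2 records for a1
  { chunkId: "c2", includedAgencies: { value: '["a1"]' } },           // 1 record for a1
  { chunkId: "c3", includedAgencies: { value: '["a1","a1"]' } },      // 2 records for a1
];

getPaginatedChunkIds(chunks, "a1", 2, 2);
// => '{"chunks":["c2","c3"],"startIndex":0}'
// Page 2 (records 3-4 for a1) starts at offset 0 within chunks c2 + c3;
// startIndex then feeds filterDataByAgency below.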

export function filterDataByAgency(aggregatedData, startIndex, agencyId, pageSize = 5) {
  const filtered = aggregatedData.filter(item => String(item.agency_id) === String(agencyId));

  // Use pageSize rather than a hardcoded 5 so callers can change the page size.
  const paginated = filtered.slice(startIndex, startIndex + pageSize);

  const result = paginated.map(item => ({
    clientId: item.agency_id,
    id: item.id,
    clientName: item.agency_name,
    question: item.question
  }));
  return JSON.stringify(result);
}
30 changes: 30 additions & 0 deletions DSL/DMapper/rag-search/lib/requestLoggerMiddleware.js
@@ -0,0 +1,30 @@
/**
* @param res Original Response Object
* @param send Original UNMODIFIED res.send function
* @return A patched res.send which takes the send content, binds it to contentBody on
* the res and then calls the original res.send after restoring it
*/
const resDotSendInterceptor = (res, send) => (content) => {
  res.contentBody = content;
  res.send = send;
  res.send(content);
};

export const requestLoggerMiddleware =
  ({ logger }) =>
  (req, res, next) => {
    logger(
      `Request: {method: ${req.method}, url: ${req.url}, params: ${JSON.stringify(
        req.params
      )}, query: ${JSON.stringify(req.query)}, body: ${JSON.stringify(req.body)}}`
    );
    res.send = resDotSendInterceptor(res, res.send);
    res.on("finish", () => {
      logger(
        `Response: {statusCode: ${res.statusCode}, responseData: ${res.contentBody}}`
      );
    });
    next();
  };
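
A minimal sketch of wiring this middleware into an Express app; the actual app setup is not part of this diff, so the file paths and logger choice here are assumptions:

// Assumed wiring; the real app bootstrap is not shown in this PR.
import express from "express";
import { requestLoggerMiddleware } from "./lib/requestLoggerMiddleware.js";

const app = express();
app.use(express.json());
// Register before the routes so every request/response pair is logged.
app.use(requestLoggerMiddleware({ logger: console.log }));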
@@ -10,14 +10,17 @@ CREATE TABLE llm_connections (
embedding_platform VARCHAR(100) NOT NULL, -- e.g. Azure AI, OpenAI
embedding_model VARCHAR(100) NOT NULL, -- e.g. Ada-200-1

-- Budget and Environment
-- Budget and Usage Tracking
monthly_budget NUMERIC(12,2) NOT NULL, -- e.g. 1000.00
used_budget NUMERIC(12,2) DEFAULT 0.00, -- e.g. 250.00
environment VARCHAR(50) NOT NULL,
warn_budget_threshold NUMERIC(5) DEFAULT 80, -- percentage to warn at
stop_budget_threshold NUMERIC(5) DEFAULT 100, -- percentage to stop at
disconnect_on_budget_exceed BOOLEAN DEFAULT TRUE,

-- Metadata
connection_status VARCHAR(50) DEFAULT 'active', -- active / inactive
created_at TIMESTAMP DEFAULT NOW(),
environment VARCHAR(50) NOT NULL,

-- Mocked Credentials and Access Info
-- Azure
@@ -0,0 +1,11 @@
UPDATE llm_connections
SET
connection_status = 'inactive'
WHERE id = :connection_id
RETURNING
id,
connection_name,
connection_status,
used_budget,
stop_budget_threshold,
disconnect_on_budget_exceed;
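
Resql templates are typically exposed as POST endpoints that take named parameters in a JSON body. A hedged sketch of invoking this statement from Node; the template name and port are assumptions (the SQL file's name is not shown in this diff, and the port comes from the RAG_SEARCH_RESQL value in config.ini above):

// Assumed Resql convention: POST /<template-name> with named parameters in the body.
// "deactivate-llm-connection" is a hypothetical template name.
const res = await fetch("http://resql:8082/deactivate-llm-connection", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ connection_id: 42 }), // binds :connection_id
});
console.log(await res.json());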
5 changes: 5 additions & 0 deletions DSL/Resql/rag-search/POST/get-configuration.sql
@@ -0,0 +1,5 @@
SELECT id, key, value
FROM configuration
WHERE key = :key
  AND id IN (SELECT MAX(id) FROM configuration GROUP BY key)
  AND NOT deleted;
4 changes: 4 additions & 0 deletions DSL/Resql/rag-search/POST/get-llm-connection.sql
@@ -6,6 +6,10 @@ SELECT
embedding_platform,
embedding_model,
monthly_budget,
warn_budget_threshold,
stop_budget_threshold,
used_budget,
disconnect_on_budget_exceed,
environment,
connection_status,
created_at,