diff --git a/.gitignore b/.gitignore index 1dde8af..706c607 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ __pycache__/ .env tim-db datasets -logs/ \ No newline at end of file +logs/ +data_sets +vault/agent-out diff --git a/DSL/CronManager/DSL/reset.yml b/DSL/CronManager/DSL/reset.yml new file mode 100644 index 0000000..cef6083 --- /dev/null +++ b/DSL/CronManager/DSL/reset.yml @@ -0,0 +1,5 @@ +budget_reset: + trigger: "0 0 0 1 * ?" # Runs at 00:00 AM 1st day of every month + # trigger: off + type: exec + command: "../app/scripts/budget_reset.sh -s 10" diff --git a/DSL/CronManager/config/config.ini b/DSL/CronManager/config/config.ini new file mode 100644 index 0000000..a154d8e --- /dev/null +++ b/DSL/CronManager/config/config.ini @@ -0,0 +1,3 @@ +[DSL] + +RAG_SEARCH_RESQL=http://resql:8082 diff --git a/DSL/CronManager/script/budget_reset.sh b/DSL/CronManager/script/budget_reset.sh new file mode 100644 index 0000000..406530e --- /dev/null +++ b/DSL/CronManager/script/budget_reset.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# DEFINING ENDPOINTS + +BUDGET_RESET_ENDPOINT=http://ruuter-public:8086/rag-search/llm-connections/cost/reset + +payload=$(cat < item.platform === platform); return platformData ? 
platformData.isConnect : false; +} + +export function isLabelsMismatch(newLabels, correctedLabels, predictedLabels) { + function check(arr, newLabels) { + if ( + Array.isArray(newLabels) && + Array.isArray(arr) && + newLabels.length === arr.length + ) { + for (let label of newLabels) { + if (!arr.includes(label)) { + return true; + } + } + return false; + } else { + return true; + } + } + + const val1 = check(correctedLabels, newLabels); + const val2 = check(predictedLabels, newLabels); + return val1 && val2; +} + +export function getOutlookExpirationDateTime() { + const currentDate = new Date(); + currentDate.setDate(currentDate.getDate() + 3); + const updatedDateISOString = currentDate.toISOString(); + return updatedDateISOString; +} + +export function findDuplicateStopWords(inputArray, existingArray) { + const set1 = new Set(existingArray); + const duplicates = inputArray.filter((item) => set1.has(item)); + const value = JSON.stringify(duplicates); + return value; +} + +export function findNotExistingStopWords(inputArray, existingArray) { + const set1 = new Set(existingArray); + const notExisting = inputArray.filter((item) => !set1.has(item)); + const value = JSON.stringify(notExisting); + return value; +} + +export function getRandomString() { + const randomHexString = randomBytes(32).toString("hex"); + return randomHexString; +} + +export function base64Decrypt(cipher, isObject) { + if (!cipher) { + return JSON.stringify({ + error: true, + message: 'Cipher is missing', + }); + } + + try { + const decodedContent = !isObject ? 
Buffer.from(cipher, 'base64').toString('utf8') : JSON.parse(Buffer.from(cipher, 'base64').toString('utf8')); + const cleanedContent = decodedContent.replace(/\r/g, ''); + return JSON.stringify({ + error: false, + content: cleanedContent + }); + } catch (err) { + return JSON.stringify({ + error: true, + message: 'Base64 Decryption Failed', + }); + } +} + +export function base64Encrypt(content) { + if (!content) { + return { + error: true, + message: 'Content is missing', + } + } + + try { + return JSON.stringify({ + error: false, + cipher: Buffer.from(typeof content === 'string' ? content : JSON.stringify(content)).toString('base64') + }); + } catch (err) { + return JSON.stringify({ + error: true, + message: 'Base64 Encryption Failed', + }); + } +} + +export function jsEscape(str) { + return JSON.stringify(str).slice(1, -1) +} + +export function isValidIntentName(name) { + // Allows letters (any unicode letter), numbers, and underscores + // Matches front-end validation with spaces replaced with underscores + return /^[\p{L}\p{N}_]+$/u.test(name); +} + +export function eq(v1, v2) { + return v1 === v2; +} + +export function getAgencyDataHash(agencyId) { + // Generate a random hash based on agency ID + // Create a consistent but seemingly random hash for each agencyId + const baseHash = agencyId.padEnd(10, agencyId); // Ensure at least 10 chars + let hash = ''; + const chars = 'abcdefghijklmnopqrstuvwxyz0123456789'; + + // Use the agencyId as a seed for pseudo-randomness + for (let i = 0; i < 16; i++) { + // Get character code from the baseHash, or use index if out of bounds + const charCode = i < baseHash.length ? 
baseHash.charCodeAt(i) : i; + // Use the character code to get an index in our chars string + const index = (charCode * 13 + i * 7) % chars.length; + hash += chars[index]; + } + + return hash; +} + +export function getAgencyDataAvailable(agencyId) { + // Use agencyId as a seed for deterministic but seemingly random result + // This ensures the same agencyId always gets the same result in the same session + + // Create a hash from the agencyId + let hashValue = 0; + for (let i = 0; i < agencyId.length; i++) { + hashValue = ((hashValue << 5) - hashValue) + agencyId.charCodeAt(i); + hashValue |= 0; // Convert to 32bit integer + } + + // Add a time component to make it change between sessions + // Use current date (year+month only) so it changes monthly but not every request + const date = new Date(); + const timeComponent = date.getFullYear() * 100 + date.getMonth(); + + // Combine the hash and time component for pseudo-randomness + const combinedValue = hashValue + timeComponent; + + // Return true or false based on even/odd value + return (combinedValue % 2) === 0; +} + +export function json(context) { + return JSON.stringify(context); +} + +/** + * Helper function to check if a value is an array + * @param {any} value - The value to check + * @returns {boolean} - True if value is an array, false otherwise + */ +export function isArray(value) { + return Array.isArray(value); +} + +/** + * Returns an array of agencies that are in centopsAgencies but not in gcAgencies (by agencyId). + * @param {Array} gcAgencies - Array of existing agencies, each with an agencyId property. + * @param {Array} centopsAgencies - Array of agencies from CentOps, each with an agencyId property. + * @returns {Array} Array of new agency objects from centopsAgencies. 
+ */ +export function extractNewAgencies(gcAgencies, centopsAgencies) { + const existingIds = new Set(gcAgencies.map(a => a.agencyId)); + const newAgencies = centopsAgencies.filter(a => !existingIds.has(a.agencyId)); + // return newAgencies; + return JSON.stringify({ + agencies: newAgencies, + }); +} + +/** + * Maps raw chunk rows to client-facing records. + * @param {Array} chunkData - Raw rows carrying agency_id, id, agency_name, question. + * @returns {string} JSON string of the mapped records. + * (Was documented as an S3 download with datasetId/pageNum params — stale.) + */ +export function getSingleChunkData(chunkData) { + const mapped = chunkData?.map(item => ({ + clientId: item.agency_id, + id: item.id, + clientName: item.agency_name, + question: item.question + })); + + return JSON.stringify(mapped); +} + +export function getPaginatedChunkIds(chunks, agencyId, pageNum, pageSize = 5) { + let agencyRecordIndex = 0; // total agency records seen so far + let collected = 0; // agency records collected for this page + let resultChunks = []; + let startIndex = 0; + let foundPage = false; + + for (const chunk of chunks) { + let agencies = JSON.parse(chunk.includedAgencies.value) + + const count = agencies.filter(a => String(a) === String(agencyId)).length; + if (count === 0) continue; + + // If we haven't reached the start of this page, skip these records + if (!foundPage && agencyRecordIndex + count < (pageNum - 1) * pageSize + 1) { + agencyRecordIndex += count; + continue; + } + + // If this is the first chunk of the page, calculate startIndex + if (!foundPage) { + startIndex = (pageNum - 1) * pageSize - agencyRecordIndex; + foundPage = true; + } + + resultChunks.push(chunk.chunkId); + collected += count; + + if (collected >= pageSize) break; + + agencyRecordIndex += count; + } + + return JSON.stringify( + { + chunks: resultChunks, + startIndex: startIndex + } + ); +} + +export function filterDataByAgency(aggregatedData, startIndex, agencyId, pageSize=5) { + + const filtered = aggregatedData.filter(item => String(item.agency_id) 
=== String(agencyId)); + + const paginated = filtered.slice(startIndex, startIndex + pageSize); + + const result = paginated.map(item => ({ + clientId: item.agency_id, + id: item.id, + clientName: item.agency_name, // mapped straight from the source row + question: item.question + })); + return JSON.stringify(result); + +} diff --git a/DSL/DMapper/rag-search/lib/requestLoggerMiddleware.js b/DSL/DMapper/rag-search/lib/requestLoggerMiddleware.js new file mode 100644 index 0000000..727a36f --- /dev/null +++ b/DSL/DMapper/rag-search/lib/requestLoggerMiddleware.js @@ -0,0 +1,30 @@ +/** + * @param res Original Response Object + * @param send Original UNMODIFIED res.send function + * @return A patched res.send which takes the send content, binds it to contentBody on + * the res and then calls the original res.send after restoring it + */ +const resDotSendInterceptor = (res, send) => (content) => { + res.contentBody = content; + res.send = send; + res.send(content); +}; + +export const requestLoggerMiddleware = + ({ logger }) => + (req, res, next) => { + logger( + `Request: {method: ${req.method}, url: ${ + req.url + }, params: ${JSON.stringify(req.params)}, query: ${JSON.stringify( + req.query + )}, body: ${JSON.stringify(req.body)}}` + ); + res.send = resDotSendInterceptor(res, res.send); + res.on("finish", () => { + logger( + `Response: {statusCode: ${res.statusCode}, responseData: ${res.contentBody}}` + ); + }); + next(); + }; diff --git a/DSL/Liquibase/changelog/rag-search-script-v1-llm-connections.sql b/DSL/Liquibase/changelog/rag-search-script-v1-llm-connections.sql index 2b2e958..cfead3d 100644 --- a/DSL/Liquibase/changelog/rag-search-script-v1-llm-connections.sql +++ b/DSL/Liquibase/changelog/rag-search-script-v1-llm-connections.sql @@ -10,14 +10,17 @@ CREATE TABLE llm_connections ( embedding_platform VARCHAR(100) NOT NULL, -- e.g. Azure AI, OpenAI embedding_model VARCHAR(100) NOT NULL, -- e.g. 
Ada-200-1 - -- Budget and Environment + -- Budget and Usage Tracking monthly_budget NUMERIC(12,2) NOT NULL, -- e.g. 1000.00 used_budget NUMERIC(12,2) DEFAULT 0.00, -- e.g. 250.00 - environment VARCHAR(50) NOT NULL, + warn_budget_threshold NUMERIC(5) DEFAULT 80, -- percentage to warn at + stop_budget_threshold NUMERIC(5) DEFAULT 100, -- percentage to stop at + disconnect_on_budget_exceed BOOLEAN DEFAULT TRUE, -- Metadata connection_status VARCHAR(50) DEFAULT 'active', -- active / inactive created_at TIMESTAMP DEFAULT NOW(), + environment VARCHAR(50) NOT NULL, -- Mocked Credentials and Access Info -- Azure diff --git a/DSL/Resql/rag-search/POST/deactivate-llm-connection-budget-exceed.sql b/DSL/Resql/rag-search/POST/deactivate-llm-connection-budget-exceed.sql new file mode 100644 index 0000000..af9da1b --- /dev/null +++ b/DSL/Resql/rag-search/POST/deactivate-llm-connection-budget-exceed.sql @@ -0,0 +1,11 @@ +UPDATE llm_connections +SET + connection_status = 'inactive' +WHERE id = :connection_id +RETURNING + id, + connection_name, + connection_status, + used_budget, + stop_budget_threshold, + disconnect_on_budget_exceed; diff --git a/DSL/Resql/rag-search/POST/get-configuration.sql b/DSL/Resql/rag-search/POST/get-configuration.sql new file mode 100644 index 0000000..f03b322 --- /dev/null +++ b/DSL/Resql/rag-search/POST/get-configuration.sql @@ -0,0 +1,5 @@ +SELECT id, key, value +FROM configuration +WHERE key=:key +AND id IN (SELECT max(id) from configuration GROUP BY key) +AND NOT deleted; diff --git a/DSL/Resql/rag-search/POST/get-llm-connection.sql b/DSL/Resql/rag-search/POST/get-llm-connection.sql index 16e86f6..30fdb93 100644 --- a/DSL/Resql/rag-search/POST/get-llm-connection.sql +++ b/DSL/Resql/rag-search/POST/get-llm-connection.sql @@ -6,6 +6,10 @@ SELECT embedding_platform, embedding_model, monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + used_budget, + disconnect_on_budget_exceed, environment, connection_status, created_at, diff --git 
a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql index 74bd584..419d7bc 100644 --- a/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql +++ b/DSL/Resql/rag-search/POST/get-llm-connections-paginated.sql @@ -6,20 +6,25 @@ SELECT embedding_platform, embedding_model, monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, used_budget, environment, connection_status, created_at, CEIL(COUNT(*) OVER() / :page_size::DECIMAL) AS totalPages, - -- Calculate budget status based on usage percentage + -- Calculate budget status based on usage percentage and configured thresholds CASE - WHEN used_budget IS NULL OR monthly_budget IS NULL OR monthly_budget = 0 THEN 'within_budget' - WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 1.0 THEN 'over_budget' - WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 0.8 THEN 'close_to_exceed' + WHEN used_budget IS NULL OR used_budget = 0 OR (used_budget::DECIMAL / monthly_budget::DECIMAL) < (warn_budget_threshold::DECIMAL / 100.0) THEN 'within_budget' + WHEN stop_budget_threshold != 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (stop_budget_threshold::DECIMAL / 100.0) THEN 'over_budget' + WHEN stop_budget_threshold = 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 1 THEN 'over_budget' + WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (warn_budget_threshold::DECIMAL / 100.0) THEN 'close_to_exceed' ELSE 'within_budget' END AS budget_status FROM llm_connections WHERE connection_status <> 'deleted' + AND environment = 'testing' -- NOTE(review): hard-coded filter makes the :environment bind below dead code and hides production rows; confirm intent + AND (:llm_platform IS NULL OR :llm_platform = '' OR llm_platform = :llm_platform) AND (:llm_model IS NULL OR :llm_model = '' OR llm_model = :llm_model) AND (:environment IS NULL OR :environment = '' OR environment = :environment) diff --git a/DSL/Resql/rag-search/POST/get-production-connection.sql b/DSL/Resql/rag-search/POST/get-production-connection.sql 
new file mode 100644 index 0000000..eca9f97 --- /dev/null +++ b/DSL/Resql/rag-search/POST/get-production-connection.sql @@ -0,0 +1,25 @@ +SELECT + id, + connection_name, + used_budget, + monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + environment, + connection_status, + created_at, + llm_platform, + llm_model, + embedding_platform, + embedding_model, + CASE + WHEN used_budget IS NULL OR used_budget = 0 OR (used_budget::DECIMAL / monthly_budget::DECIMAL) < (warn_budget_threshold::DECIMAL / 100.0) THEN 'within_budget' + WHEN stop_budget_threshold != 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (stop_budget_threshold::DECIMAL / 100.0) THEN 'over_budget' + WHEN stop_budget_threshold = 0 AND (used_budget::DECIMAL / monthly_budget::DECIMAL) >= 1 THEN 'over_budget' + WHEN (used_budget::DECIMAL / monthly_budget::DECIMAL) >= (warn_budget_threshold::DECIMAL / 100.0) THEN 'close_to_exceed' + ELSE 'within_budget' + END AS budget_status +FROM llm_connections +WHERE environment = 'production' +ORDER BY created_at DESC +LIMIT 1; diff --git a/DSL/Resql/rag-search/POST/insert-llm-connection.sql b/DSL/Resql/rag-search/POST/insert-llm-connection.sql index c16296c..c4d9679 100644 --- a/DSL/Resql/rag-search/POST/insert-llm-connection.sql +++ b/DSL/Resql/rag-search/POST/insert-llm-connection.sql @@ -5,6 +5,9 @@ INSERT INTO llm_connections ( embedding_platform, embedding_model, monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, environment, connection_status, created_at, @@ -21,6 +24,9 @@ INSERT INTO llm_connections ( :embedding_platform, :embedding_model, :monthly_budget, + :warn_budget_threshold, + :stop_budget_threshold, + :disconnect_on_budget_exceed, :environment, :connection_status, :created_at::timestamp with time zone, @@ -32,11 +38,15 @@ INSERT INTO llm_connections ( :embedding_model_api_key ) RETURNING id, + connection_name, llm_platform, llm_model, embedding_platform, embedding_model, 
monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, environment, connection_status, created_at, diff --git a/DSL/Resql/rag-search/POST/reset-llm-connection-used-budget.sql b/DSL/Resql/rag-search/POST/reset-llm-connection-used-budget.sql new file mode 100644 index 0000000..581f0b9 --- /dev/null +++ b/DSL/Resql/rag-search/POST/reset-llm-connection-used-budget.sql @@ -0,0 +1,13 @@ +UPDATE llm_connections +SET + used_budget = 0.00 +WHERE connection_status <> 'deleted' +RETURNING + id, + connection_name, + monthly_budget, + used_budget, + (monthly_budget - used_budget) AS remaining_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed; diff --git a/DSL/Resql/rag-search/POST/update-llm-connection-environment.sql b/DSL/Resql/rag-search/POST/update-llm-connection-environment.sql new file mode 100644 index 0000000..c16b98c --- /dev/null +++ b/DSL/Resql/rag-search/POST/update-llm-connection-environment.sql @@ -0,0 +1,24 @@ +UPDATE llm_connections +SET + environment = :environment +WHERE id = :connection_id +RETURNING + id, + connection_name, + llm_platform, + llm_model, + embedding_platform, + embedding_model, + monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, + environment, + connection_status, + created_at, + deployment_name, + target_uri, + api_key, + secret_key, + access_key, + embedding_model_api_key; diff --git a/DSL/Resql/rag-search/POST/update-llm-connection-status.sql b/DSL/Resql/rag-search/POST/update-llm-connection-status.sql new file mode 100644 index 0000000..463936e --- /dev/null +++ b/DSL/Resql/rag-search/POST/update-llm-connection-status.sql @@ -0,0 +1,23 @@ +UPDATE llm_connections +SET connection_status = :connection_status +WHERE id = :connection_id +RETURNING + id, + connection_name, + llm_platform, + llm_model, + embedding_platform, + embedding_model, + monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + 
disconnect_on_budget_exceed, + environment, + connection_status, + created_at, + deployment_name, + target_uri, + api_key, + secret_key, + access_key, + embedding_model_api_key; diff --git a/DSL/Resql/rag-search/POST/update-llm-connection-used-budget.sql b/DSL/Resql/rag-search/POST/update-llm-connection-used-budget.sql new file mode 100644 index 0000000..ba6cd4d --- /dev/null +++ b/DSL/Resql/rag-search/POST/update-llm-connection-used-budget.sql @@ -0,0 +1,14 @@ +UPDATE llm_connections +SET + used_budget = used_budget + :usage +WHERE id = :connection_id +RETURNING + id, + connection_name, + monthly_budget, + used_budget, + (monthly_budget - used_budget) AS remaining_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, + connection_status; \ No newline at end of file diff --git a/DSL/Resql/rag-search/POST/update-llm-connection.sql b/DSL/Resql/rag-search/POST/update-llm-connection.sql index f3eae2a..a442227 100644 --- a/DSL/Resql/rag-search/POST/update-llm-connection.sql +++ b/DSL/Resql/rag-search/POST/update-llm-connection.sql @@ -6,6 +6,9 @@ SET embedding_platform = :embedding_platform, embedding_model = :embedding_model, monthly_budget = :monthly_budget, + warn_budget_threshold = :warn_budget_threshold, + stop_budget_threshold = :stop_budget_threshold, + disconnect_on_budget_exceed = :disconnect_on_budget_exceed, environment = :environment, -- Azure credentials deployment_name = :deployment_name, @@ -25,6 +28,9 @@ RETURNING embedding_platform, embedding_model, monthly_budget, + warn_budget_threshold, + stop_budget_threshold, + disconnect_on_budget_exceed, environment, connection_status, created_at, diff --git a/DSL/Ruuter.private/rag-search/GET/llm-connections/cost/check.yml b/DSL/Ruuter.private/rag-search/GET/llm-connections/cost/check.yml new file mode 100644 index 0000000..df51bbb --- /dev/null +++ b/DSL/Ruuter.private/rag-search/GET/llm-connections/cost/check.yml @@ -0,0 +1,57 @@ +declaration: + call: declare + version: 0.1 + 
description: "Check if production LLM connection's used budget has exceeded warn or stop budget thresholds" + method: get + accepts: json + returns: json + namespace: rag-search + +get_production_connection_budget_status: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-production-connection" + body: {} + result: budget_result + next: check_budget_status + +check_budget_status: + switch: + - condition: "${budget_result.response.body.length > 0}" + next: process_budget_status + next: return_no_production_connection + +process_budget_status: + assign: + used_budget: '${budget_result.response.body[0].usedBudget || 0}' + monthly_budget: '${budget_result.response.body[0].monthlyBudget}' + warn_threshold: '${budget_result.response.body[0].warnBudgetThreshold}' + stop_threshold: '${budget_result.response.body[0].stopBudgetThreshold}' + warn_budget_amount: '${(monthly_budget * warn_threshold) / 100}' + stop_budget_amount: '${(monthly_budget * stop_threshold) / 100}' + exceeded_stop_budget: '${stop_budget_amount != 0 && used_budget >= stop_budget_amount}' + exceeded_warn_budget: '${used_budget >= warn_budget_amount}' + within_budget: '${used_budget < warn_budget_amount}' + used_budget_percentage: '${(used_budget * 100) / monthly_budget}' + + next: process_warnings + +process_warnings: + assign: + response: { + data: '${budget_result.response.body[0]}', + used_budget_percentage: '${used_budget_percentage}', + exceeded_stop_budget: '${exceeded_stop_budget}', + exceeded_warn_budget: '${exceeded_warn_budget}', + } + next: return_budget_status + +return_budget_status: + return: ${response} + status: 200 + next: end + +return_no_production_connection: + return: "No production LLM connection found" + status: 404 + next: end \ No newline at end of file diff --git a/DSL/Ruuter.private/rag-search/GET/llm-connections/production.yml b/DSL/Ruuter.private/rag-search/GET/llm-connections/production.yml new file mode 100644 index 0000000..be75219 --- /dev/null +++ 
b/DSL/Ruuter.private/rag-search/GET/llm-connections/production.yml @@ -0,0 +1,19 @@ +declaration: + call: declare + version: 0.1 + description: "Get production LLM connection" + method: get + returns: json + namespace: rag-search + +get_production_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-production-connection" + result: connection_result + next: return_success + +return_success: + return: ${connection_result.response.body} + status: 200 + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/inference/production.yml b/DSL/Ruuter.private/rag-search/POST/inference/production.yml new file mode 100644 index 0000000..5b13570 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/inference/production.yml @@ -0,0 +1,121 @@ +declaration: + call: declare + version: 0.1 + description: "Call LLM orchestration service with budget validation" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: chatId + type: string + description: "Chat ID" + - field: message + type: string + description: "User message" + - field: authorId + type: string + description: "Author ID" + - field: conversationHistory + type: array + description: "Conversation history" + - field: url + type: string + description: "URL reference" + +extract_request_data: + assign: + chatId: ${incoming.body.chatId} + message: ${incoming.body.message} + authorId: ${incoming.body.authorId} + conversationHistory: ${incoming.body.conversationHistory} + url: ${incoming.body.url} + next: get_production_connection + +get_production_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-production-connection" + body: {} + result: production_connection_result + next: validate_production_connection + +validate_production_connection: + switch: + - condition: "${production_connection_result.response.body.length > 0}" + next: extract_connection_id + next: return_no_production_connection + +extract_connection_id: + assign: + 
connection_id: ${Number(production_connection_result.response.body[0].id)} + next: check_budget_status + +check_budget_status: + call: http.post + args: + url: "[#RAG_SEARCH_RUUTER_PRIVATE]/llm-connections/usage/check" + body: + connection_id: ${connection_id} + headers: + cookie: ${incoming.headers.cookie} + result: budget_check_result + next: validate_budget_status + +validate_budget_status: + switch: + - condition: ${budget_check_result.response.body.response.isLLMConnectionDisconnected} + next: assign_disconnected_response + - condition: "${budget_check_result.response.statusCodeValue >= 400}" + next: return_budget_check_error + next: call_orchestrate_endpoint + +call_orchestrate_endpoint: + call: http.post + args: + url: "[#RAG_SEARCH_LLM_ORCHESTRATOR]" + body: + chatId: ${chatId} + message: ${message} + authorId: ${authorId} + conversationHistory: ${conversationHistory} + url: ${url} + environment: "production" + headers: + Content-Type: "application/json" + result: orchestrate_result + next: assign_response + +assign_response: + assign: + response: "${orchestrate_result.response.body}" + next: return_orchestrate_response + +return_orchestrate_response: + return: ${response} + next: end + +assign_disconnected_response: + assign: + disconnected_response: + { + chatId: "${chatId}", + content: "The LLM connection is currently unavailable. Your request couldn’t be processed. 
Please retry shortly.", + status: 400 + } + next: return_connection_disconnected + +return_connection_disconnected: + status: 400 + return: ${disconnected_response} + next: end + +return_budget_check_error: + return: ${budget_check_result.response.body} + next: end + +return_no_production_connection: + status: 404 + return: "No production connection found" + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/inference/test.yml b/DSL/Ruuter.private/rag-search/POST/inference/test.yml new file mode 100644 index 0000000..61a5bd9 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/inference/test.yml @@ -0,0 +1,102 @@ +declaration: + call: declare + version: 0.1 + description: "Call LLM orchestration service with budget validation" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: connectionId + type: string + description: "Connection ID" + - field: message + type: string + description: "User message" + +extract_request_data: + assign: + connectionId: ${incoming.body.connectionId} + message: ${incoming.body.message} + next: get_connection + +get_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-llm-connection" + body: + connection_id: ${connectionId} + result: connection_result + next: validate_connection + +validate_connection: + switch: + - condition: "${connection_result.response.body.length > 0}" + next: check_budget_status + next: return_no_test_connection + +check_budget_status: + call: http.post + args: + url: "[#RAG_SEARCH_RUUTER_PRIVATE]/llm-connections/usage/check" + body: + connection_id: ${connectionId} + headers: + cookie: ${incoming.headers.cookie} + result: budget_check_result + next: validate_budget_status + +validate_budget_status: + switch: + - condition: ${budget_check_result.response.body.response.isLLMConnectionDisconnected} + next: assign_disconnected_response + - condition: "${budget_check_result.response.statusCodeValue >= 400}" + next: return_budget_check_error + 
next: call_orchestrate_endpoint + +call_orchestrate_endpoint: + call: http.post + args: + url: "[#RAG_SEARCH_LLM_ORCHESTRATOR]/test" + body: + connectionId: ${connectionId} + message: ${message} + environment: "test" + headers: + Content-Type: "application/json" + result: orchestrate_result + next: assign_response + +assign_response: + assign: + response: "${orchestrate_result.response.body}" + next: return_orchestrate_response + +return_orchestrate_response: + return: ${response} + next: end + +assign_disconnected_response: + assign: + disconnected_response: + { + connectionId: "${connectionId}", + content: "The LLM connection is currently unavailable. Your request couldn’t be processed. Please retry shortly.", + status: 400 + } + next: return_connection_disconnected + +return_connection_disconnected: + status: 400 + return: ${disconnected_response} + next: end + +return_budget_check_error: + return: ${budget_check_result.response.body} + next: end + +return_no_test_connection: + status: 404 + return: "No test connection found" + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/add.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/add.yml index 512238a..dffe487 100644 --- a/DSL/Ruuter.private/rag-search/POST/llm-connections/add.yml +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/add.yml @@ -26,6 +26,15 @@ declaration: - field: monthly_budget type: number description: "Monthly budget amount" + - field: warn_budget_threshold + type: number + description: "Warn budget threshold percentage" + - field: stop_budget_threshold + type: number + description: "Stop budget threshold percentage" + - field: disconnect_on_budget_exceed + type: boolean + description: "Automatically disconnect when budget threshold is exceeded" - field: deployment_environment type: string description: "Deployment environment (Testing or Production)" @@ -59,6 +68,9 @@ extract_request_data: embedding_platform: ${incoming.body.embedding_platform} 
embedding_model: ${incoming.body.embedding_model} monthly_budget: ${incoming.body.monthly_budget} + warn_budget_threshold: ${incoming.body.warn_budget_threshold || 0} + stop_budget_threshold: ${incoming.body.stop_budget_threshold || 0} + disconnect_on_budget_exceed: ${incoming.body.disconnect_on_budget_exceed || false} deployment_environment: ${incoming.body.deployment_environment} deployment_name: ${incoming.body.deployment_name || ""} target_uri: ${incoming.body.target_uri || ""} @@ -72,9 +84,38 @@ extract_request_data: validate_environment: switch: - condition: ${deployment_environment == "testing" || deployment_environment == "production"} - next: add_llm_connection + next: check_production_environment next: return_invalid_environment +check_production_environment: + switch: + - condition: ${deployment_environment == "production"} + next: get_existing_production_connection + next: add_llm_connection + +get_existing_production_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-production-connection" + result: existing_production_result + next: update_existing_production_to_testing + +update_existing_production_to_testing: + switch: + - condition: ${existing_production_result.response.body && existing_production_result.response.body.length > 0} + next: update_production_connection + next: add_llm_connection + +update_production_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/update-llm-connection-environment" + body: + connection_id: ${existing_production_result.response.body[0].id} + environment: "testing" + result: update_result + next: add_llm_connection + add_llm_connection: call: http.post args: @@ -86,6 +127,9 @@ add_llm_connection: embedding_platform: ${embedding_platform} embedding_model: ${embedding_model} monthly_budget: ${monthly_budget} + warn_budget_threshold: ${warn_budget_threshold} + stop_budget_threshold: ${stop_budget_threshold} + disconnect_on_budget_exceed: ${disconnect_on_budget_exceed} environment: 
${deployment_environment} connection_status: "active" created_at: ${new Date().toISOString()} @@ -96,10 +140,19 @@ add_llm_connection: access_key: ${access_key} embedding_model_api_key: ${embedding_model_api_key} result: connection_result + next: assign_connection_response + +assign_connection_response: + assign: + response: { + id: "${connection_result.response.body[0].id}", + status: 201, + operationSuccess: true + } next: return_success return_success: - return: "LLM connection added successfully" + return: ${response} status: 200 next: end diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/cost/update.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/cost/update.yml new file mode 100644 index 0000000..d0e55c3 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/cost/update.yml @@ -0,0 +1,150 @@ +declaration: + call: declare + version: 0.1 + description: "Update used budget for an LLM connection" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: connection_id + type: number + description: "LLM connection ID" + - field: usage + type: number + description: "Usage amount to add to current used_budget" + +extract_request_data: + assign: + connection_id: ${Number(incoming.body.connection_id)} + usage: ${Number(incoming.body.usage)} + next: validate_request + +validate_request: + switch: + - condition: ${connection_id && usage >= 0} + next: check_connection_exists + next: return_bad_request + +check_connection_exists: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-llm-connection" + body: + connection_id: ${connection_id} + result: existing_connection + next: validate_connection_exists + +validate_connection_exists: + switch: + - condition: "${existing_connection.response.body.length > 0}" + next: update_used_budget + next: return_not_found + +update_used_budget: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/update-llm-connection-used-budget" + body: + 
connection_id: ${connection_id} + usage: ${usage} + result: update_result + next: check_update_status + +check_update_status: + switch: + - condition: ${200 <= update_result.response.statusCodeValue && update_result.response.statusCodeValue < 300} + next: check_budget_threshold + next: return_update_failed + +check_budget_threshold: + assign: + updated_connection: ${update_result.response.body[0]} + disconnect_flag: ${updated_connection.disconnectOnBudgetExceed} + monthly_budget: ${Number(updated_connection.monthlyBudget)} + used_budget: ${Number(updated_connection.usedBudget)} + stop_threshold: ${Number(updated_connection.stopBudgetThreshold)} + threshold_amount: ${(monthly_budget / 100) * stop_threshold} + should_deactivate: ${disconnect_flag && used_budget >= threshold_amount} + next: evaluate_deactivation + +evaluate_deactivation: + switch: + - condition: ${should_deactivate} + next: deactivate_connection + next: format_success_response + +deactivate_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/deactivate-llm-connection-budget-exceed" + body: + connection_id: ${connection_id} + result: deactivate_result + next: check_deactivate_status + +check_deactivate_status: + switch: + - condition: ${200 <= deactivate_result.response.statusCodeValue && deactivate_result.response.statusCodeValue < 300} + next: assignDisconnectResponse + next: format_success_response + +assignDisconnectResponse: + assign: + data_budget_exceeded: { + data: '${deactivate_result.response.body[0]}', + } + next: format_budget_exceeded_response + +format_budget_exceeded_response: + assign: + response_budget_exceeded: { + data: '${data_budget_exceeded.data}', + message: 'Used budget updated successfully. 
Connection deactivated due to budget threshold exceeded.', + budgetExceeded: true, + operationSuccess: true, + statusCode: 200 + } + next: return_budget_exceeded + +format_success_response: + assign: + response_success: { + data: '${update_result.response.body[0]}', + budgetExceeded: false, + message: 'Used budget updated successfully', + operationSuccess: true, + statusCode: 200 + } + next: return_success + +return_budget_exceeded: + return: ${response_budget_exceeded} + status: 200 + next: end + +return_success: + return: ${response_success} + status: 200 + next: end + +return_not_found: + status: 404 + return: "error: connection not found" + next: end + +return_bad_request: + status: 400 + return: "error: connection_id and usage (>= 0) are required" + next: end + +return_update_failed: + status: 500 + return: "error: failed to update used budget" + next: end + +return_unauthorized: + status: 401 + return: "error: unauthorized" + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/edit.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/edit.yml index 417109f..420f3ca 100644 --- a/DSL/Ruuter.private/rag-search/POST/llm-connections/edit.yml +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/edit.yml @@ -29,6 +29,15 @@ declaration: - field: monthly_budget type: number description: "Monthly budget amount" + - field: warn_budget_threshold + type: number + description: "Warn budget threshold percentage" + - field: stop_budget_threshold + type: number + description: "Stop budget threshold percentage" + - field: disconnect_on_budget_exceed + type: boolean + description: "Automatically disconnect when budget threshold is exceeded" - field: deployment_environment type: string description: "Deployment environment (Testing or Production)" @@ -60,6 +69,9 @@ extract_request_data: embedding_platform: ${incoming.body.embedding_platform} embedding_model: ${incoming.body.embedding_model} monthly_budget: ${Number(incoming.body.monthly_budget)} + 
warn_budget_threshold: ${Number(incoming.body.warn_budget_threshold)} + stop_budget_threshold: ${Number(incoming.body.stop_budget_threshold)} + disconnect_on_budget_exceed: ${incoming.body.disconnect_on_budget_exceed} environment: ${incoming.body.deployment_environment} deployment_name: ${incoming.body.deployment_name || ""} target_uri: ${incoming.body.target_uri || ""} @@ -103,6 +115,9 @@ update_llm_connection: embedding_platform: ${embedding_platform} embedding_model: ${embedding_model} monthly_budget: ${monthly_budget} + warn_budget_threshold: ${warn_budget_threshold} + stop_budget_threshold: ${stop_budget_threshold} + disconnect_on_budget_exceed: ${disconnect_on_budget_exceed} environment: ${environment} deployment_name: ${deployment_name} target_uri: ${target_uri} diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/update-status.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/update-status.yml new file mode 100644 index 0000000..953e392 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/update-status.yml @@ -0,0 +1,91 @@ +declaration: + call: declare + version: 0.1 + description: "Update LLM connection status (active/inactive)" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: connection_id + type: number + description: "LLM connection ID" + - field: connection_status + type: string + description: "Connection status (active/inactive)" + +extract_request_data: + assign: + connection_id: ${Number(incoming.body.connection_id)} + connection_status: ${incoming.body.connection_status} + next: validate_request + +validate_request: + switch: + - condition: ${!connection_id || !connection_status} + next: return_bad_request + - condition: ${connection_status !== "active" && connection_status !== "inactive"} + next: return_invalid_status + next: check_connection_exists + +check_connection_exists: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-llm-connection" + body: + 
connection_id: ${connection_id} + result: existing_connection + next: validate_connection_exists + +validate_connection_exists: + switch: + - condition: "${existing_connection.response.body.length > 0}" + next: update_connection_status + next: return_not_found + +update_connection_status: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/update-llm-connection-status" + body: + connection_id: ${connection_id} + connection_status: ${connection_status} + result: update_result + next: check_update_status + +check_update_status: + switch: + - condition: ${200 <= update_result.response.statusCodeValue && update_result.response.statusCodeValue < 300} + next: return_success + next: return_update_failed + +return_success: + return: ${update_result.response.body[0]} + status: 200 + next: end + +return_not_found: + status: 404 + return: "error: connection not found" + next: end + +return_bad_request: + status: 400 + return: "error: connection_id and connection_status are required" + next: end + +return_invalid_status: + status: 400 + return: "error: connection_status must be 'active' or 'inactive'" + next: end + +return_update_failed: + status: 500 + return: "error: failed to update connection status" + next: end + +return_unauthorized: + status: 401 + return: "error: unauthorized" + next: end diff --git a/DSL/Ruuter.private/rag-search/POST/llm-connections/usage/check.yml b/DSL/Ruuter.private/rag-search/POST/llm-connections/usage/check.yml new file mode 100644 index 0000000..eabf745 --- /dev/null +++ b/DSL/Ruuter.private/rag-search/POST/llm-connections/usage/check.yml @@ -0,0 +1,99 @@ +declaration: + call: declare + version: 0.1 + description: "Check budget usage and either proceed with orchestration or return budget exceeded response" + method: post + accepts: json + returns: json + namespace: rag-search + allowlist: + body: + - field: connection_id + type: number + description: "LLM connection ID" + +extract_request_data: + assign: + connection_id: 
${Number(incoming.body.connection_id)} + next: get_connection + +get_connection: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-llm-connection" + body: + connection_id: ${connection_id} + result: connection_result + next: check_connection_exists + +check_connection_exists: + switch: + - condition: "${connection_result.response.body.length > 0}" + next: calculate_budget_threshold + next: return_connection_not_found + +calculate_budget_threshold: + assign: + connection_data: ${connection_result.response.body[0]} + monthly_budget: ${Number(connection_data.monthlyBudget)} + used_budget: ${Number(connection_data.usedBudget)} + stop_budget_threshold: ${Number(connection_data.stopBudgetThreshold)} + disconnect_on_budget_exceed: ${connection_data.disconnectOnBudgetExceed} + budget_threshold_amount: ${monthly_budget / 100 * stop_budget_threshold} + next: check_budget_threshold + +check_budget_threshold: + switch: + - condition: ${used_budget >= budget_threshold_amount} + next: handle_budget_exceeded + next: format_budget_within_threshold + +handle_budget_exceeded: + switch: + - condition: ${disconnect_on_budget_exceed} + next: format_budget_exceeded_disconnected + next: format_budget_exceeded_not_disconnected + +format_budget_within_threshold: + assign: + response_budget_within_threshold: + { + isBudgetExceed: false, + isLLMConnectionDisconnected: false, + } + next: return_budget_within_threshold + +format_budget_exceeded_not_disconnected: + assign: + response_budget_exceeded_not_disconnected: + { + isBudgetExceed: true, + isLLMConnectionDisconnected: false, + } + next: return_budget_exceeded_not_disconnected + +format_budget_exceeded_disconnected: + assign: + response_budget_exceeded_disconnected: + { + isBudgetExceed: true, + isLLMConnectionDisconnected: true, + } + next: return_budget_exceeded_disconnected + +return_budget_within_threshold: + return: ${response_budget_within_threshold} + next: end + +return_budget_exceeded_not_disconnected: + return: 
${response_budget_exceeded_not_disconnected} + next: end + +return_budget_exceeded_disconnected: + return: ${response_budget_exceeded_disconnected} + next: end + +return_connection_not_found: + status: 404 + return: "Connection not found" + next: end diff --git a/DSL/Ruuter.public/rag-search/POST/llm-connections/cost/reset.yml b/DSL/Ruuter.public/rag-search/POST/llm-connections/cost/reset.yml new file mode 100644 index 0000000..cc55fec --- /dev/null +++ b/DSL/Ruuter.public/rag-search/POST/llm-connections/cost/reset.yml @@ -0,0 +1,42 @@ +declaration: + call: declare + version: 0.1 + description: "Reset used budget for all LLM connections to 0" + method: post + accepts: json + returns: json + namespace: rag-search + +reset_used_budget: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/reset-llm-connection-used-budget" + body: {} + result: reset_result + next: check_reset_status + +check_reset_status: + switch: + - condition: ${200 <= reset_result.response.statusCodeValue && reset_result.response.statusCodeValue < 300} + next: format_success_response + next: return_reset_failed + +format_success_response: + assign: + response_success: { + message: 'Used budget reset to 0 successfully for all connections', + totalConnections: '${reset_result.response.body.length}', + operationSuccess: true, + statusCode: 200 + } + next: return_success + +return_success: + return: ${response_success} + status: 200 + next: end + +return_reset_failed: + return: "error: failed to reset used budget" + status: 500 + next: end diff --git a/GUI/.env.development b/GUI/.env.development index 7ff4d8b..39f5e47 100644 --- a/GUI/.env.development +++ b/GUI/.env.development @@ -1,8 +1,7 @@ REACT_APP_RUUTER_API_URL=http://localhost:8086 REACT_APP_RUUTER_PRIVATE_API_URL=http://localhost:8088 -REACT_APP_EXTERNAL_API_URL=http://localhost:8000 REACT_APP_CUSTOMER_SERVICE_LOGIN=http://localhost:3004/et/dev-auth REACT_APP_SERVICE_ID=conversations,settings,monitoring 
-REACT_APP_NOTIFICATION_NODE_URL=http://localhost:4040 -REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:8085 http://localhost:4040; +REACT_APP_NOTIFICATION_NODE_URL=http://localhost:3005 +REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 ws://localhost; REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE \ No newline at end of file diff --git a/GUI/src/components/FormElements/FormInput/index.tsx b/GUI/src/components/FormElements/FormInput/index.tsx index dbf2b95..7d681a6 100644 --- a/GUI/src/components/FormElements/FormInput/index.tsx +++ b/GUI/src/components/FormElements/FormInput/index.tsx @@ -2,7 +2,7 @@ import { forwardRef, InputHTMLAttributes, PropsWithChildren, useId } from 'react import clsx from 'clsx'; import './FormInput.scss'; import { DefaultTFuncReturn } from 'i18next'; -import { formatNumberWithCommas, removeCommasFromNumber } from 'utils/commonUtilts'; +import { formatNumberWithCommas, removeCommasFromNumber } from 'utils/commonUtils'; type InputProps = PropsWithChildren> & { label: string; diff --git a/GUI/src/components/molecules/BudgetBanner/BudgetBanner.scss b/GUI/src/components/molecules/BudgetBanner/BudgetBanner.scss new file mode 100644 index 0000000..6d71a8d --- /dev/null +++ b/GUI/src/components/molecules/BudgetBanner/BudgetBanner.scss @@ -0,0 +1,64 @@ +.budget-banner { + width: 100%; + padding: 20px; + margin-bottom: 20px; + border-radius: 8px; + border: 1px solid; + + &__content { + display: flex; + align-items: center; + + } + + &__message { + font-size: 
16px; + font-weight: 400; + line-height: 1.4; + } + + &__description { + font-size: 14px; + font-weight: 400; + line-height: 1.4; + } + + &--warning { + background-color: #fef3cd; + border-color: #b57c00; + color: #94690d; + + .budget-banner__message { + color: #94690d; + } + } + + &--error { + background-color: #ffeaeb; + border-color: #dc3545; + color: #721c24; + + .budget-banner__message { + color: #721c24; + } + } + + // Responsive design + @media (max-width: 768px) { + padding: 10px 12px; + + &__content { + flex-direction: column; + align-items: flex-start; + gap: 8px; + } + + &__message { + font-size: 13px; + } + } +} + +.m-3 { + margin-top: .75rem !important; +} \ No newline at end of file diff --git a/GUI/src/components/molecules/BudgetBanner/index.tsx b/GUI/src/components/molecules/BudgetBanner/index.tsx new file mode 100644 index 0000000..743900d --- /dev/null +++ b/GUI/src/components/molecules/BudgetBanner/index.tsx @@ -0,0 +1,80 @@ +import React from 'react'; +import { useQuery } from '@tanstack/react-query'; +import { useNavigate } from 'react-router-dom'; +import { checkBudgetStatus, BudgetStatus } from 'services/llmConnections'; +import { llmConnectionsQueryKeys } from 'utils/queryKeys'; +import './BudgetBanner.scss'; +import Button from 'components/Button'; +import { MdOutlineGppMaybe, MdWarning } from 'react-icons/md'; + +const BudgetBanner: React.FC = () => { + const navigate = useNavigate(); + const { data: budgetStatus } = useQuery({ + queryKey: llmConnectionsQueryKeys.budgetStatus(), + queryFn: checkBudgetStatus, + }); + + if (!budgetStatus) { + return null; + } + + const getBannerContent = (status: BudgetStatus) => { + const { used_budget_percentage, exceeded_stop_budget, exceeded_warn_budget, data } = status; + + if (exceeded_stop_budget) { + return { + type: 'error' as const, + message: `Production LLM connection disabled`, + description: `${data?.llmPlatform === "aws" ? "AWS Bedrock" : "Azure OpenAI"} integration has exceeded its budget. 
Update budget to reactivate LLM connection.`, + icon: + }; + } + + if (exceeded_warn_budget) { + return { + type: 'warning' as const, + message: `${used_budget_percentage?.toFixed(1)}% of connection budget is used.`, + description: `${data?.llmPlatform === "aws" ? "AWS Bedrock" : "Azure OpenAI"} integration has used ${used_budget_percentage?.toFixed(1)}% of its budget. Review connection budget to avoid disconnections`, + icon: + + }; + } + + return null; // Don't show banner if within budget + }; + + const bannerContent = getBannerContent(budgetStatus); + + if (!bannerContent) { + return null; + } + + return ( +
+
+ {bannerContent.icon} + + {bannerContent.message} + +
+ + {bannerContent.description} + +

+
+ {budgetStatus.exceeded_warn_budget && !budgetStatus.exceeded_stop_budget ? + ( + + ) : ( + + ) + } +
+ ); +}; + +export default BudgetBanner; diff --git a/GUI/src/components/molecules/LLMConnectionCard/index.tsx b/GUI/src/components/molecules/LLMConnectionCard/index.tsx index 2e06f9a..8d843ff 100644 --- a/GUI/src/components/molecules/LLMConnectionCard/index.tsx +++ b/GUI/src/components/molecules/LLMConnectionCard/index.tsx @@ -1,13 +1,17 @@ -import { FC, PropsWithChildren } from 'react'; +import { FC, PropsWithChildren, useState } from 'react'; import Button from 'components/Button'; import Label from 'components/Label'; import { useDialog } from 'hooks/useDialog'; import './LLMConnectionCard.scss'; import { useTranslation } from 'react-i18next'; -import { formatDate } from 'utils/commonUtilts'; import { useNavigate } from 'react-router-dom'; -import { pl } from 'date-fns/locale'; import { Switch } from 'components/FormElements'; +import { updateLLMConnectionStatus } from 'services/llmConnections'; +import { useToast } from 'hooks/useToast'; +import { ToastTypes } from 'enums/commonEnums'; +import { useMutation, useQueryClient } from '@tanstack/react-query'; +import { llmConnectionsQueryKeys } from 'utils/queryKeys'; +import { AxiosError } from 'axios'; type LLMConnectionCardProps = { llmConnectionId: number | string; @@ -17,6 +21,7 @@ type LLMConnectionCardProps = { isActive?: boolean; deploymentEnv?: string; budgetStatus?: string; + onStatusChange?: (id: number | string, newStatus: boolean) => void; }; const LLMConnectionCard: FC> = ({ @@ -27,19 +32,69 @@ const LLMConnectionCard: FC> = ({ isActive, deploymentEnv, budgetStatus, - + onStatusChange, }) => { const { open, close } = useDialog(); const { t } = useTranslation(); const navigate = useNavigate(); + const toast = useToast(); + const queryClient = useQueryClient(); + + const updateStatusMutation = useMutation({ + mutationFn: ({ id, status }: { id: string | number; status: 'active' | 'inactive' }) => + updateLLMConnectionStatus(id, status), + onSuccess: async (data, variables) => { + // Invalidate queries to 
refresh the data + await queryClient.invalidateQueries({ + queryKey: llmConnectionsQueryKeys.all() + }); + + toast.open({ + type: ToastTypes.SUCCESS, + title: t('toast.success.title'), + message: `Connection ${variables.status === 'active' ? 'activated' : 'deactivated'} successfully`, + }); + + // Call the parent callback to update the list immediately + if (onStatusChange) { + onStatusChange(llmConnectionId, variables.status === 'active'); + } + }, + onError: (error: AxiosError) => { + console.error('Error updating connection status:', error); + toast.open({ + type: ToastTypes.ERROR, + title: t('toast.error.title'), + message: 'Failed to update connection status', + }); + }, + }); + + const handleStatusChange = async (checked: boolean) => { + if (updateStatusMutation.isLoading) return; + + const newStatus = checked ? 'active' : 'inactive'; + updateStatusMutation.mutate({ + id: llmConnectionId, + status: newStatus + }); + }; const renderDeploymentEnv = (deploymentEnvironment: string | undefined) => { - return ( + if (deploymentEnvironment === "testing") { + return ( + + ); + } else if (deploymentEnvironment === "production") { + return ( ); + } }; const renderBudgetStatus = (status: string | undefined) => { @@ -71,8 +126,9 @@ const LLMConnectionCard: FC> = ({

{llmConnectionName}

{}} + checked={isActive ?? false} + onCheckedChange={handleStatusChange} + disabled={updateStatusMutation.isLoading} /> diff --git a/GUI/src/components/molecules/LLMConnectionForm/index.tsx b/GUI/src/components/molecules/LLMConnectionForm/index.tsx index cf0a68b..04557de 100644 --- a/GUI/src/components/molecules/LLMConnectionForm/index.tsx +++ b/GUI/src/components/molecules/LLMConnectionForm/index.tsx @@ -4,17 +4,19 @@ import { useTranslation } from 'react-i18next'; import { useQuery } from '@tanstack/react-query'; import FormInput from 'components/FormElements/FormInput'; import FormSelect from 'components/FormElements/FormSelect'; +import FormCheckbox from 'components/FormElements/FormCheckbox'; import Button from 'components/Button'; import Track from 'components/Track'; -import { - getLLMPlatforms, - getLLMModels, - getEmbeddingPlatforms, +import { + getLLMPlatforms, + getLLMModels, + getEmbeddingPlatforms, getEmbeddingModels, PlatformOption, - ModelOption + ModelOption } from 'services/llmConfigs'; import './LLMConnectionForm.scss'; +import { toOptions } from 'utils/commonUtils'; export type LLMConnectionFormData = { connectionName: string; @@ -23,6 +25,9 @@ export type LLMConnectionFormData = { embeddingModelPlatform: string; embeddingModel: string; monthlyBudget: string; + warnBudget: string; + stopBudget: string; + disconnectOnBudgetExceed: boolean; deploymentEnvironment: string; // AWS Bedrock credentials accessKey?: string; @@ -68,7 +73,10 @@ const LLMConnectionForm: React.FC = ({ embeddingModel: '', embeddingModelApiKey: '', monthlyBudget: '', - deploymentEnvironment: 'testing', + warnBudget: '', + stopBudget: '', + disconnectOnBudgetExceed: false, + deploymentEnvironment: '', // AWS Bedrock credentials accessKey: '', secretKey: '', @@ -84,65 +92,36 @@ const LLMConnectionForm: React.FC = ({ const selectedLLMPlatform = watch('llmPlatform'); const selectedEmbeddingPlatform = watch('embeddingModelPlatform'); + const disconnectOnBudgetExceed = 
watch('disconnectOnBudgetExceed'); // Fetch platform and model options from API const { data: llmPlatformsData = [], isLoading: llmPlatformsLoading, error: llmPlatformsError } = useQuery({ queryKey: ['llm-platforms'], - queryFn: getLLMPlatforms, - retry: 2, - staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: getLLMPlatforms }); const { data: embeddingPlatformsData = [], isLoading: embeddingPlatformsLoading, error: embeddingPlatformsError } = useQuery({ queryKey: ['embedding-platforms'], - queryFn: getEmbeddingPlatforms, - retry: 2, - staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: getEmbeddingPlatforms }); const { data: llmModelsData = [], isLoading: llmModelsLoading, error: llmModelsError } = useQuery({ queryKey: ['llm-models', selectedLLMPlatform], queryFn: () => getLLMModels(selectedLLMPlatform), enabled: !!selectedLLMPlatform, - retry: 2, - staleTime: 2 * 60 * 1000, // 2 minutes }); const { data: embeddingModelsData = [], isLoading: embeddingModelsLoading, error: embeddingModelsError } = useQuery({ queryKey: ['embedding-models', selectedEmbeddingPlatform], queryFn: () => getEmbeddingModels(selectedEmbeddingPlatform), enabled: !!selectedEmbeddingPlatform, - retry: 2, - staleTime: 2 * 60 * 1000, // 2 minutes }); - // Convert API data to option format - const llmPlatformOptions = llmPlatformsData?.map((platform: PlatformOption) => ({ - label: platform.label, - value: platform.value, - })); - - const embeddingPlatformOptions = embeddingPlatformsData?.map((platform: PlatformOption) => ({ - label: platform.label, - value: platform.value, - })); - - const llmModelOptions = llmModelsData?.map((model: ModelOption) => ({ - label: model.label, - value: model.value, - })); - - const embeddingModelOptions = embeddingModelsData?.map((model: ModelOption) => ({ - label: model.label, - value: model.value, - })); - - const [replaceApiKey, setReplaceApiKey] = React.useState(false); - const [replaceSecretKey, setReplaceSecretKey] = React.useState(false); - const 
[replaceAccessKey, setReplaceAccessKey] = React.useState(false); - const [replaceEmbeddingModelApiKey, setReplaceEmbeddingModelApiKey] = React.useState(false); - - // State to track if API key fields should be in replace mode (readonly with replace button) +const llmPlatformOptions = toOptions(llmPlatformsData); +const embeddingPlatformOptions = toOptions(embeddingPlatformsData); +const llmModelOptions = toOptions(llmModelsData); +const embeddingModelOptions = toOptions(embeddingModelsData); + const [apiKeyReplaceMode, setApiKeyReplaceMode] = React.useState(isEditing); const [secretKeyReplaceMode, setSecretKeyReplaceMode] = React.useState(isEditing); const [accessKeyReplaceMode, setAccessKeyReplaceMode] = React.useState(isEditing); @@ -155,17 +134,17 @@ const LLMConnectionForm: React.FC = ({ setValue('targetUri', ''); setValue('apiKey', ''); setValue('llmModel', ''); - + // Reset replace mode states when platform changes setApiKeyReplaceMode(false); setSecretKeyReplaceMode(false); setAccessKeyReplaceMode(false); }; - const resetEmbeddingModelCredentialFields = () => { + const resetEmbeddingModelCredentialFields = () => { setValue('embeddingModelApiKey', ''); setValue('embeddingModel', ''); - + // Reset replace mode state when platform changes setEmbeddingApiKeyReplaceMode(false); }; @@ -266,7 +245,7 @@ const LLMConnectionForm: React.FC = ({ = ({ ); - + default: return (
@@ -340,6 +319,8 @@ const LLMConnectionForm: React.FC = ({ const cleanedData = { ...data, monthlyBudget: data.monthlyBudget.replace(/,/g, ''), + warnBudget: data.warnBudget.replace('%', ''), + stopBudget: data.stopBudget.replace('%', ''), }; onSubmit(cleanedData); }; @@ -379,12 +360,12 @@ const LLMConnectionForm: React.FC = ({ render={({ field }) => ( = ({ render={({ field }) => ( = ({ render={({ field }) => ( = ({ render={({ field }) => ( = ({ />
+
+ + ( + field.onChange(e.target.checked)} + hideLabel={true} + /> + )} + /> +
+ +
+

Warn Budget Threshold

+

You will get a notification when your usage reaches this percentage of your allocated monthly budget.

+ + { + const numericValue = Number(value.replace('%', '')); + + if (numericValue < 1 || numericValue > 100) { + return 'Warn Budget Threshold must be between 1-100%'; + } + return true; + } + }} + render={({ field }) => ( + { + const value = e.target.value.replace(/[^\d]/g, ''); // Remove all non-numeric characters + field.onChange(value); + }} + name={field.name} + onBlur={field.onBlur} + /> + )} + /> +
+ + {disconnectOnBudgetExceed && ( +
+

Disconnect Budget Threshold

+

Your LLM connection will be automatically disconnected and all further requests will be stopped when your usage reaches + this percentage of your monthly budget.

+ + { + if (!disconnectOnBudgetExceed) return true; + + const numericValue = Number(value.replace('%', '')); + const warnValue = Number(formValues.warnBudget?.replace('%', '') || 0); + + if (numericValue < 1 || numericValue > 200) { + return 'Stop Budget Threshold must be between 1-200%'; + } + + if (warnValue > 0 && numericValue <= warnValue) { + return 'Stop Budget Threshold must be greater than Warn Budget Threshold'; + } + + return true; + } + }} + render={({ field }) => ( + { + const value = e.target.value.replace(/[^\d]/g, ''); // Remove all non-numeric characters + field.onChange(value); + }} + name={field.name} + onBlur={field.onBlur} + /> + )} + /> +
+ )} +
{ const { open: openDialog, close: closeDialog } = useDialog(); const queryClient = useQueryClient(); + // Query to check for existing production connection + const { data: existingProductionConnection } = useQuery({ + queryKey: ['production-connection'], + queryFn: getProductionConnection, + }); + const createConnectionMutation = useMutation({ mutationFn: createLLMConnection, onSuccess: async () => { @@ -54,7 +60,41 @@ const CreateLLMConnection = () => { }); const handleSubmit = async (data: LLMConnectionFormData) => { - createConnectionMutation.mutate(data); + const isCreatingProductionConnection = data.deploymentEnvironment === 'production'; + const hasExistingProductionConnection = existingProductionConnection && existingProductionConnection.id; + + if (isCreatingProductionConnection && hasExistingProductionConnection) { + openDialog({ + title: 'Replace Production Connection', + content: ( +
+

A production connection "{existingProductionConnection.connectionName}" already exists.

+

Creating this new production connection will replace the current one. Are you sure you want to proceed?

+
+ ), + footer: ( +
+ + +
+ ), + }); + } else { + createConnectionMutation.mutate(data); + } }; const handleCancel = () => { diff --git a/GUI/src/pages/LLMConnections/ViewLLMConnection.tsx b/GUI/src/pages/LLMConnections/ViewLLMConnection.tsx index af41bd9..28e429f 100644 --- a/GUI/src/pages/LLMConnections/ViewLLMConnection.tsx +++ b/GUI/src/pages/LLMConnections/ViewLLMConnection.tsx @@ -112,7 +112,42 @@ const ViewLLMConnection = () => { }); const handleSubmit = async (data: LLMConnectionFormData) => { - updateConnectionMutation.mutate(data); + const isCurrentlyProduction = connectionData?.environment === 'production'; + const isChangingToTesting = data.deploymentEnvironment === 'testing'; + + if (isCurrentlyProduction && isChangingToTesting) { + openDialog({ + title: 'Confirm Production Environment Change', + content: ( +
+

You are about to change a production connection to testing environment.

+

This will affect the current production setup. Are you sure you want to proceed?

+
+ ), + footer: ( +
+ + +
+ ), + }); + } else { + updateConnectionMutation.mutate(data); + } }; const handleCancel = () => { @@ -122,29 +157,51 @@ const ViewLLMConnection = () => { const handleDelete = () => { - openDialog({ - title: 'Confirm Delete', - content:

Are you sure you want to delete this LLM connection? This action cannot be undone.

, - footer: ( -
+ const isProductionConnection = connectionData?.environment === 'production'; + + if (isProductionConnection) { + openDialog({ + title: 'Cannot Delete Production Connection', + content: ( +
+

This LLM connection is currently set as the production connection and cannot be deleted.

+

To delete this connection, please ensure another connection is set as the production connection.

+
+ ), + footer: ( - -
- ), - }); + ), + }); + } else { + openDialog({ + title: 'Confirm Delete', + content:

Are you sure you want to delete this LLM connection? This action cannot be undone.

, + footer: ( +
+ + +
+ ), + }); + } }; if (isLoading) { @@ -179,6 +236,9 @@ const ViewLLMConnection = () => { embeddingModelPlatform: connectionData.embeddingPlatform, embeddingModel: connectionData.embeddingModel, monthlyBudget: connectionData.monthlyBudget.toString(), + warnBudget: connectionData.warnBudgetThreshold.toString(), + stopBudget: connectionData.disconnectOnBudgetExceed ? connectionData.stopBudgetThreshold.toString() : '0', + disconnectOnBudgetExceed: connectionData.disconnectOnBudgetExceed, deploymentEnvironment: connectionData.environment, // Azure credentials (don't show sensitive data, but include structure) deploymentName: connectionData.deploymentName || '', diff --git a/GUI/src/pages/LLMConnections/index.tsx b/GUI/src/pages/LLMConnections/index.tsx index 0247cc5..6d46024 100644 --- a/GUI/src/pages/LLMConnections/index.tsx +++ b/GUI/src/pages/LLMConnections/index.tsx @@ -2,17 +2,18 @@ import { FC, useEffect, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { Button, FormSelect } from 'components'; import Pagination from 'components/molecules/Pagination'; -import { useQuery } from '@tanstack/react-query'; +import { useQuery, useQueryClient } from '@tanstack/react-query'; import { useNavigate, useSearchParams } from 'react-router-dom'; -import { formattedArray } from 'utils/commonUtilts'; +import { formattedArray } from 'utils/commonUtils'; import DataModelCard from 'components/molecules/LLMConnectionCard'; import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner'; import { ButtonAppearanceTypes } from 'enums/commonEnums'; import NoDataView from 'components/molecules/NoDataView'; +import BudgetBanner from 'components/molecules/BudgetBanner'; import './LLMConnections.scss'; import { platforms, trainingStatuses } from 'config/dataModelsConfig'; import LLMConnectionCard from 'components/molecules/LLMConnectionCard'; -import { fetchLLMConnectionsPaginated, LLMConnectionFilters, LLMConnection } from 
'services/llmConnections'; +import { fetchLLMConnectionsPaginated, LLMConnectionFilters, LLMConnection, getProductionConnection } from 'services/llmConnections'; import { llmConnectionsQueryKeys } from 'utils/queryKeys'; const LLMConnections: FC = () => { @@ -34,6 +35,13 @@ const LLMConnections: FC = () => { queryFn: () => fetchLLMConnectionsPaginated(filters), }); + // Fetch production connection separately + const { data: productionConnection, isLoading: isProductionLoading } = useQuery({ + queryKey: llmConnectionsQueryKeys.production(), + queryFn: getProductionConnection, + }); + + const llmConnections = connectionsResponse; const totalPages = connectionsResponse?.[0]?.totalPages || 1; @@ -47,12 +55,12 @@ const LLMConnections: FC = () => { value: string | number | undefined | { name: string; id: string } ) => { let filterUpdate: Partial = {}; - + if (name === 'sorting') { // Handle sorting format - no conversion needed, use snake_case directly const sortingValue = value as string; const [sortBy, sortOrder] = sortingValue.split(' '); - + filterUpdate = { sortBy: sortBy, sortOrder: sortOrder as 'asc' | 'desc' @@ -65,7 +73,7 @@ const LLMConnections: FC = () => { ...prevFilters, ...filterUpdate, })); - + // Reset to first page when filters change if (name !== 'pageNumber') { setPageIndex(1); @@ -75,23 +83,17 @@ const LLMConnections: FC = () => { // Platform filter options const platformOptions = [ { label: 'All Platforms', value: 'all' }, - { label: 'OpenAI', value: 'openai' }, - { label: 'Anthropic', value: 'anthropic' }, { label: 'Azure OpenAI', value: 'azure' }, - { label: 'Google AI', value: 'google' }, - { label: 'AWS Bedrock', value: 'bedrock' }, - { label: 'Hugging Face', value: 'huggingface' }, + { label: 'AWS Bedrock', value: 'aws' }, ]; // LLM Model filter options - these would ideally come from an API const llmModelOptions = [ { label: 'All Models', value: 'all' }, - { label: 'GPT-4', value: 'gpt-4' }, - { label: 'GPT-4 Turbo', value: 'gpt-4-turbo' }, - { 
label: 'GPT-3.5 Turbo', value: 'gpt-3.5-turbo' }, - { label: 'Claude-3 Sonnet', value: 'claude-3-sonnet' }, - { label: 'Claude-3 Haiku', value: 'claude-3-haiku' }, - { label: 'Gemini Pro', value: 'gemini-pro' }, + { label: 'GPT-4 Mini', value: 'gpt-4o-mini' }, + { label: 'GPT-4o', value: 'gpt-4o' }, + { label: 'Anthropic Claude 3.5 Sonnet', value: 'anthropic-claude-3.5-sonnet' }, + { label: 'Anthropic Claude 3.7 Sonnet', value: 'anthropic-claude-3.7-sonnet' }, ]; // Environment filter options @@ -99,7 +101,6 @@ const LLMConnections: FC = () => { { label: 'All Environments', value: 'all' }, { label: 'Testing', value: 'testing' }, { label: 'Production', value: 'production' }, - { label: 'Development', value: 'development' }, ]; // Sort options - using snake_case format for backend @@ -116,14 +117,13 @@ const LLMConnections: FC = () => { const currentSorting = `${filters.sortBy || 'created_at'} ${filters.sortOrder || 'desc'}`; - // Find featured connection (first active one) - const featuredConnection = llmConnections?.[0]; + // Use production connection as featured connection const otherConnections = llmConnections || []; return (
- {!isModelDataLoading ? ( + {!isModelDataLoading && !isProductionLoading ? (
@@ -202,19 +202,19 @@ const LLMConnections: FC = () => {
- {featuredConnection && ( + {productionConnection && (

Production LLM Connection

@@ -229,8 +229,8 @@ const LLMConnections: FC = () => { { })}
- ) : !featuredConnection ? ( + ) : !productionConnection ? ( ) : null} diff --git a/GUI/src/services/inference.ts b/GUI/src/services/inference.ts index f469251..28de0e7 100644 --- a/GUI/src/services/inference.ts +++ b/GUI/src/services/inference.ts @@ -16,8 +16,8 @@ export interface InferenceResponse { } export async function viewInferenceResult(request: InferenceRequest): Promise { - const { data } = await apiDev.post(inferenceEndpoints.VIEW_INFERENCE_RESULT(), { - llmConnectionId: request.llmConnectionId, + const { data } = await apiDev.post(inferenceEndpoints.VIEW_TEST_INFERENCE_RESULT(), { + connectionId: request.llmConnectionId, message: request.message, }); return data; diff --git a/GUI/src/services/llmConnections.ts b/GUI/src/services/llmConnections.ts index 979da53..96d37e9 100644 --- a/GUI/src/services/llmConnections.ts +++ b/GUI/src/services/llmConnections.ts @@ -1,6 +1,6 @@ import apiDev from './api-dev'; import { llmConnectionsEndpoints } from 'utils/endpoints'; -import { removeCommasFromNumber } from 'utils/commonUtilts'; +import { removeCommasFromNumber } from 'utils/commonUtils'; import { maskSensitiveKey } from 'utils/llmConnectionsUtils'; export interface LLMConnection { @@ -11,8 +11,11 @@ export interface LLMConnection { embeddingPlatform: string; embeddingModel: string; monthlyBudget: number; + warnBudgetThreshold: number; + stopBudgetThreshold: number; + disconnectOnBudgetExceed: boolean; environment: string; - status: string; + connectionStatus: 'active' | 'inactive'; createdAt: string; updatedAt: string; totalPages?: number; @@ -31,7 +34,28 @@ export interface LLMConnection { export interface LLMConnectionsResponse { data: LLMConnection[]; - + +} + +export interface BudgetStatus { + used_budget_percentage: number; + exceeded_stop_budget: boolean; + exceeded_warn_budget: boolean; + data: { + id: number; + connectionName: string; + usedBudget: number; + monthlyBudget: number; + warnBudgetThreshold: number; + stopBudgetThreshold: number; + 
environment: string; + connectionStatus: string; + createdAt: string; + llmPlatform: string; + llmModel: string; + embeddingPlatform: string; + embeddingModel: string; + } } export interface LLMConnectionFilters { @@ -45,8 +69,6 @@ export interface LLMConnectionFilters { environment?: string; status?: string; } - -// Legacy interface for backwards compatibility export interface LegacyLLMConnectionFilters { page: number; pageSize: number; @@ -56,7 +78,6 @@ export interface LegacyLLMConnectionFilters { environment?: string; status?: string; } - export interface LLMConnectionFormData { connectionName: string; llmPlatform: string; @@ -64,6 +85,9 @@ export interface LLMConnectionFormData { embeddingModelPlatform: string; embeddingModel: string; monthlyBudget: string; + warnBudget: string; + stopBudget: string; + disconnectOnBudgetExceed: boolean; deploymentEnvironment: string; // Azure credentials deploymentName?: string; @@ -78,7 +102,7 @@ export interface LLMConnectionFormData { export async function fetchLLMConnectionsPaginated(filters: LLMConnectionFilters): Promise { const queryParams = new URLSearchParams(); - + if (filters.pageNumber) queryParams.append('pageNumber', filters.pageNumber.toString()); if (filters.pageSize) queryParams.append('pageSize', filters.pageSize.toString()); if (filters.sortBy) queryParams.append('sortBy', filters.sortBy); @@ -86,7 +110,7 @@ export async function fetchLLMConnectionsPaginated(filters: LLMConnectionFilters if (filters.llmPlatform) queryParams.append('llmPlatform', filters.llmPlatform); if (filters.llmModel) queryParams.append('llmModel', filters.llmModel); if (filters.environment) queryParams.append('environment', filters.environment); - + const url = `${llmConnectionsEndpoints.FETCH_LLM_CONNECTIONS_PAGINATED()}?${queryParams.toString()}`; const { data } = await apiDev.get(url); return data?.response; @@ -99,6 +123,12 @@ export async function getLLMConnection(id: string | number): Promise { + const { data } = await 
apiDev.get(llmConnectionsEndpoints.GET_PRODUCTION_CONNECTION()); + return data?.response?.[0] || null; +} + + export async function createLLMConnection(connectionData: LLMConnectionFormData): Promise { const { data } = await apiDev.post(llmConnectionsEndpoints.CREATE_LLM_CONNECTION(), { connection_name: connectionData.connectionName, @@ -107,22 +137,25 @@ export async function createLLMConnection(connectionData: LLMConnectionFormData) embedding_platform: connectionData.embeddingModelPlatform, embedding_model: connectionData.embeddingModel, monthly_budget: parseFloat(removeCommasFromNumber(connectionData.monthlyBudget)), + warn_budget_threshold: parseInt(connectionData.warnBudget), + stop_budget_threshold: connectionData.disconnectOnBudgetExceed ? parseInt(connectionData.stopBudget) : 0, + disconnect_on_budget_exceed: connectionData.disconnectOnBudgetExceed, deployment_environment: connectionData.deploymentEnvironment.toLowerCase(), // Azure credentials - deployment_name: connectionData.deploymentName || null, - target_uri: connectionData.targetUri || null, - api_key: maskSensitiveKey(connectionData.apiKey) || null, + deployment_name: connectionData.deploymentName || "", + target_uri: connectionData.targetUri || "", + api_key: maskSensitiveKey(connectionData.apiKey) || "", // AWS Bedrock credentials - secret_key: maskSensitiveKey(connectionData.secretKey) || null, - access_key: maskSensitiveKey(connectionData.accessKey) || null, + secret_key: maskSensitiveKey(connectionData.secretKey) || "", + access_key: maskSensitiveKey(connectionData.accessKey) || "", // Embedding model credentials - embedding_model_api_key: maskSensitiveKey(connectionData.embeddingModelApiKey) || null, + embedding_model_api_key: maskSensitiveKey(connectionData.embeddingModelApiKey) || "", }); return data?.response; } export async function updateLLMConnection( - id: string | number, + id: string | number, connectionData: LLMConnectionFormData ): Promise { const { data } = await 
apiDev.post(llmConnectionsEndpoints.UPDATE_LLM_CONNECTION(), { @@ -133,16 +166,19 @@ export async function updateLLMConnection( embedding_platform: connectionData.embeddingModelPlatform, embedding_model: connectionData.embeddingModel, monthly_budget: parseFloat(removeCommasFromNumber(connectionData.monthlyBudget)), + warn_budget_threshold: parseInt(connectionData.warnBudget), + stop_budget_threshold: connectionData.disconnectOnBudgetExceed ? parseInt(connectionData.stopBudget) : 0, + disconnect_on_budget_exceed: connectionData.disconnectOnBudgetExceed, deployment_environment: connectionData.deploymentEnvironment.toLowerCase(), // Azure credentials - deployment_name: connectionData.deploymentName || null, - target_uri: connectionData.targetUri || null, - api_key: maskSensitiveKey(connectionData.apiKey) || null, + deployment_name: connectionData.deploymentName || "", + target_uri: connectionData.targetUri || "", + api_key: maskSensitiveKey(connectionData.apiKey) || "", // AWS Bedrock credentials - secret_key: maskSensitiveKey(connectionData.secretKey) || null, - access_key: maskSensitiveKey(connectionData.accessKey) || null, + secret_key: maskSensitiveKey(connectionData.secretKey) || "", + access_key: maskSensitiveKey(connectionData.accessKey) || "", // Embedding model credentials - embedding_model_api_key: maskSensitiveKey(connectionData.embeddingModelApiKey) || null, + embedding_model_api_key: maskSensitiveKey(connectionData.embeddingModelApiKey) || "", }); return data?.response; } @@ -152,3 +188,24 @@ export async function deleteLLMConnection(id: string | number): Promise { connection_id: id, }); } + +export async function checkBudgetStatus(): Promise { + try { + const { data } = await apiDev.get(llmConnectionsEndpoints.CHECK_BUDGET_STATUS()); + return data?.response as BudgetStatus; + } catch (error) { + // Return null if no production connection found (404) or other errors + return null; + } +} + +export async function updateLLMConnectionStatus( + id: string | 
number, + status: 'active' | 'inactive' +): Promise { + const { data } = await apiDev.post(llmConnectionsEndpoints.UPDATE_LLM_CONNECTION_STATUS(), { + connection_id: id, + connection_status: status, + }); + return data?.response; +} diff --git a/GUI/src/utils/commonUtilts.ts b/GUI/src/utils/commonUtils.ts similarity index 95% rename from GUI/src/utils/commonUtilts.ts rename to GUI/src/utils/commonUtils.ts index 93f55df..c84e23f 100644 --- a/GUI/src/utils/commonUtilts.ts +++ b/GUI/src/utils/commonUtils.ts @@ -112,3 +112,6 @@ export const formatNumberWithCommas = (value: string | number): string => { export const removeCommasFromNumber = (value: string): string => { return value.replace(/,/g, ''); }; + +export const toOptions = (data?: T[]) => + data?.map(({ label, value }) => ({ label, value })); \ No newline at end of file diff --git a/GUI/src/utils/endpoints.ts b/GUI/src/utils/endpoints.ts index a950369..37d8173 100644 --- a/GUI/src/utils/endpoints.ts +++ b/GUI/src/utils/endpoints.ts @@ -16,11 +16,14 @@ export const authEndpoints = { export const llmConnectionsEndpoints = { FETCH_LLM_CONNECTIONS_PAGINATED: (): string => `/rag-search/llm-connections/list`, GET_LLM_CONNECTION: (): string => `/rag-search/llm-connections/get`, + GET_PRODUCTION_CONNECTION: (): string => `/rag-search/llm-connections/production`, CREATE_LLM_CONNECTION: (): string => `/rag-search/llm-connections/add`, UPDATE_LLM_CONNECTION: (): string => `/rag-search/llm-connections/edit`, + UPDATE_LLM_CONNECTION_STATUS: (): string => `/rag-search/llm-connections/update-status`, DELETE_LLM_CONNECTION: (): string => `/rag-search/llm-connections/delete`, + CHECK_BUDGET_STATUS: (): string => `/rag-search/llm-connections/cost/check`, } export const inferenceEndpoints = { - VIEW_INFERENCE_RESULT: (): string => `/rag-search/inference/results/view`, + VIEW_TEST_INFERENCE_RESULT: (): string => `/rag-search/inference/test`, } diff --git a/GUI/src/utils/queryKeys.ts b/GUI/src/utils/queryKeys.ts index 
b1680d8..e004497 100644 --- a/GUI/src/utils/queryKeys.ts +++ b/GUI/src/utils/queryKeys.ts @@ -29,6 +29,8 @@ export const llmConnectionsQueryKeys = { paginatedList: (filters: LLMConnectionFilters) => [...llmConnectionsQueryKeys.paginatedLists(), filters] as const, details: () => [...llmConnectionsQueryKeys.all(), 'detail'] as const, detail: (id: string | number) => [...llmConnectionsQueryKeys.details(), id] as const, + budgetStatus: () => [...llmConnectionsQueryKeys.all(), 'budget-status'] as const, + production: () => [...llmConnectionsQueryKeys.all(), 'production'] as const, }; export const inferenceQueryKeys = { diff --git a/constants.ini b/constants.ini index 4a68f32..bc09e03 100644 --- a/constants.ini +++ b/constants.ini @@ -6,5 +6,6 @@ RAG_SEARCH_RESQL=http://resql:8082/rag-search RAG_SEARCH_PROJECT_LAYER=rag-search RAG_SEARCH_TIM=http://tim:8085 RAG_SEARCH_CRON_MANAGER=http://cron-manager:9010 +RAG_SEARCH_LLM_ORCHESTRATOR=http://llm-orchestration-service:8100/orchestrate DOMAIN=localhost DB_PASSWORD=dbadmin \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index d8d1224..8fc77ef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,160 +1,173 @@ services: - # ruuter-public: - # container_name: ruuter-public - # image: ruuter - # environment: - # - application.cors.allowedOrigins=http://localhost:8086,http://localhost:3001,http://localhost:3003,http://localhost:3004,http://localhost:8080,http://localhost:8000,http://localhost:8090 - # - application.httpCodesAllowList=200,201,202,204,400,401,403,500 - # - application.internalRequests.allowedIPs=127.0.0.1 - # - application.logging.displayRequestContent=true - # - application.logging.displayResponseContent=true - # - application.logging.printStackTrace=true - # - application.internalRequests.disabled=true - # - server.port=8086 - # volumes: - # - ./DSL/Ruuter.public:/DSL - # - ./constants.ini:/app/constants.ini - # ports: - # - 8086:8086 - # networks: - # - bykstack - # 
cpus: "0.5" - # mem_limit: "512M" + ruuter-public: + container_name: ruuter-public + image: ruuter + environment: + - application.cors.allowedOrigins=http://localhost:8086,http://localhost:3001,http://localhost:3003,http://localhost:3004,http://localhost:8080,http://localhost:8000,http://localhost:8090 + - application.httpCodesAllowList=200,201,202,204,400,401,403,500 + - application.internalRequests.allowedIPs=127.0.0.1 + - application.logging.displayRequestContent=true + - application.logging.displayResponseContent=true + - application.logging.printStackTrace=true + - application.internalRequests.disabled=true + - server.port=8086 + volumes: + - ./DSL/Ruuter.public:/DSL + - ./constants.ini:/app/constants.ini + ports: + - 8086:8086 + networks: + - bykstack + cpus: "0.5" + mem_limit: "512M" - # ruuter-private: - # container_name: ruuter-private - # image: ruuter - # environment: - # - application.cors.allowedOrigins=http://localhost:3001,http://localhost:3003,http://localhost:8088,http://localhost:3002,http://localhost:3004,http://localhost:8000 - # - application.httpCodesAllowList=200,201,202,400,401,403,500 - # - application.internalRequests.allowedIPs=127.0.0.1 - # - application.logging.displayRequestContent=true - # - application.logging.displayResponseContent=true - # - application.logging.printStackTrace=true - # - application.internalRequests.disabled=true - # - server.port=8088 - # volumes: - # - ./DSL/Ruuter.private:/DSL - # - ./constants.ini:/app/constants.ini - # ports: - # - 8088:8088 - # networks: - # - bykstack - # cpus: "0.5" - # mem_limit: "512M" - - # data-mapper: - # container_name: data-mapper - # image: data-mapper - # environment: - # - PORT=3000 - # - CONTENT_FOLDER=/data - # volumes: - # - ./DSL:/data - # - ./DSL/DMapper/rag-search/hbs:/workspace/app/views/rag-search - # - ./DSL/DMapper/rag-search/lib:/workspace/app/lib - # ports: - # - 3000:3000 - # networks: - # - bykstack + ruuter-private: + container_name: ruuter-private + image: ruuter + 
environment: + - application.cors.allowedOrigins=http://localhost:3001,http://localhost:3003,http://localhost:8088,http://localhost:3002,http://localhost:3004,http://localhost:8000 + - application.httpCodesAllowList=200,201,202,400,401,403,500 + - application.internalRequests.allowedIPs=127.0.0.1 + - application.logging.displayRequestContent=true + - application.logging.displayResponseContent=true + - application.logging.printStackTrace=true + - application.internalRequests.disabled=true + - server.port=8088 + volumes: + - ./DSL/Ruuter.private:/DSL + - ./constants.ini:/app/constants.ini + ports: + - 8088:8088 + networks: + - bykstack + cpus: "0.5" + mem_limit: "512M" - # tim: - # container_name: tim - # image: tim - # depends_on: - # tim-postgresql: - # condition: service_started - # environment: - # - SECURITY_ALLOWLIST_JWT=ruuter-private,ruuter-public,data-mapper,resql,tim,tim-postgresql,chat-widget,authentication-layer,127.0.0.1,::1 - # - KEY_PASS=ppjjpp - # ports: - # - 8085:8085 - # networks: - # - bykstack - # extra_hosts: - # - "host.docker.internal:host-gateway" - # cpus: "0.5" - # mem_limit: "512M" + data-mapper: + container_name: data-mapper + image: data-mapper + environment: + - PORT=3000 + - CONTENT_FOLDER=/data + volumes: + - ./DSL:/data + - ./DSL/DMapper/rag-search/hbs:/workspace/app/views/rag-search + - ./DSL/DMapper/rag-search/lib:/workspace/app/lib + ports: + - 3000:3000 + networks: + - bykstack - # tim-postgresql: - # container_name: tim-postgresql - # image: postgres:14.1 - # environment: - # - POSTGRES_USER=tim - # - POSTGRES_PASSWORD=123 - # - POSTGRES_DB=tim - # # - POSTGRES_HOST_AUTH_METHOD=trust - # volumes: - # - ./tim-db:/var/lib/postgresql/data - # ports: - # - 9876:5432 - # networks: - # - bykstack + tim: + container_name: tim + image: tim + depends_on: + tim-postgresql: + condition: service_started + environment: + - 
SECURITY_ALLOWLIST_JWT=ruuter-private,ruuter-public,data-mapper,resql,tim,tim-postgresql,chat-widget,authentication-layer,127.0.0.1,::1 + - KEY_PASS=ppjjpp + ports: + - 8085:8085 + networks: + - bykstack + extra_hosts: + - "host.docker.internal:host-gateway" + cpus: "0.5" + mem_limit: "512M" - # authentication-layer: - # container_name: authentication-layer - # image: authentication-layer - # ports: - # - 3004:3004 - # networks: - # - bykstack + tim-postgresql: + container_name: tim-postgresql + image: postgres:14.1 + environment: + - POSTGRES_USER=tim + - POSTGRES_PASSWORD=123 + - POSTGRES_DB=tim + # - POSTGRES_HOST_AUTH_METHOD=trust + volumes: + - ./tim-db:/var/lib/postgresql/data + ports: + - 9876:5432 + networks: + - bykstack - # resql: - # container_name: resql - # image: resql - # depends_on: - # rag_search_db: - # condition: service_started - # environment: - # - sqlms.datasources.[0].name=byk - # - sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://rag_search_db:5432/rag-search #For LocalDb Use - # # sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://171.22.247.13:5435/byk?sslmode=require - # - sqlms.datasources.[0].username=postgres - # - sqlms.datasources.[0].password=dbadmin - # - logging.level.org.springframework.boot=INFO - # ports: - # - 8082:8082 - # volumes: - # - ./DSL/Resql:/DSL - # - ./shared:/shared - # - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets - # networks: - # - bykstack + authentication-layer: + container_name: authentication-layer + image: authentication-layer + ports: + - 3004:3004 + networks: + - bykstack - # gui: - # container_name: gui - # environment: - # - NODE_ENV=development - # - REACT_APP_RUUTER_API_URL=http://localhost/ruuter-public - # - REACT_APP_RUUTER_PRIVATE_API_URL=http://localhost/ruuter-private - # - REACT_APP_EXTERNAL_API_URL=http://localhost/dataset-gen-service - # - REACT_APP_CUSTOMER_SERVICE_LOGIN=http://localhost/authentication-layer/et/dev-auth - # - 
REACT_APP_NOTIFICATION_NODE_URL=http://localhost/notifications-node - # - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost ws://localhost; - # - DEBUG_ENABLED=true - # - CHOKIDAR_USEPOLLING=true - # - PORT=3001 - # - REACT_APP_SERVICE_ID=conversations,settings,monitoring - # - REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE - # - VITE_HOST=0.0.0.0 - # - VITE_PORT=3001 - # - HOST=0.0.0.0 - # - VITE_ALLOWED_HOSTS=localhost,127.0.0.1 - # - HMR=false - # - FAST_REFRESH=false - # build: - # context: ./GUI - # dockerfile: Dockerfile.dev - # ports: - # - "3003:3001" - # volumes: - # - /app/node_modules - # - ./GUI:/app - # networks: - # - bykstack - # cpus: "0.5" - # mem_limit: "1G" - # restart: unless-stopped + resql: + container_name: resql + image: resql + depends_on: + rag_search_db: + condition: service_started + environment: + - sqlms.datasources.[0].name=byk + - sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://rag_search_db:5432/rag-search #For LocalDb Use + # sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://171.22.247.13:5435/byk?sslmode=require + - sqlms.datasources.[0].username=postgres + - sqlms.datasources.[0].password=dbadmin + - logging.level.org.springframework.boot=INFO + ports: + - 8082:8082 + volumes: + - ./DSL/Resql:/DSL + - ./shared:/shared + - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets + networks: + - bykstack + + cron-manager: + container_name: cron-manager + image: cron-manager-python:latest + volumes: + - ./DSL/CronManager/DSL:/DSL + - ./DSL/CronManager/script:/app/scripts + - cron_data:/app/data + - ./models:/app/models + environment: + - server.port=9010 + ports: + - 9010:8080 + networks: + - bykstack + + gui: + container_name: gui + environment: + - NODE_ENV=development + - REACT_APP_RUUTER_API_URL=http://localhost:8086 + - 
REACT_APP_RUUTER_PRIVATE_API_URL=http://localhost:8088 + - REACT_APP_CUSTOMER_SERVICE_LOGIN=http://localhost:3004/et/dev-auth + - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' http://localhost:8086 http://localhost:8088 http://localhost:3004 http://localhost:3005 ws://localhost; + - DEBUG_ENABLED=true + - CHOKIDAR_USEPOLLING=true + - PORT=3001 + - REACT_APP_SERVICE_ID=conversations,settings,monitoring + - REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE + - VITE_HOST=0.0.0.0 + - VITE_PORT=3001 + - HOST=0.0.0.0 + - VITE_ALLOWED_HOSTS=localhost,127.0.0.1 + - HMR=false + - FAST_REFRESH=false + build: + context: ./GUI + dockerfile: Dockerfile.dev + ports: + - "3003:3001" + volumes: + - /app/node_modules + - ./GUI:/app + networks: + - bykstack + cpus: "0.5" + mem_limit: "1G" + restart: unless-stopped qdrant: image: qdrant/qdrant:v1.15.1 @@ -406,7 +419,7 @@ services: - ./vault/config:/vault/config # contains vault.hcl - ./vault/logs:/vault/logs expose: - - "8200" + - "8200" networks: - bykstack restart: unless-stopped @@ -432,7 +445,6 @@ services: - bykstack restart: unless-stopped - # LLM Orchestration Service llm-orchestration-service: build: @@ -487,8 +499,10 @@ volumes: name: vault-data vault-agent-out: name: vault-agent-out + cron_data: + name: cron_data networks: bykstack: name: bykstack - driver: bridge \ No newline at end of file + driver: bridge diff --git a/run_vector_indexer.py b/run_vector_indexer.py new file mode 100644 index 0000000..b01150f --- /dev/null +++ b/run_vector_indexer.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +""" +Entry point script for Vector Indexer - Contextual Retrieval Pipeline + +This script can be run directly or called by cron jobs for automated processing. 
+ +Usage: + python run_vector_indexer.py [--config CONFIG_PATH] [--health-check] [--dry-run] + +Examples: + # Run with default config + python run_vector_indexer.py + + # Run with custom config + python run_vector_indexer.py --config /path/to/config.yaml + + # Health check only + python run_vector_indexer.py --health-check + + # Dry run (validate without processing) + python run_vector_indexer.py --dry-run +""" + +import argparse +import asyncio +import sys +from pathlib import Path + +# Add src to Python path +sys.path.insert(0, str(Path(__file__).parent / "src")) + +from src.vector_indexer.main_indexer import VectorIndexer + + +async def main(): + """Main entry point with command line argument parsing.""" + + parser = argparse.ArgumentParser( + description="Vector Indexer - Contextual Retrieval Pipeline", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + parser.add_argument( + "--config", + type=str, + default="src/vector_indexer/config/vector_indexer_config.yaml", + help="Path to configuration file (default: src/vector_indexer/config/vector_indexer_config.yaml)", + ) + + parser.add_argument( + "--health-check", action="store_true", help="Run health check only and exit" + ) + + parser.add_argument( + "--dry-run", + action="store_true", + help="Validate configuration and connectivity without processing documents", + ) + + parser.add_argument( + "--verbose", "-v", action="store_true", help="Enable verbose logging" + ) + + parser.add_argument( + "--quiet", "-q", action="store_true", help="Suppress non-error output" + ) + + args = parser.parse_args() + + # Configure logging level based on arguments + log_level = "INFO" + if args.verbose: + log_level = "DEBUG" + elif args.quiet: + log_level = "ERROR" + + try: + # Initialize vector indexer with specified config + indexer = VectorIndexer(config_path=args.config) + + if args.health_check: + # Health check only + print("🔍 Running health check...") + health_ok = await 
indexer.run_health_check() + + if health_ok: + print("✅ Health check passed!") + return 0 + else: + print("❌ Health check failed!") + return 1 + + elif args.dry_run: + # Dry run - validate without processing + print("🧪 Running dry run validation...") + + health_ok = await indexer.run_health_check() + if not health_ok: + print("❌ Validation failed!") + return 1 + + # Discover documents but don't process + documents = indexer.document_loader.discover_all_documents() + print(f"📄 Found {len(documents)} documents ready for processing") + print("✅ Dry run validation passed!") + return 0 + + else: + # Full processing run + print("🚀 Starting Vector Indexer processing...") + + # Health check first + health_ok = await indexer.run_health_check() + if not health_ok: + print("❌ Pre-processing health check failed!") + return 1 + + # Process all documents + stats = await indexer.process_all_documents() + + # Return appropriate exit code + if stats.documents_failed > 0: + print(f"⚠️ Processing completed with {stats.documents_failed} failures") + return 2 # Partial success + else: + print("✅ Processing completed successfully!") + return 0 + + except KeyboardInterrupt: + print("\n⏹️ Processing interrupted by user") + return 130 + except FileNotFoundError as e: + print(f"❌ Configuration file not found: {e}") + return 1 + except Exception as e: + print(f"💥 Fatal error: {e}") + return 1 + + +def cron_entry_point(): + """ + Entry point specifically designed for cron jobs. 
+ + This function: + - Uses minimal output suitable for cron logs + - Returns appropriate exit codes for monitoring + - Handles errors gracefully for automated systems + """ + import logging + + # Configure minimal logging for cron + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - Vector Indexer - %(levelname)s - %(message)s", + ) + + try: + # Run with default configuration + result = asyncio.run(main()) + + if result == 0: + logging.info("Vector indexer completed successfully") + elif result == 2: + logging.warning("Vector indexer completed with some failures") + else: + logging.error("Vector indexer failed") + + return result + + except Exception as e: + logging.error(f"Vector indexer fatal error: {e}") + return 1 + + +if __name__ == "__main__": + # Run the async main function + exit_code = asyncio.run(main()) + sys.exit(exit_code) diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py index dd97fa9..4dfd295 100644 --- a/src/llm_orchestration_service_api.py +++ b/src/llm_orchestration_service_api.py @@ -11,6 +11,8 @@ from models.request_models import ( OrchestrationRequest, OrchestrationResponse, + TestOrchestrationRequest, + TestOrchestrationResponse, EmbeddingRequest, EmbeddingResponse, ContextGenerationRequest, @@ -124,6 +126,88 @@ def orchestrate_llm_request( ) +@app.post( + "/orchestrate/test", + response_model=TestOrchestrationResponse, + status_code=status.HTTP_200_OK, + summary="Process test LLM orchestration request", + description="Processes a simplified test message through the LLM orchestration pipeline", +) +def test_orchestrate_llm_request( + http_request: Request, + request: TestOrchestrationRequest, +) -> TestOrchestrationResponse: + """ + Process test LLM orchestration request with simplified input. 
+ + Args: + http_request: FastAPI Request object for accessing app state + request: TestOrchestrationRequest containing only message, environment, and connection_id + + Returns: + TestOrchestrationResponse: Response with LLM output and status flags (without chatId) + + Raises: + HTTPException: For processing errors + """ + try: + logger.info( + f"Received test orchestration request for environment: {request.environment}" + ) + + # Get the orchestration service from app state + if not hasattr(http_request.app.state, "orchestration_service"): + logger.error("Orchestration service not found in app state") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Service not initialized", + ) + + orchestration_service = http_request.app.state.orchestration_service + if orchestration_service is None: + logger.error("Orchestration service is None") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Service not initialized", + ) + + # Map TestOrchestrationRequest to OrchestrationRequest with defaults + full_request = OrchestrationRequest( + chatId="test-session", + message=request.message, + authorId="test-user", + conversationHistory=[], + url="test-context", + environment=request.environment, + connection_id=request.connection_id, + ) + + # Process the request using the same logic + response = orchestration_service.process_orchestration_request(full_request) + + # Convert to TestOrchestrationResponse (exclude chatId) + test_response = TestOrchestrationResponse( + llmServiceActive=response.llmServiceActive, + questionOutOfLLMScope=response.questionOutOfLLMScope, + inputGuardFailed=response.inputGuardFailed, + content=response.content, + ) + + logger.info( + f"Successfully processed test request for environment: {request.environment}" + ) + return test_response + + except HTTPException: + raise + except Exception as e: + logger.error(f"Unexpected error processing test request: {str(e)}") + raise HTTPException( 
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal server error occurred", + ) + + @app.post( "/embeddings", response_model=EmbeddingResponse, diff --git a/src/models/request_models.py b/src/models/request_models.py index 27152db..c6b9b50 100644 --- a/src/models/request_models.py +++ b/src/models/request_models.py @@ -129,3 +129,31 @@ class EmbeddingErrorResponse(BaseModel): error: str = Field(..., description="Error message") failed_texts: List[str] = Field(..., description="Texts that failed to embed") retry_after: Optional[int] = Field(None, description="Retry after seconds") + + +# Test endpoint models + + +class TestOrchestrationRequest(BaseModel): + """Model for simplified test orchestration request.""" + + message: str = Field(..., description="User's message/query") + environment: Literal["production", "test", "development"] = Field( + ..., description="Environment context" + ) + connection_id: Optional[str] = Field( + None, description="Optional connection identifier" + ) + + +class TestOrchestrationResponse(BaseModel): + """Model for test orchestration response (without chatId).""" + + llmServiceActive: bool = Field(..., description="Whether LLM service is active") + questionOutOfLLMScope: bool = Field( + ..., description="Whether question is out of LLM scope" + ) + inputGuardFailed: bool = Field( + ..., description="Whether input guard validation failed" + ) + content: str = Field(..., description="Response content with citations") diff --git a/vault/agent-out/pidfile b/vault/agent-out/pidfile index c793025..e69de29 100644 --- a/vault/agent-out/pidfile +++ b/vault/agent-out/pidfile @@ -1 +0,0 @@ -7 \ No newline at end of file