diff --git a/DSL/Resql/global-classifier/POST/get-agency-centops.sql b/DSL/Resql/global-classifier/POST/get-agency-centops.sql new file mode 100644 index 00000000..b927c527 --- /dev/null +++ b/DSL/Resql/global-classifier/POST/get-agency-centops.sql @@ -0,0 +1,4 @@ +-- Check if agency exists in mock_centops table +SELECT agency_id +FROM public.mock_centops +WHERE agency_id = :agencyId; diff --git a/DSL/Resql/global-classifier/POST/get-all-dataset-versions.sql b/DSL/Resql/global-classifier/POST/get-all-dataset-versions.sql index 78be3514..cc85f699 100644 --- a/DSL/Resql/global-classifier/POST/get-all-dataset-versions.sql +++ b/DSL/Resql/global-classifier/POST/get-all-dataset-versions.sql @@ -1,3 +1,4 @@ SELECT id, major, minor FROM public.dataset_versions +WHERE generation_status = 'Generation_Success' ORDER BY id; \ No newline at end of file diff --git a/DSL/Resql/global-classifier/POST/get-datasets.sql b/DSL/Resql/global-classifier/POST/get-datasets.sql index 1245a0e5..6be6a297 100644 --- a/DSL/Resql/global-classifier/POST/get-datasets.sql +++ b/DSL/Resql/global-classifier/POST/get-datasets.sql @@ -1,27 +1,29 @@ SELECT - id, - major, - minor, - created_at, - generation_status, - last_model_trained, - last_trained, + dv.id, + dv.major, + dv.minor, + dv.created_at, + dv.generation_status, + COALESCE(dm.model_name, dv.last_model_trained) AS last_model_trained, + dv.last_trained, CEIL(COUNT(*) OVER() / :page_size::DECIMAL) AS total_pages FROM - dataset_versions + dataset_versions dv +LEFT JOIN + data_models dm ON dv.last_model_trained = dm.model_id::text WHERE - (:generation_status = 'all' OR generation_status ILIKE '%' || :generation_status || '%') + (:generation_status = 'all' OR dv.generation_status ILIKE '%' || :generation_status || '%') AND (:dataset_name = 'all' - OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT('v', major, '.', minor))) > 0 - OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT(major, '.', minor))) > 0 - OR POSITION(LOWER(:dataset_name) IN LOWER(major::text)) > 0 - OR POSITION(LOWER(:dataset_name) IN LOWER(minor::text)) > 0) + OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT('v', dv.major, '.', dv.minor))) > 0 + OR POSITION(LOWER(:dataset_name) IN LOWER(CONCAT(dv.major, '.', dv.minor))) > 0 + OR POSITION(LOWER(:dataset_name) IN LOWER(dv.major::text)) > 0 + OR POSITION(LOWER(:dataset_name) IN LOWER(dv.minor::text)) > 0) ORDER BY - CASE WHEN :sort_by = 'created_at' AND :sort_type = 'asc' THEN created_at END ASC, - CASE WHEN :sort_by = 'created_at' AND :sort_type = 'desc' THEN created_at END DESC, - -- CASE WHEN :sort_by = 'major' AND :sort_type = 'asc' THEN major END ASC, - -- CASE WHEN :sort_by = 'major' AND :sort_type = 'desc' THEN major END DESC, - -- CASE WHEN :sort_by = 'minor' AND :sort_type = 'asc' THEN minor END ASC, - -- CASE WHEN :sort_by = 'minor' AND :sort_type = 'desc' THEN minor END DESC, - CASE WHEN :sort_by IS NULL OR :sort_by = '' THEN created_at END DESC + CASE WHEN :sort_by = 'created_at' AND :sort_type = 'asc' THEN dv.created_at END ASC, + CASE WHEN :sort_by = 'created_at' AND :sort_type = 'desc' THEN dv.created_at END DESC, + -- CASE WHEN :sort_by = 'major' AND :sort_type = 'asc' THEN dv.major END ASC, + -- CASE WHEN :sort_by = 'major' AND :sort_type = 'desc' THEN dv.major END DESC, + -- CASE WHEN :sort_by = 'minor' AND :sort_type = 'asc' THEN dv.minor END ASC, + -- CASE WHEN :sort_by = 'minor' AND :sort_type = 'desc' THEN dv.minor END DESC, + CASE WHEN :sort_by IS NULL OR :sort_by = '' THEN dv.created_at END DESC OFFSET ((GREATEST(:page, 1) - 1) * 
:page_size) LIMIT :page_size; \ No newline at end of file diff --git a/DSL/Resql/global-classifier/POST/insert-agency-centops.sql b/DSL/Resql/global-classifier/POST/insert-agency-centops.sql new file mode 100644 index 00000000..ffffc7a5 --- /dev/null +++ b/DSL/Resql/global-classifier/POST/insert-agency-centops.sql @@ -0,0 +1,3 @@ +-- Insert new agency into mock_centops table +INSERT INTO public.mock_centops (agency_id, agency_name, created_at) +VALUES (:agencyId, :agencyName, NOW()); diff --git a/DSL/Resql/global-classifier/POST/update-datasets-connected-models.sql b/DSL/Resql/global-classifier/POST/update-datasets-connected-models.sql index 0dbc3dbf..e9498d2f 100644 --- a/DSL/Resql/global-classifier/POST/update-datasets-connected-models.sql +++ b/DSL/Resql/global-classifier/POST/update-datasets-connected-models.sql @@ -8,7 +8,8 @@ SET ELSE connected_models END, - last_trained = CURRENT_TIMESTAMP + last_trained = CURRENT_TIMESTAMP, + last_model_trained = :modelId WHERE id = :datasetId RETURNING diff --git a/DSL/Ruuter.public/global-classifier/POST/centops/add.yml b/DSL/Ruuter.public/global-classifier/POST/centops/add.yml new file mode 100644 index 00000000..8a7b514f --- /dev/null +++ b/DSL/Ruuter.public/global-classifier/POST/centops/add.yml @@ -0,0 +1,76 @@ +declaration: + call: declare + version: 0.1 + description: "Add a new agency to CentOps (mock_centops) if it does not already exist" + method: post + accepts: json + returns: json + namespace: global-classifier + # Input Validation Schema + allowlist: + body: + - field: agency_id + type: string + description: "ID of the agency (required)" + - field: agency_name + type: string + description: "Name of the agency (required)" + next: extractRequestData + +# Data Extraction +extractRequestData: + assign: + agency_id: ${incoming.body.agency_id} + agency_name: ${incoming.body.agency_name} + next: checkAgencyExists + +# Check if Agency Already Exists +checkAgencyExists: + call: http.post + args: + url: "[#GLOBAL_CLASSIFIER_RESQL]/get-agency-centops" + body: + agencyId: ${agency_id} + result: agency_check_res + next: evaluateAgencyExistence + +# Evaluate Agency Existence +evaluateAgencyExistence: + switch: + - condition: "${agency_check_res.response.body.length > 0}" + next: return_agency_exists + next: insertAgencyData + +# Insert Agency Data +insertAgencyData: + call: http.post + args: + url: "[#GLOBAL_CLASSIFIER_RESQL]/insert-agency-centops" + body: + agencyId: ${agency_id} + agencyName: ${agency_name} + createdAt: ${new Date().toISOString()} + result: insert_agency_res + next: checkInsertResult + +# Check Insert Result +checkInsertResult: + switch: + - condition: ${200 <= insert_agency_res.response.statusCodeValue && insert_agency_res.response.statusCodeValue < 300} + next: return_success + next: return_insert_failed + +return_success: + return: "Agency created successfully in centops" + status: 200 + next: end + +return_agency_exists: + return: "error: agency already exists in centops" + status: 400 + next: end + +return_insert_failed: + return: "error: failed to add agency to centops" + status: 500 + next: end diff --git a/DSL/Ruuter.public/global-classifier/POST/data/callback.yml b/DSL/Ruuter.public/global-classifier/POST/data/callback.yml index 9aec279c..8a98f08f 100--- a/DSL/Ruuter.public/global-classifier/POST/data/callback.yml +++ b/DSL/Ruuter.public/global-classifier/POST/data/callback.yml @@ -25,7 +25,7 @@ declare: description: "List of agency IDs for which the dataset generation was completed"
log_callback_received: - log: "Dataset generation callback received - Task ID: ${incoming.body.task_id}, Status: ${incoming.body.status}, File Path: ${incoming.body.filePath}" + log: "Dataset generation callback received - Task ID: ${incoming.body.task_id}, Status: ${incoming.body.status}, File Path: ${incoming.body.filePath}, Result: ${incoming.body.results}" next: extract_callback_data extract_callback_data: diff --git a/GUI/src/components/molecules/DataModelForm/index.tsx b/GUI/src/components/molecules/DataModelForm/index.tsx index abf7263b..02226f8f 100644 --- a/GUI/src/components/molecules/DataModelForm/index.tsx +++ b/GUI/src/components/molecules/DataModelForm/index.tsx @@ -21,6 +21,7 @@ type DataModelFormType = { handleChange: (name: keyof DataModel, value: any) => void; errors?: Record; type: string; + datasetVersions?: any; }; const DataModelForm: FC = ({ @@ -28,6 +29,7 @@ const DataModelForm: FC = ({ handleChange, errors, type, + datasetVersions: propDatasetVersions, }) => { const { t } = useTranslation(); const [showTrainingResults, setShowTrainingResults] = useState(true); @@ -39,8 +41,12 @@ const DataModelForm: FC = ({ const { data: datasetVersions } = useQuery({ queryKey: dataModelsQueryKeys.DATA_MODEL_DEPLOYMENT_ENVIRONMENTS(), queryFn: () => getAllDatasetVersions(), + enabled: !propDatasetVersions, // Only fetch if not provided as prop }); + // Use prop datasetVersions if provided, otherwise use the queried data + const finalDatasetVersions = propDatasetVersions || datasetVersions; + let trainingResults = null; if (dataModel?.trainingResults?.value) { try { @@ -91,10 +97,19 @@ const DataModelForm: FC = ({ }} > { handleChange('datasetId', selection?.value); + // Update version when dataset is selected + if (selection?.value && finalDatasetVersions) { + const selectedDataset = finalDatasetVersions.find( + (dataset: any) => dataset.id.toString() === selection.value + ); + if (selectedDataset?.version) { + handleChange('version', selectedDataset.version); + } + } }} value={dataModel?.datasetId === null && ""} defaultValue={dataModel?.datasetId ? 
dataModel?.datasetId : ""} diff --git a/GUI/src/pages/DataModels/ConfigureDataModel.tsx b/GUI/src/pages/DataModels/ConfigureDataModel.tsx index af524f97..cfad307d 100644 --- a/GUI/src/pages/DataModels/ConfigureDataModel.tsx +++ b/GUI/src/pages/DataModels/ConfigureDataModel.tsx @@ -15,6 +15,7 @@ import { dataModelsQueryKeys } from 'utils/queryKeys'; import { useTranslation } from 'react-i18next'; import './DataModels.scss'; import { configureDataModel, deleteDataModel, deployDataModel, getDataModelMetadata, getProductionDataModel } from 'services/datamodels'; +import { getAllDatasetVersions } from 'services/datasets'; import { use } from 'i18next'; import { set } from 'date-fns'; import { areArraysEqual } from 'utils/commonUtilts'; @@ -41,6 +42,11 @@ const ConfigureDataModel: FC = () => { queryFn: () => getProductionDataModel(), }); + const { data: datasetVersions } = useQuery({ + queryKey: dataModelsQueryKeys.DATA_MODEL_DEPLOYMENT_ENVIRONMENTS(), + queryFn: () => getAllDatasetVersions(), + }); + const [initialData, setInitialData] = useState>({ modelName: modelMetadata?.modelName, datasetId: modelMetadata?.connectedDsId, @@ -85,6 +91,22 @@ const ConfigureDataModel: FC = () => { name: keyof DataModel, value: any ) => { + // Update version when dataset is changed + if (name === 'datasetId' && value && datasetVersions) { + const selectedDataset = datasetVersions.find( + (dataset: any) => dataset.id.toString() === value + ); + if (selectedDataset?.version) { + setDataModel((prevDataModel) => ({ + ...prevDataModel, + [name]: value, + version: selectedDataset.version, + })); + return; // Early return to avoid the second setDataModel call + } + } + + // Default case - just update the field setDataModel((prevDataModel) => ({ ...prevDataModel, [name]: value, @@ -185,17 +207,24 @@ const ConfigureDataModel: FC = () => { return undefined; }; - const buildUpdatedPayload = (updateType: string | undefined) => ({ - modelGroupKey: modelMetadata.modelGroupKey ?? "", - modelName: dataModel.modelName ?? "", - connectedDsId: Number(dataModel.datasetId) ?? 0, - deploymentEnv: dataModel.deploymentEnvironment ?? "", - baseModels: dataModel.baseModels ?? [], - connectedDsMajorVersion: Number(dataModel.version?.split('.')[0]?.[1]) ?? 0, - connectedDsMinorVersion: Number(dataModel.version?.split('.')[1]) ?? 0, - updateType: updateType ?? "", - isTrainingNeeded: !areArraysEqual(initialData.baseModels as string[], dataModel.baseModels as string[]) - }); + const buildUpdatedPayload = (updateType: string | undefined) => { + // Parse version correctly - version format is "V1.0" + const versionParts = dataModel.version?.split('.'); + const majorVersion = versionParts?.[0]?.substring(1); // Remove 'V' prefix + const minorVersion = versionParts?.[1]; + + return { + modelGroupKey: modelMetadata.modelGroupKey ?? "", + modelName: dataModel.modelName ?? "", + connectedDsId: Number(dataModel.datasetId) ?? 0, + deploymentEnv: dataModel.deploymentEnvironment ?? "", + baseModels: dataModel.baseModels ?? [], + connectedDsMajorVersion: Number(majorVersion) ?? 1, + connectedDsMinorVersion: Number(minorVersion) ?? 0, + updateType: updateType ?? 
"", + isTrainingNeeded: !areArraysEqual(initialData.baseModels as string[], dataModel.baseModels as string[]) + }; + }; const deleteDataModelMutation = useMutation({ mutationFn: deleteDataModel, @@ -288,6 +317,7 @@ const ConfigureDataModel: FC = () => { } handleChange={handleDataModelAttributesChange} type="configure" + datasetVersions={datasetVersions} /> )} diff --git a/GUI/src/pages/DataModels/CreateDataModel.tsx b/GUI/src/pages/DataModels/CreateDataModel.tsx index 5e3a8d30..8b1c2705 100644 --- a/GUI/src/pages/DataModels/CreateDataModel.tsx +++ b/GUI/src/pages/DataModels/CreateDataModel.tsx @@ -12,9 +12,9 @@ import { DataModel, ErrorsType, } from 'types/dataModels'; -import { da } from 'date-fns/locale'; import { createDataModel, getProductionDataModel } from 'services/datamodels'; import { dataModelsQueryKeys } from 'utils/queryKeys'; +import { getAllDatasetVersions } from 'services/datasets'; const CreateDataModel: FC = () => { const { t } = useTranslation(); @@ -34,7 +34,35 @@ const CreateDataModel: FC = () => { queryFn: () => getProductionDataModel(), }); + const { data: datasetVersions } = useQuery({ + queryKey: dataModelsQueryKeys.DATA_MODEL_DEPLOYMENT_ENVIRONMENTS(), + queryFn: () => getAllDatasetVersions(), + }); + const handleDataModelAttributesChange = (name: string, value: string) => { + // Update version when dataset is changed + if (name === 'datasetId' && value && datasetVersions) { + const selectedDataset = datasetVersions.find( + (dataset: any) => dataset.id.toString() === value + ); + if (selectedDataset?.version) { + setDataModel((prevDataModel) => ({ + ...prevDataModel, + [name]: value, + version: selectedDataset.version, + })); + + // Clear datasetId error + setErrors((prevErrors) => { + const updatedErrors = { ...prevErrors }; + delete updatedErrors.datasetId; + return updatedErrors; + }); + return; // Early return to avoid the second setDataModel call + } + } + + // Default case - just update the field setDataModel((prevFilters) => ({ ...prevFilters, [name]: value, @@ -86,14 +114,18 @@ const CreateDataModel: FC = () => { }); const handleCreate = () => { + // Parse version correctly - version format is "V1.0" + const versionParts = dataModel?.version?.split('.'); + const majorVersion = versionParts?.[0]?.substring(1); // Remove 'V' prefix + const minorVersion = versionParts?.[1]; const paylod = { modelName: dataModel.modelName ?? "", deploymentEnv: dataModel.deploymentEnvironment ?? "", baseModels: dataModel.baseModels ?? [], connectedDsId: Number(dataModel.datasetId) ?? 0, - connectedDsMajorVersion: Number(dataModel?.version?.split('.')[0]?.[1]) ?? "", - connectedDsMinorVersion: Number(dataModel?.version?.split('.')[1]) ?? "", + connectedDsMajorVersion: Number(majorVersion) ?? 1, + connectedDsMinorVersion: Number(minorVersion) ?? 0, } if (prodDataModel && dataModel.deploymentEnvironment === "production") { @@ -134,6 +166,7 @@ const CreateDataModel: FC = () => { dataModel={dataModel} handleChange={handleDataModelAttributesChange} type="create" + datasetVersions={datasetVersions} />
diff --git a/docker-compose-ec2.yml b/docker-compose-ec2.yml new file mode 100644 index 00000000..f606c527 --- /dev/null +++ b/docker-compose-ec2.yml @@ -0,0 +1,576 @@ +services: + ruuter-public: + container_name: ruuter-public + image: ruuter + environment: + - application.cors.allowedOrigins=http://localhost:8086,http://localhost:3001,http://localhost:3003,http://localhost:3004,http://localhost:8080,http://localhost:8000,http://localhost:8090,https://est-global-classifier-rtc.rootcode.software + - application.httpCodesAllowList=200,201,202,204,400,401,403,500 + - application.internalRequests.allowedIPs=127.0.0.1 + - application.logging.displayRequestContent=true + - application.logging.displayResponseContent=true + - application.logging.printStackTrace=true + - application.internalRequests.disabled=true + - server.port=8086 + volumes: + - ./DSL/Ruuter.public:/DSL + - ./constants.ini:/app/constants.ini + ports: + - 8086:8086 + networks: + - bykstack + cpus: "0.5" + mem_limit: "512M" + + ruuter-private: + container_name: ruuter-private + image: ruuter + environment: + - application.cors.allowedOrigins=http://localhost:3001,http://localhost:3003,http://localhost:8088,http://localhost:3002,http://localhost:3004,http://localhost:8000,https://est-global-classifier-rtc.rootcode.software + - application.httpCodesAllowList=200,201,202,400,401,403,500 + - application.internalRequests.allowedIPs=127.0.0.1 + - application.logging.displayRequestContent=true + - application.logging.displayResponseContent=true + - application.logging.printStackTrace=true + - application.internalRequests.disabled=true + - server.port=8088 + volumes: + - ./DSL/Ruuter.private:/DSL + - ./constants.ini:/app/constants.ini + ports: + - 8088:8088 + networks: + - bykstack + cpus: "0.5" + mem_limit: "512M" + + data-mapper: + container_name: data-mapper + image: data-mapper + environment: + - PORT=3000 + - CONTENT_FOLDER=/data + volumes: + - ./DSL:/data + - ./DSL/DMapper/global-classifier/hbs:/workspace/app/views/global-classifier + - ./DSL/DMapper/global-classifier/lib:/workspace/app/lib + ports: + - 3000:3000 + networks: + - bykstack + + tim: + container_name: tim + image: tim + depends_on: + tim-postgresql: + condition: service_started + environment: + - SECURITY_ALLOWLIST_JWT=ruuter-private,ruuter-public,data-mapper,resql,tim,tim-postgresql,chat-widget,authentication-layer,127.0.0.1,::1,est-global-classifier-rtc.rootcode.software + - KEY_PASS=ppjjpp + ports: + - 8085:8085 + networks: + - bykstack + extra_hosts: + - "host.docker.internal:host-gateway" + cpus: "0.5" + mem_limit: "512M" + + tim-postgresql: + container_name: tim-postgresql + image: postgres:14.1 + environment: + - POSTGRES_USER=tim + - POSTGRES_PASSWORD=123 + - POSTGRES_DB=tim + # - POSTGRES_HOST_AUTH_METHOD=trust + volumes: + - ./tim-db:/var/lib/postgresql/data + ports: + - 9876:5432 + networks: + - bykstack + + authentication-layer: + container_name: authentication-layer + image: authentication-layer + ports: + - 3004:3004 + networks: + - bykstack + + resql: + container_name: resql + image: resql + depends_on: + users_db: + condition: service_started + environment: + - sqlms.datasources.[0].name=byk + - sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://users_db:5432/global-classifier #For LocalDb Use + # sqlms.datasources.[0].jdbcUrl=jdbc:postgresql://171.22.247.13:5435/byk?sslmode=require + - sqlms.datasources.[0].username=postgres + - sqlms.datasources.[0].password=dbadmin + - logging.level.org.springframework.boot=INFO + ports: + - 8082:8082 + volumes: + - 
./DSL/Resql:/DSL + - ./shared:/shared + - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets + networks: + - bykstack + + users_db: + container_name: users_db + image: postgres:14.1 + environment: + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=dbadmin + - POSTGRES_DB=global-classifier + ports: + - 5435:5432 + volumes: + - ./global-classifier-db/db_files:/var/lib/postgresql/data + - ./shared:/shared + - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets + + networks: + - bykstack + restart: always + + init: + image: busybox + command: ["sh", "-c", "chmod -R 777 /shared && chmod -R 777 /app/model_trainer && chmod -R 777 /app/data"] + volumes: + - shared-volume:/shared + - ./model_trainer:/app/model_trainer + - cron_data:/app/data + networks: + - bykstack + +# It's important to build the cron-manager container from the Dockerfile.python containing the Python enviro + cron-manager: + container_name: cron-manager + image: cron-manager-python:latest + user: "root" + volumes: + - ./DSL/CronManager/DSL:/DSL + - ./DSL/CronManager/script:/app/scripts + - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets + - ./src/s3_dataset_processor:/app/src/s3_dataset_processor + - ./DSL/DatasetGenerator/config:/app/config + - ./src/model-training:/app/src/training + - ./mlflow/mlflow_artifacts:/mlflow/mlflow_artifacts + - ./data/processed:/app/data/processed + - shared-volume:/app/shared + - ./models:/app/models + - ./src/inference/inference_scripts:/app/inference_scripts + - ./grafana-configs/loki_logger.py:/app/inference_scripts/loki_logger.py + - ./grafana-configs/loki_logger.py:/app/src/training/loki_logger.py + - ./constants.ini:/app/inference_scripts/constants.ini + - cron_data:/app/data + - ./src/scripts:/app/src/scripts + runtime: nvidia + environment: + - NVIDIA_VISIBLE_DEVICES=all + - server.port=9010 + - MLFLOW_TRACKING_URI=http://mlflow:5000 + - PYTHONPATH=/app:/app/src/model_training:/app/src/s3_dataset_processor:/app/src + ports: + - 9010:8080 + networks: + - bykstack + depends_on: + init: + condition: service_completed_successfully + + mlflow: + build: + context: ./mlflow + dockerfile: Dockerfile + container_name: mlflow + ports: + - "5001:5000" + env_file: + - .env + environment: + - MLFLOW_TRACKING_USERNAME=${MLFLOW_TRACKING_USERNAME} + - MLFLOW_TRACKING_PASSWORD=${MLFLOW_TRACKING_PASSWORD} + - MLFLOW_HOST=${MLFLOW_HOST} + - MLFLOW_PORT=${MLFLOW_PORT} + - MLFLOW_BACKEND_STORE_URI=${MLFLOW_BACKEND_STORE_URI} + - MLFLOW_DEFAULT_ARTIFACT_ROOT=${MLFLOW_DEFAULT_ARTIFACT_ROOT} + - MLFLOW_FLASK_SERVER_SECRET_KEY=${MLFLOW_FLASK_SERVER_SECRET_KEY} + volumes: + - ./mlflow/mlflow_data:/mlflow/mlflow_data + - ./mlflow/mlflow_artifacts:/mlflow/mlflow_artifacts + networks: + - bykstack + + minio: + image: minio/minio:latest + container_name: minio + env_file: + - .env + + # command: server /data --console-address ":9001" + entrypoint: > + sh -c " + export MINIO_ROOT_USER=$${S3_ACCESS_KEY_ID} && \ + export MINIO_ROOT_PASSWORD=$${S3_SECRET_ACCESS_KEY} && \ + export MINIO_BROWSER_REDIRECT_URL=$${MINIO_BROWSER_REDIRECT_URL} && \ + + minio server /app/minio_data --console-address ":9001" + " + + volumes: + - minio_data:/app/minio_data + ports: + - "9000:9000" # API port + - "9001:9001" # Console port + networks: + - bykstack + + gc-s3-ferry: + image: s3-ferry:latest + container_name: gc-s3-ferry + volumes: + - ./DSL/DatasetGenerator/output_datasets:/app/output_datasets + - shared-volume:/app/shared + - ./models:/app/models + - ./src/inference/dummy_model:/app/dummy_model + - 
cron_data:/app/data + env_file: + - .env + ports: + - "3006:3000" + user: "root" + networks: + - bykstack + depends_on: + minio: + condition: service_started + + opensearch-node: + image: opensearchproject/opensearch:2.11.1 + container_name: opensearch-node + environment: + - node.name=opensearch-node + - discovery.seed_hosts=opensearch + - discovery.type=single-node + - bootstrap.memory_lock=true + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + - plugins.security.disabled=true + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch-data:/usr/share/opensearch/data + ports: + - 9200:9200 + - 9600:9600 + networks: + - bykstack + + notifications-node: + container_name: notifications-node + build: + context: ./notification-server + dockerfile: Dockerfile + ports: + - 4040:4040 + depends_on: + - opensearch-node + environment: + OPENSEARCH_PROTOCOL: http + OPENSEARCH_HOST: opensearch-node + OPENSEARCH_PORT: 9200 + OPENSEARCH_USERNAME: admin + OPENSEARCH_PASSWORD: admin + PORT: 4040 + REFRESH_INTERVAL: 1000 + CORS_WHITELIST_ORIGINS: http://localhost:3001,http://localhost:3002,http://localhost:3003,http://localhost:3004,http://localhost:8080,http://localhost:8088,https://est-global-classifier-rtc.rootcode.software + RUUTER_URL: http://ruuter-public:8086 + volumes: + - /app/node_modules + - ./notification-server:/app + networks: + - bykstack + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:2.11.1 + container_name: opensearch-dashboards + environment: + - OPENSEARCH_HOSTS=http://opensearch-node:9200 + - DISABLE_SECURITY_DASHBOARDS_PLUGIN=true + ports: + - 5601:5601 + networks: + - bykstack + + gui: + container_name: gui + environment: + - NODE_ENV=local + - REACT_APP_RUUTER_API_URL=https://est-global-classifier-rtc.rootcode.software/ruuter-public + - REACT_APP_RUUTER_PRIVATE_API_URL=https://est-global-classifier-rtc.rootcode.software/ruuter-private + - REACT_APP_CUSTOMER_SERVICE_LOGIN=https://est-global-classifier-rtc.rootcode.software/authentication-layer/et/dev-auth + - REACT_APP_NOTIFICATION_NODE_URL=https://est-global-classifier-rtc.rootcode.software/notifications_node + - REACT_APP_CSP=upgrade-insecure-requests; default-src 'self'; font-src 'self' data:; img-src 'self' data:; script-src 'self' 'unsafe-eval' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; object-src 'none'; connect-src 'self' https://est-global-classifier-rtc.rootcode.software/ruuter-public https://est-global-classifier-rtc.rootcode.software/ruuter-private https://est-global-classifier-rtc.rootcode.software/authentication-layer/et/dev-auth https://est-global-classifier-rtc.rootcode.software/notifications_node; + - DEBUG_ENABLED=true + - CHOKIDAR_USEPOLLING=true + - PORT=3001 + - REACT_APP_SERVICE_ID=conversations,settings,monitoring + - REACT_APP_ENABLE_HIDDEN_FEATURES=TRUE + - VITE_HOST=0.0.0.0 + - VITE_ALLOWED_HOSTS=est-global-classifier-rtc.rootcode.software,localhost,127.0.0.1 + + build: + context: ./GUI + dockerfile: Dockerfile.dev + ports: + - 3003:3001 + volumes: + - /app/node_modules + - ./GUI:/app + networks: + - bykstack + cpus: "0.5" + mem_limit: "1G" + + dataset-gen-service: + image: synthesisai/dataset-generator:latest + container_name: dataset-gen-service + ports: + - "8000:8000" + env_file: + - .env + volumes: + - ./DSL/DatasetGenerator/config:/app/config + - ./DSL/DatasetGenerator/templates:/app/templates + - ./DSL/DatasetGenerator/user_configs:/app/user_configs + - cron_data:/app/data + - 
./DSL/DatasetGenerator/output_datasets:/app/output_datasets + - ./DSL/DatasetGenerator/logs:/app/logs + networks: + - bykstack + + redis: + image: redis:7-alpine + container_name: redis-embeddings + ports: + - "6378:6379" + volumes: + - redis_data:/data + - ./DSL/DatasetGenerator/config/redis.conf:/usr/local/etc/redis/redis.conf + command: redis-server /usr/local/etc/redis/redis.conf + environment: + - REDIS_REPLICATION_MODE=master + restart: unless-stopped + networks: + - bykstack + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 3 + + #temporarary container to initialize S3 storage with necessary buckets and models + init-storage: + + container_name: init-storage + image: amazon/aws-cli:latest + volumes: + - cron_data:/data_models + - ./src/inference/model-repository:/app/model-repository + entrypoint: > + sh -c " + echo 'Setting AWS KEYS' && + aws configure set aws_access_key_id $${S3_ACCESS_KEY_ID} && + aws configure set aws_secret_access_key $${S3_SECRET_ACCESS_KEY} && + + echo \"WAITING for S3 storage to be ready...\" && + until curl -s $${S3_HEALTH_ENDPOINT}; do sleep 2; done && + + echo \"Checking for S3 bucket: $${S3_DATA_BUCKET_NAME} ...\" && + if ! aws --endpoint-url $${S3_ENDPOINT_URL} s3api head-bucket --bucket $${S3_DATA_BUCKET_NAME} 2>/dev/null; then + echo \"Bucket $${S3_DATA_BUCKET_NAME} does not exist. Creating...\" && + aws --endpoint-url $${S3_ENDPOINT_URL} s3 mb s3://$${S3_DATA_BUCKET_NAME} && + echo \"Bucket $${S3_DATA_BUCKET_NAME} created successfully.\" + else + echo \"Bucket $${S3_DATA_BUCKET_NAME} already exists. Skipping creation.\" + fi && + + + echo 'S3 storage is ready.' && + echo 'Uploading models...' && + + echo 'Uploading initial dummy model to testing registry...' && + aws --endpoint-url $${S3_ENDPOINT_URL} s3 cp --recursive /app/model-repository/testing s3://$${S3_DATA_BUCKET_NAME}/$${S3_DATA_BUCKET_PATH}/models/testing && + + echo 'Uploading initial dummy model to production registry...' && + aws --endpoint-url $${S3_ENDPOINT_URL} s3 cp --recursive /app/model-repository/production s3://$${S3_DATA_BUCKET_NAME}/$${S3_DATA_BUCKET_PATH}/models/production && + + echo 'Uploading initial dummy model to undeployed registry...' && + aws --endpoint-url $${S3_ENDPOINT_URL} s3 cp --recursive /app/model-repository/undeployed s3://$${S3_DATA_BUCKET_NAME}/$${S3_DATA_BUCKET_PATH}/models/undeployed && + + echo 'Upload complete.' + " + + env_file: + - .env + networks: + - bykstack + depends_on: + minio: + condition: service_started + cron-manager: + condition: service_started + ruuter-private: + condition: service_started + ruuter-public: + condition: service_started + gc-s3-ferry: + condition: service_started + + triton-production-server: + container_name: triton-production-server + runtime: nvidia + build: + context: ./src/inference + dockerfile: Dockerfile + command: > + sh -c " + echo 'Setting environment variables for S3 access...' && + export AWS_ACCESS_KEY_ID=$${S3_ACCESS_KEY_ID} && + export AWS_SECRET_ACCESS_KEY=$${S3_SECRET_ACCESS_KEY} && + export AWS_REGION=$${S3_REGION} && + export AWS_ENDPOINT_URL=$${S3_ENDPOINT_URL} && + + echo 'Starting Triton server for production models...' 
&& + tritonserver --model-repository=s3://$${S3_ENDPOINT_NAME}/$${S3_DATA_BUCKET_NAME}/$${S3_DATA_BUCKET_PATH}/models/production --model-control-mode=explicit --log-verbose=1 + " + ports: + - "6000:8000" + - "6001:8001" + - "6002:8002" + volumes: + - cron_data:/data_models + + env_file: + - .env + networks: + - bykstack + depends_on: + init-storage: + condition: service_completed_successfully + cron-manager: + condition: service_started + ruuter-private: + condition: service_started + minio: + condition: service_started + + triton-test-server: + container_name: triton-test-server + runtime: nvidia + build: + context: ./src/inference + dockerfile: Dockerfile + command: > + sh -c " + + echo 'Setting environment variables for S3 access...' && + + export AWS_ACCESS_KEY_ID=$${S3_ACCESS_KEY_ID} && + export AWS_SECRET_ACCESS_KEY=$${S3_SECRET_ACCESS_KEY} && + export AWS_REGION=$${S3_REGION} && + export AWS_ENDPOINT_URL=$${S3_ENDPOINT_URL} && + + echo 'Starting Triton server for testing models...' && \ + + tritonserver --model-repository=s3://$${S3_ENDPOINT_NAME}/$${S3_DATA_BUCKET_NAME}/$${S3_DATA_BUCKET_PATH}/models/testing --model-control-mode=explicit --log-verbose=1 + " + ports: + - "4000:8000" + - "4001:8001" + - "4002:8002" + volumes: + - cron_data:/data_models + + env_file: + - .env + networks: + - bykstack + depends_on: + init-storage: + condition: service_completed_successfully + cron-manager: + condition: service_started + ruuter-private: + condition: service_started + minio: + condition: service_started + + # Logging Stack - Loki and Grafana + loki: + image: grafana/loki:2.9.0 + container_name: loki + ports: + - "3100:3100" + command: -config.file=/etc/loki/local-config.yaml + volumes: + - ./grafana-configs/loki-config.yaml:/etc/loki/local-config.yaml + - loki-data:/loki + networks: + - bykstack + restart: unless-stopped + + grafana: + image: grafana/grafana:10.0.0 + container_name: grafana + ports: + - "4005:3000" + env_file: + - .env + volumes: + - grafana-data:/var/lib/grafana + - ./grafana-configs/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml + - ./grafana-configs/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml + - ./grafana-configs/grafana-dashboard-deployment.json:/etc/grafana/dashboards/deployment.json + networks: + - bykstack + depends_on: + - loki + restart: unless-stopped + + +volumes: + shared-volume: + name: shared-volume + opensearch-data: + name: opensearch-data + dataset_gen_ollama_models: + name: dataset_gen_ollama_models + cron_data: + name: cron_data + minio_data: + name: minio_data + loki-data: + name: loki-data + grafana-data: + name: grafana-data + redis_data: + name: redis_data + + +networks: + bykstack: + name: bykstack + driver: bridge + \ No newline at end of file diff --git a/src/s3_dataset_processor/dataset_generation_callback_processor.py b/src/s3_dataset_processor/dataset_generation_callback_processor.py index 2c9441a4..f0d20201 100644 --- a/src/s3_dataset_processor/dataset_generation_callback_processor.py +++ b/src/s3_dataset_processor/dataset_generation_callback_processor.py @@ -257,13 +257,13 @@ def process_callback_background( current_csv_path = file_path output_csv_path = f"{OUTPUT_DATA_DIR}/{dataset_id}_aggregated.csv" - if dataset_id <= 2: + if dataset_id == 1: logger.info("No previous dataset. 
Using current CSV only.") df = pd.read_csv(current_csv_path) df = update_item_ids(df, dataset_id) df = update_dataset_version_id(df, dataset_id) df.to_csv(output_csv_path, index=False) - else: + elif dataset_id >= 2: prev_dataset_id = dataset_id - 1 prev_csv_local = f"{OUTPUT_DATA_DIR}/{prev_dataset_id}_prev.csv" prev_csv_s3_path = f"datasets/{prev_dataset_id}/{AGGREGATED_CSV_FILE}"