From d6fa7ae466ad355e3dc4c98efd8908267259cf3e Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 7 Aug 2025 16:55:54 +0200 Subject: [PATCH 1/2] client_cmd.py actualizado --- client_cmd.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index b0b8ab2..08c6cce 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -18,10 +18,10 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="Reads parameters from command line.") - parser.add_argument("--client_id", type=int, default="Client Id", help="Number of client") + # parser.add_argument("--client_id", type=int, default="Client Id", help="Number of client") parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") parser.add_argument("--metadata_file", type=str, default="metadata.json", help="Json file with metadata") - parser.add_argument("--data_file", type=str, default="data.parquet" , help="parquet o csv file with actual data") + parser.add_argument("--data_id", type=str, default="data_id.parquet" , help="Dataset ID") parser.add_argument("--normalization_method",type=str, default="IQR", help="Type of normalization: IQR STD MIN_MAX") parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use") @@ -40,20 +40,26 @@ parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs") parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters") parser.add_argument("--linear_models", type=json.loads, default={"n_features": 9}, help="Linear model parameters") +# parser.add_argument("--n_features", type=int, default=0, help="Number of features") parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters") parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters") # Variables hardcoded - parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use") - parser.add_argument("--certs_path", type=str, default="./certs_path", help="Certificates path") - parser.add_argument("--data_path", type=str, default="./data", help="Data path") + parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use") + parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path") + parser.add_argument("--data_path", type=str, default="/data", help="Data path") + args = parser.parse_args() config = vars(args) -# config["sandbox_path"] = "./sandbox" -# config["certs_path"] = "/app/config/certificates" -# config["data_path"] = "./data" + + if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): + config["linear_models"] = {} + n_feats = len(config["train_labels"]) + config['linear_models']['n_features'] = n_feats # config["n_features"] + config["held_out_center_id"] = -1 + # Create sandbox log file path sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_client.txt")) @@ -107,7 +113,6 @@ def flush(self): if config["production_mode"] == "True": node_name = os.getenv("NODE_NAME") # num_client = int(node_name.split("_")[-1]) - num_client = config["client_id"] data_path = os.getenv("DATA_PATH") ca_cert = Path(os.path.join(config["certs_path"],"rootCA_cert.pem")) root_certificate = Path(f"{ca_cert}").read_bytes() @@ -135,14 +140,11 @@ def flush(self): root_certificate = None central_ip = "LOCALHOST" central_port = config["local_port"] - num_client = config["client_id"] # if len(sys.argv) == 1: # raise ValueError("Please provide the client id when running in simulation mode") # num_client = int(sys.argv[1]) - - print("Client id:" + str(num_client)) - +num_client = 0 # config["client_id"] (X_train, y_train), (X_test, y_test) = datasets.load_dataset(config, num_client) data = (X_train, y_train), (X_test, y_test) From dd31da8af18b86c5505e48662bc602de5cb22699 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Thu, 7 Aug 2025 17:00:48 +0200 Subject: [PATCH 2/2] actualizar --- flcore/datasets.py | 113 +++++++++++++++------------------------------ server_cmd.py | 27 +++++++---- 2 files changed, 53 insertions(+), 87 deletions(-) diff --git a/flcore/datasets.py b/flcore/datasets.py index 8fb4935..1c9d03d 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -8,6 +8,7 @@ import numpy as np import openml #import torch +from pathlib import Path import pandas as pd from sklearn.datasets import load_svmlight_file @@ -552,46 +553,35 @@ def min_max_normalize(col, min_val, max_val): return (col - min_val) / (max_val - min_val) def load_dt4h(config,id): - with open(config["data_path"]+config['metadata_file'], 'r') as file: +# print("LOAD_DT4H::",config["data_path"]+config['metadata_file']) + metadata_ = Path(os.path.join(config["data_path"],config['data_id'])) + metadata = Path(os.path.join(metadata_,config['metadata_file'])) + with open(metadata, 'r') as file: metadata = json.load(file) - - data_file = config["data_path"] + config["data_file"] - ext = data_file.split(".")[-1] - if ext == "pqt" or ext == "parquet": - dat = pd.read_parquet(data_file) - elif ext == "csv": - dat = pd.read_csv(data_file) +# print("METADATA",metadata) + data_file = Path(os.path.join(metadata_,config['data_id']+".parquet")) +# print("LOAD_DT4H::",data_file) +# ext = data_file.split(".")[-1] +# if ext == "pqt" or ext == "parquet": + dat = pd.read_parquet(data_file) +# elif ext == "csv": +# dat = pd.read_csv(data_file) dat_len = len(dat) - +# print("PARQUET", dat_len, dat) + # Numerical variables numeric_columns_non_zero = {} - for feat in metadata["entity"]["features"]: - if (feat["dataType"] == "NUMERIC" - and feat["name"] in train_labels - and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0): + for feat in metadata["entries"][0]["featureSet"]["features"]: + if feat["dataType"] == "NUMERIC" and feat["statistics"]["numOfNotNull"] != 0: # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull'] numeric_columns_non_zero[feat["name"]] = ( - metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q1"], - metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["avg"], - metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["min"], - metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q2"], - metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["max"], - metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q3"], - metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"], - ) - for feat in metadata["entity"]["outcomes"]: - if (feat["dataType"] == "NUMERIC" - and feat["name"] in target_labels - and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0): - # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull'] - numeric_columns_non_zero[feat["name"]] = ( - metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q1"], - metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["avg"], - metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["min"], - metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q2"], - metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["max"], - metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q3"], - metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"], + feat["statistics"]["Q1"], + feat["statistics"]["avg"], + feat["statistics"]["min"], + feat["statistics"]["Q2"], + feat["statistics"]["max"], + feat["statistics"]["Q3"], + feat["statistics"]["numOfNotNull"], ) for col, (q1,avg,mini,q2,maxi,q3,numOfNotNull) in numeric_columns_non_zero.items(): @@ -602,59 +592,28 @@ def load_dt4h(config,id): pass # no std found in data set elif config["normalization_method"] == "MIN_MAX": dat[col] = min_max_normalize(col, mini, maxi) - #tipos=[] + tipos=[] map_variables = {} - for feat in metadata["entity"]["features"]: - if (feat["dataType"] == "NOMINAL" - and feat["name"] in train_labels - and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0): - #print("FEAT", feat["name"]) + for feat in metadata["entries"][0]["featureSet"]["features"]: + tipos.append(feat["dataType"]) + if feat["dataType"] == "NOMINAL" and feat["statistics"]["numOfNotNull"] != 0: + num_cat = len(feat["statistics"]["valueset"]) map_cat = {} - if "valueSet" in feat.keys(): - for ind, cat_ in enumerate(feat["valueSet"]["concept"]): - #print(ind,cat_["code"]) - cat = cat_["code"] - map_cat[cat] = ind - else: - pass - #print("NO",feat["name"]) + for ind, cat in enumerate(feat["statistics"]["valueset"]): + map_cat[cat] = ind map_variables[feat["name"]] = map_cat - - for feat in metadata["entity"]["outcomes"]: - if (feat["dataType"] == "NOMINAL" - and feat["name"] in target_labels - and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0): - #print("FEAT", feat["name"]) - map_cat = {} - if "valueSet" in feat.keys(): - for ind, cat_ in enumerate(feat["valueSet"]["concept"]): - #print(ind,cat_["code"]) - cat = cat_["code"] - map_cat[cat] = ind - else: - pass # temporal - #print("NO",feat["name"]) - map_variables[feat["name"]] = map_cat - for col,mapa in map_variables.items(): dat[col] = dat[col].map(mapa) dat[map_variables.keys()].dropna() - + + tipos=[] map_variables = {} boolean_map = {np.bool_(False) :0, np.bool_(True):1, "False":0,"True":1} - for feat in metadata["entity"]["features"]: - if (feat["dataType"] == "BOOLEAN" - and feat["name"] in train_labels - and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0): + for feat in metadata["entries"][0]["featureSet"]["features"]: + tipos.append(feat["dataType"]) + if feat["dataType"] == "BOOLEAN" and feat["statistics"]["numOfNotNull"] != 0: map_variables[feat["name"]] = boolean_map - - for feat in metadata["entity"]["outcomes"]: - if (feat["dataType"] == "BOOLEAN" - and feat["name"] in target_labels - and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0): - map_variables[feat["name"]] = boolean_map - for col,mapa in map_variables.items(): dat[col] = dat[col].map(boolean_map) diff --git a/server_cmd.py b/server_cmd.py index 0de88a3..8605f0a 100644 --- a/server_cmd.py +++ b/server_cmd.py @@ -40,8 +40,8 @@ def check_config(config): parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations") parser.add_argument("--model", type=str, default="random_forest", help="Model to train") parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") - parser.add_argument("--sandbox_path", type=str, default="./", help="Sandbox path to use") - parser.add_argument("--certs_path", type=str, default="./", help="Certificates path") + #parser.add_argument("--sandbox_path", type=str, default="./", help="Sandbox path to use") + #parser.add_argument("--certs_path", type=str, default="./", help="Certificates path") parser.add_argument("--smooth_method", type=str, default="EqualVoting", help="Weight smoothing") parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters") @@ -51,20 +51,27 @@ def check_config(config): parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints") parser.add_argument("--production_mode", type=str, default="True", help="Production mode") - parser.add_argument("--data_path", type=str, default=None, help="Data path") + #parser.add_argument("--Wdata_path", type=str, default=None, help="Data path") parser.add_argument("--local_port", type=int, default=8081, help="Local port") parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs") parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters") + parser.add_argument("--n_features", type=int, default=0, help="Number of features") args = parser.parse_args() config = vars(args) + if config["model"] in ("logistic_regression", "elastic_net", "lsvc"): + print("LINEAR", config["model"], config["n_features"]) + config["linear_models"] = {} + config['linear_models']['n_features'] = config["n_features"] + config["held_out_center_id"] = -1 + experiment_dir = Path(os.path.join(config["experiment"]["log_path"], config["experiment"]["name"])) config["experiment_dir"] = experiment_dir # Create sandbox log file path - sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_server.txt")) + sandbox_log_file = Path(os.path.join("/sandbox", "log_server.txt")) # Set up the file handler (writes to file) file_handler = logging.FileHandler(sandbox_log_file) @@ -118,13 +125,13 @@ def flush(self): check_config(config) if config["production_mode"] == "True": print("TRUE") - data_path = os.getenv("DATA_PATH") + #data_path = "" central_ip = os.getenv("FLOWER_CENTRAL_SERVER_IP") central_port = os.getenv("FLOWER_CENTRAL_SERVER_PORT") - ca_cert = Path(os.path.join(config["certs_path"],"rootCA_cert.pem")) - server_cert = Path(os.path.join(config["certs_path"],"server_cert.pem")) - server_key = Path(os.path.join(config["certs_path"],"server_key.pem")) + ca_cert = Path(os.path.join("/certs","rootCA_cert.pem")) + server_cert = Path(os.path.join("/certs","server_cert.pem")) + server_key = Path(os.path.join("/certs","server_key.pem")) certificates = ( Path(f"{ca_cert}").read_bytes(), @@ -136,7 +143,7 @@ def flush(self): # Path('.cache/certificates/server_key.pem').read_bytes(), else: print("ELSE") - data_path = config["data_path"] + #data_path = config["data_path"] central_ip = "LOCALHOST" central_port = config["local_port"] certificates = None @@ -247,4 +254,4 @@ def flush(self): # Compile the results compile_results(experiment_dir) -""" \ No newline at end of file +"""