From 9bf6a1c8ce8b9dee1c5a868c982bf3cc9ed6deb2 Mon Sep 17 00:00:00 2001
From: Jorge Fabila
Date: Thu, 10 Jul 2025 00:53:03 +0200
Subject: [PATCH 1/7] reajuste

---
 flcore/datasets.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/flcore/datasets.py b/flcore/datasets.py
index 3ecebd1..b93a43d 100644
--- a/flcore/datasets.py
+++ b/flcore/datasets.py
@@ -566,17 +566,28 @@ def load_dt4h(config,id):
 
     # Numerical variables
     numeric_columns_non_zero = {}
-    for feat in metadata["entries"][0]["featureSet"]["features"]:
-        if feat["dataType"] == "NUMERIC" and feat["statistics"]["numOfNotNull"] != 0:
+#    for feat in metadata["entries"][0]["featureSet"]["features"]:
+#        if feat["dataType"] == "NUMERIC" and feat["statistics"]["numOfNotNull"] != 0:
+    for feat in metadata["entity"]["features"]:
+        if feat["dataType"] == "NUMERIC" and metadata["entity"]["datasetStats"]["outcomeStats"][feat]["numOfNotNull"] != 0:
             # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
+            """
+            "min": 12.0,
+            "max": 152.45,
+            "avg": 112.37444444444445,
+            "q1": 133.76,
+            "q2": 139.34,
+            "q3": 141.09,
+
+            """
             numeric_columns_non_zero[feat["name"]] = (
-                feat["statistics"]["Q1"],
-                feat["statistics"]["avg"],
-                feat["statistics"]["min"],
-                feat["statistics"]["Q2"],
-                feat["statistics"]["max"],
-                feat["statistics"]["Q3"],
-                feat["statistics"]["numOfNotNull"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["q1"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["avg"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["min"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["q2"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["max"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["q3"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["numOfNotNull"],
             )
 
     for col, (q1,avg,mini,q2,maxi,q3,numOfNotNull) in numeric_columns_non_zero.items():

From ac16b943ad043047d29881eeded727bc8c430415 Mon Sep 17 00:00:00 2001
From: Jorge Fabila
Date: Thu, 10 Jul 2025 18:39:50 +0200
Subject: [PATCH 2/7] =?UTF-8?q?correcci=C3=B3n=20num=C3=A9ricas?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flcore/datasets.py | 51 ++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 29 deletions(-)

diff --git a/flcore/datasets.py b/flcore/datasets.py
index b93a43d..4b1bb03 100644
--- a/flcore/datasets.py
+++ b/flcore/datasets.py
@@ -564,30 +564,19 @@ def load_dt4h(config,id):
 
     dat_len = len(dat)
 
-    # Numerical variables
     numeric_columns_non_zero = {}
-#    for feat in metadata["entries"][0]["featureSet"]["features"]:
-#        if feat["dataType"] == "NUMERIC" and feat["statistics"]["numOfNotNull"] != 0:
+    # SE TENDRIA QUE HACER LO MISMO PARA outcomeStats en vez de featureStats y metadata["entity"]["outcomes"]
     for feat in metadata["entity"]["features"]:
-        if feat["dataType"] == "NUMERIC" and metadata["entity"]["datasetStats"]["outcomeStats"][feat]["numOfNotNull"] != 0:
+        if feat["dataType"] == "NUMERIC" and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0:
             # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
-            """
-            "min": 12.0,
-            "max": 152.45,
-            "avg": 112.37444444444445,
-            "q1": 133.76,
-            "q2": 139.34,
-            "q3": 141.09,
-
-            """
             numeric_columns_non_zero[feat["name"]] = (
-                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["q1"],
-                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["avg"],
-                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["min"],
-                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["q2"],
-                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["max"],
-                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["q3"],
-                metadata["entity"]["datasetStats"]["outcomeStats"][feat]["numOfNotNull"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q1"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["avg"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["min"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q2"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["max"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q3"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"],
             )
 
     for col, (q1,avg,mini,q2,maxi,q3,numOfNotNull) in numeric_columns_non_zero.items():
@@ -598,11 +587,13 @@ def load_dt4h(config,id):
                 pass # no std found in data set
             elif config["normalization_method"] == "MIN_MAX":
                 dat[col] = min_max_normalize(col, mini, maxi)
-    tipos=[]
+    #tipos=[]
     map_variables = {}
-    for feat in metadata["entries"][0]["featureSet"]["features"]:
-        tipos.append(feat["dataType"])
-        if feat["dataType"] == "NOMINAL" and feat["statistics"]["numOfNotNull"] != 0:
+    for feat in metadata["entity"]["features"]:
+        #for feat in metadata["entries"][0]["featureSet"]["features"]:
+        #if feat["dataType"] == "NOMINAL" and feat["statistics"]["numOfNotNull"] != 0:
+        # tipos.append(feat["dataType"])
+        if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["outcomeStats"][feat]["numOfNotNull"] != 0:
             num_cat = len(feat["statistics"]["valueset"])
             map_cat = {}
             for ind, cat in enumerate(feat["statistics"]["valueset"]):
@@ -612,13 +603,15 @@ def load_dt4h(config,id):
         dat[col] = dat[col].map(mapa)
 
     dat[map_variables.keys()].dropna()
-
-    tipos=[]
+
+    #tipos=[]
     map_variables = {}
     boolean_map = {np.bool_(False) :0, np.bool_(True):1, "False":0,"True":1}
-    for feat in metadata["entries"][0]["featureSet"]["features"]:
-        tipos.append(feat["dataType"])
-        if feat["dataType"] == "BOOLEAN" and feat["statistics"]["numOfNotNull"] != 0:
+    for feat in metadata["entity"]["features"]:
+        #for feat in metadata["entries"][0]["featureSet"]["features"]:
+        # tipos.append(feat["dataType"])
+        if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["outcomeStats"][feat]["numOfNotNull"] != 0:
+#        if feat["dataType"] == "BOOLEAN" and feat["statistics"]["numOfNotNull"] != 0:
             map_variables[feat["name"]] = boolean_map
     for col,mapa in map_variables.items():
         dat[col] = dat[col].map(boolean_map)

From 8d8fd523e0fd1c9f125a1c7629a8a291ed6bf391 Mon Sep 17 00:00:00 2001
From: Jorge Fabila
Date: Fri, 11 Jul 2025 10:33:44 +0200
Subject: [PATCH 3/7] =?UTF-8?q?outcomes=20numericos=20a=C3=B1adido?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flcore/datasets.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/flcore/datasets.py b/flcore/datasets.py
index 4b1bb03..de4a23c 100644
--- a/flcore/datasets.py
+++ b/flcore/datasets.py
@@ -565,7 +565,6 @@ def load_dt4h(config,id):
     dat_len = len(dat)
 
     numeric_columns_non_zero = {}
-    # SE TENDRIA QUE HACER LO MISMO PARA outcomeStats en vez de featureStats y metadata["entity"]["outcomes"]
     for feat in metadata["entity"]["features"]:
         if feat["dataType"] == "NUMERIC" and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0:
             # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
@@ -578,6 +577,18 @@ def load_dt4h(config,id):
                 metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q3"],
                 metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"],
             )
+    for feat in metadata["entity"]["outcomes"]:
+        if feat["dataType"] == "NUMERIC" and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0:
+            # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
+            numeric_columns_non_zero[feat["name"]] = (
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q1"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["avg"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["min"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q2"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["max"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q3"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"],
+            )
 
     for col, (q1,avg,mini,q2,maxi,q3,numOfNotNull) in numeric_columns_non_zero.items():
         if col in dat.columns:
@@ -590,10 +601,7 @@ def load_dt4h(config,id):
     #tipos=[]
     map_variables = {}
     for feat in metadata["entity"]["features"]:
-        #for feat in metadata["entries"][0]["featureSet"]["features"]:
-        #if feat["dataType"] == "NOMINAL" and feat["statistics"]["numOfNotNull"] != 0:
-        # tipos.append(feat["dataType"])
-        if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["outcomeStats"][feat]["numOfNotNull"] != 0:
+        if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0:
             num_cat = len(feat["statistics"]["valueset"])
             map_cat = {}
             for ind, cat in enumerate(feat["statistics"]["valueset"]):

From 3ddf3aa3f1c59ed1e7b1999138e9594437141ee2 Mon Sep 17 00:00:00 2001
From: Jorge Fabila
Date: Fri, 11 Jul 2025 13:39:41 +0200
Subject: [PATCH 4/7] NOMINAL corregido

---
 flcore/datasets.py | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/flcore/datasets.py b/flcore/datasets.py
index de4a23c..428ae4b 100644
--- a/flcore/datasets.py
+++ b/flcore/datasets.py
@@ -602,11 +602,30 @@ def load_dt4h(config,id):
     map_variables = {}
     for feat in metadata["entity"]["features"]:
         if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0:
-            num_cat = len(feat["statistics"]["valueset"])
+            print("FEAT", feat["name"])
             map_cat = {}
-            for ind, cat in enumerate(feat["statistics"]["valueset"]):
-                map_cat[cat] = ind
+            if "valueSet" in feat.keys():
+                for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
+                    print(ind,cat_["code"])
+                    cat = cat_["code"]
+                    map_cat[cat] = ind
+            else:
+                print("NO",feat["name"])
             map_variables[feat["name"]] = map_cat
+
+    for feat in metadata["entity"]["outcomes"]:
+        if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0:
+            print("FEAT", feat["name"])
+            map_cat = {}
+            if "valueSet" in feat.keys():
+                for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
+                    print(ind,cat_["code"])
+                    cat = cat_["code"]
+                    map_cat[cat] = ind
+            else:
+                print("NO",feat["name"])
+            map_variables[feat["name"]] = map_cat
+
     for col,mapa in map_variables.items():
         dat[col] = dat[col].map(mapa)

From 1bb55e30325daee5ccd5571f9cb9732ca34fa7cf Mon Sep 17 00:00:00 2001
From: Jorge Fabila
Date: Fri, 11 Jul 2025 13:48:28 +0200
Subject: [PATCH 5/7] booleanos corregidos

---
 flcore/datasets.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/flcore/datasets.py b/flcore/datasets.py
index 428ae4b..b5e792d 100644
--- a/flcore/datasets.py
+++ b/flcore/datasets.py
@@ -631,15 +631,16 @@ def load_dt4h(config,id):
 
     dat[map_variables.keys()].dropna()
 
-    #tipos=[]
     map_variables = {}
     boolean_map = {np.bool_(False) :0, np.bool_(True):1, "False":0,"True":1}
     for feat in metadata["entity"]["features"]:
-        #for feat in metadata["entries"][0]["featureSet"]["features"]:
-        # tipos.append(feat["dataType"])
-        if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["outcomeStats"][feat]["numOfNotNull"] != 0:
-#        if feat["dataType"] == "BOOLEAN" and feat["statistics"]["numOfNotNull"] != 0:
+        if feat["dataType"] == "BOOLEAN" and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0:
             map_variables[feat["name"]] = boolean_map
+
+    for feat in metadata["entity"]["outcomes"]:
+        if feat["dataType"] == "BOOLEAN" and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0:
+            map_variables[feat["name"]] = boolean_map
+
     for col,mapa in map_variables.items():
         dat[col] = dat[col].map(boolean_map)

From 52ca616dd86d62d97abf1d3009770f427e2a15a8 Mon Sep 17 00:00:00 2001
From: Jorge Fabila
Date: Fri, 11 Jul 2025 15:28:34 +0200
Subject: [PATCH 6/7] =?UTF-8?q?a=C3=B1adido=20selector=20de=20labels?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flcore/datasets.py | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/flcore/datasets.py b/flcore/datasets.py
index b5e792d..8fb4935 100644
--- a/flcore/datasets.py
+++ b/flcore/datasets.py
@@ -566,7 +566,9 @@ def load_dt4h(config,id):
 
     numeric_columns_non_zero = {}
     for feat in metadata["entity"]["features"]:
-        if feat["dataType"] == "NUMERIC" and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0:
+        if (feat["dataType"] == "NUMERIC"
+            and feat["name"] in train_labels
+            and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
             # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
             numeric_columns_non_zero[feat["name"]] = (
                 metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q1"],
@@ -578,7 +580,9 @@ def load_dt4h(config,id):
                 metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"],
             )
     for feat in metadata["entity"]["outcomes"]:
-        if feat["dataType"] == "NUMERIC" and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0:
+        if (feat["dataType"] == "NUMERIC"
+            and feat["name"] in target_labels
+            and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
             # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
             numeric_columns_non_zero[feat["name"]] = (
                 metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q1"],
@@ -601,29 +605,35 @@ def load_dt4h(config,id):
     #tipos=[]
     map_variables = {}
     for feat in metadata["entity"]["features"]:
-        if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0:
-            print("FEAT", feat["name"])
+        if (feat["dataType"] == "NOMINAL"
+            and feat["name"] in train_labels
+            and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
+            #print("FEAT", feat["name"])
             map_cat = {}
             if "valueSet" in feat.keys():
                 for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
-                    print(ind,cat_["code"])
+                    #print(ind,cat_["code"])
                     cat = cat_["code"]
                     map_cat[cat] = ind
             else:
-                print("NO",feat["name"])
+                pass
+                #print("NO",feat["name"])
             map_variables[feat["name"]] = map_cat
 
     for feat in metadata["entity"]["outcomes"]:
-        if feat["dataType"] == "NOMINAL" and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0:
-            print("FEAT", feat["name"])
+        if (feat["dataType"] == "NOMINAL"
+            and feat["name"] in target_labels
+            and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
+            #print("FEAT", feat["name"])
             map_cat = {}
             if "valueSet" in feat.keys():
                 for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
-                    print(ind,cat_["code"])
+                    #print(ind,cat_["code"])
                     cat = cat_["code"]
                     map_cat[cat] = ind
             else:
-                print("NO",feat["name"])
+                pass # temporal
+                #print("NO",feat["name"])
             map_variables[feat["name"]] = map_cat
 
     for col,mapa in map_variables.items():
@@ -634,11 +644,15 @@ def load_dt4h(config,id):
     map_variables = {}
     boolean_map = {np.bool_(False) :0, np.bool_(True):1, "False":0,"True":1}
     for feat in metadata["entity"]["features"]:
-        if feat["dataType"] == "BOOLEAN" and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0:
+        if (feat["dataType"] == "BOOLEAN"
+            and feat["name"] in train_labels
+            and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
             map_variables[feat["name"]] = boolean_map
 
     for feat in metadata["entity"]["outcomes"]:
-        if feat["dataType"] == "BOOLEAN" and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0:
+        if (feat["dataType"] == "BOOLEAN"
+            and feat["name"] in target_labels
+            and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
             map_variables[feat["name"]] = boolean_map
 
     for col,mapa in map_variables.items():

From e36ceb0ebafee3ed34c8d4be50e85654eab988e9 Mon Sep 17 00:00:00 2001
From: Jorge Fabila
Date: Wed, 30 Jul 2025 18:17:30 +0200
Subject: [PATCH 7/7] vars amarradas

---
 client_cmd.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/client_cmd.py b/client_cmd.py
index 98bbc79..b0b8ab2 100644
--- a/client_cmd.py
+++ b/client_cmd.py
@@ -34,12 +34,9 @@
     parser.add_argument("--smooth_method", type=str, default=None, help="Weight smoothing")
     parser.add_argument("--seed", type=int, default=42, help="Seed")
     parser.add_argument("--local_port", type=int, default=8081, help="Local port")
-    parser.add_argument("--data_path", type=str, default=None, help="Data path")
     parser.add_argument("--production_mode", type=str, default="True", help="Production mode")
-    parser.add_argument("--certs_path", type=str, default="./", help="Certificates path")
    parser.add_argument("--node_name", type=str, default="./", help="Node name for certificates")
-    parser.add_argument("--sandbox_path", type=str, default="./", help="Sandbox path to use")
     parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs")
     parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters")
     parser.add_argument("--linear_models", type=json.loads, default={"n_features": 9}, help="Linear model parameters")
@@ -47,10 +44,16 @@
     parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters")
     parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters")
 
+# Variables hardcoded
+    parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use")
+    parser.add_argument("--certs_path", type=str, default="./certs_path", help="Certificates path")
+    parser.add_argument("--data_path", type=str, default="./data", help="Data path")
     args = parser.parse_args()
 
     config = vars(args)
-
+# config["sandbox_path"] = "./sandbox"
+# config["certs_path"] = "/app/config/certificates"
+# config["data_path"] = "./data"
 
     # Create sandbox log file path
     sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_client.txt"))