From 460ca1f2dfe7efb9e52c182b072f289f9478f8be Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Mon, 18 Aug 2025 12:47:15 +0200 Subject: [PATCH 1/2] new data and metadata paths --- client_cmd.py | 16 +++++++++++++++- flcore/datasets.py | 14 +++----------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/client_cmd.py b/client_cmd.py index c74a0c3..33b3b59 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -1,5 +1,6 @@ import sys import os +import glob import time from pathlib import Path @@ -20,7 +21,7 @@ parser = argparse.ArgumentParser(description="Reads parameters from command line.") # # parser.add_argument("--client_id", type=int, default="Client Id", help="Number of client") parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") - parser.add_argument("--metadata_file", type=str, default="metadata.json", help="Json file with metadata") + #parser.add_argument("--metadata_file", type=str, default="metadata.json", help="Json file with metadata") parser.add_argument("--data_id", type=str, default="data_id.parquet" , help="Dataset ID") parser.add_argument("--normalization_method",type=str, default="IQR", help="Type of normalization: IQR STD MIN_MAX") parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") @@ -53,6 +54,19 @@ args = parser.parse_args() config = vars(args) + + est = config["data_id"] + id = est.split("/")[-1] +# dir_name = os.path.dirname(config["data_id"]) + dir_name_parent = str(Path(config["data_id"]).parent) + + config["metadata_file"] = os.path.join(dir_name_parent,"metadata.json") + + pattern = "*.parquet" + parquet_files = glob.glob(os.path.join(est, pattern)) + # ¿How to choose one of the list? + config["data_file"] = parquet_files[-1] + new = [] for i in config["train_labels"]: parsed = i.replace("]", "").replace("[", "").replace(",", "") diff --git a/flcore/datasets.py b/flcore/datasets.py index 1c9d03d..699c4a0 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -553,22 +553,14 @@ def min_max_normalize(col, min_val, max_val): return (col - min_val) / (max_val - min_val) def load_dt4h(config,id): -# print("LOAD_DT4H::",config["data_path"]+config['metadata_file']) - metadata_ = Path(os.path.join(config["data_path"],config['data_id'])) - metadata = Path(os.path.join(metadata_,config['metadata_file'])) + metadata = Path(config['metadata_file']) with open(metadata, 'r') as file: metadata = json.load(file) -# print("METADATA",metadata) - data_file = Path(os.path.join(metadata_,config['data_id']+".parquet")) -# print("LOAD_DT4H::",data_file) -# ext = data_file.split(".")[-1] -# if ext == "pqt" or ext == "parquet": + + data_file = Path(config['data_file']) dat = pd.read_parquet(data_file) -# elif ext == "csv": -# dat = pd.read_csv(data_file) dat_len = len(dat) -# print("PARQUET", dat_len, dat) # Numerical variables numeric_columns_non_zero = {} for feat in metadata["entries"][0]["featureSet"]["features"]: From 34918c9de6ade7e79acf754368537bd1db6c3571 Mon Sep 17 00:00:00 2001 From: Jorge Fabila Date: Wed, 3 Sep 2025 10:00:36 +0200 Subject: [PATCH 2/2] change of paths --- client_cmd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client_cmd.py b/client_cmd.py index 33b3b59..909d67a 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -60,7 +60,8 @@ # dir_name = os.path.dirname(config["data_id"]) dir_name_parent = str(Path(config["data_id"]).parent) - config["metadata_file"] = os.path.join(dir_name_parent,"metadata.json") +# config["metadata_file"] = os.path.join(dir_name_parent,"metadata.json") + config["metadata_file"] = os.path.join(est,"metadata.json") pattern = "*.parquet" parquet_files = glob.glob(os.path.join(est, pattern))