# Patch summary (commentary only; this text precedes the first "diff --git"
# header and is not part of any hunk).
#
# Files touched: client_cmd.py and flcore/datasets.py.
#
# client_cmd.py
#   * Adds "import glob".
#   * Comments out the "--metadata_file" command-line argument.
#   * After "config = vars(args)", derives two config entries from
#     config["data_id"]:
#       - config["metadata_file"] = os.path.join(data_id, "metadata.json")
#       - config["data_file"]     = last element of
#                                   glob.glob(os.path.join(data_id, "*.parquet"))
#
# flcore/datasets.py (load_dt4h)
#   * Opens config['metadata_file'] and reads config['data_file'] directly,
#     instead of building both paths from config["data_path"] and
#     config['data_id'].
#   * Removes commented-out debug prints and the commented-out CSV branch.
#
# NOTE(review): in this copy the patch's line breaks appear to have been
#   collapsed onto two physical lines; the original line structure and
#   indentation of the hunks cannot be recovered from here, so the patch text
#   below is left exactly as found.
# NOTE(review): "--data_id" is now joined with "metadata.json" and globbed for
#   "*.parquet", i.e. it is used as a directory path, while its argparse
#   default is still "data_id.parquet" -- confirm the intended default.
# NOTE(review): config["data_file"] = parquet_files[-1] raises IndexError when
#   no "*.parquet" file matches, and glob.glob() does not promise a sorted
#   result, so which file is picked when several match depends on the file
#   system. The in-patch comment ("How to choose one of the list?") leaves
#   this open -- TODO decide (e.g. sort, or require exactly one match).
# NOTE(review): the added local "id" shadows the builtin, and neither "id" nor
#   "dir_name_parent" is used in the visible hunk -- presumably leftovers;
#   verify against the rest of client_cmd.py. The patch also adds new
#   commented-out lines ("# dir_name = ...", "# config[...] = ...").
# NOTE(review): load_dt4h now requires config['metadata_file'] and
#   config['data_file'] to be set by the caller; only client_cmd.py is shown
#   setting them here -- confirm any other callers.
#
diff --git a/client_cmd.py b/client_cmd.py index c74a0c3..909d67a 100644 --- a/client_cmd.py +++ b/client_cmd.py @@ -1,5 +1,6 @@ import sys import os +import glob import time from pathlib import Path @@ -20,7 +21,7 @@ parser = argparse.ArgumentParser(description="Reads parameters from command line.") # # parser.add_argument("--client_id", type=int, default="Client Id", help="Number of client") parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use") - parser.add_argument("--metadata_file", type=str, default="metadata.json", help="Json file with metadata") + #parser.add_argument("--metadata_file", type=str, default="metadata.json", help="Json file with metadata") parser.add_argument("--data_id", type=str, default="data_id.parquet" , help="Dataset ID") parser.add_argument("--normalization_method",type=str, default="IQR", help="Type of normalization: IQR STD MIN_MAX") parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use") @@ -53,6 +54,20 @@ args = parser.parse_args() config = vars(args) + + est = config["data_id"] + id = est.split("/")[-1] +# dir_name = os.path.dirname(config["data_id"]) + dir_name_parent = str(Path(config["data_id"]).parent) + +# config["metadata_file"] = os.path.join(dir_name_parent,"metadata.json") + config["metadata_file"] = os.path.join(est,"metadata.json") + + pattern = "*.parquet" + parquet_files = glob.glob(os.path.join(est, pattern)) + # ¿How to choose one of the list? 
+ config["data_file"] = parquet_files[-1] + new = [] for i in config["train_labels"]: parsed = i.replace("]", "").replace("[", "").replace(",", "") diff --git a/flcore/datasets.py b/flcore/datasets.py index 1c9d03d..699c4a0 100644 --- a/flcore/datasets.py +++ b/flcore/datasets.py @@ -553,22 +553,14 @@ def min_max_normalize(col, min_val, max_val): return (col - min_val) / (max_val - min_val) def load_dt4h(config,id): -# print("LOAD_DT4H::",config["data_path"]+config['metadata_file']) - metadata_ = Path(os.path.join(config["data_path"],config['data_id'])) - metadata = Path(os.path.join(metadata_,config['metadata_file'])) + metadata = Path(config['metadata_file']) with open(metadata, 'r') as file: metadata = json.load(file) -# print("METADATA",metadata) - data_file = Path(os.path.join(metadata_,config['data_id']+".parquet")) -# print("LOAD_DT4H::",data_file) -# ext = data_file.split(".")[-1] -# if ext == "pqt" or ext == "parquet": + + data_file = Path(config['data_file']) dat = pd.read_parquet(data_file) -# elif ext == "csv": -# dat = pd.read_csv(data_file) dat_len = len(dat) -# print("PARQUET", dat_len, dat) # Numerical variables numeric_columns_non_zero = {} for feat in metadata["entries"][0]["featureSet"]["features"]: