Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions client_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
if __name__ == "__main__":

parser = argparse.ArgumentParser(description="Reads parameters from command line.")
# parser.add_argument("--client_id", type=int, default="Client Id", help="Number of client")
# # parser.add_argument("--client_id", type=int, default="Client Id", help="Number of client")
parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use")
parser.add_argument("--metadata_file", type=str, default="metadata.json", help="Json file with metadata")
parser.add_argument("--data_file", type=str, default="data.parquet" , help="parquet o csv file with actual data")
parser.add_argument("--data_id", type=str, default="data_id.parquet" , help="Dataset ID")
parser.add_argument("--normalization_method",type=str, default="IQR", help="Type of normalization: IQR STD MIN_MAX")
parser.add_argument("--train_labels", type=str, nargs='+', default=None, help="Dataloader to use")
parser.add_argument("--target_label", type=str, nargs='+', default=None, help="Dataloader to use")
Expand All @@ -40,21 +40,19 @@
parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs")
parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters")
parser.add_argument("--linear_models", type=json.loads, default={"n_features": 9}, help="Linear model parameters")
parser.add_argument("--n_features", type=int, default=0, help="Number of features")
# parser.add_argument("--n_features", type=int, default=0, help="Number of features")
parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters")
parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters")
parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters")

# Variables hardcoded
parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use")
parser.add_argument("--certs_path", type=str, default="./certs_path", help="Certificates path")
parser.add_argument("--data_path", type=str, default="./data", help="Data path")
parser.add_argument("--sandbox_path", type=str, default="/sandbox", help="Sandbox path to use")
parser.add_argument("--certs_path", type=str, default="/certs", help="Certificates path")
parser.add_argument("--data_path", type=str, default="/data", help="Data path")

args = parser.parse_args()

config = vars(args)
# config["sandbox_path"] = "./sandbox"
# config["certs_path"] = "/app/config/certificates"
# config["data_path"] = "./data"

if config["model"] in ("logistic_regression", "elastic_net", "lsvc"):
config["linear_models"] = {}
Expand Down
113 changes: 36 additions & 77 deletions flcore/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import openml
#import torch
from pathlib import Path
import pandas as pd

from sklearn.datasets import load_svmlight_file
Expand Down Expand Up @@ -552,46 +553,35 @@ def min_max_normalize(col, min_val, max_val):
return (col - min_val) / (max_val - min_val)

def load_dt4h(config,id):
with open(config["data_path"]+config['metadata_file'], 'r') as file:
# print("LOAD_DT4H::",config["data_path"]+config['metadata_file'])
metadata_ = Path(os.path.join(config["data_path"],config['data_id']))
metadata = Path(os.path.join(metadata_,config['metadata_file']))
with open(metadata, 'r') as file:
metadata = json.load(file)

data_file = config["data_path"] + config["data_file"]
ext = data_file.split(".")[-1]
if ext == "pqt" or ext == "parquet":
dat = pd.read_parquet(data_file)
elif ext == "csv":
dat = pd.read_csv(data_file)
# print("METADATA",metadata)
data_file = Path(os.path.join(metadata_,config['data_id']+".parquet"))
# print("LOAD_DT4H::",data_file)
# ext = data_file.split(".")[-1]
# if ext == "pqt" or ext == "parquet":
dat = pd.read_parquet(data_file)
# elif ext == "csv":
# dat = pd.read_csv(data_file)

dat_len = len(dat)

# print("PARQUET", dat_len, dat)
# Numerical variables
numeric_columns_non_zero = {}
for feat in metadata["entity"]["features"]:
if (feat["dataType"] == "NUMERIC"
and feat["name"] in train_labels
and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
for feat in metadata["entries"][0]["featureSet"]["features"]:
if feat["dataType"] == "NUMERIC" and feat["statistics"]["numOfNotNull"] != 0:
# statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
numeric_columns_non_zero[feat["name"]] = (
metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q1"],
metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["avg"],
metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["min"],
metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q2"],
metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["max"],
metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q3"],
metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"],
)
for feat in metadata["entity"]["outcomes"]:
if (feat["dataType"] == "NUMERIC"
and feat["name"] in target_labels
and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
# statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
numeric_columns_non_zero[feat["name"]] = (
metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q1"],
metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["avg"],
metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["min"],
metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q2"],
metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["max"],
metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q3"],
metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"],
feat["statistics"]["Q1"],
feat["statistics"]["avg"],
feat["statistics"]["min"],
feat["statistics"]["Q2"],
feat["statistics"]["max"],
feat["statistics"]["Q3"],
feat["statistics"]["numOfNotNull"],
)

for col, (q1,avg,mini,q2,maxi,q3,numOfNotNull) in numeric_columns_non_zero.items():
Expand All @@ -602,59 +592,28 @@ def load_dt4h(config,id):
pass # no std found in data set
elif config["normalization_method"] == "MIN_MAX":
dat[col] = min_max_normalize(col, mini, maxi)
#tipos=[]
tipos=[]
map_variables = {}
for feat in metadata["entity"]["features"]:
if (feat["dataType"] == "NOMINAL"
and feat["name"] in train_labels
and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
#print("FEAT", feat["name"])
for feat in metadata["entries"][0]["featureSet"]["features"]:
tipos.append(feat["dataType"])
if feat["dataType"] == "NOMINAL" and feat["statistics"]["numOfNotNull"] != 0:
num_cat = len(feat["statistics"]["valueset"])
map_cat = {}
if "valueSet" in feat.keys():
for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
#print(ind,cat_["code"])
cat = cat_["code"]
map_cat[cat] = ind
else:
pass
#print("NO",feat["name"])
for ind, cat in enumerate(feat["statistics"]["valueset"]):
map_cat[cat] = ind
map_variables[feat["name"]] = map_cat

for feat in metadata["entity"]["outcomes"]:
if (feat["dataType"] == "NOMINAL"
and feat["name"] in target_labels
and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
#print("FEAT", feat["name"])
map_cat = {}
if "valueSet" in feat.keys():
for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
#print(ind,cat_["code"])
cat = cat_["code"]
map_cat[cat] = ind
else:
pass # temporal
#print("NO",feat["name"])
map_variables[feat["name"]] = map_cat

for col,mapa in map_variables.items():
dat[col] = dat[col].map(mapa)

dat[map_variables.keys()].dropna()


tipos=[]
map_variables = {}
boolean_map = {np.bool_(False) :0, np.bool_(True):1, "False":0,"True":1}
for feat in metadata["entity"]["features"]:
if (feat["dataType"] == "BOOLEAN"
and feat["name"] in train_labels
and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
for feat in metadata["entries"][0]["featureSet"]["features"]:
tipos.append(feat["dataType"])
if feat["dataType"] == "BOOLEAN" and feat["statistics"]["numOfNotNull"] != 0:
map_variables[feat["name"]] = boolean_map

for feat in metadata["entity"]["outcomes"]:
if (feat["dataType"] == "BOOLEAN"
and feat["name"] in target_labels
and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
map_variables[feat["name"]] = boolean_map

for col,mapa in map_variables.items():
dat[col] = dat[col].map(boolean_map)

Expand Down
20 changes: 10 additions & 10 deletions server_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ def check_config(config):
parser.add_argument("--num_rounds", type=int, default=50, help="Number of federated iterations")
parser.add_argument("--model", type=str, default="random_forest", help="Model to train")
parser.add_argument("--dataset", type=str, default="dt4h_format", help="Dataloader to use")
parser.add_argument("--sandbox_path", type=str, default="./", help="Sandbox path to use")
parser.add_argument("--certs_path", type=str, default="./", help="Certificates path")
#parser.add_argument("--sandbox_path", type=str, default="./", help="Sandbox path to use")
#parser.add_argument("--certs_path", type=str, default="./", help="Certificates path")

parser.add_argument("--smooth_method", type=str, default="EqualVoting", help="Weight smoothing")
parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters")
Expand All @@ -51,7 +51,7 @@ def check_config(config):
parser.add_argument("--checkpoint_selection_metric", type=str, default="precision", help="Metric used for checkpoints")
parser.add_argument("--production_mode", type=str, default="True", help="Production mode")

parser.add_argument("--data_path", type=str, default=None, help="Data path")
#parser.add_argument("--Wdata_path", type=str, default=None, help="Data path")
parser.add_argument("--local_port", type=int, default=8081, help="Local port")
parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs")
parser.add_argument("--random_forest", type=json.loads, default={"balanced_rf": "true"}, help="Random forest parameters")
Expand All @@ -71,7 +71,7 @@ def check_config(config):
config["experiment_dir"] = experiment_dir

# Create sandbox log file path
sandbox_log_file = Path(os.path.join(config["sandbox_path"], "log_server.txt"))
sandbox_log_file = Path(os.path.join("/sandbox", "log_server.txt"))

# Set up the file handler (writes to file)
file_handler = logging.FileHandler(sandbox_log_file)
Expand Down Expand Up @@ -125,13 +125,13 @@ def flush(self):
check_config(config)
if config["production_mode"] == "True":
print("TRUE")
data_path = os.getenv("DATA_PATH")
#data_path = ""
central_ip = os.getenv("FLOWER_CENTRAL_SERVER_IP")
central_port = os.getenv("FLOWER_CENTRAL_SERVER_PORT")

ca_cert = Path(os.path.join(config["certs_path"],"rootCA_cert.pem"))
server_cert = Path(os.path.join(config["certs_path"],"server_cert.pem"))
server_key = Path(os.path.join(config["certs_path"],"server_key.pem"))
ca_cert = Path(os.path.join("/certs","rootCA_cert.pem"))
server_cert = Path(os.path.join("/certs","server_cert.pem"))
server_key = Path(os.path.join("/certs","server_key.pem"))

certificates = (
Path(f"{ca_cert}").read_bytes(),
Expand All @@ -143,7 +143,7 @@ def flush(self):
# Path('.cache/certificates/server_key.pem').read_bytes(),
else:
print("ELSE")
data_path = config["data_path"]
#data_path = config["data_path"]
central_ip = "LOCALHOST"
central_port = config["local_port"]
certificates = None
Expand Down Expand Up @@ -254,4 +254,4 @@ def flush(self):

# Compile the results
compile_results(experiment_dir)
"""
"""
Loading