10 changes: 7 additions & 3 deletions client_cmd.py
@@ -34,12 +34,9 @@
parser.add_argument("--smooth_method", type=str, default=None, help="Weight smoothing")
parser.add_argument("--seed", type=int, default=42, help="Seed")
parser.add_argument("--local_port", type=int, default=8081, help="Local port")
parser.add_argument("--data_path", type=str, default=None, help="Data path")
parser.add_argument("--production_mode", type=str, default="True", help="Production mode")
parser.add_argument("--certs_path", type=str, default="./", help="Certificates path")
parser.add_argument("--node_name", type=str, default="./", help="Node name for certificates")

parser.add_argument("--sandbox_path", type=str, default="./", help="Sandbox path to use")
parser.add_argument("--experiment", type=json.loads, default={"name": "experiment_1", "log_path": "logs", "debug": "true"}, help="experiment logs")
parser.add_argument("--smoothWeights", type=json.loads, default= {"smoothing_strenght": 0.5}, help="Smoothing parameters")
parser.add_argument("--linear_models", type=json.loads, default={"n_features": 9}, help="Linear model parameters")
@@ -48,9 +45,16 @@
parser.add_argument("--weighted_random_forest", type=json.loads, default={"balanced_rf": "true", "levelOfDetail": "DecisionTree"}, help="Weighted random forest parameters")
parser.add_argument("--xgb", type=json.loads, default={"batch_size": 32,"num_iterations": 100,"task_type": "BINARY","tree_num": 500}, help="XGB parameters")

# Variables hardcoded
parser.add_argument("--sandbox_path", type=str, default="./sandbox", help="Sandbox path to use")
parser.add_argument("--certs_path", type=str, default="./certs_path", help="Certificates path")
parser.add_argument("--data_path", type=str, default="./data", help="Data path")
args = parser.parse_args()

config = vars(args)
# config["sandbox_path"] = "./sandbox"
# config["certs_path"] = "/app/config/certificates"
# config["data_path"] = "./data"

if config["model"] in ("logistic_regression", "elastic_net", "lsvc"):
config["linear_models"] = {}
90 changes: 68 additions & 22 deletions flcore/datasets.py
@@ -564,19 +564,34 @@ def load_dt4h(config,id):

     dat_len = len(dat)
 
-    # Numerical variables
     numeric_columns_non_zero = {}
-    for feat in metadata["entries"][0]["featureSet"]["features"]:
-        if feat["dataType"] == "NUMERIC" and feat["statistics"]["numOfNotNull"] != 0:
+    for feat in metadata["entity"]["features"]:
+        if (feat["dataType"] == "NUMERIC"
+                and feat["name"] in train_labels
+                and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
             # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
             numeric_columns_non_zero[feat["name"]] = (
-                feat["statistics"]["Q1"],
-                feat["statistics"]["avg"],
-                feat["statistics"]["min"],
-                feat["statistics"]["Q2"],
-                feat["statistics"]["max"],
-                feat["statistics"]["Q3"],
-                feat["statistics"]["numOfNotNull"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q1"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["avg"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["min"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q2"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["max"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["q3"],
+                metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"],
             )
+    for feat in metadata["entity"]["outcomes"]:
+        if (feat["dataType"] == "NUMERIC"
+                and feat["name"] in target_labels
+                and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
+            # statistic keys = ['Q1', 'avg', 'min', 'Q2', 'max', 'Q3', 'numOfNotNull']
+            numeric_columns_non_zero[feat["name"]] = (
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q1"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["avg"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["min"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q2"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["max"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["q3"],
+                metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"],
+            )
 
     for col, (q1,avg,mini,q2,maxi,q3,numOfNotNull) in numeric_columns_non_zero.items():
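
Both loops above collect the same seven summary statistics, so this is a pure schema migration: statistics no longer hang off each feature entry (feat["statistics"] with capitalized Q1/Q2/Q3 keys) but live in a dataset-level datasetStats block, split into featureStats for input features and outcomeStats for targets, with lowercase q1/q2/q3 keys; the copied "statistic keys" comment still lists the old capitalized names. A sketch of the metadata shape the new code expects — only the keys referenced in this diff are shown, and the feature names and values are invented:

# Hypothetical metadata fragment matching the access paths above (values are made up):
metadata = {
    "entity": {
        "features": [{"name": "age", "dataType": "NUMERIC"}],
        "outcomes": [{"name": "outcome", "dataType": "NUMERIC"}],
        "datasetStats": {
            "featureStats": {
                "age": {"q1": 48.0, "avg": 61.0, "min": 18.0, "q2": 63.0,
                        "max": 90.0, "q3": 74.0, "numOfNotNull": 980},
            },
            "outcomeStats": {
                "outcome": {"q1": 0.0, "avg": 0.3, "min": 0.0, "q2": 0.0,
                            "max": 1.0, "q3": 1.0, "numOfNotNull": 1000},
            },
        },
    },
}

stats = metadata["entity"]["datasetStats"]["featureStats"]["age"]
# min_max_normalize itself is outside this diff; standard min-max scaling would do:
scaled = (63.0 - stats["min"]) / (stats["max"] - stats["min"])  # (63-18)/(90-18) = 0.625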
@@ -587,28 +602,59 @@ def load_dt4h(config,id):
             pass # no std found in data set
         elif config["normalization_method"] == "MIN_MAX":
             dat[col] = min_max_normalize(col, mini, maxi)
-    tipos=[]
+    #tipos=[]
     map_variables = {}
-    for feat in metadata["entries"][0]["featureSet"]["features"]:
-        tipos.append(feat["dataType"])
-        if feat["dataType"] == "NOMINAL" and feat["statistics"]["numOfNotNull"] != 0:
-            num_cat = len(feat["statistics"]["valueset"])
+    for feat in metadata["entity"]["features"]:
+        if (feat["dataType"] == "NOMINAL"
+                and feat["name"] in train_labels
+                and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
+            #print("FEAT", feat["name"])
+            map_cat = {}
+            if "valueSet" in feat.keys():
+                for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
+                    #print(ind,cat_["code"])
+                    cat = cat_["code"]
+                    map_cat[cat] = ind
+            else:
+                pass
+                #print("NO",feat["name"])
+            map_variables[feat["name"]] = map_cat
+
+    for feat in metadata["entity"]["outcomes"]:
+        if (feat["dataType"] == "NOMINAL"
+                and feat["name"] in target_labels
+                and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
+            #print("FEAT", feat["name"])
             map_cat = {}
-            for ind, cat in enumerate(feat["statistics"]["valueset"]):
-                map_cat[cat] = ind
+            if "valueSet" in feat.keys():
+                for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
+                    #print(ind,cat_["code"])
+                    cat = cat_["code"]
+                    map_cat[cat] = ind
+            else:
+                pass # temporal
+                #print("NO",feat["name"])
             map_variables[feat["name"]] = map_cat
 
     for col,mapa in map_variables.items():
         dat[col] = dat[col].map(mapa)
 
     dat[map_variables.keys()].dropna()
 
-    tipos=[]
 
     map_variables = {}
     boolean_map = {np.bool_(False) :0, np.bool_(True):1, "False":0,"True":1}
-    for feat in metadata["entries"][0]["featureSet"]["features"]:
-        tipos.append(feat["dataType"])
-        if feat["dataType"] == "BOOLEAN" and feat["statistics"]["numOfNotNull"] != 0:
+    for feat in metadata["entity"]["features"]:
+        if (feat["dataType"] == "BOOLEAN"
+                and feat["name"] in train_labels
+                and metadata["entity"]["datasetStats"]["featureStats"][feat["name"]]["numOfNotNull"] != 0):
             map_variables[feat["name"]] = boolean_map
 
+    for feat in metadata["entity"]["outcomes"]:
+        if (feat["dataType"] == "BOOLEAN"
+                and feat["name"] in target_labels
+                and metadata["entity"]["datasetStats"]["outcomeStats"][feat["name"]]["numOfNotNull"] != 0):
+            map_variables[feat["name"]] = boolean_map
+
+    for col,mapa in map_variables.items():
+        dat[col] = dat[col].map(boolean_map)
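
The NOMINAL and BOOLEAN sections follow the same featureStats/outcomeStats migration and also change how categories are discovered: instead of enumerating feat["statistics"]["valueset"], the new code enumerates the concept codes of the feature's valueSet entry, falling back to an empty mapping when no valueSet is present. Two details worth noting: dat[map_variables.keys()].dropna() returns a new frame that is never assigned, so it leaves dat unchanged, and the final loop maps every collected column through boolean_map directly (its loop variable mapa is unused, which is harmless here because every value in map_variables is boolean_map). A sketch of the encoding the valueSet loop builds (the feature and its codes are invented):

# Hypothetical NOMINAL feature carrying a valueSet, as the new loop expects:
feat = {
    "name": "smoking_status",
    "dataType": "NOMINAL",
    "valueSet": {"concept": [{"code": "never"}, {"code": "former"}, {"code": "current"}]},
}

map_cat = {}
if "valueSet" in feat.keys():
    # Same enumeration as in the diff: a code's position in the concept list becomes its integer label.
    for ind, cat_ in enumerate(feat["valueSet"]["concept"]):
        map_cat[cat_["code"]] = ind

# map_cat == {"never": 0, "former": 1, "current": 2}
# dat["smoking_status"].map(map_cat) then replaces each code with 0/1/2,
# producing NaN for any code that is not in the valueSet.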
