def myXGBoost(X_train, X_test, y_train, y_test, model, param_grid1, KFold=3, **kwargs):
    """Grid-search ``model`` on the training split and score it on the test split.

    Originally added to ``q01_myXGBoost/build.py``.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        X_test: Held-out features used to measure the final accuracy.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        y_test: Held-out labels compared against the predictions for ``X_test``.
        model: Estimator to tune. It is never modified: when ``kwargs`` are
            given they are applied to a clone.
        param_grid1: Mapping of parameter name to the candidate values to try.
        KFold: Forwarded as ``cv`` to ``GridSearchCV`` (defaults to 3).
        **kwargs: Estimator parameters to fix (via ``set_params``) before the
            search starts.

    Returns:
        tuple: ``(accuracy, best_params)`` where ``accuracy`` is the accuracy
        of the refitted best estimator on ``X_test`` / ``y_test`` as a plain
        ``float`` and ``best_params`` is ``GridSearchCV.best_params_``.
    """
    # Function-scope imports keep the block self-contained.
    from sklearn.base import clone
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import GridSearchCV

    # Apply fixed parameters to a clone so the caller's estimator is untouched.
    estimator = clone(model).set_params(**kwargs) if kwargs else model

    search = GridSearchCV(estimator=estimator, param_grid=param_grid1, cv=KFold)
    search.fit(X_train, y_train)

    # Report the measured test accuracy rather than a hard-coded constant.
    # float() replaces np.float, which was removed in NumPy 1.24.
    accuracy = float(accuracy_score(y_test, search.predict(X_test)))
    return accuracy, search.best_params_
def param2(X_train, X_test, y_train, y_test, model, param_grid2):
    """Grid-search ``model`` over ``param_grid2`` and score it on the test split.

    Originally added to ``q02_param2/build.py``.

    Args:
        X_train: Training features passed to ``GridSearchCV.fit``.
        X_test: Held-out features used to measure the final accuracy.
        y_train: Training labels passed to ``GridSearchCV.fit``.
        y_test: Held-out labels compared against the predictions for ``X_test``.
        model: Estimator to tune.
        param_grid2: Mapping of parameter name to the candidate values to try.

    Returns:
        tuple: ``(accuracy, best_params)`` where ``accuracy`` is the accuracy
        of the refitted best estimator on ``X_test`` / ``y_test`` as a plain
        ``float`` and ``best_params`` is ``GridSearchCV.best_params_``.
    """
    # Function-scope imports keep the block self-contained.
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import GridSearchCV

    # cv is pinned to 3 because the library default changed from 3 to 5 folds
    # in scikit-learn 0.22; leaving it implicit makes results version-dependent.
    search = GridSearchCV(estimator=model, param_grid=param_grid2, cv=3)
    search.fit(X_train, y_train)

    # Return what the search actually found, not hard-coded expected values.
    # float() replaces np.float, which was removed in NumPy 1.24.
    accuracy = float(accuracy_score(y_test, search.predict(X_test)))
    return accuracy, search.best_params_
def xgboost(X_train, X_test, y_train, y_test, **kwargs):
    """Fit an ``XGBClassifier`` and return its accuracy on the test split.

    Originally added to ``q03_xgboost/build.py``.

    Args:
        X_train: Training features.
        X_test: Held-out features used to measure accuracy.
        y_train: Training labels.
        y_test: Held-out labels compared against the predictions for ``X_test``.
        **kwargs: ``XGBClassifier`` constructor parameters. Any value given
            here overrides the corresponding default listed below.

    Returns:
        float: Accuracy of the fitted classifier on ``X_test`` / ``y_test``.
    """
    # Function-scope imports keep the block self-contained.
    from sklearn.metrics import accuracy_score
    from xgboost import XGBClassifier

    # Defaults are the values the original implementation hard-coded.
    params = {
        "subsample": 0.8,
        "colsample_bytree": 0.7,
        "max_depth": 2,
        "min_child_weight": 4,
        "reg_alpha": 0,
        "reg_lambda": 1.0,
        "gamma": 0,
        "n_estimators": 100,
        "learning_rate": 0.1,
    }
    # Caller-supplied parameters take precedence over the defaults.
    params.update(kwargs)

    model = XGBClassifier(**params)
    model.fit(X_train, y_train)

    # Return the measured accuracy rather than a hard-coded constant.
    # float() replaces np.float, which was removed in NumPy 1.24.
    return float(accuracy_score(y_test, model.predict(X_test)))