def my_decision_regressor(X_train, X_test, y_train, y_test, param_grid):
    """Grid-search a decision tree regressor and score it on the test split.

    A ``DecisionTreeRegressor`` (``random_state=9``) is tuned by a 5-fold
    ``GridSearchCV`` over ``param_grid`` using the training data. The fitted
    search object then predicts ``X_test``.

    Returns:
        A ``(r_square, best_params)`` tuple: the ``r2_score`` of the test
        predictions against ``y_test`` and the search's ``best_params_``.
    """
    search = GridSearchCV(
        estimator=DecisionTreeRegressor(random_state=9),
        param_grid=param_grid,
        cv=5,
    )
    search.fit(X_train, y_train)

    predicted = search.predict(X_test)
    return r2_score(y_test, predicted), search.best_params_
def decision_regressor_plot(X_train, X_test, y_train, y_test, depth_list):
    """Plot train and test mean squared error for a range of tree depths.

    For every value in ``depth_list`` a ``DecisionTreeRegressor`` with that
    ``max_depth`` (``random_state=9``) is fitted on the training split, and
    ``mean_squared_error`` is computed for its predictions on both the
    training and the test split. Two curves are drawn with pyplot against
    ``depth_list``: training error first, then test error.

    Returns:
        None. The result is the plot.
    """
    mse_train = []
    mse_test = []
    for depth in depth_list:
        # The criterion is deliberately left at the library default, which is
        # mean squared error. The explicit 'mse' spelling is not accepted by
        # newer scikit-learn releases (renamed to 'squared_error').
        regressor = DecisionTreeRegressor(max_depth=depth, random_state=9)
        regressor.fit(X_train, y_train)

        # Append to the same lists that are plotted below; the earlier code
        # appended to names that were never defined and raised NameError.
        mse_train.append(mean_squared_error(y_train, regressor.predict(X_train)))
        mse_test.append(mean_squared_error(y_test, regressor.predict(X_test)))

    plt.plot(depth_list, mse_train)
    plt.plot(depth_list, mse_test)
    plt.xlabel('depths')
    plt.ylabel('mean squared error')
    plt.show()
def my_decision_classifier(X_train, X_test, y_train, y_test, param_grid, n_iter_search=10):
    """Randomized-search a decision tree classifier and score it on the test split.

    A ``DecisionTreeClassifier`` (``random_state=9``) is tuned by
    ``RandomizedSearchCV``, which samples ``n_iter_search`` candidates from
    ``param_grid`` and fits on the training data. The fitted search object
    then predicts ``X_test``.

    Returns:
        An ``(accuracy, best_params)`` tuple: the ``accuracy_score`` of the
        test predictions against ``y_test`` and the search's ``best_params_``.
    """
    search = RandomizedSearchCV(
        estimator=DecisionTreeClassifier(random_state=9),
        param_distributions=param_grid,
        n_iter=n_iter_search,
    )
    search.fit(X_train, y_train)

    predicted = search.predict(X_test)
    return accuracy_score(y_test, predicted), search.best_params_


# The statements below repeat the same search at module level with n_iter=10.
# They execute whenever this module is imported and bind the module globals
# `model`, `random_search`, `best_params`, `y_predict` and `accuracy`.
model = DecisionTreeClassifier(random_state=9)
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=10,
)
random_search.fit(X_train, y_train)
best_params = random_search.best_params_
y_predict = random_search.predict(X_test)
accuracy = accuracy_score(y_test, y_predict)
# Bare expressions: their values are evaluated and discarded.
type(best_params)
accuracy
def decision_classifier_plot(X_train, X_test, y_train, y_test, depth_list):
    """Plot train and test accuracy for a range of tree depths.

    For every value in ``depth_list`` a ``DecisionTreeClassifier`` with that
    ``max_depth`` (``random_state=9``) is fitted on the training split, and
    ``accuracy_score`` is computed for its predictions on the training split
    and then on the test split. Both score series are drawn with pyplot
    against ``depth_list``, training first, with labelled axes.

    Returns:
        None. The result is the plot.
    """
    train_scores = []
    test_scores = []
    for depth in depth_list:
        classifier = DecisionTreeClassifier(max_depth=depth, random_state=9)
        classifier = classifier.fit(X_train, y_train)

        train_predictions = classifier.predict(X_train)
        train_scores.append(accuracy_score(y_train, train_predictions))

        test_predictions = classifier.predict(X_test)
        test_scores.append(accuracy_score(y_test, test_predictions))

    for scores in (train_scores, test_scores):
        plt.plot(depth_list, scores)
    plt.xlabel('depths')
    plt.ylabel('accuracy score')
    plt.show()