diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..6ffe1cd Binary files /dev/null and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_my_decision_regressor/__pycache__/__init__.cpython-36.pyc b/q01_my_decision_regressor/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..f5b820a Binary files /dev/null and b/q01_my_decision_regressor/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_my_decision_regressor/__pycache__/build.cpython-36.pyc b/q01_my_decision_regressor/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..af8a113 Binary files /dev/null and b/q01_my_decision_regressor/__pycache__/build.cpython-36.pyc differ diff --git a/q01_my_decision_regressor/build.py b/q01_my_decision_regressor/build.py index 5eb1927..ca8cdad 100644 --- a/q01_my_decision_regressor/build.py +++ b/q01_my_decision_regressor/build.py @@ -5,13 +5,23 @@ from sklearn.model_selection import train_test_split import pandas as pd -data = pd.read_csv("./data/house_pricing.csv") +data = pd.read_csv('./data/house_pricing.csv') X = data.iloc[:, :-1] y = data.iloc[:, -1] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9) -param_grid = {"max_depth": [2, 3, 5, 6, 8, 10, 15, 20, 30, 50], - "max_leaf_nodes": [2, 3, 4, 5, 10, 15, 20], - "max_features": [4, 8, 20, 25]} +param_grid = {'max_depth': [2, 3, 5, 6, 8, 10, 15, 20, 30, 50], + 'max_leaf_nodes': [2, 3, 4, 5, 10, 15, 20], + 'max_features': [4, 8, 20, 25]} # Write your solution here : +def my_decision_regressor(X_train, X_test,y_train, y_test, param_grid): + tree = DecisionTreeRegressor(random_state = 9) + gdcv = GridSearchCV(estimator = tree, param_grid= param_grid, cv=5) + grid_search = gdcv.fit(X_train,y_train) + y_pred = grid_search.predict(X_test) + return float(r2_score(y_test, y_pred)), grid_search.best_params_ + +my_decision_regressor(X_train, X_test,y_train, y_test, param_grid) + + diff --git a/q01_my_decision_regressor/tests/__pycache__/__init__.cpython-36.pyc b/q01_my_decision_regressor/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..9436eeb Binary files /dev/null and b/q01_my_decision_regressor/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_my_decision_regressor/tests/__pycache__/test_q01_my_decision_regressor.cpython-36.pyc b/q01_my_decision_regressor/tests/__pycache__/test_q01_my_decision_regressor.cpython-36.pyc new file mode 100644 index 0000000..d2d95b1 Binary files /dev/null and b/q01_my_decision_regressor/tests/__pycache__/test_q01_my_decision_regressor.cpython-36.pyc differ diff --git a/q02_decision_regressor_plot/__pycache__/__init__.cpython-36.pyc b/q02_decision_regressor_plot/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..c7036cd Binary files /dev/null and b/q02_decision_regressor_plot/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_decision_regressor_plot/__pycache__/build.cpython-36.pyc b/q02_decision_regressor_plot/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..c61a7ec Binary files /dev/null and b/q02_decision_regressor_plot/__pycache__/build.cpython-36.pyc differ diff --git a/q02_decision_regressor_plot/build.py b/q02_decision_regressor_plot/build.py index 020d81e..0013b0b 100644 --- a/q02_decision_regressor_plot/build.py +++ b/q02_decision_regressor_plot/build.py @@ -4,14 +4,39 @@ from sklearn.metrics import mean_squared_error import pandas as pd import matplotlib.pyplot as plt -import numpy as np plt.switch_backend('agg') -data = pd.read_csv("./data/house_pricing.csv") +import numpy as np + +data = pd.read_csv('./data/house_pricing.csv') X = data.iloc[:, :-1] y = data.iloc[:, -1] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=9) -depth_list = [2, 8, 10, 15, 20, 25, 30, 35, 45, 50, 80] +depth_list = [2, 8, 10, 15, 20, 25, 30, 35, 45, 50, 65, 80] # Write your solution here : + +def decision_regressor_plot(X_train, X_test, y_train, y_test, depths): + mean_test_scores = [] + mean_train_scores = [] + + for depth in depths: + dt_regressor = DecisionTreeRegressor(max_depth=depth) + dt_regressor.fit(X_train, y_train) + mse_train = mean_squared_error(y_train, dt_regressor.predict(X_train)) + mse_test = mean_squared_error(y_test, dt_regressor.predict(X_test)) + mean_test_scores.append(mse_test) + mean_train_scores.append(mse_train) + + plt.figure(figsize=(10, 6)) + plt.plot(depths, mean_train_scores, c='b', label='Train set') + plt.plot(depths, mean_test_scores, c='g', label='Test set') + plt.legend(loc='upper left') + plt.xlabel('depths') + plt.ylabel('mean square error') + plt.show() +decision_regressor_plot(X_train, X_test, y_train, y_test, depth_list) + + + diff --git a/q02_decision_regressor_plot/tests/__pycache__/__init__.cpython-36.pyc b/q02_decision_regressor_plot/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..0acc425 Binary files /dev/null and b/q02_decision_regressor_plot/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_decision_regressor_plot/tests/__pycache__/test_q02_decision_regressor_plot.cpython-36.pyc b/q02_decision_regressor_plot/tests/__pycache__/test_q02_decision_regressor_plot.cpython-36.pyc new file mode 100644 index 0000000..5b29a4b Binary files /dev/null and b/q02_decision_regressor_plot/tests/__pycache__/test_q02_decision_regressor_plot.cpython-36.pyc differ diff --git a/q04_decision_classifier_plot/__pycache__/__init__.cpython-36.pyc b/q04_decision_classifier_plot/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..d1234ef Binary files /dev/null and b/q04_decision_classifier_plot/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_decision_classifier_plot/__pycache__/build.cpython-36.pyc b/q04_decision_classifier_plot/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..cdca40c Binary files /dev/null and b/q04_decision_classifier_plot/__pycache__/build.cpython-36.pyc differ diff --git a/q04_decision_classifier_plot/build.py b/q04_decision_classifier_plot/build.py index 44e9e87..9054bce 100644 --- a/q04_decision_classifier_plot/build.py +++ b/q04_decision_classifier_plot/build.py @@ -8,7 +8,7 @@ import numpy as np plt.switch_backend('agg') -data = pd.read_csv("./data/loan_prediction.csv") +data = pd.read_csv('./data/loan_prediction.csv') np.random.seed(9) X = data.iloc[:, :-1] y = data.iloc[:, -1] @@ -16,5 +16,23 @@ depth_list = [8, 10, 15, 20, 50, 100, 120, 150, 175, 200] - # Write your solution here : + +def decision_classifier_plot(X_train, X_test, y_train, y_test, depths): + lst = [] + for depth in depths: + tree_reg = DecisionTreeClassifier(max_depth=depth,random_state=9) + tree_reg.fit(X_train, y_train) + y_pred_test=tree_reg.predict(X_test) + y_pred_train=tree_reg.predict(X_train) + lst.append((depth,accuracy_score(y_train,y_pred_train),accuracy_score(y_test,y_pred_test))) + df = pd.DataFrame(lst) + plt.plot(df.iloc[:,0],df.iloc[:,1],c='r',label = 'Train') + plt.plot(df.iloc[:,0],df.iloc[:,2],c='g',label = 'Test') + plt.legend() + #plt.show() + +decision_classifier_plot(X_train, X_test, y_train, y_test, depths=depth_list) + + + diff --git a/q04_decision_classifier_plot/tests/__pycache__/__init__.cpython-36.pyc b/q04_decision_classifier_plot/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..3e4406b Binary files /dev/null and b/q04_decision_classifier_plot/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_decision_classifier_plot/tests/__pycache__/test_q04_decision_classifier_plot.cpython-36.pyc b/q04_decision_classifier_plot/tests/__pycache__/test_q04_decision_classifier_plot.cpython-36.pyc new file mode 100644 index 0000000..14b6a0c Binary files /dev/null and b/q04_decision_classifier_plot/tests/__pycache__/test_q04_decision_classifier_plot.cpython-36.pyc differ