diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index ebbd53a..203cded 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 745b533..f9ea68e 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 108e4a3..fb4b9b1 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index e4cd8e3..5bef318 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,10 +1,16 @@ +# %load q01_load_data/build.py # Default imports import pandas as pd from sklearn.model_selection import train_test_split - path = 'data/house_prices_multivariate.csv' +path = 'data/house_prices_multivariate.csv' +def load_data(path, test_size=0.33, random_state=9): + df = pd.read_csv(path) + X = df.iloc[:, :-1] + y = df['SalePrice'] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state) + return df, X_train, X_test, y_train, y_test -# Write your solution here diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 133357e..a0a7a8d 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc b/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc index 689755b..2413e58 100644 Binary files a/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc differ diff --git a/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc b/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc index 93c9119..ef54e3c 100644 Binary files a/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc and b/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_Max_important_feature/__pycache__/build.cpython-36.pyc b/q02_Max_important_feature/__pycache__/build.cpython-36.pyc index 2b7cfd4..622ac78 100644 Binary files a/q02_Max_important_feature/__pycache__/build.cpython-36.pyc and b/q02_Max_important_feature/__pycache__/build.cpython-36.pyc differ diff --git a/q02_Max_important_feature/build.py b/q02_Max_important_feature/build.py index 51fbde6..188a474 100644 --- a/q02_Max_important_feature/build.py +++ b/q02_Max_important_feature/build.py @@ -1,8 +1,25 @@ +# %load q02_Max_important_feature/build.py # Default imports from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data - +import pandas as pd # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') # Write your code here +def Max_important_feature(data_set, target_variable='SalePrice', n=4): + columns = list(data_set.columns) + columns.remove(target_variable) + df = pd.DataFrame({'corr': []}, columns=['corr'], index=[]) + for column in columns: + corr = data_set[column].corr(data_set[target_variable]) + new_df = pd.DataFrame({ 'corr' : [corr] }, columns=['corr'], index=[column]) + df = df.append(new_df) + df = df.sort_values('corr') + + mlist = list(df.tail(n).index) + mlist = mlist[::-1] + return mlist + + + diff --git a/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc b/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc index cec58d4..ae77396 100644 Binary files a/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc and b/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc b/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc index cb6849b..0e9e2ba 100644 Binary files a/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc and b/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc differ diff --git a/q03_polynomial/__pycache__/__init__.cpython-36.pyc b/q03_polynomial/__pycache__/__init__.cpython-36.pyc index aa42922..9decf8a 100644 Binary files a/q03_polynomial/__pycache__/__init__.cpython-36.pyc and b/q03_polynomial/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_polynomial/__pycache__/build.cpython-36.pyc b/q03_polynomial/__pycache__/build.cpython-36.pyc index 3be41d0..eee3b21 100644 Binary files a/q03_polynomial/__pycache__/build.cpython-36.pyc and b/q03_polynomial/__pycache__/build.cpython-36.pyc differ diff --git a/q03_polynomial/build.py b/q03_polynomial/build.py index 26d8971..9ac9d01 100644 --- a/q03_polynomial/build.py +++ b/q03_polynomial/build.py @@ -1,3 +1,4 @@ +# %load q03_polynomial/build.py # Default imports from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data from sklearn.preprocessing import PolynomialFeatures @@ -9,3 +10,18 @@ # Write your solution here + +def polynomial(power=5, random_state=9): + data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv',random_state=random_state) + features = ['OverallQual','GrLivArea','GarageCars','GarageArea'] + X_train = X_train[features] + + poly_model = make_pipeline(PolynomialFeatures(degree=power, include_bias=False), + LinearRegression()) + + poly_model.fit(X_train, y_train) + + return poly_model + + + diff --git a/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc b/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc index 6e20876..99ba70c 100644 Binary files a/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc and b/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc b/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc index ef8c88b..a69a3c8 100644 Binary files a/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc and b/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc differ diff --git a/q04_ridge/__pycache__/__init__.cpython-36.pyc b/q04_ridge/__pycache__/__init__.cpython-36.pyc index 4342136..6c0cb68 100644 Binary files a/q04_ridge/__pycache__/__init__.cpython-36.pyc and b/q04_ridge/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_ridge/__pycache__/build.cpython-36.pyc b/q04_ridge/__pycache__/build.cpython-36.pyc index ea08c01..76954f2 100644 Binary files a/q04_ridge/__pycache__/build.cpython-36.pyc and b/q04_ridge/__pycache__/build.cpython-36.pyc differ diff --git a/q04_ridge/build.py b/q04_ridge/build.py index 9ee00b1..38eae6a 100644 --- a/q04_ridge/build.py +++ b/q04_ridge/build.py @@ -1,15 +1,27 @@ +# %load q04_ridge/build.py # Default imports from sklearn.linear_model import Ridge import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + # Write your solution here +def ridge(alpha=0.01): + data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv', random_state=9) + ridge = Ridge(alpha=alpha, normalize=True) + ridge = ridge.fit(X_train, y_train) + y_pred_test = ridge.predict(X_test) + y_pred_train = ridge.predict(X_train) + rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train)) + rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test)) + return rmse_train, rmse_test, ridge + diff --git a/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc b/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc index 6d021b5..5564eb4 100644 Binary files a/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc and b/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc b/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc index 0549421..fb9a3fe 100644 Binary files a/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc and b/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc differ diff --git a/q05_lasso/__pycache__/__init__.cpython-36.pyc b/q05_lasso/__pycache__/__init__.cpython-36.pyc index 1005306..30a5f2b 100644 Binary files a/q05_lasso/__pycache__/__init__.cpython-36.pyc and b/q05_lasso/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_lasso/__pycache__/build.cpython-36.pyc b/q05_lasso/__pycache__/build.cpython-36.pyc index b4ea629..99bdc2d 100644 Binary files a/q05_lasso/__pycache__/build.cpython-36.pyc and b/q05_lasso/__pycache__/build.cpython-36.pyc differ diff --git a/q05_lasso/build.py b/q05_lasso/build.py index fb30d50..b64468a 100644 --- a/q05_lasso/build.py +++ b/q05_lasso/build.py @@ -1,14 +1,29 @@ +# %load q05_lasso/build.py # Default imports from sklearn.linear_model import Lasso import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + # Write your solution here +def lasso(alpha=0.01): + data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv', random_state=9) + lasso = Lasso(alpha=alpha, normalize=True) + lasso.fit(X_train, y_train) + + y_pred_train = lasso.predict(X_train) + y_pred_test = lasso.predict(X_test) + + rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train)) + rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test)) + + return rmse_train, rmse_test + diff --git a/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc b/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc index 8869434..426dbc5 100644 Binary files a/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc and b/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc b/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc index 438235e..18421a2 100644 Binary files a/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc and b/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc index fa7d8bf..03b9a1c 100644 Binary files a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/build.cpython-36.pyc b/q06_cross_validation/__pycache__/build.cpython-36.pyc index 19e8bd8..0cef42f 100644 Binary files a/q06_cross_validation/__pycache__/build.cpython-36.pyc and b/q06_cross_validation/__pycache__/build.cpython-36.pyc differ diff --git a/q06_cross_validation/build.py b/q06_cross_validation/build.py index e39b93b..adb3487 100644 --- a/q06_cross_validation/build.py +++ b/q06_cross_validation/build.py @@ -1,13 +1,18 @@ +# %load q06_cross_validation/build.py # Default imports from sklearn.model_selection import cross_val_score import numpy as np from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + # Write your solution here +def cross_validation(model, X, y): + scores = cross_val_score(model, X, y, cv=5, scoring=('neg_mean_squared_error')) + return scores.mean() diff --git a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc index ca3f5cd..d4a80a8 100644 Binary files a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc index e7acaaf..fd6929c 100644 Binary files a/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc differ