diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
index 2ba0c81..31f887f 100644
Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_cond_prob/__pycache__/__init__.cpython-36.pyc b/q01_cond_prob/__pycache__/__init__.cpython-36.pyc
index a5c1ab2..e4aac8f 100644
Binary files a/q01_cond_prob/__pycache__/__init__.cpython-36.pyc and b/q01_cond_prob/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_cond_prob/__pycache__/build.cpython-36.pyc b/q01_cond_prob/__pycache__/build.cpython-36.pyc
index 4654504..cca1450 100644
Binary files a/q01_cond_prob/__pycache__/build.cpython-36.pyc and b/q01_cond_prob/__pycache__/build.cpython-36.pyc differ
diff --git a/q01_cond_prob/build.py b/q01_cond_prob/build.py
index 46a16ee..2d3947d 100644
--- a/q01_cond_prob/build.py
+++ b/q01_cond_prob/build.py
@@ -1,3 +1,4 @@
+# %load q01_cond_prob/build.py
 # So that float division is by default in python 2.7
 from __future__ import division
@@ -7,6 +8,22 @@
 # Enter Code Here
+def cond_prob(df):
+
+    total_house_in_oldtown = len(df[df['Neighborhood'] == 'OldTown'])  # number of houses in OldTown
+    total_house = len(df)  # total number of houses
+
+    # probability that three houses drawn without replacement are all in OldTown
+    cond_probability = (total_house_in_oldtown / total_house) \
+        * ((total_house_in_oldtown - 1) / (total_house - 1)) \
+        * ((total_house_in_oldtown - 2) / (total_house - 2))
+
+    return cond_probability
+
+
+cond_prob(df)
diff --git a/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc b/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc
index 9e8f52b..a7009f0 100644
Binary files a/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc and b/q01_cond_prob/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc b/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc
index e8852e9..688a732 100644
Binary files a/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc and b/q01_cond_prob/tests/__pycache__/test_q01_cond_prob.cpython-36.pyc differ
diff --git a/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc b/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc
index 741ad2d..09e489d 100644
Binary files a/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc and b/q02_confidence_interval/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q02_confidence_interval/__pycache__/build.cpython-36.pyc b/q02_confidence_interval/__pycache__/build.cpython-36.pyc
index b478df2..3ad9c81 100644
Binary files a/q02_confidence_interval/__pycache__/build.cpython-36.pyc and b/q02_confidence_interval/__pycache__/build.cpython-36.pyc differ
diff --git a/q02_confidence_interval/build.py b/q02_confidence_interval/build.py
index 023b81e..a472c51 100644
--- a/q02_confidence_interval/build.py
+++ b/q02_confidence_interval/build.py
@@ -1,3 +1,4 @@
+# %load q02_confidence_interval/build.py
 # Default imports
 import math
 import scipy.stats as stats
@@ -8,6 +9,22 @@
 # Write your solution here :
+def confidence_interval(df):
+    sample_mean = sample.mean()  # mean of the sample
+    z_critical = stats.norm.ppf(0.95)  # z critical value for a two-sided 90% interval
+    sample_dev = sample.std()  # standard deviation of the sample
+    standard_error = z_critical * (sample_dev / (len(sample)) ** 0.5)  # margin of error: z * standard error of the mean
+    lower_limit = sample_mean - standard_error
+    upper_limit = sample_mean + standard_error
+
+    return lower_limit, upper_limit
+
+
+confidence_interval(df)
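Side note (not part of the diff): the q01 product above is just the hypergeometric probability of drawing three OldTown houses without replacement, and the q02 bounds coincide with a two-sided 90% normal interval. The standalone sketch below cross-checks both on placeholder numbers; the counts and prices are invented for illustration and are not values from data/house_pricing.csv.

import numpy as np
import scipy.stats as stats
from scipy.stats import hypergeom

# --- q01: product formula vs. the hypergeometric pmf (hypothetical counts) ---
total_houses = 1460        # placeholder for len(df)
oldtown_houses = 73        # placeholder for the OldTown count

product_form = (oldtown_houses / total_houses) \
    * ((oldtown_houses - 1) / (total_houses - 1)) \
    * ((oldtown_houses - 2) / (total_houses - 2))
# P(3 OldTown houses in 3 draws without replacement) = C(k, 3) / C(N, 3)
hypergeom_form = hypergeom.pmf(3, total_houses, oldtown_houses, 3)
print(product_form, hypergeom_form)   # the two values agree

# --- q02: ppf(0.95) margin vs. a two-sided 90% normal interval (hypothetical prices) ---
sample = np.array([150000.0, 185000.0, 210000.0, 172500.0, 198000.0])
sem = sample.std() / len(sample) ** 0.5
lower, upper = stats.norm.interval(0.90, loc=sample.mean(), scale=sem)
print(lower, upper)   # same bounds as mean +/- norm.ppf(0.95) * sem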
diff --git a/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc b/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc
index 2eb0cc4..c96a221 100644
Binary files a/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc and b/q02_confidence_interval/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc b/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc
index c3788ca..a79467e 100644
Binary files a/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc and b/q02_confidence_interval/tests/__pycache__/test_q02_confidence_interval.cpython-36.pyc differ
diff --git a/q03_t_test/__pycache__/__init__.cpython-36.pyc b/q03_t_test/__pycache__/__init__.cpython-36.pyc
index cac7d29..b19822b 100644
Binary files a/q03_t_test/__pycache__/__init__.cpython-36.pyc and b/q03_t_test/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q03_t_test/__pycache__/build.cpython-36.pyc b/q03_t_test/__pycache__/build.cpython-36.pyc
index d55dfcf..5835c94 100644
Binary files a/q03_t_test/__pycache__/build.cpython-36.pyc and b/q03_t_test/__pycache__/build.cpython-36.pyc differ
diff --git a/q03_t_test/build.py b/q03_t_test/build.py
index f966b62..a169574 100644
--- a/q03_t_test/build.py
+++ b/q03_t_test/build.py
@@ -1,9 +1,25 @@
+# %load q03_t_test/build.py
 # Default imports
 import scipy.stats as stats
 import pandas as pd
+import numpy as np
 df = pd.read_csv('data/house_pricing.csv')
 # Enter Code Here
+def t_statistic(df):
+    alpha = 0.10
+    # one-sample t-test: OldTown living areas against the overall GrLivArea mean
+    t_test, p_value = stats.ttest_1samp(df[df['Neighborhood'] == 'OldTown']['GrLivArea'],
+                                        df['GrLivArea'].mean())
+
+    if p_value < alpha:
+        # reject the null hypothesis that the OldTown mean equals the overall mean
+        return p_value, np.bool_(True)
+    else:
+        return p_value, np.bool_(False)
+
+
+t_statistic(df)
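Side note (not part of the diff): a minimal, self-contained illustration of the one-sample t-test wired up in q03, run on synthetic living-area values generated with NumPy rather than figures from data/house_pricing.csv.

import numpy as np
import scipy.stats as stats

rng = np.random.RandomState(0)
oldtown_areas = rng.normal(loc=1200, scale=300, size=100)  # synthetic stand-in for OldTown GrLivArea
overall_mean = 1500                                        # synthetic stand-in for df['GrLivArea'].mean()

# two-sided one-sample t-test of the OldTown mean against the overall mean
t_stat, p_value = stats.ttest_1samp(oldtown_areas, overall_mean)
print(t_stat, p_value, p_value < 0.10)  # True means the null is rejected at alpha = 0.10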
diff --git a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc
index c489290..13f8cd3 100644
Binary files a/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc and b/q03_t_test/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc
index ffd3551..5f76546 100644
Binary files a/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc and b/q03_t_test/tests/__pycache__/test_q03_t_test.cpython-36.pyc differ
diff --git a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc
index 07afcf0..945d32a 100644
Binary files a/q04_chi2_test/__pycache__/__init__.cpython-36.pyc and b/q04_chi2_test/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q04_chi2_test/__pycache__/build.cpython-36.pyc b/q04_chi2_test/__pycache__/build.cpython-36.pyc
index 699bd6a..ddb8af7 100644
Binary files a/q04_chi2_test/__pycache__/build.cpython-36.pyc and b/q04_chi2_test/__pycache__/build.cpython-36.pyc differ
diff --git a/q04_chi2_test/build.py b/q04_chi2_test/build.py
index 4f20455..e3c37ca 100644
--- a/q04_chi2_test/build.py
+++ b/q04_chi2_test/build.py
@@ -1,10 +1,31 @@
+# %load q04_chi2_test/build.py
 # Default imports
 import scipy.stats as stats
 import pandas as pd
+import numpy as np
 df = pd.read_csv('data/house_pricing.csv')
 # Enter Code Here
+def chi_square(df):
+
+    # bin SalePrice into three categories based on quantiles
+    price = pd.qcut(df['SalePrice'], 3, labels=['low', 'medium', 'High'])
+
+    # contingency table of LandSlope against the price category
+    freq_table = pd.crosstab(df.LandSlope, price)
+
+    # chi-square test of independence on the contingency table
+    chi, p, dof, expected = stats.chi2_contingency(freq_table)
+
+    if p < 0.05:  # a low p-value means the variables are unlikely to be independent
+        return p, np.bool_(True)
+    else:
+        return p, np.bool_(False)  # otherwise independence cannot be rejected
+
+
+chi_square(df)
diff --git a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc
index 45a1b92..de0a922 100644
Binary files a/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc and b/q04_chi2_test/tests/__pycache__/__init__.cpython-36.pyc differ
diff --git a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc
index b2a8c04..203d080 100644
Binary files a/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc and b/q04_chi2_test/tests/__pycache__/test_q04_chi2_test.cpython-36.pyc differ
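Side note (not part of the diff): a minimal standalone sketch of the chi-square independence test used in q04, applied to a made-up 2x3 contingency table instead of the LandSlope-by-price crosstab; all counts below are hypothetical.

import numpy as np
import scipy.stats as stats

# hypothetical counts: rows = slope-like categories, columns = price bands
observed = np.array([[30, 45, 25],
                     [10, 20, 15]])

chi2, p, dof, expected = stats.chi2_contingency(observed)
print(chi2, p, dof)
print(expected)  # expected counts under the independence hypothesis
# a p-value below 0.05 would mean the two variables are unlikely to be independent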