From 71d756f417c8c7f06b67162c9c5c37adc7e9b2c6 Mon Sep 17 00:00:00 2001 From: melviii100 Date: Mon, 25 Sep 2017 09:29:12 +0000 Subject: [PATCH] Done --- build.py | 147 +++++++++++++++++++++++++++++++++++++++- build.pyc | Bin 0 -> 1700 bytes tests/__init__.pyc | Bin 0 -> 160 bytes tests/test_solution.pyc | Bin 0 -> 872 bytes 4 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 build.pyc create mode 100644 tests/__init__.pyc create mode 100644 tests/test_solution.pyc diff --git a/build.py b/build.py index 5ee1375..5c17235 100644 --- a/build.py +++ b/build.py @@ -1,4 +1,147 @@ +from __future__ import division # Get float values for all division operations. +from scipy import stats +''' + Assumptions: + The populations from which the samples were obtained must be normally or approximately normally distributed. + The samples must be independent. + The variances of the populations must be equal. -def solution(set1, set2, set3, p_level): - pass \ No newline at end of file + Hypotheses + The null hypothesis will be that all population means are equal, the alternative hypothesis is that at least one mean is different. + + Decision Rule : + The decision will be to reject the null hypothesis if the test statistic from the table is greater than + the F critical value with k-1 numerator and N-k denominator degrees of freedom. + + If the decision is to reject the null, then at least one of the means is different. + However, the ANOVA does not tell you where the difference lies. For this, you need another test, either the Scheffe' or Tukey test. 
+ + F Value/F Ratio in One Way ANOVA: When to Reject the Null : + The F Value in ANOVA + The F value (also called an F statistic or F Ratio) in one way ANOVA is a tool to help you answer + the question "Is the variance between the means of two populations significantly different?" + The F value in the ANOVA test also determines the P value; the P value is the probability of getting a result at least as extreme as the one that was actually observed, given that the null hypothesis is true. + + Reject the null when your p value is smaller than your alpha level. + Even if your F value is larger than the critical F value, you should not reject the null + unless you also have a small p-value. + + Where this could get confusing is where one of these values seems to indicate that you should + reject the null hypothesis and one of the values indicates you should not. For example, + let's say your One Way ANOVA has a p value of 0.68 and an alpha level of 0.05. As the p value is large, + you should not reject the null hypothesis. However, your f value is 0.40 with an f critical value of 3.2. + Should you now reject the null hypothesis? The answer is NO. + + Why? + The F value should always be used along with the p value in deciding whether your results + are significant enough to reject the null hypothesis. If you get a large f value + (one that is bigger than the F critical value found in a table), it means something is significant, + while a small p value means all your results are significant. The F statistic just compares the + joint effect of all the variables together. + + To put it simply, reject the null hypothesis only if your alpha level is larger than your p value. 
def solution(set1, set2, set3, probability_level):
    """Decide whether a one-way ANOVA rejects the null hypothesis.

    The null hypothesis is that the three populations the samples were
    drawn from share the same mean.  It is rejected when the ANOVA
    p-value is strictly smaller than ``probability_level`` (alpha).

    Args:
        set1: Observations of the first group (e.g. placebo).
        set2: Observations of the second group (e.g. low dose).
        set3: Observations of the third group (e.g. moderate dose).
        probability_level: Significance level (alpha), e.g. ``0.05``.

    Returns:
        ``True`` if the null hypothesis is rejected (at least one group
        mean differs), ``False`` otherwise.  If SciPy yields a NaN
        p-value the comparison is false, so ``False`` is returned.
    """
    # The previous hand-rolled sums of squares divided every group total
    # by len(set1), which is only valid when all groups have the same
    # size; with unequal groups it could even produce a negative F value.
    # scipy.stats.f_oneway weights each group by its own size, so it is
    # used as the single source of truth for the F statistic and p-value.
    _f_value, p_value = stats.f_oneway(set1, set2, set3)

    # Decision rule: reject only when alpha is larger than the p-value.
    # bool() keeps the return type a plain Python bool rather than a
    # NumPy boolean.
    return bool(probability_level > p_value)
zQ5-0=U1#6E>iF?0(WVD=y>DG9nU)&{MB%+%gyThPlj!Tw=WRKU5eXNF@Dt9`4^3s$ z%9a<-x#x$6>Dy-U$)6W9o6Z`ssjKI9Ve8Ypyi{pkWsSOj0qM!X-~a#s literal 0 HcmV?d00001