From 71d756f417c8c7f06b67162c9c5c37adc7e9b2c6 Mon Sep 17 00:00:00 2001
From: melviii100 <melvinjose1992@gmail.com>
Date: Mon, 25 Sep 2017 09:29:12 +0000
Subject: [PATCH] Done

---
 build.py                | 147 +++++++++++++++++++++++++++++++++++++++-
 build.pyc               | Bin 0 -> 1700 bytes
 tests/__init__.pyc      | Bin 0 -> 160 bytes
 tests/test_solution.pyc | Bin 0 -> 872 bytes
 4 files changed, 145 insertions(+), 2 deletions(-)
 create mode 100644 build.pyc
 create mode 100644 tests/__init__.pyc
 create mode 100644 tests/test_solution.pyc

diff --git a/build.py b/build.py
index 5ee1375..5c17235 100644
--- a/build.py
+++ b/build.py
@@ -1,4 +1,147 @@
+from __future__ import division   # Get float values for all division operations.
+from scipy import stats
 
+'''
+    Assumptions:
+        The populations from which the samples were obtained must be normally or approximately normally distributed.
+        The samples must be independent.
+        The variances of the populations must be equal.
 
-def solution(set1, set2, set3, p_level):
-    pass
\ No newline at end of file
+    Hypotheses
+        The null hypothesis will be that all population means are equal, the alternative hypothesis is that at least one mean is different.
+
+    Decision Rule :
+        The decision will be to reject the null hypothesis if the test statistic from the table is greater than
+        the F critical value with k-1 numerator and N-k denominator degrees of freedom.
+
+        If the decision is to reject the null, then at least one of the means is different.
+        However, the ANOVA does not tell you where the difference lies. For this, you need another test, either the Scheffe' or Tukey test.
+
+        F Value/F Ratio in One Way ANOVA: When to Reject the Null :
+            The F Value in ANOVA
+                The F value (also called an F statistic or F Ratio) in one way ANOVA is a tool to help you answer
+                the question "Is the variance between the means of two populations significantly different?"
+                The F value in the ANOVA test also determines the P value. The P value is the probability of getting a result at least as extreme as the one that was actually observed, given that the null hypothesis is true.
+
+            Reject the null when your p value is smaller than your alpha level.
+            You should not reject the null if your critical f value is smaller than your F Value,
+            unless you also have a small p-value.
+
+            Where this could get confusing is where one of these values seems to indicate that you should
+            reject the null hypothesis and one of the values indicates you should not. For example,
+            let's say your One Way ANOVA has a p value of 0.68 and an alpha level of 0.05. As the p value is large,
+            you should not reject the null hypothesis. However, your f value is 0.40 with an f critical value of 3.2.
+            Should you now reject the null hypothesis? The answer is NO.
+
+            Why?
+                The F value should always be used along with the p value in deciding whether your results
+                are significant enough to reject the null hypothesis. If you get a large f value
+                (one that is bigger than the F critical value found in a table), it means something is significant,
+                while a small p value means all your results are significant. The F statistic just compares the
+                joint effect of all the variables together.
+
+                To put it simply, reject the null hypothesis only if your alpha level is larger than your p value.
+
+'''
+
+
+def solution(set1, set2, set3, probability_level):
+    """Decide whether a one-way ANOVA rejects the null hypothesis.
+
+    The three samples are treated as independent groups (for example
+    placebo, low dose and moderate dose).  The F statistic is computed by
+    hand from the between-group and within-group sums of squares and turned
+    into a p-value with ``scipy.stats.f.sf``.
+
+    Every group total is divided by the size of its OWN group, so the
+    samples are allowed to have different lengths.
+
+    The test assumes approximately normal populations, independent samples
+    and equal population variances; none of this is checked here.
+
+    Args:
+        set1: Observations of the first group (any iterable of numbers).
+        set2: Observations of the second group.
+        set3: Observations of the third group.
+        probability_level: Significance level (alpha), for example ``0.05``.
+
+    Returns:
+        bool: ``True`` when the p-value is strictly smaller than
+        ``probability_level``, i.e. the hypothesis "all population means are
+        equal" is rejected; ``False`` otherwise.
+
+    Raises:
+        ValueError: If a group is empty, or if there are not more
+            observations than groups (no within-group degrees of freedom).
+
+    Example:
+        >>> solution([1, 2, 3], [2, 3, 4], [10, 11, 12], 0.05)
+        True
+        >>> solution([1, 2, 3], [1, 2, 3], [1, 2, 3], 0.05)
+        False
+    """
+    groups = [list(set1), list(set2), list(set3)]
+    if not all(groups):
+        raise ValueError("every group needs at least one observation")
+
+    # Degrees of freedom: k - 1 between groups, N - k within groups.
+    group_count = len(groups)
+    total_size = sum(len(group) for group in groups)
+    df_between = group_count - 1
+    df_within = total_size - group_count
+    if df_within <= 0:
+        raise ValueError("need more observations than groups")
+
+    # Mean squares: each sum of squares over its degrees of freedom.
+    ss_between, ss_within = _sums_of_squares(groups)
+    ms_between = ss_between / df_between
+    ms_within = ss_within / df_within
+
+    if ms_within == 0:
+        # No spread inside any group: F is infinite when the group means
+        # differ (p-value 0) and undefined when they do not, in which case
+        # there is no evidence against the null hypothesis (p-value 1).
+        p_value = 0.0 if ms_between > 0 else 1.0
+    else:
+        f_value = ms_between / ms_within
+        # Survival function: P(F >= f_value) if the null hypothesis holds.
+        p_value = stats.f.sf(f_value, df_between, df_within)
+
+    # Reject the null only if alpha is larger than the p-value.  bool() turns
+    # a possible numpy.bool_ into a plain Python bool.
+    return bool(probability_level > p_value)
+
+
+def _sums_of_squares(groups):
+    """Return ``(ss_between, ss_within)`` for non-empty lists of numbers.
+
+    Computational formulas, with T_i / n_i the total / size of group i, GT
+    the grand total and N the overall number of observations:
+
+        SSbetween = sum(T_i ** 2 / n_i) - GT ** 2 / N
+        SSwithin  = sum(x ** 2)         - sum(T_i ** 2 / n_i)
+
+    Their sum is the total sum of squares, sum(x ** 2) - GT ** 2 / N.
+
+    Both results are clamped at zero because floating-point rounding can
+    otherwise leave a tiny negative value where the exact answer is 0.
+
+    Args:
+        groups: Sequence of non-empty lists of numbers, one list per group.
+
+    Returns:
+        tuple: ``(ss_between, ss_within)`` as non-negative floats.
+    """
+    # float() keeps the divisions below true divisions for integer data.
+    totals = [float(sum(group)) for group in groups]
+    sizes = [len(group) for group in groups]
+    grand_total = sum(totals)
+    total_size = sum(sizes)
+
+    # sum(T_i ** 2 / n_i) is shared by both sums of squares.
+    scaled_totals = sum(t ** 2 / n for t, n in zip(totals, sizes))
+    raw_squares = sum(x ** 2 for group in groups for x in group)
+
+    ss_between = scaled_totals - grand_total ** 2 / total_size
+    ss_within = raw_squares - scaled_totals
+    return max(ss_between, 0.0), max(ss_within, 0.0)
diff --git a/build.pyc b/build.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4da3ec0e6e6ec3647ebc8e2fd09e3a8b3e0a3bff
GIT binary patch
literal 1700
zcmb_c%Wm676um=IlC6iU_#rBi>#8UaXi-OgY>Og5U8kEEg(aY<(UhR1Q8F<_D&$bM
zf#J&ivv$!%|Dzw!4`|N}NjC=4T}5%`+;i_--WO^A)vCY#<M$tDRQxRLF9t-(A;27e
zMgh^1(G?@6Ws3rfmNs44kZt;f=*pqFL*rouC&b_Q4H=N!&<?~}Bq9oPMjIs3m&wcl
z`W&}rz8QeM#d8%==8$5PRY+NsRY?-PW28WWsnWeoKY_1Nwn3^!YJ(OQz5GsE+T<SR
zHg&jJogNqsDq7`g1P?%2gQQ)aXc}xxvX;TdDBB{{)Vc0tpO9+hs7-1sM^CgCv<t$`
zw)MF;h~J^J*KNvnNbMNApDlX-n+a^UG^u>V<WFO=SDI8mVp3;1y$-2eQV3|1`dzh0
z3K41ODrgXs!I@i{6l%9nxoX=)r5ZSrb&an*q|m|rq2?(mC|xAhBZ<;KCA9h8GPqaZ
zYn$R<8M=<@S7>hP$3m0<hkJQfRGsEk>Sley10i&?edD-K{p?fiU+n`Okb-akR5RVK
z(mOO3e;nJ_4S(Ef9a#^?pB1ifa<>ZxyW7tO|HZ<5P(+2!Z_u?vv9vIuW4VdJtzil;
zgpF0`@fT)JcY(f*<0u}8a4_?OSsM5fqiiNz=m{Uzv0OgP`ofDse&gR^7>EKMnmtLE
zC=gK`rQsaaB;vP1N_}X7k;LRL3_FQaZVb=;AmKy(ZWun{^0)*iB{-E&pkKw&)Srq#
z$U86KYaU3Q$13n=d>WazFNkiuc?1_ND3;Nj$G+q_(*d*a^vfs`DP~?2dkb&v2Z+LW
zDX@++G|^t0^}@tFInK9}d^<%Ei6+Nh@;>!ro=Yc>q|;K;DZiW;U$=4_0t)06F_$(w
z1)Yb_lalWi{G&`trT*HC$2UU06`_e>Jf3oS!_kMfQB5N02gp;r5uu;gYCJZiF+V#m
z&9$eq^A9||dhn!GugZsLflD6&#B)=$Xld{eJeehV0W^9WE%|84gS8Oi_~>Z#Llj>p
zD|BfzLrX>?T<{oC3O_hBAIC#KjMn~Wnu=ile066!nM6UVhwlgulkc&RRb{v;eb%Tp
zfd+Py;j-!sm$hYo!Ejm6oM(0q%mJ&gmesJK)3;OK>{<<{UhA-q)u=a_V_lf9x6~8u
vd5cu0G55Sn{U9@y$xN*7%&1>}3GUw%|K~ivLT}T6f34>((QiwewV(e3p~Y+~

literal 0
HcmV?d00001

diff --git a/tests/__init__.pyc b/tests/__init__.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1803cf44679e0e38a8b682090cc79789255ff1b7
GIT binary patch
literal 160
zcmZSn%*(ZL(TT`p1}I<z(hfje%mO4*7#M;zKq7t`K!O2{DE0)3>1X8Urt0UW=9Fb-
zW*Qn8=!fSQWfvDDCa3Br=clCVXXd4)7NzEuWG3e57MCQJ6zeAD<(DPum!uY#6zj*w
fXXa&=#K-FuRF-f6O|!|(PbtkwwFB8%48#loSH32E

literal 0
HcmV?d00001

diff --git a/tests/test_solution.pyc b/tests/test_solution.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..17d84800fbcfa1a29de78853c64623640d2fafd7
GIT binary patch
literal 872
zcmcJN&u$Yj5XL_zY1*buNs9!63pZFHg)Mgkh~9J9R6?zUR!hbyhW%5;-bzUADZB(6
zxpCwzcpn}BzHve;aj&)WTYEe^{$|$Q-^0Pr@4rrFSdT8>uW9}%A$$x<pfWf$Fw%iF
zm>WnJUw|cWa@v9PFikcX?*4EjhO@9m99tib3U9;idNFOvHn^s~K1@U3(EL7Op=a<&
zW@grG8Z*r_rbFAN9nt(df~u%xCG3jAE`g67Tt;D(BO8!DWD{qcY(cgm1IP|!H+qJU
zJ;)u%2(lkN2Uzy7)C$2`?(&4mf5rH<cK4W@E>%2HJWyz_+&zVU>a=)wU-3w>r+BE?
zQ5-0=U1#6E>iF?0(WVD=y>DG9nU)&{MB%+%gyThPlj!Tw=WRKU5eXNF@Dt9`4^3s$
z%9a<-x#x$6>Dy-U$)6W9o6Z`ssjKI9Ve8<Ea_mD9{J5x_iy{sDOFlCHSL3tu&tb@v
zx^?YTdA?7;j5=ep(yW!{d0kXC&!gfzuNrAfE;Eh2(Rf=zGrb?NFWRBLncN6#YZtVH
Yj2nn?w=XAN{1>Ypyi{pkWsSOj0qM!X-~a#s

literal 0
HcmV?d00001