From b6e655c8bee059512397ce9844b45b66f2482c99 Mon Sep 17 00:00:00 2001
From: Adithya <adi.sdboy@gmail.com>
Date: Sun, 20 Jun 2021 17:42:52 -0700
Subject: [PATCH 1/2] Increased KSample Cov

---
 permute/core.py               | 38 ++++++++++----------
 permute/irr.py                | 14 ++++----
 permute/ksample.py            | 22 ++++++------
 permute/npc.py                | 65 ++++++++++++++++++-----------------
 permute/stratified.py         | 12 +++----
 permute/tests/test_ksample.py | 43 +++++++++++++++++++----
 permute/tests/test_npc.py     | 30 ++++++++--------
 permute/utils.py              |  6 ++--
 8 files changed, 132 insertions(+), 98 deletions(-)

diff --git a/permute/core.py b/permute/core.py
index fa7533cb..d7e24e30 100644
--- a/permute/core.py
+++ b/permute/core.py
@@ -30,7 +30,7 @@ def corr(x, y, alternative='greater', reps=10**4, seed=None, plus1=True):
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -70,7 +70,7 @@ def spearman_corr(x, y, alternative='greater', reps=10**4, seed=None, plus1=True
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -78,7 +78,7 @@ def spearman_corr(x, y, alternative='greater', reps=10**4, seed=None, plus1=True
     tuple
         Returns test statistic, p-value, simulated distribution
     """
-    
+
     xnew = np.argsort(x)+1
     ynew = np.argsort(y)+1
     return corr(xnew, ynew, alternative=alternative, reps=reps, seed=seed)
@@ -112,7 +112,7 @@ def two_sample_core(potential_outcomes_all, nx, tst_stat, alternative='greater',
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -203,8 +203,8 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater",
             that function. The function should take two arguments:
             given a permutation of the pooled data, the first argument is the
             "new" x and the second argument is the "new" y.
-            For instance, if the test statistic is the Kolmogorov-Smirnov distance 
-            between the empirical distributions of the two samples, 
+            For instance, if the test statistic is the Kolmogorov-Smirnov distance
+            between the empirical distributions of the two samples,
             $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
             f = lambda u, v: np.max( \
@@ -223,7 +223,7 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater",
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -305,14 +305,14 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater",
             that function. The function should take two arguments:
             given a permutation of the pooled data, the first argument is the
             "new" x and the second argument is the "new" y.
-            For instance, if the test statistic is the Kolmogorov-Smirnov distance 
-            between the empirical distributions of the two samples, 
+            For instance, if the test statistic is the Kolmogorov-Smirnov distance
+            between the empirical distributions of the two samples,
             $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
             f = lambda u, v: np.max( \
                 [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
                 )
-                
+
     alternative : {'greater', 'less', 'two-sided'}
         The alternative hypothesis to test
     keep_dist : bool
@@ -332,7 +332,7 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater",
             $x_i = f(y_i)$ and $y_i = f^{-1}(x_i)$
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -421,14 +421,14 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
             that function. The function should take two arguments:
             given a permutation of the pooled data, the first argument is the
             "new" x and the second argument is the "new" y.
-            For instance, if the test statistic is the Kolmogorov-Smirnov distance 
-            between the empirical distributions of the two samples, 
+            For instance, if the test statistic is the Kolmogorov-Smirnov distance
+            between the empirical distributions of the two samples,
             $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
             f = lambda u, v: np.max( \
                 [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
                 )
-                
+
     shift : float
         The relationship between x and y under the null hypothesis.
 
@@ -437,7 +437,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
             $x_i = f(y_i, d)$ and $y_i = f^{-1}(x_i, d)$
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -456,7 +456,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
     """
     # print warning
     warnings.warn('This function is under construction and outputs may be unreliable.')
-    
+
     assert alternative in ("two-sided", "lower", "upper")
 
     if shift is None:
@@ -488,7 +488,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
                                                 shift=q, reps=reps, stat=stat, plus1=plus1)[0]
         else:
             g = lambda q: cl - two_sample_shift(x, y, alternative="less", seed=seed,
-                                                shift=(lambda u: f(u, q), lambda u: finverse(u, q)), 
+                                                shift=(lambda u: f(u, q), lambda u: finverse(u, q)),
                                                 reps=reps, stat=stat, plus1=plus1)[0]
         ci_low = brentq(g, -2 * shift_limit, 2 * shift_limit)
 
@@ -498,7 +498,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
                                                 shift=q, reps=reps, stat=stat, plus1=plus1)[0]
         else:
             g = lambda q: cl - two_sample_shift(x, y, alternative="greater", seed=seed,
-                                                shift=(lambda u: f(u, q), lambda u: finverse(u, q)), 
+                                                shift=(lambda u: f(u, q), lambda u: finverse(u, q)),
                                                 reps=reps, stat=stat, plus1=plus1)[0]
         ci_upp = brentq(g, -2 * shift_limit, 2 * shift_limit)
 
@@ -566,7 +566,7 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater",
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
diff --git a/permute/irr.py b/permute/irr.py
index 9c29dcb5..96bb9fad 100644
--- a/permute/irr.py
+++ b/permute/irr.py
@@ -131,7 +131,7 @@ def simulate_ts_dist(ratings, obs_ts=None, num_perm=10000,
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -171,10 +171,10 @@ def simulate_ts_dist(ratings, obs_ts=None, num_perm=10000,
         for i in range(num_perm):
             r = permute_rows(r, prng)
             geq += (compute_ts(r) >= obs_ts)
-    return {"obs_ts": obs_ts, 
-            "geq": geq, 
+    return {"obs_ts": obs_ts,
+            "geq": geq,
             "num_perm": num_perm,
-            "pvalue": (geq+plus1) / (num_perm+plus1), 
+            "pvalue": (geq+plus1) / (num_perm+plus1),
             "dist": dist}
 
 
@@ -216,7 +216,7 @@ def simulate_npc_dist(perm_distr, size, obs_ts=None,
         If not input, obs_ts must be specified.
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -246,6 +246,6 @@ def simulate_npc_dist(perm_distr, size, obs_ts=None,
 
     obs_npc = combine_func(pvalues)
     res = npc(pvalues, perm_distr, combine_func)
-    return {"obs_npc": obs_npc, 
-            "pvalue": res, 
+    return {"obs_npc": obs_npc,
+            "pvalue": res,
             "num_perm": B}
diff --git a/permute/ksample.py b/permute/ksample.py
index 560773f6..a6801bb8 100644
--- a/permute/ksample.py
+++ b/permute/ksample.py
@@ -13,12 +13,12 @@
 def k_sample(x, group, reps=10**5, stat='one-way anova',
                keep_dist=False, seed=None, plus1=True):
     r"""
-    k-sample permutation test for equality of more than 2 means, 
+    k-sample permutation test for equality of more than 2 means,
     with p-value estimated by simulated random sampling with
     reps replications.
 
     Tests the hypothesis that groupings are a random partition of x
-    against the alternative that at least one group comes from a 
+    against the alternative that at least one group comes from a
     population with mean different from the rest
 
     If ``keep_dist``, return the distribution of values of the test statistic;
@@ -36,7 +36,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova',
     stat : {'one-way anova'}
         The test statistic.
 
-        (a) If stat == 'one-way anova', use the sum of squared 
+        (a) If stat == 'one-way anova', use the sum of squared
                distances between the group means and the overall mean
                weighted by group size.
                $\sum_{k=1}^K n_k(\overline{X_k} - \overline{X})^2$
@@ -50,7 +50,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova',
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -76,7 +76,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova',
     else:
         tst_fun = stats[stat]
 
-    xbar = np.mean(x)    
+    xbar = np.mean(x)
     observed_tst = tst_fun(x, group, xbar)
 
     if keep_dist:
@@ -112,7 +112,7 @@ def one_way_anova(x, group, overall_mean):
     Returns
     -------
     float
-        the one-way ANOVA statistic 
+        the one-way ANOVA statistic
         $\sum_{k=1}^K n_k(\overline{X_k} - \overline{X})^2$
         where $k$ indexes the groups
     """
@@ -129,12 +129,12 @@ def one_way_anova(x, group, overall_mean):
 def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova',
                keep_dist=False, seed=None, plus1=True):
     r"""
-    k-sample permutation test for equality of more than 2 means, 
+    k-sample permutation test for equality of more than 2 means,
     with p-value estimated by simulated random sampling with
     reps replications.
 
     Tests the hypothesis that within grouping 1, grouping 2 is
-     a random partition of x against the alternative that at 
+     a random partition of x against the alternative that at
     least one group 2 comes from a population with mean different from the rest
 
     If ``keep_dist``, return the distribution of values of the test statistic;
@@ -148,7 +148,7 @@ def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova',
     group1 : array-like
         Fixed group labels for each observation
     group2 : array-like
-        Group labels that, under the null, are exchangeable for each 
+        Group labels that, under the null, are exchangeable for each
         level of group1
     reps : int
         number of repetitions
@@ -171,7 +171,7 @@ def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova',
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -229,7 +229,7 @@ def two_way_anova(x, group1, group2, overall_mean):
     group1 : array-like
         Fixed group labels for each observation
     group2 : array-like
-        Group labels that, under the null, are exchangeable for each 
+        Group labels that, under the null, are exchangeable for each
         level of group1
     overall_mean : float
         mean of x
diff --git a/permute/npc.py b/permute/npc.py
index d41c8be5..a03ef604 100644
--- a/permute/npc.py
+++ b/permute/npc.py
@@ -2,6 +2,7 @@
 import copy
 from scipy.stats import norm, rankdata, ttest_ind, ttest_1samp
 from cryptorandom.sample import random_sample
+
 from .utils import get_prng, permute
 
 
@@ -143,7 +144,7 @@ def npc(pvalues, distr, combine="fisher", plus1=True):
         monotonically decreasing in each p-value.
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -187,7 +188,7 @@ def npc(pvalues, distr, combine="fisher", plus1=True):
 
 
 def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed=None):
-    r''' 
+    r'''
     Combines p-values from individual partial test hypotheses $H_{0i}$ against
     $H_{1i}$, $i=1,\dots,n$ to test the global null hypothesis
 
@@ -198,7 +199,7 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed=
     .. math:: \cup_{i=1}^n H_{1i}
 
     using an omnibus test statistic.
-    
+
     Parameters
     ----------
     data : Experiment object
@@ -217,22 +218,22 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed=
         instance used by `np.random`;
         If int, seed is the seed used by the random number generator;
         If RandomState instance, seed is the pseudorandom number generator
-    
+
     Returns
     -------
     array
-        A single p-value for the global test, 
+        A single p-value for the global test,
         test statistic values on the original data,
         partial p-values
     '''
     # check data is of type Experiment
     if not isinstance(data, Experiment):
         raise ValueError("data not of class Experiment")
-        
+
     # if seed not none, reset seed
     if seed is not None:
         data.randomizer.reset_seed(seed)
-    
+
     ts = {}
     tv = {}
     ps = {}
@@ -242,13 +243,13 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed=
         # apply test statistic function to column
         ts[c] = test[c](data)
         tv[c] = []
-        
+
     # check if randomization in place
     if in_place:
         data_copy = data
     else:
         data_copy = copy.deepcopy(data)
-        
+
     # get test statistics for random samples
     for i in range(reps):
         # randomly permute group
@@ -272,12 +273,12 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed=
 def fwer_minp(pvalues, distr, combine='fisher', plus1=True):
     """
     Adjust p-values using the permutation "minP" variant of Holm's step-up method.
-    
-    When considering a closed testing procedure, the adjusted p-value 
-    $p_i$ for a given hypothesis $H_i$ is the maximum of all p-values for tests 
-    including $H_i$ as a special case (including the p-value for the $H_i$ 
+
+    When considering a closed testing procedure, the adjusted p-value
+    $p_i$ for a given hypothesis $H_i$ is the maximum of all p-values for tests
+    including $H_i$ as a special case (including the p-value for the $H_i$
     test itself).
-    
+
     Parameters
     ----------
     pvalues : array_like
@@ -339,7 +340,7 @@ def randomize_group(data):
 
 def randomize_in_strata(data):
     r"""
-    Stratified randomization where first covariate is the stratum 
+    Stratified randomization where first covariate is the stratum
 
     Parameters
     ----------
@@ -354,8 +355,8 @@ def randomize_in_strata(data):
     strata = data.covariate[:, 0]
     unique_strata = np.unique(strata)
     for value in unique_strata:
-        data.group[strata == value] = random_sample(data.group[strata == value], 
-                                                    len(data.group[strata == value]), 
+        data.group[strata == value] = random_sample(data.group[strata == value],
+                                                    len(data.group[strata == value]),
                                                     prng=data.randomizer.prng)
     return data
 
@@ -380,7 +381,7 @@ class Experiment():
     """
     def __init__(self, group = None, response = None, covariate = None, randomizer = None):
         self.group = None if group is None else np.array(group, dtype = object)
-        self.response = None if response is None else np.array(response, dtype = object) 
+        self.response = None if response is None else np.array(response, dtype = object)
         self.covariate = None if covariate is None else np.array(covariate, dtype = object)
         if randomizer is None:
             self.randomizer = Experiment.Randomizer(randomize = randomize_group)
@@ -388,15 +389,15 @@ def __init__(self, group = None, response = None, covariate = None, randomizer =
             self.randomizer = randomizer
         else:
             raise ValueError("Not of class Randomizer")
-        
-        
+
+
     def __str__(self):
         return "This experiment has " + str(len(self.group)) + " subjects, " + str(len(self.response[0])) \
     + " response variables, and " \
     + (str(len(self.covariate[0])) if self.covariate is not None else str(0)) \
     + " covariates."
-    
-    
+
+
     def randomize(self, in_place = True, seed = None):
         # reset seed, if seed not None
         if seed is not None:
@@ -409,7 +410,7 @@ def randomize(self, in_place = True, seed = None):
             randomized_self.randomizer.randomize(randomized_self)
         return randomized_self
 
-    
+
     @classmethod
     def make_test_array(cls, func, indices):
         def create_func(index):
@@ -418,8 +419,8 @@ def new_func(data):
             return new_func
         test = [create_func(index) for index in indices]
         return test
-    
-    
+
+
     class TestFunc:
         def mean_diff(self, index):
             # get unique groups
@@ -430,16 +431,16 @@ def mean_diff(self, index):
             mx = np.mean(self.response[:, index][self.group == groups[0]])
             my = np.mean(self.response[:, index][self.group == groups[1]])
             return mx-my
-        
+
         def ttest(self, index):
             # get unique groups
             groups = np.unique(self.group)
             if len(groups) != 2:
                 raise ValueError("Number of groups must be two")
-            t = ttest_ind(self.response[:, index][self.group == groups[0]], 
+            t = ttest_ind(self.response[:, index][self.group == groups[0]],
                              self.response[:, index][self.group == groups[1]], equal_var=True)[0]
             return t
-        
+
         def one_way_anova(self, index):
             tst = 0
             overall_mean = np.mean(self.response[:, index])
@@ -449,13 +450,13 @@ def one_way_anova(self, index):
                 nk = len(group_k)
                 tst += (group_mean - overall_mean)**2 * nk
             return tst
-    
-    
+
+
     class Randomizer():
         def __init__(self, randomize = randomize_group, seed = None):
             self.randomize = randomize
-            self.prng = get_prng(seed) 
-            
+            self.prng = get_prng(seed)
+
         # reset seed
         def reset_seed(self, seed = None):
             self.prng = get_prng(seed)
diff --git a/permute/stratified.py b/permute/stratified.py
index d36a861e..f1770331 100644
--- a/permute/stratified.py
+++ b/permute/stratified.py
@@ -58,7 +58,7 @@ def sim_corr(x, y, group, reps=10**4, alternative='greater', seed=None, plus1=Tr
         If RandomState instance, seed is the pseudorandom number generator.
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -81,7 +81,7 @@ def sim_corr(x, y, group, reps=10**4, alternative='greater', seed=None, plus1=Tr
     thePvalue = {
         'greater': lambda p: p + plus1/(reps+plus1),
         'less': lambda p: 1 - (p + plus1/(reps+plus1)),
-        'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), 
+        'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1),
                                            1 - (p + plus1/(reps+plus1))])
     }
     return thePvalue[alternative](right_pv), tst, dist
@@ -199,7 +199,7 @@ def stratified_permutationtest(
         If RandomState instance, seed is the pseudorandom number generator
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -229,7 +229,7 @@ def stratified_permutationtest(
     thePvalue = {
         'greater': lambda p: p + plus1/(reps+plus1),
         'less': lambda p: 1 - (p + plus1/(reps+plus1)),
-        'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), 
+        'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1),
                                            1 - (p + plus1/(reps+plus1))])
     }
 
@@ -322,7 +322,7 @@ def stratified_two_sample(
         If RandomState instance, seed is the pseudorandom number generator.
     plus1 : bool
         flag for whether to add 1 to the numerator and denominator of the
-        p-value based on the empirical permutation distribution. 
+        p-value based on the empirical permutation distribution.
         Default is True.
 
     Returns
@@ -368,7 +368,7 @@ def stratified_two_sample(
     thePvalue = {
         'greater': lambda p: p + plus1/(reps+plus1),
         'less': lambda p: 1 - (p + plus1/(reps+plus1)),
-        'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), 
+        'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1),
                                            1 - (p + plus1/(reps+plus1))])
     }
     observed_tst = tst_fun(response)
diff --git a/permute/tests/test_ksample.py b/permute/tests/test_ksample.py
index e135d383..3b84f1ae 100644
--- a/permute/tests/test_ksample.py
+++ b/permute/tests/test_ksample.py
@@ -2,7 +2,7 @@
 from scipy.stats import hypergeom, binom
 from cryptorandom.cryptorandom import SHA256
 
-from ..ksample import (k_sample, 
+from permute.ksample import (k_sample,
                        one_way_anova,
                        bivariate_k_sample,
                        two_way_anova)
@@ -13,6 +13,20 @@
 def test_worms_ksample():
     worms = data.worms()
     res = k_sample(worms.x, worms.y, stat='one-way anova', reps=1000, seed=1234)
+    print(res[0], " " , res[1])
+    np.testing.assert_array_less(0.006, res[0])
+    np.testing.assert_array_less(res[0], 0.02)
+
+def test_worms_ksample_keep_dist():
+    worms = data.worms()
+    res = k_sample(worms.x, worms.y, stat='one-way anova', reps=1000, seed=1234, keep_dist= True)
+    np.testing.assert_array_less(0.006, res[0])
+    np.testing.assert_array_less(res[0], 0.02)
+    assert len(res) == 3
+
+def test_worms_ksample_callable():
+    worms = data.worms()
+    res = k_sample(worms.x, worms.y, stat = one_way_anova, reps=1000, seed=1234)
     np.testing.assert_array_less(0.006, res[0])
     np.testing.assert_array_less(res[0], 0.02)
 
@@ -22,7 +36,7 @@ def test_one_way_anova():
     x = np.array(range(5))
     xbar = np.mean(x)
     assert one_way_anova(x, group, xbar) == 0
-    
+
     group = np.array([1]*3 + [2]*2)
     expected = 3*1**2 + 2*1.5**2
     assert one_way_anova(x, group, xbar) == expected
@@ -30,18 +44,17 @@ def test_one_way_anova():
 
 def test_two_way_anova():
     prng = get_prng(100)
-    group1 = np.array([1]*5 + [2]*5)                                              
-    group2 = np.array(list(range(5))*2)                                           
+    group1 = np.array([1]*5 + [2]*5)
+    group2 = np.array(list(range(5))*2)
     x = prng.randint(1, 10, 10)
     xbar = np.mean(x)
     val = two_way_anova(x, group1, group2, xbar)
     np.testing.assert_almost_equal(val, 0.296, 3)
-    
+
     x = group2 + 1
     xbar = 3
     assert two_way_anova(x, group1, group2, xbar) == 1
 
-
 def test_testosterone_ksample():
     testosterone = data.testosterone()
     x = np.hstack(testosterone.tolist())
@@ -51,4 +64,22 @@ def test_testosterone_ksample():
     assert len(group2) == 55
     assert len(x) == 55
     res = bivariate_k_sample(x, group1, group2, reps=5000, seed=5)
+    res_keep = bivariate_k_sample(x, group1, group2, reps=5000, seed=5, keep_dist = True)
+    assert len(res_keep) == 3
     np.testing.assert_array_less(res[0], 0.0002)
+    np.testing.assert_array_less(res_keep[0], 0.0002)
+
+
+def test_testosterone_ksample_stat():
+    testosterone = data.testosterone()
+    x = np.hstack(testosterone.tolist())
+    group1 = np.hstack([[i]*5 for i in range(len(testosterone))])
+    group2 = np.array(list(range(5))*len(testosterone))
+    assert len(group1) == 55
+    assert len(group2) == 55
+    assert len(x) == 55
+    res = bivariate_k_sample(x, group1, group2, reps=5000, seed=5, stat = two_way_anova)
+    np.testing.assert_array_less(res[0], 0.0002)
+
+test_worms_ksample_keep_dist()
+test_testosterone_ksample()
diff --git a/permute/tests/test_npc.py b/permute/tests/test_npc.py
index b245ef4d..c0c3b8cb 100644
--- a/permute/tests/test_npc.py
+++ b/permute/tests/test_npc.py
@@ -76,20 +76,20 @@ def test_npc_bad_distr():
 
 def test_npc_single_pvalue():
     pytest.raises(ValueError, npc, np.array([1]), np.array([1, 2, 3]))
-    
+
 
 def test_monotonic_checker():
     pvalues = np.array([0.1, 0.2, 0.3])
     np.testing.assert_equal(check_combfunc_monotonic(pvalues, fisher), True)
     np.testing.assert_equal(check_combfunc_monotonic(pvalues, liptak), True)
     np.testing.assert_equal(check_combfunc_monotonic(pvalues, tippett), True)
-    
+
     comb_function = lambda p: inverse_n_weight(p, np.array([2, 4, 6]))
     np.testing.assert_equal(check_combfunc_monotonic(pvalues, comb_function), True)
-    
+
     bad_comb_function = lambda p: -1*fisher(p)
     np.testing.assert_equal(check_combfunc_monotonic(pvalues, bad_comb_function), False)
-    
+
 
 def test_mono_checker_in_npc():
     prng = RandomState(55)
@@ -103,8 +103,8 @@ def test_minp_bad_distr():
     prng = RandomState(55)
     pvalues = np.linspace(0.05, 0.9, num=5)
     distr = prng.uniform(low=0, high=10, size=20).reshape(10, 2)
-    pytest.raises(ValueError, fwer_minp, pvalues, distr, "fisher")    
-    
+    pytest.raises(ValueError, fwer_minp, pvalues, distr, "fisher")
+
 
 def test_minp_one_pvalue():
     prng = RandomState(55)
@@ -115,12 +115,12 @@ def test_minp_one_pvalue():
 
 def test_sim_npc():
     prng = RandomState(55)
-    # test Y always greater than X so p-value should be 1 
+    # test Y always greater than X so p-value should be 1
     responses = np.array([[0, 1], [0, 1], [1, 2], [1, 2]])
     group = np.array([1, 1, 2, 2])
     my_randomizer = Experiment.Randomizer(randomize = randomize_group, seed = prng)
     data = Experiment(group, responses)
-    
+
     # create median test statistic to apply to every column
     def med_diff(data, resp_index):
         # get response variable for that index
@@ -131,19 +131,19 @@ def med_diff(data, resp_index):
         mx = np.nanmean(resp[data.group == groups[0]])
         my = np.nanmean(resp[data.group == groups[1]])
         return mx-my
-    
+
     test_array = Experiment.make_test_array(med_diff, [0, 1])
     res = sim_npc(data, test_array, combine="fisher", seed=None, reps=int(1000))
     np.testing.assert_almost_equal(res[0], 1)
-    
+
     # test X = Y so p-value should be 1
     responses = np.array([[0, 1], [0, 1], [0, 1], [0, 1]])
     group = np.array([1, 1, 2, 2])
     data = Experiment(group, responses, randomizer = my_randomizer)
-    res = sim_npc(data, test = Experiment.make_test_array(Experiment.TestFunc.mean_diff, [0, 1]), 
+    res = sim_npc(data, test = Experiment.make_test_array(Experiment.TestFunc.mean_diff, [0, 1]),
                   combine="fisher", seed=None, reps=int(1000))
     np.testing.assert_almost_equal(res[0], 1)
-    
+
     # test stat for cat_1 is smaller if X all 0s which about 0.015 chance so pvalue should be about 0.985
     responses = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [1, 1], [1, 1], [1, 1], [0, 1]])
     group = np.array([1, 1, 1, 1, 2, 2, 2, 2])
@@ -151,8 +151,10 @@ def med_diff(data, resp_index):
     res = sim_npc(data, test = Experiment.make_test_array(Experiment.TestFunc.mean_diff, [0, 1]),
                   combine="fisher", seed=None, reps=int(1000))
     np.testing.assert_almost_equal(res[0], 0.985, decimal = 2)
-    
-    
+
+#Make unit tests for non fisher
+
+
 def test_fwer_minp():
     prng = RandomState(55)
     pvalues = np.linspace(0.05, 0.9, num=5)
diff --git a/permute/utils.py b/permute/utils.py
index bb08b488..f1fe4e69 100644
--- a/permute/utils.py
+++ b/permute/utils.py
@@ -135,7 +135,7 @@ def hypergeom_conf_interval(n, x, N, cl=0.975, alternative="two-sided", G=None,
 
 
 def hypergeometric(x, N, n, G, alternative='greater'):
-    
+
     """
     Parameters
     ----------
@@ -188,13 +188,13 @@ def binomial_p(x, n, p, alternative='greater'):
     p : int
        hypothesized number of successes in n trials
     n : int
-       number of trials 
+       number of trials
     alternative : {'greater', 'less', 'two-sided'}
        alternative hypothesis to test (default: 'greater')
     Returns
     -------
     float
-       estimated p-value 
+       estimated p-value
     """
 
     assert alternative in ("two-sided", "less", "greater")

From 8e1943795d45ea34c15b264aa9d568ee4cca94fc Mon Sep 17 00:00:00 2001
From: Adithya <adi.sdboy@gmail.com>
Date: Wed, 30 Jun 2021 23:15:52 -0700
Subject: [PATCH 2/2] Increase ksample coverage

---
 permute/.#ksample.py | 1 +
 permute/ksample.py   | 1 +
 2 files changed, 2 insertions(+)
 create mode 120000 permute/.#ksample.py

diff --git a/permute/.#ksample.py b/permute/.#ksample.py
new file mode 120000
index 00000000..d06bd327
--- /dev/null
+++ b/permute/.#ksample.py
@@ -0,0 +1 @@
+adithyarao@Adithyas-MacBook-Pro.local.15053
\ No newline at end of file
diff --git a/permute/ksample.py b/permute/ksample.py
index a6801bb8..59a29495 100644
--- a/permute/ksample.py
+++ b/permute/ksample.py
@@ -1,3 +1,4 @@
+
 """
 K-sample permutation tests.
 """