From b6e655c8bee059512397ce9844b45b66f2482c99 Mon Sep 17 00:00:00 2001 From: Adithya Date: Sun, 20 Jun 2021 17:42:52 -0700 Subject: [PATCH 1/2] Increased KSample Cov --- permute/core.py | 38 ++++++++++---------- permute/irr.py | 14 ++++---- permute/ksample.py | 22 ++++++------ permute/npc.py | 65 ++++++++++++++++++----------------- permute/stratified.py | 12 +++---- permute/tests/test_ksample.py | 43 +++++++++++++++++++---- permute/tests/test_npc.py | 30 ++++++++-------- permute/utils.py | 6 ++-- 8 files changed, 132 insertions(+), 98 deletions(-) diff --git a/permute/core.py b/permute/core.py index fa7533cb..d7e24e30 100644 --- a/permute/core.py +++ b/permute/core.py @@ -30,7 +30,7 @@ def corr(x, y, alternative='greater', reps=10**4, seed=None, plus1=True): If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -70,7 +70,7 @@ def spearman_corr(x, y, alternative='greater', reps=10**4, seed=None, plus1=True If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -78,7 +78,7 @@ def spearman_corr(x, y, alternative='greater', reps=10**4, seed=None, plus1=True tuple Returns test statistic, p-value, simulated distribution """ - + xnew = np.argsort(x)+1 ynew = np.argsort(y)+1 return corr(xnew, ynew, alternative=alternative, reps=reps, seed=seed) @@ -112,7 +112,7 @@ def two_sample_core(potential_outcomes_all, nx, tst_stat, alternative='greater', If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -203,8 +203,8 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater", that function. The function should take two arguments: given a permutation of the pooled data, the first argument is the "new" x and the second argument is the "new" y. - For instance, if the test statistic is the Kolmogorov-Smirnov distance - between the empirical distributions of the two samples, + For instance, if the test statistic is the Kolmogorov-Smirnov distance + between the empirical distributions of the two samples, $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written: f = lambda u, v: np.max( \ @@ -223,7 +223,7 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater", If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -305,14 +305,14 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater", that function. The function should take two arguments: given a permutation of the pooled data, the first argument is the "new" x and the second argument is the "new" y. - For instance, if the test statistic is the Kolmogorov-Smirnov distance - between the empirical distributions of the two samples, + For instance, if the test statistic is the Kolmogorov-Smirnov distance + between the empirical distributions of the two samples, $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written: f = lambda u, v: np.max( \ [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\ ) - + alternative : {'greater', 'less', 'two-sided'} The alternative hypothesis to test keep_dist : bool @@ -332,7 +332,7 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater", $x_i = f(y_i)$ and $y_i = f^{-1}(x_i)$ plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -421,14 +421,14 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, that function. The function should take two arguments: given a permutation of the pooled data, the first argument is the "new" x and the second argument is the "new" y. - For instance, if the test statistic is the Kolmogorov-Smirnov distance - between the empirical distributions of the two samples, + For instance, if the test statistic is the Kolmogorov-Smirnov distance + between the empirical distributions of the two samples, $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written: f = lambda u, v: np.max( \ [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\ ) - + shift : float The relationship between x and y under the null hypothesis. @@ -437,7 +437,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, $x_i = f(y_i, d)$ and $y_i = f^{-1}(x_i, d)$ plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -456,7 +456,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, """ # print warning warnings.warn('This function is under construction and outputs may be unreliable.') - + assert alternative in ("two-sided", "lower", "upper") if shift is None: @@ -488,7 +488,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, shift=q, reps=reps, stat=stat, plus1=plus1)[0] else: g = lambda q: cl - two_sample_shift(x, y, alternative="less", seed=seed, - shift=(lambda u: f(u, q), lambda u: finverse(u, q)), + shift=(lambda u: f(u, q), lambda u: finverse(u, q)), reps=reps, stat=stat, plus1=plus1)[0] ci_low = brentq(g, -2 * shift_limit, 2 * shift_limit) @@ -498,7 +498,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, shift=q, reps=reps, stat=stat, plus1=plus1)[0] else: g = lambda q: cl - two_sample_shift(x, y, alternative="greater", seed=seed, - shift=(lambda u: f(u, q), lambda u: finverse(u, q)), + shift=(lambda u: f(u, q), lambda u: finverse(u, q)), reps=reps, stat=stat, plus1=plus1)[0] ci_upp = brentq(g, -2 * shift_limit, 2 * shift_limit) @@ -566,7 +566,7 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns diff --git a/permute/irr.py b/permute/irr.py index 9c29dcb5..96bb9fad 100644 --- a/permute/irr.py +++ b/permute/irr.py @@ -131,7 +131,7 @@ def simulate_ts_dist(ratings, obs_ts=None, num_perm=10000, If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -171,10 +171,10 @@ def simulate_ts_dist(ratings, obs_ts=None, num_perm=10000, for i in range(num_perm): r = permute_rows(r, prng) geq += (compute_ts(r) >= obs_ts) - return {"obs_ts": obs_ts, - "geq": geq, + return {"obs_ts": obs_ts, + "geq": geq, "num_perm": num_perm, - "pvalue": (geq+plus1) / (num_perm+plus1), + "pvalue": (geq+plus1) / (num_perm+plus1), "dist": dist} @@ -216,7 +216,7 @@ def simulate_npc_dist(perm_distr, size, obs_ts=None, If not input, obs_ts must be specified. plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -246,6 +246,6 @@ def simulate_npc_dist(perm_distr, size, obs_ts=None, obs_npc = combine_func(pvalues) res = npc(pvalues, perm_distr, combine_func) - return {"obs_npc": obs_npc, - "pvalue": res, + return {"obs_npc": obs_npc, + "pvalue": res, "num_perm": B} diff --git a/permute/ksample.py b/permute/ksample.py index 560773f6..a6801bb8 100644 --- a/permute/ksample.py +++ b/permute/ksample.py @@ -13,12 +13,12 @@ def k_sample(x, group, reps=10**5, stat='one-way anova', keep_dist=False, seed=None, plus1=True): r""" - k-sample permutation test for equality of more than 2 means, + k-sample permutation test for equality of more than 2 means, with p-value estimated by simulated random sampling with reps replications. Tests the hypothesis that groupings are a random partition of x - against the alternative that at least one group comes from a + against the alternative that at least one group comes from a population with mean different from the rest If ``keep_dist``, return the distribution of values of the test statistic; @@ -36,7 +36,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova', stat : {'one-way anova'} The test statistic. - (a) If stat == 'one-way anova', use the sum of squared + (a) If stat == 'one-way anova', use the sum of squared distances between the group means and the overall mean weighted by group size. $\sum_{k=1}^K n_k(\overline{X_k} - \overline{X})^2$ @@ -50,7 +50,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova', If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -76,7 +76,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova', else: tst_fun = stats[stat] - xbar = np.mean(x) + xbar = np.mean(x) observed_tst = tst_fun(x, group, xbar) if keep_dist: @@ -112,7 +112,7 @@ def one_way_anova(x, group, overall_mean): Returns ------- float - the one-way ANOVA statistic + the one-way ANOVA statistic $\sum_{k=1}^K n_k(\overline{X_k} - \overline{X})^2$ where $k$ indexes the groups """ @@ -129,12 +129,12 @@ def one_way_anova(x, group, overall_mean): def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova', keep_dist=False, seed=None, plus1=True): r""" - k-sample permutation test for equality of more than 2 means, + k-sample permutation test for equality of more than 2 means, with p-value estimated by simulated random sampling with reps replications. Tests the hypothesis that within grouping 1, grouping 2 is - a random partition of x against the alternative that at + a random partition of x against the alternative that at least one group 2 comes from a population with mean different from the rest If ``keep_dist``, return the distribution of values of the test statistic; @@ -148,7 +148,7 @@ def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova', group1 : array-like Fixed group labels for each observation group2 : array-like - Group labels that, under the null, are exchangeable for each + Group labels that, under the null, are exchangeable for each level of group1 reps : int number of repetitions @@ -171,7 +171,7 @@ def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova', If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -229,7 +229,7 @@ def two_way_anova(x, group1, group2, overall_mean): group1 : array-like Fixed group labels for each observation group2 : array-like - Group labels that, under the null, are exchangeable for each + Group labels that, under the null, are exchangeable for each level of group1 overall_mean : float mean of x diff --git a/permute/npc.py b/permute/npc.py index d41c8be5..a03ef604 100644 --- a/permute/npc.py +++ b/permute/npc.py @@ -2,6 +2,7 @@ import copy from scipy.stats import norm, rankdata, ttest_ind, ttest_1samp from cryptorandom.sample import random_sample + from .utils import get_prng, permute @@ -143,7 +144,7 @@ def npc(pvalues, distr, combine="fisher", plus1=True): monotonically decreasing in each p-value. plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -187,7 +188,7 @@ def npc(pvalues, distr, combine="fisher", plus1=True): def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed=None): - r''' + r''' Combines p-values from individual partial test hypotheses $H_{0i}$ against $H_{1i}$, $i=1,\dots,n$ to test the global null hypothesis @@ -198,7 +199,7 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed= .. math:: \cup_{i=1}^n H_{1i} using an omnibus test statistic. - + Parameters ---------- data : Experiment object @@ -217,22 +218,22 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed= instance used by `np.random`; If int, seed is the seed used by the random number generator; If RandomState instance, seed is the pseudorandom number generator - + Returns ------- array - A single p-value for the global test, + A single p-value for the global test, test statistic values on the original data, partial p-values ''' # check data is of type Experiment if not isinstance(data, Experiment): raise ValueError("data not of class Experiment") - + # if seed not none, reset seed if seed is not None: data.randomizer.reset_seed(seed) - + ts = {} tv = {} ps = {} @@ -242,13 +243,13 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed= # apply test statistic function to column ts[c] = test[c](data) tv[c] = [] - + # check if randomization in place if in_place: data_copy = data else: data_copy = copy.deepcopy(data) - + # get test statistics for random samples for i in range(reps): # randomly permute group @@ -272,12 +273,12 @@ def sim_npc(data, test, combine="fisher", in_place=False, reps=int(10**4), seed= def fwer_minp(pvalues, distr, combine='fisher', plus1=True): """ Adjust p-values using the permutation "minP" variant of Holm's step-up method. - - When considering a closed testing procedure, the adjusted p-value - $p_i$ for a given hypothesis $H_i$ is the maximum of all p-values for tests - including $H_i$ as a special case (including the p-value for the $H_i$ + + When considering a closed testing procedure, the adjusted p-value + $p_i$ for a given hypothesis $H_i$ is the maximum of all p-values for tests + including $H_i$ as a special case (including the p-value for the $H_i$ test itself). - + Parameters ---------- pvalues : array_like @@ -339,7 +340,7 @@ def randomize_group(data): def randomize_in_strata(data): r""" - Stratified randomization where first covariate is the stratum + Stratified randomization where first covariate is the stratum Parameters ---------- @@ -354,8 +355,8 @@ def randomize_in_strata(data): strata = data.covariate[:, 0] unique_strata = np.unique(strata) for value in unique_strata: - data.group[strata == value] = random_sample(data.group[strata == value], - len(data.group[strata == value]), + data.group[strata == value] = random_sample(data.group[strata == value], + len(data.group[strata == value]), prng=data.randomizer.prng) return data @@ -380,7 +381,7 @@ class Experiment(): """ def __init__(self, group = None, response = None, covariate = None, randomizer = None): self.group = None if group is None else np.array(group, dtype = object) - self.response = None if response is None else np.array(response, dtype = object) + self.response = None if response is None else np.array(response, dtype = object) self.covariate = None if covariate is None else np.array(covariate, dtype = object) if randomizer is None: self.randomizer = Experiment.Randomizer(randomize = randomize_group) @@ -388,15 +389,15 @@ def __init__(self, group = None, response = None, covariate = None, randomizer = self.randomizer = randomizer else: raise ValueError("Not of class Randomizer") - - + + def __str__(self): return "This experiment has " + str(len(self.group)) + " subjects, " + str(len(self.response[0])) \ + " response variables, and " \ + (str(len(self.covariate[0])) if self.covariate is not None else str(0)) \ + " covariates." - - + + def randomize(self, in_place = True, seed = None): # reset seed, if seed not None if seed is not None: @@ -409,7 +410,7 @@ def randomize(self, in_place = True, seed = None): randomized_self.randomizer.randomize(randomized_self) return randomized_self - + @classmethod def make_test_array(cls, func, indices): def create_func(index): @@ -418,8 +419,8 @@ def new_func(data): return new_func test = [create_func(index) for index in indices] return test - - + + class TestFunc: def mean_diff(self, index): # get unique groups @@ -430,16 +431,16 @@ def mean_diff(self, index): mx = np.mean(self.response[:, index][self.group == groups[0]]) my = np.mean(self.response[:, index][self.group == groups[1]]) return mx-my - + def ttest(self, index): # get unique groups groups = np.unique(self.group) if len(groups) != 2: raise ValueError("Number of groups must be two") - t = ttest_ind(self.response[:, index][self.group == groups[0]], + t = ttest_ind(self.response[:, index][self.group == groups[0]], self.response[:, index][self.group == groups[1]], equal_var=True)[0] return t - + def one_way_anova(self, index): tst = 0 overall_mean = np.mean(self.response[:, index]) @@ -449,13 +450,13 @@ def one_way_anova(self, index): nk = len(group_k) tst += (group_mean - overall_mean)**2 * nk return tst - - + + class Randomizer(): def __init__(self, randomize = randomize_group, seed = None): self.randomize = randomize - self.prng = get_prng(seed) - + self.prng = get_prng(seed) + # reset seed def reset_seed(self, seed = None): self.prng = get_prng(seed) diff --git a/permute/stratified.py b/permute/stratified.py index d36a861e..f1770331 100644 --- a/permute/stratified.py +++ b/permute/stratified.py @@ -58,7 +58,7 @@ def sim_corr(x, y, group, reps=10**4, alternative='greater', seed=None, plus1=Tr If RandomState instance, seed is the pseudorandom number generator. plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -81,7 +81,7 @@ def sim_corr(x, y, group, reps=10**4, alternative='greater', seed=None, plus1=Tr thePvalue = { 'greater': lambda p: p + plus1/(reps+plus1), 'less': lambda p: 1 - (p + plus1/(reps+plus1)), - 'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), + 'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), 1 - (p + plus1/(reps+plus1))]) } return thePvalue[alternative](right_pv), tst, dist @@ -199,7 +199,7 @@ def stratified_permutationtest( If RandomState instance, seed is the pseudorandom number generator plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -229,7 +229,7 @@ def stratified_permutationtest( thePvalue = { 'greater': lambda p: p + plus1/(reps+plus1), 'less': lambda p: 1 - (p + plus1/(reps+plus1)), - 'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), + 'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), 1 - (p + plus1/(reps+plus1))]) } @@ -322,7 +322,7 @@ def stratified_two_sample( If RandomState instance, seed is the pseudorandom number generator. plus1 : bool flag for whether to add 1 to the numerator and denominator of the - p-value based on the empirical permutation distribution. + p-value based on the empirical permutation distribution. Default is True. Returns @@ -368,7 +368,7 @@ def stratified_two_sample( thePvalue = { 'greater': lambda p: p + plus1/(reps+plus1), 'less': lambda p: 1 - (p + plus1/(reps+plus1)), - 'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), + 'two-sided': lambda p: 2 * np.min([p + plus1/(reps+plus1), 1 - (p + plus1/(reps+plus1))]) } observed_tst = tst_fun(response) diff --git a/permute/tests/test_ksample.py b/permute/tests/test_ksample.py index e135d383..3b84f1ae 100644 --- a/permute/tests/test_ksample.py +++ b/permute/tests/test_ksample.py @@ -2,7 +2,7 @@ from scipy.stats import hypergeom, binom from cryptorandom.cryptorandom import SHA256 -from ..ksample import (k_sample, +from permute.ksample import (k_sample, one_way_anova, bivariate_k_sample, two_way_anova) @@ -13,6 +13,20 @@ def test_worms_ksample(): worms = data.worms() res = k_sample(worms.x, worms.y, stat='one-way anova', reps=1000, seed=1234) + print(res[0], " " , res[1]) + np.testing.assert_array_less(0.006, res[0]) + np.testing.assert_array_less(res[0], 0.02) + +def test_worms_ksample_keep_dist(): + worms = data.worms() + res = k_sample(worms.x, worms.y, stat='one-way anova', reps=1000, seed=1234, keep_dist= True) + np.testing.assert_array_less(0.006, res[0]) + np.testing.assert_array_less(res[0], 0.02) + assert len(res) == 3 + +def test_worms_ksample_callable(): + worms = data.worms() + res = k_sample(worms.x, worms.y, stat = one_way_anova, reps=1000, seed=1234) np.testing.assert_array_less(0.006, res[0]) np.testing.assert_array_less(res[0], 0.02) @@ -22,7 +36,7 @@ def test_one_way_anova(): x = np.array(range(5)) xbar = np.mean(x) assert one_way_anova(x, group, xbar) == 0 - + group = np.array([1]*3 + [2]*2) expected = 3*1**2 + 2*1.5**2 assert one_way_anova(x, group, xbar) == expected @@ -30,18 +44,17 @@ def test_one_way_anova(): def test_two_way_anova(): prng = get_prng(100) - group1 = np.array([1]*5 + [2]*5) - group2 = np.array(list(range(5))*2) + group1 = np.array([1]*5 + [2]*5) + group2 = np.array(list(range(5))*2) x = prng.randint(1, 10, 10) xbar = np.mean(x) val = two_way_anova(x, group1, group2, xbar) np.testing.assert_almost_equal(val, 0.296, 3) - + x = group2 + 1 xbar = 3 assert two_way_anova(x, group1, group2, xbar) == 1 - def test_testosterone_ksample(): testosterone = data.testosterone() x = np.hstack(testosterone.tolist()) @@ -51,4 +64,22 @@ def test_testosterone_ksample(): assert len(group2) == 55 assert len(x) == 55 res = bivariate_k_sample(x, group1, group2, reps=5000, seed=5) + res_keep = bivariate_k_sample(x, group1, group2, reps=5000, seed=5, keep_dist = True) + assert len(res_keep) == 3 np.testing.assert_array_less(res[0], 0.0002) + np.testing.assert_array_less(res_keep[0], 0.0002) + + +def test_testosterone_ksample_stat(): + testosterone = data.testosterone() + x = np.hstack(testosterone.tolist()) + group1 = np.hstack([[i]*5 for i in range(len(testosterone))]) + group2 = np.array(list(range(5))*len(testosterone)) + assert len(group1) == 55 + assert len(group2) == 55 + assert len(x) == 55 + res = bivariate_k_sample(x, group1, group2, reps=5000, seed=5, stat = two_way_anova) + np.testing.assert_array_less(res[0], 0.0002) + +test_worms_ksample_keep_dist() +test_testosterone_ksample() diff --git a/permute/tests/test_npc.py b/permute/tests/test_npc.py index b245ef4d..c0c3b8cb 100644 --- a/permute/tests/test_npc.py +++ b/permute/tests/test_npc.py @@ -76,20 +76,20 @@ def test_npc_bad_distr(): def test_npc_single_pvalue(): pytest.raises(ValueError, npc, np.array([1]), np.array([1, 2, 3])) - + def test_monotonic_checker(): pvalues = np.array([0.1, 0.2, 0.3]) np.testing.assert_equal(check_combfunc_monotonic(pvalues, fisher), True) np.testing.assert_equal(check_combfunc_monotonic(pvalues, liptak), True) np.testing.assert_equal(check_combfunc_monotonic(pvalues, tippett), True) - + comb_function = lambda p: inverse_n_weight(p, np.array([2, 4, 6])) np.testing.assert_equal(check_combfunc_monotonic(pvalues, comb_function), True) - + bad_comb_function = lambda p: -1*fisher(p) np.testing.assert_equal(check_combfunc_monotonic(pvalues, bad_comb_function), False) - + def test_mono_checker_in_npc(): prng = RandomState(55) @@ -103,8 +103,8 @@ def test_minp_bad_distr(): prng = RandomState(55) pvalues = np.linspace(0.05, 0.9, num=5) distr = prng.uniform(low=0, high=10, size=20).reshape(10, 2) - pytest.raises(ValueError, fwer_minp, pvalues, distr, "fisher") - + pytest.raises(ValueError, fwer_minp, pvalues, distr, "fisher") + def test_minp_one_pvalue(): prng = RandomState(55) @@ -115,12 +115,12 @@ def test_minp_one_pvalue(): def test_sim_npc(): prng = RandomState(55) - # test Y always greater than X so p-value should be 1 + # test Y always greater than X so p-value should be 1 responses = np.array([[0, 1], [0, 1], [1, 2], [1, 2]]) group = np.array([1, 1, 2, 2]) my_randomizer = Experiment.Randomizer(randomize = randomize_group, seed = prng) data = Experiment(group, responses) - + # create median test statistic to apply to every column def med_diff(data, resp_index): # get response variable for that index @@ -131,19 +131,19 @@ def med_diff(data, resp_index): mx = np.nanmean(resp[data.group == groups[0]]) my = np.nanmean(resp[data.group == groups[1]]) return mx-my - + test_array = Experiment.make_test_array(med_diff, [0, 1]) res = sim_npc(data, test_array, combine="fisher", seed=None, reps=int(1000)) np.testing.assert_almost_equal(res[0], 1) - + # test X = Y so p-value should be 1 responses = np.array([[0, 1], [0, 1], [0, 1], [0, 1]]) group = np.array([1, 1, 2, 2]) data = Experiment(group, responses, randomizer = my_randomizer) - res = sim_npc(data, test = Experiment.make_test_array(Experiment.TestFunc.mean_diff, [0, 1]), + res = sim_npc(data, test = Experiment.make_test_array(Experiment.TestFunc.mean_diff, [0, 1]), combine="fisher", seed=None, reps=int(1000)) np.testing.assert_almost_equal(res[0], 1) - + # test stat for cat_1 is smaller if X all 0s which about 0.015 chance so pvalue should be about 0.985 responses = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [1, 1], [1, 1], [1, 1], [0, 1]]) group = np.array([1, 1, 1, 1, 2, 2, 2, 2]) @@ -151,8 +151,10 @@ def med_diff(data, resp_index): res = sim_npc(data, test = Experiment.make_test_array(Experiment.TestFunc.mean_diff, [0, 1]), combine="fisher", seed=None, reps=int(1000)) np.testing.assert_almost_equal(res[0], 0.985, decimal = 2) - - + +#Make unit tests for non fisher + + def test_fwer_minp(): prng = RandomState(55) pvalues = np.linspace(0.05, 0.9, num=5) diff --git a/permute/utils.py b/permute/utils.py index bb08b488..f1fe4e69 100644 --- a/permute/utils.py +++ b/permute/utils.py @@ -135,7 +135,7 @@ def hypergeom_conf_interval(n, x, N, cl=0.975, alternative="two-sided", G=None, def hypergeometric(x, N, n, G, alternative='greater'): - + """ Parameters ---------- @@ -188,13 +188,13 @@ def binomial_p(x, n, p, alternative='greater'): p : int hypothesized number of successes in n trials n : int - number of trials + number of trials alternative : {'greater', 'less', 'two-sided'} alternative hypothesis to test (default: 'greater') Returns ------- float - estimated p-value + estimated p-value """ assert alternative in ("two-sided", "less", "greater") From 8e1943795d45ea34c15b264aa9d568ee4cca94fc Mon Sep 17 00:00:00 2001 From: Adithya Date: Wed, 30 Jun 2021 23:15:52 -0700 Subject: [PATCH 2/2] Increase ksample coverage --- permute/.#ksample.py | 1 + permute/ksample.py | 1 + 2 files changed, 2 insertions(+) create mode 120000 permute/.#ksample.py diff --git a/permute/.#ksample.py b/permute/.#ksample.py new file mode 120000 index 00000000..d06bd327 --- /dev/null +++ b/permute/.#ksample.py @@ -0,0 +1 @@ +adithyarao@Adithyas-MacBook-Pro.local.15053 \ No newline at end of file diff --git a/permute/ksample.py b/permute/ksample.py index a6801bb8..59a29495 100644 --- a/permute/ksample.py +++ b/permute/ksample.py @@ -1,3 +1,4 @@ + """ K-sample permutation tests. """