Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions permute/.#ksample.py
38 changes: 19 additions & 19 deletions permute/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def corr(x, y, alternative='greater', reps=10**4, seed=None, plus1=True):
If RandomState instance, seed is the pseudorandom number generator
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand Down Expand Up @@ -70,15 +70,15 @@ def spearman_corr(x, y, alternative='greater', reps=10**4, seed=None, plus1=True
If RandomState instance, seed is the pseudorandom number generator
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
-------
tuple
Returns test statistic, p-value, simulated distribution
"""

xnew = np.argsort(x)+1
ynew = np.argsort(y)+1
return corr(xnew, ynew, alternative=alternative, reps=reps, seed=seed)
Expand Down Expand Up @@ -112,7 +112,7 @@ def two_sample_core(potential_outcomes_all, nx, tst_stat, alternative='greater',
If RandomState instance, seed is the pseudorandom number generator
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand Down Expand Up @@ -203,8 +203,8 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater",
that function. The function should take two arguments:
given a permutation of the pooled data, the first argument is the
"new" x and the second argument is the "new" y.
For instance, if the test statistic is the Kolmogorov-Smirnov distance
between the empirical distributions of the two samples,
For instance, if the test statistic is the Kolmogorov-Smirnov distance
between the empirical distributions of the two samples,
$\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:

f = lambda u, v: np.max( \
Expand All @@ -223,7 +223,7 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater",
If RandomState instance, seed is the pseudorandom number generator
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand Down Expand Up @@ -305,14 +305,14 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater",
that function. The function should take two arguments:
given a permutation of the pooled data, the first argument is the
"new" x and the second argument is the "new" y.
For instance, if the test statistic is the Kolmogorov-Smirnov distance
between the empirical distributions of the two samples,
For instance, if the test statistic is the Kolmogorov-Smirnov distance
between the empirical distributions of the two samples,
$\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:

f = lambda u, v: np.max( \
[abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
)

alternative : {'greater', 'less', 'two-sided'}
The alternative hypothesis to test
keep_dist : bool
Expand All @@ -332,7 +332,7 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater",
$x_i = f(y_i)$ and $y_i = f^{-1}(x_i)$
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand Down Expand Up @@ -421,14 +421,14 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
that function. The function should take two arguments:
given a permutation of the pooled data, the first argument is the
"new" x and the second argument is the "new" y.
For instance, if the test statistic is the Kolmogorov-Smirnov distance
between the empirical distributions of the two samples,
For instance, if the test statistic is the Kolmogorov-Smirnov distance
between the empirical distributions of the two samples,
$\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:

f = lambda u, v: np.max( \
[abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
)

shift : float
The relationship between x and y under the null hypothesis.

Expand All @@ -437,7 +437,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
$x_i = f(y_i, d)$ and $y_i = f^{-1}(x_i, d)$
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand All @@ -456,7 +456,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
"""
# print warning
warnings.warn('This function is under construction and outputs may be unreliable.')

assert alternative in ("two-sided", "lower", "upper")

if shift is None:
Expand Down Expand Up @@ -488,7 +488,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
shift=q, reps=reps, stat=stat, plus1=plus1)[0]
else:
g = lambda q: cl - two_sample_shift(x, y, alternative="less", seed=seed,
shift=(lambda u: f(u, q), lambda u: finverse(u, q)),
shift=(lambda u: f(u, q), lambda u: finverse(u, q)),
reps=reps, stat=stat, plus1=plus1)[0]
ci_low = brentq(g, -2 * shift_limit, 2 * shift_limit)

Expand All @@ -498,7 +498,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
shift=q, reps=reps, stat=stat, plus1=plus1)[0]
else:
g = lambda q: cl - two_sample_shift(x, y, alternative="greater", seed=seed,
shift=(lambda u: f(u, q), lambda u: finverse(u, q)),
shift=(lambda u: f(u, q), lambda u: finverse(u, q)),
reps=reps, stat=stat, plus1=plus1)[0]
ci_upp = brentq(g, -2 * shift_limit, 2 * shift_limit)

Expand Down Expand Up @@ -566,7 +566,7 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater",
If RandomState instance, seed is the pseudorandom number generator
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand Down
14 changes: 7 additions & 7 deletions permute/irr.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def simulate_ts_dist(ratings, obs_ts=None, num_perm=10000,
If RandomState instance, seed is the pseudorandom number generator
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand Down Expand Up @@ -171,10 +171,10 @@ def simulate_ts_dist(ratings, obs_ts=None, num_perm=10000,
for i in range(num_perm):
r = permute_rows(r, prng)
geq += (compute_ts(r) >= obs_ts)
return {"obs_ts": obs_ts,
"geq": geq,
return {"obs_ts": obs_ts,
"geq": geq,
"num_perm": num_perm,
"pvalue": (geq+plus1) / (num_perm+plus1),
"pvalue": (geq+plus1) / (num_perm+plus1),
"dist": dist}


Expand Down Expand Up @@ -216,7 +216,7 @@ def simulate_npc_dist(perm_distr, size, obs_ts=None,
If not provided, obs_ts must be specified.
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand Down Expand Up @@ -246,6 +246,6 @@ def simulate_npc_dist(perm_distr, size, obs_ts=None,

obs_npc = combine_func(pvalues)
res = npc(pvalues, perm_distr, combine_func)
return {"obs_npc": obs_npc,
"pvalue": res,
return {"obs_npc": obs_npc,
"pvalue": res,
"num_perm": B}
23 changes: 12 additions & 11 deletions permute/ksample.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

"""
K-sample permutation tests.
"""
Expand All @@ -13,12 +14,12 @@
def k_sample(x, group, reps=10**5, stat='one-way anova',
keep_dist=False, seed=None, plus1=True):
r"""
k-sample permutation test for equality of more than 2 means,
k-sample permutation test for equality of more than 2 means,
with p-value estimated by simulated random sampling with
reps replications.

Tests the hypothesis that groupings are a random partition of x
against the alternative that at least one group comes from a
against the alternative that at least one group comes from a
population with mean different from the rest

If ``keep_dist``, return the distribution of values of the test statistic;
Expand All @@ -36,7 +37,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova',
stat : {'one-way anova'}
The test statistic.

(a) If stat == 'one-way anova', use the sum of squared
(a) If stat == 'one-way anova', use the sum of squared
distances between the group means and the overall mean
weighted by group size.
$\sum_{k=1}^K n_k(\overline{X_k} - \overline{X})^2$
Expand All @@ -50,7 +51,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova',
If RandomState instance, seed is the pseudorandom number generator
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand All @@ -76,7 +77,7 @@ def k_sample(x, group, reps=10**5, stat='one-way anova',
else:
tst_fun = stats[stat]

xbar = np.mean(x)
xbar = np.mean(x)
observed_tst = tst_fun(x, group, xbar)

if keep_dist:
Expand Down Expand Up @@ -112,7 +113,7 @@ def one_way_anova(x, group, overall_mean):
Returns
-------
float
the one-way ANOVA statistic
the one-way ANOVA statistic
$\sum_{k=1}^K n_k(\overline{X_k} - \overline{X})^2$
where $k$ indexes the groups
"""
Expand All @@ -129,12 +130,12 @@ def one_way_anova(x, group, overall_mean):
def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova',
keep_dist=False, seed=None, plus1=True):
r"""
k-sample permutation test for equality of more than 2 means,
k-sample permutation test for equality of more than 2 means,
with p-value estimated by simulated random sampling with
reps replications.

Tests the hypothesis that within grouping 1, grouping 2 is
a random partition of x against the alternative that at
a random partition of x against the alternative that at
least one level of grouping 2 comes from a population with mean different from the rest

If ``keep_dist``, return the distribution of values of the test statistic;
Expand All @@ -148,7 +149,7 @@ def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova',
group1 : array-like
Fixed group labels for each observation
group2 : array-like
Group labels that, under the null, are exchangeable for each
Group labels that, under the null, are exchangeable for each
level of group1
reps : int
number of repetitions
Expand All @@ -171,7 +172,7 @@ def bivariate_k_sample(x, group1, group2, reps=10**5, stat='two-way anova',
If RandomState instance, seed is the pseudorandom number generator
plus1 : bool
flag for whether to add 1 to the numerator and denominator of the
p-value based on the empirical permutation distribution.
p-value based on the empirical permutation distribution.
Default is True.

Returns
Expand Down Expand Up @@ -229,7 +230,7 @@ def two_way_anova(x, group1, group2, overall_mean):
group1 : array-like
Fixed group labels for each observation
group2 : array-like
Group labels that, under the null, are exchangeable for each
Group labels that, under the null, are exchangeable for each
level of group1
overall_mean : float
mean of x
Expand Down
Loading