diff --git a/permute/core.py b/permute/core.py
index 26a0b84..b1b0d75 100644
--- a/permute/core.py
+++ b/permute/core.py
@@ -6,10 +6,12 @@
                         print_function, unicode_literals)
 
 import numpy as np
+import math
 from scipy.optimize import brentq, fsolve
-from scipy.stats import ttest_ind, ttest_1samp
+from scipy.stats import ttest_ind, ttest_1samp, binom
 
-from .utils import get_prng, potential_outcomes
+from .utils import get_prng, potential_outcomes, binom_conf_interval
+from .binomialp import binomial_p
 
 
 def corr(x, y, reps=10**4, seed=None):
@@ -187,7 +189,9 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater",
     # dictionary
     stats = {
         'mean': lambda u, v: np.mean(u) - np.mean(v),
-        't': lambda u, v: ttest_ind(u, v, equal_var=True)[0]
+        't': lambda u, v: ttest_ind(u, v, equal_var=True)[0],
+        'KS': lambda u, v: np.max([abs(sum(u <= a) / len(u) - sum(v <= a) / len(v))
+                                   for a in set(u).union(set(v))])
     }
     if callable(stat):
         tst_fun = stat
@@ -205,6 +209,36 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater",
     else:
         return res, observed_tst
 
+
+def two_sample_fit(x, y, alpha, method='KS'):
+    '''
+    Permutation-based goodness-of-fit test of whether samples x and y are
+    drawn from the same underlying distribution. Currently only the
+    Kolmogorov-Smirnov (KS) statistic is implemented.
+
+    Parameters
+    ----------
+    x : array-like
+       sample X
+    y : array-like
+       sample Y
+    alpha : float in [0, 1]
+       type I error rate
+
+    Returns
+    -------
+    bool
+       True if the hypothesis that the two samples come from the same
+       underlying distribution is not rejected at level alpha; False if
+       it is rejected.
+
+    Notes
+    -----
+    method
+       Placeholder: this will become the interface invoking two_sample
+       for other goodness-of-fit test statistics in future implementations.
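+
+    Examples
+    --------
+    Illustrative sketch: the decision depends only on the observed KS
+    statistic, not on the simulated permutation distribution, so these
+    results are deterministic.
+
+    >>> x = np.arange(10)
+    >>> bool(two_sample_fit(x, x, alpha=0.05))
+    True
+    >>> bool(two_sample_fit(x, x + 100, alpha=0.05))
+    False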
+    '''
+    _, stats = two_sample(x, y, stat='KS')
+    c = math.sqrt((len(x) + len(y)) / (len(x) * len(y)))
+    c *= math.sqrt(-0.5 * math.log(alpha * 0.5))
+    return stats <= c
+
 
 def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater",
                      keep_dist=False, seed=None, shift=None):
@@ -433,8 +467,105 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
 
     return ci_low, ci_upp
 
+
+def one_sample_percentile(x, x_p, p=50, alternative="less"):
+    r"""
+    One-sided or two-sided test of the hypothesis that x_p is the true Pth
+    percentile of the population distribution.
+
+    Under the null hypothesis, each value in a sample drawn from the
+    population independently has probability P/100 of lying at or below the
+    Pth percentile, so the number of observations at or below x_p is
+    binomially distributed.
+
+    The returned p-value is the probability of a count at least as extreme
+    as the one observed, if x_p were the true Pth percentile.
+
+    The test defaults to P=50, the 50th percentile (the median).
+
+    Parameters
+    ----------
+    x : array-like
+       Sample
+    x_p : numeric
+       A hypothesized value of the Pth percentile of the distribution.
+    p : int in [0, 100]
+       Percentile of interest to test.
+    alternative : {'greater', 'less', 'two-sided'}
+       The alternative hypothesis to test
+
+    Returns
+    -------
+    float
+       the estimated p-value
+    float
+       the test statistic: the number of values at or below x_p in the sample
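+
+    Examples
+    --------
+    Illustrative sketch: the test statistic and p-value are deterministic
+    functions of the sample.
+
+    >>> x = np.arange(1, 101)
+    >>> pval, num_under = one_sample_percentile(x, 50, p=50)
+    >>> int(num_under)
+    50
+    >>> bool(pval == binom.cdf(50, 100, 0.5))
+    True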
+    """
+
+    if not 0 <= p <= 100:
+        raise ValueError('p must be between 0 and 100')
+
+    thePvalue = {
+        'greater': lambda pv: 1 - pv,
+        'less': lambda pv: pv,
+        'two-sided': lambda pv: 2 * np.min([pv, 1 - pv])
+    }
+
+    num_under = np.sum(x <= x_p)
+    p_value = binom.cdf(num_under, n=len(x), p=p / 100)
+
+    return thePvalue[alternative](p_value), num_under
+
+
+def one_sample_percentile_ci(x, p=50, cl=0.95, alternative="two-sided"):
+    """
+    Confidence interval for the Pth percentile of a population distribution,
+    at confidence level CL, based solely on a univariate or paired sample X
+    drawn from the population.
+
+    Parameters
+    ----------
+    x : array-like
+       The sample
+    cl : float in (0, 1)
+       The desired confidence level.
+    alternative : {"two-sided", "lower", "upper"}
+       Indicates the alternative hypothesis.
+    p : int in [0, 100]
+       Percentile of interest for the confidence interval.
+
+    Returns
+    -------
+    tuple
+       lower and upper confidence bounds with coverage of (approximately) cl.
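+
+    Examples
+    --------
+    Illustrative sketch: the bounds are order statistics of the sample, so
+    the result is deterministic (values match the accompanying tests).
+
+    >>> x = np.arange(0, 100)
+    >>> lo, hi = one_sample_percentile_ci(x)
+    >>> int(lo), int(hi)
+    (39, 59)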
+    """
+
+    if not 0 <= p <= 100:
+        raise ValueError('p must be between 0 and 100')
+
+    x = np.sort(x)
+    ci_low = np.min(x)
+    ci_upp = np.max(x)
+
+    if alternative == 'two-sided':
+        cl = 1 - (1 - cl) / 2
+
+    if alternative != "upper":
+        g = lambda q: cl - one_sample_percentile(x, q, p=p, alternative="greater")[0]
+        ci_low = brentq(g, np.min(x), np.max(x))
+        ci_low = x[np.searchsorted(x, ci_low) - 1]
+
+    if alternative != "lower":
+        g = lambda q: cl - one_sample_percentile(x, q, p=p, alternative="less")[0]
+        ci_upp = brentq(g, np.min(x), np.max(x))
+        ci_upp = x[np.searchsorted(x, ci_upp)]
+
+    return ci_low, ci_upp
+
+
 def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater",
-               keep_dist=False, seed=None):
+               keep_dist=False, seed=None, center=None):
     r"""
     One-sided or two-sided, one-sample permutation test for the mean, with
     p-value estimated by simulated random sampling with
     reps replications.
 
     Alternatively, a permutation test for equality of means of two paired
     samples.
 
-    Tests the hypothesis that x is distributed symmetrically symmetric about 0
-    (or x and y have the same center) against the alternative that x comes from
+    Tests the hypothesis that x is distributed symmetrically about CENTER
+    (or x and y have the same center) against the alternative that x comes from
     a population with mean
 
-    (a) greater than 0 (greater than that of the population from which y comes),
+    (a) greater than CENTER (greater than that of the population from which y comes),
        if side = 'greater'
-    (b) less than 0 (less than that of the population from which y comes),
+    (b) less than CENTER (less than that of the population from which y comes),
        if side = 'less'
-    (c) different from 0 (different from that of the population from which y comes),
+    (c) different from CENTER (different from that of the population from which y comes),
        if side = 'two-sided'
 
     If ``keep_dist``, return the distribution of values of the test statistic;
@@ -492,7 +623,8 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater",
         instance used by `np.random`;
         If int, seed is the seed used by the random number generator;
         If RandomState instance, seed is the pseudorandom number generator
-
+    center : float
+        The center of symmetry under the null hypothesis. Default 0.
 
     Returns
     -------
@@ -520,7 +652,8 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater",
     }
     stats = {
         'mean': lambda u: np.mean(u),
-        't': lambda u: ttest_1samp(u, 0)[0]
+        't': lambda u: ttest_1samp(u, 0)[0],
+        'median': lambda u: np.median(u)
     }
     if callable(stat):
         tst_fun = stat
@@ -528,14 +661,144 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater",
         tst_fun = stats[stat]
 
     tst = tst_fun(z)
+
+    if center is None:
+        center = 0.
+
     n = len(z)
     if keep_dist:
         dist = []
         for i in range(reps):
-            dist.append(tst_fun(z * (1 - 2 * prng.binomial(1, .5, size=n))))
+            dist.append(tst_fun(center + (z - center) *
+                                (1 - 2 * prng.binomial(1, .5, size=n))))
         hits = np.sum(dist >= tst)
         return thePvalue[alternative](hits / reps), tst, dist
     else:
-        hits = np.sum([(tst_fun(z * (1 - 2 * prng.binomial(1, .5, size=n)))) >= tst
+        hits = np.sum([tst_fun(center + (z - center) *
+                               (1 - 2 * prng.binomial(1, .5, size=n))) >= tst
                        for i in range(reps)])
         return thePvalue[alternative](hits / reps), tst
+
+
+def one_sample_conf_int(x, y=None, cl=0.95, alternative="two-sided", seed=None,
+                        reps=10**4, stat="mean", shift=None):
+    """
+    One-sided or two-sided confidence interval for a test statistic of a
+    univariate or paired sample. The default is a two-sided confidence
+    interval for the mean of the sample x.
+
+    Parameters
+    ----------
+    x : array-like
+       Sample 1
+    y : array-like
+       Sample 2
+    cl : float in (0, 1)
+       The desired confidence level. Default 0.95.
+    alternative : {"two-sided", "lower", "upper"}
+       Indicates the alternative hypothesis.
+    seed : RandomState instance or {None, int, RandomState instance}
+       If None, the pseudorandom number generator is the RandomState
+       instance used by `np.random`;
+       If int, seed is the seed used by the random number generator;
+       If RandomState instance, seed is the pseudorandom number generator
+    reps : int
+       number of repetitions in one_sample
+    stat : {'mean', 't', 'median'}
+       The test statistic.
+
+       (a) If stat == 'mean', the test statistic is mean(x) (or
+           mean(x - y) for a paired sample)
+       (b) If stat == 't', the test statistic is the one-sample t-statistic--
+           but the interval is still found by inverting the randomization
+           test, which approximates the permutation distribution.
+           The t-statistic is computed using scipy.stats.ttest_1samp
+       (c) If stat == 'median', the test statistic is the sample median
+       (d) If stat is a function (a callable object), the test statistic is
+           that function. The function should take a sample and compute the
+           test statistic from it. For instance, if the test statistic is
+           the 20th percentile, it could be written:
+
+           f = lambda u: np.percentile(u, 20)
+    shift : float
+       The relationship between x and y under the null hypothesis.
+
+       (a) If None, the relationship is assumed to be additive (e.g. x = y + d)
+       (b) A tuple containing the function and its inverse $(f, f^{-1})$, so
+           $x_i = f(y_i, d)$ and $y_i = f^{-1}(x_i, d)$
+
+    Returns
+    -------
+    tuple
+       the estimated confidence limits
+
+    Notes
+    -----
+    xtol : float
+       Tolerance in brentq
+    rtol : float
+       Tolerance in brentq
+    maxiter : int
+       Maximum number of iterations in brentq
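+
+    Examples
+    --------
+    Illustrative sketch (the limits depend on the pseudorandom number
+    generator, so no output is checked):
+
+    >>> x = np.random.normal(1, 1, size=100)
+    >>> lo, hi = one_sample_conf_int(x, cl=0.95)  # doctest: +SKIP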
+    """
+    assert alternative in ("two-sided", "lower", "upper")
+
+    if y is None:
+        z = x
+    elif len(x) != len(y):
+        raise ValueError('x and y must be pairs')
+    else:
+        z = np.array(x) - np.array(y)
+
+    if shift is None:
+        shift_limit = max(z) - min(z)
+    elif isinstance(shift, tuple):
+        assert (callable(shift[0])), "Supply f and finverse in shift tuple"
+        assert (callable(shift[1])), "Supply f and finverse in shift tuple"
+        f = shift[0]
+        finverse = shift[1]
+        # Check that f is increasing in d; this is very ad hoc!
+        assert (f(5, 1) < f(5, 2)), "f must be increasing in the parameter d"
+
+        shift_limit = max(abs(fsolve(lambda d: f(max(z), d) - min(z), 0)),
+                          abs(fsolve(lambda d: f(min(z), d) - max(z), 0)))
+        # FIXME: unused observed
+        # observed = fsolve(lambda d: np.mean(x) - np.mean(f(y, d)), 0)
+    else:
+        raise ValueError("Bad input for shift")
+
+    stats = {
+        'mean': lambda u: np.mean(u),
+        't': lambda u: ttest_1samp(u, 0)[0],
+        'median': lambda u: np.median(u)
+    }
+    if callable(stat):
+        tst_fun = stat
+    else:
+        tst_fun = stats[stat]
+
+    tst = tst_fun(z)
+
+    ci_low = tst - shift_limit
+    ci_upp = tst + shift_limit
+
+    if alternative == 'two-sided':
+        cl = 1 - (1 - cl) / 2
+
+    if alternative != "upper":
+        g = lambda q: cl - one_sample(z, alternative="less", seed=seed,
+                                      reps=reps, stat=stat, center=q)[0]
+        ci_low = brentq(g, tst - 2 * shift_limit, tst + 2 * shift_limit)
+
+    if alternative != "lower":
+        g = lambda q: cl - one_sample(z, alternative="greater", seed=seed,
+                                      reps=reps, stat=stat, center=q)[0]
+        ci_upp = brentq(g, tst - 2 * shift_limit, tst + 2 * shift_limit)
+
+    return ci_low, ci_upp
diff --git a/permute/hoeffding.py b/permute/hoeffding.py
new file mode 100644
index 0000000..1ec91e2
--- /dev/null
+++ b/permute/hoeffding.py
@@ -0,0 +1,72 @@
+from __future__ import (absolute_import, division,
+                        print_function, unicode_literals)
+
+import numpy as np
+import math
+
+
+def hoeffding_conf_int(x, N, lower_bound, upper_bound, cl=0.95,
+                       alternative="one-sided"):
+    r"""
+    Confidence interval for the mean of bounded, independent observations,
+    derived by inverting Hoeffding's inequality. The bounds may be shared
+    by all observations (scalars) or given per observation (arrays).
+
+    Parameters
+    ----------
+    x : array-like
+       list of observations
+    N : int
+       number of observations; must equal len(x)
+    lower_bound : scalar or array-like
+       lower bound(s) for the observations
+    upper_bound : scalar or array-like
+       upper bound(s) for the observations
+    cl : float in (0, 1)
+       the desired confidence level. Default 0.95.
+    alternative : {'one-sided', 'two-sided'}
+       kind of interval to compute (default: 'one-sided')
+
+    Returns
+    -------
+    tuple
+       the confidence limits
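+
+    Examples
+    --------
+    Illustrative sketch: for scalar bounds, the one-sided margin has the
+    closed form (upper_bound - lower_bound) * sqrt(-log(1 - cl) / (2 * N)).
+
+    >>> x = np.array([0.5] * 10)
+    >>> lo, hi = hoeffding_conf_int(x, 10, 0, 1, cl=0.95, alternative="two-sided")
+    >>> bool(0 <= lo < 0.5 < hi <= 1)
+    True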
+    """
+    # Check whether the bounds are scalar or array-like
+    if isinstance(lower_bound, (int, float)) and isinstance(upper_bound, (int, float)):
+        if max(x) > upper_bound or min(x) < lower_bound:
+            raise ValueError("x values not contained in bounds")
+        tau_sq = N * ((upper_bound - lower_bound) ** 2)
+        max_upper = upper_bound
+        min_lower = lower_bound
+    elif isinstance(lower_bound, np.ndarray) and isinstance(upper_bound, np.ndarray):
+        # Check that the bounds have the right length, are mutually
+        # consistent, and contain the observations
+        if len(lower_bound) != N or len(upper_bound) != N:
+            raise ValueError("Bad bound input length")
+        if np.sum(upper_bound >= lower_bound) != N:
+            raise ValueError("Invalid upper and lower bounds")
+        if np.sum(x <= upper_bound) != N or np.sum(x >= lower_bound) != N:
+            raise ValueError("x values not contained in bounds")
+        tau_sq = np.sum((upper_bound - lower_bound) ** 2)
+        max_upper = np.mean(upper_bound)
+        min_lower = np.mean(lower_bound)
+    else:
+        raise ValueError("Invalid upper and lower bounds")
+
+    x_bar = np.mean(x)
+
+    if alternative == "one-sided":
+        hCrit = np.sqrt(-math.log(1 - cl) * tau_sq / (2 * N ** 2))
+        ci_low = x_bar - hCrit
+        ci_upp = max_upper
+
+    if alternative == "two-sided":
+        hCrit = np.sqrt(-math.log((1 - cl) / 2) * tau_sq / (2 * N ** 2))
+        ci_low, ci_upp = x_bar - hCrit, x_bar + hCrit
+
+    # Truncate the interval to the attainable range of the mean
+    if ci_upp > max_upper:
+        ci_upp = max_upper
+    if ci_low < min_lower:
+        ci_low = min_lower
+
+    return ci_low, ci_upp
diff --git a/permute/kaplanward.py b/permute/kaplanward.py
new file mode 100644
index 0000000..2c16377
--- /dev/null
+++ b/permute/kaplanward.py
@@ -0,0 +1,110 @@
+import math
+
+import numpy as np
+from scipy.optimize import brentq
+
+
+def kaplan_wald_CI(x, level=0.99, lo=0, hi=float('inf'), gamma=0.95, xtol=1.e-32):
+    '''
+    Confidence interval for the mean of a nonnegative random variable,
+    calculated using the Kaplan-Wald method.
+
+    Parameters
+    ----------
+    x : array-like
+       the input sample
+    level : float in (0, 1)
+       the desired confidence level
+    lo : float in [0, float('inf'))
+       a lower bound for the random variable; must be nonnegative
+    hi : float in (lo, float('inf')]
+       an upper bound for the random variable, optional
+
+    Returns
+    -------
+    tuple
+       the estimated confidence limits. If the upper bound is not
+       specified, the upper limit is returned as float('inf'), so only
+       the lower limit is informative.
+
+    Notes
+    -----
+    gamma : float in (0, 1)
+       the hedging parameter introduced in the Kaplan-Wald method to guard
+       against small values in the sample
+    xtol : float
+       Tolerance in brentq
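+
+    Examples
+    --------
+    Illustrative sketch: the lower confidence bound never exceeds the
+    sample mean, and the upper limit defaults to infinity.
+
+    >>> x = np.array([1., 2., 3., 4., 5.])
+    >>> lo, hi = kaplan_wald_CI(x, level=0.95)
+    >>> bool(lo <= np.mean(x)) and hi == float('inf')
+    True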
+    '''
+    alpha = 1.0 - level
+    if not (0 < level < 1):
+        raise ValueError('CI level must be between 0 and 1.')
+    if any(x < 0):
+        raise ValueError('Data x must be nonnegative.')
+    if lo > hi or any(x < lo) or any(x > hi):
+        raise ValueError('Data x is not in the specified range.')
+
+    def find_t(data, start):
+        # Largest t at which the Kaplan-Wald martingale still exceeds
+        # 1/alpha, i.e., the lower confidence bound for the mean of `data`
+        f = lambda t: (np.max(np.cumsum(np.log(gamma * data / t + 1 - gamma))) +
+                       np.log(alpha))
+        start, end = start, np.mean(data)
+        if f(start) * f(end) <= 0.0:
+            return brentq(f, start, end, xtol=xtol)
+        else:
+            return start
+
+    lo = find_t(x, lo + xtol)
+    if hi != float('inf'):
+        # Upper limit: invert the lower confidence bound for hi - x
+        hi = hi - find_t(hi - x, xtol)
+    return (lo, hi)
+
+
+def sprt_proportion(x, p0, p1, alpha, beta, start=0):
+    '''
+    Sequential probability ratio test (SPRT) for a binomial proportion,
+    e.g., to judge the quality of a batch of products with minimal
+    sampling. The batch is sampled sequentially until a decision can be
+    made that it conforms to specification and can be accepted, or that
+    it should be rejected.
+
+    Parameters
+    ----------
+    x : array-like
+       the input sample; each value should indicate success (1) or failure (0)
+    p0 : float in [0, 1]
+       the proportion of successes under the null hypothesis; the null is
+       accepted when the data clearly favor p0
+    p1 : float in [0, 1]
+       the proportion of successes under the alternative hypothesis; the
+       null is rejected when the data clearly favor p1
+    alpha : float in (0, 1)
+       type I error rate
+    beta : float in (0, 1)
+       type II error rate
+
+    Returns
+    -------
+    tuple
+       tuple[0] is the decision: 'Reject' the null, 'Accept' the null,
+       or 'Unknown' if the data ran out before a decision could be made
+       tuple[1] is the log likelihood ratio for the SPRT when a decision
+       is made or the data run out
+       tuple[2] is the index of the trial at which a decision is made
+
+    Notes
+    -----
+    start
+       If the last batch of data gave an 'Unknown' decision, feed its
+       tuple[1] into start to continue the SPRT on new data. The default
+       of 0 starts from scratch (log likelihood ratio of 0).
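+
+    Examples
+    --------
+    Illustrative sketch: with p0=0.5, p1=0.7, and 5% error rates, a run of
+    successes crosses the rejection boundary log((1-beta)/alpha) = log(19)
+    at the ninth trial, since 9*log(0.7/0.5) > log(19) > 8*log(0.7/0.5).
+
+    >>> decision, llr, idx = sprt_proportion([1] * 10, 0.5, 0.7, 0.05, 0.05)
+    >>> decision, idx
+    ('Reject', 9)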
+    '''
+    if not (0 <= p0 <= 1 and 0 <= p1 <= 1):
+        raise ValueError('Proportion must lie in [0, 1]')
+    if not (0 < alpha < 1 and 0 < beta < 1):
+        raise ValueError('Error control param must lie in (0, 1)')
+    lr = start
+    idx = 0
+    s_fac = math.log(p1 / p0)
+    f_fac = math.log((1.0 - p1) / (1.0 - p0))
+    s_bnd = math.log((1 - beta) / alpha)
+    f_bnd = math.log(beta / (1 - alpha))
+    decision = 'Unknown'
+    for idx, trial in enumerate(x, 1):
+        trial = int(trial)
+        if trial != 0 and trial != 1:
+            raise ValueError('every trial in the data must be 0 or 1.')
+        lr += s_fac if trial == 1 else f_fac
+        if lr >= s_bnd:
+            decision = 'Reject'
+            break
+        elif lr <= f_bnd:
+            decision = 'Accept'
+            break
+    return (decision, lr, idx)
diff --git a/permute/regcoeff.py b/permute/regcoeff.py
new file mode 100644
index 0000000..1c2ea31
--- /dev/null
+++ b/permute/regcoeff.py
@@ -0,0 +1,49 @@
+from __future__ import (absolute_import, division,
+                        print_function, unicode_literals)
+
+import numpy as np
+from scipy.stats import linregress
+
+from .utils import get_prng
+
+
+def reg_coeff(x, y, reps=10**5, cl=0.95, seed=None):
+    r"""
+    Permutation test for the slope coefficient of a simple linear
+    regression of y on x. Under the null hypothesis, y is unrelated to x,
+    so all permutations of x against y are equally likely; the alternative
+    is that the slope is nonzero.
+
+    Parameters
+    ----------
+    x : array-like
+       observations of the predictor
+    y : array-like
+       observations of the response
+    reps : int
+       number of permutations. Default 10**5.
+    cl : float in (0, 1)
+       the desired confidence level. Default 0.95. Currently unused.
+    seed : RandomState instance or {None, int, RandomState instance}
+       If None, the pseudorandom number generator is the RandomState
+       instance used by `np.random`;
+       If int, seed is the seed used by the random number generator;
+       If RandomState instance, seed is the pseudorandom number generator
+
+    Returns
+    -------
+    float
+       the estimated two-sided p-value for the hypothesis that the slope
+       is zero
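+
+    Examples
+    --------
+    Illustrative sketch: when y is an exact linear function of x, no
+    permutation can produce a strictly larger absolute slope, so the
+    p-value is 0 regardless of the seed.
+
+    >>> x = np.arange(10)
+    >>> float(reg_coeff(x, 2 * x, reps=100, seed=42))
+    0.0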
+    """
+    if len(x) != len(y):
+        raise ValueError("Input vector lengths do not match")
+
+    true_slope = abs(linregress(x, y)[0])
+    prng = get_prng(seed)
+    slopes = []
+    for _ in range(reps):
+        shuffled_x = prng.permutation(x)
+        slopes.append(linregress(shuffled_x, y)[0])
+
+    p_value = np.sum(np.abs(slopes) > true_slope) / reps
+
+    return p_value
diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py
index a3668f8..965858f 100644
--- a/permute/tests/test_core.py
+++ b/permute/tests/test_core.py
@@ -6,13 +6,19 @@
 
 import numpy as np
 from numpy.random import RandomState
+from scipy.stats import binom
 
 from ..core import (corr,
                     two_sample,
                     two_sample_shift,
                     two_sample_conf_int,
-                    one_sample)
+                    two_sample_fit,
+                    one_sample,
+                    one_sample_conf_int,
+                    one_sample_percentile,
+                    one_sample_percentile_ci)
 
 
 def test_corr():
@@ -90,6 +96,17 @@ def test_two_sample():
     np.testing.assert_equal(res[0], expected[0])
     np.testing.assert_equal(res[1], expected[1])
 
+
+def test_two_sample_fit():
+    x = np.array(range(10))
+    y = np.array(range(10))
+    alpha = 0.01
+    res = two_sample_fit(x, y, alpha)
+    np.testing.assert_equal(res, True)
+
+    y = np.array(range(10, 20))
+    alpha = 0.1
+    res = two_sample_fit(x, y, alpha)
+    np.testing.assert_equal(res, False)
+
 
 def test_two_sample_shift():
     prng = RandomState(42)
@@ -217,3 +234,164 @@ def test_one_sample():
     res = one_sample(x, y, seed=42, reps=100, stat="t", alternative="less")
     np.testing.assert_almost_equal(res[0], 0.05)
     np.testing.assert_almost_equal(res[1], -1.4491883)
+
+    # case 6: use median as test statistic
+    res = one_sample(x, seed=42, reps=100, stat="median")
+    np.testing.assert_almost_equal(res[0], 0.14)
+    np.testing.assert_equal(res[1], 2)
+
+    # case 7: test statistic is a function
+    pcntl = lambda x: np.percentile(x, 20)
+    res = one_sample(x, seed=42, reps=100, stat=pcntl)
+    np.testing.assert_almost_equal(res[0], 0.06)
+    np.testing.assert_almost_equal(res[1], 0.8)
+
+    # Rerun the cases with a nonzero center
+    prng = RandomState(42)
+
+    x = np.array(range(10))
+    y = x - 1
+
+    # case 1: nonzero center
+    res0 = one_sample(x, seed=prng, reps=100, center=3)
+    res1 = one_sample(x, seed=prng, reps=100, center=7)
+    np.testing.assert_almost_equal(res0[0], 0.07)
+    np.testing.assert_equal(res0[1], np.mean(x))
+    np.testing.assert_almost_equal(res1[0], 1)
+
+    # case 2: paired sample
+    res = one_sample(x, y, seed=42, reps=100, center=1, keep_dist=True)
+    np.testing.assert_almost_equal(res[0], 1)
+    np.testing.assert_equal(res[1], 1)
+    dist_unique = np.unique(res[2])  # distribution should be all 1's
+    np.testing.assert_equal(len(dist_unique), 1)
+    np.testing.assert_equal(dist_unique[0], 1)
+
+    # case 3: t test statistic
+    x = np.array(range(5))
+    res = one_sample(x, seed=42, reps=100, stat="t", alternative="less",
+                     center=0.1)
+    np.testing.assert_almost_equal(res[0], 0.94)
+    np.testing.assert_almost_equal(res[1], 2.8284271247461898)
+
+    # case 4: break it - supply x and y, but not paired
+    y = np.append(y, 10)
+    assert_raises(ValueError, one_sample, x, y)
+
+    # case 5: use median as test statistic
+    x = np.array(range(10))
+    res = one_sample(x, seed=42, reps=100, stat="median", center=4.5)
+    np.testing.assert_almost_equal(res[0], 0.53)
+    np.testing.assert_equal(res[1], 4.5)
+
+    # case 6: test statistic is a function
+    pcntl = lambda x: np.percentile(x, 20)
+    res = one_sample(x, seed=42, reps=100, stat=pcntl, center=2)
+    np.testing.assert_almost_equal(res[0], 0.03)
+    np.testing.assert_almost_equal(res[1], 1.8)
+
+
+@attr('slow')
+def test_one_sample_conf_int():
+    prng = RandomState(42)
+
+    # Standard confidence interval
+    x = np.array(range(10))
+    res = one_sample_conf_int(x, seed=prng)
+    expected_ci = (2.2696168, 6.6684788)
+    np.testing.assert_almost_equal(res, expected_ci)
+    res = one_sample_conf_int(x, seed=prng, alternative="upper")
+    expected_ci = (-4.5, 6.255232305502077)
+    np.testing.assert_almost_equal(res, expected_ci)
+    res = one_sample_conf_int(x, seed=prng, alternative="lower")
+    expected_ci = (2.7680067828582393, 13.5)
+    np.testing.assert_almost_equal(res, expected_ci)
+
+    # Paired sample whose differences are centered at 1
+    norm = prng.normal(0, 1, size=100)
+    shift = prng.normal(1, 1, size=100)
+    diff = norm - shift
+    res = one_sample_conf_int(norm, diff, seed=prng)
+    expected_ci = (0.8, 1.2)
+    np.testing.assert_almost_equal(res, expected_ci, decimal=1)
+
+    # Specify shift with a function pair
+    shift = (lambda u, d: u + d, lambda u, d: u - d)
+    res = one_sample_conf_int(x, seed=5, shift=shift)
+    np.testing.assert_almost_equal(res, (2.333333333333319, 6.749999999999943))
+
+    # Specify shift with a multiplicative pair
+    shift = (lambda u, d: u * d, lambda u, d: u / d)
+    res = one_sample_conf_int(norm, seed=5, shift=shift)
+    np.testing.assert_almost_equal(res, (-0.0653441, 0.309073))
+
+    # break it - supply x and y, but not paired
+    y = np.append(x, 10)
+    assert_raises(ValueError, one_sample_conf_int, x, y)
+
+    # Testing with sample statistic of median
+    res = one_sample_conf_int(x, seed=42, reps=100, stat="median")
+    np.testing.assert_almost_equal(res, (2.499999999999458, 6.999999999999045))
+
+    # Testing with t statistic
+    prng = RandomState(42)
+    x = np.arange(20)
+    y = x + prng.normal(size=20)
+    res = one_sample_conf_int(x, y, reps=100, seed=prng, stat='t')
+    np.testing.assert_almost_equal(res, (-0.3018817477447192, 0.6510547144565948))
+
+    # Testing with a function as the statistic
+    min_func = lambda x: np.min(x)
+    res = one_sample_conf_int(x, y, reps=100, seed=42, stat=min_func)
+    np.testing.assert_almost_equal(res, (-0.5084626431347878, 0.167033714575861))
+
+
+@raises(AssertionError)
+def test_one_sample_conf_int_bad_shift():
+    # Break it with a bad shift
+    x = np.array(range(5))
+    y = np.array(range(1, 6))
+    shift = (lambda u, d: -d * u, lambda u, d: -u / d)
+    one_sample_conf_int(x, y, seed=5, shift=shift)
+
+
+def test_one_sample_percentile():
+    x = np.arange(1, 101)
+    res = one_sample_percentile(x, 50, p=50, alternative="less")
+    np.testing.assert_equal(res[1], 50)
+    expected_pval = binom.cdf(res[1], len(x), 0.5)
+    np.testing.assert_equal(res[0], expected_pval)
+
+    res = one_sample_percentile(x, 75, p=70, alternative="greater")
+    np.testing.assert_equal(res[1], 75)
+    expected_pval = 1 - binom.cdf(res[1], len(x), 0.7)
+    np.testing.assert_equal(res[0], expected_pval)
+
+    res = one_sample_percentile(x, 20, p=30, alternative="two-sided")
+    np.testing.assert_equal(res[1], 20)
+    expected_pval = 2 * binom.cdf(res[1], len(x), 0.3)
+    np.testing.assert_equal(res[0], expected_pval)
+
+    np.testing.assert_raises(ValueError, one_sample_percentile, x, x_p=50, p=101)
+
+
+def test_one_sample_percentile_ci():
+    x = np.arange(0, 100)
+    res = one_sample_percentile_ci(x)
+    np.testing.assert_equal(res[0], 39)
+    np.testing.assert_almost_equal(res[1], 59)
+
+    res = one_sample_percentile_ci(x, p=50, alternative="upper")
+    np.testing.assert_equal(res[0], 0)
+    np.testing.assert_equal(res[1], 58)
+
+    res = one_sample_percentile_ci(x, p=50, alternative="lower")
+    np.testing.assert_equal(res[0], 40)
+    np.testing.assert_equal(res[1], 99)
+
+    np.testing.assert_raises(ValueError, one_sample_percentile_ci, x, p=101)
+
+    prng = RandomState(42)
+    z = prng.normal(0, 5, 100)
+    res = one_sample_percentile_ci(z, p=50)
+    expected_ci = (-1.546061879256073, 0.55461294854933041)
+    np.testing.assert_almost_equal(res, expected_ci)
diff --git a/permute/tests/test_hoeffding.py b/permute/tests/test_hoeffding.py
new file mode 100644
index 0000000..5095b21
--- /dev/null
+++ b/permute/tests/test_hoeffding.py
@@ -0,0 +1,63 @@
+from __future__ import (absolute_import, division,
+                        print_function, unicode_literals)
+
+from nose.plugins.attrib import attr
+from nose.tools import assert_raises, raises
+
+import numpy as np
+from numpy.random import RandomState
+
+from ..hoeffding import hoeffding_conf_int
+
+
+def test_hoeffding():
+    values = np.array([0.5] * 10)
+    res = hoeffding_conf_int(values, 10, 0, 1, cl=0.95, alternative="two-sided")
+    expected_ci = (0.070530591653262475, 0.92946940834673752)
+    np.testing.assert_almost_equal(res, expected_ci)
+
+    res = hoeffding_conf_int(values, 10, 0, 1, cl=0.95)
+    expected_ci = (0.11297724397950515, 1)
+    np.testing.assert_almost_equal(res, expected_ci)
+
+    lb = np.array([0] * 10)
+    ub = np.array([1] * 10)
+    res = hoeffding_conf_int(values, 10, lb, ub, cl=0.95, alternative="two-sided")
+    expected_ci = (0.070530591653262475, 0.92946940834673752)
+    np.testing.assert_almost_equal(res, expected_ci)
+
+
+def test_hoeffding_conf_int_bad_bound():
+    # Break it with incorrect upper and lower bounds
+    x = np.array(range(10))
+    assert_raises(ValueError, hoeffding_conf_int, x, 10, 5, 6)
+
+    # Mixed scalar and array-like bounds
+    upper_bound = 1
+    lower_bound = np.array([10, 10, 10])
+    assert_raises(ValueError, hoeffding_conf_int, x, 10, lower_bound, upper_bound)
+
+    # Bad bound input length
+    upper_bound = np.array([10, 10])
+    lower_bound = np.array([10, 10])
+    assert_raises(ValueError, hoeffding_conf_int, x, 10, lower_bound, upper_bound)
+
+    # Upper bounds below lower bounds
+    upper_bound = np.array([10] * 10)
+    lower_bound = np.array([20] * 10)
+    assert_raises(ValueError, hoeffding_conf_int, x, 10, lower_bound, upper_bound)
+
+    # x not in range of bounds
+    x = np.array([5] * 10)
+    upper_bound = np.array([3] * 10)
+    lower_bound = np.array([2] * 10)
+    assert_raises(ValueError, hoeffding_conf_int, x, 10, lower_bound, upper_bound)
diff --git a/permute/tests/test_regcoeff.py b/permute/tests/test_regcoeff.py
new file mode 100644
index 0000000..c595b52
--- /dev/null
+++ b/permute/tests/test_regcoeff.py
@@ -0,0 +1,21 @@
+from __future__ import (absolute_import, division,
+                        print_function, unicode_literals)
+
+from nose.tools import assert_raises
+
+import numpy as np
+
+from ..regcoeff import reg_coeff
+
+
+def test_regcoeff():
+    x = np.array(range(10))
+    y = np.array(range(10))
+    res = reg_coeff(x, y, cl=0.95, seed=42)
+    expected = 0
+    np.testing.assert_almost_equal(res, expected)
+
+    y = np.array(range(11))
+    assert_raises(ValueError, reg_coeff, x, y)
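+
+    # Illustrative check of sign invariance: the statistic is the absolute
+    # slope, so an exact negative relationship also yields a p-value of 0.
+    y = -x
+    res = reg_coeff(x, y, reps=100, seed=42)
+    np.testing.assert_almost_equal(res, 0)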