From 5e0b2b22f05a22a7712d298b2e0c398c5570a226 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Sun, 5 Nov 2017 16:28:15 -0800 Subject: [PATCH 01/24] ENH: One sample confidence test and intervals for shift model. No comments/tests yet. --- permute/core.py | 238 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 237 insertions(+), 1 deletion(-) diff --git a/permute/core.py b/permute/core.py index 26a0b848..899da5d6 100644 --- a/permute/core.py +++ b/permute/core.py @@ -520,7 +520,8 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", } stats = { 'mean': lambda u: np.mean(u), - 't': lambda u: ttest_1samp(u, 0)[0] + 't': lambda u: ttest_1samp(u, 0)[0], + 'median': lambda u: np.median(u) } if callable(stat): tst_fun = stat @@ -528,6 +529,7 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", tst_fun = stats[stat] tst = tst_fun(z) + n = len(z) if keep_dist: dist = [] @@ -539,3 +541,237 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", hits = np.sum([(tst_fun(z * (1 - 2 * prng.binomial(1, .5, size=n)))) >= tst for i in range(reps)]) return thePvalue[alternative](hits / reps), tst + + +def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", + keep_dist=False, seed=None, shift = None): + r""" + One-sided or two-sided, one-sample permutation test for the mean, + with p-value estimated by simulated random sampling with + reps replications. + + Alternatively, a permutation test for equality of means of two paired + samples. + + This function assumed a shift model. 
+ + Tests the hypothesis that x is distributed symmetrically symmetric about 0 + (or x and y have the same center) against the alternative that x comes from + a population with mean + + (a) greater than 0 (greater than that of the population from which y comes), + if side = 'greater' + (b) less than 0 (less than that of the population from which y comes), + if side = 'less' + (c) different from 0 (different from that of the population from which y comes), + if side = 'two-sided' + + If ``keep_dist``, return the distribution of values of the test statistic; + otherwise, return only the number of permutations for which the value of + the test statistic and p-value. + + Parameters + ---------- + x : array-like + Sample 1 + y : array-like + Sample 2. Must preserve the order of pairs with x. + If None, x is taken to be the one sample. + reps : int + number of repetitions + stat : {'mean', 't'} + The test statistic. The statistic is computed based on either z = x or + z = x - y, if y is specified. + + (a) If stat == 'mean', the test statistic is mean(z). + (b) If stat == 't', the test statistic is the t-statistic-- + but the p-value is still estimated by the randomization, + approximating the permutation distribution. + (c) If stat is a function (a callable object), the test statistic is + that function. The function should take a permutation of the + data and compute the test function from it. 
For instance, if the + test statistic is the maximum absolute value, $\max_i |z_i|$, + the test statistic could be written: + + f = lambda u: np.max(abs(u)) + alternative : {'greater', 'less', 'two-sided'} + The alternative hypothesis to test + keep_dist : bool + flag for whether to store and return the array of values + of the irr test statistic + seed : RandomState instance or {None, int, RandomState instance} + If None, the pseudorandom number generator is the RandomState + instance used by `np.random`; + If int, seed is the seed used by the random number generator; + If RandomState instance, seed is the pseudorandom number generator + shift : float + Assumption of symmetry around the shift, d. + + Returns + ------- + float + the estimated p-value + float + the test statistic + list + The distribution of test statistics. + These values are only returned if `keep_dist` == True + """ + prng = get_prng(seed) + + if y is None: + z = x + elif len(x) != len(y): + raise ValueError('x and y must be pairs') + else: + z = np.array(x) - np.array(y) + + thePvalue = { + 'greater': lambda p: p, + 'less': lambda p: 1 - p, + 'two-sided': lambda p: 2 * np.min([p, 1 - p]) + } + stats = { + 'mean': lambda u: np.mean(u), + 't': lambda u: ttest_1samp(u, 0)[0], + 'median': lambda u: np.median(u) + } + if callable(stat): + tst_fun = stat + else: + tst_fun = stats[stat] + + tst = tst_fun(z) + + if shift is None: + shift = float(0) + + n = len(z) + if keep_dist: + dist = [] + for i in range(reps): + dist.append(tst_fun(shift + (z - shift) * (1 - 2 * prng.binomial(1, .5, size=n)))) + hits = np.sum(dist >= tst) + return thePvalue[alternative](hits / reps), tst, dist + else: + hits = np.sum([(tst_fun(shift + (z - shift) * (1 - 2 * prng.binomial(1, .5, size=n)))) >= tst + for i in range(reps)]) + return thePvalue[alternative](hits / reps), tst + + + + +def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None, + reps=10**4, stat="mean"): + """ + One-sided or two-sided 
confidence interval for the parameter determining + the treatment effect. The default is the "shift model", where we are + interested in the parameter d such that x is equal in distribution to + y + d. In general, if we have some family of invertible functions parameterized + by d, we'd like to find d such that x is equal in distribution to f(y, d). + + Parameters + ---------- + x : array-like + Sample 1 + y : array-like + Sample 2 + cl : float in (0, 1) + The desired confidence level. Default 0.95. + alternative : {"two-sided", "lower", "upper"} + Indicates the alternative hypothesis. + seed : RandomState instance or {None, int, RandomState instance} + If None, the pseudorandom number generator is the RandomState + instance used by `np.random`; + If int, seed is the seed used by the random number generator; + If RandomState instance, seed is the pseudorandom number generator + reps : int + number of repetitions in two_sample + stat : {'mean', 't'} + The test statistic. + + (a) If stat == 'mean', the test statistic is (mean(x) - mean(y)) + (equivalently, sum(x), since those are monotonically related) + (b) If stat == 't', the test statistic is the two-sample t-statistic-- + but the p-value is still estimated by the randomization, + approximating the permutation distribution. + The t-statistic is computed using scipy.stats.ttest_ind + (c) If stat is a function (a callable object), the test statistic is + that function. The function should take a permutation of the pooled + data and compute the test function from it. For instance, if the + test statistic is the Kolmogorov-Smirnov distance between the + empirical distributions of the two samples, $\max_t |F_x(t) - F_y(t)|$, + the test statistic could be written: + + f = lambda u: np.max( \ + [abs(sum(u[:len(x)]<=v)/len(x)-sum(u[len(x):]<=v)/len(y)) for v in u]\ + ) + shift : float + The relationship between x and y under the null hypothesis. + + (a) If None, the relationship is assumed to be additive (e.g. 
x = y+d) + (b) A tuple containing the function and its inverse $(f, f^{-1})$, so + $x_i = f(y_i, d)$ and $y_i = f^{-1}(x_i, d)$ + + Returns + ------- + tuple + the estimated confidence limits + + Notes + ----- + xtol : float + Tolerance in brentq + rtol : float + Tolerance in brentq + maxiter : int + Maximum number of iterations in brentq + """ + assert alternative in ("two-sided", "lower", "upper") + + if shift is None: + shift_limit = max(abs(max(x) - min(y)), abs(max(y) - min(x))) + # FIXME: unused observed + # observed = np.mean(x) - np.mean(y) + elif isinstance(shift, tuple): + assert (callable(shift[0])), "Supply f and finverse in shift tuple" + assert (callable(shift[1])), "Supply f and finverse in shift tuple" + f = shift[0] + finverse = shift[1] + # Check that f is increasing in d; this is very ad hoc! + assert (f(5, 1) < f(5, 2)), "f must be increasing in the parameter d" + shift_limit = max(abs(fsolve(lambda d: f(max(y), d) - min(x), 0)), + abs(fsolve(lambda d: f(min(y), d) - max(x), 0))) + # FIXME: unused observed + # observed = fsolve(lambda d: np.mean(x) - np.mean(f(y, d)), 0) + else: + raise ValueError("Bad input for shift") + ci_low = -shift_limit + ci_upp = shift_limit + max_around_zero = np.max([np.max(x), np.max(-x)]) + ci_low = -max_around_zero + ci_upp = max_around_zero + + if alternative == 'two-sided': + cl = 1 - (1 - cl) / 2 + + if alternative != "upper": + # if shift is None: + # g = lambda q: cl - one_sample_shift(x, y, alternative="less", seed=seed, reps=reps, stat=stat, shift=q)[0] + # else: + g = lambda q: cl - one_sample_shift(x, y, alternative="less", seed=seed, \ + reps=reps, stat=stat, shift=q)[0] + ci_low = brentq(g, -2 * max_around_zero, 2 * max_around_zero) + + if alternative != "lower": + # if shift is None: + # g = lambda q: cl - one_sample_shift(x, y, alternative="greater", seed=seed, reps=reps, stat=stat, shift=q)[0] + # else: + g = lambda q: cl - one_sample_shift(x, y, alternative="greater", seed=seed, \ + reps=reps, 
stat=stat, shift=q)[0] + ci_upp = brentq(g, -2 * max_around_zero, 2 * max_around_zero) + + return ci_low, ci_upp + + + From 38bb698adad91446aa77dd92f60b5553305e1c7a Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 6 Nov 2017 21:29:59 -0800 Subject: [PATCH 02/24] EDH one sample tests --- permute/core.py | 6 +++--- permute/tests/test_core.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/permute/core.py b/permute/core.py index 899da5d6..51ea2d2b 100644 --- a/permute/core.py +++ b/permute/core.py @@ -553,7 +553,7 @@ def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", Alternatively, a permutation test for equality of means of two paired samples. - This function assumed a shift model. + This function assumed a shift model, assuming symmetry around a certain shifted mean. Tests the hypothesis that x is distributed symmetrically symmetric about 0 (or x and y have the same center) against the alternative that x comes from @@ -665,7 +665,7 @@ def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None reps=10**4, stat="mean"): """ One-sided or two-sided confidence interval for the parameter determining - the treatment effect. The default is the "shift model", where we are + a statistic of a univariate sample. The default is the "shift model", where we are interested in the parameter d such that x is equal in distribution to y + d. In general, if we have some family of invertible functions parameterized by d, we'd like to find d such that x is equal in distribution to f(y, d). 
@@ -730,7 +730,7 @@ def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None assert alternative in ("two-sided", "lower", "upper") if shift is None: - shift_limit = max(abs(max(x) - min(y)), abs(max(y) - min(x))) + shift_limit = np.max([np.max(x), np.max(-x)]) # FIXME: unused observed # observed = np.mean(x) - np.mean(y) elif isinstance(shift, tuple): diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index a3668f8b..6ac8b35f 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -217,3 +217,40 @@ def test_one_sample(): res = one_sample(x, y, seed=42, reps=100, stat="t", alternative="less") np.testing.assert_almost_equal(res[0], 0.05) np.testing.assert_almost_equal(res[1], -1.4491883) + + +def test_one_sample_shift(): + prng = RandomState(42) + + x = np.array(range(5)) + y = x - 1 + + # case 1: one sample only + res = one_sample_shift(x, seed=42, reps=100) + np.testing.assert_almost_equal(res[0], 0.05999999) + np.testing.assert_equal(res[1], 2) + + # case 2: paired sample + res = one_sample(x, y, seed=42, reps=100) + np.testing.assert_equal(res[0], 0.02) + np.testing.assert_equal(res[1], 1) + + # case 3: break it - supply x and y, but not paired + y = np.append(y, 10) + assert_raises(ValueError, one_sample, x, y) + + # case 4: say keep_dist=True + res = one_sample(x, seed=42, reps=100, keep_dist=True) + np.testing.assert_almost_equal(res[0], 0.05999999) + np.testing.assert_equal(res[1], 2) + np.testing.assert_equal(min(res[2]), -2) + np.testing.assert_equal(max(res[2]), 2) + np.testing.assert_equal(np.median(res[2]), 0) + + # case 5: use t as test statistic + y = x + prng.normal(size=5) + res = one_sample(x, y, seed=42, reps=100, stat="t", alternative="less") + np.testing.assert_almost_equal(res[0], 0.05) + np.testing.assert_almost_equal(res[1], -1.4491883) + +def test_one_sample_conf_int(): From 76941a3ea424dba19cad9a94d9ce9bde9dfe191b Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Tue, 14 Nov 2017 
01:23:58 -0800 Subject: [PATCH 03/24] WIP one sample confidence intervals and tests assuming symmetry --- permute/core.py | 56 ++++++++++++++++++++------------ permute/tests/test_core.py | 66 +++++++++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 21 deletions(-) diff --git a/permute/core.py b/permute/core.py index 899da5d6..abbd6f33 100644 --- a/permute/core.py +++ b/permute/core.py @@ -542,7 +542,6 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", for i in range(reps)]) return thePvalue[alternative](hits / reps), tst - def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", keep_dist=False, seed=None, shift = None): r""" @@ -553,7 +552,8 @@ def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", Alternatively, a permutation test for equality of means of two paired samples. - This function assumed a shift model. + This function assumed a shift model. Given a shift (float), this test will + find the Tests the hypothesis that x is distributed symmetrically symmetric about 0 (or x and y have the same center) against the alternative that x comes from @@ -662,13 +662,11 @@ def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None, - reps=10**4, stat="mean"): + reps=10**4, stat="mean", shift=None): """ - One-sided or two-sided confidence interval for the parameter determining - the treatment effect. The default is the "shift model", where we are - interested in the parameter d such that x is equal in distribution to - y + d. In general, if we have some family of invertible functions parameterized - by d, we'd like to find d such that x is equal in distribution to f(y, d). + One-sided or two-sided confidence interval for a test statistic of a sample with + or paired sample. The default is the two-sided confidence interval for the mean of a sample x. 
+ Giving Parameters ---------- @@ -729,8 +727,15 @@ def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None """ assert alternative in ("two-sided", "lower", "upper") + if y is None: + z = x + elif len(x) != len(y): + raise ValueError('x and y must be pairs') + else: + z = np.array(x) - np.array(y) + if shift is None: - shift_limit = max(abs(max(x) - min(y)), abs(max(y) - min(x))) + shift_limit = max(z) - min(z) # FIXME: unused observed # observed = np.mean(x) - np.mean(y) elif isinstance(shift, tuple): @@ -740,17 +745,28 @@ def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None finverse = shift[1] # Check that f is increasing in d; this is very ad hoc! assert (f(5, 1) < f(5, 2)), "f must be increasing in the parameter d" - shift_limit = max(abs(fsolve(lambda d: f(max(y), d) - min(x), 0)), - abs(fsolve(lambda d: f(min(y), d) - max(x), 0))) + + shift_limit = max(abs(fsolve(lambda d: f(max(z), d) - min(z), 0)), + abs(fsolve(lambda d: f(min(z), d) - max(z), 0))) # FIXME: unused observed # observed = fsolve(lambda d: np.mean(x) - np.mean(f(y, d)), 0) else: raise ValueError("Bad input for shift") - ci_low = -shift_limit - ci_upp = shift_limit - max_around_zero = np.max([np.max(x), np.max(-x)]) - ci_low = -max_around_zero - ci_upp = max_around_zero + + stats = { + 'mean': lambda u: np.mean(u), + 't': lambda u: ttest_1samp(u, 0)[0], + 'median': lambda u: np.median(u) + } + if callable(stat): + tst_fun = stat + else: + tst_fun = stats[stat] + + tst = tst_fun(z) + + ci_low = tst - shift_limit + ci_upp = tst + shift_limit if alternative == 'two-sided': cl = 1 - (1 - cl) / 2 @@ -759,17 +775,17 @@ def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None # if shift is None: # g = lambda q: cl - one_sample_shift(x, y, alternative="less", seed=seed, reps=reps, stat=stat, shift=q)[0] # else: - g = lambda q: cl - one_sample_shift(x, y, alternative="less", seed=seed, \ + g = lambda q: cl - one_sample_shift(z, 
alternative="less", seed=seed, \ reps=reps, stat=stat, shift=q)[0] - ci_low = brentq(g, -2 * max_around_zero, 2 * max_around_zero) + ci_low = brentq(g, tst - 2 * shift_limit, tst + 2 * shift_limit) if alternative != "lower": # if shift is None: # g = lambda q: cl - one_sample_shift(x, y, alternative="greater", seed=seed, reps=reps, stat=stat, shift=q)[0] # else: - g = lambda q: cl - one_sample_shift(x, y, alternative="greater", seed=seed, \ + g = lambda q: cl - one_sample_shift(z, alternative="greater", seed=seed, \ reps=reps, stat=stat, shift=q)[0] - ci_upp = brentq(g, -2 * max_around_zero, 2 * max_around_zero) + ci_upp = brentq(g, tst - 2 * shift_limit, tst + 2 * shift_limit) return ci_low, ci_upp diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index a3668f8b..6959ef6e 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -12,7 +12,9 @@ two_sample, two_sample_shift, two_sample_conf_int, - one_sample) + one_sample, + one_sample_shift, + one_sample_conf_int) def test_corr(): @@ -217,3 +219,65 @@ def test_one_sample(): res = one_sample(x, y, seed=42, reps=100, stat="t", alternative="less") np.testing.assert_almost_equal(res[0], 0.05) np.testing.assert_almost_equal(res[1], -1.4491883) + + +def test_one_sample_shift(): + prng = RandomState(42) + + x = np.array(range(10)) + y = x - 1 + + # case 1: one sample without shift. 
should work the same as test_one_sample + res0 = one_sample_shift(x, seed=prng, reps=100, shift=3) + res1 = one_sample_shift(x, seed=prng, reps=100, shift=7) + np.testing.assert_almost_equal(res0[0], 0.07) + np.testing.assert_equal(res0[1], np.mean(x)) + np.testing.assert_almost_equal(res1[0], 1) + + # case 2: paired sample + res = one_sample_shift(x, y, seed=42, reps=100, shift=1, keep_dist=True) + np.testing.assert_almost_equal(res[0], 1) + np.testing.assert_equal(res[1], 1) + dist_unique = np.unique(res[2]) # Distribution should be all 1's + np.testing.assert_equal(len(dist_unique), 1) + np.testing.assert_equal(dist_unique[0], 1) + + # case 3: + x = np.array(range(5)) + res = one_sample_shift(x, seed=42, reps=100, stat="t", alternative="less", shift = 0.1) + np.testing.assert_almost_equal(res[0], 0.93999999999999995) + np.testing.assert_almost_equal(res[1], 2.8284271247461898) + +@attr('slow') +def test_one_sample_conf_int(): + prng = RandomState(42) + + x = np.array(range(10)) + res = one_sample_conf_int(x, seed=prng) + expected_ci = (2.2696168, 6.6684788) + np.testing.assert_almost_equal(res, expected_ci) + res = one_sample_conf_int(x, seed=prng, alternative="upper") + expected_ci = (-4.5, 6.255232305502077) + np.testing.assert_almost_equal(res, expected_ci) + res = one_sample_conf_int(x, seed=prng, alternative="lower") + expected_ci = (2.7680067828582393, 13.5) + np.testing.assert_almost_equal(res, expected_ci) + + norm = prng.normal(0, 1, size=100) + shift = prng.normal(1, 1, size=100) + diff = norm - shift + res = one_sample_conf_int(norm, diff, seed=prng) + expected_ci = (0.8, 1.2) + np.testing.assert_almost_equal(res, expected_ci, decimal=1) + + + # Specify shift with a function pair + shift = (lambda u, d: u + d, lambda u, d: u - d) + res = one_sample_conf_int(x, seed=5, shift=shift) + np.testing.assert_almost_equal(res, (2.333333333333319, 6.749999999999943)) + + # Specify shift with a multiplicative pair + shift = (lambda u, d: u * d, lambda u, d: u / 
d) + res = one_sample_conf_int(norm, seed=5, shift=shift) + np.testing.assert_almost_equal(res, (-0.0653441, 0.309073 )) + From 6ebd63191ea241919637b7e8b8ea23873380da8a Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Tue, 14 Nov 2017 01:38:50 -0800 Subject: [PATCH 04/24] removed some merge conflicts --- permute/core.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/permute/core.py b/permute/core.py index b14f9b57..e4ffd465 100644 --- a/permute/core.py +++ b/permute/core.py @@ -552,12 +552,8 @@ def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", Alternatively, a permutation test for equality of means of two paired samples. -<<<<<<< HEAD - This function assumed a shift model, assuming symmetry around a certain shifted mean. -======= This function assumed a shift model. Given a shift (float), this test will find the ->>>>>>> one-sample Tests the hypothesis that x is distributed symmetrically symmetric about 0 (or x and y have the same center) against the alternative that x comes from @@ -668,17 +664,8 @@ def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None, reps=10**4, stat="mean", shift=None): """ -<<<<<<< HEAD - One-sided or two-sided confidence interval for the parameter determining - a statistic of a univariate sample. The default is the "shift model", where we are - interested in the parameter d such that x is equal in distribution to - y + d. In general, if we have some family of invertible functions parameterized - by d, we'd like to find d such that x is equal in distribution to f(y, d). -======= One-sided or two-sided confidence interval for a test statistic of a sample with or paired sample. The default is the two-sided confidence interval for the mean of a sample x. 
- Giving ->>>>>>> one-sample Parameters ---------- @@ -747,13 +734,8 @@ def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None z = np.array(x) - np.array(y) if shift is None: -<<<<<<< HEAD - shift_limit = np.max([np.max(x), np.max(-x)]) -======= shift_limit = max(z) - min(z) ->>>>>>> one-sample - # FIXME: unused observed - # observed = np.mean(x) - np.mean(y) + elif isinstance(shift, tuple): assert (callable(shift[0])), "Supply f and finverse in shift tuple" assert (callable(shift[1])), "Supply f and finverse in shift tuple" From 6c05b138cbe12d2fd32afda79222d52ea2b9d13b Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Tue, 14 Nov 2017 01:48:10 -0800 Subject: [PATCH 05/24] removed some merge conflicts, improved coverage a little --- permute/tests/test_core.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index 41e26619..e4f1e034 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -248,6 +248,10 @@ def test_one_sample_shift(): np.testing.assert_almost_equal(res[0], 0.93999999999999995) np.testing.assert_almost_equal(res[1], 2.8284271247461898) + # case 4: break it - supply x and y, but not paired + y = np.append(y, 10) + assert_raises(ValueError, one_sample_shift, x, y) + @attr('slow') def test_one_sample_conf_int(): prng = RandomState(42) @@ -280,3 +284,7 @@ def test_one_sample_conf_int(): shift = (lambda u, d: u * d, lambda u, d: u / d) res = one_sample_conf_int(norm, seed=5, shift=shift) np.testing.assert_almost_equal(res, (-0.0653441, 0.309073 )) + + # case 3: break it - supply x and y, but not paired + y = np.append(x, 10) + assert_raises(ValueError, one_sample_conf_int, x, y) From 769e5d75fdbd6add99ec4e0b553596f9dc831e14 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 20 Nov 2017 19:48:28 -0800 Subject: [PATCH 06/24] Attempt to raise coverage. 
Adding initial one sample percentile test for symmetric distributions --- permute/core.py | 65 ++++++++++++++++++++++++++++++++++++++ permute/tests/test_core.py | 43 +++++++++++++++++++++++-- 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/permute/core.py b/permute/core.py index e4ffd465..7b5c9cbb 100644 --- a/permute/core.py +++ b/permute/core.py @@ -10,6 +10,7 @@ from scipy.stats import ttest_ind, ttest_1samp from .utils import get_prng, potential_outcomes +from .binomialp import binomial_p def corr(x, y, reps=10**4, seed=None): @@ -432,6 +433,67 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, return ci_low, ci_upp +""" +ROUGH DRAFT: One sample test for percentile. +def one_sample_percentile(x, y=None, p=50, alternative="greater", keep_dist=False, seed=None): + One-sided or two-sided test for the percentile P of a population distribution. + assuming a there is an P/100 chance for each value of the sample to be within the + Pth percentile or not. + + This test defaults to P=50, a test for a symmetrical distribution. + /"/"/" + Parameters + ---------- + x : array-like + Sample 1 + y : array-like + Sample 2. Must preserve the order of pairs with x. + If None, x is taken to be the one sample. + p: int in [0,100] + Percentile of interest to test. + reps : int + number of repetitions + alternative : {'greater', 'less', 'two-sided'} + The alternative hypothesis to test + keep_dist : bool + flag for whether to store and return the array of values + of the irr test statistic + seed : RandomState instance or {None, int, RandomState instance} + If None, the pseudorandom number generator is the RandomState + instance used by `np.random`; + If int, seed is the seed used by the random number generator; + If RandomState instance, seed is the pseudorandom number generator + + Returns + ------- + float + the estimated p-value + float + the test statistic + list + The distribution of test statistics. 
+ These values are only returned if `keep_dist` == True + /"/"/" + prng = get_prng(seed) + + if y is None: + z = x + elif len(x) != len(y): + raise ValueError('x and y must be pairs') + else: + z = np.array(x) - np.array(y) + + if not 0 <= p <= 100: + raise ValueError('p must be between 0 and 100') + + return binomial_p(np.sum(x < np.percentile(x, p)), len(x), p/100, alternative=alternative, keep_dist=keep_dist, seed=seed) +""" + +""" +ROUGH DRAFT: One sample confidence intervals for percentiles +def one_sample_p_int(x, y=None, p=50, cl=0.95, alternative="two-sided", seed=None): + return binom_conf_interval(len(x), x, cl=0.95, alternative="two-sided", p=p/100) +""" def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", keep_dist=False, seed=None): @@ -518,11 +580,13 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", 'less': lambda p: 1 - p, 'two-sided': lambda p: 2 * np.min([p, 1 - p]) } + stats = { 'mean': lambda u: np.mean(u), 't': lambda u: ttest_1samp(u, 0)[0], 'median': lambda u: np.median(u) } + if callable(stat): tst_fun = stat else: @@ -542,6 +606,7 @@ def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", for i in range(reps)]) return thePvalue[alternative](hits / reps), tst + def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", keep_dist=False, seed=None, shift = None): r""" diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index e4f1e034..a12c3f17 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -220,6 +220,11 @@ def test_one_sample(): np.testing.assert_almost_equal(res[0], 0.05) np.testing.assert_almost_equal(res[1], -1.4491883) + # case 6: use median as test statistic + res = one_sample(x, seed=42, reps=100, stat="median") + np.testing.assert_almost_equal(res[0], 0.14) + np.testing.assert_equal(res[1], 2) + def test_one_sample_shift(): prng = RandomState(42) @@ -242,9 +247,9 @@ def test_one_sample_shift(): 
np.testing.assert_equal(len(dist_unique), 1) np.testing.assert_equal(dist_unique[0], 1) - # case 3: + # case 3: t test statistic x = np.array(range(5)) - res = one_sample_shift(x, seed=42, reps=100, stat="t", alternative="less", shift = 0.1) + res = one_sample_shift(x, seed=42, reps=100, stat="t", alternative="less", shift=0.1) np.testing.assert_almost_equal(res[0], 0.93999999999999995) np.testing.assert_almost_equal(res[1], 2.8284271247461898) @@ -252,10 +257,18 @@ def test_one_sample_shift(): y = np.append(y, 10) assert_raises(ValueError, one_sample_shift, x, y) + # case 5: use median as test statistic + x = np.array(range(10)) + res = one_sample_shift(x, seed=42, reps=100, stat="median", shift=4.5) + np.testing.assert_almost_equal(res[0], 0.53) + np.testing.assert_equal(res[1], 4.5) + + @attr('slow') def test_one_sample_conf_int(): prng = RandomState(42) + # Standard confidenceinterval x = np.array(range(10)) res = one_sample_conf_int(x, seed=prng) expected_ci = (2.2696168, 6.6684788) @@ -267,6 +280,7 @@ def test_one_sample_conf_int(): expected_ci = (2.7680067828582393, 13.5) np.testing.assert_almost_equal(res, expected_ci) + # Normal distribution shift centered at 1 norm = prng.normal(0, 1, size=100) shift = prng.normal(1, 1, size=100) diff = norm - shift @@ -285,6 +299,29 @@ def test_one_sample_conf_int(): res = one_sample_conf_int(norm, seed=5, shift=shift) np.testing.assert_almost_equal(res, (-0.0653441, 0.309073 )) - # case 3: break it - supply x and y, but not paired + # break it - supply x and y, but not paired y = np.append(x, 10) assert_raises(ValueError, one_sample_conf_int, x, y) + + # Testing with sample statistic of median + res = one_sample_conf_int(x, seed=42, stat="median") + np.testing.assert_almost_equal(res, (2.499999999999531, 6.500000000000918)) + + # Testing with t statistic + prng = RandomState(42) + x = np.arange(20) + y = x + prng.normal(size=20) + res = one_sample_conf_int(x, y, seed=prng, stat='t') + 
np.testing.assert_almost_equal(res[0], -0.27271209581516753) + np.testing.assert_almost_equal(res[0], 0.6217655068645991) + + +@raises(AssertionError) +def test_one_sample_conf_int_bad_shift(): + # Break it with a bad shift + x = np.array(range(5)) + y = np.array(range(1, 6)) + shift = (lambda u, d: -d * u, lambda u, d: -u / d) + one_sample_conf_int(x, y, seed=5, shift=shift) + + From 0ab6cbad4e24aea21c828a819912d1849ec861ae Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 20 Nov 2017 21:12:18 -0800 Subject: [PATCH 07/24] fix error with test: --- permute/tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index a12c3f17..cd24a63e 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -313,7 +313,7 @@ def test_one_sample_conf_int(): y = x + prng.normal(size=20) res = one_sample_conf_int(x, y, seed=prng, stat='t') np.testing.assert_almost_equal(res[0], -0.27271209581516753) - np.testing.assert_almost_equal(res[0], 0.6217655068645991) + np.testing.assert_almost_equal(res[1], 0.6217655068645991) @raises(AssertionError) From 05e90ee042b76b75fdebfaccd2463dfabdcd0baa Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 20 Nov 2017 21:52:08 -0800 Subject: [PATCH 08/24] Sped up tests a little bit --- permute/tests/test_core.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index cd24a63e..058c05ff 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -304,16 +304,15 @@ def test_one_sample_conf_int(): assert_raises(ValueError, one_sample_conf_int, x, y) # Testing with sample statistic of median - res = one_sample_conf_int(x, seed=42, stat="median") - np.testing.assert_almost_equal(res, (2.499999999999531, 6.500000000000918)) + res = one_sample_conf_int(x, seed=42, reps=100, stat="median") + np.testing.assert_almost_equal(res, (2.499999999999458, 
6.999999999999045)) # Testing with t statistic prng = RandomState(42) x = np.arange(20) y = x + prng.normal(size=20) - res = one_sample_conf_int(x, y, seed=prng, stat='t') - np.testing.assert_almost_equal(res[0], -0.27271209581516753) - np.testing.assert_almost_equal(res[1], 0.6217655068645991) + res = one_sample_conf_int(x, y, reps=100, seed=prng, stat='t') + np.testing.assert_almost_equal(res, (-0.3018817477447192, 0.6510547144565948)) @raises(AssertionError) From b2ea3af69f1e913e7d7876944ac53a00fa51faf8 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 20 Nov 2017 23:18:11 -0800 Subject: [PATCH 09/24] Dont think this helps --- permute/tests/test_core.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index 058c05ff..c7b86688 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -232,13 +232,18 @@ def test_one_sample_shift(): x = np.array(range(10)) y = x - 1 - # case 1: one sample without shift. 
should work the same as test_one_sample + # case 1: res0 = one_sample_shift(x, seed=prng, reps=100, shift=3) res1 = one_sample_shift(x, seed=prng, reps=100, shift=7) np.testing.assert_almost_equal(res0[0], 0.07) np.testing.assert_equal(res0[1], np.mean(x)) np.testing.assert_almost_equal(res1[0], 1) + # one_sample_shift with no shift is the same as a shift of 0 + zero = one_sample_shift(x, seed=42, reps=100) + zero_reg = one_sample(x, seed=42, reps=100) + np.testing.assert_almost_equal(zero, zero_reg) + # case 2: paired sample res = one_sample_shift(x, y, seed=42, reps=100, shift=1, keep_dist=True) np.testing.assert_almost_equal(res[0], 1) From af65f08168b577a53583143bc245eebfc2c81c42 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Tue, 21 Nov 2017 00:12:19 -0800 Subject: [PATCH 10/24] alternate test stats --- permute/tests/test_core.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index c7b86688..4b946cd1 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -225,6 +225,12 @@ def test_one_sample(): np.testing.assert_almost_equal(res[0], 0.14) np.testing.assert_equal(res[1], 2) + # case 7: Test statistic is a function + pcntl = lambda x: np.percentile(x, 20) + res = one_sample(x, seed=42, reps=100, stat=pcntl) + np.testing.assert_almost_equal(res[0], 0.059999999999) + np.testing.assert_almost_equal(res[1], 0.8) + def test_one_sample_shift(): prng = RandomState(42) @@ -268,6 +274,12 @@ def test_one_sample_shift(): np.testing.assert_almost_equal(res[0], 0.53) np.testing.assert_equal(res[1], 4.5) + # case 6: Test statistic is a function + pcntl = lambda x: np.percentile(x, 20) + res = one_sample_shift(x, seed=42, reps=100, stat=pcntl, shift=2) + np.testing.assert_almost_equal(res[0], 0.029999999999999999) + np.testing.assert_almost_equal(res[1], 1.8) + @attr('slow') def test_one_sample_conf_int(): @@ -319,6 +331,11 @@ def test_one_sample_conf_int(): res = 
one_sample_conf_int(x, y, reps=100, seed=prng, stat='t') np.testing.assert_almost_equal(res, (-0.3018817477447192, 0.6510547144565948)) + min_func = lambda x: np.min(x) + res = one_sample_conf_int(x, y, reps=100, seed=42, stat=min_func) + np.testing.assert_almost_equal(res, (-0.5084626431347878, 0.167033714575861)) + + @raises(AssertionError) def test_one_sample_conf_int_bad_shift(): From 03233116709f920cb338828a7305d35a295cef1e Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 27 Nov 2017 16:51:56 -0800 Subject: [PATCH 11/24] developed one sample percentile and confidence intervals --- permute/core.py | 83 +++++++++++++++++++++++++++++--------- permute/tests/test_core.py | 45 +++++++++++++++++++-- 2 files changed, 105 insertions(+), 23 deletions(-) diff --git a/permute/core.py b/permute/core.py index 7b5c9cbb..3d9c42c7 100644 --- a/permute/core.py +++ b/permute/core.py @@ -9,7 +9,7 @@ from scipy.optimize import brentq, fsolve from scipy.stats import ttest_ind, ttest_1samp -from .utils import get_prng, potential_outcomes +from .utils import get_prng, potential_outcomes, binom_conf_interval from .binomialp import binomial_p @@ -433,15 +433,19 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, return ci_low, ci_upp -""" -ROUGH DRAFT: One sample test for percentile. -def one_sample_percentile(x, y=None, p=50, alternative="greater", keep_dist=False, seed=None): + +#ROUGH DRAFT: One sample test for percentile. +def one_sample_percentile(x, y=None, p=50, reps=10**5, alternative="greater", keep_dist=False, seed=None): + r""" One-sided or two-sided test for the percentile P of a population distribution. - assuming a there is an P/100 chance for each value of the sample to be within the - Pth percentile or not. + assuming there is an P/100 chance for each value of the sample to be in the + Pth percentile. - This test defaults to P=50, a test for a symmetrical distribution. 
- /"/"/" + The null hypothesis is that there is an equal P/100 chance for any value of the + sample to lie at or below the sample Pth percentile. + + This test defaults to P=50. + Parameters ---------- x : array-like @@ -469,12 +473,10 @@ def one_sample_percentile(x, y=None, p=50, alternative="greater", keep_dist=Fals float the estimated p-value float - the test statistic + the test statistic: Number of values at or below percentile P of sample list - The distribution of test statistics. - These values are only returned if `keep_dist` == True - /"/"/" - prng = get_prng(seed) + distribution of test statistics (only if keep_dist == True) + """ if y is None: z = x @@ -486,14 +488,55 @@ def one_sample_percentile(x, y=None, p=50, alternative="greater", keep_dist=Fals if not 0 <= p <= 100: raise ValueError('p must be between 0 and 100') - return binomial_p(np.sum(x < np.percentile(x, p)), len(x), p/100, alternative=alternative, keep_dist=keep_dist, seed=seed) -""" + return binomial_p(np.sum(z <= np.percentile(z, p)), len(z), p/100, reps=reps, \ + alternative=alternative, keep_dist=keep_dist, seed=seed) + + + +# ROUGH DRAFT: One sample confidence intervals for percentiles +def one_sample_percentile_conf_int(x, y=None, p=50, cl=0.95, alternative="two-sided", seed=None): + """ + Confidence intervals for a percentile P of the population distribution of a + univariate or paired sample. + + Compute a confidence interval for a binomial p, the probability of success in each trial. + + Parameters + ---------- + n : int + The number of Bernoulli trials. + x : int + The number of successes. + cl : float in (0, 1) + The desired confidence level. + alternative : {"two-sided", "lower", "upper"} + Indicates the alternative hypothesis. + p : int in [0,100] + Desired percentile of interest to test. + kwargs : dict + Key word arguments + + Returns + ------- + tuple + lower and upper confidence level with coverage (approximately) + 1-alpha. 
+ + """ + + if y is None: + z = x + elif len(x) != len(y): + raise ValueError('x and y must be pairs') + else: + z = np.array(x) - np.array(y) + + if not 0 <= p <= 100: + raise ValueError('p must be between 0 and 100') + + conf_int = binom_conf_interval(len(z), np.sum(z <= np.percentile(z, p)), cl=cl, alternative="two-sided", p=p/100) + return (np.percentile(z, conf_int[0]*100), np.percentile(z, conf_int[1]*100)) -""" -ROUGH DRAFT: One sample confidence intervals for percentiles -def one_sample_p_int(x, y=None, p=50, cl=0.95, alternative="two-sided", seed=None): - return binom_conf_interval(len(x), x, cl=0.95, alternative="two-sided", p=p/100) -""" def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", keep_dist=False, seed=None): diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index 4b946cd1..7d083b38 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -14,7 +14,9 @@ two_sample_conf_int, one_sample, one_sample_shift, - one_sample_conf_int) + one_sample_conf_int, + one_sample_percentile, + one_sample_percentile_conf_int) def test_corr(): @@ -134,7 +136,7 @@ def test_two_sample_shift(): # Define a lambda function f = lambda u, v: np.max(u) - np.max(v) - res = two_sample(x, y, seed=42, stat=f, reps=100) + res = two_sample_shift(x, y, seed=42, stat=f, reps=100) expected = (1, -3.2730653690015465) np.testing.assert_equal(res[0], expected[0]) np.testing.assert_equal(res[1], expected[1]) @@ -336,7 +338,6 @@ def test_one_sample_conf_int(): np.testing.assert_almost_equal(res, (-0.5084626431347878, 0.167033714575861)) - @raises(AssertionError) def test_one_sample_conf_int_bad_shift(): # Break it with a bad shift @@ -346,3 +347,41 @@ def test_one_sample_conf_int_bad_shift(): one_sample_conf_int(x, y, seed=5, shift=shift) +def test_one_sample_percentile(): + prng = RandomState(42) + x = np.arange(0, 100) + y = x + prng.normal(size=100) + res = one_sample_percentile(x=x, y=y, p=50, seed=42, reps=100) + 
np.testing.assert_almost_equal(res[0], 0.53925999999999996) + np.testing.assert_equal(res[1], 50) + + y = np.append(y, 10) + np.testing.assert_raises(ValueError, one_sample, x, y) + + np.testing.assert_raises(ValueError, one_sample, x, p=101) + + # Skewed sample + x_skew = np.array(list(x) + [80] * 50) + res = one_sample_percentile(x=x_skew, p=80, seed=42, reps=100) + np.testing.assert_almost_equal(res[0], 0.01) + np.testing.assert_equal(res[1], 131) + +@attr('slow') +def test_one_sample_percentile_conf_int(): + x = np.arange(0, 100) + res = one_sample_percentile_conf_int(x, seed=42, p=50) + np.testing.assert_almost_equal(res[0], 39.43379182082676) + np.testing.assert_almost_equal(res[1], 59.56620817917322) + + + y = np.append(y, 10) + np.testing.assert_raises(ValueError, one_sample, x, y) + + np.testing.assert_raises(ValueError, one_sample, x, p=101) + + prng = RandomState(42) + y = x + prng.normal(size=100) + res = one_sample_percentile_conf_int(x=x, y=y, p=20, seed=42, reps=100) + expected_ci = (-0.94850907410172258, -0.34506992048853352) + np.testing.assert_almost_equal(res, expected_ci) + From c6df5222ac2f6535a412aeadf1c3ab952d88f682 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 27 Nov 2017 16:56:19 -0800 Subject: [PATCH 12/24] developed one sample percentile and confidence intervals --- permute/tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index 7d083b38..50958301 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -352,7 +352,7 @@ def test_one_sample_percentile(): x = np.arange(0, 100) y = x + prng.normal(size=100) res = one_sample_percentile(x=x, y=y, p=50, seed=42, reps=100) - np.testing.assert_almost_equal(res[0], 0.53925999999999996) + np.testing.assert_almost_equal(res[0], 0.52) np.testing.assert_equal(res[1], 50) y = np.append(y, 10) @@ -374,7 +374,7 @@ def test_one_sample_percentile_conf_int(): 
np.testing.assert_almost_equal(res[1], 59.56620817917322) - y = np.append(y, 10) + y = np.append(x, 10) np.testing.assert_raises(ValueError, one_sample, x, y) np.testing.assert_raises(ValueError, one_sample, x, p=101) From 6dc8aa59267c68b670031753ba80a45e2d717d76 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 27 Nov 2017 17:10:30 -0800 Subject: [PATCH 13/24] Fixed tests for one sample percentile confidence tests and intervals --- permute/tests/test_core.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index 50958301..6664dee3 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -136,7 +136,7 @@ def test_two_sample_shift(): # Define a lambda function f = lambda u, v: np.max(u) - np.max(v) - res = two_sample_shift(x, y, seed=42, stat=f, reps=100) + res = two_sample(x, y, seed=42, stat=f, reps=100) expected = (1, -3.2730653690015465) np.testing.assert_equal(res[0], expected[0]) np.testing.assert_equal(res[1], expected[1]) @@ -356,9 +356,9 @@ def test_one_sample_percentile(): np.testing.assert_equal(res[1], 50) y = np.append(y, 10) - np.testing.assert_raises(ValueError, one_sample, x, y) + np.testing.assert_raises(ValueError, one_sample_percentile, x, y) - np.testing.assert_raises(ValueError, one_sample, x, p=101) + np.testing.assert_raises(ValueError, one_sample_percentile, x, p=101) # Skewed sample x_skew = np.array(list(x) + [80] * 50) @@ -375,13 +375,13 @@ def test_one_sample_percentile_conf_int(): y = np.append(x, 10) - np.testing.assert_raises(ValueError, one_sample, x, y) + np.testing.assert_raises(ValueError, one_sample_percentile_conf_int, x, y) - np.testing.assert_raises(ValueError, one_sample, x, p=101) + np.testing.assert_raises(ValueError, one_sample_percentile_conf_int, x, p=101) prng = RandomState(42) y = x + prng.normal(size=100) - res = one_sample_percentile_conf_int(x=x, y=y, p=20, seed=42, reps=100) + res = 
one_sample_percentile_conf_int(x=x, y=y, p=20, seed=42) expected_ci = (-0.94850907410172258, -0.34506992048853352) np.testing.assert_almost_equal(res, expected_ci) From be94bad413a69b8cd434016fca8bd73c43b39e7b Mon Sep 17 00:00:00 2001 From: Michael Zheng Date: Mon, 27 Nov 2017 22:44:04 -0800 Subject: [PATCH 14/24] hoeffding --- permute/hoeffding.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 permute/hoeffding.py diff --git a/permute/hoeffding.py b/permute/hoeffding.py new file mode 100644 index 00000000..b79a36c3 --- /dev/null +++ b/permute/hoeffding.py @@ -0,0 +1,38 @@ +from __future__ import (absolute_import, division, + print_function, unicode_literals) + +import numpy as np + +def hoeffding_conf_int(x, N, cl=0.95, lower_bound=min(x), upper_bound=max(x), alernative="one-sided"): + + r""" + Confidence interval for the mean of bounded, independent observations + derived by inverting Hoeffding's inequality. This method uses the + assumption that the observations have the same bounds. + + Parameters + ---------- + x : array-like + list of observations + N : int + population size + cl : float in (0, 1) + the desired confidence level. Default 0.95. 
+ lower_bound : float + lower bound for the observations + upper_bound : float + upper bound for the observations + alternative : {'greater', 'less', 'two-sided'} + alternative hypothesis to test (default: 'greater') + + Returns + ------- + tuple + the confidence limits + """ + x_bar = np.mean(x) + tau_sq = N*math.pow(upper_bound - lower_bound) + alpha = 1-cl + hCrit = np.sqrt(-math.log(alpha/2)*tau_sq/(2*N**2)) + ci_low, ci_upp = x_bar - hCrit, x_bar + hCrit + return ci_low, ci_upp \ No newline at end of file From 3e1ab19490eb1d243c38a5fa222b064b2a073fa8 Mon Sep 17 00:00:00 2001 From: Michael Zheng Date: Thu, 30 Nov 2017 10:32:41 -0800 Subject: [PATCH 15/24] heoffding cases --- permute/hoeffding.py | 53 +++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/permute/hoeffding.py b/permute/hoeffding.py index b79a36c3..219ff968 100644 --- a/permute/hoeffding.py +++ b/permute/hoeffding.py @@ -2,13 +2,13 @@ print_function, unicode_literals) import numpy as np +import math -def hoeffding_conf_int(x, N, cl=0.95, lower_bound=min(x), upper_bound=max(x), alernative="one-sided"): - +def hoeffding_conf_int(x, N, lower_bound, upper_bound, cl=0.95, alternative="one-sided"): r""" Confidence interval for the mean of bounded, independent observations - derived by inverting Hoeffding's inequality. This method uses the - assumption that the observations have the same bounds. + derived by inverting Hoeffding's inequality. This method uses the + assumption that the observations have the same bounds. Parameters ---------- @@ -17,11 +17,11 @@ def hoeffding_conf_int(x, N, cl=0.95, lower_bound=min(x), upper_bound=max(x), al N : int population size cl : float in (0, 1) - the desired confidence level. Default 0.95. - lower_bound : float - lower bound for the observations - upper_bound : float - upper bound for the observations + the desired confidence level. Default 0.95. 
+ lower_bound : scalar or array-like + lower bound for the observations + upper_bound : scalar or array-like + upper bound for the observations alternative : {'greater', 'less', 'two-sided'} alternative hypothesis to test (default: 'greater') @@ -29,10 +29,31 @@ def hoeffding_conf_int(x, N, cl=0.95, lower_bound=min(x), upper_bound=max(x), al ------- tuple the confidence limits - """ - x_bar = np.mean(x) - tau_sq = N*math.pow(upper_bound - lower_bound) - alpha = 1-cl - hCrit = np.sqrt(-math.log(alpha/2)*tau_sq/(2*N**2)) - ci_low, ci_upp = x_bar - hCrit, x_bar + hCrit - return ci_low, ci_upp \ No newline at end of file + """ + #Check if Bounds are Scalar or Array-Like + if (isinstance(lower_bound, float) or isinstance(lower_bound, int)) and (isinstance(upper_bound, float) or isinstance(upper_bound, int)): + tau_sq = N*((upper_bound - lower_bound)**2) + max_upper = upper_bound + min_lower = lower_bound + elif isinstance(lower_bound, tuple) and isinstance(lower_bound, tuple): + if len(lower_bound) != N or len(upper_bound) != N: + raise ValueError("Bad Bound Input Length") + if np.sum(upper_bound > lower_bound) != N: + raise ValueError("Invalid Upper and Lower bounds") + else: + tau_sq = np.sum(upper_bound - lower_bound)**2 + + max_upper = np.mean(upper_bound) + min_lower = np.mean(lower_bound) + + x_bar = np.mean(x) + hCrit = np.sqrt(-math.log((1-cl)/2)*tau_sq/(2*N**2)) + ci_low, ci_upp = x_bar - hCrit, x_bar + hCrit + + #Truncated Bounds of the Hoeffding Confidence Interval + if ci_upp > max_upper: + ci_upp = max_upper + if ci_low < min_lower: + ci_low = min_lower + + return ci_low, ci_upp \ No newline at end of file From f538e968c1e1552830a013ca6f8e582627b4b71d Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Wed, 13 Dec 2017 02:16:33 -0800 Subject: [PATCH 16/24] Major changes to one sample tests. Merged one_sample_shift into one_sample with 'shifted' center parameter named 'center'. 
Changed function signatures of one_sample_percentile, one_sample_percentile_ci (once one_sample_percentile_conf_int) --- permute/core.py | 209 +++++++++---------------------------- permute/tests/test_core.py | 80 +++++++------- 2 files changed, 89 insertions(+), 200 deletions(-) diff --git a/permute/core.py b/permute/core.py index 3d9c42c7..6f4b1ddf 100644 --- a/permute/core.py +++ b/permute/core.py @@ -7,7 +7,7 @@ import numpy as np from scipy.optimize import brentq, fsolve -from scipy.stats import ttest_ind, ttest_1samp +from scipy.stats import ttest_ind, ttest_1samp, binom from .utils import get_prng, potential_outcomes, binom_conf_interval from .binomialp import binomial_p @@ -434,8 +434,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, return ci_low, ci_upp -#ROUGH DRAFT: One sample test for percentile. -def one_sample_percentile(x, y=None, p=50, reps=10**5, alternative="greater", keep_dist=False, seed=None): +def one_sample_percentile(x, x_p, p=50, alternative="less"): r""" One-sided or two-sided test for the percentile P of a population distribution. assuming there is an P/100 chance for each value of the sample to be in the @@ -450,9 +449,8 @@ def one_sample_percentile(x, y=None, p=50, reps=10**5, alternative="greater", ke ---------- x : array-like Sample 1 - y : array-like - Sample 2. Must preserve the order of pairs with x. - If None, x is taken to be the one sample. + x_p : numeric + Null Hypothesis p: int in [0,100] Percentile of interest to test. 
reps : int @@ -473,28 +471,27 @@ def one_sample_percentile(x, y=None, p=50, reps=10**5, alternative="greater", ke float the estimated p-value float - the test statistic: Number of values at or below percentile P of sample + the test statistic: Number of values at or below x_p in the samples list distribution of test statistics (only if keep_dist == True) """ - if y is None: - z = x - elif len(x) != len(y): - raise ValueError('x and y must be pairs') - else: - z = np.array(x) - np.array(y) - if not 0 <= p <= 100: raise ValueError('p must be between 0 and 100') - return binomial_p(np.sum(z <= np.percentile(z, p)), len(z), p/100, reps=reps, \ - alternative=alternative, keep_dist=keep_dist, seed=seed) + thePvalue = { + 'greater': lambda p: 1 - p, + 'less': lambda p: p, + 'two-sided': lambda p: 2 * np.min([p, 1 - p]) + } + + num_under = np.sum(x <= x_p) + p_value = binom.cdf(num_under, n=len(x), p=p/100) + return thePvalue[alternative](p_value), num_under -# ROUGH DRAFT: One sample confidence intervals for percentiles -def one_sample_percentile_conf_int(x, y=None, p=50, cl=0.95, alternative="two-sided", seed=None): +def one_sample_percentile_ci(x, p=50, cl=0.95, alternative="two-sided", seed=None, reps=10**5): """ Confidence intervals for a percentile P of the population distribution of a univariate or paired sample. 
@@ -524,134 +521,37 @@ def one_sample_percentile_conf_int(x, y=None, p=50, cl=0.95, alternative="two-si """ - if y is None: - z = x - elif len(x) != len(y): - raise ValueError('x and y must be pairs') - else: - z = np.array(x) - np.array(y) - if not 0 <= p <= 100: raise ValueError('p must be between 0 and 100') - conf_int = binom_conf_interval(len(z), np.sum(z <= np.percentile(z, p)), cl=cl, alternative="two-sided", p=p/100) - return (np.percentile(z, conf_int[0]*100), np.percentile(z, conf_int[1]*100)) - - -def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", - keep_dist=False, seed=None): - r""" - One-sided or two-sided, one-sample permutation test for the mean, - with p-value estimated by simulated random sampling with - reps replications. - - Alternatively, a permutation test for equality of means of two paired - samples. - - Tests the hypothesis that x is distributed symmetrically symmetric about 0 - (or x and y have the same center) against the alternative that x comes from - a population with mean - - (a) greater than 0 (greater than that of the population from which y comes), - if side = 'greater' - (b) less than 0 (less than that of the population from which y comes), - if side = 'less' - (c) different from 0 (different from that of the population from which y comes), - if side = 'two-sided' - - If ``keep_dist``, return the distribution of values of the test statistic; - otherwise, return only the number of permutations for which the value of - the test statistic and p-value. - - Parameters - ---------- - x : array-like - Sample 1 - y : array-like - Sample 2. Must preserve the order of pairs with x. - If None, x is taken to be the one sample. - reps : int - number of repetitions - stat : {'mean', 't'} - The test statistic. The statistic is computed based on either z = x or - z = x - y, if y is specified. - - (a) If stat == 'mean', the test statistic is mean(z). 
- (b) If stat == 't', the test statistic is the t-statistic-- - but the p-value is still estimated by the randomization, - approximating the permutation distribution. - (c) If stat is a function (a callable object), the test statistic is - that function. The function should take a permutation of the - data and compute the test function from it. For instance, if the - test statistic is the maximum absolute value, $\max_i |z_i|$, - the test statistic could be written: + #shift_limit = np.max([np.max(x) - np.median(x), np.median(x) - np.min(x)] + x = np.sort(x) + ci_low = np.min(x) + ci_upp = np.max(x) - f = lambda u: np.max(abs(u)) - alternative : {'greater', 'less', 'two-sided'} - The alternative hypothesis to test - keep_dist : bool - flag for whether to store and return the array of values - of the irr test statistic - seed : RandomState instance or {None, int, RandomState instance} - If None, the pseudorandom number generator is the RandomState - instance used by `np.random`; - If int, seed is the seed used by the random number generator; - If RandomState instance, seed is the pseudorandom number generator - - - Returns - ------- - float - the estimated p-value - float - the test statistic - list - The distribution of test statistics. 
- These values are only returned if `keep_dist` == True - """ - prng = get_prng(seed) + if alternative == 'two-sided': + cl = 1 - (1 - cl) / 2 - if y is None: - z = x - elif len(x) != len(y): - raise ValueError('x and y must be pairs') - else: - z = np.array(x) - np.array(y) + if alternative != "upper": + g = lambda q: cl - one_sample_percentile(x, q, p=p, alternative="greater")[0] + ci_low = brentq(g, np.min(x), np.max(x)) + ci_low = x[np.searchsorted(x, ci_low) - 1] - thePvalue = { - 'greater': lambda p: p, - 'less': lambda p: 1 - p, - 'two-sided': lambda p: 2 * np.min([p, 1 - p]) - } + if alternative != "lower": + g = lambda q: cl - one_sample_percentile(x, q, p=p, alternative="less")[0] + ci_upp = brentq(g, np.min(x), np.max(x)) + ci_upp = x[np.searchsorted(x, ci_upp)] - stats = { - 'mean': lambda u: np.mean(u), - 't': lambda u: ttest_1samp(u, 0)[0], - 'median': lambda u: np.median(u) - } - if callable(stat): - tst_fun = stat - else: - tst_fun = stats[stat] + return ci_low, ci_upp - tst = tst_fun(z) - n = len(z) - if keep_dist: - dist = [] - for i in range(reps): - dist.append(tst_fun(z * (1 - 2 * prng.binomial(1, .5, size=n)))) - hits = np.sum(dist >= tst) - return thePvalue[alternative](hits / reps), tst, dist - else: - hits = np.sum([(tst_fun(z * (1 - 2 * prng.binomial(1, .5, size=n)))) >= tst - for i in range(reps)]) - return thePvalue[alternative](hits / reps), tst + # conf_int = binom_conf_interval(len(z), np.sum(z <= np.percentile(z, p)), cl=cl, alternative="two-sided", p=p/100) + # return (np.percentile(z, conf_int[0]*100), np.percentile(z, conf_int[1]*100)) -def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", - keep_dist=False, seed=None, shift = None): +def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", + keep_dist=False, seed=None, center = None): r""" One-sided or two-sided, one-sample permutation test for the mean, with p-value estimated by simulated random sampling with @@ -660,18 +560,15 @@ def 
one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", Alternatively, a permutation test for equality of means of two paired samples. - This function assumed a shift model. Given a shift (float), this test will - find the - - Tests the hypothesis that x is distributed symmetrically symmetric about 0 - (or x and y have the same center) against the alternative that x comes from + Tests the hypothesis that x is distributed symmetrically symmetric about + CENTER (or x and y have the same center) against the alternative that x comes from a population with mean - (a) greater than 0 (greater than that of the population from which y comes), + (a) greater than CENTER (greater than that of the population from which y comes), if side = 'greater' - (b) less than 0 (less than that of the population from which y comes), + (b) less than CENTER (less than that of the population from which y comes), if side = 'less' - (c) different from 0 (different from that of the population from which y comes), + (c) different from CENTER (different from that of the population from which y comes), if side = 'two-sided' If ``keep_dist``, return the distribution of values of the test statistic; @@ -712,8 +609,8 @@ def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", instance used by `np.random`; If int, seed is the seed used by the random number generator; If RandomState instance, seed is the pseudorandom number generator - shift : float - Assumption of symmetry around the shift, d. + center : float + Assumption of symmetry around this center. 
Returns ------- @@ -751,18 +648,18 @@ def one_sample_shift(x, y=None, reps=10**5, stat='mean', alternative="greater", tst = tst_fun(z) - if shift is None: - shift = float(0) + if center is None: + center = float(0) n = len(z) if keep_dist: dist = [] for i in range(reps): - dist.append(tst_fun(shift + (z - shift) * (1 - 2 * prng.binomial(1, .5, size=n)))) + dist.append(tst_fun(center + (z - center) * (1 - 2 * prng.binomial(1, .5, size=n)))) hits = np.sum(dist >= tst) return thePvalue[alternative](hits / reps), tst, dist else: - hits = np.sum([(tst_fun(shift + (z - shift) * (1 - 2 * prng.binomial(1, .5, size=n)))) >= tst + hits = np.sum([(tst_fun(center + (z - center) * (1 - 2 * prng.binomial(1, .5, size=n)))) >= tst for i in range(reps)]) return thePvalue[alternative](hits / reps), tst @@ -792,7 +689,7 @@ def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None If RandomState instance, seed is the pseudorandom number generator reps : int number of repetitions in two_sample - stat : {'mean', 't'} + stat : {'mean', 't'} The test statistic. 
(a) If stat == 'mean', the test statistic is (mean(x) - mean(y)) @@ -878,19 +775,13 @@ def one_sample_conf_int(x, y = None, cl=0.95, alternative="two-sided", seed=None cl = 1 - (1 - cl) / 2 if alternative != "upper": - # if shift is None: - # g = lambda q: cl - one_sample_shift(x, y, alternative="less", seed=seed, reps=reps, stat=stat, shift=q)[0] - # else: - g = lambda q: cl - one_sample_shift(z, alternative="less", seed=seed, \ - reps=reps, stat=stat, shift=q)[0] + g = lambda q: cl - one_sample(z, alternative="less", seed=seed, \ + reps=reps, stat=stat, center=q)[0] ci_low = brentq(g, tst - 2 * shift_limit, tst + 2 * shift_limit) if alternative != "lower": - # if shift is None: - # g = lambda q: cl - one_sample_shift(x, y, alternative="greater", seed=seed, reps=reps, stat=stat, shift=q)[0] - # else: - g = lambda q: cl - one_sample_shift(z, alternative="greater", seed=seed, \ - reps=reps, stat=stat, shift=q)[0] + g = lambda q: cl - one_sample(z, alternative="greater", seed=seed, \ + reps=reps, stat=stat, center=q)[0] ci_upp = brentq(g, tst - 2 * shift_limit, tst + 2 * shift_limit) return ci_low, ci_upp diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index 6664dee3..858649fb 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -6,6 +6,7 @@ import numpy as np from numpy.random import RandomState +from scipy.stats import binom from ..core import (corr, @@ -13,10 +14,10 @@ two_sample_shift, two_sample_conf_int, one_sample, - one_sample_shift, + one_sample, one_sample_conf_int, one_sample_percentile, - one_sample_percentile_conf_int) + one_sample_percentile_ci) def test_corr(): @@ -233,27 +234,20 @@ def test_one_sample(): np.testing.assert_almost_equal(res[0], 0.059999999999) np.testing.assert_almost_equal(res[1], 0.8) - -def test_one_sample_shift(): prng = RandomState(42) x = np.array(range(10)) y = x - 1 # case 1: - res0 = one_sample_shift(x, seed=prng, reps=100, shift=3) - res1 = one_sample_shift(x, seed=prng, reps=100, 
shift=7) + res0 = one_sample(x, seed=prng, reps=100, center=3) + res1 = one_sample(x, seed=prng, reps=100, center=7) np.testing.assert_almost_equal(res0[0], 0.07) np.testing.assert_equal(res0[1], np.mean(x)) np.testing.assert_almost_equal(res1[0], 1) - # one_sample_shift with no shift is the same as a shift of 0 - zero = one_sample_shift(x, seed=42, reps=100) - zero_reg = one_sample(x, seed=42, reps=100) - np.testing.assert_almost_equal(zero, zero_reg) - # case 2: paired sample - res = one_sample_shift(x, y, seed=42, reps=100, shift=1, keep_dist=True) + res = one_sample(x, y, seed=42, reps=100, center=1, keep_dist=True) np.testing.assert_almost_equal(res[0], 1) np.testing.assert_equal(res[1], 1) dist_unique = np.unique(res[2]) # Distribution should be all 1's @@ -262,23 +256,23 @@ def test_one_sample_shift(): # case 3: t test statistic x = np.array(range(5)) - res = one_sample_shift(x, seed=42, reps=100, stat="t", alternative="less", shift=0.1) + res = one_sample(x, seed=42, reps=100, stat="t", alternative="less", center=0.1) np.testing.assert_almost_equal(res[0], 0.93999999999999995) np.testing.assert_almost_equal(res[1], 2.8284271247461898) # case 4: break it - supply x and y, but not paired y = np.append(y, 10) - assert_raises(ValueError, one_sample_shift, x, y) + assert_raises(ValueError, one_sample, x, y) # case 5: use median as test statistic x = np.array(range(10)) - res = one_sample_shift(x, seed=42, reps=100, stat="median", shift=4.5) + res = one_sample(x, seed=42, reps=100, stat="median", center=4.5) np.testing.assert_almost_equal(res[0], 0.53) np.testing.assert_equal(res[1], 4.5) # case 6: Test statistic is a function pcntl = lambda x: np.percentile(x, 20) - res = one_sample_shift(x, seed=42, reps=100, stat=pcntl, shift=2) + res = one_sample(x, seed=42, reps=100, stat=pcntl, center=2) np.testing.assert_almost_equal(res[0], 0.029999999999999999) np.testing.assert_almost_equal(res[1], 1.8) @@ -287,7 +281,7 @@ def test_one_sample_shift(): def 
test_one_sample_conf_int(): prng = RandomState(42) - # Standard confidenceinterval + # Standard confidence interval x = np.array(range(10)) res = one_sample_conf_int(x, seed=prng) expected_ci = (2.2696168, 6.6684788) @@ -349,39 +343,43 @@ def test_one_sample_conf_int_bad_shift(): def test_one_sample_percentile(): prng = RandomState(42) - x = np.arange(0, 100) - y = x + prng.normal(size=100) - res = one_sample_percentile(x=x, y=y, p=50, seed=42, reps=100) - np.testing.assert_almost_equal(res[0], 0.52) + x = np.arange(1, 101) + res = one_sample_percentile(x, 50, p=50, alternative="less") np.testing.assert_equal(res[1], 50) + expected_pval = binom.cdf(res[1], len(x), 0.5) + np.testing.assert_equal(res[0], expected_pval) - y = np.append(y, 10) - np.testing.assert_raises(ValueError, one_sample_percentile, x, y) + res = one_sample_percentile(x, 75, p=70, alternative="greater") + np.testing.assert_equal(res[1], 75) + expected_pval = 1 - binom.cdf(res[1], len(x), 0.7) + np.testing.assert_equal(res[0], expected_pval) - np.testing.assert_raises(ValueError, one_sample_percentile, x, p=101) + res = one_sample_percentile(x, 20, p=30, alternative="two-sided") + np.testing.assert_equal(res[1], 20) + expected_pval = 2 * binom.cdf(res[1], len(x), 0.3) + np.testing.assert_equal(res[0], expected_pval) - # Skewed sample - x_skew = np.array(list(x) + [80] * 50) - res = one_sample_percentile(x=x_skew, p=80, seed=42, reps=100) - np.testing.assert_almost_equal(res[0], 0.01) - np.testing.assert_equal(res[1], 131) + np.testing.assert_raises(ValueError, one_sample_percentile, x, x_p = 50, p=101) -@attr('slow') -def test_one_sample_percentile_conf_int(): +def test_one_sample_percentile_ci(): x = np.arange(0, 100) - res = one_sample_percentile_conf_int(x, seed=42, p=50) - np.testing.assert_almost_equal(res[0], 39.43379182082676) - np.testing.assert_almost_equal(res[1], 59.56620817917322) + res = one_sample_percentile_ci(x) + np.testing.assert_equal(res[0], 39) + 
np.testing.assert_almost_equal(res[1], 59) + res = one_sample_percentile_ci(x, p=50, alternative="upper") + np.testing.assert_equal(res[0], 0) + np.testing.assert_equal(res[1], 58) - y = np.append(x, 10) - np.testing.assert_raises(ValueError, one_sample_percentile_conf_int, x, y) + res = one_sample_percentile_ci(x, p=50, alternative="lower") + np.testing.assert_equal(res[0], 40) + np.testing.assert_equal(res[1], 99) - np.testing.assert_raises(ValueError, one_sample_percentile_conf_int, x, p=101) + y = np.append(x, 10) + np.testing.assert_raises(ValueError, one_sample_percentile_ci, x, p=101) prng = RandomState(42) - y = x + prng.normal(size=100) - res = one_sample_percentile_conf_int(x=x, y=y, p=20, seed=42) - expected_ci = (-0.94850907410172258, -0.34506992048853352) - np.testing.assert_almost_equal(res, expected_ci) + z = prng.normal(0, 5, 100) + res = one_sample_percentile_ci(z, p=50) + expected_ci = (-1.546061879256073, 0.55461294854933041) From b6cbb2b2e62d3a2d17ae8c8f07b5003d2a0013ac Mon Sep 17 00:00:00 2001 From: Michael Zheng Date: Thu, 14 Dec 2017 20:28:45 -0800 Subject: [PATCH 17/24] hoeffding and tests, regression --- permute/hoeffding.py | 31 +++++++++++++++------- permute/regcoeff.py | 47 +++++++++++++++++++++++++++++++++ permute/tests/test_hoeffding.py | 39 +++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 9 deletions(-) create mode 100644 permute/regcoeff.py create mode 100644 permute/tests/test_hoeffding.py diff --git a/permute/hoeffding.py b/permute/hoeffding.py index 219ff968..91b8f340 100644 --- a/permute/hoeffding.py +++ b/permute/hoeffding.py @@ -22,8 +22,8 @@ def hoeffding_conf_int(x, N, lower_bound, upper_bound, cl=0.95, alternative="one lower bound for the observations upper_bound : scalar or array-like upper bound for the observations - alternative : {'greater', 'less', 'two-sided'} - alternative hypothesis to test (default: 'greater') + alternative : {'one-sided', 'two-sided'} + alternative hypothesis to test (default: 
'one-sided') Returns ------- @@ -32,23 +32,36 @@ def hoeffding_conf_int(x, N, lower_bound, upper_bound, cl=0.95, alternative="one """ #Check if Bounds are Scalar or Array-Like if (isinstance(lower_bound, float) or isinstance(lower_bound, int)) and (isinstance(upper_bound, float) or isinstance(upper_bound, int)): + if max(x) > upper_bound or min(x) < lower_bound: + raise ValueError("x values not contained in bounds") tau_sq = N*((upper_bound - lower_bound)**2) max_upper = upper_bound min_lower = lower_bound - elif isinstance(lower_bound, tuple) and isinstance(lower_bound, tuple): + elif isinstance(lower_bound, np.ndarray) and isinstance(lower_bound, np.ndarray): + #Makes sure the bounds are valid, same length, makes sense, and values are in the bounds if len(lower_bound) != N or len(upper_bound) != N: raise ValueError("Bad Bound Input Length") - if np.sum(upper_bound > lower_bound) != N: + if np.sum(upper_bound >= lower_bound) != N: raise ValueError("Invalid Upper and Lower bounds") + if np.sum((x <= upper_bound) and (x >= lower_bound)) != N: + raise ValueError("x values not contained in bounds") else: - tau_sq = np.sum(upper_bound - lower_bound)**2 - + tau_sq = np.sum((upper_bound - lower_bound)**2) max_upper = np.mean(upper_bound) min_lower = np.mean(lower_bound) + else: + raise ValueError("Invalid Upper and Lower Bounds") x_bar = np.mean(x) - hCrit = np.sqrt(-math.log((1-cl)/2)*tau_sq/(2*N**2)) - ci_low, ci_upp = x_bar - hCrit, x_bar + hCrit + + if alternative == "one-sided": + hCrit = np.sqrt(-math.log(1-cl)*tau_sq/(2*N**2)) + ci_low = x_bar - hCrit + ci_upp = max_upper + + if alternative == "two-sided": + hCrit = np.sqrt(-math.log((1-cl)/2)*tau_sq/(2*N**2)) + ci_low, ci_upp = x_bar - hCrit, x_bar + hCrit #Truncated Bounds of the Hoeffding Confidence Interval if ci_upp > max_upper: @@ -56,4 +69,4 @@ def hoeffding_conf_int(x, N, lower_bound, upper_bound, cl=0.95, alternative="one if ci_low < min_lower: ci_low = min_lower - return ci_low, ci_upp \ No newline at 
end of file + return ci_low, ci_upp diff --git a/permute/regcoeff.py b/permute/regcoeff.py new file mode 100644 index 00000000..54e5a96d --- /dev/null +++ b/permute/regcoeff.py @@ -0,0 +1,47 @@ +from __future__ import (absolute_import, division, + print_function, unicode_literals) + +import numpy as np +import math +from scipy.stats import linregress + +from utils import get_prng + +def reg_coeff(x, y, reps=10**5, cl=0.95, alternative="one-sided", seed=None): + r""" + Testing the coefficients of a linear regression. The assumption of the linear regression + is that + + Parameters + ---------- + x : array-like + list of observations + N : int + population size + cl : float in (0, 1) + the desired confidence level. Default 0.95. + lower_bound : scalar or array-like + lower bound for the observations + upper_bound : scalar or array-like + upper bound for the observations + alternative : {'greater', 'less', 'two-sided'} + alternative hypothesis to test (default: 'greater') + + Returns + ------- + tuple + the confidence limits + """ + true_slope = abs(linregress(x, y)[0]) + + prng = get_prng(seed) + slopes = [] + for _ in range(reps): + shuffle_x = np.random.permutation(x) + slope = linregress(shuffle_x, y)[0] + slopes.append(slope) + + p_value = sum(abs(np.array(slopes)) > true_slope)/float(reps) + + return p_value + diff --git a/permute/tests/test_hoeffding.py b/permute/tests/test_hoeffding.py new file mode 100644 index 00000000..eaf14dd4 --- /dev/null +++ b/permute/tests/test_hoeffding.py @@ -0,0 +1,39 @@ +from __future__ import (absolute_import, division, + print_function, unicode_literals) + +from nose.plugins.attrib import attr +from nose.tools import assert_raises, raises + +import numpy as np +from numpy.random import RandomState + + +from ..hoeffding import hoeffding_conf_int + +def test_hoeffding(): + values = np.array([0.5]*10) + res = hoeffding_conf_int(values, 10, 0, 1, cl=0.95, alternative="two-sided") + expected_ci = (0.112977243, 1) + 
np.testing.assert_almost_equal(res, expected_ci) + + res = hoeffding_conf_int(values, 10, 0, 1, cl=0.95) + expected_ci = (0.070530591653262475, 0.92946940834673752) + np.testing.assert_almost_equal(res, expected_ci) + + lb = np.array(list([0]*10)) + ub = np.array(list([1]*10)) + res = hoeffding_conf_int(values, 10, lb, up, cl=0.95, alternative="two-sided") + expected_ci = (0.070530591653262475, 0.92946940834673752) + np.testing.assert_almost_equal(res, expected_ci) + +@raises(ValueError) +def test_hoeffding_conf_int_bad_bound(): + # Break it with a bad shift + values = np.array(range(10)) + res = hoeffding_conf_int(values, 0, 9, 5, cl=0.95, alternative="two-sided") + + + + + + \ No newline at end of file From cef709f6d88ba39abdd30bcce12703723f604a9b Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Thu, 14 Dec 2017 20:33:23 -0800 Subject: [PATCH 18/24] Changes to documentation of one_sample_percentile and one_sample_percentile_ci --- permute/core.py | 51 ++++++++++++++++--------------------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/permute/core.py b/permute/core.py index 6f4b1ddf..1d1e8867 100644 --- a/permute/core.py +++ b/permute/core.py @@ -436,35 +436,27 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, def one_sample_percentile(x, x_p, p=50, alternative="less"): r""" - One-sided or two-sided test for the percentile P of a population distribution. - assuming there is an P/100 chance for each value of the sample to be in the - Pth percentile. + One-sided or two-sided test for the true Pth percentile of a population distribution. - The null hypothesis is that there is an equal P/100 chance for any value of the - sample to lie at or below the sample Pth percentile. + The null hypothesis is that the given percentile is the true Pth percentile. + There is an equal P/100 chance for any value in a sample, of the same length as X, + and drawn from the population, to lie at or below the Pth percentile. 
- This test defaults to P=50. + The p-value returned is the probability of observing a test statistic + as extreme as X_P under the null hypothesis. + + This test defaults to P=50, the 50th percentile. Parameters ---------- x : array-like - Sample 1 + Sample x_p : numeric - Null Hypothesis + An estimate of the Pth percentile of the distribution. p: int in [0,100] Percentile of interest to test. - reps : int - number of repetitions alternative : {'greater', 'less', 'two-sided'} The alternative hypothesis to test - keep_dist : bool - flag for whether to store and return the array of values - of the irr test statistic - seed : RandomState instance or {None, int, RandomState instance} - If None, the pseudorandom number generator is the RandomState - instance used by `np.random`; - If int, seed is the seed used by the random number generator; - If RandomState instance, seed is the pseudorandom number generator Returns ------- @@ -472,8 +464,6 @@ def one_sample_percentile(x, x_p, p=50, alternative="less"): the estimated p-value float the test statistic: Number of values at or below x_p in the samples - list - distribution of test statistics (only if keep_dist == True) """ if not 0 <= p <= 100: @@ -491,27 +481,22 @@ def one_sample_percentile(x, x_p, p=50, alternative="less"): return thePvalue[alternative](p_value), num_under -def one_sample_percentile_ci(x, p=50, cl=0.95, alternative="two-sided", seed=None, reps=10**5): +def one_sample_percentile_ci(x, p=50, cl=0.95, alternative="two-sided"): """ Confidence intervals for a percentile P of the population distribution of a - univariate or paired sample. - - Compute a confidence interval for a binomial p, the probability of success in each trial. + univariate or paired sample of a confidence level CL solely based on a given + sample X drawn from the population. Parameters ---------- - n : int - The number of Bernoulli trials. - x : int - The number of successes. 
+ x : array-like + The sample cl : float in (0, 1) The desired confidence level. alternative : {"two-sided", "lower", "upper"} Indicates the alternative hypothesis. p : int in [0,100] - Desired percentile of interest to test. - kwargs : dict - Key word arguments + Desired percentile of interest to get a confidence interval of. Returns ------- @@ -524,7 +509,6 @@ def one_sample_percentile_ci(x, p=50, cl=0.95, alternative="two-sided", seed=Non if not 0 <= p <= 100: raise ValueError('p must be between 0 and 100') - #shift_limit = np.max([np.max(x) - np.median(x), np.median(x) - np.min(x)] x = np.sort(x) ci_low = np.min(x) ci_upp = np.max(x) @@ -547,9 +531,6 @@ def one_sample_percentile_ci(x, p=50, cl=0.95, alternative="two-sided", seed=Non - # conf_int = binom_conf_interval(len(z), np.sum(z <= np.percentile(z, p)), cl=cl, alternative="two-sided", p=p/100) - # return (np.percentile(z, conf_int[0]*100), np.percentile(z, conf_int[1]*100)) - def one_sample(x, y=None, reps=10**5, stat='mean', alternative="greater", keep_dist=False, seed=None, center = None): r""" From a21a801e0e2b096bea3366334b9853e24586e2c2 Mon Sep 17 00:00:00 2001 From: Michael Zheng Date: Thu, 14 Dec 2017 21:25:00 -0800 Subject: [PATCH 19/24] tests for hoeff and reg --- permute/hoeffding.py | 2 +- permute/regcoeff.py | 5 +---- permute/tests/test_hoeffding.py | 38 +++++++++++++++++++++++++++------ permute/tests/test_regcoeff.py | 19 +++++++++++++++++ 4 files changed, 52 insertions(+), 12 deletions(-) create mode 100644 permute/tests/test_regcoeff.py diff --git a/permute/hoeffding.py b/permute/hoeffding.py index 91b8f340..1ec91e23 100644 --- a/permute/hoeffding.py +++ b/permute/hoeffding.py @@ -43,7 +43,7 @@ def hoeffding_conf_int(x, N, lower_bound, upper_bound, cl=0.95, alternative="one raise ValueError("Bad Bound Input Length") if np.sum(upper_bound >= lower_bound) != N: raise ValueError("Invalid Upper and Lower bounds") - if np.sum((x <= upper_bound) and (x >= lower_bound)) != N: + if np.sum((x <= 
upper_bound)) != N or np.sum((x >= lower_bound)) != N: raise ValueError("x values not contained in bounds") else: tau_sq = np.sum((upper_bound - lower_bound)**2) diff --git a/permute/regcoeff.py b/permute/regcoeff.py index 54e5a96d..a70410b8 100644 --- a/permute/regcoeff.py +++ b/permute/regcoeff.py @@ -5,9 +5,7 @@ import math from scipy.stats import linregress -from utils import get_prng - -def reg_coeff(x, y, reps=10**5, cl=0.95, alternative="one-sided", seed=None): +def reg_coeff(x, y, reps=10**5, cl=0.95): r""" Testing the coefficients of a linear regression. The assumption of the linear regression is that @@ -34,7 +32,6 @@ def reg_coeff(x, y, reps=10**5, cl=0.95, alternative="one-sided", seed=None): """ true_slope = abs(linregress(x, y)[0]) - prng = get_prng(seed) slopes = [] for _ in range(reps): shuffle_x = np.random.permutation(x) diff --git a/permute/tests/test_hoeffding.py b/permute/tests/test_hoeffding.py index eaf14dd4..5095b217 100644 --- a/permute/tests/test_hoeffding.py +++ b/permute/tests/test_hoeffding.py @@ -13,24 +13,48 @@ def test_hoeffding(): values = np.array([0.5]*10) res = hoeffding_conf_int(values, 10, 0, 1, cl=0.95, alternative="two-sided") - expected_ci = (0.112977243, 1) + expected_ci = (0.070530591653262475, 0.92946940834673752) np.testing.assert_almost_equal(res, expected_ci) res = hoeffding_conf_int(values, 10, 0, 1, cl=0.95) - expected_ci = (0.070530591653262475, 0.92946940834673752) + expected_ci = (0.11297724397950515, 1) np.testing.assert_almost_equal(res, expected_ci) lb = np.array(list([0]*10)) ub = np.array(list([1]*10)) - res = hoeffding_conf_int(values, 10, lb, up, cl=0.95, alternative="two-sided") + res = hoeffding_conf_int(values, 10, lb, ub, cl=0.95, alternative="two-sided") expected_ci = (0.070530591653262475, 0.92946940834673752) np.testing.assert_almost_equal(res, expected_ci) -@raises(ValueError) + def test_hoeffding_conf_int_bad_bound(): - # Break it with a bad shift - values = np.array(range(10)) - res = 
hoeffding_conf_int(values, 0, 9, 5, cl=0.95, alternative="two-sided") + # Break it with incorrect upper and lower bounds + x = np.array(range(10)) + assert_raises(ValueError, hoeffding_conf_int, x, 10, 5, 6) + + # Invalid upper and lower bounds + upper_bound = 1 + lower_bound = np.array([10, 10, 10]) + assert_raises(ValueError, hoeffding_conf_int, x, 10, lower_bound, upper_bound) + + # Bad bound input length + upper_bound = np.array([10, 10]) + lower_bound = np.array([10, 10]) + assert_raises(ValueError, hoeffding_conf_int, x, 10, lower_bound, upper_bound) + + # bounds are not value + upper_bound = np.array([10]*10) + lower_bound = np.array([20]*10) + assert_raises(ValueError, hoeffding_conf_int, x, 10, lower_bound, upper_bound) + + # x not in range of bounds + x = np.array([5]*10) + upper_bound = np.array([3]*10) + lower_bound = np.array([2]*10) + assert_raises(ValueError, hoeffding_conf_int, x, 10, lower_bound, upper_bound) + + + diff --git a/permute/tests/test_regcoeff.py b/permute/tests/test_regcoeff.py new file mode 100644 index 00000000..bc6174ea --- /dev/null +++ b/permute/tests/test_regcoeff.py @@ -0,0 +1,19 @@ +from __future__ import (absolute_import, division, + print_function, unicode_literals) + +from nose.plugins.attrib import attr +from nose.tools import assert_raises, raises + +import numpy as np +from numpy.random import RandomState + +from ..regcoeff import reg_coeff + +def test_regcoeff(): + values = np.array([0.5]*10) + x = np.array(range(10)) + y = np.array(range(10)) + + res = reg_coeff(x, y, cl = 0.95) + expected = 0 + np.testing.assert_almost_equal(res, expected) \ No newline at end of file From 5584bb821c909865becf7d9ad4fd6f5c19627d9f Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Thu, 14 Dec 2017 22:34:07 -0800 Subject: [PATCH 20/24] Added prng to regression tests --- permute/regcoeff.py | 11 ++++++++--- permute/tests/test_regcoeff.py | 8 +++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/permute/regcoeff.py 
b/permute/regcoeff.py index a70410b8..1c2ea31f 100644 --- a/permute/regcoeff.py +++ b/permute/regcoeff.py @@ -4,8 +4,9 @@ import numpy as np import math from scipy.stats import linregress +from .utils import get_prng -def reg_coeff(x, y, reps=10**5, cl=0.95): +def reg_coeff(x, y, reps=10**5, cl=0.95, seed=None): r""" Testing the coefficients of a linear regression. The assumption of the linear regression is that @@ -30,11 +31,15 @@ def reg_coeff(x, y, reps=10**5, cl=0.95): tuple the confidence limits """ - true_slope = abs(linregress(x, y)[0]) + if len(x) != len(y): + raise ValueError("Input Vector lengths do not match") + + true_slope = abs(linregress(x, y)[0]) + prng = get_prng(seed) slopes = [] for _ in range(reps): - shuffle_x = np.random.permutation(x) + shuffle_x = prng.permutation(x) slope = linregress(shuffle_x, y)[0] slopes.append(slope) diff --git a/permute/tests/test_regcoeff.py b/permute/tests/test_regcoeff.py index bc6174ea..42f3a981 100644 --- a/permute/tests/test_regcoeff.py +++ b/permute/tests/test_regcoeff.py @@ -13,7 +13,9 @@ def test_regcoeff(): values = np.array([0.5]*10) x = np.array(range(10)) y = np.array(range(10)) - - res = reg_coeff(x, y, cl = 0.95) + res = reg_coeff(x, y, cl = 0.95, seed=42) expected = 0 - np.testing.assert_almost_equal(res, expected) \ No newline at end of file + np.testing.assert_almost_equal(res, expected) + + y.append(10) + assert_raises(ValueError, reg_coeff, x, y) \ No newline at end of file From 932df1c60391241e6fe3199082d36c648b04e76c Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Fri, 15 Dec 2017 03:50:33 -0800 Subject: [PATCH 21/24] Implemented two_sample_fit but only KS is supported. 
--- permute/core.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/permute/core.py b/permute/core.py index 1d1e8867..9c8b0c16 100644 --- a/permute/core.py +++ b/permute/core.py @@ -6,6 +6,7 @@ print_function, unicode_literals) import numpy as np +import math from scipy.optimize import brentq, fsolve from scipy.stats import ttest_ind, ttest_1samp, binom @@ -188,7 +189,9 @@ # dictionary stats = { 'mean': lambda u, v: np.mean(u) - np.mean(v), - 't': lambda u, v: ttest_ind(u, v, equal_var=True)[0] + 't': lambda u, v: ttest_ind(u, v, equal_var=True)[0], + 'KS': lambda u, v: np.max([abs(sum(u <= a) / len(x) - sum(v <= a) / len(y)) + for a in set(u).intersection(set(y))]) } if callable(stat): tst_fun = stat @@ -206,6 +209,36 @@ else: return res, observed_tst +def two_sample_fit(x, y, alpha, method='KS'): + ''' + Apply permutation methods to test the goodness of fit of samples X and Y. + Currently only the Kolmogorov-Smirnov (KS) test is implemented. + + Parameters + ---------- + x: array-like. + sample X + y: array-like. + sample Y + alpha: float in [0, 1] + type I error + + Returns + ------- + boolean + True means two samples come from the same underlying distribution. + False means they do not. + + Notes + ----- + method + This will be the interface function invoking two_sample + for all goodness of fit test statistics for future implementations. 
+ ''' + _, stats = two_sample(x, y, stat='KS') + c = math.sqrt((len(x) + len(y))) / (len(x) * len(y)) + c *= math.sqrt(-0.5 * math.log(alpha * 0.5)) + return stats <= c def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater", keep_dist=False, seed=None, shift=None): From 1a66639d7813db002df08b4a9a5b2d9102dfd46c Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Fri, 15 Dec 2017 03:54:46 -0800 Subject: [PATCH 22/24] added kaplan-ward and sprt --- permute/kaplanward.py | 110 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 permute/kaplanward.py diff --git a/permute/kaplanward.py b/permute/kaplanward.py new file mode 100644 index 00000000..2c163778 --- /dev/null +++ b/permute/kaplanward.py @@ -0,0 +1,110 @@ +import math +import numpy as np +import scipy as sp + +def kaplan_wald_CI(x, level=0.99, lo=0, hi=float('inf'), gamma=0.95, xtol=1.e-32): + ''' + Calculates the confidence interval for the mean of a nonnegative random variable + using the Kaplan-Wald method. + + Parameters + ---------- + x : array-like + your input sample + level : float in (0, 1) + indicates your desired confidence level + lo : float in [0, float('inf')] + the lower bound of your random variable, must be nonnegative + hi : float in (lo, float('inf')) + the upper bound of your random variable, optional + + Returns + ------- + tuple + The estimated confidence interval. If the upper bound is not specified, + only the lower bound of the confidence interval will be returned. 
+ + Notes + ----- + gamma : float in (0, 1) + the variable introduced in Kaplan-Wald method to hedge against small values + xtol : float + Tolerance in brentq + ''' + alpha = 1.0 - level + if not (0 < level < 1): + raise ValueError('CI level must be between 0 and 1.') + if any(x < 0): + raise ValueError('Data x must be nonnegative.') + if lo > hi or any(x < lo) or any(x > hi): + raise ValueError('Data x is not in the specified range.') + def find_t(data, start): + f = lambda t: (np.max(np.cumsum(np.log(gamma*data/t + 1-gamma))) + np.log(alpha)) + start, end = start, np.mean(data) + if f(start) * f(end) <= 0.0: + return sp.optimize.brentq(f, start, end, xtol=xtol) + else: + return start + lo = find_t(x, lo + xtol) + if hi != float('inf'): + hi = find_t(hi - x, xtol) + np.mean(x) + return (lo, hi) + + +def sprt_proportion(x, p0, p1, alpha, beta, start=1, plot=False): + ''' + The model aims to determine the quality of a batch of products by minimal sampling. + The idea is to sample the batch sequentially until a decision can be made whether + the batch conforms to specification and can be accepted or that it should be rejected. + + Parameters + ---------- + x : array-like. your input sample. + each value should indicate success or failure. + alpha: float in [0, 1] + type I error + beta: float in [0, 1] + type II error + p0: float in [0, 1]. proportion to accept null hypothesis + the proportion of positives below which + a decision that the proportion is null can be made + p1: float in [0, 1]. 
proportion to reject null hypothesis + the proportion of positives above which + a decision that the proportion is null can be made + + Returns + ------- + tuple + tuple[0] can be'Reject' the null, 'Accept' the null, + or 'Unknown' based on the data + tuple[1] is the log likelihood ratio for SPRT when a + decision is made or running out of data + tuple[2] is the index of the trail where a decision is made + + Notes + ----- + start + If the last batch of data gives unknown decision, + one can feed the tuple[1] into the param start to continue the SPRT. + ''' + if not (0 <= p0 <= 1 and 0 <= p1 <= 1): + raise ValueError('Proportion must lie in [0, 1]') + if not (0 < alpha < 1 and 0 < beta < 1): + raise ValueError('Error control param must lie in (0, 1)') + lr = start + s_fac = math.log(p1 / p0) + f_fac = math.log((1.0-p1) / (1.0-p0)) + s_bnd = math.log((1-beta) / alpha) + f_bnd = math.log(beta / (1-alpha)) + decision = 'Unknown' + for idx, trail in enumerate(x, 1): + trail = int(trail) + if trail != 0 and trail != 1: + raise ValueError('every trail in the data must be a truthy/falsy value.') + lr += s_fac if trail == 1 else f_fac + if lr >= s_bnd: + decision = 'Reject' + break + elif lr <= f_bnd: + decision = 'Accept' + return (decision, lr, idx) From e413f7350bbeeca6ca91b9d48a136f8c442cd738 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Fri, 15 Dec 2017 16:41:35 -0800 Subject: [PATCH 23/24] Fixed test_regcoeff syntax error --- permute/tests/test_regcoeff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/permute/tests/test_regcoeff.py b/permute/tests/test_regcoeff.py index 42f3a981..c595b520 100644 --- a/permute/tests/test_regcoeff.py +++ b/permute/tests/test_regcoeff.py @@ -17,5 +17,5 @@ def test_regcoeff(): expected = 0 np.testing.assert_almost_equal(res, expected) - y.append(10) - assert_raises(ValueError, reg_coeff, x, y) \ No newline at end of file + y = np.array(range(11)) + assert_raises(ValueError, reg_coeff, x, y) From 
e3ece9dcff0c110b3af0b02285cf55cd82055940 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Fri, 15 Dec 2017 17:20:01 -0800 Subject: [PATCH 24/24] Fixed tests for two_sample_fit --- permute/core.py | 2 +- permute/tests/test_core.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/permute/core.py b/permute/core.py index 9c8b0c16..b1b0d756 100644 --- a/permute/core.py +++ b/permute/core.py @@ -191,7 +191,7 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater", 'mean': lambda u, v: np.mean(u) - np.mean(v), 't': lambda u, v: ttest_ind(u, v, equal_var=True)[0], 'KS': lambda u, v: np.max([abs(sum(u <= a) / len(x) - sum(v <= a) / len(y)) - for a in set(u).intersection(set(y))]) + for a in set(u).union(set(y))]) } if callable(stat): tst_fun = stat diff --git a/permute/tests/test_core.py b/permute/tests/test_core.py index 858649fb..965858fe 100644 --- a/permute/tests/test_core.py +++ b/permute/tests/test_core.py @@ -13,6 +13,7 @@ two_sample, two_sample_shift, two_sample_conf_int, + two_sample_fit, one_sample, one_sample, one_sample_conf_int, @@ -95,6 +96,17 @@ def test_two_sample(): np.testing.assert_equal(res[0], expected[0]) np.testing.assert_equal(res[1], expected[1]) +def test_two_sample_fit(): + x = np.array(range(10)) + y = np.array(range(10)) + alpha = 0.01 + res = two_sample_fit(x, y, alpha) + np.testing.assert_equal(res, True) + + y = np.array(range(10, 20)) + alpha = 0.1 + res = two_sample_fit(x, y, alpha) + np.testing.assert_equal(res, False) def test_two_sample_shift(): prng = RandomState(42)