Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 43 additions & 31 deletions Statistics/Test/KruskalWallis.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,22 @@ import qualified Statistics.Sample.Internal as Sample(sum)
--
-- The samples and values need not to be ordered but the values in the result
-- are ordered. Assigned ranks (ties are given their average rank).
kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> [U.Vector Double]
kruskalWallisRank samples = groupByTags
. sortBy (comparing fst)
. U.zip tags
$ rank (==) joinSample
kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> Maybe [U.Vector Double]
kruskalWallisRank samples =
if firstRank == lastRank
then Nothing
else Just
. groupByTags
. sortBy (comparing fst)
. U.zip tags
$ rank (==) joinSample
where
(tags,joinSample) = U.unzip
. sortBy (comparing snd)
$ foldMap (uncurry tagSample) $ zip [(1::Int)..] samples
tagSample t = U.map (\x -> (t,x))

firstRank = U.head joinSample
lastRank = U.last joinSample
groupByTags xs
| U.null xs = []
| otherwise = sort (U.map snd ys) : groupByTags zs
Expand All @@ -56,21 +61,24 @@ kruskalWallisRank samples = groupByTags
--
-- In textbooks the output value is usually represented by 'K' or 'H'. This
-- function already does the ranking.
kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Double
kruskalWallis samples = (nTot - 1) * numerator / denominator
where
-- Total number of elements in all samples
nTot = fromIntegral $ sumWith rsamples U.length
-- Average rank of all samples
avgRank = (nTot + 1) / 2
--
numerator = sumWith rsamples $ \sample ->
let n = fromIntegral $ U.length sample
in n * square (mean sample - avgRank)
denominator = sumWith rsamples $ \sample ->
Sample.sum $ U.map (\r -> square (r - avgRank)) sample
kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Maybe Double
kruskalWallis samples = case kruskalWallisRank samples of
Nothing -> Nothing
Just rsamples ->
Just $ (nTot - 1) * numerator / denominator
where
-- Total number of elements in all samples
nTot = fromIntegral $ sumWith rsamples U.length
-- Average rank of all samples
avgRank = (nTot + 1) / 2
--
numerator = sumWith rsamples $ \sample ->
let n = fromIntegral $ U.length sample
in n * square (mean sample - avgRank)
denominator = sumWith rsamples $ \sample ->
Sample.sum $ U.map (\r -> square (r - avgRank)) sample


rsamples = kruskalWallisRank samples


-- | Perform Kruskal-Wallis Test for the given samples and required
Expand All @@ -80,18 +88,22 @@ kruskalWallis samples = (nTot - 1) * numerator / denominator
-- It uses /Chi-Squared/ distribution for approximation as long as the sizes are
-- larger than 5. Otherwise the test returns 'Nothing'.
kruskalWallisTest :: (Ord a, U.Unbox a) => [U.Vector a] -> Maybe (Test ())
-- If there are no samples, return Nothing since there's nothing to test.
kruskalWallisTest [] = Nothing
kruskalWallisTest samples
-- We use chi-squared approximation here
| all (>4) ns = Just Test { testSignificance = mkPValue $ complCumulative d k
, testStatistics = k
, testDistribution = ()
}
| otherwise = Nothing
where
k = kruskalWallis samples
ns = map U.length samples
d = chiSquared (length ns - 1)
-- If there is only one sample, return Nothing since there's nothing to test against.
kruskalWallisTest [_] = Nothing
kruskalWallisTest samples = case kruskalWallis samples of
Nothing -> Nothing
Just k ->
-- We use chi-squared approximation here
if all (>4) ns then Just Test { testSignificance = mkPValue $ complCumulative d k
, testStatistics = k
, testDistribution = ()
}
else Nothing
where
ns = map U.length samples
d = chiSquared (length ns - 1)

-- * Helper functions

Expand Down
16 changes: 14 additions & 2 deletions tests/Tests/NonParametric.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ tests = testGroup "Nonparametric tests"
, wilcoxonPairTests
, kruskalWallisRankTests
, kruskalWallisTests
, kruskalWallisFailureTests
, kolmogorovSmirnovDTest
]

Expand Down Expand Up @@ -161,7 +162,7 @@ kruskalWallisRankTests :: [Tst.TestTree]
kruskalWallisRankTests = zipWith test [(0::Int)..] testData
where
test n (a, b) = testCase "Kruskal-Wallis Ranking"
$ assertEqual ("Kruskal-Wallis " ++ show n) (map U.fromList b) (kruskalWallisRank $ map U.fromList a)
$ assertEqual ("Kruskal-Wallis " ++ show n) (Just $ map U.fromList b) (kruskalWallisRank $ map U.fromList a)
testData :: [([[Int]],[[Double]])]
testData = [ ( [ [68,93,123,83,108,122]
, [119,116,101,103,113,84]
Expand All @@ -180,7 +181,7 @@ kruskalWallisTests :: [Tst.TestTree]
kruskalWallisTests = zipWith test [(0::Int)..] testData
where
test n (a, b, c) = testCase "Kruskal-Wallis" $ do
assertEqual ("Kruskal-Wallis " ++ show n) (round100 b) (round100 kw)
assertEqual ("Kruskal-Wallis " ++ show n) (Just $ round100 b) (round100 <$> kw)
assertEqual ("Kruskal-Wallis Sig " ++ show n) c kwt
where
kw = kruskalWallis $ map U.fromList a
Expand Down Expand Up @@ -221,6 +222,17 @@ kruskalWallisTests = zipWith test [(0::Int)..] testData
)
]

kruskalWallisFailureTests :: [Tst.TestTree]
kruskalWallisFailureTests = zipWith test [(0::Int)..] testData
where
test n (a, b) = testCase "Kruskal-Wallis" $ do
assertEqual ("Kruskal-Wallis test case fails" ++ show n) a (kruskalWallisTest $ map U.fromList b)
testData :: [(Maybe (Test ()), [[Double]])]
testData =
[ (Nothing, []) -- No samples were provided
, (Nothing, [[0, 1, 2]]) -- Only one sample
, (Nothing, [[2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2]]) -- no variation
]

----------------------------------------------------------------
-- K-S test
Expand Down
Loading