diff --git a/Statistics/Test/KruskalWallis.hs b/Statistics/Test/KruskalWallis.hs index 3a91754..e15a04d 100644 --- a/Statistics/Test/KruskalWallis.hs +++ b/Statistics/Test/KruskalWallis.hs @@ -34,17 +34,22 @@ import qualified Statistics.Sample.Internal as Sample(sum) -- -- The samples and values need not to be ordered but the values in the result -- are ordered. Assigned ranks (ties are given their average rank). -kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> [U.Vector Double] -kruskalWallisRank samples = groupByTags - . sortBy (comparing fst) - . U.zip tags - $ rank (==) joinSample +kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> Maybe [U.Vector Double] +kruskalWallisRank samples = + if firstRank == lastRank + then Nothing + else Just + . groupByTags + . sortBy (comparing fst) + . U.zip tags + $ rank (==) joinSample where (tags,joinSample) = U.unzip . sortBy (comparing snd) $ foldMap (uncurry tagSample) $ zip [(1::Int)..] samples tagSample t = U.map (\x -> (t,x)) - + firstRank = U.head joinSample + lastRank = U.last joinSample groupByTags xs | U.null xs = [] | otherwise = sort (U.map snd ys) : groupByTags zs @@ -56,21 +61,24 @@ kruskalWallisRank samples = groupByTags -- -- In textbooks the output value is usually represented by 'K' or 'H'. This -- function already does the ranking. -kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Double -kruskalWallis samples = (nTot - 1) * numerator / denominator - where - -- Total number of elements in all samples - nTot = fromIntegral $ sumWith rsamples U.length - -- Average rank of all samples - avgRank = (nTot + 1) / 2 - -- - numerator = sumWith rsamples $ \sample -> - let n = fromIntegral $ U.length sample - in n * square (mean sample - avgRank) - denominator = sumWith rsamples $ \sample -> - Sample.sum $ U.map (\r -> square (r - avgRank)) sample +kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Maybe Double +kruskalWallis samples = case kruskalWallisRank samples of + Nothing -> Nothing + Just rsamples -> + Just $ (nTot - 1) * numerator / denominator + where + -- Total number of elements in all samples + nTot = fromIntegral $ sumWith rsamples U.length + -- Average rank of all samples + avgRank = (nTot + 1) / 2 + -- + numerator = sumWith rsamples $ \sample -> + let n = fromIntegral $ U.length sample + in n * square (mean sample - avgRank) + denominator = sumWith rsamples $ \sample -> + Sample.sum $ U.map (\r -> square (r - avgRank)) sample + - rsamples = kruskalWallisRank samples -- | Perform Kruskal-Wallis Test for the given samples and required @@ -80,18 +88,22 @@ kruskalWallis samples = (nTot - 1) * numerator / denominator -- It uses /Chi-Squared/ distribution for approximation as long as the sizes are -- larger than 5. Otherwise the test returns 'Nothing'. kruskalWallisTest :: (Ord a, U.Unbox a) => [U.Vector a] -> Maybe (Test ()) +-- If there are no samples, return Nothing since there's nothing to test. kruskalWallisTest [] = Nothing -kruskalWallisTest samples - -- We use chi-squared approximation here - | all (>4) ns = Just Test { testSignificance = mkPValue $ complCumulative d k - , testStatistics = k - , testDistribution = () - } - | otherwise = Nothing - where - k = kruskalWallis samples - ns = map U.length samples - d = chiSquared (length ns - 1) +-- If there is only one sample, return Nothing since there's nothing to test against. +kruskalWallisTest [_] = Nothing +kruskalWallisTest samples = case kruskalWallis samples of + Nothing -> Nothing + Just k -> + -- We use chi-squared approximation here + if all (>4) ns then Just Test { testSignificance = mkPValue $ complCumulative d k + , testStatistics = k + , testDistribution = () + } + else Nothing + where + ns = map U.length samples + d = chiSquared (length ns - 1) -- * Helper functions diff --git a/tests/Tests/NonParametric.hs b/tests/Tests/NonParametric.hs index a37add1..9d6fd6a 100644 --- a/tests/Tests/NonParametric.hs +++ b/tests/Tests/NonParametric.hs @@ -26,6 +26,7 @@ tests = testGroup "Nonparametric tests" , wilcoxonPairTests , kruskalWallisRankTests , kruskalWallisTests + , kruskalWallisFailureTests , kolmogorovSmirnovDTest ] @@ -161,7 +162,7 @@ kruskalWallisRankTests :: [Tst.TestTree] kruskalWallisRankTests = zipWith test [(0::Int)..] testData where test n (a, b) = testCase "Kruskal-Wallis Ranking" - $ assertEqual ("Kruskal-Wallis " ++ show n) (map U.fromList b) (kruskalWallisRank $ map U.fromList a) + $ assertEqual ("Kruskal-Wallis " ++ show n) (Just $ map U.fromList b) (kruskalWallisRank $ map U.fromList a) testData :: [([[Int]],[[Double]])] testData = [ ( [ [68,93,123,83,108,122] , [119,116,101,103,113,84] @@ -180,7 +181,7 @@ kruskalWallisTests :: [Tst.TestTree] kruskalWallisTests = zipWith test [(0::Int)..] testData where test n (a, b, c) = testCase "Kruskal-Wallis" $ do - assertEqual ("Kruskal-Wallis " ++ show n) (round100 b) (round100 kw) + assertEqual ("Kruskal-Wallis " ++ show n) (Just $ round100 b) (round100 <$> kw) assertEqual ("Kruskal-Wallis Sig " ++ show n) c kwt where kw = kruskalWallis $ map U.fromList a @@ -221,6 +222,17 @@ kruskalWallisTests = zipWith test [(0::Int)..] testData ) ] +kruskalWallisFailureTests :: [Tst.TestTree] +kruskalWallisFailureTests = zipWith test [(0::Int)..] testData + where + test n (a, b) = testCase "Kruskal-Wallis" $ do + assertEqual ("Kruskal-Wallis test case fails" ++ show n) a (kruskalWallisTest $ map U.fromList b) + testData :: [(Maybe (Test ()), [[Double]])] + testData = + [ (Nothing, []) -- No samples were provided + , (Nothing, [[0, 1, 2]]) -- Only one sample + , (Nothing, [[2, 2, 2], [2, 2, 2, 2, 2, 2], [2, 2, 2, 2, 2, 2]]) -- no variation + ] ---------------------------------------------------------------- -- K-S test