diff --git a/README.md b/README.md index 2ae071e2a..2228ddf7b 100644 --- a/README.md +++ b/README.md @@ -256,4 +256,4 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection `cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries. #### Examples: -- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVUJQiSapLtoK5IF2qpaVsjj8TBWBnvqBwRF-e977ZmBgZBqtVRqwL6Pc889vvZzoJnWXAod9P9-Dngc9HthkBGxsmTFgn5AbUyCMNDSKup-d94tBLyDG5nvFF-lBhq0CZfdy99CmHwd345HcHM_fbifjubj-0nb2Xr7z5wyoVkMVsRMgUkZjHJC8U-5E8JXphwQuGx3oeEMFkG5twiaAx9lJy2syQ6ENGA1wzBcQ8IzBuyJstwAF0DlOs84EZTBlpvUpyrjeDjwvQwiI0PQnqBHjr-SuiUQs4fuPqkxeb_T2W63beJht6VadbLCWHc-j2_uJrO7FkLfu30RGTILiv2wXGHh0Q5IjsgoiRBvRrYgFZCVYrhnpEO-VdxwsQpBy8RsiWI-Tsy1UTyy5oi8CifWXzdA-ohA4kYzGM8WAXwczcaz0Mf5Np7_cf9lDt9G0-loMh_fzeB-is2a3I5dq_DXJxhNvsOf48ltCAypw1TsKVeuCoTKHa0sLjicMXYEI5EFLJ0zyhNOoVIQrOSGKYFlQc7UmhdaQ5Cxj5PxNTfE-LVXxflUnYVYiF-4oJmNGVxRS2UnyqRcL7Hvhqk2ten1sY1JldWmQ6UVpu02X23FbIMplhtGjVTnTdgTo9YBW-YSm7Y7b6Wxuwyl1j7FwCU2hZG1X-bCoOK4aGwkj5sL8YyFgVukWLdxHIOw6-Uj22kntiH0ur92u90B7D-dTucKfmeCKWJYuQ3O_nwkkxduw0Pcd9Btvx-UkcbIrjKe64QrbSAlWeLjuWCy3PD0vpFAvErQKjMPXntoG5Xd0uhx6SvzOPCr22xVmygR8M0tszvynKmDBmaXF0I7tgCcBE5eaLL0JkOXfbB3neVk60553auYDDFLiM0MFA12mjwF5Kt3kuv365q7qnJdl_GeaxW-lKm1ift91KCBqys8kR9t9ojAPO-e47fxJFgOU7lyJK6Knru5WMDsY6yFgNoHw_tcTMRZmbtQZ79_pPMaatewRtU5P1v3LpWiG26rHbEVyrYZeo82ZnDfe80yDbFOK_nSWyHvdZdB3QAdvVj2thd1sRRWYh-mcDjaqgIcYBwafF7M5Tz3Ds6wlDOJ40aFIiwzNX_KWiRldu0scRBidxoF8BASkml2zN1ZR1F3PDoptRhlKX9Zpna107efqziFN1xane1KDeGs35eGjnN3EepU2iyGIh34q80oy1q51HixbBj44YHEzB-mw16dFTyq7i7Ur6gJD4VXjf0vcH2ZJiUG8Bbzt7W7RpjwqE6Gizui3N3W9QOhvWOuZEQyvN7wQomJIahzZamxGCushSmjsKeUR9yga8HrSd2fHqbXXTxjOZbl5oUskGArIkSOrHhfXtVWq8oNhmPTlOiUafcEif3MwnrP0yn2dIqSTvEGnUkmkS7HtgM7LI64X2xU8vIXWuNYbGcaVF8qT6yTQbMJnTJgIb9Cu0Xe5H_kPa2kvvSTvKJ5fkA6UeKLBb9XJPhl17dyPTmsnw48xVAVArr48wVfq-4NiK9CdXjUBmJDae_yve3htsxN8eINWhhoSC8ueh-gRRRNh3q9_NCFVgvvLYP_GczB4lZG1pF_Bmc8qsWklGa4uCkerriA9YrH4CWs9vHmONpH7oKXf_y_fwHeCexw)) \ No newline at end of file +- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVSJhEg0SXfRViQLaauqVMjjMYyVwZ76AUFR_vteezxkCKRaLZGSYN_HueceX_sp0kxrLoWO-t-fIp5G_V4c5UQsLVmyqB9Rm5IojrS0irrvnXczAe_gShZbxZeZgQZtwmn39I8W_jqLYfxldD0awtXt5O52Mrwf3Y7bzsE7feKUCc1SsCJlCkzGYFgQin_CTgxfmHJo4LTdhYYzmEVhbxY1z32UrbSwIlsQ0oDVDMNwDQueM2CPlBUGuAAqV0XOiaAMNtxkPlWI4-HAtxBEJoagPUGPAr8t6pZAzA66-2TGFP1OZ7PZtImH3ZZq2clLY935NLq6GU9vWgh95_ZZ5EgvKPbTcoWFJ1sgBSKjJEG8OdmAVECWiuGekQ75RnHDxTIGLRdmQxTzcVKujeKJNXvkVTix_roB0kcEEjecwmg6i-DDcDqaxj7O19H9X7ef7-HrcDIZju9HN1O4nWCzxtcj1yr89hGG42_w92h8HQND6jAVeyyUqwKhckcrS0sOp4ztwVjIEpYuGOULTqGSESzlmimBZUHB1IqXgkOQqY-T8xU3xPi1g-J8qs5MzMRvXNDcpgwuqKWyk-RSrubYd8NUm9rsct_GZMpq06HSCtN2mwdbKVtjivmaUSPVcRP2yKh1wOaFxKZtj1tp7C5DqbVfY-ASm8LIyi9zYVBxXDTWkqfNmXjCwsAtUqzbOI5B2NX8gW21E9sAet3fu93uOew-nU7nAv5kgiliWNgGZ388kilKt8FL3HfQbZ-dh0gjZFcZz_WCK20gI_nCx3PBZNjw9L6RQBwkaIXM54ce2iahWxo9Tn1lHgf-6zZb1SZKBHxzQ3ZHnjN10MBsi1Jo-xaAk8DJC03m3mTgsp_vXKcF2bhTXvcqJ0PKFsTmBsoGO02-BuSrd5Lr9-uau6hyXYZ4T7UKn0NqbdJ-HzVo4OICT-QHmz8gMM-75_htPAssh6lCORKXZc_dXCxh9jHWTEDtg-F9LibSPOQu1dnv7-m8hto1rFF1zs_WnUul6IbbaidsibJtxt6jjRnc_71mSEOs00ox91bIe93lvG6Ajl4sO9uTulhKK7ELUzrsbVUBXmC8NPi4mMM89w7OMMiZpGmjQhGHTM1fspZImV86SxyE2J1GCTyGBck12-fuqKOoO-6dlFqMUMo_lqlt7fTt5ipO4TWXVufboCGc9bvS0PHeXYQ6kzZPoUwH_mozyrJWITVeLGsGfnggMfd3k0GvzgoeVXcX6gNq4pfCq8b-F7i-TJMRA3iL-dvaXSNMeFSvhos7otzd1vUDob1joWRCcrze8EJJiSGoc2WpsRgrroUJUdhjxhNu0LXk9VXdH-8ml108YwWW5eaFLJFgKxJEjqx4X17VVqvKDYZ904zojGn3BEn9zMJ6j9MpdnSKQKd4g85FLpEux7YDOyiPuF9sVPLyF1pjX2xHGlRfCifWyaDZhE4IWMqv1G6Zd_E_8r6upL70i7yieXxAoii_rxjR2N30xwDfLrha0eENXAcPLALwg4kYzPH6wcNYMy8bVC3PvVDmlVB8qIqdY3NVMRSfgC5-fcaXsXtq4uNTvTygI7GmtHd6Znu4LQtTvq6jFgYa0JOT3ntoEUWzgV7N33eh1UIgBn8ZzMHSVk5WiX9y5zypxaSU5ri4Lt_HuIC0iofoOa728YLa28cWRc8__M-_9KAQpw==)) \ No newline at end of file diff --git a/benchmarks/bloom_filter/utils.hpp b/benchmarks/bloom_filter/utils.hpp index cec7e06c3..089ac3dcd 100644 --- a/benchmarks/bloom_filter/utils.hpp +++ b/benchmarks/bloom_filter/utils.hpp @@ -62,13 +62,24 @@ void add_fpr_summary(nvbench::state& state, FilterType& filter) filter.add(tp_begin, tp_end); filter.contains(tn_begin, tn_end, result.begin()); - float fp = thrust::count(thrust::device, result.begin(), result.end(), true); + auto const fp_emp = + static_cast(thrust::count(thrust::device, result.begin(), result.end(), true)) / + static_cast(num_keys); - auto& summ = state.add_summary("FalsePositiveRate"); - summ.set_string("hint", "FPR"); - summ.set_string("short_name", "FPR"); - summ.set_string("description", "False-positive rate of the bloom filter."); - summ.set_float64("value", fp / num_keys); + auto& summ_fp = state.add_summary("FalsePositiveRate"); + summ_fp.set_string("hint", "FPR"); + summ_fp.set_string("short_name", "FPR"); + summ_fp.set_string("description", "False-positive rate of the bloom filter."); + summ_fp.set_float64("value", fp_emp); + + auto const fp_theo = filter.expected_false_positive_rate(num_keys); + auto const fp_dev = fp_emp - fp_theo; + + auto& summ_dev = state.add_summary("FalsePositiveRateDeviation"); + summ_dev.set_string("hint", "FPRDev"); + summ_dev.set_string("short_name", "FPRDev"); + summ_dev.set_string("description", "Deviation of false-positive rate over theoretical value."); + summ_dev.set_float64("value", fp_dev); filter.clear(); } diff --git a/examples/bloom_filter/host_bulk_example.cu b/examples/bloom_filter/host_bulk_example.cu index f02f6e657..9b184b383 100644 --- a/examples/bloom_filter/host_bulk_example.cu +++ b/examples/bloom_filter/host_bulk_example.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,7 +68,8 @@ int main(void) float fp_rate = float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn); - std::cout << "TPR=" << tp_rate << " FPR=" << fp_rate << std::endl; + std::cout << "TPR[measured]=" << tp_rate << " FPR[measured]=" << fp_rate + << " FPR[expected]=" << filter.expected_false_positive_rate(num_tp) << std::endl; return 0; } \ No newline at end of file diff --git a/include/cuco/bloom_filter.cuh b/include/cuco/bloom_filter.cuh index 0d6003b19..3bfe66691 100644 --- a/include/cuco/bloom_filter.cuh +++ b/include/cuco/bloom_filter.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -312,6 +312,17 @@ class bloom_filter { OutputIt output_begin, cuda::stream_ref stream = {}) const noexcept; + /** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * @param num_items Number of inserted distinct elements + * + * @return Approximation of the expected false-positive rate + */ + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items) const; + /** * @brief Gets a pointer to the underlying filter storage. * diff --git a/include/cuco/bloom_filter_policies.cuh b/include/cuco/bloom_filter_policies.cuh index 0d28b166a..4fce16aea 100644 --- a/include/cuco/bloom_filter_policies.cuh +++ b/include/cuco/bloom_filter_policies.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ using arrow_filter_policy = detail::arrow_filter_policy; * @brief The default policy that defines how a Blocked Bloom Filter generates and stores a key's * fingerprint. * - * @note `Word` type must be an atomically updatable integral type. `WordsPerBlock` must + * @note `Word` type must be an atomically updatable unsigned integral type. `WordsPerBlock` must * be a power-of-two. * * @tparam Hash Hash function used to generate a key's fingerprint @@ -92,7 +92,7 @@ class default_filter_policy { * * @return The hash value of the key */ - __device__ constexpr hash_result_type hash(hash_argument_type const& key) const; + [[nodiscard]] __device__ constexpr hash_result_type hash(hash_argument_type const& key) const; /** * @brief Determines the filter block a key is added into. @@ -108,7 +108,8 @@ class default_filter_policy { * @return The block index for the given key's hash value */ template - __device__ constexpr auto block_index(hash_result_type hash, Extent num_blocks) const; + [[nodiscard]] __device__ constexpr auto block_index(hash_result_type hash, + Extent num_blocks) const; /** * @brief Determines the fingerprint pattern for a word/segment within the filter block for a @@ -122,8 +123,21 @@ class default_filter_policy { * * @return The bit pattern for the word/segment in the filter block */ - __device__ constexpr word_type word_pattern(hash_result_type hash, - std::uint32_t word_index) const; + [[nodiscard]] __device__ constexpr word_type word_pattern(hash_result_type hash, + std::uint32_t word_index) const; + + /** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * @param num_items Number of inserted distinct elements + * @param num_blocks The total number of blocks in the filter + * + * @return Approximation of the expected false-positive rate + */ + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items, + size_t num_blocks) const; private: impl_type impl_; ///< Policy implementation diff --git a/include/cuco/bloom_filter_ref.cuh b/include/cuco/bloom_filter_ref.cuh index 2f3dcfa2b..98b5aa8c4 100644 --- a/include/cuco/bloom_filter_ref.cuh +++ b/include/cuco/bloom_filter_ref.cuh @@ -369,6 +369,17 @@ class bloom_filter_ref { OutputIt output_begin, cuda::stream_ref stream = {}) const noexcept; + /** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * @param num_items Number of inserted distinct elements + * + * @return Approximation of the expected false-positive rate + */ + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items) const; + /** * @brief Gets a pointer to the underlying filter storage. * diff --git a/include/cuco/detail/bloom_filter/arrow_filter_policy.cuh b/include/cuco/detail/bloom_filter/arrow_filter_policy.cuh index 2f17fa726..39b616ed1 100644 --- a/include/cuco/detail/bloom_filter/arrow_filter_policy.cuh +++ b/include/cuco/detail/bloom_filter/arrow_filter_policy.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -135,7 +136,8 @@ class arrow_filter_policy { * @return The block index for the given key's hash value */ template - __device__ constexpr auto block_index(hash_result_type hash, Extent num_blocks) const + [[nodiscard]] __device__ constexpr auto block_index(hash_result_type hash, + Extent num_blocks) const { constexpr auto hash_bits = cuda::std::numeric_limits::digits; // TODO: assert if num_blocks > max_filter_blocks @@ -153,7 +155,8 @@ class arrow_filter_policy { * * @return The bit pattern for the word/segment in the filter block */ - __device__ constexpr word_type word_pattern(hash_result_type hash, std::uint32_t word_index) const + [[nodiscard]] __device__ constexpr word_type word_pattern(hash_result_type hash, + std::uint32_t word_index) const { word_type const key = static_cast(hash); std::uint32_t salt; @@ -182,6 +185,27 @@ class arrow_filter_policy { return word_type{1} << ((key * salt) >> 27); } + /** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * @param num_items Number of inserted distinct elements + * @param num_blocks The total number of blocks in the filter + * + * @return Approximation of the expected false-positive rate + */ + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items, + size_t num_blocks) const + { + return blocked_bloom_filter_expected_fpr(num_items, + num_blocks * words_per_block * sizeof(word_type), + cuda::std::numeric_limits::digits, + words_per_block, + cuda::std::numeric_limits::digits, + words_per_block); + } + private: hasher hash_; }; diff --git a/include/cuco/detail/bloom_filter/bloom_filter.inl b/include/cuco/detail/bloom_filter/bloom_filter.inl index 870509cb8..2694a2852 100644 --- a/include/cuco/detail/bloom_filter/bloom_filter.inl +++ b/include/cuco/detail/bloom_filter/bloom_filter.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -129,6 +129,14 @@ __host__ constexpr void bloom_filter::con ref_.contains_if_async(first, last, stencil, pred, output_begin, stream); } +template +[[nodiscard]] __host__ double +bloom_filter::expected_false_positive_rate( + size_t num_items) const +{ + return ref_.expected_false_positive_rate(num_items); +} + template [[nodiscard]] __host__ constexpr typename bloom_filter::word_type* diff --git a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh index 81f644c8c..35f5d5d6b 100644 --- a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh +++ b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh @@ -414,6 +414,11 @@ class bloom_filter_impl { first, num_keys, stencil, pred, output_begin, *this); } + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items) const + { + return policy_.expected_false_positive_rate(num_items, num_blocks_); + } + [[nodiscard]] __host__ __device__ constexpr word_type* data() noexcept { return words_; } [[nodiscard]] __host__ __device__ constexpr word_type const* data() const noexcept @@ -428,7 +433,6 @@ class bloom_filter_impl { // TODO // [[nodiscard]] __host__ double occupancy() const; - // [[nodiscard]] __host__ double expected_false_positive_rate(size_t unique_keys) const // [[nodiscard]] __host__ __device__ static uint32_t optimal_pattern_bits(size_t num_blocks) // template // [[nodiscard]] __device__ constexpr auto make_copy(CG const& group, word_type* const diff --git a/include/cuco/detail/bloom_filter/bloom_filter_ref.inl b/include/cuco/detail/bloom_filter/bloom_filter_ref.inl index 96d2c0573..02786f0fa 100644 --- a/include/cuco/detail/bloom_filter/bloom_filter_ref.inl +++ b/include/cuco/detail/bloom_filter/bloom_filter_ref.inl @@ -180,6 +180,13 @@ template return impl_.data(); } +template +[[nodiscard]] __host__ double +bloom_filter_ref::expected_false_positive_rate(size_t num_items) const +{ + return impl_.expected_false_positive_rate(num_items); +} + template [[nodiscard]] __host__ __device__ constexpr typename bloom_filter_ref::word_type const* diff --git a/include/cuco/detail/bloom_filter/default_filter_policy.inl b/include/cuco/detail/bloom_filter/default_filter_policy.inl index eb8dbf703..d679cf738 100644 --- a/include/cuco/detail/bloom_filter/default_filter_policy.inl +++ b/include/cuco/detail/bloom_filter/default_filter_policy.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,16 +29,18 @@ __host__ } template -__device__ constexpr typename default_filter_policy::hash_result_type -default_filter_policy::hash( - typename default_filter_policy::hash_argument_type const& key) const +[[nodiscard]] __device__ constexpr + typename default_filter_policy::hash_result_type + default_filter_policy::hash( + typename default_filter_policy::hash_argument_type const& key) const { return impl_.hash(key); } template template -__device__ constexpr auto default_filter_policy::block_index( +[[nodiscard]] __device__ constexpr auto +default_filter_policy::block_index( typename default_filter_policy::hash_result_type hash, Extent num_blocks) const { @@ -46,12 +48,21 @@ __device__ constexpr auto default_filter_policy::bloc } template -__device__ constexpr typename default_filter_policy::word_type -default_filter_policy::word_pattern( - default_filter_policy::hash_result_type hash, - std::uint32_t word_index) const +[[nodiscard]] __device__ constexpr + typename default_filter_policy::word_type + default_filter_policy::word_pattern( + default_filter_policy::hash_result_type hash, + std::uint32_t word_index) const { return impl_.word_pattern(hash, word_index); } +template +[[nodiscard]] __host__ double +default_filter_policy::expected_false_positive_rate( + size_t num_items, size_t num_blocks) const +{ + return impl_.expected_false_positive_rate(num_items, num_blocks); +} + } // namespace cuco \ No newline at end of file diff --git a/include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh b/include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh index 14509b9b0..cd6d2e337 100644 --- a/include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh +++ b/include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -36,6 +37,14 @@ class default_filter_policy_impl { using hash_argument_type = typename hasher::argument_type; using hash_result_type = decltype(std::declval()(std::declval())); + static_assert(cuda::std::is_integral::value && + cuda::std::is_unsigned::value, + "Word type must be an unsigned integral type"); + + static_assert(cuda::std::is_integral::value && + cuda::std::is_unsigned::value, + "Hash result type must be an unsigned integral type"); + static constexpr std::uint32_t words_per_block = WordsPerBlock; private: @@ -105,6 +114,17 @@ class default_filter_policy_impl { return word; } + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items, + size_t num_blocks) const + { + return blocked_bloom_filter_expected_fpr(num_items, + num_blocks * words_per_block * sizeof(word_type), + cuda::std::numeric_limits::digits, + words_per_block, + cuda::std::numeric_limits::digits, + pattern_bits_); + } + private: uint32_t pattern_bits_; uint32_t min_bits_per_word_; diff --git a/include/cuco/detail/bloom_filter/utils.hpp b/include/cuco/detail/bloom_filter/utils.hpp new file mode 100644 index 000000000..fb28c42d4 --- /dev/null +++ b/include/cuco/detail/bloom_filter/utils.hpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * Copyright (c) 2022, Jim Apple. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace cuco::detail { + +/** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * Reference implementation: + * https://github.com/jbapple/libfilter/blob/4ebeaef1215969aee9edb05eb145e94b8dd98e16/c/lib/util.c#L5 + * + * @param ndv Number of distinct inserted elements + * @param bytes Filter size in bytes + * @param word_bits Number of bits in the underlying word type of a filter block + * @param block_words Number of words in each filter block + * @param hash_bits Total number of bits in the hash value type + * @param k Number of pattern bits to set for a key + * @param max_iters Maximum number of iterations for accuracy refinement + * + * @return Approximation of the expected false-positive rate + */ +__host__ inline double blocked_bloom_filter_expected_fpr(double ndv, + double bytes, + double word_bits, + double block_words, + double hash_bits, + double k, + std::uint64_t max_iters = 1000) +{ + if (ndv == 0) return 0.0; + if (bytes <= 0) return 1.0; + if (ndv / (bytes * cuda::std::numeric_limits::digits) >= 2.0) return 1.0; + + double result = 0; + double const lam = + block_words * word_bits / ((bytes * cuda::std::numeric_limits::digits) / ndv); + double const loglam = cuda::std::log(lam); + double const log1collide = -hash_bits * cuda::std::log(2.0); + + for (std::uint64_t j = 0; j < max_iters; ++j) { + double const i = static_cast(max_iters - 1 - j); + double const logp = i * loglam - lam - cuda::std::lgamma(i + 1.0); + double const logfinner = k * cuda::std::log(1.0 - cuda::std::pow(1.0 - 1.0 / word_bits, i * k)); + double const logcollide = cuda::std::log(i) + log1collide; + result += cuda::std::exp(logp + logfinner) + cuda::std::exp(logp + logcollide); + // result += exp(logp + logfinner); // alternative approach + } + + return (result > 1.0) ? 1.0 : result; +} + +} // namespace cuco::detail \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 96e89863d..d98f2fd05 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -144,4 +144,5 @@ ConfigureTest(HYPERLOGLOG_TEST ConfigureTest(BLOOM_FILTER_TEST bloom_filter/unique_sequence_test.cu bloom_filter/arrow_policy_test.cu - bloom_filter/variable_cg_test.cu) + bloom_filter/variable_cg_test.cu + bloom_filter/fpr_test.cu) diff --git a/tests/bloom_filter/fpr_test.cu b/tests/bloom_filter/fpr_test.cu new file mode 100644 index 000000000..a839fe72f --- /dev/null +++ b/tests/bloom_filter/fpr_test.cu @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +using size_type = uint64_t; + +template +void test_fpr(Filter& filter, size_type num_keys, double max_deviation) +{ + using Key = typename Filter::key_type; + + // Generate keys + thrust::device_vector keys(num_keys); + thrust::sequence(thrust::device, keys.begin(), keys.end()); + + size_type num_tp = num_keys * 0.5; + size_type num_tn = num_keys - num_tp; + + auto tp_begin = keys.begin(); + auto tp_end = tp_begin + num_tp; + auto tn_begin = tp_end; + auto tn_end = keys.end(); + + filter.add(tp_begin, tp_end); + + thrust::device_vector tp_result(num_tp, false); + thrust::device_vector tn_result(num_keys - num_tp, false); + + // Query the filter for the previously inserted keys. + // This should result in a true-positive rate of TPR=1. + filter.contains(tp_begin, tp_end, tp_result.begin()); + + // Query the filter for the keys that are not present in the filter. + // Since bloom filters are probalistic data structures, the filter + // exhibits a false-positive rate FPR>0 depending on the number of bits in + // the filter and the number of hashes used per key. + filter.contains(tn_begin, tn_end, tn_result.begin()); + + auto const tp_rate = + static_cast(thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true)) / + static_cast(num_tp); + auto const fp_rate = + static_cast(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / + static_cast(num_tn); + + SECTION("True-positive rate must be 1.") { REQUIRE(tp_rate == 1.0); } + + SECTION("False-positive rate should be close to the theoretical value.") + { + auto const expected_fpr = filter.expected_false_positive_rate(num_tp); + INFO("expected_fpr = " << expected_fpr << ", fp_rate = " << fp_rate); + + // If the expected FPR is zero, then we expect fp_rate to be zero. + if (expected_fpr == 0.0) { + REQUIRE(fp_rate == 0.0); + } else { + // Only fail if fp_rate exceeds expected_fpr by more than max_deviation + auto const relative_excess = (fp_rate > expected_fpr) ? fp_rate - expected_fpr : 0.0; + REQUIRE(relative_excess <= max_deviation); + } + } +} + +TEMPLATE_TEST_CASE_SIG( + "bloom_filter false-positive rate tests", + "", + ((class Key, class Policy), Key, Policy), + (int32_t, cuco::default_filter_policy, uint32_t, 1>), + (int32_t, cuco::default_filter_policy, uint32_t, 8>), + (int32_t, cuco::default_filter_policy, uint64_t, 1>), + (int32_t, cuco::default_filter_policy, uint64_t, 8>)) +{ + using filter_type = + cuco::bloom_filter, cuda::thread_scope_device, Policy>; + size_type num_keys_pow2 = GENERATE(28, 29, 30); + size_type const num_keys = size_type{1} << num_keys_pow2; + constexpr size_type filter_bits_pow2 = 29; // 64MB + constexpr size_type filter_bits = size_type{1} << filter_bits_pow2; + size_type const num_blocks = (filter_bits / CHAR_BIT) / (sizeof(typename filter_type::word_type) * + filter_type::words_per_block); + + uint32_t pattern_bits = Policy::words_per_block + GENERATE(0, 1, 2, 3, 4); + + INFO("num_keys_pow2=" << num_keys_pow2); + INFO("filter_bits_pow2=" << filter_bits_pow2); + INFO("num_blocks=" << num_blocks); + INFO("pattern_bits=" << pattern_bits); + + // some parameter combinations might be invalid so we skip them + try { + [[maybe_unused]] auto policy = Policy{pattern_bits}; + } catch (std::exception const& e) { + SKIP(e.what()); + } + + auto filter = filter_type{num_blocks, {}, {pattern_bits}}; + + // The maximum allowed deviation of the false-positive rate from the theoretical value + constexpr auto max_deviation = 0.02; + + test_fpr(filter, num_keys, max_deviation); +}