From 9363f3d7847c260a85ee03330b1892c3bf45eac1 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:03:15 -0800 Subject: [PATCH 1/9] Remove unused tparam --- include/cuco/detail/bloom_filter/bloom_filter_impl.cuh | 5 ++--- include/cuco/detail/bloom_filter/kernels.cuh | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh index 9809a7edb..8570c0e17 100644 --- a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh +++ b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh @@ -273,13 +273,12 @@ class bloom_filter_impl { auto const num_keys = cuco::detail::distance(first, last); if (num_keys == 0) { return; } - auto constexpr cg_size = add_optimal_cg_size(); auto constexpr block_size = cuco::detail::default_block_size(); void const* kernel = reinterpret_cast( - detail::bloom_filter_ns::add); + detail::bloom_filter_ns::add); auto const grid_size = cuco::detail::max_occupancy_grid_size(block_size, kernel); - detail::bloom_filter_ns::add + detail::bloom_filter_ns::add <<>>(first, num_keys, *this); } } diff --git a/include/cuco/detail/bloom_filter/kernels.cuh b/include/cuco/detail/bloom_filter/kernels.cuh index 9e04b73c4..2014ef80f 100644 --- a/include/cuco/detail/bloom_filter/kernels.cuh +++ b/include/cuco/detail/bloom_filter/kernels.cuh @@ -26,7 +26,7 @@ namespace cuco::detail::bloom_filter_ns { CUCO_SUPPRESS_KERNEL_WARNINGS -template +template CUCO_KERNEL __launch_bounds__(BlockSize) void add(InputIt first, cuco::detail::index_type n, Ref ref) From 5c4c68d11c71ceeeaad5ae053ef31d7289586587 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:04:45 -0800 Subject: [PATCH 2/9] Add unit test for variable CG sizes --- tests/CMakeLists.txt | 2 +- tests/bloom_filter/variable_cg_test.cu | 110 +++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 tests/bloom_filter/variable_cg_test.cu diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 602627166..96e89863d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -144,4 +144,4 @@ ConfigureTest(HYPERLOGLOG_TEST ConfigureTest(BLOOM_FILTER_TEST bloom_filter/unique_sequence_test.cu bloom_filter/arrow_policy_test.cu - ) + bloom_filter/variable_cg_test.cu) diff --git a/tests/bloom_filter/variable_cg_test.cu b/tests/bloom_filter/variable_cg_test.cu new file mode 100644 index 000000000..b230a3922 --- /dev/null +++ b/tests/bloom_filter/variable_cg_test.cu @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +using size_type = int32_t; + +template +void test_variable_cg_size(Filter& filter, size_type num_keys) +{ + constexpr int32_t block_size = 128; + constexpr int32_t grid_size = 128; + + using Key = typename Filter::key_type; + + auto ref = filter.ref(); + + // Generate keys + thrust::device_vector keys(num_keys); + thrust::sequence(thrust::device, keys.begin(), keys.end()); + + thrust::device_vector contained(num_keys, false); + + auto const always_true = thrust::constant_iterator{true}; + + SECTION("Check if fallback kernels work for varying combinations of CG sizes.") + { + cuco::detail::bloom_filter_ns::add_if_n + <<>>(keys.begin(), num_keys, always_true, cuda::std::identity{}, ref); + cuco::detail::bloom_filter_ns::contains_if_n + <<>>( + keys.begin(), num_keys, always_true, cuda::std::identity{}, contained.begin(), ref); + REQUIRE(cuco::test::all_of(contained.begin(), contained.end(), cuda::std::identity{})); + } + + filter.clear(); + thrust::fill(contained.begin(), contained.end(), false); // reset output vector + + SECTION("Check if adaptive add kernel works with fallback contains kernel.") + { + cuco::detail::bloom_filter_ns::add + <<>>(keys.begin(), num_keys, ref); + cuco::detail::bloom_filter_ns::contains_if_n + <<>>( + keys.begin(), num_keys, always_true, cuda::std::identity{}, contained.begin(), ref); + REQUIRE(cuco::test::all_of(contained.begin(), contained.end(), cuda::std::identity{})); + } + + // TODO adaptive vs. adaptive and fallback add vs. adaptive contains (requires #673) +} + +TEMPLATE_TEST_CASE_SIG( + "bloom_filter variable CG size tests", + "", + ((int32_t AddCGSize, int32_t ContainsCGSize, class Key, class Policy), + AddCGSize, + ContainsCGSize, + Key, + Policy), + (1, 4, int32_t, cuco::default_filter_policy, uint32_t, 1>), + (1, 4, int32_t, cuco::default_filter_policy, uint32_t, 8>), + (1, 4, int32_t, cuco::default_filter_policy, uint64_t, 1>), + (1, 4, int32_t, cuco::default_filter_policy, uint64_t, 8>), + (4, 1, int32_t, cuco::default_filter_policy, uint32_t, 1>), + (4, 1, int32_t, cuco::default_filter_policy, uint32_t, 8>), + (4, 1, int32_t, cuco::default_filter_policy, uint64_t, 1>), + (4, 1, int32_t, cuco::default_filter_policy, uint64_t, 8>)) +{ + using filter_type = + cuco::bloom_filter, cuda::thread_scope_device, Policy>; + constexpr size_type num_keys{400}; + + uint32_t pattern_bits = GENERATE(Policy::words_per_block, + Policy::words_per_block + 1, + Policy::words_per_block + 2, + , + Policy::words_per_block + 3); + + auto filter = filter_type{1000, {}, {pattern_bits}}; + + test_variable_cg_size(filter, num_keys); +} From 644dfb22d5f3ac9d274313e3d752c4cd30f297f8 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Wed, 26 Feb 2025 17:13:13 -0800 Subject: [PATCH 3/9] Remove stray comma --- tests/bloom_filter/variable_cg_test.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/bloom_filter/variable_cg_test.cu b/tests/bloom_filter/variable_cg_test.cu index b230a3922..b6e2ec2c7 100644 --- a/tests/bloom_filter/variable_cg_test.cu +++ b/tests/bloom_filter/variable_cg_test.cu @@ -101,7 +101,6 @@ TEMPLATE_TEST_CASE_SIG( uint32_t pattern_bits = GENERATE(Policy::words_per_block, Policy::words_per_block + 1, Policy::words_per_block + 2, - , Policy::words_per_block + 3); auto filter = filter_type{1000, {}, {pattern_bits}}; From c4f605a00af05f50fa6e7efd02d61670170fdd61 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:01:39 -0800 Subject: [PATCH 4/9] WIP: Add accuracy test --- tests/CMakeLists.txt | 2 +- tests/bloom_filter/fpr_test.cu | 95 ++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 tests/bloom_filter/fpr_test.cu diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 602627166..95e353ea9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -144,4 +144,4 @@ ConfigureTest(HYPERLOGLOG_TEST ConfigureTest(BLOOM_FILTER_TEST bloom_filter/unique_sequence_test.cu bloom_filter/arrow_policy_test.cu - ) + fpr_test.cu) diff --git a/tests/bloom_filter/fpr_test.cu b/tests/bloom_filter/fpr_test.cu new file mode 100644 index 000000000..ac76f0a22 --- /dev/null +++ b/tests/bloom_filter/fpr_test.cu @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include +#include + +#include +#include + +using size_type = int32_t; + +template +void test_fpr(Filter& filter, size_type num_keys) +{ + using Key = typename Filter::key_type; + + // Generate keys + thrust::device_vector keys(num_keys); + thrust::sequence(thrust::device, keys.begin(), keys.end()); + + size_type num_tp = num_keys * 0.5; ///< Insert the first half keys into the filter. + size_type num_tn = num_keys - num_tp; + + auto tp_begin = keys.begin(); + auto tp_end = tp_begin + num_tp; + auto tn_begin = tp_end; + auto tn_end = keys.end(); + + filter.add(tp_begin, tp_end); + + thrust::device_vector tp_result(num_tp, false); + thrust::device_vector tn_result(num_keys - num_tp, false); + + // Query the filter for the previously inserted keys. + // This should result in a true-positive rate of TPR=1. + filter.contains(tp_begin, tp_end, tp_result.begin()); + + // Query the filter for the keys that are not present in the filter. + // Since bloom filters are probalistic data structures, the filter + // exhibits a false-positive rate FPR>0 depending on the number of bits in + // the filter and the number of hashes used per key. + filter.contains(tn_begin, tn_end, tn_result.begin()); + + float tp_rate = + float(thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true)) / float(num_tp); + float fp_rate = + float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn); + + SECTION("True-positive rate must be 1.") { REQUIRE(tp_rate == 1.0f); } + + SECTION("Fals-positive rate should be close to the theoretical value.") + { + REQUIRE(fp_rate < 1.0f); // TODO use actual theoretical FPR value + } +} + +TEMPLATE_TEST_CASE_SIG( + "bloom_filter false-positive rate tests", + "", + ((class Key, class Policy), Key, Policy), + (int32_t, cuco::default_filter_policy, uint32_t, 1>), + (int32_t, cuco::default_filter_policy, uint32_t, 8>), + (int32_t, cuco::default_filter_policy, uint64_t, 1>), + (int32_t, cuco::default_filter_policy, uint64_t, 8>)) +{ + using filter_type = + cuco::bloom_filter, cuda::thread_scope_device, Policy>; + constexpr size_type num_keys{400}; + + uint32_t pattern_bits = + GENERATE(Policy::words_per_block, Policy::words_per_block + 1, Policy::words_per_block + 2); + + auto filter = filter_type{1000, {}, {pattern_bits}}; + + test_fpr(filter, num_keys); +} From ae41efad11ed2df7b725059983c8dfe1972b3263 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Thu, 27 Feb 2025 07:02:25 -0800 Subject: [PATCH 5/9] Skip tests with invalid policy configuration --- tests/bloom_filter/unique_sequence_test.cu | 12 ++++++++++-- tests/bloom_filter/variable_cg_test.cu | 13 +++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/tests/bloom_filter/unique_sequence_test.cu b/tests/bloom_filter/unique_sequence_test.cu index 472685d4d..0e5314898 100644 --- a/tests/bloom_filter/unique_sequence_test.cu +++ b/tests/bloom_filter/unique_sequence_test.cu @@ -27,6 +27,8 @@ #include #include +#include + using size_type = int32_t; template @@ -96,8 +98,14 @@ TEMPLATE_TEST_CASE_SIG( cuco::bloom_filter, cuda::thread_scope_device, Policy>; constexpr size_type num_keys{400}; - uint32_t pattern_bits = - GENERATE(Policy::words_per_block, Policy::words_per_block + 1, Policy::words_per_block + 2); + uint32_t pattern_bits = Policy::words_per_block + GENERATE(0, 1, 2, 3, 4); + + // some parameter combinations might be invalid so we skip them + try { + [[maybe_unused]] auto policy = Policy{pattern_bits}; + } catch (std::exception const& e) { + SKIP(e.what()); + } auto filter = filter_type{1000, {}, {pattern_bits}}; diff --git a/tests/bloom_filter/variable_cg_test.cu b/tests/bloom_filter/variable_cg_test.cu index b6e2ec2c7..6a9997692 100644 --- a/tests/bloom_filter/variable_cg_test.cu +++ b/tests/bloom_filter/variable_cg_test.cu @@ -30,6 +30,7 @@ #include #include +#include using size_type = int32_t; @@ -98,10 +99,14 @@ TEMPLATE_TEST_CASE_SIG( cuco::bloom_filter, cuda::thread_scope_device, Policy>; constexpr size_type num_keys{400}; - uint32_t pattern_bits = GENERATE(Policy::words_per_block, - Policy::words_per_block + 1, - Policy::words_per_block + 2, - Policy::words_per_block + 3); + uint32_t pattern_bits = Policy::words_per_block + GENERATE(0, 1, 2, 3, 4); + + // some parameter combinations might be invalid so we skip them + try { + [[maybe_unused]] auto policy = Policy{pattern_bits}; + } catch (std::exception const& e) { + SKIP(e.what()); + } auto filter = filter_type{1000, {}, {pattern_bits}}; From 51bc20bf09f2a4838dc146786bc333b434e293d4 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Thu, 27 Feb 2025 16:57:17 -0800 Subject: [PATCH 6/9] Add utility for calculating the expected FPR --- README.md | 2 +- examples/bloom_filter/host_bulk_example.cu | 6 +- include/cuco/bloom_filter.cuh | 11 +++ include/cuco/bloom_filter_policies.cuh | 26 +++++-- include/cuco/bloom_filter_ref.cuh | 11 +++ .../bloom_filter/arrow_filter_policy.cuh | 28 ++++++- .../cuco/detail/bloom_filter/bloom_filter.inl | 8 ++ .../detail/bloom_filter/bloom_filter_impl.cuh | 6 +- .../detail/bloom_filter/bloom_filter_ref.inl | 7 ++ .../bloom_filter/default_filter_policy.inl | 29 ++++--- .../default_filter_policy_impl.cuh | 20 +++++ include/cuco/detail/bloom_filter/utils.hpp | 75 +++++++++++++++++++ tests/CMakeLists.txt | 2 +- 13 files changed, 209 insertions(+), 22 deletions(-) create mode 100644 include/cuco/detail/bloom_filter/utils.hpp diff --git a/README.md b/README.md index 2ae071e2a..38297c3ef 100644 --- a/README.md +++ b/README.md @@ -256,4 +256,4 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection `cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries. #### Examples: -- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVUJQiSapLtoK5IF2qpaVsjj8TBWBnvqBwRF-e977ZmBgZBqtVRqwL6Pc889vvZzoJnWXAod9P9-Dngc9HthkBGxsmTFgn5AbUyCMNDSKup-d94tBLyDG5nvFF-lBhq0CZfdy99CmHwd345HcHM_fbifjubj-0nb2Xr7z5wyoVkMVsRMgUkZjHJC8U-5E8JXphwQuGx3oeEMFkG5twiaAx9lJy2syQ6ENGA1wzBcQ8IzBuyJstwAF0DlOs84EZTBlpvUpyrjeDjwvQwiI0PQnqBHjr-SuiUQs4fuPqkxeb_T2W63beJht6VadbLCWHc-j2_uJrO7FkLfu30RGTILiv2wXGHh0Q5IjsgoiRBvRrYgFZCVYrhnpEO-VdxwsQpBy8RsiWI-Tsy1UTyy5oi8CifWXzdA-ohA4kYzGM8WAXwczcaz0Mf5Np7_cf9lDt9G0-loMh_fzeB-is2a3I5dq_DXJxhNvsOf48ltCAypw1TsKVeuCoTKHa0sLjicMXYEI5EFLJ0zyhNOoVIQrOSGKYFlQc7UmhdaQ5Cxj5PxNTfE-LVXxflUnYVYiF-4oJmNGVxRS2UnyqRcL7Hvhqk2ten1sY1JldWmQ6UVpu02X23FbIMplhtGjVTnTdgTo9YBW-YSm7Y7b6Wxuwyl1j7FwCU2hZG1X-bCoOK4aGwkj5sL8YyFgVukWLdxHIOw6-Uj22kntiH0ur92u90B7D-dTucKfmeCKWJYuQ3O_nwkkxduw0Pcd9Btvx-UkcbIrjKe64QrbSAlWeLjuWCy3PD0vpFAvErQKjMPXntoG5Xd0uhx6SvzOPCr22xVmygR8M0tszvynKmDBmaXF0I7tgCcBE5eaLL0JkOXfbB3neVk60553auYDDFLiM0MFA12mjwF5Kt3kuv365q7qnJdl_GeaxW-lKm1ift91KCBqys8kR9t9ojAPO-e47fxJFgOU7lyJK6Knru5WMDsY6yFgNoHw_tcTMRZmbtQZ79_pPMaatewRtU5P1v3LpWiG26rHbEVyrYZeo82ZnDfe80yDbFOK_nSWyHvdZdB3QAdvVj2thd1sRRWYh-mcDjaqgIcYBwafF7M5Tz3Ds6wlDOJ40aFIiwzNX_KWiRldu0scRBidxoF8BASkml2zN1ZR1F3PDoptRhlKX9Zpna107efqziFN1xane1KDeGs35eGjnN3EepU2iyGIh34q80oy1q51HixbBj44YHEzB-mw16dFTyq7i7Ur6gJD4VXjf0vcH2ZJiUG8Bbzt7W7RpjwqE6Gizui3N3W9QOhvWOuZEQyvN7wQomJIahzZamxGCushSmjsKeUR9yga8HrSd2fHqbXXTxjOZbl5oUskGArIkSOrHhfXtVWq8oNhmPTlOiUafcEif3MwnrP0yn2dIqSTvEGnUkmkS7HtgM7LI64X2xU8vIXWuNYbGcaVF8qT6yTQbMJnTJgIb9Cu0Xe5H_kPa2kvvSTvKJ5fkA6UeKLBb9XJPhl17dyPTmsnw48xVAVArr48wVfq-4NiK9CdXjUBmJDae_yve3htsxN8eINWhhoSC8ueh-gRRRNh3q9_NCFVgvvLYP_GczB4lZG1pF_Bmc8qsWklGa4uCkerriA9YrH4CWs9vHmONpH7oKXf_y_fwHeCexw)) \ No newline at end of file +- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVSJhEg0SXfRViQLaauqVMjjMYyVwZ71A4Ki_Pe99njIEEh3tURKgn0f5557fO2nSDOtuRQ66n9_inga9XtxlBOxtGTJon5EbUqiONLSKuq-d97NBLyDK1lsFV9mBhq0Cafd099a-OsshvGX0fVoCFe3k7vbyfB-dDtuOwfv9IlTJjRLwYqUKTAZg2FBKP4JOzF8YcqhgdN2FxrOYBaFvVnUPPdRttLCimxBSANWMwzDNSx4zoA9UlYY4AKoXBU5J4Iy2HCT-VQhjocD30IQmRiC9gQ9Cvy2qFsCMTvo7pMZU_Q7nc1m0yYedluqZScvjXXn0-jqZjy9aSH0ndtnkSO9oNjflissPNkCKRAZJQnizckGpAKyVAz3jHTIN4obLpYxaLkwG6KYj5NybRRPrNkjr8KJ9dcNkD4ikLjhFEbTWQQfhtPRNPZxvo7u_7j9fA9fh5PJcHw_upnC7QSbNb4euVbht48wHH-DP0fj6xgYUoep2GOhXBUIlTtaWVpyOGVsD8ZClrB0wShfcAqVjGAp10wJLAsKpla8FByCTH2cnK-4IcavHRTnU3VmYiZ-4YLmNmVwQS2VnSSXcjXHvhum2tRml_s2JlNWmw6VVpi22zzYStkaU8zXjBqpjpuwR0atAzYvJDZte9xKY3cZSq39GgOX2BRGVn6ZC4OK46KxljxtzsQTFgZukWLdxnEMwq7mD2yrndgG0Ov-2u12z2H36XQ6F_A7E0wRw8I2OPvjkUxRug1e4r6DbvvsPEQaIbvKeK4XXGkDGckXPp4LJsOGp_eNBOIgQStkPj_00DYJ3dLoceor8zjwX7fZqjZRIuCbG7I78pypgwZmW5RC27cAnAROXmgy9yYDl_185zotyMad8rpXORlStiA2N1A22GnyNSBfvZNcv1_X3EWV6zLEe6pV-BxSa5P2-6hBAxcXeCI_2PwBgXnePcdv41lgOUwVypG4LHvu5mIJs4-xZgJqHwzvczGR5iF3qc5-f0_nNdSuYY2qc3627lwqRTfcVjthS5RtM_Yebczg_u81QxpinVaKubdC3usu53UDdPRi2dme1MVSWoldmNJhb6sK8ALjpcHHxRzmuXdwhkHOJE0bFYo4ZGr-lLVEyvzSWeIgxO40SuAxLEiu2T53Rx1F3XHvpNRihFL-skxta6dvN1dxCq-5tDrfBg3hrN-Vho737iLUmbR5CmU68FebUZa1CqnxYlkz8MMDibm_mwx6dVbwqLq7UB9QE78UXjX2v8D1ZZqMGMBbzN_W7hphwqN6NVzcEeXutq4fCO0dCyUTkuP1hhdKSgxBnStLjcVYcS1MiMIeM55wg64lr6_q_ng3ueziGSuwLDcvZIkEW5EgcmTF-_KqtlpVbjDsm2ZEZ0y7J0jqZxbWe5xOsaNTBDrFG3Qucol0ObYd2EF5xP1io5KXv9Aa-2I70qD6UjixTgbNJnRCwFJ-pXbLvIv_kfd1JfWln-QVzeMDEkX5fcWIxu6mPwb4dsHVig5v4Dp4YBGAH0zEYI7XDx7GmnnZoGp57oUyr4TiQzX2r8zmvw5bxVCRArr49Rmfy-79iS9S9fKqjsSa0t7pme3htixM-eSOWhhoQE9Oeu-hRRTNBno1f9-FVgvRGfxlMAdLWzlZJf4dnvOkFpNSmuPiunw04wJyLR6i57jax1trbx_7Fj3_8D__AFOKFKY=)) \ No newline at end of file diff --git a/examples/bloom_filter/host_bulk_example.cu b/examples/bloom_filter/host_bulk_example.cu index f02f6e657..a4916def9 100644 --- a/examples/bloom_filter/host_bulk_example.cu +++ b/examples/bloom_filter/host_bulk_example.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,7 +68,9 @@ int main(void) float fp_rate = float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn); - std::cout << "TPR=" << tp_rate << " FPR=" << fp_rate << std::endl; + std::cout << "TPR[measured]=" << tp_rate << " FPR[measured]=" << fp_rate + << " FPR[expected]=" << filter.expected_false_positive_rate(num_keys * 0.5) + << std::endl; return 0; } \ No newline at end of file diff --git a/include/cuco/bloom_filter.cuh b/include/cuco/bloom_filter.cuh index 0d6003b19..7754fb977 100644 --- a/include/cuco/bloom_filter.cuh +++ b/include/cuco/bloom_filter.cuh @@ -312,6 +312,17 @@ class bloom_filter { OutputIt output_begin, cuda::stream_ref stream = {}) const noexcept; + /** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * @param num_items Number of inserted distinct elements + * + * @return Approximation of the expected false-positive rate + */ + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items) const; + /** * @brief Gets a pointer to the underlying filter storage. * diff --git a/include/cuco/bloom_filter_policies.cuh b/include/cuco/bloom_filter_policies.cuh index 0d28b166a..4fce16aea 100644 --- a/include/cuco/bloom_filter_policies.cuh +++ b/include/cuco/bloom_filter_policies.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ using arrow_filter_policy = detail::arrow_filter_policy; * @brief The default policy that defines how a Blocked Bloom Filter generates and stores a key's * fingerprint. * - * @note `Word` type must be an atomically updatable integral type. `WordsPerBlock` must + * @note `Word` type must be an atomically updatable unsigned integral type. `WordsPerBlock` must * be a power-of-two. * * @tparam Hash Hash function used to generate a key's fingerprint @@ -92,7 +92,7 @@ class default_filter_policy { * * @return The hash value of the key */ - __device__ constexpr hash_result_type hash(hash_argument_type const& key) const; + [[nodiscard]] __device__ constexpr hash_result_type hash(hash_argument_type const& key) const; /** * @brief Determines the filter block a key is added into. @@ -108,7 +108,8 @@ class default_filter_policy { * @return The block index for the given key's hash value */ template - __device__ constexpr auto block_index(hash_result_type hash, Extent num_blocks) const; + [[nodiscard]] __device__ constexpr auto block_index(hash_result_type hash, + Extent num_blocks) const; /** * @brief Determines the fingerprint pattern for a word/segment within the filter block for a @@ -122,8 +123,21 @@ class default_filter_policy { * * @return The bit pattern for the word/segment in the filter block */ - __device__ constexpr word_type word_pattern(hash_result_type hash, - std::uint32_t word_index) const; + [[nodiscard]] __device__ constexpr word_type word_pattern(hash_result_type hash, + std::uint32_t word_index) const; + + /** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * @param num_items Number of inserted distinct elements + * @param num_blocks The total number of blocks in the filter + * + * @return Approximation of the expected false-positive rate + */ + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items, + size_t num_blocks) const; private: impl_type impl_; ///< Policy implementation diff --git a/include/cuco/bloom_filter_ref.cuh b/include/cuco/bloom_filter_ref.cuh index 2f3dcfa2b..98b5aa8c4 100644 --- a/include/cuco/bloom_filter_ref.cuh +++ b/include/cuco/bloom_filter_ref.cuh @@ -369,6 +369,17 @@ class bloom_filter_ref { OutputIt output_begin, cuda::stream_ref stream = {}) const noexcept; + /** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * @param num_items Number of inserted distinct elements + * + * @return Approximation of the expected false-positive rate + */ + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items) const; + /** * @brief Gets a pointer to the underlying filter storage. * diff --git a/include/cuco/detail/bloom_filter/arrow_filter_policy.cuh b/include/cuco/detail/bloom_filter/arrow_filter_policy.cuh index 2f17fa726..39b616ed1 100644 --- a/include/cuco/detail/bloom_filter/arrow_filter_policy.cuh +++ b/include/cuco/detail/bloom_filter/arrow_filter_policy.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -135,7 +136,8 @@ class arrow_filter_policy { * @return The block index for the given key's hash value */ template - __device__ constexpr auto block_index(hash_result_type hash, Extent num_blocks) const + [[nodiscard]] __device__ constexpr auto block_index(hash_result_type hash, + Extent num_blocks) const { constexpr auto hash_bits = cuda::std::numeric_limits::digits; // TODO: assert if num_blocks > max_filter_blocks @@ -153,7 +155,8 @@ class arrow_filter_policy { * * @return The bit pattern for the word/segment in the filter block */ - __device__ constexpr word_type word_pattern(hash_result_type hash, std::uint32_t word_index) const + [[nodiscard]] __device__ constexpr word_type word_pattern(hash_result_type hash, + std::uint32_t word_index) const { word_type const key = static_cast(hash); std::uint32_t salt; @@ -182,6 +185,27 @@ class arrow_filter_policy { return word_type{1} << ((key * salt) >> 27); } + /** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * @param num_items Number of inserted distinct elements + * @param num_blocks The total number of blocks in the filter + * + * @return Approximation of the expected false-positive rate + */ + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items, + size_t num_blocks) const + { + return blocked_bloom_filter_expected_fpr(num_items, + num_blocks * words_per_block * sizeof(word_type), + cuda::std::numeric_limits::digits, + words_per_block, + cuda::std::numeric_limits::digits, + words_per_block); + } + private: hasher hash_; }; diff --git a/include/cuco/detail/bloom_filter/bloom_filter.inl b/include/cuco/detail/bloom_filter/bloom_filter.inl index 870509cb8..b35d0ec9c 100644 --- a/include/cuco/detail/bloom_filter/bloom_filter.inl +++ b/include/cuco/detail/bloom_filter/bloom_filter.inl @@ -129,6 +129,14 @@ __host__ constexpr void bloom_filter::con ref_.contains_if_async(first, last, stencil, pred, output_begin, stream); } +template +[[nodiscard]] __host__ double +bloom_filter::expected_false_positive_rate( + size_t num_items) const +{ + return ref_.expected_false_positive_rate(num_items); +} + template [[nodiscard]] __host__ constexpr typename bloom_filter::word_type* diff --git a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh index 9809a7edb..2d8d8fff5 100644 --- a/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh +++ b/include/cuco/detail/bloom_filter/bloom_filter_impl.cuh @@ -417,6 +417,11 @@ class bloom_filter_impl { first, num_keys, stencil, pred, output_begin, *this); } + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items) const + { + return policy_.expected_false_positive_rate(num_items, num_blocks_); + } + [[nodiscard]] __host__ __device__ constexpr word_type* data() noexcept { return words_; } [[nodiscard]] __host__ __device__ constexpr word_type const* data() const noexcept @@ -431,7 +436,6 @@ class bloom_filter_impl { // TODO // [[nodiscard]] __host__ double occupancy() const; - // [[nodiscard]] __host__ double expected_false_positive_rate(size_t unique_keys) const // [[nodiscard]] __host__ __device__ static uint32_t optimal_pattern_bits(size_t num_blocks) // template // [[nodiscard]] __device__ constexpr auto make_copy(CG const& group, word_type* const diff --git a/include/cuco/detail/bloom_filter/bloom_filter_ref.inl b/include/cuco/detail/bloom_filter/bloom_filter_ref.inl index 96d2c0573..02786f0fa 100644 --- a/include/cuco/detail/bloom_filter/bloom_filter_ref.inl +++ b/include/cuco/detail/bloom_filter/bloom_filter_ref.inl @@ -180,6 +180,13 @@ template return impl_.data(); } +template +[[nodiscard]] __host__ double +bloom_filter_ref::expected_false_positive_rate(size_t num_items) const +{ + return impl_.expected_false_positive_rate(num_items); +} + template [[nodiscard]] __host__ __device__ constexpr typename bloom_filter_ref::word_type const* diff --git a/include/cuco/detail/bloom_filter/default_filter_policy.inl b/include/cuco/detail/bloom_filter/default_filter_policy.inl index eb8dbf703..d679cf738 100644 --- a/include/cuco/detail/bloom_filter/default_filter_policy.inl +++ b/include/cuco/detail/bloom_filter/default_filter_policy.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,16 +29,18 @@ __host__ } template -__device__ constexpr typename default_filter_policy::hash_result_type -default_filter_policy::hash( - typename default_filter_policy::hash_argument_type const& key) const +[[nodiscard]] __device__ constexpr + typename default_filter_policy::hash_result_type + default_filter_policy::hash( + typename default_filter_policy::hash_argument_type const& key) const { return impl_.hash(key); } template template -__device__ constexpr auto default_filter_policy::block_index( +[[nodiscard]] __device__ constexpr auto +default_filter_policy::block_index( typename default_filter_policy::hash_result_type hash, Extent num_blocks) const { @@ -46,12 +48,21 @@ __device__ constexpr auto default_filter_policy::bloc } template -__device__ constexpr typename default_filter_policy::word_type -default_filter_policy::word_pattern( - default_filter_policy::hash_result_type hash, - std::uint32_t word_index) const +[[nodiscard]] __device__ constexpr + typename default_filter_policy::word_type + default_filter_policy::word_pattern( + default_filter_policy::hash_result_type hash, + std::uint32_t word_index) const { return impl_.word_pattern(hash, word_index); } +template +[[nodiscard]] __host__ double +default_filter_policy::expected_false_positive_rate( + size_t num_items, size_t num_blocks) const +{ + return impl_.expected_false_positive_rate(num_items, num_blocks); +} + } // namespace cuco \ No newline at end of file diff --git a/include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh b/include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh index 14509b9b0..cd6d2e337 100644 --- a/include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh +++ b/include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -36,6 +37,14 @@ class default_filter_policy_impl { using hash_argument_type = typename hasher::argument_type; using hash_result_type = decltype(std::declval()(std::declval())); + static_assert(cuda::std::is_integral::value && + cuda::std::is_unsigned::value, + "Word type must be an unsigned integral type"); + + static_assert(cuda::std::is_integral::value && + cuda::std::is_unsigned::value, + "Hash result type must be an unsigned integral type"); + static constexpr std::uint32_t words_per_block = WordsPerBlock; private: @@ -105,6 +114,17 @@ class default_filter_policy_impl { return word; } + [[nodiscard]] __host__ double expected_false_positive_rate(size_t num_items, + size_t num_blocks) const + { + return blocked_bloom_filter_expected_fpr(num_items, + num_blocks * words_per_block * sizeof(word_type), + cuda::std::numeric_limits::digits, + words_per_block, + cuda::std::numeric_limits::digits, + pattern_bits_); + } + private: uint32_t pattern_bits_; uint32_t min_bits_per_word_; diff --git a/include/cuco/detail/bloom_filter/utils.hpp b/include/cuco/detail/bloom_filter/utils.hpp new file mode 100644 index 000000000..d9338db99 --- /dev/null +++ b/include/cuco/detail/bloom_filter/utils.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * Copyright (c) 2022, Jim Apple. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace cuco::detail { + +/** + * @brief Computes the expected false-positive rate of a blocked Bloom filter + * using the Poisson-based formula (Eq. 3) from Putze et.al. "Cache-, Hash- and Space-Efficient + * Bloom Filters". + * + * Reference implementation: + * https://github.com/jbapple/libfilter/blob/4ebeaef1215969aee9edb05eb145e94b8dd98e16/c/lib/util.c#L5 + * + * @param ndv Number of distinct inserted elements + * @param bytes Filter size in bytes + * @param word_bits Number of bits in the underlying word type of a filter block + * @param bucket_words Number of words in each filter block + * @param hash_bits Total number of bits in the hash value type + * @param k Number of pattern bits to set for a key + * @param max_iters Maximum number of iterations for accuracy refinement + * + * @return Approximation of the expected false-positive rate + */ +__host__ inline double blocked_bloom_filter_expected_fpr(double ndv, + double bytes, + double word_bits, + double bucket_words, + double hash_bits, + double k, + std::uint64_t max_iters = 10000) +{ + if (ndv == 0) return 0.0; + if (bytes <= 0) return 1.0; + if (ndv / (bytes * cuda::std::numeric_limits::digits) > 3) return 1.0; + + double result = 0; + double const lam = + bucket_words * word_bits / ((bytes * cuda::std::numeric_limits::digits) / ndv); + double const loglam = cuda::std::log(lam); + double const log1collide = -hash_bits * cuda::std::log(2.0); + + for (std::uint64_t j = 0; j < max_iters; ++j) { + std::uint64_t i = max_iters - 1 - j; + double logp = i * loglam - lam - cuda::std::lgamma(i + 1); + double const logfinner = k * cuda::std::log(1.0 - cuda::std::pow(1.0 - 1.0 / word_bits, i)); + double const logcollide = cuda::std::log(i) + log1collide; + result += cuda::std::exp(logp + logfinner) + cuda::std::exp(logp + logcollide); + } + + return (result > 1.0) ? 1.0 : result; +} + +} // namespace cuco::detail \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 95e353ea9..e36860c97 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -144,4 +144,4 @@ ConfigureTest(HYPERLOGLOG_TEST ConfigureTest(BLOOM_FILTER_TEST bloom_filter/unique_sequence_test.cu bloom_filter/arrow_policy_test.cu - fpr_test.cu) + bloom_filter/fpr_test.cu) From 118cab90b64244579ccc15f1924ab9719ae22d8a Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Thu, 27 Feb 2025 17:51:20 -0800 Subject: [PATCH 7/9] WIP test --- README.md | 2 +- examples/bloom_filter/host_bulk_example.cu | 3 +-- tests/bloom_filter/fpr_test.cu | 27 +++++++++++++++++++--- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 38297c3ef..2228ddf7b 100644 --- a/README.md +++ b/README.md @@ -256,4 +256,4 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection `cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries. #### Examples: -- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVSJhEg0SXfRViQLaauqVMjjMYyVwZ71A4Ki_Pe99njIEEh3tURKgn0f5557fO2nSDOtuRQ66n9_inga9XtxlBOxtGTJon5EbUqiONLSKuq-d97NBLyDK1lsFV9mBhq0Cafd099a-OsshvGX0fVoCFe3k7vbyfB-dDtuOwfv9IlTJjRLwYqUKTAZg2FBKP4JOzF8YcqhgdN2FxrOYBaFvVnUPPdRttLCimxBSANWMwzDNSx4zoA9UlYY4AKoXBU5J4Iy2HCT-VQhjocD30IQmRiC9gQ9Cvy2qFsCMTvo7pMZU_Q7nc1m0yYedluqZScvjXXn0-jqZjy9aSH0ndtnkSO9oNjflissPNkCKRAZJQnizckGpAKyVAz3jHTIN4obLpYxaLkwG6KYj5NybRRPrNkjr8KJ9dcNkD4ikLjhFEbTWQQfhtPRNPZxvo7u_7j9fA9fh5PJcHw_upnC7QSbNb4euVbht48wHH-DP0fj6xgYUoep2GOhXBUIlTtaWVpyOGVsD8ZClrB0wShfcAqVjGAp10wJLAsKpla8FByCTH2cnK-4IcavHRTnU3VmYiZ-4YLmNmVwQS2VnSSXcjXHvhum2tRml_s2JlNWmw6VVpi22zzYStkaU8zXjBqpjpuwR0atAzYvJDZte9xKY3cZSq39GgOX2BRGVn6ZC4OK46KxljxtzsQTFgZukWLdxnEMwq7mD2yrndgG0Ov-2u12z2H36XQ6F_A7E0wRw8I2OPvjkUxRug1e4r6DbvvsPEQaIbvKeK4XXGkDGckXPp4LJsOGp_eNBOIgQStkPj_00DYJ3dLoceor8zjwX7fZqjZRIuCbG7I78pypgwZmW5RC27cAnAROXmgy9yYDl_185zotyMad8rpXORlStiA2N1A22GnyNSBfvZNcv1_X3EWV6zLEe6pV-BxSa5P2-6hBAxcXeCI_2PwBgXnePcdv41lgOUwVypG4LHvu5mIJs4-xZgJqHwzvczGR5iF3qc5-f0_nNdSuYY2qc3627lwqRTfcVjthS5RtM_Yebczg_u81QxpinVaKubdC3usu53UDdPRi2dme1MVSWoldmNJhb6sK8ALjpcHHxRzmuXdwhkHOJE0bFYo4ZGr-lLVEyvzSWeIgxO40SuAxLEiu2T53Rx1F3XHvpNRihFL-skxta6dvN1dxCq-5tDrfBg3hrN-Vho737iLUmbR5CmU68FebUZa1CqnxYlkz8MMDibm_mwx6dVbwqLq7UB9QE78UXjX2v8D1ZZqMGMBbzN_W7hphwqN6NVzcEeXutq4fCO0dCyUTkuP1hhdKSgxBnStLjcVYcS1MiMIeM55wg64lr6_q_ng3ueziGSuwLDcvZIkEW5EgcmTF-_KqtlpVbjDsm2ZEZ0y7J0jqZxbWe5xOsaNTBDrFG3Qucol0ObYd2EF5xP1io5KXv9Aa-2I70qD6UjixTgbNJnRCwFJ-pXbLvIv_kfd1JfWln-QVzeMDEkX5fcWIxu6mPwb4dsHVig5v4Dp4YBGAH0zEYI7XDx7GmnnZoGp57oUyr4TiQzX2r8zmvw5bxVCRArr49Rmfy-79iS9S9fKqjsSa0t7pme3htixM-eSOWhhoQE9Oeu-hRRTNBno1f9-FVgvRGfxlMAdLWzlZJf4dnvOkFpNSmuPiunw04wJyLR6i57jax1trbx_7Fj3_8D__AFOKFKY=)) \ No newline at end of file +- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVSJhEg0SXfRViQLaauqVMjjMYyVwZ76AUFR_vteezxkCKRaLZGSYN_HueceX_sp0kxrLoWO-t-fIp5G_V4c5UQsLVmyqB9Rm5IojrS0irrvnXczAe_gShZbxZeZgQZtwmn39I8W_jqLYfxldD0awtXt5O52Mrwf3Y7bzsE7feKUCc1SsCJlCkzGYFgQin_CTgxfmHJo4LTdhYYzmEVhbxY1z32UrbSwIlsQ0oDVDMNwDQueM2CPlBUGuAAqV0XOiaAMNtxkPlWI4-HAtxBEJoagPUGPAr8t6pZAzA66-2TGFP1OZ7PZtImH3ZZq2clLY935NLq6GU9vWgh95_ZZ5EgvKPbTcoWFJ1sgBSKjJEG8OdmAVECWiuGekQ75RnHDxTIGLRdmQxTzcVKujeKJNXvkVTix_roB0kcEEjecwmg6i-DDcDqaxj7O19H9X7ef7-HrcDIZju9HN1O4nWCzxtcj1yr89hGG42_w92h8HQND6jAVeyyUqwKhckcrS0sOp4ztwVjIEpYuGOULTqGSESzlmimBZUHB1IqXgkOQqY-T8xU3xPi1g-J8qs5MzMRvXNDcpgwuqKWyk-RSrubYd8NUm9rsct_GZMpq06HSCtN2mwdbKVtjivmaUSPVcRP2yKh1wOaFxKZtj1tp7C5DqbVfY-ASm8LIyi9zYVBxXDTWkqfNmXjCwsAtUqzbOI5B2NX8gW21E9sAet3fu93uOew-nU7nAv5kgiliWNgGZ388kilKt8FL3HfQbZ-dh0gjZFcZz_WCK20gI_nCx3PBZNjw9L6RQBwkaIXM54ce2iahWxo9Tn1lHgf-6zZb1SZKBHxzQ3ZHnjN10MBsi1Jo-xaAk8DJC03m3mTgsp_vXKcF2bhTXvcqJ0PKFsTmBsoGO02-BuSrd5Lr9-uau6hyXYZ4T7UKn0NqbdJ-HzVo4OICT-QHmz8gMM-75_htPAssh6lCORKXZc_dXCxh9jHWTEDtg-F9LibSPOQu1dnv7-m8hto1rFF1zs_WnUul6IbbaidsibJtxt6jjRnc_71mSEOs00ox91bIe93lvG6Ajl4sO9uTulhKK7ELUzrsbVUBXmC8NPi4mMM89w7OMMiZpGmjQhGHTM1fspZImV86SxyE2J1GCTyGBck12-fuqKOoO-6dlFqMUMo_lqlt7fTt5ipO4TWXVufboCGc9bvS0PHeXYQ6kzZPoUwH_mozyrJWITVeLGsGfnggMfd3k0GvzgoeVXcX6gNq4pfCq8b-F7i-TJMRA3iL-dvaXSNMeFSvhos7otzd1vUDob1joWRCcrze8EJJiSGoc2WpsRgrroUJUdhjxhNu0LXk9VXdH-8ml108YwWW5eaFLJFgKxJEjqx4X17VVqvKDYZ904zojGn3BEn9zMJ6j9MpdnSKQKd4g85FLpEux7YDOyiPuF9sVPLyF1pjX2xHGlRfCifWyaDZhE4IWMqv1G6Zd_E_8r6upL70i7yieXxAoii_rxjR2N30xwDfLrha0eENXAcPLALwg4kYzPH6wcNYMy8bVC3PvVDmlVB8qIqdY3NVMRSfgC5-fcaXsXtq4uNTvTygI7GmtHd6Znu4LQtTvq6jFgYa0JOT3ntoEUWzgV7N33eh1UIgBn8ZzMHSVk5WiX9y5zypxaSU5ri4Lt_HuIC0iofoOa728YLa28cWRc8__M-_9KAQpw==)) \ No newline at end of file diff --git a/examples/bloom_filter/host_bulk_example.cu b/examples/bloom_filter/host_bulk_example.cu index a4916def9..9b184b383 100644 --- a/examples/bloom_filter/host_bulk_example.cu +++ b/examples/bloom_filter/host_bulk_example.cu @@ -69,8 +69,7 @@ int main(void) float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn); std::cout << "TPR[measured]=" << tp_rate << " FPR[measured]=" << fp_rate - << " FPR[expected]=" << filter.expected_false_positive_rate(num_keys * 0.5) - << std::endl; + << " FPR[expected]=" << filter.expected_false_positive_rate(num_tp) << std::endl; return 0; } \ No newline at end of file diff --git a/tests/bloom_filter/fpr_test.cu b/tests/bloom_filter/fpr_test.cu index ac76f0a22..511474f85 100644 --- a/tests/bloom_filter/fpr_test.cu +++ b/tests/bloom_filter/fpr_test.cu @@ -23,9 +23,13 @@ #include #include +#include #include #include +#include +#include + using size_type = int32_t; template @@ -69,7 +73,18 @@ void test_fpr(Filter& filter, size_type num_keys) SECTION("Fals-positive rate should be close to the theoretical value.") { - REQUIRE(fp_rate < 1.0f); // TODO use actual theoretical FPR value + auto expected_fpr = filter.expected_false_positive_rate(num_tp); + INFO("expected_fpr = " << expected_fpr << ", fp_rate = " << fp_rate); + + // If the expected FPR is zero, then we expect fp_rate to be zero. + if (expected_fpr == 0.0) { + REQUIRE(fp_rate == 0.0f); + } else { + // Only fail if fp_rate exceeds expected_fpr by more than 3% + float relative_excess = + (fp_rate > expected_fpr) ? (fp_rate - expected_fpr) / expected_fpr : 0.0f; + REQUIRE(relative_excess <= 0.03f); + } } } @@ -86,8 +101,14 @@ TEMPLATE_TEST_CASE_SIG( cuco::bloom_filter, cuda::thread_scope_device, Policy>; constexpr size_type num_keys{400}; - uint32_t pattern_bits = - GENERATE(Policy::words_per_block, Policy::words_per_block + 1, Policy::words_per_block + 2); + uint32_t pattern_bits = Policy::words_per_block + GENERATE(0, 1, 2, 3, 4); + + // some parameter combinations might be invalid so we skip them + try { + [[maybe_unused]] auto policy = Policy{pattern_bits}; + } catch (std::exception const& e) { + SKIP(e.what()); + } auto filter = filter_type{1000, {}, {pattern_bits}}; From fb8d14e1c8804e70a12c153ac3f2994bf6cc2be0 Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Thu, 27 Feb 2025 17:55:20 -0800 Subject: [PATCH 8/9] Update copyright year --- include/cuco/bloom_filter.cuh | 2 +- include/cuco/detail/bloom_filter/bloom_filter.inl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/cuco/bloom_filter.cuh b/include/cuco/bloom_filter.cuh index 7754fb977..3bfe66691 100644 --- a/include/cuco/bloom_filter.cuh +++ b/include/cuco/bloom_filter.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/include/cuco/detail/bloom_filter/bloom_filter.inl b/include/cuco/detail/bloom_filter/bloom_filter.inl index b35d0ec9c..2694a2852 100644 --- a/include/cuco/detail/bloom_filter/bloom_filter.inl +++ b/include/cuco/detail/bloom_filter/bloom_filter.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From bf81a69d1a2d2ab661ab1bb744ab924df4e2a5bd Mon Sep 17 00:00:00 2001 From: Daniel Juenger <2955913+sleeepyjack@users.noreply.github.com> Date: Tue, 4 Mar 2025 16:29:13 -0800 Subject: [PATCH 9/9] Fix FPR calculation and unit test --- benchmarks/bloom_filter/utils.hpp | 23 ++++++--- include/cuco/detail/bloom_filter/utils.hpp | 17 +++---- tests/bloom_filter/fpr_test.cu | 55 +++++++++++++--------- 3 files changed, 60 insertions(+), 35 deletions(-) diff --git a/benchmarks/bloom_filter/utils.hpp b/benchmarks/bloom_filter/utils.hpp index cec7e06c3..089ac3dcd 100644 --- a/benchmarks/bloom_filter/utils.hpp +++ b/benchmarks/bloom_filter/utils.hpp @@ -62,13 +62,24 @@ void add_fpr_summary(nvbench::state& state, FilterType& filter) filter.add(tp_begin, tp_end); filter.contains(tn_begin, tn_end, result.begin()); - float fp = thrust::count(thrust::device, result.begin(), result.end(), true); + auto const fp_emp = + static_cast(thrust::count(thrust::device, result.begin(), result.end(), true)) / + static_cast(num_keys); - auto& summ = state.add_summary("FalsePositiveRate"); - summ.set_string("hint", "FPR"); - summ.set_string("short_name", "FPR"); - summ.set_string("description", "False-positive rate of the bloom filter."); - summ.set_float64("value", fp / num_keys); + auto& summ_fp = state.add_summary("FalsePositiveRate"); + summ_fp.set_string("hint", "FPR"); + summ_fp.set_string("short_name", "FPR"); + summ_fp.set_string("description", "False-positive rate of the bloom filter."); + summ_fp.set_float64("value", fp_emp); + + auto const fp_theo = filter.expected_false_positive_rate(num_keys); + auto const fp_dev = fp_emp - fp_theo; + + auto& summ_dev = state.add_summary("FalsePositiveRateDeviation"); + summ_dev.set_string("hint", "FPRDev"); + summ_dev.set_string("short_name", "FPRDev"); + summ_dev.set_string("description", "Deviation of false-positive rate over theoretical value."); + summ_dev.set_float64("value", fp_dev); filter.clear(); } diff --git a/include/cuco/detail/bloom_filter/utils.hpp b/include/cuco/detail/bloom_filter/utils.hpp index d9338db99..fb28c42d4 100644 --- a/include/cuco/detail/bloom_filter/utils.hpp +++ b/include/cuco/detail/bloom_filter/utils.hpp @@ -36,7 +36,7 @@ namespace cuco::detail { * @param ndv Number of distinct inserted elements * @param bytes Filter size in bytes * @param word_bits Number of bits in the underlying word type of a filter block - * @param bucket_words Number of words in each filter block + * @param block_words Number of words in each filter block * @param hash_bits Total number of bits in the hash value type * @param k Number of pattern bits to set for a key * @param max_iters Maximum number of iterations for accuracy refinement @@ -46,27 +46,28 @@ namespace cuco::detail { __host__ inline double blocked_bloom_filter_expected_fpr(double ndv, double bytes, double word_bits, - double bucket_words, + double block_words, double hash_bits, double k, - std::uint64_t max_iters = 10000) + std::uint64_t max_iters = 1000) { if (ndv == 0) return 0.0; if (bytes <= 0) return 1.0; - if (ndv / (bytes * cuda::std::numeric_limits::digits) > 3) return 1.0; + if (ndv / (bytes * cuda::std::numeric_limits::digits) >= 2.0) return 1.0; double result = 0; double const lam = - bucket_words * word_bits / ((bytes * cuda::std::numeric_limits::digits) / ndv); + block_words * word_bits / ((bytes * cuda::std::numeric_limits::digits) / ndv); double const loglam = cuda::std::log(lam); double const log1collide = -hash_bits * cuda::std::log(2.0); for (std::uint64_t j = 0; j < max_iters; ++j) { - std::uint64_t i = max_iters - 1 - j; - double logp = i * loglam - lam - cuda::std::lgamma(i + 1); - double const logfinner = k * cuda::std::log(1.0 - cuda::std::pow(1.0 - 1.0 / word_bits, i)); + double const i = static_cast(max_iters - 1 - j); + double const logp = i * loglam - lam - cuda::std::lgamma(i + 1.0); + double const logfinner = k * cuda::std::log(1.0 - cuda::std::pow(1.0 - 1.0 / word_bits, i * k)); double const logcollide = cuda::std::log(i) + log1collide; result += cuda::std::exp(logp + logfinner) + cuda::std::exp(logp + logcollide); + // result += exp(logp + logfinner); // alternative approach } return (result > 1.0) ? 1.0 : result; diff --git a/tests/bloom_filter/fpr_test.cu b/tests/bloom_filter/fpr_test.cu index 511474f85..a839fe72f 100644 --- a/tests/bloom_filter/fpr_test.cu +++ b/tests/bloom_filter/fpr_test.cu @@ -23,17 +23,16 @@ #include #include -#include #include #include #include #include -using size_type = int32_t; +using size_type = uint64_t; -template -void test_fpr(Filter& filter, size_type num_keys) +template +void test_fpr(Filter& filter, size_type num_keys, double max_deviation) { using Key = typename Filter::key_type; @@ -41,7 +40,7 @@ void test_fpr(Filter& filter, size_type num_keys) thrust::device_vector keys(num_keys); thrust::sequence(thrust::device, keys.begin(), keys.end()); - size_type num_tp = num_keys * 0.5; ///< Insert the first half keys into the filter. + size_type num_tp = num_keys * 0.5; size_type num_tn = num_keys - num_tp; auto tp_begin = keys.begin(); @@ -64,26 +63,27 @@ void test_fpr(Filter& filter, size_type num_keys) // the filter and the number of hashes used per key. filter.contains(tn_begin, tn_end, tn_result.begin()); - float tp_rate = - float(thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true)) / float(num_tp); - float fp_rate = - float(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / float(num_tn); + auto const tp_rate = + static_cast(thrust::count(thrust::device, tp_result.begin(), tp_result.end(), true)) / + static_cast(num_tp); + auto const fp_rate = + static_cast(thrust::count(thrust::device, tn_result.begin(), tn_result.end(), true)) / + static_cast(num_tn); - SECTION("True-positive rate must be 1.") { REQUIRE(tp_rate == 1.0f); } + SECTION("True-positive rate must be 1.") { REQUIRE(tp_rate == 1.0); } - SECTION("Fals-positive rate should be close to the theoretical value.") + SECTION("False-positive rate should be close to the theoretical value.") { - auto expected_fpr = filter.expected_false_positive_rate(num_tp); + auto const expected_fpr = filter.expected_false_positive_rate(num_tp); INFO("expected_fpr = " << expected_fpr << ", fp_rate = " << fp_rate); // If the expected FPR is zero, then we expect fp_rate to be zero. if (expected_fpr == 0.0) { - REQUIRE(fp_rate == 0.0f); + REQUIRE(fp_rate == 0.0); } else { - // Only fail if fp_rate exceeds expected_fpr by more than 3% - float relative_excess = - (fp_rate > expected_fpr) ? (fp_rate - expected_fpr) / expected_fpr : 0.0f; - REQUIRE(relative_excess <= 0.03f); + // Only fail if fp_rate exceeds expected_fpr by more than max_deviation + auto const relative_excess = (fp_rate > expected_fpr) ? fp_rate - expected_fpr : 0.0; + REQUIRE(relative_excess <= max_deviation); } } } @@ -98,11 +98,21 @@ TEMPLATE_TEST_CASE_SIG( (int32_t, cuco::default_filter_policy, uint64_t, 8>)) { using filter_type = - cuco::bloom_filter, cuda::thread_scope_device, Policy>; - constexpr size_type num_keys{400}; + cuco::bloom_filter, cuda::thread_scope_device, Policy>; + size_type num_keys_pow2 = GENERATE(28, 29, 30); + size_type const num_keys = size_type{1} << num_keys_pow2; + constexpr size_type filter_bits_pow2 = 29; // 64MB + constexpr size_type filter_bits = size_type{1} << filter_bits_pow2; + size_type const num_blocks = (filter_bits / CHAR_BIT) / (sizeof(typename filter_type::word_type) * + filter_type::words_per_block); uint32_t pattern_bits = Policy::words_per_block + GENERATE(0, 1, 2, 3, 4); + INFO("num_keys_pow2=" << num_keys_pow2); + INFO("filter_bits_pow2=" << filter_bits_pow2); + INFO("num_blocks=" << num_blocks); + INFO("pattern_bits=" << pattern_bits); + // some parameter combinations might be invalid so we skip them try { [[maybe_unused]] auto policy = Policy{pattern_bits}; @@ -110,7 +120,10 @@ TEMPLATE_TEST_CASE_SIG( SKIP(e.what()); } - auto filter = filter_type{1000, {}, {pattern_bits}}; + auto filter = filter_type{num_blocks, {}, {pattern_bits}}; + + // The maximum allowed deviation of the false-positive rate from the theoretical value + constexpr auto max_deviation = 0.02; - test_fpr(filter, num_keys); + test_fpr(filter, num_keys, max_deviation); }