From 9c8b4588c832b81874999fdec7254aa6e100350a Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 6 Feb 2025 11:59:50 -0500 Subject: [PATCH 1/7] Add basic NVCC HMAC DRBG testing --- test/nvcc_jamfile | 2 + test/test_hmac_drbg.cu | 118 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 test/test_hmac_drbg.cu diff --git a/test/nvcc_jamfile b/test/nvcc_jamfile index 7bdbdccc..b65f1e6f 100644 --- a/test/nvcc_jamfile +++ b/test/nvcc_jamfile @@ -24,3 +24,5 @@ run test_shake128_nvcc.cu ; run test_shake256_nvcc.cu ; run test_hmac.cu ; + +run test_hmac_drbg.cu ; diff --git a/test/test_hmac_drbg.cu b/test/test_hmac_drbg.cu new file mode 100644 index 00000000..744c25a6 --- /dev/null +++ b/test/test_hmac_drbg.cu @@ -0,0 +1,118 @@ +// Copyright Matt Borland 2024 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" +#include "generate_random_strings.hpp" +#include +#include +#include +#include +#include + +using digest_type = typename cuda::std::array; + +// The kernel function +__global__ void cuda_test(char** in, digest_type* out, int numElements) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < numElements) + { + boost::crypt::sha1_hmac_drbg drbg; + cuda::std::span in_span {in[i], static_cast(64)}; + drbg.init(in_span); + drbg.generate(out[i], 640U); + } +} + +int main() +{ + try + { + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + constexpr int numElements = 50000; + constexpr std::size_t elementSize = 64; + + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + char** input_vector1; + cudaMallocManaged(&input_vector1, numElements * sizeof(char*)); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + for (int i = 0; i < numElements; ++i) + { + cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char)); + if (input_vector1[i] == nullptr) + { + throw std::runtime_error("Failed to allocate memory for input_vector1"); + } + boost::crypt::generate_random_string(input_vector1[i], elementSize); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1, output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + digest_type out; + boost::crypt::sha1_hmac_drbg drbg; + std::span in_span(input_vector1[i], static_cast(64)); + drbg.init(in_span); + drbg.generate(out, 640U); + } + double t = w.elapsed(); + + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i][0] != results[i][0]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + // Cleanup all the memory we allocated + for (int i = 0; i < numElements; ++i) + { + cudaFree(input_vector1[i]); + } + cudaFree(input_vector1); + } + catch (const std::exception& e) + { + std::cerr << "Terminated with exception: " << e.what() << std::endl; + } +} From b3e90d02305de7afd3672563bb84e99a1fdeb983 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 6 Feb 2025 12:01:37 -0500 Subject: [PATCH 2/7] Add basic NVCC HASH DRBG testing --- test/nvcc_jamfile | 1 + test/test_hash_drbg.cu | 118 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 test/test_hash_drbg.cu diff --git a/test/nvcc_jamfile b/test/nvcc_jamfile index b65f1e6f..37a9be9f 100644 --- a/test/nvcc_jamfile +++ b/test/nvcc_jamfile @@ -26,3 +26,4 @@ run test_shake256_nvcc.cu ; run test_hmac.cu ; run test_hmac_drbg.cu ; +run test_hash_drbg.cu ; diff --git a/test/test_hash_drbg.cu b/test/test_hash_drbg.cu new file mode 100644 index 00000000..c588a052 --- /dev/null +++ b/test/test_hash_drbg.cu @@ -0,0 +1,118 @@ +// Copyright Matt Borland 2024 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" +#include "generate_random_strings.hpp" +#include +#include +#include +#include +#include + +using digest_type = typename cuda::std::array; + +// The kernel function +__global__ void cuda_test(char** in, digest_type* out, int numElements) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < numElements) + { + boost::crypt::sha1_hash_drbg drbg; + cuda::std::span in_span {in[i], static_cast(64)}; + drbg.init(in_span); + drbg.generate(out[i], 640U); + } +} + +int main() +{ + try + { + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + constexpr int numElements = 50000; + constexpr std::size_t elementSize = 64; + + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + char** input_vector1; + cudaMallocManaged(&input_vector1, numElements * sizeof(char*)); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + for (int i = 0; i < numElements; ++i) + { + cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char)); + if (input_vector1[i] == nullptr) + { + throw std::runtime_error("Failed to allocate memory for input_vector1"); + } + boost::crypt::generate_random_string(input_vector1[i], elementSize); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1, output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + digest_type out; + boost::crypt::sha1_hash_drbg drbg; + std::span in_span(input_vector1[i], static_cast(64)); + drbg.init(in_span); + drbg.generate(out, 640U); + } + double t = w.elapsed(); + + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i][0] != results[i][0]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + // Cleanup all the memory we allocated + for (int i = 0; i < numElements; ++i) + { + cudaFree(input_vector1[i]); + } + cudaFree(input_vector1); + } + catch (const std::exception& e) + { + std::cerr << "Terminated with exception: " << e.what() << std::endl; + } +} From 8848edcdeaf9fb67d8e95a503069318210be6b36 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 6 Feb 2025 12:09:40 -0500 Subject: [PATCH 3/7] Fix usage of std:: instead of compat:: --- include/boost/crypt2/drbg/detail/hash_drbg.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/crypt2/drbg/detail/hash_drbg.hpp b/include/boost/crypt2/drbg/detail/hash_drbg.hpp index 285f769b..06241c2a 100644 --- a/include/boost/crypt2/drbg/detail/hash_drbg.hpp +++ b/include/boost/crypt2/drbg/detail/hash_drbg.hpp @@ -61,7 +61,7 @@ class hash_drbg static constexpr compat::uint64_t reseed_interval {281474976710656ULL}; // 2^48 compat::array constant_ {}; - compat::span constant_span_ {constant_}; + compat::span constant_span_ {constant_}; compat::array value_ {}; compat::span value_span_ {value_}; From a36181274174453c6a8b48fe3647b2332fba7f70 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 6 Feb 2025 12:23:11 -0500 Subject: [PATCH 4/7] Default SizedRange2 --- include/boost/crypt2/drbg/detail/hash_drbg.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/crypt2/drbg/detail/hash_drbg.hpp b/include/boost/crypt2/drbg/detail/hash_drbg.hpp index 06241c2a..52b0e396 100644 --- a/include/boost/crypt2/drbg/detail/hash_drbg.hpp +++ b/include/boost/crypt2/drbg/detail/hash_drbg.hpp @@ -108,7 +108,7 @@ class hash_drbg compat::span personalization = compat::span{}) noexcept -> state; template , concepts::sized_range SizedRange3 = compat::span> BOOST_CRYPT_GPU_ENABLED auto init(SizedRange1&& entropy, SizedRange2&& nonce = compat::span {}, From d91d01005198df515cfdbde0934e34b6a248913e Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 6 Feb 2025 12:45:55 -0500 Subject: [PATCH 5/7] Add missing header for assert --- include/boost/crypt2/drbg/detail/hash_drbg.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/boost/crypt2/drbg/detail/hash_drbg.hpp b/include/boost/crypt2/drbg/detail/hash_drbg.hpp index 52b0e396..a751e9fe 100644 --- a/include/boost/crypt2/drbg/detail/hash_drbg.hpp +++ b/include/boost/crypt2/drbg/detail/hash_drbg.hpp @@ -6,6 +6,7 @@ #define BOOST_CRYPT2_DRBG_HASH_DRBG_HPP #include +#include #include #include #include From a7549cbe3cb2e4c7df09f0f7361e7a098c2e3bba Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 6 Feb 2025 13:20:01 -0500 Subject: [PATCH 6/7] Fix HMAC DRBG compatibility with NVCC --- include/boost/crypt/utility/config.hpp | 2 +- include/boost/crypt2/drbg/detail/hmac_drbg.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/crypt/utility/config.hpp b/include/boost/crypt/utility/config.hpp index a46640c6..4e0a1606 100644 --- a/include/boost/crypt/utility/config.hpp +++ b/include/boost/crypt/utility/config.hpp @@ -127,7 +127,7 @@ // ----- Has something ----- // ----- Unreachable ----- -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) || defined(__clang__) || defined(BOOST_CRYPT_HAS_CUDA) # define BOOST_CRYPT_UNREACHABLE __builtin_unreachable() #elif defined(_MSC_VER) # define BOOST_CRYPT_UNREACHABLE __assume(0) diff --git a/include/boost/crypt2/drbg/detail/hmac_drbg.hpp b/include/boost/crypt2/drbg/detail/hmac_drbg.hpp index 8a7c985a..74be5218 100644 --- a/include/boost/crypt2/drbg/detail/hmac_drbg.hpp +++ b/include/boost/crypt2/drbg/detail/hmac_drbg.hpp @@ -135,7 +135,7 @@ BOOST_CRYPT_GPU_ENABLED_CONSTEXPR auto hmac_drbg storage_gap {std::byte{0x00}}; + compat::array storage_gap {compat::byte{0x00}}; compat::span storage_gap_span {storage_gap}; HMACType hmac; From b47793d5df98bc7390ed8504f09045855b27f3af Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 6 Feb 2025 13:33:05 -0500 Subject: [PATCH 7/7] Adjust CPU comparison path --- test/test_hash_drbg.cu | 1 + test/test_hmac_drbg.cu | 1 + 2 files changed, 2 insertions(+) diff --git a/test/test_hash_drbg.cu b/test/test_hash_drbg.cu index c588a052..e3589405 100644 --- a/test/test_hash_drbg.cu +++ b/test/test_hash_drbg.cu @@ -88,6 +88,7 @@ int main() std::span in_span(input_vector1[i], static_cast(64)); drbg.init(in_span); drbg.generate(out, 640U); + results.emplace_back(out); } double t = w.elapsed(); diff --git a/test/test_hmac_drbg.cu b/test/test_hmac_drbg.cu index 744c25a6..41a479cc 100644 --- a/test/test_hmac_drbg.cu +++ b/test/test_hmac_drbg.cu @@ -88,6 +88,7 @@ int main() std::span in_span(input_vector1[i], static_cast(64)); drbg.init(in_span); drbg.generate(out, 640U); + results.emplace_back(out); } double t = w.elapsed();