cppalliance · mborland · Feb 6, 2025 · Feb 6, 2025 · Feb 6, 2025 · Feb 6, 2025
diff --git a/include/boost/crypt/utility/config.hpp b/include/boost/crypt/utility/config.hpp
@@ -127,7 +127,7 @@
 // ----- Has something -----
 
 // ----- Unreachable -----
-#if defined(__GNUC__) || defined(__clang__)
+#if defined(__GNUC__) || defined(__clang__) || defined(BOOST_CRYPT_HAS_CUDA)
 #  define BOOST_CRYPT_UNREACHABLE __builtin_unreachable()
 #elif defined(_MSC_VER)
 #  define BOOST_CRYPT_UNREACHABLE __assume(0)

diff --git a/include/boost/crypt2/drbg/detail/hash_drbg.hpp b/include/boost/crypt2/drbg/detail/hash_drbg.hpp
@@ -6,6 +6,7 @@
 #define BOOST_CRYPT2_DRBG_HASH_DRBG_HPP
 
 #include <boost/crypt2/detail/config.hpp>
+#include <boost/crypt2/detail/assert.hpp>
 #include <boost/crypt2/detail/compat.hpp>
 #include <boost/crypt2/detail/concepts.hpp>
 #include <boost/crypt2/detail/clear_mem.hpp>
@@ -61,7 +62,7 @@ class hash_drbg
     static constexpr compat::uint64_t reseed_interval {281474976710656ULL}; // 2^48
 
     compat::array<compat::byte, seedlen_bytes> constant_ {};
-    compat::span<const std::byte, seedlen_bytes> constant_span_ {constant_};
+    compat::span<const compat::byte, seedlen_bytes> constant_span_ {constant_};
     compat::array<compat::byte, seedlen_bytes> value_ {};
     compat::span<const compat::byte, seedlen_bytes> value_span_ {value_};
 
@@ -108,7 +109,7 @@ class hash_drbg
                                                 compat::span<const compat::byte, Extent3> personalization = compat::span<const compat::byte, 0>{}) noexcept -> state;
 
     template <concepts::sized_range SizedRange1,
-              concepts::sized_range SizedRange2,
+              concepts::sized_range SizedRange2 = compat::span<const compat::byte, 0U>,
               concepts::sized_range SizedRange3 = compat::span<const compat::byte, 0U>>
     BOOST_CRYPT_GPU_ENABLED auto init(SizedRange1&& entropy,
                                       SizedRange2&& nonce = compat::span<const compat::byte, 0U> {},

diff --git a/include/boost/crypt2/drbg/detail/hmac_drbg.hpp b/include/boost/crypt2/drbg/detail/hmac_drbg.hpp
@@ -135,7 +135,7 @@ BOOST_CRYPT_GPU_ENABLED_CONSTEXPR auto hmac_drbg<HMACType, max_hasher_security,
     const auto provided_data_size {provided_data_1.size() + provided_data_2.size() + provided_data_3.size()};
 
     // Step 1: V || 0x00 || provided data
-    compat::array<compat::byte, 1U> storage_gap {std::byte{0x00}};
+    compat::array<compat::byte, 1U> storage_gap {compat::byte{0x00}};
     compat::span<const compat::byte, 1U> storage_gap_span {storage_gap};
 
     HMACType hmac;

diff --git a/test/nvcc_jamfile b/test/nvcc_jamfile
@@ -24,3 +24,6 @@ run test_shake128_nvcc.cu ;
 run test_shake256_nvcc.cu ;
 
 run test_hmac.cu ;
+
+run test_hmac_drbg.cu ;
+run test_hash_drbg.cu ;
diff --git a/test/test_hash_drbg.cu b/test/test_hash_drbg.cu
@@ -0,0 +1,119 @@
+//  Copyright Matt Borland 2024
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cuda_runtime.h>
+#include <boost/crypt2/drbg/sha1_drbg.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+#include "generate_random_strings.hpp"
+#include <iostream>
+#include <iomanip>
+#include <exception>
+#include <memory>
+#include <span>
+
+using digest_type = typename cuda::std::array<cuda::std::byte, 80>;
+
+// The kernel function
+__global__ void cuda_test(char** in, digest_type* out, int numElements)
+{
+    int i = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        boost::crypt::sha1_hash_drbg drbg;
+        cuda::std::span<char> in_span {in[i], static_cast<cuda::std::size_t>(64)};
+        drbg.init(in_span);
+        drbg.generate(out[i], 640U);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        constexpr int numElements = 50000;
+        constexpr std::size_t elementSize = 64;
+
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        char** input_vector1;
+        cudaMallocManaged(&input_vector1, numElements * sizeof(char*));
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<digest_type> output_vector(numElements);
+
+        for (int i = 0; i < numElements; ++i)
+        {
+            cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char));
+            if (input_vector1[i] == nullptr)
+            {
+                throw std::runtime_error("Failed to allocate memory for input_vector1");
+            }
+            boost::crypt::generate_random_string(input_vector1[i], elementSize);
+        }
+
+        // Launch the Vector Add CUDA Kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1, output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<digest_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            digest_type out;
+            boost::crypt::sha1_hash_drbg drbg;
+            std::span<char> in_span(input_vector1[i], static_cast<std::size_t>(64));
+            drbg.init(in_span);
+            drbg.generate(out, 640U);
+            results.emplace_back(out);
+        }
+        double t = w.elapsed();
+
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i][0] != results[i][0])
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+
+        // Cleanup all the memory we allocated
+        for (int i = 0; i < numElements; ++i)
+        {
+            cudaFree(input_vector1[i]);
+        }
+        cudaFree(input_vector1);
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Terminated with exception: " << e.what() << std::endl;
+    }
+}
diff --git a/test/test_hmac_drbg.cu b/test/test_hmac_drbg.cu
@@ -0,0 +1,119 @@
+//  Copyright Matt Borland 2024
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <cuda_runtime.h>
+#include <boost/crypt2/drbg/sha1_drbg.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+#include "generate_random_strings.hpp"
+#include <iostream>
+#include <iomanip>
+#include <exception>
+#include <memory>
+#include <span>
+
+using digest_type = typename cuda::std::array<cuda::std::byte, 80>;
+
+// The kernel function
+__global__ void cuda_test(char** in, digest_type* out, int numElements)
+{
+    int i = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        boost::crypt::sha1_hmac_drbg drbg;
+        cuda::std::span<char> in_span {in[i], static_cast<cuda::std::size_t>(64)};
+        drbg.init(in_span);
+        drbg.generate(out[i], 640U);
+    }
+}
+
+int main()
+{
+    try
+    {
+        // Error code to check return values for CUDA calls
+        cudaError_t err = cudaSuccess;
+
+        // Print the vector length to be used, and compute its size
+        constexpr int numElements = 50000;
+        constexpr std::size_t elementSize = 64;
+
+        std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+        // Allocate the managed input vector A
+        char** input_vector1;
+        cudaMallocManaged(&input_vector1, numElements * sizeof(char*));
+
+        // Allocate the managed output vector C
+        cuda_managed_ptr<digest_type> output_vector(numElements);
+
+        for (int i = 0; i < numElements; ++i)
+        {
+            cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char));
+            if (input_vector1[i] == nullptr)
+            {
+                throw std::runtime_error("Failed to allocate memory for input_vector1");
+            }
+            boost::crypt::generate_random_string(input_vector1[i], elementSize);
+        }
+
+        // Launch the Vector Add CUDA Kernel
+        int threadsPerBlock = 256;
+        int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1, output_vector.get(), numElements);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify that the result vector is correct
+        std::vector<digest_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            digest_type out;
+            boost::crypt::sha1_hmac_drbg drbg;
+            std::span<char> in_span(input_vector1[i], static_cast<std::size_t>(64));
+            drbg.init(in_span);
+            drbg.generate(out, 640U);
+            results.emplace_back(out);
+        }
+        double t = w.elapsed();
+
+        // check the results
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i][0] != results[i][0])
+            {
+                std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
+        std::cout << "Done\n";
+
+        // Cleanup all the memory we allocated
+        for (int i = 0; i < numElements; ++i)
+        {
+            cudaFree(input_vector1[i]);
+        }
+        cudaFree(input_vector1);
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << "Terminated with exception: " << e.what() << std::endl;
+    }
+}