From d1f676c56b000194829831627c3edbcee78b8882 Mon Sep 17 00:00:00 2001 From: Clint Banzhaf Date: Sun, 21 Dec 2025 04:21:03 +0100 Subject: [PATCH 1/8] wip --- include/CppCore/Math/Util.h | 52 ++++++++++++++++ src/CppCore.Debug/main.cpp | 115 ++++++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) diff --git a/include/CppCore/Math/Util.h b/include/CppCore/Math/Util.h index c1aaf0a1..7f8bc960 100644 --- a/include/CppCore/Math/Util.h +++ b/include/CppCore/Math/Util.h @@ -3812,6 +3812,58 @@ namespace CppCore CppCore::upowmod(a, b, m, r, t); } + template + INLINE static void upowmod_kary(UINT base, UINT exp, const UINT& mod, UINT& result, uint32_t k = 4U) + { + if (mod == 1) { CppCore::clear(result); return; } + if (exp == 0) { result = 1; return; } + + base %= mod; + + // Precompute powers: base^0, base^1, ..., base^(2^k - 1) + uint32_t table_size = 1 << k; + std::vector powers(table_size); + powers[0] = 1; + powers[1] = base; + + for (uint32_t i = 2; i < table_size; i++) { + CppCore::umulmod(powers[i-1], base, mod, powers[i]); + } + + // Find the position of the highest bit in exp + constexpr auto NUMBITS = sizeof(UINT) * 8U; + const auto LZB = CppCore::lzcnt(exp); + const auto bits = NUMBITS - LZB; + + // Round up to multiple of k + const int total_bits = ((bits + k - 1) / k) * k; + //int total_bits = CppCore::rup32(bits, k); + //int total_bits = CppCore::rupptwo32(bits, k); + + const uint32_t MASK = ((1 << k) - 1); + + result = 1; + + // Process k bits at a time from left to right (MSB to LSB) + for (int pos = total_bits - k; pos >= 0; pos -= k) { + // Square result k times (except for the first iteration) + if (pos < total_bits - k) { + for (int i = 0; i < k; i++) { + CppCore::umulmod(result, result, mod, result); + //result = (__uint128_t)result * result % mod; + } + } + + // Extract k-bit chunk at position pos + int chunk = (exp >> pos) & MASK; + + // Multiply by base^chunk + //result = (__uint128_t)result * powers[chunk] % mod; + CppCore::umulmod(result, powers[chunk], mod, result); + } + //return result; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // MERSENNE NUMBERS (2^i)-1 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/CppCore.Debug/main.cpp b/src/CppCore.Debug/main.cpp index 1277fce3..f6f75861 100644 --- a/src/CppCore.Debug/main.cpp +++ b/src/CppCore.Debug/main.cpp @@ -1,8 +1,123 @@ #include +#include +#include +#include +#include +#include using namespace CppCore; +typedef uint64_t UINTX; + +#define N 1024*1024 + +CppCore::Random::Default64 prng; + +/*UINTX d; +UINTX a[N]; +UINTX r1[N]; +UINTX r2[N]; + +UINTX bc[2]; + +uint64_t NOINLINE test1() +{ + barrett_constant(bc, d); + + uint64_t t = __rdtsc(); + + for (size_t i = 0; i < N; i++) + barrett_umod(r1[i], a[i], bc, d); + + return __rdtsc() - t; +} + +uint64_t NOINLINE test2() +{ + uint64_t t = __rdtsc(); + + for (size_t i = 0; i < N; i++) + CppCore::umod(r2[i], a[i], d); + + return __rdtsc() - t; +} + int main() { + prng.fill(a); + prng.fill(d); + + for (size_t i = 0; i < N; i++) + ((uint8_t*)&a[i])[sizeof(UINTX) - 1] |= 0x80; + + uint64_t t2 = test2(); + uint64_t t1 = test1(); + + std::cout << "BARRETT: " << t1 << std::endl; + std::cout << "KNUTH: " << t2 << std::endl; + + for (size_t i = 0; i < N; i++) + { + if (r1[i] != r2[i]) + { + std::cout << "FAIL" << std::endl; + //std::cout << std::hex << r2.toHexString() << std::endl; + return 1; + } + } + + return 0; +}*/ + +UINTX a[N]; +UINTX b[N]; +UINTX m[N]; +UINTX r1[N]; +UINTX r2[N]; + +int main() +{ + uint64_t ta = 245373463472; + uint64_t tb = 3252764873; + uint64_t tm = 32534684; + uint64_t tr; + CppCore::upowmod_kary(ta, tb, tm, tr); + std::cout << tr << std::endl; + + CppCore::upowmod(ta, tb, tm, tr); + std::cout << tr << std::endl; + + + prng.fill(a); + prng.fill(b); + prng.fill(m); + + for (size_t i = 0; i < N; i++) + { + UINTX at; + CppCore::clone(at, a[i]); + CppCore::upowmod(at, b[i], m[i], r1[i]); + CppCore::upowmod_kary(a[i], b[i], m[i], r2[i]); + + if (!CppCore::equal(r1[i], r2[i])) { + std::cout << a[i] << "^" << b[i] << " % " << m[i] << " = " << r1[i] << std::endl; + std::cout << a[i] << "^" << b[i] << " % " << m[i] << " = " << r2[i] << std::endl; + return 1; + } + } return 0; } + + + + +//std::cout << std::setfill('0') << std::setw(5) << std::setprecision(10); + +/*double t = 0.5; +double z; +for (size_t i = 0; i < Primes::NUMODDPRIMES; i++) +{ + z = (1.0 - 1.0 / (double)Primes::ODDPRIMES[i]); + t *= z; + std::cout << std::setfill('0') << std::setw(5) << i << " " << Primes::ODDPRIMES[i] << " " << std::fixed << std::setprecision(10) << (1.0 - t) << std::endl; +}*/ \ No newline at end of file From 3cb7743ab39cb7110107ff813884d3840d80ba76 Mon Sep 17 00:00:00 2001 From: Clint Banzhaf Date: Sat, 3 Jan 2026 15:56:39 +0100 Subject: [PATCH 2/8] wip --- include/CppCore/Math/Util.h | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/include/CppCore/Math/Util.h b/include/CppCore/Math/Util.h index f880745f..2922b9a7 100644 --- a/include/CppCore/Math/Util.h +++ b/include/CppCore/Math/Util.h @@ -3855,55 +3855,50 @@ namespace CppCore } template - INLINE static void upowmod_kary(UINT base, UINT exp, const UINT& mod, UINT& result, uint32_t k = 4U) + INLINE static void upowmod_kary(UINT& base, const UINT& exp, const UINT& m, UINT& result, uint32_t k = 4U) { - if (mod == 1) { CppCore::clear(result); return; } - if (exp == 0) { result = 1; return; } + //assert((&a != &r) && (&b != &r) && (&m != &r)); + assert(!CppCore::testzero(m)); + CppCore::clear(result); + if (m == 1U) + return; // n % 1 = 0 + *(uint32_t*)&result = 1U; + if (CppCore::testzero(exp)) + return; // 1 % n = 1 - base %= mod; + base %= m; // Precompute powers: base^0, base^1, ..., base^(2^k - 1) uint32_t table_size = 1 << k; std::vector powers(table_size); powers[0] = 1; powers[1] = base; - for (uint32_t i = 2; i < table_size; i++) { - CppCore::umulmod(powers[i-1], base, mod, powers[i]); + CppCore::umulmod(powers[i-1], base, m, powers[i]); } // Find the position of the highest bit in exp constexpr auto NUMBITS = sizeof(UINT) * 8U; const auto LZB = CppCore::lzcnt(exp); const auto bits = NUMBITS - LZB; - // Round up to multiple of k const int total_bits = ((bits + k - 1) / k) * k; - //int total_bits = CppCore::rup32(bits, k); - //int total_bits = CppCore::rupptwo32(bits, k); - - const uint32_t MASK = ((1 << k) - 1); - - result = 1; // Process k bits at a time from left to right (MSB to LSB) for (int pos = total_bits - k; pos >= 0; pos -= k) { + // Square result k times (except for the first iteration) if (pos < total_bits - k) { for (int i = 0; i < k; i++) { - CppCore::umulmod(result, result, mod, result); - //result = (__uint128_t)result * result % mod; + CppCore::umulmod(result, result, m, result); } } // Extract k-bit chunk at position pos - int chunk = (exp >> pos) & MASK; - + const uint32_t chunk = CppCore::getbits32(exp, pos, k); // Multiply by base^chunk - //result = (__uint128_t)result * powers[chunk] % mod; - CppCore::umulmod(result, powers[chunk], mod, result); + CppCore::umulmod(result, powers[chunk], m, result); } - //return result; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// From ac159c7383816d8f5bf2f3b20de5359bf9711078 Mon Sep 17 00:00:00 2001 From: Clint Banzhaf Date: Sat, 3 Jan 2026 16:32:08 +0100 Subject: [PATCH 3/8] wip --- include/CppCore/Math/Util.h | 40 ++++++++++++++++-------------------- src/CppCore.Debug/main.cpp | 41 +++++++++++++++++++++++++------------ 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/include/CppCore/Math/Util.h b/include/CppCore/Math/Util.h index 2922b9a7..25ba2826 100644 --- a/include/CppCore/Math/Util.h +++ b/include/CppCore/Math/Util.h @@ -3854,50 +3854,46 @@ namespace CppCore CppCore::upowmod(a, b, m, r, t); } - template - INLINE static void upowmod_kary(UINT& base, const UINT& exp, const UINT& m, UINT& result, uint32_t k = 4U) + template + INLINE static void upowmod_kary(UINT& base, const UINT& exp, const UINT& m, UINT& r) { //assert((&a != &r) && (&b != &r) && (&m != &r)); assert(!CppCore::testzero(m)); - CppCore::clear(result); + CppCore::clear(r); if (m == 1U) return; // n % 1 = 0 - *(uint32_t*)&result = 1U; + *(uint32_t*)&r = 1U; if (CppCore::testzero(exp)) return; // 1 % n = 1 - base %= m; + base %= m;//TODO // Precompute powers: base^0, base^1, ..., base^(2^k - 1) - uint32_t table_size = 1 << k; - std::vector powers(table_size); - powers[0] = 1; + constexpr size_t TABLE_SIZE = 1U << K; + UINT powers[TABLE_SIZE]; + powers[0] = 1U; powers[1] = base; - for (uint32_t i = 2; i < table_size; i++) { + for (size_t i = 2; i < TABLE_SIZE; i++) CppCore::umulmod(powers[i-1], base, m, powers[i]); - } // Find the position of the highest bit in exp + // Round up to multiple of k constexpr auto NUMBITS = sizeof(UINT) * 8U; const auto LZB = CppCore::lzcnt(exp); - const auto bits = NUMBITS - LZB; - // Round up to multiple of k - const int total_bits = ((bits + k - 1) / k) * k; + const auto HIDX = NUMBITS - LZB; + const auto HIDX_K = ((HIDX + K - 1U) / K) * K; // Process k bits at a time from left to right (MSB to LSB) - for (int pos = total_bits - k; pos >= 0; pos -= k) { + for (int pos = HIDX_K - K; pos >= 0; pos -= K) { // Square result k times (except for the first iteration) - if (pos < total_bits - k) { - for (int i = 0; i < k; i++) { - CppCore::umulmod(result, result, m, result); + if (pos < HIDX_K - K) { + for (size_t i = 0; i < K; i++) { + CppCore::umulmod(r, r, m, r); } } - - // Extract k-bit chunk at position pos - const uint32_t chunk = CppCore::getbits32(exp, pos, k); - // Multiply by base^chunk - CppCore::umulmod(result, powers[chunk], m, result); + const uint32_t CHUNK = CppCore::getbits32(exp, pos, K); + CppCore::umulmod(r, powers[CHUNK], m, r); } } diff --git a/src/CppCore.Debug/main.cpp b/src/CppCore.Debug/main.cpp index f6f75861..87743f39 100644 --- a/src/CppCore.Debug/main.cpp +++ b/src/CppCore.Debug/main.cpp @@ -7,9 +7,9 @@ using namespace CppCore; -typedef uint64_t UINTX; +typedef uint1024_t UINTX; -#define N 1024*1024 +#define N 1024 CppCore::Random::Default64 prng; @@ -69,15 +69,17 @@ int main() return 0; }*/ -UINTX a[N]; +UINTX a1[N]; +UINTX a2[N]; UINTX b[N]; UINTX m[N]; UINTX r1[N]; UINTX r2[N]; +//UINTX at; int main() { - uint64_t ta = 245373463472; + /*uint64_t ta = 245373463472; uint64_t tb = 3252764873; uint64_t tm = 32534684; uint64_t tr; @@ -85,26 +87,39 @@ int main() std::cout << tr << std::endl; CppCore::upowmod(ta, tb, tm, tr); - std::cout << tr << std::endl; + std::cout << tr << std::endl;*/ - prng.fill(a); + + prng.fill(a1); prng.fill(b); prng.fill(m); + CppCore::clone(a2, a1); + uint64_t t = __rdtsc(); for (size_t i = 0; i < N; i++) { - UINTX at; - CppCore::clone(at, a[i]); - CppCore::upowmod(at, b[i], m[i], r1[i]); - CppCore::upowmod_kary(a[i], b[i], m[i], r2[i]); - + /*if (i % 1024 == 0) { + std::cout << i << std::endl; + }*/ + CppCore::upowmod(a1[i], b[i], m[i], r1[i]); + CppCore::upowmod_kary(a2[i], b[i], m[i], r2[i]); if (!CppCore::equal(r1[i], r2[i])) { - std::cout << a[i] << "^" << b[i] << " % " << m[i] << " = " << r1[i] << std::endl; - std::cout << a[i] << "^" << b[i] << " % " << m[i] << " = " << r2[i] << std::endl; + std::cout << a1[i] << "^" << b[i] << " % " << m[i] << " = " << r1[i] << std::endl; + std::cout << a2[i] << "^" << b[i] << " % " << m[i] << " = " << r2[i] << std::endl; return 1; } } + t = __rdtsc() - t; + std::cout << t << std::endl; + + UINTX r = 0; + for (size_t i = 0; i < N; i++) + { + r += r2[i]; + } + std::cout << r << std::endl; + return 0; } From 6848e60f74653061e6d616c0a0b870c96d66375a Mon Sep 17 00:00:00 2001 From: Clint Banzhaf Date: Sat, 3 Jan 2026 17:03:00 +0100 Subject: [PATCH 4/8] wip --- include/CppCore/Math/Util.h | 3 ++- src/CppCore.Debug/main.cpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/CppCore/Math/Util.h b/include/CppCore/Math/Util.h index 25ba2826..87b4625f 100644 --- a/include/CppCore/Math/Util.h +++ b/include/CppCore/Math/Util.h @@ -3892,7 +3892,8 @@ namespace CppCore CppCore::umulmod(r, r, m, r); } } - const uint32_t CHUNK = CppCore::getbits32(exp, pos, K); + const uint32_t N = MIN(K, NUMBITS-pos); + const uint32_t CHUNK = CppCore::getbits32(exp, pos, N); CppCore::umulmod(r, powers[CHUNK], m, r); } } diff --git a/src/CppCore.Debug/main.cpp b/src/CppCore.Debug/main.cpp index 87743f39..812f7ce0 100644 --- a/src/CppCore.Debug/main.cpp +++ b/src/CppCore.Debug/main.cpp @@ -7,7 +7,7 @@ using namespace CppCore; -typedef uint1024_t UINTX; +typedef uint2048_t UINTX; #define N 1024 @@ -103,7 +103,7 @@ int main() std::cout << i << std::endl; }*/ CppCore::upowmod(a1[i], b[i], m[i], r1[i]); - CppCore::upowmod_kary(a2[i], b[i], m[i], r2[i]); + CppCore::upowmod_kary(a2[i], b[i], m[i], r2[i]); if (!CppCore::equal(r1[i], r2[i])) { std::cout << a1[i] << "^" << b[i] << " % " << m[i] << " = " << r1[i] << std::endl; std::cout << a2[i] << "^" << b[i] << " % " << m[i] << " = " << r2[i] << std::endl; @@ -116,7 +116,7 @@ int main() UINTX r = 0; for (size_t i = 0; i < N; i++) { - r += r2[i]; + r += r1[i]; } std::cout << r << std::endl; From b23581dc503eb85690d23351b27ea55062448c0d Mon Sep 17 00:00:00 2001 From: Clint Banzhaf Date: Fri, 9 Jan 2026 23:34:33 +0100 Subject: [PATCH 5/8] wip --- include/CppCore/Math/Util.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/include/CppCore/Math/Util.h b/include/CppCore/Math/Util.h index 87b4625f..6ea83994 100644 --- a/include/CppCore/Math/Util.h +++ b/include/CppCore/Math/Util.h @@ -3854,8 +3854,11 @@ namespace CppCore CppCore::upowmod(a, b, m, r, t); } + /// + /// a^b mod m + /// template - INLINE static void upowmod_kary(UINT& base, const UINT& exp, const UINT& m, UINT& r) + INLINE static void upowmod_kary(UINT& base, const UINT& exp, const UINT& m, UINT& r, UINT t[3]) { //assert((&a != &r) && (&b != &r) && (&m != &r)); assert(!CppCore::testzero(m)); @@ -3866,15 +3869,16 @@ namespace CppCore if (CppCore::testzero(exp)) return; // 1 % n = 1 - base %= m;//TODO + //base %= m;//TODO // Precompute powers: base^0, base^1, ..., base^(2^k - 1) constexpr size_t TABLE_SIZE = 1U << K; UINT powers[TABLE_SIZE]; - powers[0] = 1U; - powers[1] = base; + CppCore::clear(powers[0]); + *(uint32_t*)&powers[0] = 1U; + CppCore::clone(powers[1], base); for (size_t i = 2; i < TABLE_SIZE; i++) - CppCore::umulmod(powers[i-1], base, m, powers[i]); + CppCore::umulmod(powers[i-1], base, m, powers[i], t); // Find the position of the highest bit in exp // Round up to multiple of k @@ -3889,15 +3893,25 @@ namespace CppCore // Square result k times (except for the first iteration) if (pos < HIDX_K - K) { for (size_t i = 0; i < K; i++) { - CppCore::umulmod(r, r, m, r); + CppCore::umulmod(r, r, m, r, t); } } const uint32_t N = MIN(K, NUMBITS-pos); const uint32_t CHUNK = CppCore::getbits32(exp, pos, N); - CppCore::umulmod(r, powers[CHUNK], m, r); + CppCore::umulmod(r, powers[CHUNK], m, r, t); } } + /// + /// a^b mod m + /// + template + INLINE static void upowmod_kary(UINT& a, const UINT& b, const UINT& m, UINT& r) + { + CPPCORE_ALIGN_OPTIM(UINT) t[3]; + CppCore::upowmod_kary(a, b, m, r, t); + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // MERSENNE NUMBERS (2^i)-1 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// From 4258da55e3bada0f3caa61403b9e1dbade0f286b Mon Sep 17 00:00:00 2001 From: Clint Banzhaf Date: Sat, 10 Jan 2026 21:24:57 +0100 Subject: [PATCH 6/8] wip --- include/CppCore.Test/Math/Util.h | 21 ++++++++++++++ include/CppCore/Math/Primes.h | 2 +- include/CppCore/Math/Util.h | 47 ++++++++++++++------------------ src/CppCore.Debug/main.cpp | 22 +++++++++++---- src/CppCore.Test/Test.cpp | 2 ++ 5 files changed, 61 insertions(+), 33 deletions(-) diff --git a/include/CppCore.Test/Math/Util.h b/include/CppCore.Test/Math/Util.h index ab258fb3..112e4362 100644 --- a/include/CppCore.Test/Math/Util.h +++ b/include/CppCore.Test/Math/Util.h @@ -894,6 +894,24 @@ namespace CppCore { namespace Test { namespace Math return true; } + template + INLINE static bool upowmod() + { + uint64_t a1[N64]; uint64_t b1[N64]; uint64_t m1[N64]; uint64_t r1[N64]; + uint64_t a2[N64]; uint64_t b2[N64]; uint64_t m2[N64]; uint64_t r2[N64]; + CppCore::Random::Default64 prng; + for (size_t i = 0; i < 100; i++) { + prng.fill(a1); CppCore::clone(a2, a1); + prng.fill(b1); CppCore::clone(b2, b1); + prng.fill(m1); CppCore::clone(m2, m1); + CppCore::upowmod_single(a1, b1, m1, r1); + CppCore::upowmod(a2, b2, m2, r2); + if (!CppCore::equal(r1, r2)) + return false; + } + return true; + } + INLINE static bool upow32() { for (uint32_t base = 0; base < 20; base++) @@ -1553,6 +1571,9 @@ namespace CppCore { namespace Test { namespace VS { namespace Math { TEST_METHOD(UMULMOD64) { Assert::AreEqual(true, CppCore::Test::Math::Util::umulmod64()); } TEST_METHOD(UPOWMOD32) { Assert::AreEqual(true, CppCore::Test::Math::Util::upowmod32()); } TEST_METHOD(UPOWMOD64) { Assert::AreEqual(true, CppCore::Test::Math::Util::upowmod64()); } + TEST_METHOD(UPOWMOD128) { Assert::AreEqual(true, CppCore::Test::Math::Util::upowmod<2>()); } + TEST_METHOD(UPOWMOD256) { Assert::AreEqual(true, CppCore::Test::Math::Util::upowmod<4>()); } + TEST_METHOD(UPOW32) { Assert::AreEqual(true, CppCore::Test::Math::Util::upow32()); } TEST_METHOD(UPOW64) { Assert::AreEqual(true, CppCore::Test::Math::Util::upow64()); } TEST_METHOD(UDIVMOD32) { Assert::AreEqual(true, CppCore::Test::Math::Util::udivmod32()); } diff --git a/include/CppCore/Math/Primes.h b/include/CppCore/Math/Primes.h index 14e9c460..98df104c 100644 --- a/include/CppCore/Math/Primes.h +++ b/include/CppCore/Math/Primes.h @@ -122,7 +122,7 @@ namespace CppCore /// SPRP with precalculated t, s and d and work memory m /// template - INLINE static bool sprp(const UINT& n, UINT& a, const UINT& t, const uint32_t& s, const UINT& d, UINT& r, UINT m[3]) + INLINE static bool sprp(const UINT& n, const UINT& a, const UINT& t, const uint32_t& s, const UINT& d, UINT& r, UINT m[3]) { assert(a != 1U); CppCore::upowmod(a, d, n, r, m); diff --git a/include/CppCore/Math/Util.h b/include/CppCore/Math/Util.h index 6ea83994..d6ee6b06 100644 --- a/include/CppCore/Math/Util.h +++ b/include/CppCore/Math/Util.h @@ -3822,7 +3822,7 @@ namespace CppCore /// a^b mod m /// template - INLINE static void upowmod(UINT& a, const UINT& b, const UINT& m, UINT& r, UINT t[3]) + INLINE static void upowmod_single(UINT& a, const UINT& b, const UINT& m, UINT& r, UINT t[3]) { assert((&a != &r) && (&b != &r) && (&m != &r)); assert(!CppCore::testzero(m)); @@ -3848,56 +3848,51 @@ namespace CppCore /// a^b mod m /// template - INLINE static void upowmod(UINT& a, const UINT& b, const UINT& m, UINT& r) + INLINE static void upowmod_single(UINT& a, const UINT& b, const UINT& m, UINT& r) { CPPCORE_ALIGN_OPTIM(UINT) t[3]; - CppCore::upowmod(a, b, m, r, t); + CppCore::upowmod_single(a, b, m, r, t); } /// /// a^b mod m /// template - INLINE static void upowmod_kary(UINT& base, const UINT& exp, const UINT& m, UINT& r, UINT t[3]) + INLINE static void upowmod(const UINT& a, const UINT& b, const UINT& m, UINT& r, UINT t[3]) { - //assert((&a != &r) && (&b != &r) && (&m != &r)); + assert((&a != &r) && (&b != &r) && (&m != &r)); assert(!CppCore::testzero(m)); CppCore::clear(r); - if (m == 1U) - return; // n % 1 = 0 + constexpr auto NUMBITS = sizeof(UINT)*8U; + const auto LZB = CppCore::lzcnt(b); + if (LZB == NUMBITS) CPPCORE_UNLIKELY { + if (NUMBITS-CppCore::lzcnt(m) != 1U) CPPCORE_LIKELY + *(uint32_t*)&r = 1U; + return; + } *(uint32_t*)&r = 1U; - if (CppCore::testzero(exp)) - return; // 1 % n = 1 - - //base %= m;//TODO // Precompute powers: base^0, base^1, ..., base^(2^k - 1) constexpr size_t TABLE_SIZE = 1U << K; - UINT powers[TABLE_SIZE]; - CppCore::clear(powers[0]); - *(uint32_t*)&powers[0] = 1U; - CppCore::clone(powers[1], base); + CPPCORE_ALIGN_OPTIM(UINT) powers[TABLE_SIZE]; + CppCore::clear(powers[0]); *(uint32_t*)&powers[0] = 1U; + CppCore::clone(powers[1], a); for (size_t i = 2; i < TABLE_SIZE; i++) - CppCore::umulmod(powers[i-1], base, m, powers[i], t); + CppCore::umulmod(powers[i-1], a, m, powers[i], t); // Find the position of the highest bit in exp // Round up to multiple of k - constexpr auto NUMBITS = sizeof(UINT) * 8U; - const auto LZB = CppCore::lzcnt(exp); const auto HIDX = NUMBITS - LZB; const auto HIDX_K = ((HIDX + K - 1U) / K) * K; // Process k bits at a time from left to right (MSB to LSB) for (int pos = HIDX_K - K; pos >= 0; pos -= K) { - // Square result k times (except for the first iteration) - if (pos < HIDX_K - K) { - for (size_t i = 0; i < K; i++) { + if (pos < HIDX_K - K) + for (size_t i = 0; i < K; i++) CppCore::umulmod(r, r, m, r, t); - } - } const uint32_t N = MIN(K, NUMBITS-pos); - const uint32_t CHUNK = CppCore::getbits32(exp, pos, N); + const uint32_t CHUNK = CppCore::getbits32(b, pos, N); CppCore::umulmod(r, powers[CHUNK], m, r, t); } } @@ -3906,10 +3901,10 @@ namespace CppCore /// a^b mod m /// template - INLINE static void upowmod_kary(UINT& a, const UINT& b, const UINT& m, UINT& r) + INLINE static void upowmod(const UINT& a, const UINT& b, const UINT& m, UINT& r) { CPPCORE_ALIGN_OPTIM(UINT) t[3]; - CppCore::upowmod_kary(a, b, m, r, t); + CppCore::upowmod(a, b, m, r, t); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/CppCore.Debug/main.cpp b/src/CppCore.Debug/main.cpp index 812f7ce0..4ba9c2b0 100644 --- a/src/CppCore.Debug/main.cpp +++ b/src/CppCore.Debug/main.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +//#include #include using namespace CppCore; @@ -90,10 +90,20 @@ int main() std::cout << tr << std::endl;*/ + /*CppCore::clear(a1); + CppCore::clear(b); + CppCore::clear(m); + for (size_t i = 0; i < N; i++) + { + *(uint32_t*)&a1[i] = (uint32_t)prng.next(); + *(uint32_t*)&b[i] = (uint32_t)prng.next(); + *(uint32_t*)&m[i] = (uint32_t)prng.next(); + }*/ prng.fill(a1); prng.fill(b); prng.fill(m); + CppCore::clone(a2, a1); uint64_t t = __rdtsc(); @@ -102,13 +112,13 @@ int main() /*if (i % 1024 == 0) { std::cout << i << std::endl; }*/ - CppCore::upowmod(a1[i], b[i], m[i], r1[i]); - CppCore::upowmod_kary(a2[i], b[i], m[i], r2[i]); - if (!CppCore::equal(r1[i], r2[i])) { + //CppCore::upowmod_single(a1[i], b[i], m[i], r1[i]); + CppCore::upowmod(a2[i], b[i], m[i], r2[i]); + /*if (!CppCore::equal(r1[i], r2[i])) { std::cout << a1[i] << "^" << b[i] << " % " << m[i] << " = " << r1[i] << std::endl; std::cout << a2[i] << "^" << b[i] << " % " << m[i] << " = " << r2[i] << std::endl; return 1; - } + }*/ } t = __rdtsc() - t; std::cout << t << std::endl; @@ -116,7 +126,7 @@ int main() UINTX r = 0; for (size_t i = 0; i < N; i++) { - r += r1[i]; + r += r2[i]; } std::cout << r << std::endl; diff --git a/src/CppCore.Test/Test.cpp b/src/CppCore.Test/Test.cpp index 8ea3e87c..2d3c88e7 100644 --- a/src/CppCore.Test/Test.cpp +++ b/src/CppCore.Test/Test.cpp @@ -378,6 +378,8 @@ int main() TEST(CppCore::Test::Math::Util::umulmod64, "umulmod64: ", std::endl); TEST(CppCore::Test::Math::Util::upowmod32, "upowmod32: ", std::endl); TEST(CppCore::Test::Math::Util::upowmod64, "upowmod64: ", std::endl); + TEST(CppCore::Test::Math::Util::upowmod<2>, "upowmod128: ", std::endl); + TEST(CppCore::Test::Math::Util::upowmod<4>, "upowmod256: ", std::endl); TEST(CppCore::Test::Math::Util::upow32, "upow32: ", std::endl); TEST(CppCore::Test::Math::Util::upow64, "upow64: ", std::endl); TEST(CppCore::Test::Math::Util::udivmod32, "udivmod32: ", std::endl); From 30c366b448c0f81df6a878870fd27a99cf45c9c3 Mon Sep 17 00:00:00 2001 From: Clint Banzhaf Date: Sun, 11 Jan 2026 15:18:20 +0100 Subject: [PATCH 7/8] wip --- src/CppCore.Debug/main.cpp | 140 ------------------------------------- 1 file changed, 140 deletions(-) diff --git a/src/CppCore.Debug/main.cpp b/src/CppCore.Debug/main.cpp index 4ba9c2b0..1277fce3 100644 --- a/src/CppCore.Debug/main.cpp +++ b/src/CppCore.Debug/main.cpp @@ -1,148 +1,8 @@ #include -#include -#include -#include -//#include -#include using namespace CppCore; -typedef uint2048_t UINTX; - -#define N 1024 - -CppCore::Random::Default64 prng; - -/*UINTX d; -UINTX a[N]; -UINTX r1[N]; -UINTX r2[N]; - -UINTX bc[2]; - -uint64_t NOINLINE test1() -{ - barrett_constant(bc, d); - - uint64_t t = __rdtsc(); - - for (size_t i = 0; i < N; i++) - barrett_umod(r1[i], a[i], bc, d); - - return __rdtsc() - t; -} - -uint64_t NOINLINE test2() -{ - uint64_t t = __rdtsc(); - - for (size_t i = 0; i < N; i++) - CppCore::umod(r2[i], a[i], d); - - return __rdtsc() - t; -} - -int main() -{ - prng.fill(a); - prng.fill(d); - - for (size_t i = 0; i < N; i++) - ((uint8_t*)&a[i])[sizeof(UINTX) - 1] |= 0x80; - - uint64_t t2 = test2(); - uint64_t t1 = test1(); - - std::cout << "BARRETT: " << t1 << std::endl; - std::cout << "KNUTH: " << t2 << std::endl; - - for (size_t i = 0; i < N; i++) - { - if (r1[i] != r2[i]) - { - std::cout << "FAIL" << std::endl; - //std::cout << std::hex << r2.toHexString() << std::endl; - return 1; - } - } - - return 0; -}*/ - -UINTX a1[N]; -UINTX a2[N]; -UINTX b[N]; -UINTX m[N]; -UINTX r1[N]; -UINTX r2[N]; -//UINTX at; - int main() { - /*uint64_t ta = 245373463472; - uint64_t tb = 3252764873; - uint64_t tm = 32534684; - uint64_t tr; - CppCore::upowmod_kary(ta, tb, tm, tr); - std::cout << tr << std::endl; - - CppCore::upowmod(ta, tb, tm, tr); - std::cout << tr << std::endl;*/ - - - /*CppCore::clear(a1); - CppCore::clear(b); - CppCore::clear(m); - for (size_t i = 0; i < N; i++) - { - *(uint32_t*)&a1[i] = (uint32_t)prng.next(); - *(uint32_t*)&b[i] = (uint32_t)prng.next(); - *(uint32_t*)&m[i] = (uint32_t)prng.next(); - }*/ - - prng.fill(a1); - prng.fill(b); - prng.fill(m); - - CppCore::clone(a2, a1); - - uint64_t t = __rdtsc(); - for (size_t i = 0; i < N; i++) - { - /*if (i % 1024 == 0) { - std::cout << i << std::endl; - }*/ - //CppCore::upowmod_single(a1[i], b[i], m[i], r1[i]); - CppCore::upowmod(a2[i], b[i], m[i], r2[i]); - /*if (!CppCore::equal(r1[i], r2[i])) { - std::cout << a1[i] << "^" << b[i] << " % " << m[i] << " = " << r1[i] << std::endl; - std::cout << a2[i] << "^" << b[i] << " % " << m[i] << " = " << r2[i] << std::endl; - return 1; - }*/ - } - t = __rdtsc() - t; - std::cout << t << std::endl; - - UINTX r = 0; - for (size_t i = 0; i < N; i++) - { - r += r2[i]; - } - std::cout << r << std::endl; - return 0; } - - - - -//std::cout << std::setfill('0') << std::setw(5) << std::setprecision(10); - -/*double t = 0.5; -double z; -for (size_t i = 0; i < Primes::NUMODDPRIMES; i++) -{ - z = (1.0 - 1.0 / (double)Primes::ODDPRIMES[i]); - t *= z; - std::cout << std::setfill('0') << std::setw(5) << i << " " << Primes::ODDPRIMES[i] << " " << std::fixed << std::setprecision(10) << (1.0 - t) << std::endl; -}*/ \ No newline at end of file From 08dc865804873e0471abce0fd85055ebd5c3a5e6 Mon Sep 17 00:00:00 2001 From: Clint Banzhaf Date: Sun, 11 Jan 2026 15:20:57 +0100 Subject: [PATCH 8/8] cleanup --- include/CppCore.Test/Math/Util.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/CppCore.Test/Math/Util.h b/include/CppCore.Test/Math/Util.h index 112e4362..5374c681 100644 --- a/include/CppCore.Test/Math/Util.h +++ b/include/CppCore.Test/Math/Util.h @@ -1573,7 +1573,6 @@ namespace CppCore { namespace Test { namespace VS { namespace Math { TEST_METHOD(UPOWMOD64) { Assert::AreEqual(true, CppCore::Test::Math::Util::upowmod64()); } TEST_METHOD(UPOWMOD128) { Assert::AreEqual(true, CppCore::Test::Math::Util::upowmod<2>()); } TEST_METHOD(UPOWMOD256) { Assert::AreEqual(true, CppCore::Test::Math::Util::upowmod<4>()); } - TEST_METHOD(UPOW32) { Assert::AreEqual(true, CppCore::Test::Math::Util::upow32()); } TEST_METHOD(UPOW64) { Assert::AreEqual(true, CppCore::Test::Math::Util::upow64()); } TEST_METHOD(UDIVMOD32) { Assert::AreEqual(true, CppCore::Test::Math::Util::udivmod32()); }