From c15215fbeda0c5e96c8e52b628fddbdea95e7ad1 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Tue, 25 Nov 2025 05:37:50 +0000 Subject: [PATCH 1/8] Introduce mld_polymat_get_row() helper function - Add mld_polymat_get_row() to retrieve matrix row pointer - Update mld_polyvec_matrix_pointwise_montgomery() to use helper Addresses #738 (steps 2-3 of #736) Signed-off-by: Hanno Becker --- mldsa/mldsa_native.S | 1 + mldsa/mldsa_native.c | 1 + mldsa/src/polyvec.c | 9 ++++++++- mldsa/src/polyvec.h | 15 +++++++++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/mldsa/mldsa_native.S b/mldsa/mldsa_native.S index cae532bcb..4818317e3 100644 --- a/mldsa/mldsa_native.S +++ b/mldsa/mldsa_native.S @@ -266,6 +266,7 @@ /* mldsa/src/polyvec.h */ #undef MLD_POLYVEC_H #undef mld_polymat +#undef mld_polymat_get_row #undef mld_polyvec_matrix_expand #undef mld_polyvec_matrix_pointwise_montgomery #undef mld_polyveck diff --git a/mldsa/mldsa_native.c b/mldsa/mldsa_native.c index 537e8b3cc..51e46248a 100644 --- a/mldsa/mldsa_native.c +++ b/mldsa/mldsa_native.c @@ -262,6 +262,7 @@ /* mldsa/src/polyvec.h */ #undef MLD_POLYVEC_H #undef mld_polymat +#undef mld_polymat_get_row #undef mld_polyvec_matrix_expand #undef mld_polyvec_matrix_pointwise_montgomery #undef mld_polyveck diff --git a/mldsa/src/polyvec.c b/mldsa/src/polyvec.c index 05c67d9ba..e688940c9 100644 --- a/mldsa/src/polyvec.c +++ b/mldsa/src/polyvec.c @@ -66,6 +66,12 @@ __contract__( #endif /* !MLD_USE_NATIVE_NTT_CUSTOM_ORDER */ } +MLD_INTERNAL_API +const mld_polyvecl *mld_polymat_get_row(const mld_polymat *mat, + unsigned int row) +{ + return &mat->vec[row]; +} MLD_INTERNAL_API void mld_polyvec_matrix_expand(mld_polymat *mat, @@ -172,7 +178,8 @@ void mld_polyvec_matrix_pointwise_montgomery(mld_polyveck *t, array_abs_bound(t->vec[k0].coeffs, 0, MLDSA_N, MLDSA_Q))) ) { - mld_polyvecl_pointwise_acc_montgomery(&t->vec[i], &mat->vec[i], v); + const mld_polyvecl *row = mld_polymat_get_row(mat, i); +
mld_polyvecl_pointwise_acc_montgomery(&t->vec[i], row, v); } mld_assert_abs_bound_2d(t->vec, MLDSA_K, MLDSA_N, MLDSA_Q); diff --git a/mldsa/src/polyvec.h b/mldsa/src/polyvec.h index 57f65fdd2..fa06dede5 100644 --- a/mldsa/src/polyvec.h +++ b/mldsa/src/polyvec.h @@ -750,6 +750,21 @@ __contract__( array_bound(p->vec[k1].coeffs, 0, MLDSA_N, -(1<<(MLDSA_D-1)) + 1, (1<<(MLDSA_D-1)) + 1))) ); +#define mld_polymat_get_row MLD_NAMESPACE_KL(polymat_get_row) +/************************************************* + * Name: mld_polymat_get_row + * + * Description: Retrieve a pointer to a specific row of the matrix. + * + * Arguments: - const mld_polymat *mat: pointer to matrix + * - unsigned int row: row index (must be < MLDSA_K) + * + * Returns a const pointer to mat->vec[row] (an mld_polyvecl); row is not range-checked + **************************************************/ +MLD_INTERNAL_API +const mld_polyvecl *mld_polymat_get_row(const mld_polymat *mat, + unsigned int row); + #define mld_polyvec_matrix_expand MLD_NAMESPACE_KL(polyvec_matrix_expand) /************************************************* * Name: mld_polyvec_matrix_expand From 4f2a1aa75e64d321e97ef7c4ceb945292092aad9 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Tue, 25 Nov 2025 08:42:10 +0000 Subject: [PATCH 2/8] Add configuration option for reduced memory usage Signed-off-by: Hanno Becker --- BIBLIOGRAPHY.md | 2 + README.md | 9 + .../mldsa_native/mldsa_native_config.h | 18 + .../mldsa_native/mldsa_native_config.h | 18 + .../mldsa_native/mldsa_native_config.h | 18 + .../mldsa_native/mldsa_native_config.h | 18 + .../mldsa_native/mldsa_native_config.h | 18 + .../mldsa_native/mldsa_native_config.h | 18 + .../mldsa_native/mldsa_native_config.h | 18 + examples/monolithic_build_native/config_65.h | 586 ++++++++++++++++++ .../mldsa_native/mldsa_native_config.h | 18 + .../mldsa_native/mldsa_native_config.h | 18 + .../mldsa_native/mldsa_native_config.h | 18 + mldsa/mldsa_native_config.h | 18 + mldsa/src/poly.c | 4 +- mldsa/src/poly.h | 4 +- mldsa/src/polyvec.c |
41 +- mldsa/src/polyvec.h | 19 +- mldsa/src/sign.c | 5 +- test/break_pct_config.h | 18 + test/custom_memcpy_config.h | 18 + test/custom_memset_config.h | 18 + test/custom_native_capability_config_0.h | 18 + test/custom_native_capability_config_1.h | 18 + ...stom_native_capability_config_CPUID_AVX2.h | 18 + ...native_capability_config_ID_AA64PFR1_EL1.h | 18 + test/custom_randombytes_config.h | 18 + test/custom_stdlib_config.h | 18 + test/custom_zeroize_config.h | 18 + test/no_asm_config.h | 18 + test/serial_fips202_config.h | 18 + 31 files changed, 1067 insertions(+), 17 deletions(-) create mode 100644 examples/monolithic_build_native/config_65.h diff --git a/BIBLIOGRAPHY.md b/BIBLIOGRAPHY.md index 9814d4cf0..3aa5340a4 100644 --- a/BIBLIOGRAPHY.md +++ b/BIBLIOGRAPHY.md @@ -30,6 +30,7 @@ source code and documentation. - [examples/monolithic_build/mldsa_native/mldsa_native_config.h](examples/monolithic_build/mldsa_native/mldsa_native_config.h) - [examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h](examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h) - [examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h](examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h) + - [examples/monolithic_build_native/config_65.h](examples/monolithic_build_native/config_65.h) - [examples/monolithic_build_native/mldsa_native/mldsa_native_config.h](examples/monolithic_build_native/mldsa_native/mldsa_native_config.h) - [examples/multilevel_build/mldsa_native/mldsa_native_config.h](examples/multilevel_build/mldsa_native/mldsa_native_config.h) - [examples/multilevel_build_native/mldsa_native/mldsa_native_config.h](examples/multilevel_build_native/mldsa_native/mldsa_native_config.h) @@ -76,6 +77,7 @@ source code and documentation. 
- [examples/monolithic_build/mldsa_native/mldsa_native_config.h](examples/monolithic_build/mldsa_native/mldsa_native_config.h) - [examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h](examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h) - [examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h](examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h) + - [examples/monolithic_build_native/config_65.h](examples/monolithic_build_native/config_65.h) - [examples/monolithic_build_native/mldsa_native/mldsa_native_config.h](examples/monolithic_build_native/mldsa_native/mldsa_native_config.h) - [examples/multilevel_build/mldsa_native/mldsa_native_config.h](examples/multilevel_build/mldsa_native/mldsa_native_config.h) - [examples/multilevel_build_native/mldsa_native/mldsa_native_config.h](examples/multilevel_build_native/mldsa_native/mldsa_native_config.h) diff --git a/README.md b/README.md index 1380b4ab7..d01fa57e5 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,15 @@ contracts and loop invariants from the code; they will be ignored unless `CBMC` Yes. mldsa-native supports all three ML-DSA security levels (ML-DSA-44, ML-DSA-65, ML-DSA-87) as defined in FIPS 204. The security level is a compile-time parameter configured by setting `MLD_CONFIG_PARAMETER_SET=44/65/87` in [config.h](mldsa/src/config.h). +### Can I reduce RAM usage for embedded systems? + +Yes. mldsa-native provides a compile-time option `MLD_CONFIG_REDUCE_RAM` that reduces RAM usage by generating matrix rows on-demand rather than storing the entire matrix in memory. 
This trades memory for performance: + +- **Memory savings**: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), 49 KB (ML-DSA-87) +- **Performance cost**: Matrix generation is no longer batched, resulting in slower signing and verification + +To enable this mode, define `MLD_CONFIG_REDUCE_RAM` in [config.h](mldsa/src/config.h) or pass `-DMLD_CONFIG_REDUCE_RAM` as a compiler flag. + ### Does mldsa-native use hedged or deterministic signing? By default, mldsa-native uses the randomized "hedged" signing variant as specified in FIPS 204 Section 3.4. The hedged variant uses both fresh randomness at signing time and precomputed randomness from the private key. This helps mitigate fault injection attacks and side-channel attacks while protecting against potential flaws in the random number generator. diff --git a/examples/basic_deterministic/mldsa_native/mldsa_native_config.h b/examples/basic_deterministic/mldsa_native/mldsa_native_config.h index 62139114c..3c5759f19 100644 --- a/examples/basic_deterministic/mldsa_native/mldsa_native_config.h +++ b/examples/basic_deterministic/mldsa_native/mldsa_native_config.h @@ -621,6 +621,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/bring_your_own_fips202/mldsa_native/mldsa_native_config.h b/examples/bring_your_own_fips202/mldsa_native/mldsa_native_config.h index 9ced3b03d..76f65c264 100644 --- a/examples/bring_your_own_fips202/mldsa_native/mldsa_native_config.h +++ b/examples/bring_your_own_fips202/mldsa_native/mldsa_native_config.h @@ -621,6 +621,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/bring_your_own_fips202_static/mldsa_native/mldsa_native_config.h b/examples/bring_your_own_fips202_static/mldsa_native/mldsa_native_config.h index 177814c4d..8ded0d2f3 100644 --- a/examples/bring_your_own_fips202_static/mldsa_native/mldsa_native_config.h +++ b/examples/bring_your_own_fips202_static/mldsa_native/mldsa_native_config.h @@ -622,6 +622,24 @@ *****************************************************************************/ #define MLD_CONFIG_SERIAL_FIPS202_ONLY +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/custom_backend/mldsa_native/mldsa_native_config.h b/examples/custom_backend/mldsa_native/mldsa_native_config.h index 7f6dc45a4..91397fa11 100644 --- a/examples/custom_backend/mldsa_native/mldsa_native_config.h +++ b/examples/custom_backend/mldsa_native/mldsa_native_config.h @@ -617,6 +617,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/monolithic_build/mldsa_native/mldsa_native_config.h b/examples/monolithic_build/mldsa_native/mldsa_native_config.h index 017951968..ad388c6c6 100644 --- a/examples/monolithic_build/mldsa_native/mldsa_native_config.h +++ b/examples/monolithic_build/mldsa_native/mldsa_native_config.h @@ -620,6 +620,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h b/examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h index 0e37d425c..3254ee546 100644 --- a/examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h +++ b/examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h @@ -621,6 +621,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h b/examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h index 7b446d346..7eb5e4364 100644 --- a/examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h +++ b/examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h @@ -628,6 +628,24 @@ static MLD_INLINE void mld_randombytes(uint8_t *ptr, size_t len) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/monolithic_build_native/config_65.h b/examples/monolithic_build_native/config_65.h new file mode 100644 index 000000000..3ab3adf6d --- /dev/null +++ b/examples/monolithic_build_native/config_65.h @@ -0,0 +1,586 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS140_3_IG] + * Implementation Guidance for FIPS 140-3 and the Cryptographic Module + * Validation Program + * National Institute of Standards and Technology + * https://csrc.nist.gov/projects/cryptographic-module-validation-program/fips-140-3-ig-announcements + * + * - [FIPS204] + * FIPS 204 Module-Lattice-Based Digital Signature Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/204/final + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. 
+ */ + +/* + * Test configuration: Monolithic build config for ML-DSA-65 (native backends + * enabled) + * + * This configuration differs from the default mldsa/src/config.h in the + * following places: + * - MLD_CONFIG_PARAMETER_SET + * - MLD_CONFIG_NAMESPACE_PREFIX + * - MLD_CONFIG_USE_NATIVE_BACKEND_ARITH + * - MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202 + * - MLD_CONFIG_INTERNAL_API_QUALIFIER + */ + + +#ifndef MLD_CONFIG_H +#define MLD_CONFIG_H + +/****************************************************************************** + * Name: MLD_CONFIG_PARAMETER_SET + * + * Description: Specifies the parameter set for ML-DSA + * - MLD_CONFIG_PARAMETER_SET=44 corresponds to ML-DSA-44 + * - MLD_CONFIG_PARAMETER_SET=65 corresponds to ML-DSA-65 + * - MLD_CONFIG_PARAMETER_SET=87 corresponds to ML-DSA-87 + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +#define MLD_CONFIG_PARAMETER_SET 65 + +/****************************************************************************** + * Name: MLD_CONFIG_NAMESPACE_PREFIX + * + * Description: The prefix to use to namespace global symbols from mldsa/. + * + * In a multi-level build (that is, if either + * - MLD_CONFIG_MULTILEVEL_WITH_SHARED, or + * - MLD_CONFIG_MULTILEVEL_NO_SHARED, + * are set), level-dependent symbols will additionally be prefixed + * with the parameter set (44/65/87). + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +#define MLD_CONFIG_NAMESPACE_PREFIX mldsa + +/****************************************************************************** + * Name: MLD_CONFIG_MULTILEVEL_WITH_SHARED + * + * Description: This is for multi-level builds of mldsa-native only. If you + * need only a single parameter set, keep this unset.
+ * + * If this is set, all MLD_CONFIG_PARAMETER_SET-independent + * code will be included in the build, including code needed only + * for other parameter sets. + * + * Example: TODO: add example + * + * To build mldsa-native with support for all parameter sets, + * build it three times -- once per parameter set -- and set the + * option MLD_CONFIG_MULTILEVEL_WITH_SHARED for exactly one of + * them, and MLD_CONFIG_MULTILEVEL_NO_SHARED for the others. + * + * See examples/multilevel_build_mldsa for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLD_CONFIG_MULTILEVEL_WITH_SHARED */ + +/****************************************************************************** + * Name: MLD_CONFIG_MULTILEVEL_NO_SHARED + * + * Description: This is for multi-level builds of mldsa-native only. If you + * need only a single parameter set, keep this unset. + * + * If this is set, no MLD_CONFIG_PARAMETER_SET-independent code + * will be included in the build. + * + * To build mldsa-native with support for all parameter sets, + * build it three times -- once per parameter set -- and set the + * option MLD_CONFIG_MULTILEVEL_WITH_SHARED for exactly one of + * them, and MLD_CONFIG_MULTILEVEL_NO_SHARED for the others. + * + * See examples/multilevel_build_mldsa for an example. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +/* #define MLD_CONFIG_MULTILEVEL_NO_SHARED */ + +/****************************************************************************** + * Name: MLD_CONFIG_FILE + * + * Description: If defined, this is a header that will be included instead + * of the default configuration file mldsa/src/config.h. + * + * When you need to build mldsa-native in multiple configurations, + * using varying MLD_CONFIG_FILE can be more convenient + * than configuring everything through CFLAGS.
+ * + * To use, MLD_CONFIG_FILE _must_ be defined prior + * to the inclusion of any mldsa-native headers. For example, + * it can be set by passing `-DMLD_CONFIG_FILE="..."` + * on the command line. + * + *****************************************************************************/ +/* No need to set this -- we _are_ already in a custom config */ +/* #define MLD_CONFIG_FILE "config.h" */ + +/****************************************************************************** + * Name: MLD_CONFIG_USE_NATIVE_BACKEND_ARITH + * + * Description: Determines whether a native arithmetic backend should be used. + * + * The arithmetic backend covers performance critical functions + * such as the number-theoretic transform (NTT). + * + * If this option is unset, the C backend will be used. + * + * If this option is set, the arithmetic backend to be used is + * determined by MLD_CONFIG_ARITH_BACKEND_FILE: If the latter is + * unset, the default backend for your target architecture + * will be used. If set, it must be the name of a backend metadata + * file. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +#define MLD_CONFIG_USE_NATIVE_BACKEND_ARITH + +/****************************************************************************** + * Name: MLD_CONFIG_ARITH_BACKEND_FILE + * + * Description: The arithmetic backend to use. + * + * If MLD_CONFIG_USE_NATIVE_BACKEND_ARITH is unset, this option + * is ignored. + * + * If MLD_CONFIG_USE_NATIVE_BACKEND_ARITH is set, this option must + * either be undefined or the filename of an arithmetic backend. + * If unset, the default backend will be used. + * + * This can be set using CFLAGS.
+ * + *****************************************************************************/ +#if defined(MLD_CONFIG_USE_NATIVE_BACKEND_ARITH) && \ + !defined(MLD_CONFIG_ARITH_BACKEND_FILE) +#define MLD_CONFIG_ARITH_BACKEND_FILE "native/meta.h" +#endif + +/****************************************************************************** + * Name: MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202 + * + * Description: Determines whether a native FIPS202 backend should be used. + * + * The FIPS202 backend covers 1x/2x/4x-fold Keccak-f1600, which is + * the performance bottleneck of SHA3 and SHAKE. + * + * If this option is unset, the C backend will be used. + * + * If this option is set, the FIPS202 backend to be used is + * determined by MLD_CONFIG_FIPS202_BACKEND_FILE: If the latter is + * unset, the default backend for your target architecture + * will be used. If set, it must be the name of a backend metadata + * file. + * + * This can also be set using CFLAGS. + * + *****************************************************************************/ +#define MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202 + +/****************************************************************************** + * Name: MLD_CONFIG_FIPS202_BACKEND_FILE + * + * Description: The FIPS-202 backend to use. + * + * If MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202 is set, this option + * must either be undefined or the filename of a FIPS202 backend. + * If unset, the default backend will be used. + * + * This can be set using CFLAGS.
+ * + *****************************************************************************/ +#if defined(MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202) && \ + !defined(MLD_CONFIG_FIPS202_BACKEND_FILE) +#define MLD_CONFIG_FIPS202_BACKEND_FILE "fips202/native/auto.h" +#endif +/****************************************************************************** + * Name: MLD_CONFIG_FIPS202_CUSTOM_HEADER + * + * Description: Custom header to use for FIPS-202 + * + * This should only be set if you intend to use a custom + * FIPS-202 implementation, different from the one shipped + * with mldsa-native. + * + * If set, it must be the name of a file serving as the + * replacement for mldsa/src/fips202/fips202.h, and exposing + * the same API (see FIPS202.md). + * + *****************************************************************************/ +/* #define MLD_CONFIG_FIPS202_CUSTOM_HEADER "SOME_FILE.h" */ + +/****************************************************************************** + * Name: MLD_CONFIG_FIPS202X4_CUSTOM_HEADER + * + * Description: Custom header to use for FIPS-202-X4 + * + * This should only be set if you intend to use a custom + * FIPS-202 implementation, different from the one shipped + * with mldsa-native. + * + * If set, it must be the name of a file serving as the + * replacement for mldsa/src/fips202/fips202x4.h, and exposing + * the same API (see FIPS202.md). + * + *****************************************************************************/ +/* #define MLD_CONFIG_FIPS202X4_CUSTOM_HEADER "SOME_FILE.h" */ + +/****************************************************************************** + * Name: MLD_CONFIG_CUSTOM_ZEROIZE + * + * Description: In compliance with @[FIPS204, Section 3.6.3], mldsa-native + * zeroizes intermediate stack buffers before returning from + * function calls. + * + * Set this option and define `mld_zeroize_native` if you want to + * use a custom method to zeroize intermediate stack buffers.
+ * The default implementation uses SecureZeroMemory on Windows + * and a memset + compiler barrier otherwise. If neither of those + * is available on the target platform, compilation will fail, + * and you will need to use MLD_CONFIG_CUSTOM_ZEROIZE to provide + * a custom implementation of `mld_zeroize_native()`. + * + * WARNING: + * The explicit stack zeroization conducted by mldsa-native + * reduces the likelihood of data leaking on the stack, but + * does not eliminate it! The C standard makes no guarantee about + * where a compiler allocates structures and whether/where it makes + * copies of them. Also, in addition to entire structures, there + * may also be potentially exploitable leakage of individual values + * on the stack. + * + * If you need bullet-proof zeroization of the stack, you need to + * consider additional measures instead of what this feature + * provides. In this case, you can set mld_zeroize_native to a + * no-op. + * + *****************************************************************************/ +/* #define MLD_CONFIG_CUSTOM_ZEROIZE + #if !defined(__ASSEMBLER__) + #include <stdint.h> + #include "sys.h" + static MLD_INLINE void mld_zeroize_native(void *ptr, size_t len) + { + ... your implementation ... + } + #endif +*/ + +/****************************************************************************** + * Name: MLD_CONFIG_CUSTOM_MEMCPY + * + * Description: Set this option and define `mld_memcpy` if you want to + * use a custom method to copy memory instead of the standard + * library memcpy function. + * + * The custom implementation must have the same signature and + * behavior as the standard memcpy function: + * void *mld_memcpy(void *dest, const void *src, size_t n) + * + *****************************************************************************/ +/* #define MLD_CONFIG_CUSTOM_MEMCPY + #if !defined(__ASSEMBLER__) + #include <stdint.h> + #include "sys.h" + static MLD_INLINE void *mld_memcpy(void *dest, const void *src, size_t n) + { + ...
your implementation ... + } + #endif +*/ + +/****************************************************************************** + * Name: MLD_CONFIG_CUSTOM_MEMSET + * + * Description: Set this option and define `mld_memset` if you want to + * use a custom method to set memory instead of the standard + * library memset function. + * + * The custom implementation must have the same signature and + * behavior as the standard memset function: + * void *mld_memset(void *s, int c, size_t n) + * + *****************************************************************************/ +/* #define MLD_CONFIG_CUSTOM_MEMSET + #if !defined(__ASSEMBLER__) + #include <stdint.h> + #include "sys.h" + static MLD_INLINE void *mld_memset(void *s, int c, size_t n) + { + ... your implementation ... + } + #endif +*/ + +/****************************************************************************** + * Name: MLD_CONFIG_CUSTOM_RANDOMBYTES + * + * Description: mldsa-native does not provide a secure randombytes + * implementation. Such an implementation has to be provided by the + * consumer. + * + * If this option is not set, mldsa-native expects a function + * void randombytes(uint8_t *out, size_t outlen). + * + * Set this option and define `mld_randombytes` if you want to + * use a custom method to sample randombytes with a different name + * or signature. + * + *****************************************************************************/ +/* #define MLD_CONFIG_CUSTOM_RANDOMBYTES + #if !defined(__ASSEMBLER__) + #include <stdint.h> + #include "sys.h" + static MLD_INLINE void mld_randombytes(uint8_t *ptr, size_t len) + { + ... your implementation ... + } + #endif +*/ + + +/****************************************************************************** + * Name: MLD_CONFIG_CUSTOM_CAPABILITY_FUNC + * + * Description: mldsa-native backends may rely on specific hardware features. + * Those backends will only be included in an mldsa-native build + * if support for the respective features is enabled at + * compile-time.
However, when building for a heterogeneous set + * of CPUs to run the resulting binary/library on, feature + * detection at _runtime_ is needed to decide whether a backend + * can be used or not. + * + * Set this option and define `mld_sys_check_capability` if you + * want to use a custom method to dispatch between implementations. + * + * If this option is not set, mldsa-native uses compile-time + * feature detection only to decide which backend to use. + * + * If you compile mldsa-native on a system with different + * capabilities than the system that the resulting binary/library + * will be run on, you must use this option. + * + *****************************************************************************/ +/* #define MLD_CONFIG_CUSTOM_CAPABILITY_FUNC + static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) + { + ... your implementation ... + } +*/ + +/****************************************************************************** + * Name: MLD_CONFIG_NO_RANDOMIZED_API + * + * Description: If this option is set, mldsa-native will be built without the + * randomized API functions (crypto_sign_keypair, + * crypto_sign, crypto_sign_signature, and + * crypto_sign_signature_extmu). + * This allows users to build mldsa-native without providing a + * randombytes() implementation if they only need the + * internal deterministic API + * (crypto_sign_keypair_internal, crypto_sign_signature_internal). + * + * NOTE: This option is incompatible with MLD_CONFIG_KEYGEN_PCT + * as the current PCT implementation requires + * crypto_sign_signature(). + * + *****************************************************************************/ +/* #define MLD_CONFIG_NO_RANDOMIZED_API */ + +/****************************************************************************** + * Name: MLD_CONFIG_KEYGEN_PCT + * + * Description: Compliance with @[FIPS140_3_IG, p.87] requires a + * Pairwise Consistency Test (PCT) to be carried out on a freshly + * generated keypair before it can be exported.
+ * + * Set this option if such a check should be implemented. + * In this case, crypto_sign_keypair_internal and + * crypto_sign_keypair will return a non-zero error code if the + * PCT failed. + * + * NOTE: This feature will drastically lower the performance of + * key generation. + * + *****************************************************************************/ +/* #define MLD_CONFIG_KEYGEN_PCT */ + +/****************************************************************************** + * Name: MLD_CONFIG_KEYGEN_PCT_BREAKAGE_TEST + * + * Description: If this option is set, the user must provide a runtime + * function `static inline int mld_break_pct() { ... }` to + * indicate whether the PCT should be made to fail. + * + * This option only has an effect if MLD_CONFIG_KEYGEN_PCT is set. + * + *****************************************************************************/ +/* #define MLD_CONFIG_KEYGEN_PCT_BREAKAGE_TEST + #if !defined(__ASSEMBLER__) + #include "sys.h" + static MLD_INLINE int mld_break_pct(void) + { + ... return 0/1 depending on whether PCT should be broken ... + } + #endif +*/ + +/****************************************************************************** + * Name: MLD_CONFIG_INTERNAL_API_QUALIFIER + * + * Description: If set, this option provides an additional function + * qualifier to be added to declarations of internal API. + * + * The primary use case for this option is single-CU builds, + * in which case this option can be set to `static`. + * + *****************************************************************************/ +#define MLD_CONFIG_INTERNAL_API_QUALIFIER static + +/****************************************************************************** + * Name: MLD_CONFIG_EXTERNAL_API_QUALIFIER + * + * Description: If set, this option provides an additional function + * qualifier to be added to declarations of mldsa-native's + * public API. 
+ * + * The primary use case for this option is single-CU builds + * where the public API exposed by mldsa-native is wrapped by + * another API in the consuming application. In this case, + * even mldsa-native's public API can be marked `static`. + * + *****************************************************************************/ +/* #define MLD_CONFIG_EXTERNAL_API_QUALIFIER */ + +/****************************************************************************** + * Name: MLD_CONFIG_CT_TESTING_ENABLED + * + * Description: If set, mldsa-native annotates data as secret / public using + * valgrind's annotations VALGRIND_MAKE_MEM_UNDEFINED and + * VALGRIND_MAKE_MEM_DEFINED, enabling various checks for secret- + * dependent control flow or variable time execution (depending + * on the exact version of valgrind installed). + * + *****************************************************************************/ +/* #define MLD_CONFIG_CT_TESTING_ENABLED */ + +/****************************************************************************** + * Name: MLD_CONFIG_NO_ASM + * + * Description: If this option is set, mldsa-native will be built without + * use of native code or inline assembly. + * + * By default, inline assembly is used to implement value barriers. + * Without inline assembly, mldsa-native will use a global volatile + * 'opt blocker' instead; see ct.h. + * + * Inline assembly is also used to implement a secure zeroization + * function on non-Windows platforms. If this option is set and + * the target platform is not Windows, you MUST set + * MLD_CONFIG_CUSTOM_ZEROIZE and provide a custom zeroization + * function. + * + * If this option is set, MLD_CONFIG_USE_NATIVE_BACKEND_FIPS202 and + * MLD_CONFIG_USE_NATIVE_BACKEND_ARITH will be ignored, and no + * native backends will be used. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_NO_ASM */ + +/****************************************************************************** + * Name: MLD_CONFIG_NO_ASM_VALUE_BARRIER + * + * Description: If this option is set, mldsa-native will be built without + * use of native code or inline assembly for value barriers. + * + * By default, inline assembly (if available) is used to implement + * value barriers. + * Without inline assembly, mldsa-native will use a global volatile + * 'opt blocker' instead; see ct.h. + * + *****************************************************************************/ +/* #define MLD_CONFIG_NO_ASM_VALUE_BARRIER */ + +/****************************************************************************** + * Name: MLD_CONFIG_SERIAL_FIPS202_ONLY + * + * Description: Set this to use a FIPS202 implementation with global state + * that supports only one active Keccak computation at a time + * (e.g. some hardware accelerators). + * + * If this option is set, ML-DSA will use FIPS202 operations + * serially, ensuring that only one SHAKE context is active + * at any given time. + * + * This allows offloading Keccak computations to a hardware + * accelerator that holds only a single Keccak state locally, + * rather than requiring support for multiple concurrent + * Keccak states. + * + * NOTE: Depending on the target CPU, this may reduce + * performance when using software FIPS202 implementations. + * Only enable this when you have to. + * + *****************************************************************************/ +/* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ + +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. 
+ * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. + * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + +/************************* Config internals ********************************/ + +/* Default namespace + * + * Don't change this. If you need a different namespace, re-define + * MLD_CONFIG_NAMESPACE_PREFIX above instead, and remove the following. + * + * The default MLDSA namespace is + * + * PQCP_MLDSA_NATIVE_MLDSA_ + * + * e.g., PQCP_MLDSA_NATIVE_MLDSA44_ + */ + +#if MLD_CONFIG_PARAMETER_SET == 44 +#define MLD_DEFAULT_NAMESPACE_PREFIX PQCP_MLDSA_NATIVE_MLDSA44 +#elif MLD_CONFIG_PARAMETER_SET == 65 +#define MLD_DEFAULT_NAMESPACE_PREFIX PQCP_MLDSA_NATIVE_MLDSA65 +#elif MLD_CONFIG_PARAMETER_SET == 87 +#define MLD_DEFAULT_NAMESPACE_PREFIX PQCP_MLDSA_NATIVE_MLDSA87 +#endif + +#endif /* !MLD_CONFIG_H */ diff --git a/examples/monolithic_build_native/mldsa_native/mldsa_native_config.h b/examples/monolithic_build_native/mldsa_native/mldsa_native_config.h index dfe74bbec..b883f5f73 100644 --- a/examples/monolithic_build_native/mldsa_native/mldsa_native_config.h +++ b/examples/monolithic_build_native/mldsa_native/mldsa_native_config.h @@ -620,6 +620,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. 
+ * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. + * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/multilevel_build/mldsa_native/mldsa_native_config.h b/examples/multilevel_build/mldsa_native/mldsa_native_config.h index 1270efa12..1d6350a7c 100644 --- a/examples/multilevel_build/mldsa_native/mldsa_native_config.h +++ b/examples/multilevel_build/mldsa_native/mldsa_native_config.h @@ -620,6 +620,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/examples/multilevel_build_native/mldsa_native/mldsa_native_config.h b/examples/multilevel_build_native/mldsa_native/mldsa_native_config.h index 9a24cd70e..77fcd8684 100644 --- a/examples/multilevel_build_native/mldsa_native/mldsa_native_config.h +++ b/examples/multilevel_build_native/mldsa_native/mldsa_native_config.h @@ -618,6 +618,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/mldsa/mldsa_native_config.h b/mldsa/mldsa_native_config.h index bc01ae764..e26acaaf5 100644 --- a/mldsa/mldsa_native_config.h +++ b/mldsa/mldsa_native_config.h @@ -605,6 +605,24 @@ *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/mldsa/src/poly.c b/mldsa/src/poly.c index a68be3a24..9004f1158 100644 --- a/mldsa/src/poly.c +++ b/mldsa/src/poly.c @@ -655,7 +655,7 @@ void mld_poly_uniform(mld_poly *a, const uint8_t seed[MLDSA_SEEDBYTES + 2]) mld_zeroize(buf, sizeof(buf)); } -#if !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY) +#if !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY) && !defined(MLD_CONFIG_REDUCE_RAM) MLD_INTERNAL_API void mld_poly_uniform_4x(mld_poly *vec0, mld_poly *vec1, mld_poly *vec2, mld_poly *vec3, @@ -720,7 +720,7 @@ void mld_poly_uniform_4x(mld_poly *vec0, mld_poly *vec1, mld_poly *vec2, mld_zeroize(buf, sizeof(buf)); } -#endif /* !MLD_CONFIG_SERIAL_FIPS202_ONLY */ +#endif /* !MLD_CONFIG_SERIAL_FIPS202_ONLY && !MLD_CONFIG_REDUCE_RAM */ MLD_INTERNAL_API void mld_polyt1_pack(uint8_t r[MLDSA_POLYT1_PACKEDBYTES], const mld_poly *a) diff --git a/mldsa/src/poly.h b/mldsa/src/poly.h index 3b6bf42a9..ff7710438 100644 --- a/mldsa/src/poly.h +++ b/mldsa/src/poly.h @@ -243,7 +243,7 @@ __contract__( ensures(array_bound(a->coeffs, 0, MLDSA_N, 0, MLDSA_Q)) ); -#if !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY) +#if !defined(MLD_CONFIG_SERIAL_FIPS202_ONLY) && !defined(MLD_CONFIG_REDUCE_RAM) #define mld_poly_uniform_4x MLD_NAMESPACE(poly_uniform_4x) /************************************************* * Name: mld_poly_uniform_x4 @@ -277,7 +277,7 @@ __contract__( ensures(array_bound(vec2->coeffs, 0, MLDSA_N, 0, MLDSA_Q)) ensures(array_bound(vec3->coeffs, 0, MLDSA_N, 0, MLDSA_Q)) ); -#endif /* !MLD_CONFIG_SERIAL_FIPS202_ONLY */ +#endif /* !MLD_CONFIG_SERIAL_FIPS202_ONLY && !MLD_CONFIG_REDUCE_RAM */ #define mld_polyt1_pack MLD_NAMESPACE(polyt1_pack) /************************************************* diff --git a/mldsa/src/polyvec.c b/mldsa/src/polyvec.c index 
e688940c9..95f7677f6 100644 --- a/mldsa/src/polyvec.c +++ b/mldsa/src/polyvec.c @@ -28,6 +28,7 @@ #define mld_polyvecl_pointwise_acc_montgomery_c \ MLD_ADD_PARAM_SET(mld_polyvecl_pointwise_acc_montgomery_c) +#if !defined(MLD_CONFIG_REDUCE_RAM) /* Helper function to ensure that the polynomial entries in the output * of mld_polyvec_matrix_expand use the standard (bitreversed) ordering * of coefficients. @@ -65,18 +66,50 @@ __contract__( #endif /* !MLD_USE_NATIVE_NTT_CUSTOM_ORDER */ } +#endif /* !MLD_CONFIG_REDUCE_RAM */ MLD_INTERNAL_API -const mld_polyvecl *mld_polymat_get_row(const mld_polymat *mat, - unsigned int row) +const mld_polyvecl *mld_polymat_get_row(mld_polymat *mat, unsigned int row) { +#if defined(MLD_CONFIG_REDUCE_RAM) + unsigned int i; + MLD_ALIGN uint8_t seed_ext[MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)]; + + mld_memcpy(seed_ext, mat->rho, MLDSA_SEEDBYTES); + + /* Generate row on-demand */ + for (i = 0; i < MLDSA_L; i++) + { + uint8_t x = (uint8_t)row; + uint8_t y = (uint8_t)i; + + seed_ext[MLDSA_SEEDBYTES + 0] = y; + seed_ext[MLDSA_SEEDBYTES + 1] = x; + + mld_poly_uniform(&mat->row_buffer.vec[i], seed_ext); + +#if defined(MLD_USE_NATIVE_NTT_CUSTOM_ORDER) + mld_poly_permute_bitrev_to_custom(mat->row_buffer.vec[i].coeffs); +#endif + } + + /* @[FIPS204, Section 3.6.3] Destruction of intermediate values. 
*/ + mld_zeroize(seed_ext, sizeof(seed_ext)); + + return &mat->row_buffer; +#else /* MLD_CONFIG_REDUCE_RAM */ return &mat->vec[row]; +#endif /* !MLD_CONFIG_REDUCE_RAM */ } MLD_INTERNAL_API void mld_polyvec_matrix_expand(mld_polymat *mat, const uint8_t rho[MLDSA_SEEDBYTES]) { +#if defined(MLD_CONFIG_REDUCE_RAM) + /* In REDUCE_RAM mode, just copy the seed for later on-demand generation */ + mld_memcpy(mat->rho, rho, MLDSA_SEEDBYTES); +#else unsigned int i, j; /* * We generate four separate seed arrays rather than a single one to work @@ -160,11 +193,11 @@ void mld_polyvec_matrix_expand(mld_polymat *mat, /* @[FIPS204, Section 3.6.3] Destruction of intermediate values. */ mld_zeroize(seed_ext, sizeof(seed_ext)); +#endif /* !MLD_CONFIG_REDUCE_RAM */ } MLD_INTERNAL_API -void mld_polyvec_matrix_pointwise_montgomery(mld_polyveck *t, - const mld_polymat *mat, +void mld_polyvec_matrix_pointwise_montgomery(mld_polyveck *t, mld_polymat *mat, const mld_polyvecl *v) { unsigned int i; diff --git a/mldsa/src/polyvec.h b/mldsa/src/polyvec.h index fa06dede5..47e4dbb5c 100644 --- a/mldsa/src/polyvec.h +++ b/mldsa/src/polyvec.h @@ -236,7 +236,12 @@ typedef struct /* Matrix of polynomials (K x L) */ typedef struct { +#if defined(MLD_CONFIG_REDUCE_RAM) + mld_polyvecl row_buffer; + uint8_t rho[MLDSA_SEEDBYTES]; +#else mld_polyvecl vec[MLDSA_K]; +#endif } mld_polymat; #define mld_polyveck_reduce MLD_NAMESPACE_KL(polyveck_reduce) @@ -755,15 +760,15 @@ __contract__( * Name: mld_polymat_get_row * * Description: Retrieve a pointer to a specific row of the matrix. + * In MLD_CONFIG_REDUCE_RAM mode, generates the row on-demand. 
* - * Arguments: - const mld_polymat *mat: pointer to matrix + * Arguments: - mld_polymat *mat: pointer to matrix * - unsigned int row: row index (must be < MLDSA_K) * * Returns pointer to the row (mld_polyvecl) **************************************************/ MLD_INTERNAL_API -const mld_polyvecl *mld_polymat_get_row(const mld_polymat *mat, - unsigned int row); +const mld_polyvecl *mld_polymat_get_row(mld_polymat *mat, unsigned int row); #define mld_polyvec_matrix_expand MLD_NAMESPACE_KL(polyvec_matrix_expand) /************************************************* @@ -806,13 +811,15 @@ __contract__( * hence must have coefficients bounded by [-9q+1, +9q-1] * inclusive. * + * Note: In MLD_CONFIG_REDUCE_RAM mode, mat cannot be const + * as rows are generated on-demand. + * * Arguments: - mld_polyveck *t: pointer to output vector t - * - const mld_polymat *mat: pointer to input matrix + * - mld_polymat *mat: pointer to input matrix * - const mld_polyvecl *v: pointer to input vector v **************************************************/ MLD_INTERNAL_API -void mld_polyvec_matrix_pointwise_montgomery(mld_polyveck *t, - const mld_polymat *mat, +void mld_polyvec_matrix_pointwise_montgomery(mld_polyveck *t, mld_polymat *mat, const mld_polyvecl *v) __contract__( requires(memory_no_alias(t, sizeof(mld_polyveck))) diff --git a/mldsa/src/sign.c b/mldsa/src/sign.c index 5c79ccf1e..e60cfffff 100644 --- a/mldsa/src/sign.c +++ b/mldsa/src/sign.c @@ -423,9 +423,8 @@ __contract__( MLD_MUST_CHECK_RETURN_VALUE static int mld_attempt_signature_generation( uint8_t sig[MLDSA_CRYPTO_BYTES], const uint8_t *mu, - const uint8_t rhoprime[MLDSA_CRHBYTES], uint16_t nonce, - const mld_polymat *mat, const mld_polyvecl *s1, const mld_polyveck *s2, - const mld_polyveck *t0) + const uint8_t rhoprime[MLDSA_CRHBYTES], uint16_t nonce, mld_polymat *mat, + const mld_polyvecl *s1, const mld_polyveck *s2, const mld_polyveck *t0) __contract__( requires(memory_no_alias(sig, MLDSA_CRYPTO_BYTES)) 
requires(memory_no_alias(mu, MLDSA_CRHBYTES)) diff --git a/test/break_pct_config.h b/test/break_pct_config.h index 8004a7d77..8130cf27f 100644 --- a/test/break_pct_config.h +++ b/test/break_pct_config.h @@ -625,6 +625,24 @@ static MLD_INLINE int mld_break_pct(void) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. + * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_memcpy_config.h b/test/custom_memcpy_config.h index f9f4fbc4d..6f9f8e332 100644 --- a/test/custom_memcpy_config.h +++ b/test/custom_memcpy_config.h @@ -628,6 +628,24 @@ static MLD_INLINE void *mld_memcpy(void *dest, const void *src, size_t n) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. 
+ * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. + * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_memset_config.h b/test/custom_memset_config.h index fdacfdeac..44487c2c5 100644 --- a/test/custom_memset_config.h +++ b/test/custom_memset_config.h @@ -627,6 +627,24 @@ static MLD_INLINE void *mld_memset(void *s, int c, size_t n) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_native_capability_config_0.h b/test/custom_native_capability_config_0.h index 1d113b483..437d4b902 100644 --- a/test/custom_native_capability_config_0.h +++ b/test/custom_native_capability_config_0.h @@ -627,6 +627,24 @@ static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_native_capability_config_1.h b/test/custom_native_capability_config_1.h index f79124299..c6da82cab 100644 --- a/test/custom_native_capability_config_1.h +++ b/test/custom_native_capability_config_1.h @@ -626,6 +626,24 @@ static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_native_capability_config_CPUID_AVX2.h b/test/custom_native_capability_config_CPUID_AVX2.h index 70ab6057b..9cf9da0c5 100644 --- a/test/custom_native_capability_config_CPUID_AVX2.h +++ b/test/custom_native_capability_config_CPUID_AVX2.h @@ -658,6 +658,24 @@ static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_native_capability_config_ID_AA64PFR1_EL1.h b/test/custom_native_capability_config_ID_AA64PFR1_EL1.h index 020e8ca2a..b8de811dd 100644 --- a/test/custom_native_capability_config_ID_AA64PFR1_EL1.h +++ b/test/custom_native_capability_config_ID_AA64PFR1_EL1.h @@ -645,6 +645,24 @@ static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_randombytes_config.h b/test/custom_randombytes_config.h index 7be486237..90c3456e5 100644 --- a/test/custom_randombytes_config.h +++ b/test/custom_randombytes_config.h @@ -621,6 +621,24 @@ static MLD_INLINE void mld_randombytes(uint8_t *ptr, size_t len) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_stdlib_config.h b/test/custom_stdlib_config.h index 7eb6a6e04..4e5f6502e 100644 --- a/test/custom_stdlib_config.h +++ b/test/custom_stdlib_config.h @@ -636,6 +636,24 @@ static MLD_INLINE void *mld_memset(void *s, int c, size_t n) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/custom_zeroize_config.h b/test/custom_zeroize_config.h index 08206c954..0ca823fcd 100644 --- a/test/custom_zeroize_config.h +++ b/test/custom_zeroize_config.h @@ -621,6 +621,24 @@ static MLD_INLINE void mld_zeroize_native(void *ptr, size_t len) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/no_asm_config.h b/test/no_asm_config.h index 7fc4428ba..265c0dc79 100644 --- a/test/no_asm_config.h +++ b/test/no_asm_config.h @@ -622,6 +622,24 @@ static MLD_INLINE void mld_zeroize_native(void *ptr, size_t len) *****************************************************************************/ /* #define MLD_CONFIG_SERIAL_FIPS202_ONLY */ +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades memory for performance: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements. 
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ diff --git a/test/serial_fips202_config.h b/test/serial_fips202_config.h index 5fc54797a..97936d5e7 100644 --- a/test/serial_fips202_config.h +++ b/test/serial_fips202_config.h @@ -620,6 +620,24 @@ *****************************************************************************/ #define MLD_CONFIG_SERIAL_FIPS202_ONLY +/****************************************************************************** + * Name: MLD_CONFIG_REDUCE_RAM + * + * Description: Set this to reduce RAM usage by generating matrix rows + * on-demand rather than storing the entire matrix in memory. + * + * This trades performance for memory: + * - Memory savings: 12 KB (ML-DSA-44), 25 KB (ML-DSA-65), + * 49 KB (ML-DSA-87) + * - Performance cost: Matrix generation is no longer batched, + * resulting in slower signing and verification + * + * This option is useful for embedded systems with tight RAM + * constraints but relaxed performance requirements.
+ * + *****************************************************************************/ +/* #define MLD_CONFIG_REDUCE_RAM */ + /************************* Config internals ********************************/ #endif /* MLD_BUILD_INTERNAL */ From 24a2d2a5e2465c21e3e513b27a40c3e7d2673b68 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Tue, 25 Nov 2025 08:43:27 +0000 Subject: [PATCH 3/8] [TEST] Enable reduced RAM option by default Signed-off-by: Hanno Becker --- examples/basic_deterministic/mldsa_native/mldsa_native_config.h | 2 +- .../bring_your_own_fips202/mldsa_native/mldsa_native_config.h | 2 +- .../mldsa_native/mldsa_native_config.h | 2 +- examples/custom_backend/mldsa_native/mldsa_native_config.h | 2 +- examples/monolithic_build/mldsa_native/mldsa_native_config.h | 2 +- .../mldsa_native/mldsa_native_config.h | 2 +- .../mldsa_native/mldsa_native_config.h | 2 +- examples/monolithic_build_native/config_65.h | 2 +- .../monolithic_build_native/mldsa_native/mldsa_native_config.h | 2 +- examples/multilevel_build/mldsa_native/mldsa_native_config.h | 2 +- .../multilevel_build_native/mldsa_native/mldsa_native_config.h | 2 +- mldsa/mldsa_native_config.h | 2 +- test/break_pct_config.h | 2 +- test/custom_memcpy_config.h | 2 +- test/custom_memset_config.h | 2 +- test/custom_native_capability_config_0.h | 2 +- test/custom_native_capability_config_1.h | 2 +- test/custom_native_capability_config_CPUID_AVX2.h | 2 +- test/custom_native_capability_config_ID_AA64PFR1_EL1.h | 2 +- test/custom_randombytes_config.h | 2 +- test/custom_stdlib_config.h | 2 +- test/custom_zeroize_config.h | 2 +- test/no_asm_config.h | 2 +- test/serial_fips202_config.h | 2 +- 24 files changed, 24 insertions(+), 24 deletions(-) diff --git a/examples/basic_deterministic/mldsa_native/mldsa_native_config.h b/examples/basic_deterministic/mldsa_native/mldsa_native_config.h index 3c5759f19..da8e5f780 100644 --- a/examples/basic_deterministic/mldsa_native/mldsa_native_config.h +++ 
b/examples/basic_deterministic/mldsa_native/mldsa_native_config.h @@ -637,7 +637,7 @@ * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/bring_your_own_fips202/mldsa_native/mldsa_native_config.h b/examples/bring_your_own_fips202/mldsa_native/mldsa_native_config.h index 76f65c264..9549b2ff0 100644 --- a/examples/bring_your_own_fips202/mldsa_native/mldsa_native_config.h +++ b/examples/bring_your_own_fips202/mldsa_native/mldsa_native_config.h @@ -637,7 +637,7 @@ * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/bring_your_own_fips202_static/mldsa_native/mldsa_native_config.h b/examples/bring_your_own_fips202_static/mldsa_native/mldsa_native_config.h index 8ded0d2f3..b27c59bd7 100644 --- a/examples/bring_your_own_fips202_static/mldsa_native/mldsa_native_config.h +++ b/examples/bring_your_own_fips202_static/mldsa_native/mldsa_native_config.h @@ -638,7 +638,7 @@ * constraints but relaxed performance requirements. 
* *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/custom_backend/mldsa_native/mldsa_native_config.h b/examples/custom_backend/mldsa_native/mldsa_native_config.h index 91397fa11..d28286efe 100644 --- a/examples/custom_backend/mldsa_native/mldsa_native_config.h +++ b/examples/custom_backend/mldsa_native/mldsa_native_config.h @@ -633,7 +633,7 @@ * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/monolithic_build/mldsa_native/mldsa_native_config.h b/examples/monolithic_build/mldsa_native/mldsa_native_config.h index ad388c6c6..8156ce294 100644 --- a/examples/monolithic_build/mldsa_native/mldsa_native_config.h +++ b/examples/monolithic_build/mldsa_native/mldsa_native_config.h @@ -636,7 +636,7 @@ * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h b/examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h index 3254ee546..08091c052 100644 --- a/examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h +++ b/examples/monolithic_build_multilevel/mldsa_native/mldsa_native_config.h @@ -637,7 +637,7 @@ * constraints but relaxed performance requirements. 
* *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h b/examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h index 7eb5e4364..94b1793ba 100644 --- a/examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h +++ b/examples/monolithic_build_multilevel_native/mldsa_native/mldsa_native_config.h @@ -644,7 +644,7 @@ static MLD_INLINE void mld_randombytes(uint8_t *ptr, size_t len) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/monolithic_build_native/config_65.h b/examples/monolithic_build_native/config_65.h index 3ab3adf6d..e88ab2360 100644 --- a/examples/monolithic_build_native/config_65.h +++ b/examples/monolithic_build_native/config_65.h @@ -559,7 +559,7 @@ * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/monolithic_build_native/mldsa_native/mldsa_native_config.h b/examples/monolithic_build_native/mldsa_native/mldsa_native_config.h index b883f5f73..88f24488c 100644 --- a/examples/monolithic_build_native/mldsa_native/mldsa_native_config.h +++ b/examples/monolithic_build_native/mldsa_native/mldsa_native_config.h @@ -636,7 +636,7 @@ * constraints but relaxed performance requirements. 
* *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/multilevel_build/mldsa_native/mldsa_native_config.h b/examples/multilevel_build/mldsa_native/mldsa_native_config.h index 1d6350a7c..b6a3962f8 100644 --- a/examples/multilevel_build/mldsa_native/mldsa_native_config.h +++ b/examples/multilevel_build/mldsa_native/mldsa_native_config.h @@ -636,7 +636,7 @@ * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/examples/multilevel_build_native/mldsa_native/mldsa_native_config.h b/examples/multilevel_build_native/mldsa_native/mldsa_native_config.h index 77fcd8684..1ec71be67 100644 --- a/examples/multilevel_build_native/mldsa_native/mldsa_native_config.h +++ b/examples/multilevel_build_native/mldsa_native/mldsa_native_config.h @@ -634,7 +634,7 @@ * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/mldsa/mldsa_native_config.h b/mldsa/mldsa_native_config.h index e26acaaf5..e6474a289 100644 --- a/mldsa/mldsa_native_config.h +++ b/mldsa/mldsa_native_config.h @@ -621,7 +621,7 @@ * constraints but relaxed performance requirements. 
* *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/break_pct_config.h b/test/break_pct_config.h index 8130cf27f..30ee29c11 100644 --- a/test/break_pct_config.h +++ b/test/break_pct_config.h @@ -641,7 +641,7 @@ static MLD_INLINE int mld_break_pct(void) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_memcpy_config.h b/test/custom_memcpy_config.h index 6f9f8e332..a33bab357 100644 --- a/test/custom_memcpy_config.h +++ b/test/custom_memcpy_config.h @@ -644,7 +644,7 @@ static MLD_INLINE void *mld_memcpy(void *dest, const void *src, size_t n) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_memset_config.h b/test/custom_memset_config.h index 44487c2c5..618995f7a 100644 --- a/test/custom_memset_config.h +++ b/test/custom_memset_config.h @@ -643,7 +643,7 @@ static MLD_INLINE void *mld_memset(void *s, int c, size_t n) * constraints but relaxed performance requirements. 
* *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_native_capability_config_0.h b/test/custom_native_capability_config_0.h index 437d4b902..1efbaaf71 100644 --- a/test/custom_native_capability_config_0.h +++ b/test/custom_native_capability_config_0.h @@ -643,7 +643,7 @@ static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_native_capability_config_1.h b/test/custom_native_capability_config_1.h index c6da82cab..564070d80 100644 --- a/test/custom_native_capability_config_1.h +++ b/test/custom_native_capability_config_1.h @@ -642,7 +642,7 @@ static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_native_capability_config_CPUID_AVX2.h b/test/custom_native_capability_config_CPUID_AVX2.h index 9cf9da0c5..e1d9b6f10 100644 --- a/test/custom_native_capability_config_CPUID_AVX2.h +++ b/test/custom_native_capability_config_CPUID_AVX2.h @@ -674,7 +674,7 @@ static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) * constraints but relaxed performance requirements. 
* *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_native_capability_config_ID_AA64PFR1_EL1.h b/test/custom_native_capability_config_ID_AA64PFR1_EL1.h index b8de811dd..649b144bb 100644 --- a/test/custom_native_capability_config_ID_AA64PFR1_EL1.h +++ b/test/custom_native_capability_config_ID_AA64PFR1_EL1.h @@ -661,7 +661,7 @@ static MLD_INLINE int mld_sys_check_capability(mld_sys_cap cap) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_randombytes_config.h b/test/custom_randombytes_config.h index 90c3456e5..95650a689 100644 --- a/test/custom_randombytes_config.h +++ b/test/custom_randombytes_config.h @@ -637,7 +637,7 @@ static MLD_INLINE void mld_randombytes(uint8_t *ptr, size_t len) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_stdlib_config.h b/test/custom_stdlib_config.h index 4e5f6502e..f306df666 100644 --- a/test/custom_stdlib_config.h +++ b/test/custom_stdlib_config.h @@ -652,7 +652,7 @@ static MLD_INLINE void *mld_memset(void *s, int c, size_t n) * constraints but relaxed performance requirements. 
* *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/custom_zeroize_config.h b/test/custom_zeroize_config.h index 0ca823fcd..ba29137ba 100644 --- a/test/custom_zeroize_config.h +++ b/test/custom_zeroize_config.h @@ -637,7 +637,7 @@ static MLD_INLINE void mld_zeroize_native(void *ptr, size_t len) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/no_asm_config.h b/test/no_asm_config.h index 265c0dc79..db7cd281d 100644 --- a/test/no_asm_config.h +++ b/test/no_asm_config.h @@ -638,7 +638,7 @@ static MLD_INLINE void mld_zeroize_native(void *ptr, size_t len) * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ diff --git a/test/serial_fips202_config.h b/test/serial_fips202_config.h index 97936d5e7..fcc9551de 100644 --- a/test/serial_fips202_config.h +++ b/test/serial_fips202_config.h @@ -636,7 +636,7 @@ * constraints but relaxed performance requirements. * *****************************************************************************/ -/* #define MLD_CONFIG_REDUCE_RAM */ +#define MLD_CONFIG_REDUCE_RAM /************************* Config internals ********************************/ From d5f7fe67332e2e7bde2db219d227dd18494c63d4 Mon Sep 17 00:00:00 2001 From: "Matthias J. 
Kannwischer" Date: Sat, 29 Nov 2025 11:23:59 +0800 Subject: [PATCH 4/8] verify stack usage: Reuse t1/w1 polyveck crypto_sign_verify_internal stack: before: 26928/37232/49776 after: 22784/31040/41536 Signed-off-by: Matthias J. Kannwischer --- mldsa/src/sign.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/mldsa/src/sign.c b/mldsa/src/sign.c index e60cfffff..f6e190c5c 100644 --- a/mldsa/src/sign.c +++ b/mldsa/src/sign.c @@ -789,7 +789,12 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, mld_poly cp; mld_polymat mat; mld_polyvecl z; - mld_polyveck t1, w1, tmp, h; + mld_polyveck tmp, h; + union + { + mld_polyveck t1; + mld_polyveck w1; + } t1w1; if (siglen != MLDSA_CRYPTO_BYTES) { @@ -797,7 +802,7 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, goto cleanup; } - mld_unpack_pk(rho, &t1, pk); + mld_unpack_pk(rho, &t1w1.t1, pk); if (mld_unpack_sig(c, &z, &h, sig)) { res = -1; @@ -830,22 +835,22 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, mld_poly_challenge(&cp, c); mld_polyvec_matrix_expand(&mat, rho); - mld_polyvecl_ntt(&z); - mld_polyvec_matrix_pointwise_montgomery(&w1, &mat, &z); - mld_poly_ntt(&cp); - mld_polyveck_shiftl(&t1); - mld_polyveck_ntt(&t1); + mld_polyveck_shiftl(&t1w1.t1); + mld_polyveck_ntt(&t1w1.t1); - mld_polyveck_pointwise_poly_montgomery(&tmp, &cp, &t1); + mld_polyveck_pointwise_poly_montgomery(&tmp, &cp, &t1w1.t1); - mld_polyveck_sub(&w1, &tmp); - mld_polyveck_reduce(&w1); - mld_polyveck_invntt_tomont(&w1); + mld_polyvecl_ntt(&z); + mld_polyvec_matrix_pointwise_montgomery(&t1w1.w1, &mat, &z); + + mld_polyveck_sub(&t1w1.w1, &tmp); + mld_polyveck_reduce(&t1w1.w1); + mld_polyveck_invntt_tomont(&t1w1.w1); /* Reconstruct w1 */ - mld_polyveck_caddq(&w1); - mld_polyveck_use_hint(&tmp, &w1, &h); + mld_polyveck_caddq(&t1w1.w1); + mld_polyveck_use_hint(&tmp, &t1w1.w1, &h); mld_polyveck_pack_w1(buf, &tmp); /* Call random oracle and verify 
challenge */ mld_H(c2, MLDSA_CTILDEBYTES, mu, MLDSA_CRHBYTES, buf, @@ -884,8 +889,7 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, mld_zeroize(&cp, sizeof(cp)); mld_zeroize(&mat, sizeof(mat)); mld_zeroize(&z, sizeof(z)); - mld_zeroize(&t1, sizeof(t1)); - mld_zeroize(&w1, sizeof(w1)); + mld_zeroize(&t1w1, sizeof(t1w1)); mld_zeroize(&tmp, sizeof(tmp)); mld_zeroize(&h, sizeof(h)); return res; From 33ae8646c1a5571ab1069110fba572d3d4d41928 Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Sat, 29 Nov 2025 11:27:11 +0800 Subject: [PATCH 5/8] verify stack usage: reuse mat/cp buffer crypto_sign_verify_internal stack: 21743/30016/40515 Signed-off-by: Matthias J. Kannwischer --- mldsa/src/sign.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/mldsa/src/sign.c b/mldsa/src/sign.c index f6e190c5c..aec77c9ad 100644 --- a/mldsa/src/sign.c +++ b/mldsa/src/sign.c @@ -786,8 +786,6 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, MLD_ALIGN uint8_t mu[MLDSA_CRHBYTES]; MLD_ALIGN uint8_t c[MLDSA_CTILDEBYTES]; MLD_ALIGN uint8_t c2[MLDSA_CTILDEBYTES]; - mld_poly cp; - mld_polymat mat; mld_polyvecl z; mld_polyveck tmp, h; union @@ -795,6 +793,11 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, mld_polyveck t1; mld_polyveck w1; } t1w1; + union + { + mld_poly cp; + mld_polymat mat; + } cpmat; if (siglen != MLDSA_CRYPTO_BYTES) { @@ -832,17 +835,16 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, } /* Matrix-vector multiplication; compute Az - c2^dt1 */ - mld_poly_challenge(&cp, c); - mld_polyvec_matrix_expand(&mat, rho); + mld_poly_challenge(&cpmat.cp, c); - mld_poly_ntt(&cp); + mld_poly_ntt(&cpmat.cp); mld_polyveck_shiftl(&t1w1.t1); mld_polyveck_ntt(&t1w1.t1); - mld_polyveck_pointwise_poly_montgomery(&tmp, &cp, &t1w1.t1); - + mld_polyveck_pointwise_poly_montgomery(&tmp, &cpmat.cp, &t1w1.t1); mld_polyvecl_ntt(&z); -
mld_polyvec_matrix_pointwise_montgomery(&t1w1.w1, &mat, &z); + mld_polyvec_matrix_expand(&cpmat.mat, rho); + mld_polyvec_matrix_pointwise_montgomery(&t1w1.w1, &cpmat.mat, &z); mld_polyveck_sub(&t1w1.w1, &tmp); mld_polyveck_reduce(&t1w1.w1); @@ -886,8 +888,7 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, mld_zeroize(mu, sizeof(mu)); mld_zeroize(c, sizeof(c)); mld_zeroize(c2, sizeof(c2)); - mld_zeroize(&cp, sizeof(cp)); - mld_zeroize(&mat, sizeof(mat)); + mld_zeroize(&cpmat.cp, sizeof(cpmat)); mld_zeroize(&z, sizeof(z)); mld_zeroize(&t1w1, sizeof(t1w1)); mld_zeroize(&tmp, sizeof(tmp)); From 77b9c0ca5088bac2a7cebfd720d92b0cf0351d73 Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Sat, 29 Nov 2025 11:42:09 +0800 Subject: [PATCH 6/8] verify stack usage: Unpack h on the fly crypto_sign_verify_internal stack: 17664/24864/33312 Signed-off-by: Matthias J. Kannwischer --- mldsa/mldsa_native.S | 1 + mldsa/mldsa_native.c | 1 + mldsa/src/packing.c | 35 +++++------------------------------ mldsa/src/packing.h | 25 ++++++++++++++++++++----- mldsa/src/sign.c | 32 +++++++++++++++++--------------- 5 files changed, 44 insertions(+), 50 deletions(-) diff --git a/mldsa/mldsa_native.S b/mldsa/mldsa_native.S index 4818317e3..37597eb72 100644 --- a/mldsa/mldsa_native.S +++ b/mldsa/mldsa_native.S @@ -216,6 +216,7 @@ #undef mld_pack_pk #undef mld_pack_sig #undef mld_pack_sk +#undef mld_unpack_hints #undef mld_unpack_pk #undef mld_unpack_sig #undef mld_unpack_sk diff --git a/mldsa/mldsa_native.c b/mldsa/mldsa_native.c index 51e46248a..6805dc6c7 100644 --- a/mldsa/mldsa_native.c +++ b/mldsa/mldsa_native.c @@ -212,6 +212,7 @@ #undef mld_pack_pk #undef mld_pack_sig #undef mld_pack_sk +#undef mld_unpack_hints #undef mld_unpack_pk #undef mld_unpack_sig #undef mld_unpack_sk diff --git a/mldsa/src/packing.c b/mldsa/src/packing.c index ab08fb6ea..aa40136fa 100644 --- a/mldsa/src/packing.c +++ b/mldsa/src/packing.c @@ -9,13 +9,6 @@ #include "poly.h" #include 
"polyvec.h" -/* Parameter set namespacing - * This is to facilitate building multiple instances - * of mldsa-native (e.g. with varying parameter sets) - * within a single compilation unit. */ -#define mld_unpack_hints MLD_ADD_PARAM_SET(mld_unpack_hints) -/* End of parameter set namespacing */ - MLD_INTERNAL_API void mld_pack_pk(uint8_t pk[MLDSA_CRYPTO_PUBLICKEYBYTES], const uint8_t rho[MLDSA_SEEDBYTES], const mld_polyveck *t1) @@ -168,20 +161,9 @@ void mld_pack_sig(uint8_t sig[MLDSA_CRYPTO_BYTES], } } -/************************************************* - * Name: mld_unpack_hints - * - * Description: Unpack raw hint bytes into a polyveck - * struct - * - * Arguments: - mld_polyveck *h: pointer to output hint vector h - * - const uint8_t packed_hints[MLDSA_POLYVECH_PACKEDBYTES]: - * raw hint bytes - * - * Returns 1 in case of malformed hints; otherwise 0. - **************************************************/ -static int mld_unpack_hints( - mld_polyveck *h, const uint8_t packed_hints[MLDSA_POLYVECH_PACKEDBYTES]) +MLD_INTERNAL_API +int mld_unpack_hints(mld_polyveck *h, + const uint8_t packed_hints[MLDSA_POLYVECH_PACKEDBYTES]) __contract__( requires(memory_no_alias(packed_hints, MLDSA_POLYVECH_PACKEDBYTES)) requires(memory_no_alias(h, sizeof(mld_polyveck))) @@ -260,18 +242,11 @@ __contract__( } MLD_INTERNAL_API -int mld_unpack_sig(uint8_t c[MLDSA_CTILDEBYTES], mld_polyvecl *z, - mld_polyveck *h, const uint8_t sig[MLDSA_CRYPTO_BYTES]) +void mld_unpack_sig(uint8_t c[MLDSA_CTILDEBYTES], mld_polyvecl *z, + const uint8_t sig[MLDSA_CRYPTO_BYTES]) { mld_memcpy(c, sig, MLDSA_CTILDEBYTES); sig += MLDSA_CTILDEBYTES; mld_polyvecl_unpack_z(z, sig); - sig += MLDSA_L * MLDSA_POLYZ_PACKEDBYTES; - - return mld_unpack_hints(h, sig); } - -/* To facilitate single-compilation-unit (SCU) builds, undefine all macros. - * Don't modify by hand -- this is auto-generated by scripts/autogen. 
*/ -#undef mld_unpack_hints diff --git a/mldsa/src/packing.h b/mldsa/src/packing.h index b8057bca5..8d8dea6cc 100644 --- a/mldsa/src/packing.h +++ b/mldsa/src/packing.h @@ -169,23 +169,38 @@ __contract__( array_bound(s2->vec[k2].coeffs, 0, MLDSA_N, MLD_POLYETA_UNPACK_LOWER_BOUND, MLDSA_ETA + 1))) ); +#define mld_unpack_hints MLD_NAMESPACE_KL(mld_unpack_hints) +/************************************************* + * Name: mld_unpack_hints + * + * Description: Unpack raw hint bytes into a polyveck + * struct + * + * Arguments: - mld_polyveck *h: pointer to output hint vector h + * - const uint8_t packed_hints[MLDSA_POLYVECH_PACKEDBYTES]: + * raw hint bytes + * + * Returns 1 in case of malformed hints; otherwise 0. + **************************************************/ +MLD_INTERNAL_API +int mld_unpack_hints(mld_polyveck *h, + const uint8_t packed_hints[MLDSA_POLYVECH_PACKEDBYTES]); + #define mld_unpack_sig MLD_NAMESPACE_KL(unpack_sig) /************************************************* * Name: mld_unpack_sig * - * Description: Unpack signature sig = (c, z, h). + * Description: Unpack (c,z) from signature sig = (c, z, h). * * Arguments: - uint8_t *c: pointer to output challenge hash * - mld_polyvecl *z: pointer to output vector z - * - mld_polyveck *h: pointer to output hint vector h * - const uint8_t sig[]: byte array containing * bit-packed signature * - * Returns 1 in case of malformed signature; otherwise 0. 
**************************************************/ MLD_INTERNAL_API -int mld_unpack_sig(uint8_t c[MLDSA_CTILDEBYTES], mld_polyvecl *z, - mld_polyveck *h, const uint8_t sig[MLDSA_CRYPTO_BYTES]) +void mld_unpack_sig(uint8_t c[MLDSA_CTILDEBYTES], mld_polyvecl *z, + const uint8_t sig[MLDSA_CRYPTO_BYTES]) __contract__( requires(memory_no_alias(sig, MLDSA_CRYPTO_BYTES)) requires(memory_no_alias(c, MLDSA_CTILDEBYTES)) diff --git a/mldsa/src/sign.c b/mldsa/src/sign.c index aec77c9ad..dd57989ea 100644 --- a/mldsa/src/sign.c +++ b/mldsa/src/sign.c @@ -787,7 +787,7 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, MLD_ALIGN uint8_t c[MLDSA_CTILDEBYTES]; MLD_ALIGN uint8_t c2[MLDSA_CTILDEBYTES]; mld_polyvecl z; - mld_polyveck tmp, h; + mld_polyveck tmp; union { mld_polyveck t1; @@ -797,7 +797,8 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, { mld_poly cp; mld_polymat mat; - } cpmat; + mld_polyveck h; + } cpmath; if (siglen != MLDSA_CRYPTO_BYTES) { @@ -806,11 +807,7 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, } mld_unpack_pk(rho, &t1w1.t1, pk); - if (mld_unpack_sig(c, &z, &h, sig)) - { - res = -1; - goto cleanup; - } + mld_unpack_sig(c, &z, sig); if (mld_polyvecl_chknorm(&z, MLDSA_GAMMA1 - MLDSA_BETA)) { res = -1; @@ -835,16 +832,16 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, } /* Matrix-vector multiplication; compute Az - c2^dt1 */ - mld_poly_challenge(&cpmat.cp, c); + mld_poly_challenge(&cpmath.cp, c); - mld_poly_ntt(&cpmat.cp); + mld_poly_ntt(&cpmath.cp); mld_polyveck_shiftl(&t1w1.t1); mld_polyveck_ntt(&t1w1.t1); - mld_polyveck_pointwise_poly_montgomery(&tmp, &cpmat.cp, &t1w1.t1); + mld_polyveck_pointwise_poly_montgomery(&tmp, &cpmath.cp, &t1w1.t1); mld_polyvecl_ntt(&z); - mld_polyvec_matrix_expand(&cpmat.mat, rho); - mld_polyvec_matrix_pointwise_montgomery(&t1w1.w1, &cpmat.mat, &z); + mld_polyvec_matrix_expand(&cpmath.mat, rho); + mld_polyvec_matrix_pointwise_montgomery(&t1w1.w1, 
&cpmath.mat, &z); mld_polyveck_sub(&t1w1.w1, &tmp); mld_polyveck_reduce(&t1w1.w1); @@ -852,7 +849,13 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, /* Reconstruct w1 */ mld_polyveck_caddq(&t1w1.w1); - mld_polyveck_use_hint(&tmp, &t1w1.w1, &h); + if (mld_unpack_hints(&cpmath.h, sig + MLDSA_CTILDEBYTES + + MLDSA_L * MLDSA_POLYZ_PACKEDBYTES)) + { + res = -1; + goto cleanup; + } + mld_polyveck_use_hint(&tmp, &t1w1.w1, &cpmath.h); mld_polyveck_pack_w1(buf, &tmp); /* Call random oracle and verify challenge */ mld_H(c2, MLDSA_CTILDEBYTES, mu, MLDSA_CRHBYTES, buf, @@ -888,11 +891,10 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, mld_zeroize(mu, sizeof(mu)); mld_zeroize(c, sizeof(c)); mld_zeroize(c2, sizeof(c2)); - mld_zeroize(&cpmat.cp, sizeof(cpmat)); + mld_zeroize(&cpmath.cp, sizeof(cpmath)); mld_zeroize(&z, sizeof(z)); mld_zeroize(&t1w1, sizeof(t1w1)); mld_zeroize(&tmp, sizeof(tmp)); - mld_zeroize(&h, sizeof(h)); return res; } From 5c3053bc898a45b8d77774ebaa8ec851ae62949d Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Sat, 29 Nov 2025 12:29:51 +0800 Subject: [PATCH 7/8] verify stack usage: unpack t1 on the fly + share tmp/mat buffer crypto_sign_verify_internal stack: 14592/19744/26144 Signed-off-by: Matthias J. 
Kannwischer --- mldsa/src/sign.c | 57 ++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/mldsa/src/sign.c b/mldsa/src/sign.c index dd57989ea..2028a99f7 100644 --- a/mldsa/src/sign.c +++ b/mldsa/src/sign.c @@ -787,18 +787,15 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, MLD_ALIGN uint8_t c[MLDSA_CTILDEBYTES]; MLD_ALIGN uint8_t c2[MLDSA_CTILDEBYTES]; mld_polyvecl z; - mld_polyveck tmp; - union - { - mld_polyveck t1; - mld_polyveck w1; - } t1w1; + mld_poly cp; + mld_polyveck w1; union { - mld_poly cp; mld_polymat mat; mld_polyveck h; - } cpmath; + mld_polyveck tmp; + mld_polyveck t1; + } mathtmpt1; if (siglen != MLDSA_CRYPTO_BYTES) { @@ -806,7 +803,9 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, goto cleanup; } - mld_unpack_pk(rho, &t1w1.t1, pk); + /* unpack rho part from public key */ + mld_memcpy(rho, pk, MLDSA_SEEDBYTES); + mld_unpack_sig(c, &z, sig); if (mld_polyvecl_chknorm(&z, MLDSA_GAMMA1 - MLDSA_BETA)) { @@ -832,31 +831,32 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, } /* Matrix-vector multiplication; compute Az - c2^dt1 */ - mld_poly_challenge(&cpmath.cp, c); - - mld_poly_ntt(&cpmath.cp); - mld_polyveck_shiftl(&t1w1.t1); - mld_polyveck_ntt(&t1w1.t1); - - mld_polyveck_pointwise_poly_montgomery(&tmp, &cpmath.cp, &t1w1.t1); mld_polyvecl_ntt(&z); - mld_polyvec_matrix_expand(&cpmath.mat, rho); - mld_polyvec_matrix_pointwise_montgomery(&t1w1.w1, &cpmath.mat, &z); + mld_polyvec_matrix_expand(&mathtmpt1.mat, rho); + mld_polyvec_matrix_pointwise_montgomery(&w1, &mathtmpt1.mat, &z); - mld_polyveck_sub(&t1w1.w1, &tmp); - mld_polyveck_reduce(&t1w1.w1); - mld_polyveck_invntt_tomont(&t1w1.w1); + mld_poly_challenge(&cp, c); + mld_poly_ntt(&cp); + + /* unpack t1 part of public key */ + mld_unpack_pk(rho, &mathtmpt1.t1, pk); + mld_polyveck_shiftl(&mathtmpt1.t1); + mld_polyveck_ntt(&mathtmpt1.t1); + 
mld_polyveck_pointwise_poly_montgomery(&mathtmpt1.tmp, &cp, &mathtmpt1.t1); + mld_polyveck_sub(&w1, &mathtmpt1.tmp); + mld_polyveck_reduce(&w1); + mld_polyveck_invntt_tomont(&w1); /* Reconstruct w1 */ - mld_polyveck_caddq(&t1w1.w1); - if (mld_unpack_hints(&cpmath.h, sig + MLDSA_CTILDEBYTES + - MLDSA_L * MLDSA_POLYZ_PACKEDBYTES)) + mld_polyveck_caddq(&w1); + if (mld_unpack_hints(&mathtmpt1.h, sig + MLDSA_CTILDEBYTES + + MLDSA_L * MLDSA_POLYZ_PACKEDBYTES)) { res = -1; goto cleanup; } - mld_polyveck_use_hint(&tmp, &t1w1.w1, &cpmath.h); - mld_polyveck_pack_w1(buf, &tmp); + mld_polyveck_use_hint(&mathtmpt1.tmp, &w1, &mathtmpt1.h); + mld_polyveck_pack_w1(buf, &mathtmpt1.tmp); /* Call random oracle and verify challenge */ mld_H(c2, MLDSA_CTILDEBYTES, mu, MLDSA_CRHBYTES, buf, MLDSA_K * MLDSA_POLYW1_PACKEDBYTES, NULL, 0); @@ -891,10 +891,9 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, mld_zeroize(mu, sizeof(mu)); mld_zeroize(c, sizeof(c)); mld_zeroize(c2, sizeof(c2)); - mld_zeroize(&cpmath.cp, sizeof(cpmath)); + mld_zeroize(&mathtmpt1, sizeof(mathtmpt1)); mld_zeroize(&z, sizeof(z)); - mld_zeroize(&t1w1, sizeof(t1w1)); - mld_zeroize(&tmp, sizeof(tmp)); + mld_zeroize(&w1, sizeof(w1)); return res; } From 90a220243e7d98b7105cce269d52f29c1df31dad Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Sat, 29 Nov 2025 12:37:47 +0800 Subject: [PATCH 8/8] verify stack usage: reuse z/cp buffer crypto_sign_verify_internal stack: 13568/18720/25120 Signed-off-by: Matthias J. 
Kannwischer --- mldsa/src/sign.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/mldsa/src/sign.c b/mldsa/src/sign.c index 2028a99f7..f5665de4b 100644 --- a/mldsa/src/sign.c +++ b/mldsa/src/sign.c @@ -786,9 +786,13 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, MLD_ALIGN uint8_t mu[MLDSA_CRHBYTES]; MLD_ALIGN uint8_t c[MLDSA_CTILDEBYTES]; MLD_ALIGN uint8_t c2[MLDSA_CTILDEBYTES]; - mld_polyvecl z; - mld_poly cp; mld_polyveck w1; + union + { + mld_polyvecl z; + mld_poly cp; + } zcp; + union { mld_polymat mat; @@ -806,8 +810,8 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, /* unpack rho part from public key */ mld_memcpy(rho, pk, MLDSA_SEEDBYTES); - mld_unpack_sig(c, &z, sig); - if (mld_polyvecl_chknorm(&z, MLDSA_GAMMA1 - MLDSA_BETA)) + mld_unpack_sig(c, &zcp.z, sig); + if (mld_polyvecl_chknorm(&zcp.z, MLDSA_GAMMA1 - MLDSA_BETA)) { res = -1; goto cleanup; @@ -831,18 +835,19 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, } /* Matrix-vector multiplication; compute Az - c2^dt1 */ - mld_polyvecl_ntt(&z); + mld_polyvecl_ntt(&zcp.z); mld_polyvec_matrix_expand(&mathtmpt1.mat, rho); - mld_polyvec_matrix_pointwise_montgomery(&w1, &mathtmpt1.mat, &z); + mld_polyvec_matrix_pointwise_montgomery(&w1, &mathtmpt1.mat, &zcp.z); - mld_poly_challenge(&cp, c); - mld_poly_ntt(&cp); + mld_poly_challenge(&zcp.cp, c); + mld_poly_ntt(&zcp.cp); /* unpack t1 part of public key */ mld_unpack_pk(rho, &mathtmpt1.t1, pk); mld_polyveck_shiftl(&mathtmpt1.t1); mld_polyveck_ntt(&mathtmpt1.t1); - mld_polyveck_pointwise_poly_montgomery(&mathtmpt1.tmp, &cp, &mathtmpt1.t1); + mld_polyveck_pointwise_poly_montgomery(&mathtmpt1.tmp, &zcp.cp, + &mathtmpt1.t1); mld_polyveck_sub(&w1, &mathtmpt1.tmp); mld_polyveck_reduce(&w1); mld_polyveck_invntt_tomont(&w1); @@ -892,7 +897,7 @@ int crypto_sign_verify_internal(const uint8_t *sig, size_t siglen, mld_zeroize(c, sizeof(c)); mld_zeroize(c2, sizeof(c2)); 
mld_zeroize(&mathtmpt1, sizeof(mathtmpt1)); - mld_zeroize(&z, sizeof(z)); + mld_zeroize(&zcp, sizeof(zcp)); mld_zeroize(&w1, sizeof(w1)); return res; }