[PATCH] MPI helper modular exponentiation, Least Leak Intended
NIIBE Yutaka
gniibe at fsij.org
Fri Feb 14 08:44:13 CET 2025
Hello, again,
NIIBE Yutaka <gniibe at fsij.org> wrote:
> This month, I created the ticket for improvement of modular
> exponentiation implementation with a branch named gniibe/t7490:
>
> https://dev.gnupg.org/T7490
>
> It somehow works for me now.
>
> I'd like to merge the branch manually in steps.
Here is the last and main part of the changes for the modular
exponentiation implementation (it's by Montgomery exponentiation
algorithm).
I put the changes inline, so that people can easily reply and
comment.
FYI, the reference, Handbook of Applied Cryptography is available:
https://cacr.uwaterloo.ca/hac/
diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h
index ffe8140a..decaadd8 100644
--- a/mpi/mpi-internal.h
+++ b/mpi/mpi-internal.h
@@ -261,6 +261,9 @@ mpi_limb_t _gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
unsigned cnt);
mpi_limb_t _gcry_mpih_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
unsigned cnt);
+/*-- mpih-pow.c --*/
+void _gcry_mpih_powm_lli (mpi_ptr_t rp, mpi_ptr_t bp, mpi_ptr_t mp,
+ mpi_size_t n, mpi_ptr_t ep, mpi_size_t en);
/*-- mpih-const-time.c --*/
#define mpih_set_cond(w,u,s,o) _gcry_mpih_set_cond ((w),(u),(s),(o))
diff --git a/mpi/mpi-pow.c b/mpi/mpi-pow.c
index defd675e..ef410a60 100644
--- a/mpi/mpi-pow.c
+++ b/mpi/mpi-pow.c
@@ -34,6 +34,21 @@
#include "longlong.h"
+#ifndef USE_ALGORITHM_LLI_EXPONENTIATION
+/*
+ * When you don't need least-leak implementation, please add compilation option
+ * -DUSE_ALGORITHM_LLI_EXPONENTIATION=0
+ *
+ * For performance (by tests/benchmark rsa), it's comparable to leaky
+ * sliding window implementation on 64-bit architecture. On 32-bit
+ * architecture, it's slower with 3072-bit and 4096-bit. In future,
+ * it's good to have non-leaky Karatsuba multiplication, then, it's an
+ * option to use it.
+ *
+ */
+#define USE_ALGORITHM_LLI_EXPONENTIATION 1
+#endif
+
/*
* When you need old implementation, please add compilation option
* -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION
@@ -41,6 +56,16 @@
#define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1
*/
+const char *
+_gcry_mpi_get_powm_config (void)
+{
+#if USE_ALGORITHM_LLI_EXPONENTIATION
+ return "fixed-window";
+#else
+ return "sliding-window";
+#endif
+}
+
#if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION)
/****************
* RES = BASE ^ EXPO mod MOD
@@ -541,6 +566,33 @@ _gcry_mpi_powm (gcry_mpi_t res,
rp = res->d;
}
+#if USE_ALGORITHM_LLI_EXPONENTIATION
+ if ((esec || bsec || msec) && (mod->d[0] & 1))
+ {
+ mpi_ptr_t bp1 = NULL;
+
+ if (bsize < msize)
+ {
+ bp1 = mpi_alloc_limb_space (msize, 1);
+ MPN_ZERO (bp1, msize);
+ MPN_COPY (bp1, bp, bsize);
+ }
+
+ _gcry_mpih_powm_lli (rp, bp1?bp1:bp, mod->d, msize, ep, esize);
+ if (bp1)
+ _gcry_mpi_free_limb_space (bp1, msize);
+
+ rsign = 0;
+ negative_result = (ep[0] & 1) && bsign;
+ if (negative_result)
+ rsign = msign;
+
+ res->nlimbs = msize;
+ res->sign = rsign;
+ goto leave;
+ }
+#endif
+
/* Main processing. */
{
mpi_size_t i, j, k;
diff --git a/mpi/mpih-pow.c b/mpi/mpih-pow.c
new file mode 100644
index 00000000..ad7d1e8d
--- /dev/null
+++ b/mpi/mpih-pow.c
@@ -0,0 +1,283 @@
+/* mpih-pow.c - MPI helper functions
+ * Copyright (C) 2025 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#if BITS_PER_MPI_LIMB <= 32
+#define MAX_SCRATCH_SPACE 256*2
+#define MAX_WINDOW 8
+#else
+#define MAX_SCRATCH_SPACE 256
+#define MAX_WINDOW 4
+#endif
+
+/*
+ * Compute -N^(-1) mod 2^BITS_PER_MPI_LIMB
+ *
+ * (1) Compute N^(-1) mod 2^BITS_PER_MPI_LIMB
+ * (2) Then, negate the value of (1)
+ *
+ * For computing N^(-1) mod (power of two), see:
+ *
+ * Jeffrey Hurchalla
+ * An Improved Integer Modular Multiplicative Inverse (modulo 2^w)
+ * https://arxiv.org/abs/2204.04342
+ *
+ */
+static mpi_limb_t
+compute_minv (mpi_limb_t n)
+{
+ mpi_limb_t x, y;
+
+ gcry_assert (n%2 == 1);
+
+ x = (3*n)^2;
+ y = 1 - n*x;
+ x = x*(1 + y);
+ y *= y;
+ x = x*(1 + y);
+ y *= y;
+ x = x*(1 + y);
+#if BITS_PER_MPI_LIMB == 32
+ return -x;
+#elif BITS_PER_MPI_LIMB == 64
+ y *= y;
+ x = x*(1 + y);
+ return -x;
+#else
+# error "Please implement multiplicative inverse mod power of 2"
+#endif
+}
+
+
+/*
+ * Compute T * R^(-1) mod M (where R is represented by MINV)
+ *
+ * Reference:
+ * Handbook of Applied Cryptography
+ * Algorithm 14.82: Montgomery reduction
+ */
+static void
+mont_reduc (mpi_ptr_t rp, mpi_ptr_t tp,
+ mpi_ptr_t mp, mpi_size_t n, mpi_limb_t minv)
+{
+ mpi_size_t i;
+ mpi_limb_t cy0;
+ mpi_limb_t cy1 = 0;
+
+ for (i = 0; i < n; i++)
+ {
+ mpi_limb_t ui = tp[i] * minv;
+
+ cy0 = _gcry_mpih_addmul_1 (tp + i, mp, n, ui);
+ cy1 += _gcry_mpih_add_1_lli (tp + n + i, n - i, cy0);
+ }
+
+ cy0 = _gcry_mpih_sub_n (rp, tp + n, mp, n);
+ _gcry_mpih_set_cond (rp, tp + n, n,
+ mpih_limb_is_not_zero (cy0)
+ & mpih_limb_is_zero (cy1));
+}
+
+/*
+ * Compute X * Y * R^(-1) mod M (where R is represented by MINV)
+ *
+ * Reference:
+ * Handbook of Applied Cryptography
+ * Algorithm 14.86: Montgomery multiplication
+ *
+ * RP should have space of 2*N limbs.
+ *
+ */
+static void
+mont_mul (mpi_ptr_t rp, mpi_ptr_t xp, mpi_ptr_t yp, mpi_ptr_t mp,
+ mpi_size_t n, mpi_limb_t minv, mpi_ptr_t scratch_2n)
+{
+ _gcry_mpih_mul_lli (scratch_2n, xp, n, yp, n);
+ mont_reduc (rp, scratch_2n, mp, n, minv);
+}
+
+/* Determine the window size for computing exponentiation. */
+static int
+window_size (mpi_size_t esize)
+{
+ int W;
+
+#if BITS_PER_MPI_LIMB <= 32
+ if (esize > 24)
+ W = 5;
+ else if (esize > 16)
+ W = 4;
+ else if (esize > 12)
+ W = 3;
+ else if (esize > 8)
+ W = 2;
+ else
+ W = 1;
+ return W;
+#else
+ if (esize > 8)
+ W = 4;
+ else if (esize > 6)
+ W = 3;
+ else if (esize > 4)
+ W = 2;
+ else
+ W = 1;
+ return W;
+#endif
+ return W;
+}
+
+/*
+ * Compute X ^ E mod M
+ *
+ * where X is in BP, M is in MP.
+ * The size of limbs for BP and MP are N.
+ * E is in EP, size of limbs EN.
+ *
+ * Result will be in RP with size N.
+ *
+ * Reference:
+ * Handbook of Applied Cryptography
+ * Algorithm 14.82: Left-to-right k-ary exponentiation
+ * Algorithm 14.94: Montgomery exponentiation
+ *
+ */
+void
+_gcry_mpih_powm_lli (mpi_ptr_t rp, mpi_ptr_t bp, mpi_ptr_t mp, mpi_size_t n,
+ mpi_ptr_t ep, mpi_size_t en)
+{
+ mpi_size_t scratch_size;
+#define temp0 (scratch)
+#define temp2 (scratch+n*2)
+#define a (scratch+n*4)
+#define precomp (scratch+n*5)
+ mpi_ptr_t scratch;
+ mpi_limb_t minv;
+ mpi_size_t i;
+ int mod_shift_cnt;
+ int windowsize = window_size (en);
+ mpi_limb_t wmask = (((mpi_limb_t) 1 << windowsize) - 1);
+#define temp1 (precomp+n)
+#define x_tilde (precomp+n)
+
+ scratch_size = (5 + (1 << windowsize))*n;
+ scratch = _gcry_mpi_alloc_limb_space (scratch_size, 1);
+
+ minv = compute_minv (mp[0]);
+
+ gcry_assert (mp[0]*(-minv) == 1);
+
+ MPN_ZERO (temp0, n);
+
+ /* TEMP0 := R mod m */
+ count_leading_zeros (mod_shift_cnt, mp[n-1]);
+ if (mod_shift_cnt)
+ {
+ _gcry_mpih_lshift (temp2, mp, n, mod_shift_cnt);
+ temp0[n] = (mpi_limb_t)1 << mod_shift_cnt;
+ }
+ else
+ {
+ MPN_COPY (temp2, mp, n);
+ temp0[n] = 1;
+ }
+ _gcry_mpih_divrem (temp1, 0, temp0, n+1, temp2, n);
+ if (mod_shift_cnt)
+ _gcry_mpih_rshift (temp0, temp0, n, mod_shift_cnt);
+ /* PRECOMP[0] := R mod m */
+ MPN_COPY (precomp, temp0, n);
+
+ /* TEMP0 := (R mod m)^2 */
+ _gcry_mpih_sqr_n_basecase (temp0, precomp, n);
+
+ /* TEMP0 := R^2 mod m */
+ if (mod_shift_cnt)
+ _gcry_mpih_lshift (temp0, temp0, n*2, mod_shift_cnt);
+ _gcry_mpih_divrem (temp1, 0, temp0, n*2, temp2, n);
+ if (mod_shift_cnt)
+ _gcry_mpih_rshift (temp0, temp0, n, mod_shift_cnt);
+ /* x~ := Mont(x, R^2 mod m) */
+ mont_mul (x_tilde, bp, temp0, mp, n, minv, temp2);
+
+ /* PRECOMP[i] := x~ ^ i */
+ for (i = 0; i < (1 << windowsize) - 2; i += 2)
+ {
+ _gcry_mpih_sqr_n_basecase (temp0, precomp+n*(i/2+1), n);
+ mont_reduc (precomp+n*(i+2), temp0, mp, n, minv);
+ mont_mul (precomp+n*(i+3), x_tilde, precomp+n*(i+2), mp, n, minv, temp2);
+ }
+
+ MPN_COPY (a, precomp, n);
+ i = en * BITS_PER_MPI_LIMB;
+ do
+ {
+ mpi_limb_t e;
+ int w;
+
+ if (i < windowsize)
+ {
+ e = ep[0] & (((mpi_limb_t) 1 << i) - 1);
+ w = i;
+ i = 0;
+ }
+ else
+ {
+ mpi_limb_t v;
+ mpi_size_t shift;
+ mpi_size_t j;
+ int nbits_in_v;
+
+ i -= windowsize;
+ j = i / BITS_PER_MPI_LIMB;
+ shift = i % BITS_PER_MPI_LIMB;
+ v = ep[j] >> shift;
+ nbits_in_v = BITS_PER_MPI_LIMB - shift;
+ if (nbits_in_v < windowsize)
+ v += ep[j + 1] << nbits_in_v;
+ e = v & wmask;
+ w = windowsize;
+ }
+
+ do
+ {
+ _gcry_mpih_sqr_n_basecase (temp0, a, n);
+ mont_reduc (a, temp0, mp, n, minv);
+ }
+ while (--w);
+
+ _gcry_mpih_table_lookup (temp0, precomp, n, (1 << windowsize), e);
+ mont_mul (a, a, temp0, mp, n, minv, temp2);
+ }
+ while (i);
+
+ MPN_ZERO (temp0, n);
+ temp0[0] = 1;
+ mont_mul (a, a, temp0, mp, n, minv, temp2);
+
+ MPN_COPY (rp, a, n);
+ _gcry_mpi_free_limb_space (scratch, scratch_size);
+}
diff --git a/src/gcrypt-int.h b/src/gcrypt-int.h
index d52a1b1b..1eb58cb9 100644
--- a/src/gcrypt-int.h
+++ b/src/gcrypt-int.h
@@ -474,6 +474,7 @@ void _gcry_mpi_mul_2exp (gcry_mpi_t w, gcry_mpi_t u, unsigned long cnt);
void _gcry_mpi_div (gcry_mpi_t q, gcry_mpi_t r,
gcry_mpi_t dividend, gcry_mpi_t divisor, int round);
void _gcry_mpi_mod (gcry_mpi_t r, gcry_mpi_t dividend, gcry_mpi_t divisor);
+const char *_gcry_mpi_get_powm_config (void);
void _gcry_mpi_powm (gcry_mpi_t w,
const gcry_mpi_t b, const gcry_mpi_t e,
const gcry_mpi_t m);
diff --git a/src/global.c b/src/global.c
index a5a48cb5..74cf16d3 100644
--- a/src/global.c
+++ b/src/global.c
@@ -378,6 +378,9 @@ print_config (const char *what, gpgrt_stream_t fp)
if (!what || !strcmp (what, "mpi-asm"))
gpgrt_fprintf (fp, "mpi-asm:%s:\n", _gcry_mpi_get_hw_config ());
+ if (!what || !strcmp (what, "mpi-pow"))
+ gpgrt_fprintf (fp, "mpi-pow:%s\n", _gcry_mpi_get_powm_config ());
+
if (!what || !strcmp (what, "hwflist"))
{
unsigned int hwfeatures, afeature;
--
More information about the Gcrypt-devel
mailing list