[PATCH 4/6] Add GHASH RISC-V/Zbc implementation

Jussi Kivilinna jussi.kivilinna at iki.fi
Mon Jan 6 16:08:51 CET 2025


* cipher/Makefile.am: Add 'cipher-gcm-riscv-b-zbc.c'.
* cipher/cipher-gcm-riscv-b-zbc.c: New.
* cipher/cipher-gcm.c [GCM_USE_RISCV_ZBC] (_gcry_ghash_setup_riscv_zbc)
(_gcry_ghash_riscv_zbc): New.
(setupM) [GCM_USE_RISCV_ZBC]: Check for HWF_RISCV_IMAFDC, HWF_RISCV_B and
HWF_RISCV_ZBC to enable the RISC-V/Zbc implementation.
* cipher/cipher-internal.h (GCM_USE_RISCV_ZBC): New.
* configure.ac: Add 'cipher-gcm-riscv-b-zbc.lo'.
--

This patch adds a GHASH implementation accelerated with the RISC-V Zbc
carry-less multiplication extension.
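
For background (illustrative only, not part of the patch): the core building
block is the Zbc 'clmul'/'clmulh' instruction pair, which returns the low and
high 64 bits of a 64x64-bit carry-less product. A minimal standalone sketch,
assuming a GCC/Clang toolchain that accepts '.option arch' directives in
inline assembly; the helper name clmul_64x64 is made up for this example and
does not appear in the patch:

  #include <stdint.h>

  /* Illustrative helper: 64x64 -> 128 bit carry-less multiply via Zbc.
   * 'clmul' produces the low half, 'clmulh' the high half. */
  static inline void
  clmul_64x64 (uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
  {
    uint64_t l, h;

    asm (".option push\n\t"
         ".option arch, +zbc\n\t"
         "clmul  %0, %2, %3\n\t"
         "clmulh %1, %2, %3\n\t"
         ".option pop"
         : "=&r" (l), "=r" (h)   /* '&': %0 is written before %2/%3 are last read */
         : "r" (a), "r" (b));

    *lo = l;
    *hi = h;
  }

The patch wraps the same instructions in small helpers of its own, together
with the Zbb 'rev8' instruction for the big-endian byte order GHASH uses.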

Benchmark on SpacemiT K1 (1600 MHz):

Before:
                    |  nanosecs/byte   mebibytes/sec   cycles/byte
 GMAC_AES           |     14.32 ns/B     66.60 MiB/s     22.91 c/B

After (19x faster):
                    |  nanosecs/byte   mebibytes/sec   cycles/byte
 GMAC_AES           |     0.744 ns/B      1281 MiB/s      1.19 c/B
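
The gain comes from computing the GF(2^128) multiplication with carry-less
multiply instructions instead of table lookups. As a rough outline (an
assumed simplification of the code added below, reusing the hypothetical
clmul_64x64() helper from the sketch above), one GHASH block step
X = (X xor block) * H does a Karatsuba-style 128x128 carry-less multiply and
then folds the 256-bit product back modulo the GHASH polynomial
x^128 + x^7 + x^2 + x + 1 using the bit-reflected constant 0xc200000000000000:

  typedef struct { uint64_t lo, hi; } u128;

  /* Illustrative only: multiply X by H in GF(2^128), bit-reflected
   * representation, following the structure of pmul_128x128() and
   * reduction() in the new file. */
  static inline u128
  ghash_mul (u128 x, u128 h)
  {
    const uint64_t poly = UINT64_C(0xc200000000000000);
    uint64_t r0l, r0h, r1l, r1h, t2l, t2h, tl, th;

    /* Karatsuba: three 64x64 carry-less multiplies instead of four. */
    clmul_64x64 (x.lo, h.lo, &r0l, &r0h);
    clmul_64x64 (x.hi, h.hi, &r1l, &r1h);
    clmul_64x64 (x.lo ^ x.hi, h.lo ^ h.hi, &t2l, &t2h);
    t2l ^= r0l ^ r1l;
    t2h ^= r0h ^ r1h;
    r0h ^= t2l;
    r1l ^= t2h;

    /* Fold the 256-bit product back to 128 bits (reflected reduction). */
    clmul_64x64 (r0l, poly, &tl, &th);
    r0h ^= tl;
    r1l ^= th;
    clmul_64x64 (r0h, poly, &tl, &th);
    r1l ^= tl;
    r1h ^= th;

    return (u128){ .lo = r0l ^ r1l, .hi = r0h ^ r1h };
  }

The actual patch keeps all intermediate values in plain 64-bit integer
registers throughout the hot loop; see the Gouvêa & López CT-RSA 2015
reference cited in the new file for the derivation.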

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/Makefile.am              |   1 +
 cipher/cipher-gcm-riscv-b-zbc.c | 276 ++++++++++++++++++++++++++++++++
 cipher/cipher-gcm.c             |  16 ++
 cipher/cipher-internal.h        |   7 +
 configure.ac                    |   3 +
 5 files changed, 303 insertions(+)
 create mode 100644 cipher/cipher-gcm-riscv-b-zbc.c

diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 88b2d17c..a0a4d7d8 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -92,6 +92,7 @@ EXTRA_libcipher_la_SOURCES = \
 	cipher-gcm-ppc.c cipher-gcm-intel-pclmul.c \
 	cipher-gcm-aarch64-simd.c cipher-gcm-armv7-neon.S \
 	cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \
+	cipher-gcm-riscv-b-zbc.c \
 	crc.c crc-intel-pclmul.c crc-armv8-ce.c \
 	crc-armv8-aarch64-ce.S \
 	crc-ppc.c \
diff --git a/cipher/cipher-gcm-riscv-b-zbc.c b/cipher/cipher-gcm-riscv-b-zbc.c
new file mode 100644
index 00000000..705b7462
--- /dev/null
+++ b/cipher/cipher-gcm-riscv-b-zbc.c
@@ -0,0 +1,276 @@
+/* cipher-gcm-riscv-b-zbc.c - RISC-V Zbc accelerated GHASH
+ * Copyright (C) 2025 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+#if defined (GCM_USE_RISCV_ZBC)
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+typedef struct { u64 val[2]; } u64x2;
+typedef struct { u64x2 val[2]; } u64x2x2;
+
+static ASM_FUNC_ATTR_INLINE u64x2
+load_aligned_u64x2(const void *ptr)
+{
+  u64x2 vec;
+
+  asm ("ld %0, 0(%1)"
+       : "=r" (vec.val[0])
+       : "r" (ptr)
+       : "memory");
+  asm ("ld %0, 8(%1)"
+       : "=r" (vec.val[1])
+       : "r" (ptr)
+       : "memory");
+
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE u64x2
+load_unaligned_u64x2(const void *ptr)
+{
+  if (((uintptr_t)ptr & 7) == 0)
+    {
+      /* aligned load */
+      return load_aligned_u64x2(ptr);
+    }
+  else
+    {
+      /* unaligned load */
+      const bufhelp_u64_t *ptr_u64 = ptr;
+      u64x2 vec;
+      vec.val[0] = ptr_u64[0].a;
+      vec.val[1] = ptr_u64[1].a;
+      return vec;
+    }
+}
+
+static ASM_FUNC_ATTR_INLINE void
+store_aligned_u64x2(void *ptr, u64x2 vec)
+{
+  asm ("sd %0, 0(%1)"
+       :
+       : "r" (vec.val[0]), "r" (ptr)
+       : "memory");
+  asm ("sd %0, 8(%1)"
+       :
+       : "r" (vec.val[1]), "r" (ptr)
+       : "memory");
+}
+
+static ASM_FUNC_ATTR_INLINE u64
+byteswap_u64(u64 x)
+{
+  asm (".option push;\n\t"
+       ".option arch, +zbb;\n\t"
+       "rev8 %0, %1;\n\t"
+       ".option pop;\n\t"
+       : "=r" (x)
+       : "r" (x));
+  return x;
+}
+
+static ASM_FUNC_ATTR_INLINE u64x2
+byteswap_u64x2(u64x2 vec)
+{
+  u64 tmp = byteswap_u64(vec.val[0]);
+  vec.val[0] = byteswap_u64(vec.val[1]);
+  vec.val[1] = tmp;
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE u64x2
+veor_u64x2(u64x2 va, u64x2 vb)
+{
+  va.val[0] ^= vb.val[0];
+  va.val[1] ^= vb.val[1];
+  return va;
+}
+
+/* 64x64 => 128 carry-less multiplication */
+static ASM_FUNC_ATTR_INLINE u64x2
+clmul_u64x2(u64 a, u64 b)
+{
+  u64x2 vec;
+  asm (".option push;\n\t"
+       ".option arch, +zbc;\n\t"
+       "clmul %0, %1, %2;\n\t"
+       ".option pop;\n\t"
+       : "=r" (vec.val[0])
+       : "r" (a), "r" (b));
+  asm (".option push;\n\t"
+       ".option arch, +zbc;\n\t"
+       "clmulh %0, %1, %2;\n\t"
+       ".option pop;\n\t"
+       : "=r" (vec.val[1])
+       : "r" (a), "r" (b));
+  return vec;
+}
+
+/* GHASH functions.
+ *
+ * See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+static ASM_FUNC_ATTR_INLINE u64x2x2
+pmul_128x128(u64x2 a, u64x2 b)
+{
+  u64 a_l = a.val[0];
+  u64 a_h = a.val[1];
+  u64 b_l = b.val[0];
+  u64 b_h = b.val[1];
+  u64 t1_h = b_l ^ b_h;
+  u64 t1_l = a_l ^ a_h;
+  u64x2 r0 = clmul_u64x2(a_l, b_l);
+  u64x2 r1 = clmul_u64x2(a_h, b_h);
+  u64x2 t2 = clmul_u64x2(t1_h, t1_l);
+  u64 t2_l, t2_h;
+  u64 r0_l, r0_h;
+  u64 r1_l, r1_h;
+
+  t2 = veor_u64x2(t2, r0);
+  t2 = veor_u64x2(t2, r1);
+
+  r0_l = r0.val[0];
+  r0_h = r0.val[1];
+  r1_l = r1.val[0];
+  r1_h = r1.val[1];
+  t2_l = t2.val[0];
+  t2_h = t2.val[1];
+
+  r0_h = r0_h ^ t2_l;
+  r1_l = r1_l ^ t2_h;
+
+  r0 = (const u64x2){ .val = { r0_l, r0_h } };
+  r1 = (const u64x2){ .val = { r1_l, r1_h } };
+
+  return (const u64x2x2){ .val = { r0, r1 } };
+}
+
+static ASM_FUNC_ATTR_INLINE u64x2
+reduction(u64x2x2 r0r1)
+{
+  static const u64 rconst = { U64_C(0xc200000000000000) };
+  u64x2 r0 = r0r1.val[0];
+  u64x2 r1 = r0r1.val[1];
+  u64x2 t = clmul_u64x2(r0.val[0], rconst);
+  r0.val[1] ^= t.val[0];
+  r1.val[0] ^= t.val[1];
+  t = clmul_u64x2(r0.val[1], rconst);
+  r1 = veor_u64x2(r1, t);
+  return veor_u64x2(r0, r1);
+}
+
+ASM_FUNC_ATTR_NOINLINE unsigned int
+_gcry_ghash_riscv_zbc(gcry_cipher_hd_t c, byte *result, const byte *buf,
+		      size_t nblocks)
+{
+  u64x2 rhash;
+  u64x2 rh1;
+  u64x2 rbuf;
+  u64x2x2 rr0rr1;
+
+  if (nblocks == 0)
+    return 0;
+
+  rhash = load_aligned_u64x2(result);
+  rh1 = load_aligned_u64x2(c->u_mode.gcm.u_ghash_key.key);
+
+  rhash = byteswap_u64x2(rhash);
+
+  rbuf = load_unaligned_u64x2(buf);
+  buf += 16;
+  nblocks--;
+
+  rbuf = byteswap_u64x2(rbuf);
+
+  rhash = veor_u64x2(rhash, rbuf);
+
+  while (nblocks)
+    {
+      rbuf = load_unaligned_u64x2(buf);
+      buf += 16;
+      nblocks--;
+
+      rr0rr1 = pmul_128x128(rhash, rh1);
+
+      rbuf = byteswap_u64x2(rbuf);
+
+      rhash = reduction(rr0rr1);
+
+      rhash = veor_u64x2(rhash, rbuf);
+    }
+
+  rr0rr1 = pmul_128x128(rhash, rh1);
+  rhash = reduction(rr0rr1);
+
+  rhash = byteswap_u64x2(rhash);
+
+  store_aligned_u64x2(result, rhash);
+
+
+  return 0;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+gcm_lsh_1(void *r_out, u64x2 i)
+{
+  static const u64 rconst = { U64_C(0xc200000000000000) };
+  u64 ia = i.val[0];
+  u64 ib = i.val[1];
+  u64 oa, ob, ma;
+  u64x2 oa_ob;
+
+  ma = (u64)-(ib >> 63);
+  oa = ib >> 63;
+  ob = ia >> 63;
+  ma = ma & rconst;
+  ib = ib << 1;
+  ia = ia << 1;
+  ob = ob | ib;
+  oa = oa | ia;
+  ob = ob ^ ma;
+  oa_ob = (const u64x2){ .val = { oa, ob } };
+  store_aligned_u64x2(r_out, oa_ob);
+}
+
+ASM_FUNC_ATTR_NOINLINE void
+_gcry_ghash_setup_riscv_zbc(gcry_cipher_hd_t c)
+{
+  u64x2 rhash = load_aligned_u64x2(c->u_mode.gcm.u_ghash_key.key);
+
+  rhash = byteswap_u64x2(rhash);
+
+  gcm_lsh_1(c->u_mode.gcm.u_ghash_key.key, rhash);
+}
+
+#endif /* GCM_USE_RISCV_ZBC */
diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c
index 9fbdb02e..37743c30 100644
--- a/cipher/cipher-gcm.c
+++ b/cipher/cipher-gcm.c
@@ -102,6 +102,13 @@ ghash_armv7_neon (gcry_cipher_hd_t c, byte *result, const byte *buf,
 }
 #endif /* GCM_USE_ARM_NEON */
 
+#ifdef GCM_USE_RISCV_ZBC
+extern void _gcry_ghash_setup_riscv_zbc(gcry_cipher_hd_t c);
+
+extern unsigned int _gcry_ghash_riscv_zbc(gcry_cipher_hd_t c, byte *result,
+					  const byte *buf, size_t nblocks);
+#endif /* GCM_USE_RISCV_ZBC */
+
 #ifdef GCM_USE_AARCH64
 extern void _gcry_ghash_setup_aarch64_simd(gcry_cipher_hd_t c);
 
@@ -621,6 +628,15 @@ setupM (gcry_cipher_hd_t c)
       _gcry_ghash_setup_aarch64_simd (c);
     }
 #endif
+#ifdef GCM_USE_RISCV_ZBC
+  else if ((features & HWF_RISCV_IMAFDC)
+	   && (features & HWF_RISCV_B)
+	   && (features & HWF_RISCV_ZBC))
+    {
+      c->u_mode.gcm.ghash_fn = _gcry_ghash_riscv_zbc;
+      _gcry_ghash_setup_riscv_zbc (c);
+    }
+#endif
 #ifdef GCM_USE_PPC_VPMSUM
   else if (features & HWF_PPC_VCRYPTO)
     {
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 19b3eada..9f50ebc2 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -136,6 +136,13 @@
 #endif
 #endif /* GCM_USE_PPC_VPMSUM */
 
+/* GCM_USE_RISCV_ZBC indicates whether to compile GCM with RISC-V Zbc code. */
+#undef GCM_USE_RISCV_ZBC
+#if defined (__riscv) && (__riscv_xlen == 64) && \
+    defined(HAVE_GCC_INLINE_ASM_RISCV)
+# define GCM_USE_RISCV_ZBC 1
+#endif
+
 typedef unsigned int (*ghash_fn_t) (gcry_cipher_hd_t c, byte *result,
                                     const byte *buf, size_t nblocks);
 
diff --git a/configure.ac b/configure.ac
index 55d15fa3..fbe82695 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3868,6 +3868,9 @@ case "${host}" in
   powerpc64le-*-* | powerpc64-*-* | powerpc-*-*)
     GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-ppc.lo"
   ;;
+  riscv64-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-riscv-b-zbc.lo"
+  ;;
 esac
 
 # Arch specific MAC implementations
-- 
2.45.2
