[PATCH 4/6] Add GHASH RISC-V/Zbc implementation
Jussi Kivilinna
jussi.kivilinna at iki.fi
Mon Jan 6 16:08:51 CET 2025
* cipher/Makefile.am: Add 'cipher-gcm-riscv-b-zbc.c'.
* cipher/cipher-gcm-riscv-b-zbc.c: New.
* cipher/cipher-gcm.c [GCM_USE_RISCV_ZBC] (_gcry_ghash_setup_riscv_zbc)
(_gcry_ghash_riscv_zbc): New.
(setupM) [GCM_USE_RISCV_ZBC]: Check for HWF_RISCV_IMAFDC, HWF_RISCV_B and
HWF_RISCV_ZBC to enable the RISC-V/Zbc implementation.
* cipher/cipher-internal.h (GCM_USE_RISCV_ZBC): New.
* configure.ac: Add 'cipher-gcm-riscv-b-zbc.lo'.
--
Patch adds RISC-V Zbc extension accelerated GHASH implementation.
Benchmark on SpacemiT K1 (1600 MHz):

Before:
                |  nanosecs/byte   mebibytes/sec   cycles/byte
 GMAC_AES       |     14.32 ns/B     66.60 MiB/s     22.91 c/B

After (19x faster):
                |  nanosecs/byte   mebibytes/sec   cycles/byte
 GMAC_AES       |     0.744 ns/B      1281 MiB/s      1.19 c/B
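
The implementation is built around the Zbc 'clmul'/'clmulh' pair, which
together give a full 64x64 -> 128-bit carry-less product. As a minimal
standalone sketch (the clmul_128 helper name is illustrative and not part
of the patch; the patch wraps the same instruction pair in clmul_u64x2
below):

  #include <stdint.h>

  /* 64x64 -> 128-bit carry-less multiply using the Zbc extension:
   * 'clmul' produces the low 64 bits of the product, 'clmulh' the
   * high 64 bits.  */
  static inline void
  clmul_128 (uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
  {
    uint64_t l, h;
    asm (".option push\n\t"
         ".option arch, +zbc\n\t"
         "clmul  %0, %2, %3\n\t"
         "clmulh %1, %2, %3\n\t"
         ".option pop"
         : "=&r" (l), "=r" (h)
         : "r" (a), "r" (b));
    *lo = l;
    *hi = h;
  }

One GHASH block then costs three such multiplies (low, high and the
Karatsuba middle term) plus folding against the reflected polynomial
constant 0xc200000000000000, which is what pmul_128x128() and reduction()
below implement.
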
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/Makefile.am | 1 +
cipher/cipher-gcm-riscv-b-zbc.c | 276 ++++++++++++++++++++++++++++++++
cipher/cipher-gcm.c | 16 ++
cipher/cipher-internal.h | 7 +
configure.ac | 3 +
5 files changed, 303 insertions(+)
create mode 100644 cipher/cipher-gcm-riscv-b-zbc.c
diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 88b2d17c..a0a4d7d8 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -92,6 +92,7 @@ EXTRA_libcipher_la_SOURCES = \
cipher-gcm-ppc.c cipher-gcm-intel-pclmul.c \
cipher-gcm-aarch64-simd.c cipher-gcm-armv7-neon.S \
cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \
+ cipher-gcm-riscv-b-zbc.c \
crc.c crc-intel-pclmul.c crc-armv8-ce.c \
crc-armv8-aarch64-ce.S \
crc-ppc.c \
diff --git a/cipher/cipher-gcm-riscv-b-zbc.c b/cipher/cipher-gcm-riscv-b-zbc.c
new file mode 100644
index 00000000..705b7462
--- /dev/null
+++ b/cipher/cipher-gcm-riscv-b-zbc.c
@@ -0,0 +1,276 @@
+/* cipher-gcm-riscv-b-zbc.c - RISC-V Zbc accelerated GHASH
+ * Copyright (C) 2025 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+#if defined (GCM_USE_RISCV_ZBC)
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+typedef struct { u64 val[2]; } u64x2;
+typedef struct { u64x2 val[2]; } u64x2x2;
+
+static ASM_FUNC_ATTR_INLINE u64x2
+load_aligned_u64x2(const void *ptr)
+{
+ u64x2 vec;
+
+ asm ("ld %0, 0(%1)"
+ : "=r" (vec.val[0])
+ : "r" (ptr)
+ : "memory");
+ asm ("ld %0, 8(%1)"
+ : "=r" (vec.val[1])
+ : "r" (ptr)
+ : "memory");
+
+ return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE u64x2
+load_unaligned_u64x2(const void *ptr)
+{
+ if (((uintptr_t)ptr & 7) == 0)
+ {
+ /* aligned load */
+ return load_aligned_u64x2(ptr);
+ }
+ else
+ {
+ /* unaligned load */
+ const bufhelp_u64_t *ptr_u64 = ptr;
+ u64x2 vec;
+ vec.val[0] = ptr_u64[0].a;
+ vec.val[1] = ptr_u64[1].a;
+ return vec;
+ }
+}
+
+static ASM_FUNC_ATTR_INLINE void
+store_aligned_u64x2(void *ptr, u64x2 vec)
+{
+ asm ("sd %0, 0(%1)"
+ :
+ : "r" (vec.val[0]), "r" (ptr)
+ : "memory");
+ asm ("sd %0, 8(%1)"
+ :
+ : "r" (vec.val[1]), "r" (ptr)
+ : "memory");
+}
+
+static ASM_FUNC_ATTR_INLINE u64
+byteswap_u64(u64 x)
+{
+ asm (".option push;\n\t"
+ ".option arch, +zbb;\n\t"
+ "rev8 %0, %1;\n\t"
+ ".option pop;\n\t"
+ : "=r" (x)
+ : "r" (x));
+ return x;
+}
+
+static ASM_FUNC_ATTR_INLINE u64x2
+byteswap_u64x2(u64x2 vec)
+{
+ u64 tmp = byteswap_u64(vec.val[0]);
+ vec.val[0] = byteswap_u64(vec.val[1]);
+ vec.val[1] = tmp;
+ return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE u64x2
+veor_u64x2(u64x2 va, u64x2 vb)
+{
+ va.val[0] ^= vb.val[0];
+ va.val[1] ^= vb.val[1];
+ return va;
+}
+
+/* 64x64 => 128 carry-less multiplication */
+static ASM_FUNC_ATTR_INLINE u64x2
+clmul_u64x2(u64 a, u64 b)
+{
+ u64x2 vec;
+ asm (".option push;\n\t"
+ ".option arch, +zbc;\n\t"
+ "clmul %0, %1, %2;\n\t"
+ ".option pop;\n\t"
+ : "=r" (vec.val[0])
+ : "r" (a), "r" (b));
+ asm (".option push;\n\t"
+ ".option arch, +zbc;\n\t"
+ "clmulh %0, %1, %2;\n\t"
+ ".option pop;\n\t"
+ : "=r" (vec.val[1])
+ : "r" (a), "r" (b));
+ return vec;
+}
+
+/* GHASH functions.
+ *
+ * See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+static ASM_FUNC_ATTR_INLINE u64x2x2
+pmul_128x128(u64x2 a, u64x2 b)
+{
+ u64 a_l = a.val[0];
+ u64 a_h = a.val[1];
+ u64 b_l = b.val[0];
+ u64 b_h = b.val[1];
+ u64 t1_h = b_l ^ b_h;
+ u64 t1_l = a_l ^ a_h;
+ u64x2 r0 = clmul_u64x2(a_l, b_l);
+ u64x2 r1 = clmul_u64x2(a_h, b_h);
+ u64x2 t2 = clmul_u64x2(t1_h, t1_l);
+ u64 t2_l, t2_h;
+ u64 r0_l, r0_h;
+ u64 r1_l, r1_h;
+
+ t2 = veor_u64x2(t2, r0);
+ t2 = veor_u64x2(t2, r1);
+
+ r0_l = r0.val[0];
+ r0_h = r0.val[1];
+ r1_l = r1.val[0];
+ r1_h = r1.val[1];
+ t2_l = t2.val[0];
+ t2_h = t2.val[1];
+
+ r0_h = r0_h ^ t2_l;
+ r1_l = r1_l ^ t2_h;
+
+ r0 = (const u64x2){ .val = { r0_l, r0_h } };
+ r1 = (const u64x2){ .val = { r1_l, r1_h } };
+
+ return (const u64x2x2){ .val = { r0, r1 } };
+}
+
+static ASM_FUNC_ATTR_INLINE u64x2
+reduction(u64x2x2 r0r1)
+{
+ static const u64 rconst = U64_C(0xc200000000000000);
+ u64x2 r0 = r0r1.val[0];
+ u64x2 r1 = r0r1.val[1];
+ u64x2 t = clmul_u64x2(r0.val[0], rconst);
+ r0.val[1] ^= t.val[0];
+ r1.val[0] ^= t.val[1];
+ t = clmul_u64x2(r0.val[1], rconst);
+ r1 = veor_u64x2(r1, t);
+ return veor_u64x2(r0, r1);
+}
+
+ASM_FUNC_ATTR_NOINLINE unsigned int
+_gcry_ghash_riscv_zbc(gcry_cipher_hd_t c, byte *result, const byte *buf,
+ size_t nblocks)
+{
+ u64x2 rhash;
+ u64x2 rh1;
+ u64x2 rbuf;
+ u64x2x2 rr0rr1;
+
+ if (nblocks == 0)
+ return 0;
+
+ rhash = load_aligned_u64x2(result);
+ rh1 = load_aligned_u64x2(c->u_mode.gcm.u_ghash_key.key);
+
+ rhash = byteswap_u64x2(rhash);
+
+ rbuf = load_unaligned_u64x2(buf);
+ buf += 16;
+ nblocks--;
+
+ rbuf = byteswap_u64x2(rbuf);
+
+ rhash = veor_u64x2(rhash, rbuf);
+
+ while (nblocks)
+ {
+ rbuf = load_unaligned_u64x2(buf);
+ buf += 16;
+ nblocks--;
+
+ rr0rr1 = pmul_128x128(rhash, rh1);
+
+ rbuf = byteswap_u64x2(rbuf);
+
+ rhash = reduction(rr0rr1);
+
+ rhash = veor_u64x2(rhash, rbuf);
+ }
+
+ rr0rr1 = pmul_128x128(rhash, rh1);
+ rhash = reduction(rr0rr1);
+
+ rhash = byteswap_u64x2(rhash);
+
+ store_aligned_u64x2(result, rhash);
+
+
+ return 0;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+gcm_lsh_1(void *r_out, u64x2 i)
+{
+ static const u64 rconst = U64_C(0xc200000000000000);
+ u64 ia = i.val[0];
+ u64 ib = i.val[1];
+ u64 oa, ob, ma;
+ u64x2 oa_ob;
+
+ ma = (u64)-(ib >> 63);
+ oa = ib >> 63;
+ ob = ia >> 63;
+ ma = ma & rconst;
+ ib = ib << 1;
+ ia = ia << 1;
+ ob = ob | ib;
+ oa = oa | ia;
+ ob = ob ^ ma;
+ oa_ob = (const u64x2){ .val = { oa, ob } };
+ store_aligned_u64x2(r_out, oa_ob);
+}
+
+ASM_FUNC_ATTR_NOINLINE void
+_gcry_ghash_setup_riscv_zbc(gcry_cipher_hd_t c)
+{
+ u64x2 rhash = load_aligned_u64x2(c->u_mode.gcm.u_ghash_key.key);
+
+ rhash = byteswap_u64x2(rhash);
+
+ gcm_lsh_1(c->u_mode.gcm.u_ghash_key.key, rhash);
+}
+
+#endif /* GCM_USE_RISCV_ZBC */
diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c
index 9fbdb02e..37743c30 100644
--- a/cipher/cipher-gcm.c
+++ b/cipher/cipher-gcm.c
@@ -102,6 +102,13 @@ ghash_armv7_neon (gcry_cipher_hd_t c, byte *result, const byte *buf,
}
#endif /* GCM_USE_ARM_NEON */
+#ifdef GCM_USE_RISCV_ZBC
+extern void _gcry_ghash_setup_riscv_zbc(gcry_cipher_hd_t c);
+
+extern unsigned int _gcry_ghash_riscv_zbc(gcry_cipher_hd_t c, byte *result,
+ const byte *buf, size_t nblocks);
+#endif /* GCM_USE_RISCV_ZBC */
+
#ifdef GCM_USE_AARCH64
extern void _gcry_ghash_setup_aarch64_simd(gcry_cipher_hd_t c);
@@ -621,6 +628,15 @@ setupM (gcry_cipher_hd_t c)
_gcry_ghash_setup_aarch64_simd (c);
}
#endif
+#ifdef GCM_USE_RISCV_ZBC
+ else if ((features & HWF_RISCV_IMAFDC)
+ && (features & HWF_RISCV_B)
+ && (features & HWF_RISCV_ZBC))
+ {
+ c->u_mode.gcm.ghash_fn = _gcry_ghash_riscv_zbc;
+ _gcry_ghash_setup_riscv_zbc (c);
+ }
+#endif
#ifdef GCM_USE_PPC_VPMSUM
else if (features & HWF_PPC_VCRYPTO)
{
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 19b3eada..9f50ebc2 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -136,6 +136,13 @@
#endif
#endif /* GCM_USE_PPC_VPMSUM */
+/* GCM_USE_RISCV_ZBC indicates whether to compile GCM with RISC-V Zbc code. */
+#undef GCM_USE_RISCV_ZBC
+#if defined (__riscv) && (__riscv_xlen == 64) && \
+ defined(HAVE_GCC_INLINE_ASM_RISCV)
+# define GCM_USE_RISCV_ZBC 1
+#endif
+
typedef unsigned int (*ghash_fn_t) (gcry_cipher_hd_t c, byte *result,
const byte *buf, size_t nblocks);
diff --git a/configure.ac b/configure.ac
index 55d15fa3..fbe82695 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3868,6 +3868,9 @@ case "${host}" in
powerpc64le-*-* | powerpc64-*-* | powerpc-*-*)
GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-ppc.lo"
;;
+ riscv64-*-*)
+ GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-riscv-b-zbc.lo"
+ ;;
esac
# Arch specific MAC implementations
--
2.45.2