[PATCH 4/6] Add RISC-V vector cryptography implementations of SHA256 and SHA512
Jussi Kivilinna
jussi.kivilinna at iki.fi
Thu Aug 7 15:28:53 CEST 2025
* cipher/Makefile.am: Add 'sha256-riscv-zvknha-zvkb.c' and
'sha512-riscv-zvknhb-zvkb.c'.
* cipher/sha256-riscv-zvknha-zvkb.c: New.
* cipher/sha256.c (USE_RISCV_V_CRYPTO): New.
[USE_RISCV_V_CRYPTO] (_gcry_sha256_riscv_v_check_hw)
(_gcry_sha256_transform_riscv_zvknha_zvkb)
(do_sha256_transform_riscv_zvknha): New.
(sha256_common_init) [USE_RISCV_V_CRYPTO]: Enable new implementation
if supported by HW.
* cipher/sha512-riscv-zvknhb-zvkb.c: New.
* cipher/sha512.c (USE_RISCV_V_CRYPTO): New.
[USE_RISCV_V_CRYPTO] (_gcry_sha512_riscv_v_check_hw)
(_gcry_sha512_transform_riscv_zvknhb_zvkb)
(do_sha512_transform_riscv_zvknhb): New.
(sha512_init_common) [USE_RISCV_V_CRYPTO]: Enable new implementation
if supported by HW.
* configure.ac: Add 'sha256-riscv-zvknha-zvkb.lo' and
'sha512-riscv-zvknhb-zvkb.lo'.
(GCRY_RISCV_VECTOR_INTRINSICS_TEST): New.
(gcry_cv_cc_riscv_vector_intrinsics)
(gcry_cv_cc_riscv_vector_intrinsics_cflags): Move test code to new
macro GCRY_RISCV_VECTOR_INTRINSICS_TEST.
(GCRY_RISCV_VECTOR_CRYPTO_INTRINSICS_TEST)
(gcry_cv_cc_riscv_vector_crypto_intrinsics)
(gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags)
(gcry_cv_riscv_vsha2cl_intrinsics_work): New.
* src/g10lib.h [HAVE_CPU_ARCH_RISCV] (HWF_RISCV_ZVKB, HWF_RISCV_ZVKNHA)
(HWF_RISCV_ZVKNHB): New.
* src/hwf-riscv.c (HWF_RISCV_HWPROBE_EXT_ZVKB)
(HWF_RISCV_HWPROBE_EXT_ZVKNHA, HWF_RISCV_HWPROBE_EXT_ZVKNHB): New.
* src/hwfeatures.c (hwflist) [HAVE_CPU_ARCH_RISCV]: Add "riscv-zvkb",
"riscv-zvknha" and "riscv-zvknhb".
--
Implementations have been tested with the QEMU emulator, as there is no
actual HW with these instructions available yet.
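
For reference, one way to reproduce such a test run is to use QEMU
user-mode emulation with the vector crypto CPU features enabled; the
command below is only a sketch and the exact CPU property names are an
assumption that may vary between QEMU versions:

  qemu-riscv64 -cpu rv64,v=on,vlen=256,zvkb=on,zvknha=on,zvknhb=on \
      tests/basic
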
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/Makefile.am | 23 ++-
cipher/sha256-riscv-zvknha-zvkb.c | 197 +++++++++++++++++++++
cipher/sha256.c | 34 ++++
cipher/sha512-riscv-zvknhb-zvkb.c | 190 +++++++++++++++++++++
cipher/sha512.c | 37 ++++
configure.ac | 274 ++++++++++++++++++++++--------
src/g10lib.h | 3 +
src/hwf-riscv.c | 6 +
src/hwfeatures.c | 3 +
9 files changed, 690 insertions(+), 77 deletions(-)
create mode 100644 cipher/sha256-riscv-zvknha-zvkb.c
create mode 100644 cipher/sha512-riscv-zvknhb-zvkb.c
diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index dfffefb5..3375ea38 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -143,11 +143,11 @@ EXTRA_libcipher_la_SOURCES = \
sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \
sha256-avx2-bmi2-amd64.S \
sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \
- sha256-intel-shaext.c sha256-ppc.c \
+ sha256-intel-shaext.c sha256-ppc.c sha256-riscv-zvknha-zvkb.c \
sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \
sha512-avx2-bmi2-amd64.S sha512-avx512-amd64.S \
sha512-armv7-neon.S sha512-armv8-aarch64-ce.S sha512-arm.S \
- sha512-ppc.c sha512-ssse3-i386.c \
+ sha512-ppc.c sha512-riscv-zvknhb-zvkb.c sha512-ssse3-i386.c \
sm3.c sm3-avx-bmi2-amd64.S sm3-aarch64.S sm3-armv8-aarch64-ce.S \
keccak.c keccak_permute_32.h keccak_permute_64.h \
keccak-armv7-neon.S keccak-amd64-avx512.S \
@@ -373,3 +373,22 @@ rijndael-vp-riscv.o: $(srcdir)/rijndael-vp-riscv.c Makefile
rijndael-vp-riscv.lo: $(srcdir)/rijndael-vp-riscv.c Makefile
`echo $(LTCOMPILE) $(riscv_vector_cflags) -c $< | $(instrumentation_munging) `
+
+if ENABLE_RISCV_VECTOR_CRYPTO_INTRINSICS_EXTRA_CFLAGS
+# Note: -mstrict-align needed for GCC-14 bug (disable unaligned vector loads)
+riscv_vector_crypto_cflags = -O2 -march=rv64imafdcv_zvbc_zvkg_zvkn_zvks -mstrict-align
+else
+riscv_vector_crypto_cflags =
+endif
+
+sha256-riscv-zvknha-zvkb.o: $(srcdir)/sha256-riscv-zvknha-zvkb.c Makefile
+ `echo $(COMPILE) $(riscv_vector_crypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha256-riscv-zvknha-zvkb.lo: $(srcdir)/sha256-riscv-zvknha-zvkb.c Makefile
+ `echo $(LTCOMPILE) $(riscv_vector_crypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha512-riscv-zvknhb-zvkb.o: $(srcdir)/sha512-riscv-zvknhb-zvkb.c Makefile
+ `echo $(COMPILE) $(riscv_vector_crypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha512-riscv-zvknhb-zvkb.lo: $(srcdir)/sha512-riscv-zvknhb-zvkb.c Makefile
+ `echo $(LTCOMPILE) $(riscv_vector_crypto_cflags) -c $< | $(instrumentation_munging) `
diff --git a/cipher/sha256-riscv-zvknha-zvkb.c b/cipher/sha256-riscv-zvknha-zvkb.c
new file mode 100644
index 00000000..6375f9aa
--- /dev/null
+++ b/cipher/sha256-riscv-zvknha-zvkb.c
@@ -0,0 +1,197 @@
+/* sha256-riscv-zvknha-zvkb.c - RISC-V vector crypto implementation of SHA-256
+ * Copyright (C) 2025 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined (__riscv) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS)
+
+#include "g10lib.h"
+#include "simd-common-riscv.h"
+#include <riscv_vector.h>
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static ASM_FUNC_ATTR_INLINE vuint32m1_t
+working_vsha2cl_vv_u32m1(vuint32m1_t hgcd, vuint32m1_t feba,
+ vuint32m1_t kw, size_t vl)
+{
+#ifdef HAVE_BROKEN_VSHA2CL_INTRINSIC
+ asm (
+ "vsetvli zero,%3,e32,m1,ta,ma;\n\t"
+ "vsha2cl.vv %0,%1,%2;\n\t"
+ : "+vr" (hgcd)
+ : "vr" (feba), "vr" (kw), "r" (vl)
+ : "vl", "vtype"
+ );
+ return hgcd;
+#else
+ return __riscv_vsha2cl_vv_u32m1(hgcd, feba, kw, vl);
+#endif
+}
+
+
+/* Quad-round with message expansion (rounds 0-47) */
+#define QUAD_ROUND_W_SCHED(w0, w1, w2, w3) \
+ v_k = __riscv_vle32_v_u32m1(k, vl); \
+ k += 4; \
+ v_kw = __riscv_vadd_vv_u32m1(v_k, w0, vl); \
+ v_hgcd_work = working_vsha2cl_vv_u32m1(v_hgcd_work, v_feba_work, v_kw, vl); \
+ v_feba_work = __riscv_vsha2ch_vv_u32m1(v_feba_work, v_hgcd_work, v_kw, vl); \
+ v_w_merged = __riscv_vmerge_vvm_u32m1(w2, w1, merge_mask, vl); \
+ w0 = __riscv_vsha2ms_vv_u32m1(w0, v_w_merged, w3, vl);
+
+/* Quad-round without message expansion (rounds 48-63) */
+#define QUAD_ROUND_NO_SCHED(w0) \
+ v_k = __riscv_vle32_v_u32m1(k, vl); \
+ k += 4; \
+ v_kw = __riscv_vadd_vv_u32m1(v_k, w0, vl); \
+ v_hgcd_work = working_vsha2cl_vv_u32m1(v_hgcd_work, v_feba_work, v_kw, vl); \
+ v_feba_work = __riscv_vsha2ch_vv_u32m1(v_feba_work, v_hgcd_work, v_kw, vl);
+
+
+static ASM_FUNC_ATTR_INLINE vuint32m1_t
+load_and_swap (const byte * p, size_t vl, size_t vl_bytes)
+{
+ vuint8m1_t temp_bytes = __riscv_vle8_v_u8m1(p, vl_bytes);
+ return __riscv_vrev8_v_u32m1(__riscv_vreinterpret_v_u8m1_u32m1(temp_bytes),
+ vl);
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+sha256_transform_zvknha_zvkb (u32 state[8], const uint8_t * data,
+ size_t nblocks)
+{
+ static const u32 k_const[64] =
+ {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+ };
+ static const u32 feba_hgcd_indices[4] = { 20, 16, 4, 0 };
+ static const int feba_offset = 0;
+ static const int hgcd_offset = 8 / sizeof(u32);
+ size_t vl;
+ size_t vl_bytes;
+ vuint32m1_t idx;
+ vuint32m1_t v_feba_work, v_feba;
+ vuint32m1_t v_hgcd_work, v_hgcd;
+ vuint32m1_t w0, w1, w2, w3;
+ vuint32m1_t v_k, v_kw, v_w_merged;
+ vbool32_t merge_mask;
+ vuint32m1_t v_feba_hgcd_idx;
+
+ vl = 4;
+ vl_bytes = vl * 4;
+ idx = __riscv_vid_v_u32m1(vl);
+ merge_mask = __riscv_vmseq_vx_u32m1_b32(idx, 0, vl);
+
+ v_feba_hgcd_idx = __riscv_vle32_v_u32m1(feba_hgcd_indices, vl);
+
+ v_feba = __riscv_vluxei32_v_u32m1(state + feba_offset, v_feba_hgcd_idx, vl);
+ v_hgcd = __riscv_vluxei32_v_u32m1(state + hgcd_offset, v_feba_hgcd_idx, vl);
+
+ while (nblocks > 0)
+ {
+ const u32 *k = k_const;
+
+ v_feba_work = v_feba;
+ v_hgcd_work = v_hgcd;
+
+ w0 = load_and_swap(data + 0, vl, vl_bytes);
+ w1 = load_and_swap(data + 16, vl, vl_bytes);
+ w2 = load_and_swap(data + 32, vl, vl_bytes);
+ w3 = load_and_swap(data + 48, vl, vl_bytes);
+
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_NO_SCHED(w0);
+ QUAD_ROUND_NO_SCHED(w1);
+ QUAD_ROUND_NO_SCHED(w2);
+ QUAD_ROUND_NO_SCHED(w3);
+
+ v_feba = __riscv_vadd_vv_u32m1(v_feba, v_feba_work, vl);
+ v_hgcd = __riscv_vadd_vv_u32m1(v_hgcd, v_hgcd_work, vl);
+
+ data += 64;
+ nblocks--;
+ }
+
+ __riscv_vsuxei32_v_u32m1(state + feba_offset, v_feba_hgcd_idx, v_feba, vl);
+ __riscv_vsuxei32_v_u32m1(state + hgcd_offset, v_feba_hgcd_idx, v_hgcd, vl);
+
+ clear_vec_regs();
+}
+
+
+#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE
+# define FUNC_ATTR_OPT_O2 __attribute__((optimize("-O2")))
+#else
+# define FUNC_ATTR_OPT_O2
+#endif
+
+unsigned int ASM_FUNC_ATTR FUNC_ATTR_OPT_O2
+_gcry_sha256_transform_riscv_zvknha_zvkb(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks)
+{
+ sha256_transform_zvknha_zvkb(state, input_data, num_blks);
+ return 0;
+}
+
+unsigned int ASM_FUNC_ATTR FUNC_ATTR_OPT_O2
+_gcry_sha256_riscv_v_check_hw(void)
+{
+ return (__riscv_vsetvl_e32m1(4) == 4);
+}
+
+#endif /* HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS */
diff --git a/cipher/sha256.c b/cipher/sha256.c
index 24cab566..27d4b1d4 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -110,6 +110,15 @@
# endif
#endif
+/* USE_RISCV_V_CRYPTO indicates whether to enable RISC-V vector cryptography
+ * extension code. */
+#undef USE_RISCV_V_CRYPTO
+#if defined (__riscv) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS)
+# define USE_RISCV_V_CRYPTO 1
+#endif
+
/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
#undef USE_S390X_CRYPTO
#if defined(HAVE_GCC_INLINE_ASM_S390X)
@@ -242,6 +251,23 @@ do_sha256_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
}
#endif
+#ifdef USE_RISCV_V_CRYPTO
+unsigned int _gcry_sha256_riscv_v_check_hw(void);
+
+unsigned int
+_gcry_sha256_transform_riscv_zvknha_zvkb(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_riscv_zvknha(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_riscv_zvknha_zvkb (hd->h, data, nblks);
+}
+#endif
+
#ifdef USE_S390X_CRYPTO
#include "asm-inline-s390x.h"
@@ -324,6 +350,14 @@ sha256_common_init (SHA256_CONTEXT *hd)
if ((features & HWF_PPC_VCRYPTO) != 0 && (features & HWF_PPC_ARCH_3_00) != 0)
hd->bctx.bwrite = do_sha256_transform_ppc9;
#endif
+#ifdef USE_RISCV_V_CRYPTO
+ if ((features & HWF_RISCV_IMAFDC)
+ && (features & HWF_RISCV_V)
+ && (features & HWF_RISCV_ZVKB)
+ && ((features & HWF_RISCV_ZVKNHA) || (features & HWF_RISCV_ZVKNHB))
+ && _gcry_sha256_riscv_v_check_hw())
+ hd->bctx.bwrite = do_sha256_transform_riscv_zvknha;
+#endif
#ifdef USE_S390X_CRYPTO
hd->use_s390x_crypto = 0;
if ((features & HWF_S390X_MSA) != 0)
diff --git a/cipher/sha512-riscv-zvknhb-zvkb.c b/cipher/sha512-riscv-zvknhb-zvkb.c
new file mode 100644
index 00000000..5f5d483a
--- /dev/null
+++ b/cipher/sha512-riscv-zvknhb-zvkb.c
@@ -0,0 +1,190 @@
+/* sha512-riscv-zvknhb-zvkb.c - RISC-V vector crypto implementation of SHA-512
+ * Copyright (C) 2025 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined (__riscv) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS) && \
+ defined(USE_SHA512)
+
+#include "g10lib.h"
+#include "simd-common-riscv.h"
+#include <riscv_vector.h>
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static ASM_FUNC_ATTR_INLINE vuint64m2_t
+working_vsha2cl_vv_u64m2(vuint64m2_t hgcd, vuint64m2_t feba,
+ vuint64m2_t kw, size_t vl)
+{
+#ifdef HAVE_BROKEN_VSHA2CL_INTRINSIC
+ asm (
+ "vsetvli zero,%3,e64,m2,ta,ma;\n\t"
+ "vsha2cl.vv %0,%1,%2;\n\t"
+ : "+vr" (hgcd)
+ : "vr" (feba), "vr" (kw), "r" (vl)
+ : "vl", "vtype"
+ );
+ return hgcd;
+#else
+ return __riscv_vsha2cl_vv_u64m2(hgcd, feba, kw, vl);
+#endif
+}
+
+
+/* Quad-round with message expansion (rounds 0-63) */
+#define QUAD_ROUND_W_SCHED(w0, w1, w2, w3) \
+ k_tmp = k; \
+ asm ("" : "+r" (k_tmp) :: "memory"); \
+ v_k = __riscv_vle64_v_u64m2(k_tmp, vl); \
+ k += 4; \
+ v_kw = __riscv_vadd_vv_u64m2(v_k, w0, vl); \
+ v_hgcd_work = working_vsha2cl_vv_u64m2(v_hgcd_work, v_feba_work, v_kw, vl); \
+ v_feba_work = __riscv_vsha2ch_vv_u64m2(v_feba_work, v_hgcd_work, v_kw, vl); \
+ v_w_merged = __riscv_vmerge_vvm_u64m2(w2, w1, merge_mask, vl); \
+ w0 = __riscv_vsha2ms_vv_u64m2(w0, v_w_merged, w3, vl);
+
+/* Quad-round without message expansion (rounds 64-79) */
+#define QUAD_ROUND_NO_SCHED(w0) \
+ k_tmp = k; \
+ asm ("" : "+r" (k_tmp) :: "memory"); \
+ v_k = __riscv_vle64_v_u64m2(k_tmp, vl); \
+ k += 4; \
+ v_kw = __riscv_vadd_vv_u64m2(v_k, w0, vl); \
+ v_hgcd_work = working_vsha2cl_vv_u64m2(v_hgcd_work, v_feba_work, v_kw, vl); \
+ v_feba_work = __riscv_vsha2ch_vv_u64m2(v_feba_work, v_hgcd_work, v_kw, vl);
+
+
+static ASM_FUNC_ATTR_INLINE vuint64m2_t
+load_and_swap(const byte *p, size_t vl, size_t vl_bytes)
+{
+ vuint8m2_t temp_bytes = __riscv_vle8_v_u8m2(p, vl_bytes);
+ return __riscv_vrev8_v_u64m2(__riscv_vreinterpret_v_u8m2_u64m2(temp_bytes),
+ vl);
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+sha512_transform_zvknhb_zvkb (u64 state[8], const byte *data,
+ size_t nblocks, const u64 k_const[80])
+{
+ static const u64 feba_hgcd_indices[4] = { 40, 32, 8, 0 };
+ static const int feba_offset = 0;
+ static const int hgcd_offset = 16 / sizeof(u64);
+ size_t vl;
+ size_t vl_bytes;
+ vuint64m2_t idx;
+ vuint64m2_t v_feba_work, v_feba;
+ vuint64m2_t v_hgcd_work, v_hgcd;
+ vuint64m2_t w0, w1, w2, w3;
+ vuint64m2_t v_k, v_kw, v_w_merged;
+ vbool32_t merge_mask;
+ vuint64m2_t v_feba_hgcd_idx;
+
+ vl = 4;
+ vl_bytes = vl * 8;
+ idx = __riscv_vid_v_u64m2(vl);
+ merge_mask = __riscv_vmseq_vx_u64m2_b32(idx, 0, vl);
+
+ v_feba_hgcd_idx = __riscv_vle64_v_u64m2(feba_hgcd_indices, vl);
+
+ v_feba = __riscv_vluxei64_v_u64m2(state + feba_offset, v_feba_hgcd_idx, vl);
+ v_hgcd = __riscv_vluxei64_v_u64m2(state + hgcd_offset, v_feba_hgcd_idx, vl);
+
+ while (nblocks > 0)
+ {
+ const u64 *k = k_const;
+ const u64 *k_tmp;
+
+ v_feba_work = v_feba;
+ v_hgcd_work = v_hgcd;
+
+ w0 = load_and_swap(data + 0, vl, vl_bytes);
+ w1 = load_and_swap(data + 32, vl, vl_bytes);
+ w2 = load_and_swap(data + 64, vl, vl_bytes);
+ w3 = load_and_swap(data + 96, vl, vl_bytes);
+
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+
+ QUAD_ROUND_NO_SCHED(w0);
+ QUAD_ROUND_NO_SCHED(w1);
+ QUAD_ROUND_NO_SCHED(w2);
+ QUAD_ROUND_NO_SCHED(w3);
+
+ v_feba = __riscv_vadd_vv_u64m2(v_feba, v_feba_work, vl);
+ v_hgcd = __riscv_vadd_vv_u64m2(v_hgcd, v_hgcd_work, vl);
+
+ data += 128;
+ nblocks--;
+ }
+
+ __riscv_vsuxei64_v_u64m2(state + feba_offset, v_feba_hgcd_idx, v_feba, vl);
+ __riscv_vsuxei64_v_u64m2(state + hgcd_offset, v_feba_hgcd_idx, v_hgcd, vl);
+
+ clear_vec_regs();
+}
+
+
+#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE
+# define FUNC_ATTR_OPT_O2 __attribute__((optimize("-O2")))
+#else
+# define FUNC_ATTR_OPT_O2
+#endif
+
+unsigned int ASM_FUNC_ATTR FUNC_ATTR_OPT_O2
+_gcry_sha512_transform_riscv_zvknhb_zvkb(u64 state[8],
+ const unsigned char *input_data,
+ size_t num_blks,
+ const u64 k[80])
+{
+ sha512_transform_zvknhb_zvkb(state, input_data, num_blks, k);
+ return 0;
+}
+
+unsigned int ASM_FUNC_ATTR FUNC_ATTR_OPT_O2
+_gcry_sha512_riscv_v_check_hw(void)
+{
+ return (__riscv_vsetvl_e64m2(4) == 4);
+}
+
+#endif /* HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS */
diff --git a/cipher/sha512.c b/cipher/sha512.c
index bf3f3ff2..a0c0bf1c 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -154,6 +154,16 @@
#endif
+/* USE_RISCV_V_CRYPTO indicates whether to enable RISC-V vector cryptography
+ * extension code. */
+#undef USE_RISCV_V_CRYPTO
+#if defined (__riscv) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS)
+# define USE_RISCV_V_CRYPTO 1
+#endif
+
+
/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
#undef USE_S390X_CRYPTO
#if defined(HAVE_GCC_INLINE_ASM_S390X)
@@ -392,6 +402,25 @@ do_sha512_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
#endif
+#ifdef USE_RISCV_V_CRYPTO
+unsigned int _gcry_sha512_riscv_v_check_hw(void);
+
+unsigned int
+_gcry_sha512_transform_riscv_zvknhb_zvkb(u64 state[8],
+ const unsigned char *input_data,
+ size_t num_blks,
+ const u64 k[80]);
+
+static unsigned int
+do_sha512_transform_riscv_zvknhb(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_riscv_zvknhb_zvkb (hd->state.h, data, nblks, k);
+}
+#endif
+
+
#ifdef USE_S390X_CRYPTO
#include "asm-inline-s390x.h"
@@ -479,6 +508,14 @@ sha512_init_common (SHA512_CONTEXT *ctx, unsigned int flags)
if ((features & HWF_INTEL_SSSE3) != 0)
ctx->bctx.bwrite = do_sha512_transform_i386_ssse3;
#endif
+#ifdef USE_RISCV_V_CRYPTO
+ if ((features & HWF_RISCV_IMAFDC)
+ && (features & HWF_RISCV_V)
+ && (features & HWF_RISCV_ZVKB)
+ && (features & HWF_RISCV_ZVKNHB)
+ && _gcry_sha512_riscv_v_check_hw())
+ ctx->bctx.bwrite = do_sha512_transform_riscv_zvknhb;
+#endif
#ifdef USE_S390X_CRYPTO
ctx->use_s390x_crypto = 0;
if ((features & HWF_S390X_MSA) != 0)
diff --git a/configure.ac b/configure.ac
index 3ce405e9..63bdfbe7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2669,6 +2669,46 @@ fi
#
# Check whether compiler supports RISC-V vector intrinsics
#
+m4_define([GCRY_RISCV_VECTOR_INTRINSICS_TEST],
+ [AC_LANG_SOURCE(
+ [[#if !(defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000)
+ #error __riscv_v_intrinsic not defined or too old version
+ #endif
+ #include <riscv_vector.h>
+ typedef vuint8m1_t __m128i;
+ #define cast_m128i_to_u64(a) (__riscv_vreinterpret_v_u8m1_u64m1(a))
+ #define cast_u64_to_m128i(a) (__riscv_vreinterpret_v_u64m1_u8m1(a))
+ #define paddq128(a, o) (o = cast_u64_to_m128i( \
+ __riscv_vadd_vv_u64m1( \
+ cast_m128i_to_u64(o), \
+ cast_m128i_to_u64(a), 2)))
+ #define pshufb128(m8, o) (o = __riscv_vrgather_vv_u8m1((o), (m8), 16))
+ #define memory_barrier_with_vec(a) __asm__("" : "+vr"(a) :: "memory")
+ #define clear_vec_reg_v0() \
+ __asm__ volatile("vsetivli zero, 16, e8, m1, ta, ma;\n" \
+ "vmv.v.x v0, zero;\n" \
+ ::: "memory", "vtype", "vl", "v0")
+ static inline __attribute__((always_inline)) __m128i
+ fn2(__m128i a)
+ {
+ paddq128(a, a);
+ return a;
+ }
+ __m128i fn(__m128i in)
+ {
+ __m128i x;
+ memory_barrier_with_vec(in);
+ x = fn2(in);
+ memory_barrier_with_vec(x);
+ pshufb128(in, x);
+ memory_barrier_with_vec(in);
+ clear_vec_reg_v0();
+ return in;
+ }
+ ]]
+ )]
+)
+
AC_CACHE_CHECK([whether compiler supports RISC-V vector intrinsics],
[gcry_cv_cc_riscv_vector_intrinsics],
[if test "$mpi_cpu_arch" != "riscv64" ||
@@ -2676,43 +2716,9 @@ AC_CACHE_CHECK([whether compiler supports RISC-V vector intrinsics],
gcry_cv_cc_riscv_vector_intrinsics="n/a"
else
gcry_cv_cc_riscv_vector_intrinsics=no
- AC_COMPILE_IFELSE([AC_LANG_SOURCE(
- [[#if !(defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000)
- #error __riscv_v_intrinsic not defined or too old version
- #endif
- #include <riscv_vector.h>
- typedef vuint8m1_t __m128i;
- #define cast_m128i_to_u64(a) (__riscv_vreinterpret_v_u8m1_u64m1(a))
- #define cast_u64_to_m128i(a) (__riscv_vreinterpret_v_u64m1_u8m1(a))
- #define paddq128(a, o) (o = cast_u64_to_m128i( \
- __riscv_vadd_vv_u64m1( \
- cast_m128i_to_u64(o), \
- cast_m128i_to_u64(a), 2)))
- #define pshufb128(m8, o) (o = __riscv_vrgather_vv_u8m1((o), (m8), 16))
- #define memory_barrier_with_vec(a) __asm__("" : "+vr"(a) :: "memory")
- #define clear_vec_reg_v0() \
- __asm__ volatile("vsetivli zero, 16, e8, m1, ta, ma;\n" \
- "vmv.v.x v0, zero;\n" \
- ::: "memory", "vtype", "vl", "v0")
- static inline __attribute__((always_inline)) __m128i
- fn2(__m128i a)
- {
- paddq128(a, a);
- return a;
- }
- __m128i fn(__m128i in)
- {
- __m128i x;
- memory_barrier_with_vec(in);
- x = fn2(in);
- memory_barrier_with_vec(x);
- pshufb128(in, x);
- memory_barrier_with_vec(in);
- clear_vec_reg_v0();
- return in;
- }
- ]])],
- [gcry_cv_cc_riscv_vector_intrinsics=yes])
+ AC_COMPILE_IFELSE(
+ [GCRY_RISCV_VECTOR_INTRINSICS_TEST],
+ [gcry_cv_cc_riscv_vector_intrinsics=yes])
fi])
if test "$gcry_cv_cc_riscv_vector_intrinsics" = "yes" ; then
AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS,1,
@@ -2720,6 +2726,7 @@ if test "$gcry_cv_cc_riscv_vector_intrinsics" = "yes" ; then
fi
_gcc_cflags_save=$CFLAGS
+# Note: -mstrict-align needed for GCC-14 bug (disable unaligned vector loads)
CFLAGS="$CFLAGS -O2 -march=rv64imafdcv -mstrict-align"
if test "$gcry_cv_cc_riscv_vector_intrinsics" = "no" &&
@@ -2727,44 +2734,12 @@ if test "$gcry_cv_cc_riscv_vector_intrinsics" = "no" &&
test "$try_asm_modules" = "yes" ; then
AC_CACHE_CHECK([whether compiler supports RISC-V vector intrinsics with extra GCC flags],
[gcry_cv_cc_riscv_vector_intrinsics_cflags],
- [gcry_cv_cc_riscv_vector_intrinsics_cflags=no
- AC_COMPILE_IFELSE([AC_LANG_SOURCE(
- [[#if !(defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000)
- #error __riscv_v_intrinsic not defined or too old version
- #endif
- #include <riscv_vector.h>
- typedef vuint8m1_t __m128i;
- #define cast_m128i_to_u64(a) (__riscv_vreinterpret_v_u8m1_u64m1(a))
- #define cast_u64_to_m128i(a) (__riscv_vreinterpret_v_u64m1_u8m1(a))
- #define paddq128(a, o) (o = cast_u64_to_m128i( \
- __riscv_vadd_vv_u64m1( \
- cast_m128i_to_u64(o), \
- cast_m128i_to_u64(a), 2)))
- #define pshufb128(m8, o) (o = __riscv_vrgather_vv_u8m1((o), (m8), 16))
- #define memory_barrier_with_vec(a) __asm__("" : "+vr"(a) :: "memory")
- #define clear_vec_reg_v0() \
- __asm__ volatile("vsetivli zero, 16, e8, m1, ta, ma;\n" \
- "vmv.v.x v0, zero;\n" \
- ::: "memory", "vl", "v0")
- static inline __attribute__((always_inline)) __m128i
- fn2(__m128i a)
- {
- paddq128(a, a);
- return a;
- }
- __m128i fn(__m128i in)
- {
- __m128i x;
- memory_barrier_with_vec(in);
- x = fn2(in);
- memory_barrier_with_vec(x);
- pshufb128(in, x);
- memory_barrier_with_vec(in);
- clear_vec_reg_v0();
- return in;
- }
- ]])],
- [gcry_cv_cc_riscv_vector_intrinsics_cflags=yes])])
+ [
+ gcry_cv_cc_riscv_vector_intrinsics_cflags=no
+ AC_COMPILE_IFELSE(
+ [GCRY_RISCV_VECTOR_INTRINSICS_TEST],
+ [gcry_cv_cc_riscv_vector_intrinsics_cflags=yes])
+ ])
if test "$gcry_cv_cc_riscv_vector_intrinsics_cflags" = "yes" ; then
AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS,1,
[Defined if underlying compiler supports RISC-V vector intrinsics])
@@ -2780,6 +2755,145 @@ AM_CONDITIONAL(ENABLE_RISCV_VECTOR_INTRINSICS_EXTRA_CFLAGS,
CFLAGS=$_gcc_cflags_save;
+#
+# Check whether compiler supports RISC-V vector cryptography intrinsics
+#
+m4_define([GCRY_RISCV_VECTOR_CRYPTO_INTRINSICS_TEST],
+ [AC_LANG_SOURCE(
+ [[#if !(defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000)
+ #error __riscv_v_intrinsic not defined or too old version
+ #endif
+ #include <riscv_vector.h>
+ void test_sha2(unsigned int *ptr)
+ {
+ int vl = __riscv_vsetvl_e32m1 (4);
+ vuint32m1_t a = __riscv_vle32_v_u32m1(ptr + 0 * vl, vl);
+ vuint32m1_t b = __riscv_vle32_v_u32m1(ptr + 1 * vl, vl);
+ vuint32m1_t w0 = __riscv_vle32_v_u32m1(ptr + 2 * vl, vl);
+ vuint32m1_t w1 = __riscv_vle32_v_u32m1(ptr + 3 * vl, vl);
+ vuint32m1_t w2 = __riscv_vle32_v_u32m1(ptr + 4 * vl, vl);
+ vuint32m1_t w3 = __riscv_vle32_v_u32m1(ptr + 5 * vl, vl);
+ vuint32m1_t m;
+ vuint32m1_t idx = __riscv_vid_v_u32m1 (vl);
+ vbool32_t merge_mask = __riscv_vmseq_vx_u32m1_b32 (idx, 0, vl);
+ a = __riscv_vsha2cl_vv_u32m1(a, b, w0, vl);
+ b = __riscv_vsha2ch_vv_u32m1(a, b, w0, vl);
+ m = __riscv_vmerge_vvm_u32m1(w2, w1, merge_mask, vl);
+ w0 = __riscv_vsha2ms_vv_u32m1(w0, m, w3, vl);
+ __riscv_vse32_v_u32m1(ptr + 0 * vl, a, vl);
+ __riscv_vse32_v_u32m1(ptr + 1 * vl, b, vl);
+ __riscv_vse32_v_u32m1(ptr + 2 * vl, w0, vl);
+ __riscv_vse32_v_u32m1(ptr + 3 * vl, w1, vl);
+ __riscv_vse32_v_u32m1(ptr + 4 * vl, w2, vl);
+ __riscv_vse32_v_u32m1(ptr + 5 * vl, w3, vl);
+ }
+ void test_inline_vec_asm(unsigned int *ptr)
+ {
+ int vl = __riscv_vsetvl_e32m1 (4);
+ vuint32m1_t a = __riscv_vle32_v_u32m1(ptr + 0 * vl, vl);
+ vuint32m1_t b = __riscv_vle32_v_u32m1(ptr + 1 * vl, vl);
+ asm (
+ "vsetvli zero,%1,e32,m1,ta,ma;\n\t"
+ "vsha2ms.vv %0,%2,%2;\n\t"
+ : "+vr" (a)
+ : "r" (vl), "vr" (b)
+ : "vl", "vtype"
+ );
+ __riscv_vse32_v_u32m1(ptr + 0 * vl, a, vl);
+ }
+ ]]
+ )]
+)
+
+AC_CACHE_CHECK([whether compiler supports RISC-V vector cryptography intrinsics],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics],
+ [if test "$mpi_cpu_arch" != "riscv64" ||
+ test "$try_asm_modules" != "yes" ; then
+ gcry_cv_cc_riscv_vector_crypto_intrinsics="n/a"
+ else
+ gcry_cv_cc_riscv_vector_crypto_intrinsics=no
+ AC_COMPILE_IFELSE(
+ [GCRY_RISCV_VECTOR_CRYPTO_INTRINSICS_TEST],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics=yes])
+ fi])
+if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics" = "yes" ; then
+ AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS,1,
+ [Defined if underlying compiler supports RISC-V vector cryptography intrinsics])
+fi
+
+_gcc_cflags_save=$CFLAGS
+# Note: -mstrict-align needed for GCC-14 bug (disable unaligned vector loads)
+CFLAGS="$CFLAGS -O2 -march=rv64imafdcv_zvbc_zvkg_zvkn_zvks -mstrict-align"
+
+if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics" = "no" &&
+ test "$mpi_cpu_arch" = "riscv64" &&
+ test "$try_asm_modules" = "yes" ; then
+ AC_CACHE_CHECK([whether compiler supports RISC-V vector cryptography intrinsics with extra GCC flags],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags=no
+ AC_COMPILE_IFELSE(
+ [GCRY_RISCV_VECTOR_CRYPTO_INTRINSICS_TEST],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags=yes])])
+ if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags" = "yes" ; then
+ AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS,1,
+ [Defined if underlying compiler supports RISC-V vector cryptography intrinsics])
+ AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS_WITH_CFLAGS,1,
+ [Defined if underlying compiler supports RISC-V vector cryptography intrinsics with extra GCC flags])
+ fi
+fi
+
+AM_CONDITIONAL(ENABLE_RISCV_VECTOR_CRYPTO_INTRINSICS_EXTRA_CFLAGS,
+ test "$gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags" = "yes")
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether compiler has working RISC-V vector __riscv_vsha2cl intrinsics
+#
+# LLVM has broken __riscv_vsha2cl_* intrinsics where they emit 'vsha2ch.vv'
+# instructions instead of expected 'vsha2cl.vv':
+# https://github.com/llvm/llvm-project/issues/151814
+#
+if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics" = "yes" ||
+ test "$gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags" = "yes"; then
+
+ # Setup flags for test if needed.
+ _gcc_cflags_save=$CFLAGS
+ if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags" = "yes"; then
+ CFLAGS="$CFLAGS -O2 -march=rv64imafdcv_zvbc_zvkg_zvkn_zvks -mstrict-align"
+ fi
+
+ AC_CACHE_CHECK([whether compiler has working RISC-V __riscv_vsha2cl intrinsics],
+ [gcry_cv_riscv_vsha2cl_intrinsics_work],
+ [gcry_cv_riscv_vsha2cl_intrinsics_work=no
+ cat > conftest.c <<EOF
+#include <riscv_vector.h>
+vuint32m1_t test_fn(vuint32m1_t a, vuint32m1_t b, vuint32m1_t c, int vl)
+{
+ return __riscv_vsha2cl_vv_u32m1(a, b, c, vl);
+}
+EOF
+
+ if $CC $CFLAGS -S conftest.c -o conftest.s >&5 2>&5; then
+ if grep 'vsha2cl' conftest.s >/dev/null 2>&1; then
+ gcry_cv_riscv_vsha2cl_intrinsics_work=yes
+ fi
+ fi
+ rm -f conftest.*
+ ])
+
+ if test "$gcry_cv_riscv_vsha2cl_intrinsics_work" = "no"; then
+ AC_DEFINE([HAVE_BROKEN_VSHA2CL_INTRINSIC], [1],
+ [Define to 1 if __riscv_vsha2cl intrinsics are broken])
+ fi
+
+ # Restore flags.
+ CFLAGS=$_gcc_cflags_save;
+fi
+
+
#######################################
#### Checks for library functions. ####
#######################################
@@ -3656,6 +3770,11 @@ if test "$found" = "1" ; then
# Big-Endian.
# Build with the crypto extension implementation
GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
+ ;;
+ riscv64-*-*)
+ # Build with the RISC-V vector cryptography implementation
+ GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-riscv-zvknha-zvkb.lo"
+ ;;
esac
case "$mpi_cpu_arch" in
@@ -3709,6 +3828,11 @@ if test "$found" = "1" ; then
# Big-Endian.
# Build with the crypto extension implementation
GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
+ ;;
+ riscv64-*-*)
+ # Build with the RISC-V vector cryptography implementation
+ GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-riscv-zvknhb-zvkb.lo"
+ ;;
esac
fi
diff --git a/src/g10lib.h b/src/g10lib.h
index 84ec4713..4fa91ba9 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -280,6 +280,9 @@ char **_gcry_strtokenize (const char *string, const char *delim);
#define HWF_RISCV_V (1 << 1)
#define HWF_RISCV_ZBB (1 << 2)
#define HWF_RISCV_ZBC (1 << 3)
+#define HWF_RISCV_ZVKB (1 << 4)
+#define HWF_RISCV_ZVKNHA (1 << 5)
+#define HWF_RISCV_ZVKNHB (1 << 6)
#endif
diff --git a/src/hwf-riscv.c b/src/hwf-riscv.c
index 13ca4879..925284a1 100644
--- a/src/hwf-riscv.c
+++ b/src/hwf-riscv.c
@@ -190,6 +190,9 @@ detect_riscv_at_hwcap(void)
#define HWF_RISCV_HWPROBE_EXT_ZBB (1U << 4)
#define HWF_RISCV_HWPROBE_EXT_ZBS (1U << 5)
#define HWF_RISCV_HWPROBE_EXT_ZBC (1U << 7)
+#define HWF_RISCV_HWPROBE_EXT_ZVKB (1U << 19)
+#define HWF_RISCV_HWPROBE_EXT_ZVKNHA (1U << 22)
+#define HWF_RISCV_HWPROBE_EXT_ZVKNHB (1U << 23)
#define HWF_RISCV_HWPROBE_EXT_ZICOND (U64_C(1) << 35)
#define HWF_RISCV_HWPROBE_IMA_FDC (HWF_RISCV_HWPROBE_IMA_FD \
@@ -211,6 +214,9 @@ static const struct hwprobe_feature_map_s hwprobe_features[] =
{ HWF_RISCV_HWPROBE_IMA_V, HWF_RISCV_V },
{ HWF_RISCV_HWPROBE_EXT_ZBB, HWF_RISCV_ZBB },
{ HWF_RISCV_HWPROBE_EXT_ZBC, HWF_RISCV_ZBC },
+ { HWF_RISCV_HWPROBE_EXT_ZVKB, HWF_RISCV_ZVKB },
+ { HWF_RISCV_HWPROBE_EXT_ZVKNHA, HWF_RISCV_ZVKNHA },
+ { HWF_RISCV_HWPROBE_EXT_ZVKNHB, HWF_RISCV_ZVKNHB },
};
static int
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index afcaa00d..df2aaf17 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -96,6 +96,9 @@ static struct
{ HWF_RISCV_V, "riscv-v" },
{ HWF_RISCV_ZBB, "riscv-zbb" },
{ HWF_RISCV_ZBC, "riscv-zbc" },
+ { HWF_RISCV_ZVKB, "riscv-zvkb" },
+ { HWF_RISCV_ZVKNHA, "riscv-zvknha" },
+ { HWF_RISCV_ZVKNHB, "riscv-zvknhb" },
#endif
};
--
2.48.1