[PATCH 4/6] Add RISC-V vector cryptography implementations of SHA256 and SHA512
Jussi Kivilinna
jussi.kivilinna at iki.fi
Thu Aug 7 15:28:53 CEST 2025
* cipher/Makefile.am: Add 'sha256-riscv-zvknha-zvkb.c' and
'sha512-riscv-zvknhb-zvkb.c'.
* cipher/sha256-riscv-zvknha-zvkb.c: New.
* cipher/sha256.c (USE_RISCV_V_CRYPTO): New.
[USE_RISCV_V_CRYPTO] (_gcry_sha256_riscv_v_check_hw)
(_gcry_sha256_transform_riscv_zvknha_zvkb)
(do_sha256_transform_riscv_zvknha): New.
(sha256_common_init) [USE_RISCV_V_CRYPTO]: Enable new implementation
if supported by HW.
* cipher/sha512-riscv-zvknhb-zvkb.c: New.
* cipher/sha512.c (USE_RISCV_V_CRYPTO): New.
[USE_RISCV_V_CRYPTO] (_gcry_sha512_riscv_v_check_hw)
(_gcry_sha512_transform_riscv_zvknhb_zvkb)
(do_sha512_transform_riscv_zvknhb): New.
(sha512_init_common) [USE_RISCV_V_CRYPTO]: Enable new implementation
if supported by HW.
* configure.ac: Add 'sha256-riscv-zvknha-zvkb.lo' and
'sha512-riscv-zvknhb-zvkb.lo'.
(GCRY_RISCV_VECTOR_INTRINSICS_TEST): New.
(gcry_cv_cc_riscv_vector_intrinsics)
(gcry_cv_cc_riscv_vector_intrinsics_cflags): Move test code to new
macro GCRY_RISCV_VECTOR_INTRINSICS_TEST.
(GCRY_RISCV_VECTOR_CRYPTO_INTRINSICS_TEST)
(gcry_cv_cc_riscv_vector_crypto_intrinsics)
(gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags)
(gcry_cv_riscv_vsha2cl_intrinsics_work): New.
* src/g10lib.h [HAVE_CPU_ARCH_RISCV] (HWF_RISCV_ZVKB, HWF_RISCV_ZVKNHA)
(HWF_RISCV_ZVKNHB): New.
* src/hwf-riscv.c (HWF_RISCV_HWPROBE_EXT_ZVKB)
(HWF_RISCV_HWPROBE_EXT_ZVKNHA, HWF_RISCV_HWPROBE_EXT_ZVKNHB): New.
* src/hwfeatures.c (hwflist) [HAVE_CPU_ARCH_RISCV]: Add "riscv-zvkb",
"riscv-zvknha" and "riscv-zvknhb".
--
Implementations have been tested with the QEMU emulator, as there is no
actual HW with these instructions available yet.
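
For reference, one way to reproduce such a test run is to use QEMU
user-mode emulation with the vector crypto CPU features enabled; the
command below is only a sketch and the exact CPU property names are an
assumption that may vary between QEMU versions:

  qemu-riscv64 -cpu rv64,v=on,vlen=256,zvkb=on,zvknha=on,zvknhb=on \
      tests/basic
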
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/Makefile.am | 23 ++-
cipher/sha256-riscv-zvknha-zvkb.c | 197 +++++++++++++++++++++
cipher/sha256.c | 34 ++++
cipher/sha512-riscv-zvknhb-zvkb.c | 190 +++++++++++++++++++++
cipher/sha512.c | 37 ++++
configure.ac | 274 ++++++++++++++++++++++--------
src/g10lib.h | 3 +
src/hwf-riscv.c | 6 +
src/hwfeatures.c | 3 +
9 files changed, 690 insertions(+), 77 deletions(-)
create mode 100644 cipher/sha256-riscv-zvknha-zvkb.c
create mode 100644 cipher/sha512-riscv-zvknhb-zvkb.c
diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index dfffefb5..3375ea38 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -143,11 +143,11 @@ EXTRA_libcipher_la_SOURCES = \
sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \
sha256-avx2-bmi2-amd64.S \
sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \
- sha256-intel-shaext.c sha256-ppc.c \
+ sha256-intel-shaext.c sha256-ppc.c sha256-riscv-zvknha-zvkb.c \
sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \
sha512-avx2-bmi2-amd64.S sha512-avx512-amd64.S \
sha512-armv7-neon.S sha512-armv8-aarch64-ce.S sha512-arm.S \
- sha512-ppc.c sha512-ssse3-i386.c \
+ sha512-ppc.c sha512-riscv-zvknhb-zvkb.c sha512-ssse3-i386.c \
sm3.c sm3-avx-bmi2-amd64.S sm3-aarch64.S sm3-armv8-aarch64-ce.S \
keccak.c keccak_permute_32.h keccak_permute_64.h \
keccak-armv7-neon.S keccak-amd64-avx512.S \
@@ -373,3 +373,22 @@ rijndael-vp-riscv.o: $(srcdir)/rijndael-vp-riscv.c Makefile
rijndael-vp-riscv.lo: $(srcdir)/rijndael-vp-riscv.c Makefile
`echo $(LTCOMPILE) $(riscv_vector_cflags) -c $< | $(instrumentation_munging) `
+
+if ENABLE_RISCV_VECTOR_CRYPTO_INTRINSICS_EXTRA_CFLAGS
+# Note: -mstrict-align needed for GCC-14 bug (disable unaligned vector loads)
+riscv_vector_crypto_cflags = -O2 -march=rv64imafdcv_zvbc_zvkg_zvkn_zvks -mstrict-align
+else
+riscv_vector_crypto_cflags =
+endif
+
+sha256-riscv-zvknha-zvkb.o: $(srcdir)/sha256-riscv-zvknha-zvkb.c Makefile
+ `echo $(COMPILE) $(riscv_vector_crypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha256-riscv-zvknha-zvkb.lo: $(srcdir)/sha256-riscv-zvknha-zvkb.c Makefile
+ `echo $(LTCOMPILE) $(riscv_vector_crypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha512-riscv-zvknhb-zvkb.o: $(srcdir)/sha512-riscv-zvknhb-zvkb.c Makefile
+ `echo $(COMPILE) $(riscv_vector_crypto_cflags) -c $< | $(instrumentation_munging) `
+
+sha512-riscv-zvknhb-zvkb.lo: $(srcdir)/sha512-riscv-zvknhb-zvkb.c Makefile
+ `echo $(LTCOMPILE) $(riscv_vector_crypto_cflags) -c $< | $(instrumentation_munging) `
diff --git a/cipher/sha256-riscv-zvknha-zvkb.c b/cipher/sha256-riscv-zvknha-zvkb.c
new file mode 100644
index 00000000..6375f9aa
--- /dev/null
+++ b/cipher/sha256-riscv-zvknha-zvkb.c
@@ -0,0 +1,197 @@
+/* sha256-riscv-zvknha-zvkb.c - RISC-V vector crypto implementation of SHA-256
+ * Copyright (C) 2025 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined (__riscv) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS)
+
+#include "g10lib.h"
+#include "simd-common-riscv.h"
+#include <riscv_vector.h>
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static ASM_FUNC_ATTR_INLINE vuint32m1_t
+working_vsha2cl_vv_u32m1(vuint32m1_t hgcd, vuint32m1_t feba,
+ vuint32m1_t kw, size_t vl)
+{
+#ifdef HAVE_BROKEN_VSHA2CL_INTRINSIC
+ asm (
+ "vsetvli zero,%3,e32,m1,ta,ma;\n\t"
+ "vsha2cl.vv %0,%1,%2;\n\t"
+ : "+vr" (hgcd)
+ : "vr" (feba), "vr" (kw), "r" (vl)
+ : "vl", "vtype"
+ );
+ return hgcd;
+#else
+ return __riscv_vsha2cl_vv_u32m1(hgcd, feba, kw, vl);
+#endif
+}
+
+
+/* Quad-round with message expansion (rounds 0-47) */
+#define QUAD_ROUND_W_SCHED(w0, w1, w2, w3) \
+ v_k = __riscv_vle32_v_u32m1(k, vl); \
+ k += 4; \
+ v_kw = __riscv_vadd_vv_u32m1(v_k, w0, vl); \
+ v_hgcd_work = working_vsha2cl_vv_u32m1(v_hgcd_work, v_feba_work, v_kw, vl); \
+ v_feba_work = __riscv_vsha2ch_vv_u32m1(v_feba_work, v_hgcd_work, v_kw, vl); \
+ v_w_merged = __riscv_vmerge_vvm_u32m1(w2, w1, merge_mask, vl); \
+ w0 = __riscv_vsha2ms_vv_u32m1(w0, v_w_merged, w3, vl);
+
+/* Quad-round without message expansion (rounds 48-63) */
+#define QUAD_ROUND_NO_SCHED(w0) \
+ v_k = __riscv_vle32_v_u32m1(k, vl); \
+ k += 4; \
+ v_kw = __riscv_vadd_vv_u32m1(v_k, w0, vl); \
+ v_hgcd_work = working_vsha2cl_vv_u32m1(v_hgcd_work, v_feba_work, v_kw, vl); \
+ v_feba_work = __riscv_vsha2ch_vv_u32m1(v_feba_work, v_hgcd_work, v_kw, vl);
+
+
+static ASM_FUNC_ATTR_INLINE vuint32m1_t
+load_and_swap (const byte * p, size_t vl, size_t vl_bytes)
+{
+ vuint8m1_t temp_bytes = __riscv_vle8_v_u8m1(p, vl_bytes);
+ return __riscv_vrev8_v_u32m1(__riscv_vreinterpret_v_u8m1_u32m1(temp_bytes),
+ vl);
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+sha256_transform_zvknha_zvkb (u32 state[8], const uint8_t * data,
+ size_t nblocks)
+{
+ static const u32 k_const[64] =
+ {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+ };
+ static const u32 feba_hgcd_indices[4] = { 20, 16, 4, 0 };
+ static const int feba_offset = 0;
+ static const int hgcd_offset = 8 / sizeof(u32);
+ size_t vl;
+ size_t vl_bytes;
+ vuint32m1_t idx;
+ vuint32m1_t v_feba_work, v_feba;
+ vuint32m1_t v_hgcd_work, v_hgcd;
+ vuint32m1_t w0, w1, w2, w3;
+ vuint32m1_t v_k, v_kw, v_w_merged;
+ vbool32_t merge_mask;
+ vuint32m1_t v_feba_hgcd_idx;
+
+ vl = 4;
+ vl_bytes = vl * 4;
+ idx = __riscv_vid_v_u32m1(vl);
+ merge_mask = __riscv_vmseq_vx_u32m1_b32(idx, 0, vl);
+
+ v_feba_hgcd_idx = __riscv_vle32_v_u32m1(feba_hgcd_indices, vl);
+
+ v_feba = __riscv_vluxei32_v_u32m1(state + feba_offset, v_feba_hgcd_idx, vl);
+ v_hgcd = __riscv_vluxei32_v_u32m1(state + hgcd_offset, v_feba_hgcd_idx, vl);
+
+ while (nblocks > 0)
+ {
+ const u32 *k = k_const;
+
+ v_feba_work = v_feba;
+ v_hgcd_work = v_hgcd;
+
+ w0 = load_and_swap(data + 0, vl, vl_bytes);
+ w1 = load_and_swap(data + 16, vl, vl_bytes);
+ w2 = load_and_swap(data + 32, vl, vl_bytes);
+ w3 = load_and_swap(data + 48, vl, vl_bytes);
+
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_NO_SCHED(w0);
+ QUAD_ROUND_NO_SCHED(w1);
+ QUAD_ROUND_NO_SCHED(w2);
+ QUAD_ROUND_NO_SCHED(w3);
+
+ v_feba = __riscv_vadd_vv_u32m1(v_feba, v_feba_work, vl);
+ v_hgcd = __riscv_vadd_vv_u32m1(v_hgcd, v_hgcd_work, vl);
+
+ data += 64;
+ nblocks--;
+ }
+
+ __riscv_vsuxei32_v_u32m1(state + feba_offset, v_feba_hgcd_idx, v_feba, vl);
+ __riscv_vsuxei32_v_u32m1(state + hgcd_offset, v_feba_hgcd_idx, v_hgcd, vl);
+
+ clear_vec_regs();
+}
+
+
+#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE
+# define FUNC_ATTR_OPT_O2 __attribute__((optimize("-O2")))
+#else
+# define FUNC_ATTR_OPT_O2
+#endif
+
+unsigned int ASM_FUNC_ATTR FUNC_ATTR_OPT_O2
+_gcry_sha256_transform_riscv_zvknha_zvkb(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks)
+{
+ sha256_transform_zvknha_zvkb(state, input_data, num_blks);
+ return 0;
+}
+
+unsigned int ASM_FUNC_ATTR FUNC_ATTR_OPT_O2
+_gcry_sha256_riscv_v_check_hw(void)
+{
+ return (__riscv_vsetvl_e32m1(4) == 4);
+}
+
+#endif /* HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS */
diff --git a/cipher/sha256.c b/cipher/sha256.c
index 24cab566..27d4b1d4 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -110,6 +110,15 @@
# endif
#endif
+/* USE_RISCV_V_CRYPTO indicates whether to enable RISC-V vector cryptography
+ * extension code. */
+#undef USE_RISCV_V_CRYPTO
+#if defined (__riscv) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS)
+# define USE_RISCV_V_CRYPTO 1
+#endif
+
/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
#undef USE_S390X_CRYPTO
#if defined(HAVE_GCC_INLINE_ASM_S390X)
@@ -242,6 +251,23 @@ do_sha256_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
}
#endif
+#ifdef USE_RISCV_V_CRYPTO
+unsigned int _gcry_sha256_riscv_v_check_hw(void);
+
+unsigned int
+_gcry_sha256_transform_riscv_zvknha_zvkb(u32 state[8],
+ const unsigned char *input_data,
+ size_t num_blks);
+
+static unsigned int
+do_sha256_transform_riscv_zvknha(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA256_CONTEXT *hd = ctx;
+ return _gcry_sha256_transform_riscv_zvknha_zvkb (hd->h, data, nblks);
+}
+#endif
+
#ifdef USE_S390X_CRYPTO
#include "asm-inline-s390x.h"
@@ -324,6 +350,14 @@ sha256_common_init (SHA256_CONTEXT *hd)
if ((features & HWF_PPC_VCRYPTO) != 0 && (features & HWF_PPC_ARCH_3_00) != 0)
hd->bctx.bwrite = do_sha256_transform_ppc9;
#endif
+#ifdef USE_RISCV_V_CRYPTO
+ if ((features & HWF_RISCV_IMAFDC)
+ && (features & HWF_RISCV_V)
+ && (features & HWF_RISCV_ZVKB)
+ && ((features & HWF_RISCV_ZVKNHA) || (features & HWF_RISCV_ZVKNHB))
+ && _gcry_sha256_riscv_v_check_hw())
+ hd->bctx.bwrite = do_sha256_transform_riscv_zvknha;
+#endif
#ifdef USE_S390X_CRYPTO
hd->use_s390x_crypto = 0;
if ((features & HWF_S390X_MSA) != 0)
diff --git a/cipher/sha512-riscv-zvknhb-zvkb.c b/cipher/sha512-riscv-zvknhb-zvkb.c
new file mode 100644
index 00000000..5f5d483a
--- /dev/null
+++ b/cipher/sha512-riscv-zvknhb-zvkb.c
@@ -0,0 +1,190 @@
+/* sha512-riscv-zvknhb-zvkb.c - RISC-V vector crypto implementation of SHA-512
+ * Copyright (C) 2025 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined (__riscv) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS) && \
+ defined(USE_SHA512)
+
+#include "g10lib.h"
+#include "simd-common-riscv.h"
+#include <riscv_vector.h>
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static ASM_FUNC_ATTR_INLINE vuint64m2_t
+working_vsha2cl_vv_u64m2(vuint64m2_t hgcd, vuint64m2_t feba,
+ vuint64m2_t kw, size_t vl)
+{
+#ifdef HAVE_BROKEN_VSHA2CL_INTRINSIC
+ asm (
+ "vsetvli zero,%3,e64,m2,ta,ma;\n\t"
+ "vsha2cl.vv %0,%1,%2;\n\t"
+ : "+vr" (hgcd)
+ : "vr" (feba), "vr" (kw), "r" (vl)
+ : "vl", "vtype"
+ );
+ return hgcd;
+#else
+ return __riscv_vsha2cl_vv_u64m2(hgcd, feba, kw, vl);
+#endif
+}
+
+
+/* Quad-round with message expansion (rounds 0-63) */
+#define QUAD_ROUND_W_SCHED(w0, w1, w2, w3) \
+ k_tmp = k; \
+ asm ("" : "+r" (k_tmp) :: "memory"); \
+ v_k = __riscv_vle64_v_u64m2(k_tmp, vl); \
+ k += 4; \
+ v_kw = __riscv_vadd_vv_u64m2(v_k, w0, vl); \
+ v_hgcd_work = working_vsha2cl_vv_u64m2(v_hgcd_work, v_feba_work, v_kw, vl); \
+ v_feba_work = __riscv_vsha2ch_vv_u64m2(v_feba_work, v_hgcd_work, v_kw, vl); \
+ v_w_merged = __riscv_vmerge_vvm_u64m2(w2, w1, merge_mask, vl); \
+ w0 = __riscv_vsha2ms_vv_u64m2(w0, v_w_merged, w3, vl);
+
+/* Quad-round without message expansion (rounds 64-79) */
+#define QUAD_ROUND_NO_SCHED(w0) \
+ k_tmp = k; \
+ asm ("" : "+r" (k_tmp) :: "memory"); \
+ v_k = __riscv_vle64_v_u64m2(k_tmp, vl); \
+ k += 4; \
+ v_kw = __riscv_vadd_vv_u64m2(v_k, w0, vl); \
+ v_hgcd_work = working_vsha2cl_vv_u64m2(v_hgcd_work, v_feba_work, v_kw, vl); \
+ v_feba_work = __riscv_vsha2ch_vv_u64m2(v_feba_work, v_hgcd_work, v_kw, vl);
+
+
+static ASM_FUNC_ATTR_INLINE vuint64m2_t
+load_and_swap(const byte *p, size_t vl, size_t vl_bytes)
+{
+ vuint8m2_t temp_bytes = __riscv_vle8_v_u8m2(p, vl_bytes);
+ return __riscv_vrev8_v_u64m2(__riscv_vreinterpret_v_u8m2_u64m2(temp_bytes),
+ vl);
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+sha512_transform_zvknhb_zvkb (u64 state[8], const byte *data,
+ size_t nblocks, const u64 k_const[80])
+{
+ static const u64 feba_hgcd_indices[4] = { 40, 32, 8, 0 };
+ static const int feba_offset = 0;
+ static const int hgcd_offset = 16 / sizeof(u64);
+ size_t vl;
+ size_t vl_bytes;
+ vuint64m2_t idx;
+ vuint64m2_t v_feba_work, v_feba;
+ vuint64m2_t v_hgcd_work, v_hgcd;
+ vuint64m2_t w0, w1, w2, w3;
+ vuint64m2_t v_k, v_kw, v_w_merged;
+ vbool32_t merge_mask;
+ vuint64m2_t v_feba_hgcd_idx;
+
+ vl = 4;
+ vl_bytes = vl * 8;
+ idx = __riscv_vid_v_u64m2(vl);
+ merge_mask = __riscv_vmseq_vx_u64m2_b32(idx, 0, vl);
+
+ v_feba_hgcd_idx = __riscv_vle64_v_u64m2(feba_hgcd_indices, vl);
+
+ v_feba = __riscv_vluxei64_v_u64m2(state + feba_offset, v_feba_hgcd_idx, vl);
+ v_hgcd = __riscv_vluxei64_v_u64m2(state + hgcd_offset, v_feba_hgcd_idx, vl);
+
+ while (nblocks > 0)
+ {
+ const u64 *k = k_const;
+ const u64 *k_tmp;
+
+ v_feba_work = v_feba;
+ v_hgcd_work = v_hgcd;
+
+ w0 = load_and_swap(data + 0, vl, vl_bytes);
+ w1 = load_and_swap(data + 32, vl, vl_bytes);
+ w2 = load_and_swap(data + 64, vl, vl_bytes);
+ w3 = load_and_swap(data + 96, vl, vl_bytes);
+
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+ QUAD_ROUND_W_SCHED(w0, w1, w2, w3);
+ QUAD_ROUND_W_SCHED(w1, w2, w3, w0);
+ QUAD_ROUND_W_SCHED(w2, w3, w0, w1);
+ QUAD_ROUND_W_SCHED(w3, w0, w1, w2);
+
+ QUAD_ROUND_NO_SCHED(w0);
+ QUAD_ROUND_NO_SCHED(w1);
+ QUAD_ROUND_NO_SCHED(w2);
+ QUAD_ROUND_NO_SCHED(w3);
+
+ v_feba = __riscv_vadd_vv_u64m2(v_feba, v_feba_work, vl);
+ v_hgcd = __riscv_vadd_vv_u64m2(v_hgcd, v_hgcd_work, vl);
+
+ data += 128;
+ nblocks--;
+ }
+
+ __riscv_vsuxei64_v_u64m2(state + feba_offset, v_feba_hgcd_idx, v_feba, vl);
+ __riscv_vsuxei64_v_u64m2(state + hgcd_offset, v_feba_hgcd_idx, v_hgcd, vl);
+
+ clear_vec_regs();
+}
+
+
+#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE
+# define FUNC_ATTR_OPT_O2 __attribute__((optimize("-O2")))
+#else
+# define FUNC_ATTR_OPT_O2
+#endif
+
+unsigned int ASM_FUNC_ATTR FUNC_ATTR_OPT_O2
+_gcry_sha512_transform_riscv_zvknhb_zvkb(u64 state[8],
+ const unsigned char *input_data,
+ size_t num_blks,
+ const u64 k[80])
+{
+ sha512_transform_zvknhb_zvkb(state, input_data, num_blks, k);
+ return 0;
+}
+
+unsigned int ASM_FUNC_ATTR FUNC_ATTR_OPT_O2
+_gcry_sha512_riscv_v_check_hw(void)
+{
+ return (__riscv_vsetvl_e64m2(4) == 4);
+}
+
+#endif /* HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS */
diff --git a/cipher/sha512.c b/cipher/sha512.c
index bf3f3ff2..a0c0bf1c 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -154,6 +154,16 @@
#endif
+/* USE_RISCV_V_CRYPTO indicates whether to enable RISC-V vector cryptography
+ * extension code. */
+#undef USE_RISCV_V_CRYPTO
+#if defined (__riscv) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS) && \
+ defined(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS)
+# define USE_RISCV_V_CRYPTO 1
+#endif
+
+
/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
#undef USE_S390X_CRYPTO
#if defined(HAVE_GCC_INLINE_ASM_S390X)
@@ -392,6 +402,25 @@ do_sha512_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
#endif
+#ifdef USE_RISCV_V_CRYPTO
+unsigned int _gcry_sha512_riscv_v_check_hw(void);
+
+unsigned int
+_gcry_sha512_transform_riscv_zvknhb_zvkb(u64 state[8],
+ const unsigned char *input_data,
+ size_t num_blks,
+ const u64 k[80]);
+
+static unsigned int
+do_sha512_transform_riscv_zvknhb(void *ctx, const unsigned char *data,
+ size_t nblks)
+{
+ SHA512_CONTEXT *hd = ctx;
+ return _gcry_sha512_transform_riscv_zvknhb_zvkb (hd->state.h, data, nblks, k);
+}
+#endif
+
+
#ifdef USE_S390X_CRYPTO
#include "asm-inline-s390x.h"
@@ -479,6 +508,14 @@ sha512_init_common (SHA512_CONTEXT *ctx, unsigned int flags)
if ((features & HWF_INTEL_SSSE3) != 0)
ctx->bctx.bwrite = do_sha512_transform_i386_ssse3;
#endif
+#ifdef USE_RISCV_V_CRYPTO
+ if ((features & HWF_RISCV_IMAFDC)
+ && (features & HWF_RISCV_V)
+ && (features & HWF_RISCV_ZVKB)
+ && (features & HWF_RISCV_ZVKNHB)
+ && _gcry_sha512_riscv_v_check_hw())
+ ctx->bctx.bwrite = do_sha512_transform_riscv_zvknhb;
+#endif
#ifdef USE_S390X_CRYPTO
ctx->use_s390x_crypto = 0;
if ((features & HWF_S390X_MSA) != 0)
diff --git a/configure.ac b/configure.ac
index 3ce405e9..63bdfbe7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2669,6 +2669,46 @@ fi
#
# Check whether compiler supports RISC-V vector intrinsics
#
+m4_define([GCRY_RISCV_VECTOR_INTRINSICS_TEST],
+ [AC_LANG_SOURCE(
+ [[#if !(defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000)
+ #error __riscv_v_intrinsic not defined or too old version
+ #endif
+ #include <riscv_vector.h>
+ typedef vuint8m1_t __m128i;
+ #define cast_m128i_to_u64(a) (__riscv_vreinterpret_v_u8m1_u64m1(a))
+ #define cast_u64_to_m128i(a) (__riscv_vreinterpret_v_u64m1_u8m1(a))
+ #define paddq128(a, o) (o = cast_u64_to_m128i( \
+ __riscv_vadd_vv_u64m1( \
+ cast_m128i_to_u64(o), \
+ cast_m128i_to_u64(a), 2)))
+ #define pshufb128(m8, o) (o = __riscv_vrgather_vv_u8m1((o), (m8), 16))
+ #define memory_barrier_with_vec(a) __asm__("" : "+vr"(a) :: "memory")
+ #define clear_vec_reg_v0() \
+ __asm__ volatile("vsetivli zero, 16, e8, m1, ta, ma;\n" \
+ "vmv.v.x v0, zero;\n" \
+ ::: "memory", "vtype", "vl", "v0")
+ static inline __attribute__((always_inline)) __m128i
+ fn2(__m128i a)
+ {
+ paddq128(a, a);
+ return a;
+ }
+ __m128i fn(__m128i in)
+ {
+ __m128i x;
+ memory_barrier_with_vec(in);
+ x = fn2(in);
+ memory_barrier_with_vec(x);
+ pshufb128(in, x);
+ memory_barrier_with_vec(in);
+ clear_vec_reg_v0();
+ return in;
+ }
+ ]]
+ )]
+)
+
AC_CACHE_CHECK([whether compiler supports RISC-V vector intrinsics],
[gcry_cv_cc_riscv_vector_intrinsics],
[if test "$mpi_cpu_arch" != "riscv64" ||
@@ -2676,43 +2716,9 @@ AC_CACHE_CHECK([whether compiler supports RISC-V vector intrinsics],
gcry_cv_cc_riscv_vector_intrinsics="n/a"
else
gcry_cv_cc_riscv_vector_intrinsics=no
- AC_COMPILE_IFELSE([AC_LANG_SOURCE(
- [[#if !(defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000)
- #error __riscv_v_intrinsic not defined or too old version
- #endif
- #include <riscv_vector.h>
- typedef vuint8m1_t __m128i;
- #define cast_m128i_to_u64(a) (__riscv_vreinterpret_v_u8m1_u64m1(a))
- #define cast_u64_to_m128i(a) (__riscv_vreinterpret_v_u64m1_u8m1(a))
- #define paddq128(a, o) (o = cast_u64_to_m128i( \
- __riscv_vadd_vv_u64m1( \
- cast_m128i_to_u64(o), \
- cast_m128i_to_u64(a), 2)))
- #define pshufb128(m8, o) (o = __riscv_vrgather_vv_u8m1((o), (m8), 16))
- #define memory_barrier_with_vec(a) __asm__("" : "+vr"(a) :: "memory")
- #define clear_vec_reg_v0() \
- __asm__ volatile("vsetivli zero, 16, e8, m1, ta, ma;\n" \
- "vmv.v.x v0, zero;\n" \
- ::: "memory", "vtype", "vl", "v0")
- static inline __attribute__((always_inline)) __m128i
- fn2(__m128i a)
- {
- paddq128(a, a);
- return a;
- }
- __m128i fn(__m128i in)
- {
- __m128i x;
- memory_barrier_with_vec(in);
- x = fn2(in);
- memory_barrier_with_vec(x);
- pshufb128(in, x);
- memory_barrier_with_vec(in);
- clear_vec_reg_v0();
- return in;
- }
- ]])],
- [gcry_cv_cc_riscv_vector_intrinsics=yes])
+ AC_COMPILE_IFELSE(
+ [GCRY_RISCV_VECTOR_INTRINSICS_TEST],
+ [gcry_cv_cc_riscv_vector_intrinsics=yes])
fi])
if test "$gcry_cv_cc_riscv_vector_intrinsics" = "yes" ; then
AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS,1,
@@ -2720,6 +2726,7 @@ if test "$gcry_cv_cc_riscv_vector_intrinsics" = "yes" ; then
fi
_gcc_cflags_save=$CFLAGS
+# Note: -mstrict-align needed for GCC-14 bug (disable unaligned vector loads)
CFLAGS="$CFLAGS -O2 -march=rv64imafdcv -mstrict-align"
if test "$gcry_cv_cc_riscv_vector_intrinsics" = "no" &&
@@ -2727,44 +2734,12 @@ if test "$gcry_cv_cc_riscv_vector_intrinsics" = "no" &&
test "$try_asm_modules" = "yes" ; then
AC_CACHE_CHECK([whether compiler supports RISC-V vector intrinsics with extra GCC flags],
[gcry_cv_cc_riscv_vector_intrinsics_cflags],
- [gcry_cv_cc_riscv_vector_intrinsics_cflags=no
- AC_COMPILE_IFELSE([AC_LANG_SOURCE(
- [[#if !(defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000)
- #error __riscv_v_intrinsic not defined or too old version
- #endif
- #include <riscv_vector.h>
- typedef vuint8m1_t __m128i;
- #define cast_m128i_to_u64(a) (__riscv_vreinterpret_v_u8m1_u64m1(a))
- #define cast_u64_to_m128i(a) (__riscv_vreinterpret_v_u64m1_u8m1(a))
- #define paddq128(a, o) (o = cast_u64_to_m128i( \
- __riscv_vadd_vv_u64m1( \
- cast_m128i_to_u64(o), \
- cast_m128i_to_u64(a), 2)))
- #define pshufb128(m8, o) (o = __riscv_vrgather_vv_u8m1((o), (m8), 16))
- #define memory_barrier_with_vec(a) __asm__("" : "+vr"(a) :: "memory")
- #define clear_vec_reg_v0() \
- __asm__ volatile("vsetivli zero, 16, e8, m1, ta, ma;\n" \
- "vmv.v.x v0, zero;\n" \
- ::: "memory", "vl", "v0")
- static inline __attribute__((always_inline)) __m128i
- fn2(__m128i a)
- {
- paddq128(a, a);
- return a;
- }
- __m128i fn(__m128i in)
- {
- __m128i x;
- memory_barrier_with_vec(in);
- x = fn2(in);
- memory_barrier_with_vec(x);
- pshufb128(in, x);
- memory_barrier_with_vec(in);
- clear_vec_reg_v0();
- return in;
- }
- ]])],
- [gcry_cv_cc_riscv_vector_intrinsics_cflags=yes])])
+ [
+ gcry_cv_cc_riscv_vector_intrinsics_cflags=no
+ AC_COMPILE_IFELSE(
+ [GCRY_RISCV_VECTOR_INTRINSICS_TEST],
+ [gcry_cv_cc_riscv_vector_intrinsics_cflags=yes])
+ ])
if test "$gcry_cv_cc_riscv_vector_intrinsics_cflags" = "yes" ; then
AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_INTRINSICS,1,
[Defined if underlying compiler supports RISC-V vector intrinsics])
@@ -2780,6 +2755,145 @@ AM_CONDITIONAL(ENABLE_RISCV_VECTOR_INTRINSICS_EXTRA_CFLAGS,
CFLAGS=$_gcc_cflags_save;
+#
+# Check whether compiler supports RISC-V vector cryptography intrinsics
+#
+m4_define([GCRY_RISCV_VECTOR_CRYPTO_INTRINSICS_TEST],
+ [AC_LANG_SOURCE(
+ [[#if !(defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 12000)
+ #error __riscv_v_intrinsic not defined or too old version
+ #endif
+ #include <riscv_vector.h>
+ void test_sha2(unsigned int *ptr)
+ {
+ int vl = __riscv_vsetvl_e32m1 (4);
+ vuint32m1_t a = __riscv_vle32_v_u32m1(ptr + 0 * vl, vl);
+ vuint32m1_t b = __riscv_vle32_v_u32m1(ptr + 1 * vl, vl);
+ vuint32m1_t w0 = __riscv_vle32_v_u32m1(ptr + 2 * vl, vl);
+ vuint32m1_t w1 = __riscv_vle32_v_u32m1(ptr + 3 * vl, vl);
+ vuint32m1_t w2 = __riscv_vle32_v_u32m1(ptr + 4 * vl, vl);
+ vuint32m1_t w3 = __riscv_vle32_v_u32m1(ptr + 5 * vl, vl);
+ vuint32m1_t m;
+ vuint32m1_t idx = __riscv_vid_v_u32m1 (vl);
+ vbool32_t merge_mask = __riscv_vmseq_vx_u32m1_b32 (idx, 0, vl);
+ a = __riscv_vsha2cl_vv_u32m1(a, b, w0, vl);
+ b = __riscv_vsha2ch_vv_u32m1(a, b, w0, vl);
+ m = __riscv_vmerge_vvm_u32m1(w2, w1, merge_mask, vl);
+ w0 = __riscv_vsha2ms_vv_u32m1(w0, m, w3, vl);
+ __riscv_vse32_v_u32m1(ptr + 0 * vl, a, vl);
+ __riscv_vse32_v_u32m1(ptr + 1 * vl, b, vl);
+ __riscv_vse32_v_u32m1(ptr + 2 * vl, w0, vl);
+ __riscv_vse32_v_u32m1(ptr + 3 * vl, w1, vl);
+ __riscv_vse32_v_u32m1(ptr + 4 * vl, w2, vl);
+ __riscv_vse32_v_u32m1(ptr + 5 * vl, w3, vl);
+ }
+ void test_inline_vec_asm(unsigned int *ptr)
+ {
+ int vl = __riscv_vsetvl_e32m1 (4);
+ vuint32m1_t a = __riscv_vle32_v_u32m1(ptr + 0 * vl, vl);
+ vuint32m1_t b = __riscv_vle32_v_u32m1(ptr + 1 * vl, vl);
+ asm (
+ "vsetvli zero,%1,e32,m1,ta,ma;\n\t"
+ "vsha2ms.vv %0,%2,%2;\n\t"
+ : "+vr" (a)
+ : "r" (vl), "vr" (b)
+ : "vl", "vtype"
+ );
+ __riscv_vse32_v_u32m1(ptr + 0 * vl, a, vl);
+ }
+ ]]
+ )]
+)
+
+AC_CACHE_CHECK([whether compiler supports RISC-V vector cryptography intrinsics],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics],
+ [if test "$mpi_cpu_arch" != "riscv64" ||
+ test "$try_asm_modules" != "yes" ; then
+ gcry_cv_cc_riscv_vector_crypto_intrinsics="n/a"
+ else
+ gcry_cv_cc_riscv_vector_crypto_intrinsics=no
+ AC_COMPILE_IFELSE(
+ [GCRY_RISCV_VECTOR_CRYPTO_INTRINSICS_TEST],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics=yes])
+ fi])
+if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics" = "yes" ; then
+ AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS,1,
+ [Defined if underlying compiler supports RISC-V vector cryptography intrinsics])
+fi
+
+_gcc_cflags_save=$CFLAGS
+# Note: -mstrict-align needed for GCC-14 bug (disable unaligned vector loads)
+CFLAGS="$CFLAGS -O2 -march=rv64imafdcv_zvbc_zvkg_zvkn_zvks -mstrict-align"
+
+if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics" = "no" &&
+ test "$mpi_cpu_arch" = "riscv64" &&
+ test "$try_asm_modules" = "yes" ; then
+ AC_CACHE_CHECK([whether compiler supports RISC-V vector cryptography intrinsics with extra GCC flags],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags=no
+ AC_COMPILE_IFELSE(
+ [GCRY_RISCV_VECTOR_CRYPTO_INTRINSICS_TEST],
+ [gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags=yes])])
+ if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags" = "yes" ; then
+ AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS,1,
+ [Defined if underlying compiler supports RISC-V vector cryptography intrinsics])
+ AC_DEFINE(HAVE_COMPATIBLE_CC_RISCV_VECTOR_CRYPTO_INTRINSICS_WITH_CFLAGS,1,
+ [Defined if underlying compiler supports RISC-V vector cryptography intrinsics with extra GCC flags])
+ fi
+fi
+
+AM_CONDITIONAL(ENABLE_RISCV_VECTOR_CRYPTO_INTRINSICS_EXTRA_CFLAGS,
+ test "$gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags" = "yes")
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether compiler has working RISC-V vector __riscv_vsha2cl intrinsics
+#
+# LLVM has broken __riscv_vsha2cl_* intrinsics where they emit 'vsha2ch.vv'
+# instructions instead of expected 'vsha2cl.vv':
+# https://github.com/llvm/llvm-project/issues/151814
+#
+if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics" = "yes" ||
+ test "$gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags" = "yes"; then
+
+ # Setup flags for test if needed.
+ _gcc_cflags_save=$CFLAGS
+ if test "$gcry_cv_cc_riscv_vector_crypto_intrinsics_cflags" = "yes"; then
+ CFLAGS="$CFLAGS -O2 -march=rv64imafdcv_zvbc_zvkg_zvkn_zvks -mstrict-align"
+ fi
+
+ AC_CACHE_CHECK([whether compiler has working RISC-V __riscv_vsha2cl intrinsics],
+ [gcry_cv_riscv_vsha2cl_intrinsics_work],
+ [gcry_cv_riscv_vsha2cl_intrinsics_work=no
+ cat > conftest.c <<EOF
+#include <riscv_vector.h>
+vuint32m1_t test_fn(vuint32m1_t a, vuint32m1_t b, vuint32m1_t c, int vl)
+{
+ return __riscv_vsha2cl_vv_u32m1(a, b, c, vl);
+}
+EOF
+
+ if $CC $CFLAGS -S conftest.c -o conftest.s >&5 2>&5; then
+ if grep 'vsha2cl' conftest.s >/dev/null 2>&1; then
+ gcry_cv_riscv_vsha2cl_intrinsics_work=yes
+ fi
+ fi
+ rm -f conftest.*
+ ])
+
+ if test "$gcry_cv_riscv_vsha2cl_intrinsics_work" = "no"; then
+ AC_DEFINE([HAVE_BROKEN_VSHA2CL_INTRINSIC], [1],
+ [Define to 1 if __riscv_vsha2cl intrinsics are broken])
+ fi
+
+ # Restore flags.
+ CFLAGS=$_gcc_cflags_save;
+fi
+
+
#######################################
#### Checks for library functions. ####
#######################################
@@ -3656,6 +3770,11 @@ if test "$found" = "1" ; then
# Big-Endian.
# Build with the crypto extension implementation
GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
+ ;;
+ riscv64-*-*)
+ # Build with the RISC-V vector cryptography implementation
+ GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-riscv-zvknha-zvkb.lo"
+ ;;
esac
case "$mpi_cpu_arch" in
@@ -3709,6 +3828,11 @@ if test "$found" = "1" ; then
# Big-Endian.
# Build with the crypto extension implementation
GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
+ ;;
+ riscv64-*-*)
+ # Build with the RISC-V vector cryptography implementation
+ GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-riscv-zvknhb-zvkb.lo"
+ ;;
esac
fi
diff --git a/src/g10lib.h b/src/g10lib.h
index 84ec4713..4fa91ba9 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -280,6 +280,9 @@ char **_gcry_strtokenize (const char *string, const char *delim);
#define HWF_RISCV_V (1 << 1)
#define HWF_RISCV_ZBB (1 << 2)
#define HWF_RISCV_ZBC (1 << 3)
+#define HWF_RISCV_ZVKB (1 << 4)
+#define HWF_RISCV_ZVKNHA (1 << 5)
+#define HWF_RISCV_ZVKNHB (1 << 6)
#endif
diff --git a/src/hwf-riscv.c b/src/hwf-riscv.c
index 13ca4879..925284a1 100644
--- a/src/hwf-riscv.c
+++ b/src/hwf-riscv.c
@@ -190,6 +190,9 @@ detect_riscv_at_hwcap(void)
#define HWF_RISCV_HWPROBE_EXT_ZBB (1U << 4)
#define HWF_RISCV_HWPROBE_EXT_ZBS (1U << 5)
#define HWF_RISCV_HWPROBE_EXT_ZBC (1U << 7)
+#define HWF_RISCV_HWPROBE_EXT_ZVKB (1U << 19)
+#define HWF_RISCV_HWPROBE_EXT_ZVKNHA (1U << 22)
+#define HWF_RISCV_HWPROBE_EXT_ZVKNHB (1U << 23)
#define HWF_RISCV_HWPROBE_EXT_ZICOND (U64_C(1) << 35)
#define HWF_RISCV_HWPROBE_IMA_FDC (HWF_RISCV_HWPROBE_IMA_FD \
@@ -211,6 +214,9 @@ static const struct hwprobe_feature_map_s hwprobe_features[] =
{ HWF_RISCV_HWPROBE_IMA_V, HWF_RISCV_V },
{ HWF_RISCV_HWPROBE_EXT_ZBB, HWF_RISCV_ZBB },
{ HWF_RISCV_HWPROBE_EXT_ZBC, HWF_RISCV_ZBC },
+ { HWF_RISCV_HWPROBE_EXT_ZVKB, HWF_RISCV_ZVKB },
+ { HWF_RISCV_HWPROBE_EXT_ZVKNHA, HWF_RISCV_ZVKNHA },
+ { HWF_RISCV_HWPROBE_EXT_ZVKNHB, HWF_RISCV_ZVKNHB },
};
static int
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index afcaa00d..df2aaf17 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -96,6 +96,9 @@ static struct
{ HWF_RISCV_V, "riscv-v" },
{ HWF_RISCV_ZBB, "riscv-zbb" },
{ HWF_RISCV_ZBC, "riscv-zbc" },
+ { HWF_RISCV_ZVKB, "riscv-zvkb" },
+ { HWF_RISCV_ZVKNHA, "riscv-zvknha" },
+ { HWF_RISCV_ZVKNHB, "riscv-zvknhb" },
#endif
};
--
2.48.1