[PATCH 1/2] Add Intel SHA Extensions accelerated SHA1 implementation
Jussi Kivilinna
jussi.kivilinna at iki.fi
Sat Feb 17 11:03:55 CET 2018
* cipher/Makefile.am: Add 'sha1-intel-shaext.c'.
* cipher/sha1-intel-shaext.c: New.
* cipher/sha1.c (USE_SHAEXT, _gcry_sha1_transform_intel_shaext): New.
(sha1_init) [USE_SHAEXT]: Use shaext implementation if supported.
(transform) [USE_SHAEXT]: Use shaext if enabled.
(transform): Only add ASM_EXTRA_STACK if returned burn length is not
zero.
* cipher/sha1.h (SHA1_CONTEXT): Add 'use_shaext'.
* configure.ac: Add 'sha1-intel-shaext.lo'.
(shaextsupport, gcry_cv_gcc_inline_asm_shaext): New.
* src/g10lib.h: Add HWF_INTEL_SHAEXT and reorder HWF flags.
* src/hwf-x86.c (detect_x86_gnuc): Detect SHA Extensions.
* src/hwfeatures.c (hwflist): Add 'intel-shaext'.
--
Benchmark on Intel Celeron J3455 (1500 MHz, no turbo):
Before:
                |  nanosecs/byte   mebibytes/sec   cycles/byte
 SHA1           |      4.50 ns/B     211.7 MiB/s      6.76 c/B

After (4.0x faster):
                |  nanosecs/byte   mebibytes/sec   cycles/byte
 SHA1           |      1.11 ns/B     858.1 MiB/s      1.67 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
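Note for reviewers (not part of the commit message): the new file keeps
the SHA-1 state in the register layout that 'sha1rnds4'/'sha1nexte'
expect, i.e. ABCD word-reversed in one XMM register and E in the top
dword of another, and undoes that layout again in the final "Save
state" block.  The small SSE2-only sketch below just demonstrates that
load/store layout with intrinsics; it is illustrative only and not
taken from the patch (the patch itself sticks to inline asm so that the
XMM registers can be wiped in shaext_cleanup without the compiler
spilling sensitive values in between, as the comment above the macros
explains).

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>
  #include <emmintrin.h>          /* SSE2 intrinsics */

  int
  main (void)
  {
    uint32_t h[5] = { 0x67452301, 0xefcdab89, 0x98badcfe,
                      0x10325476, 0xc3d2e1f0 };
    uint32_t out[5];

    /* Load: ABCD word-reversed (pshufd $0x1b), E shifted into the
     * high dword of its register (pslldq $12). */
    __m128i abcd = _mm_loadu_si128 ((const __m128i *) h);
    __m128i e    = _mm_cvtsi32_si128 ((int) h[4]);
    abcd = _mm_shuffle_epi32 (abcd, 0x1b);
    e    = _mm_slli_si128 (e, 12);

    /* ... the 80 rounds would run here (see the ROUND macro) ... */

    /* Store: undo the layout, as the "Save state" asm block does. */
    abcd = _mm_shuffle_epi32 (abcd, 0x1b);
    e    = _mm_srli_si128 (e, 12);
    _mm_storeu_si128 ((__m128i *) out, abcd);
    out[4] = (uint32_t) _mm_cvtsi128_si32 (e);

    printf ("%s\n", memcmp (h, out, sizeof h) == 0
                    ? "round-trip ok" : "mismatch");
    return 0;
  }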
diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 625a0ef69..110a48b2c 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -92,6 +92,7 @@ seed.c \
serpent.c serpent-sse2-amd64.S serpent-avx2-amd64.S serpent-armv7-neon.S \
sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \
sha1-armv7-neon.S sha1-armv8-aarch32-ce.S sha1-armv8-aarch64-ce.S \
+ sha1-intel-shaext.c \
sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S sha256-avx2-bmi2-amd64.S \
sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \
sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S sha512-avx2-bmi2-amd64.S \
diff --git a/cipher/sha1-intel-shaext.c b/cipher/sha1-intel-shaext.c
new file mode 100644
index 000000000..5a2349e1e
--- /dev/null
+++ b/cipher/sha1-intel-shaext.c
@@ -0,0 +1,281 @@
+/* sha1-intel-shaext.c - SHAEXT accelerated SHA-1 transform function
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "types.h"
+
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+ defined(HAVE_GCC_INLINE_ASM_SSE41) && defined(USE_SHA1) && \
+ defined(ENABLE_SHAEXT_SUPPORT)
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+# pragma GCC target("no-sse")
+#endif
+
+/* Two macros to be called before and after the use of SHA-EXT
+ instructions. There should be no external function calls between
+ the use of these macros. Their purpose is to make sure that the
+ SSE registers are cleared and won't reveal any information about
+ the key or the data. */
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define shaext_prepare_variable char win64tmp[2*16]
+# define shaext_prepare_variable_size sizeof(win64tmp)
+# define shaext_prepare() \
+ do { asm volatile ("movdqu %%xmm6, (%0)\n" \
+ "movdqu %%xmm7, (%1)\n" \
+ : \
+ : "r" (&win64tmp[0]), "r" (&win64tmp[16]) \
+ : "memory"); \
+ } while (0)
+# define shaext_cleanup(tmp0,tmp1) \
+ do { asm volatile ("movdqu (%0), %%xmm6\n" \
+ "movdqu (%1), %%xmm7\n" \
+ "pxor %%xmm0, %%xmm0\n" \
+ "pxor %%xmm1, %%xmm1\n" \
+ "pxor %%xmm2, %%xmm2\n" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ "movdqa %%xmm0, (%2)\n\t" \
+ "movdqa %%xmm0, (%3)\n\t" \
+ : \
+ : "r" (&win64tmp[0]), "r" (&win64tmp[16]), \
+ "r" (tmp0), "r" (tmp1) \
+ : "memory"); \
+ } while (0)
+#else
+# define shaext_prepare_variable
+# define shaext_prepare_variable_size 0
+# define shaext_prepare() do { } while (0)
+# define shaext_cleanup(tmp0,tmp1) \
+ do { asm volatile ("pxor %%xmm0, %%xmm0\n" \
+ "pxor %%xmm1, %%xmm1\n" \
+ "pxor %%xmm2, %%xmm2\n" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ "pxor %%xmm6, %%xmm6\n" \
+ "pxor %%xmm7, %%xmm7\n" \
+ "movdqa %%xmm0, (%0)\n\t" \
+ "movdqa %%xmm0, (%1)\n\t" \
+ : \
+ : "r" (tmp0), "r" (tmp1) \
+ : "memory"); \
+ } while (0)
+#endif
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ */
+unsigned int
+_gcry_sha1_transform_intel_shaext(void *state, const unsigned char *data,
+ size_t nblks)
+{
+ static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+ char save_buf[2 * 16 + 15];
+ char *abcd_save;
+ char *e_save;
+ shaext_prepare_variable;
+
+ if (nblks == 0)
+ return 0;
+
+ shaext_prepare ();
+
+ asm volatile ("" : "=r" (abcd_save) : "0" (save_buf) : "memory");
+ abcd_save = abcd_save + (-(uintptr_t)abcd_save & 15);
+ e_save = abcd_save + 16;
+
+ /* byteswap mask => XMM7 */
+ asm volatile ("movdqa %[mask], %%xmm7\n\t" /* Preload mask */
+ :
+ : [mask] "m" (*be_mask)
+ : "memory");
+
+ /* Load state.. ABCD => XMM4, E => XMM5 */
+ asm volatile ("movd 16(%[state]), %%xmm5\n\t"
+ "movdqu (%[state]), %%xmm4\n\t"
+ "pslldq $12, %%xmm5\n\t"
+ "pshufd $0x1b, %%xmm4, %%xmm4\n\t"
+ "movdqa %%xmm5, (%[e_save])\n\t"
+ "movdqa %%xmm4, (%[abcd_save])\n\t"
+ :
+ : [state] "r" (state), [abcd_save] "r" (abcd_save),
+ [e_save] "r" (e_save)
+ : "memory" );
+
+ /* DATA => XMM[0..3] */
+ asm volatile ("movdqu 0(%[data]), %%xmm0\n\t"
+ "movdqu 16(%[data]), %%xmm1\n\t"
+ "movdqu 32(%[data]), %%xmm2\n\t"
+ "movdqu 48(%[data]), %%xmm3\n\t"
+ "pshufb %%xmm7, %%xmm0\n\t"
+ "pshufb %%xmm7, %%xmm1\n\t"
+ "pshufb %%xmm7, %%xmm2\n\t"
+ "pshufb %%xmm7, %%xmm3\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+ data += 64;
+
+ while (1)
+ {
+ /* Round 0..3 */
+ asm volatile ("paddd %%xmm0, %%xmm5\n\t"
+ "movdqa %%xmm4, %%xmm6\n\t" /* ABCD => E1 */
+ "sha1rnds4 $0, %%xmm5, %%xmm4\n\t"
+ ::: "memory" );
+
+ /* Round 4..7 */
+ asm volatile ("sha1nexte %%xmm1, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1rnds4 $0, %%xmm6, %%xmm4\n\t"
+ "sha1msg1 %%xmm1, %%xmm0\n\t"
+ ::: "memory" );
+
+ /* Round 8..11 */
+ asm volatile ("sha1nexte %%xmm2, %%xmm5\n\t"
+ "movdqa %%xmm4, %%xmm6\n\t"
+ "sha1rnds4 $0, %%xmm5, %%xmm4\n\t"
+ "sha1msg1 %%xmm2, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm0\n\t"
+ ::: "memory" );
+
+#define ROUND(imm, E0, E1, MSG0, MSG1, MSG2, MSG3) \
+ asm volatile ("sha1nexte %%"MSG0", %%"E0"\n\t" \
+ "movdqa %%xmm4, %%"E1"\n\t" \
+ "sha1msg2 %%"MSG0", %%"MSG1"\n\t" \
+ "sha1rnds4 $"imm", %%"E0", %%xmm4\n\t" \
+ "sha1msg1 %%"MSG0", %%"MSG3"\n\t" \
+ "pxor %%"MSG0", %%"MSG2"\n\t" \
+ ::: "memory" )
+
+ /* Rounds 12..15 to 64..67 */
+ ROUND("0", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+ ROUND("0", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+ ROUND("1", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+ ROUND("1", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+ ROUND("1", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+ ROUND("1", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+ ROUND("1", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+ ROUND("2", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+ ROUND("2", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+ ROUND("2", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+ ROUND("2", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+ ROUND("2", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+ ROUND("3", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+ ROUND("3", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+
+ if (--nblks == 0)
+ break;
+
+ /* Round 68..71 */
+ asm volatile ("movdqu 0(%[data]), %%xmm0\n\t"
+ "sha1nexte %%xmm1, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1msg2 %%xmm1, %%xmm2\n\t"
+ "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm1, %%xmm3\n\t"
+ "pshufb %%xmm7, %%xmm0\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+
+ /* Round 72..75 */
+ asm volatile ("movdqu 16(%[data]), %%xmm1\n\t"
+ "sha1nexte %%xmm2, %%xmm5\n\t"
+ "movdqa %%xmm4, %%xmm6\n\t"
+ "sha1msg2 %%xmm2, %%xmm3\n\t"
+ "sha1rnds4 $3, %%xmm5, %%xmm4\n\t"
+ "pshufb %%xmm7, %%xmm1\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+
+ /* Round 76..79 */
+ asm volatile ("movdqu 32(%[data]), %%xmm2\n\t"
+ "sha1nexte %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+ "pshufb %%xmm7, %%xmm2\n\t"
+ :
+ : [data] "r" (data)
+ : "memory" );
+
+ /* Merge states, store current. */
+ asm volatile ("movdqu 48(%[data]), %%xmm3\n\t"
+ "sha1nexte (%[e_save]), %%xmm5\n\t"
+ "paddd (%[abcd_save]), %%xmm4\n\t"
+ "pshufb %%xmm7, %%xmm3\n\t"
+ "movdqa %%xmm5, (%[e_save])\n\t"
+ "movdqa %%xmm4, (%[abcd_save])\n\t"
+ :
+ : [abcd_save] "r" (abcd_save), [e_save] "r" (e_save),
+ [data] "r" (data)
+ : "memory" );
+
+ data += 64;
+ }
+
+ /* Round 68..71 */
+ asm volatile ("sha1nexte %%xmm1, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1msg2 %%xmm1, %%xmm2\n\t"
+ "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+ "pxor %%xmm1, %%xmm3\n\t"
+ ::: "memory" );
+
+ /* Round 72..75 */
+ asm volatile ("sha1nexte %%xmm2, %%xmm5\n\t"
+ "movdqa %%xmm4, %%xmm6\n\t"
+ "sha1msg2 %%xmm2, %%xmm3\n\t"
+ "sha1rnds4 $3, %%xmm5, %%xmm4\n\t"
+ ::: "memory" );
+
+ /* Round 76..79 */
+ asm volatile ("sha1nexte %%xmm3, %%xmm6\n\t"
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+ ::: "memory" );
+
+ /* Merge states. */
+ asm volatile ("sha1nexte (%[e_save]), %%xmm5\n\t"
+ "paddd (%[abcd_save]), %%xmm4\n\t"
+ :
+ : [abcd_save] "r" (abcd_save), [e_save] "r" (e_save)
+ : "memory" );
+
+ /* Save state */
+ asm volatile ("pshufd $0x1b, %%xmm4, %%xmm4\n\t"
+ "psrldq $12, %%xmm5\n\t"
+ "movdqu %%xmm4, (%[state])\n\t"
+ "movd %%xmm5, 16(%[state])\n\t"
+ :
+ : [state] "r" (state)
+ : "memory" );
+
+ shaext_cleanup (abcd_save, e_save);
+ return 0;
+}
+
+#endif /* HAVE_GCC_INLINE_ASM_SHAEXT */
diff --git a/cipher/sha1.c b/cipher/sha1.c
index 78b172f24..09868aa3f 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -68,6 +68,14 @@
# define USE_BMI2 1
#endif
+/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
+#undef USE_SHAEXT
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+ defined(HAVE_GCC_INLINE_ASM_SSE41) && \
+ defined(ENABLE_SHAEXT_SUPPORT)
+# define USE_SHAEXT 1
+#endif
+
/* USE_NEON indicates whether to enable ARM NEON assembly code. */
#undef USE_NEON
#ifdef ENABLE_NEON_SUPPORT
@@ -138,6 +146,10 @@ sha1_init (void *context, unsigned int flags)
#ifdef USE_BMI2
hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
#endif
+#ifdef USE_SHAEXT
+ hd->use_shaext = (features & HWF_INTEL_SHAEXT)
+ && (features & HWF_INTEL_SSE4_1);
+#endif
#ifdef USE_NEON
hd->use_neon = (features & HWF_ARM_NEON) != 0;
#endif
@@ -311,7 +323,8 @@ transform_blk (void *ctx, const unsigned char *data)
* stack to store XMM6-XMM15 needed on Win64. */
#undef ASM_FUNC_ABI
#undef ASM_EXTRA_STACK
-#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2)
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
+ defined(USE_SHAEXT)
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
# define ASM_FUNC_ABI __attribute__((sysv_abi))
# define ASM_EXTRA_STACK (10 * 16)
@@ -340,6 +353,13 @@ _gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
size_t nblks) ASM_FUNC_ABI;
#endif
+#ifdef USE_SHAEXT
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
+ size_t nblks);
+#endif
+
static unsigned int
transform (void *ctx, const unsigned char *data, size_t nblks)
@@ -347,29 +367,53 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
SHA1_CONTEXT *hd = ctx;
unsigned int burn;
+#ifdef USE_SHAEXT
+ if (hd->use_shaext)
+ {
+ burn = _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
+ burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
+ return burn;
+ }
+#endif
#ifdef USE_BMI2
if (hd->use_bmi2)
- return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+ {
+ burn = _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks);
+ burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
+ return burn;
+ }
#endif
#ifdef USE_AVX
if (hd->use_avx)
- return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+ {
+ burn = _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks);
+ burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
+ return burn;
+ }
#endif
#ifdef USE_SSSE3
if (hd->use_ssse3)
- return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
- + 4 * sizeof(void*) + ASM_EXTRA_STACK;
+ {
+ burn = _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks);
+ burn += burn ? 4 * sizeof(void*) + ASM_EXTRA_STACK : 0;
+ return burn;
+ }
#endif
#ifdef USE_ARM_CE
if (hd->use_arm_ce)
- return _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
+ {
+ burn = _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
+ burn += burn ? 4 * sizeof(void*) : 0;
+ return burn;
+ }
#endif
#ifdef USE_NEON
if (hd->use_neon)
- return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks)
- + 4 * sizeof(void*);
+ {
+ burn = _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
+ burn += burn ? 4 * sizeof(void*) : 0;
+ return burn;
+ }
#endif
do
diff --git a/cipher/sha1.h b/cipher/sha1.h
index d448fcac8..93ce79b5c 100644
--- a/cipher/sha1.h
+++ b/cipher/sha1.h
@@ -29,6 +29,7 @@ typedef struct
unsigned int use_ssse3:1;
unsigned int use_avx:1;
unsigned int use_bmi2:1;
+ unsigned int use_shaext:1;
unsigned int use_neon:1;
unsigned int use_arm_ce:1;
} SHA1_CONTEXT;
diff --git a/configure.ac b/configure.ac
index 305b19f7e..4ae7667b3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -588,6 +588,14 @@ AC_ARG_ENABLE(aesni-support,
aesnisupport=$enableval,aesnisupport=yes)
AC_MSG_RESULT($aesnisupport)
+# Implementation of the --disable-shaext-support switch.
+AC_MSG_CHECKING([whether SHAEXT support is requested])
+AC_ARG_ENABLE(shaext-support,
+ AC_HELP_STRING([--disable-shaext-support],
+ [Disable support for the Intel SHAEXT instructions]),
+ shaextsupport=$enableval,shaextsupport=yes)
+AC_MSG_RESULT($shaextsupport)
+
# Implementation of the --disable-pclmul-support switch.
AC_MSG_CHECKING([whether PCLMUL support is requested])
AC_ARG_ENABLE(pclmul-support,
@@ -1175,6 +1183,7 @@ AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes)
# Reset non applicable feature flags.
if test "$mpi_cpu_arch" != "x86" ; then
aesnisupport="n/a"
+ shaextsupport="n/a"
pclmulsupport="n/a"
sse41support="n/a"
avxsupport="n/a"
@@ -1329,6 +1338,34 @@ if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then
[Defined if inline assembler supports PCLMUL instructions])
fi
+
+#
+# Check whether GCC inline assembler supports SHA Extensions instructions.
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports SHA Extensions instructions],
+ [gcry_cv_gcc_inline_asm_shaext],
+ [if test "$mpi_cpu_arch" != "x86" ; then
+ gcry_cv_gcc_inline_asm_shaext="n/a"
+ else
+ gcry_cv_gcc_inline_asm_shaext=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[void a(void) {
+ __asm__("sha1rnds4 \$0, %%xmm1, %%xmm3\n\t":::"cc");
+ __asm__("sha1nexte %%xmm1, %%xmm3\n\t":::"cc");
+ __asm__("sha1msg1 %%xmm1, %%xmm3\n\t":::"cc");
+ __asm__("sha1msg2 %%xmm1, %%xmm3\n\t":::"cc");
+ __asm__("sha256rnds2 %%xmm0, %%xmm1, %%xmm3\n\t":::"cc");
+ __asm__("sha256msg1 %%xmm1, %%xmm3\n\t":::"cc");
+ __asm__("sha256msg2 %%xmm1, %%xmm3\n\t":::"cc");
+ }]])],
+ [gcry_cv_gcc_inline_asm_shaext=yes])
+ fi])
+if test "$gcry_cv_gcc_inline_asm_shaext" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_INLINE_ASM_SHAEXT,1,
+ [Defined if inline assembler supports SHA Extensions instructions])
+fi
+
+
#
# Check whether GCC inline assembler supports SSE4.1 instructions.
#
@@ -1921,6 +1958,11 @@ if test x"$aesnisupport" = xyes ; then
aesnisupport="no (unsupported by compiler)"
fi
fi
+if test x"$shaextsupport" = xyes ; then
+ if test "$gcry_cv_gcc_inline_asm_shaext" != "yes" ; then
+ shaextsupport="no (unsupported by compiler)"
+ fi
+fi
if test x"$pclmulsupport" = xyes ; then
if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then
pclmulsupport="no (unsupported by compiler)"
@@ -1960,6 +2002,10 @@ if test x"$aesnisupport" = xyes ; then
AC_DEFINE(ENABLE_AESNI_SUPPORT, 1,
[Enable support for Intel AES-NI instructions.])
fi
+if test x"$shaextsupport" = xyes ; then
+ AC_DEFINE(ENABLE_SHAEXT_SUPPORT, 1,
+ [Enable support for Intel SHAEXT instructions.])
+fi
if test x"$pclmulsupport" = xyes ; then
AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1,
[Enable support for Intel PCLMUL instructions.])
@@ -2449,6 +2495,13 @@ case "${host}" in
;;
esac
+case "$mpi_cpu_arch" in
+ x86)
+ # Build with the SHAEXT implementation
+ GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1-intel-shaext.lo"
+ ;;
+esac
+
LIST_MEMBER(sm3, $enabled_digests)
if test "$found" = "1" ; then
GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo"
@@ -2634,6 +2687,7 @@ GCRY_MSG_SHOW([Try using jitter entropy: ],[$jentsupport])
GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities])
GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport])
GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport])
+GCRY_MSG_SHOW([Try using Intel SHAEXT: ],[$shaextsupport])
GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport])
GCRY_MSG_SHOW([Try using Intel SSE4.1: ],[$sse41support])
GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport])
diff --git a/src/g10lib.h b/src/g10lib.h
index 961b51541..d41fa0cf7 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -224,14 +224,14 @@ char **_gcry_strtokenize (const char *string, const char *delim);
#define HWF_INTEL_AVX (1 << 12)
#define HWF_INTEL_AVX2 (1 << 13)
#define HWF_INTEL_FAST_VPGATHER (1 << 14)
-
-#define HWF_ARM_NEON (1 << 15)
-#define HWF_ARM_AES (1 << 16)
-#define HWF_ARM_SHA1 (1 << 17)
-#define HWF_ARM_SHA2 (1 << 18)
-#define HWF_ARM_PMULL (1 << 19)
-
-#define HWF_INTEL_RDTSC (1 << 20)
+#define HWF_INTEL_RDTSC (1 << 15)
+#define HWF_INTEL_SHAEXT (1 << 16)
+
+#define HWF_ARM_NEON (1 << 17)
+#define HWF_ARM_AES (1 << 18)
+#define HWF_ARM_SHA1 (1 << 19)
+#define HWF_ARM_SHA2 (1 << 20)
+#define HWF_ARM_PMULL (1 << 21)
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index 0d3a1f40e..b644eda1f 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -357,6 +357,10 @@ detect_x86_gnuc (void)
if ((result & HWF_INTEL_AVX2) && !avoid_vpgather)
result |= HWF_INTEL_FAST_VPGATHER;
#endif /*ENABLE_AVX_SUPPORT*/
+
+ /* Test bit 29 for SHA Extensions. */
+ if (features & (1 << 29))
+ result |= HWF_INTEL_SHAEXT;
}
return result;
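[For reference, the bit tested above is CPUID.(EAX=07H,ECX=0):EBX[29].
A standalone check outside of libgcrypt could look like the sketch
below; it is illustrative only, not part of the patch, and relies on
the __get_cpuid_count() helper from GCC/clang <cpuid.h>, which recent
compilers provide.

  #include <stdio.h>
  #include <cpuid.h>

  static int
  has_sha_ext (void)
  {
    unsigned int eax, ebx, ecx, edx;

    /* __get_cpuid_count() checks the maximum supported leaf first
     * and returns 0 if leaf 7 is not available. */
    if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
      return 0;

    return (ebx >> 29) & 1;
  }

  int
  main (void)
  {
    printf ("SHA extensions: %s\n", has_sha_ext () ? "yes" : "no");
    return 0;
  }
]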
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index 1cad546d2..e08166945 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -58,6 +58,7 @@ static struct
{ HWF_INTEL_AVX2, "intel-avx2" },
{ HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" },
{ HWF_INTEL_RDTSC, "intel-rdtsc" },
+ { HWF_INTEL_SHAEXT, "intel-shaext" },
{ HWF_ARM_NEON, "arm-neon" },
{ HWF_ARM_AES, "arm-aes" },
{ HWF_ARM_SHA1, "arm-sha1" },
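[With the flag exported here, "intel-shaext" can be masked at runtime
like any other hardware feature, which is handy for comparing against
the non-accelerated code paths.  A minimal sketch, assuming the usual
GCRYCTL_DISABLE_HWF semantics (the control must be issued before the
library is initialized); illustrative only, not part of the patch:

  #include <stdio.h>
  #include <gcrypt.h>

  int
  main (void)
  {
    unsigned char digest[20];

    /* Mask the new feature flag before initialization. */
    gcry_control (GCRYCTL_DISABLE_HWF, "intel-shaext", NULL);

    if (!gcry_check_version (GCRYPT_VERSION))
      return 1;
    gcry_control (GCRYCTL_DISABLE_SECMEM, 0);
    gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

    /* SHA-1 now falls back to the next best implementation. */
    gcry_md_hash_buffer (GCRY_MD_SHA1, digest, "abc", 3);
    for (int i = 0; i < 20; i++)
      printf ("%02x", digest[i]);
    putchar ('\n');
    return 0;
  }
]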