[git] GCRYPT - branch, master, updated. libgcrypt-1.5.0-441-gd2b8532

by Jussi Kivilinna cvs at cvs.gnupg.org
Fri Dec 13 21:49:36 CET 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  d2b853246c2ed056a92096d89c3ca057e45c9c92 (commit)
      from  be2238f68abcc6f2b4e8c38ad9141376ce622a22 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit d2b853246c2ed056a92096d89c3ca057e45c9c92
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Fri Dec 13 21:07:41 2013 +0200

    Convert SHA-1 SSSE3 implementation from mixed asm&C to pure asm
    
    * cipher/Makefile.am: Change 'sha1-ssse3-amd64.c' to
    'sha1-ssse3-amd64.S'.
    * cipher/sha1-ssse3-amd64.c: Remove.
    * cipher/sha1-ssse3-amd64.S: New.
    --
    
    The mixed C&asm implementation appears to trigger GCC bugs easily.
    Therefore, convert the SSSE3 implementation to pure assembly for safety.

    Benchmarks also show a small speed improvement.
    
    cpu             C&asm     asm
    Intel i5-4570   5.22 c/B  5.09 c/B
    Intel i5-2450M  7.24 c/B  7.00 c/B
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
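
For illustration only (this sketch is not part of the commit): the fragility of the
mixed approach comes from splitting one logical operation across separate __asm__
statements that communicate through XMM registers behind the compiler's back, in the
same style as the W_PRECALC_* macros in the removed sha1-ssse3-amd64.c. GCC gives no
guarantee that such register contents survive from one statement to the next, so a
compiler upgrade or a change in optimization settings can silently break the code.
A minimal, hypothetical C sketch of the pattern:

    /* Illustrative only -- not from the commit.  The two __asm__
     * statements share %xmm0 without any operand or clobber tying
     * them together, so GCC may clobber it in between.  */
    #include <stdint.h>

    void
    fragile_add4 (uint32_t dst[4], const uint32_t a[4], const uint32_t b[4])
    {
      /* Load A into %xmm0 ... */
      __asm__ volatile ("movdqu %0, %%xmm0"
                        : : "m" (*(const uint32_t (*)[4])a));
      /* ... and assume %xmm0 still holds A here, which nothing guarantees. */
      __asm__ volatile ("movdqu %1, %%xmm1\n\t"
                        "paddd  %%xmm0, %%xmm1\n\t"
                        "movdqu %%xmm1, %0"
                        : "=m" (*(uint32_t (*)[4])dst)
                        : "m" (*(const uint32_t (*)[4])b));
    }

The pure-assembly sha1-ssse3-amd64.S below avoids the problem entirely: the whole
transform is one assembly unit, so register lifetimes no longer depend on what the
compiler chooses to do between statements.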

diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 0477772..7d737e2 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -77,7 +77,7 @@ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
 scrypt.c \
 seed.c \
 serpent.c serpent-sse2-amd64.S serpent-avx2-amd64.S \
-sha1.c sha1-ssse3-amd64.c \
+sha1.c sha1-ssse3-amd64.S \
 sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S sha256-avx2-bmi2-amd64.S \
 sha512.c sha512-ssse3-amd64.S sha512-armv7-neon.S \
 stribog.c \
diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S
new file mode 100644
index 0000000..5165f3f
--- /dev/null
+++ b/cipher/sha1-ssse3-amd64.S
@@ -0,0 +1,378 @@
+/* sha1-ssse3-amd64.S - Intel SSSE3 accelerated SHA-1 transform function
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * Based on sha1.c:
+ *  Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel SSSE3 accelerated SHA-1 implementation based on white paper:
+ *  "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ *  http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1)
+
+#ifdef __PIC__
+#  define RIP (%rip)
+#else
+#  define RIP
+#endif
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+.data
+#define K1  0x5A827999
+#define K2  0x6ED9EBA1
+#define K3  0x8F1BBCDC
+#define K4  0xCA62C1D6
+.align 16
+.LK_XMM:
+.LK1:	.long K1, K1, K1, K1
+.LK2:	.long K2, K2, K2, K2
+.LK3:	.long K3, K3, K3, K3
+.LK4:	.long K4, K4, K4, K4
+
+.Lbswap_shufb_ctl:
+	.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+
+#define a %eax
+#define b %ebx
+#define c %ecx
+#define d %edx
+#define e %edi
+
+#define RT0 %esi
+#define RT1 %ebp
+
+#define Wtmp0 %xmm0
+#define Wtmp1 %xmm1
+
+#define W0 %xmm2
+#define W1 %xmm3
+#define W2 %xmm4
+#define W3 %xmm5
+#define W4 %xmm6
+#define W5 %xmm7
+#define W6 %xmm8
+#define W7 %xmm9
+
+#define BSWAP_REG %xmm10
+
+
+/* Round function macros. */
+
+#define WK(i) (((i) & 15) * 4)(%rsp)
+
+#define R_F1(a,b,c,d,e,i) \
+	movl c, RT0; \
+	addl WK(i), e; \
+	xorl d, RT0; \
+	movl a, RT1; \
+	andl b, RT0; \
+	roll $30, b; \
+	xorl d, RT0; \
+	leal (RT0,e), e; \
+	roll $5, RT1; \
+	addl RT1, e;
+
+#define R_F2(a,b,c,d,e,i) \
+	movl c, RT0; \
+	addl WK(i), e; \
+	xorl b, RT0; \
+	roll $30, b; \
+	xorl d, RT0; \
+	movl a, RT1; \
+	leal (RT0,e), e; \
+	roll $5, RT1; \
+	addl RT1, e;
+
+#define R_F3(a,b,c,d,e,i) \
+	movl c, RT0; \
+	movl b, RT1; \
+	xorl b, RT0; \
+	andl c, RT1; \
+	andl d, RT0; \
+	addl RT1, e; \
+	addl WK(i), e; \
+	roll $30, b; \
+	movl a, RT1; \
+	leal (RT0,e), e; \
+	roll $5, RT1; \
+	addl RT1, e;
+
+#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i)
+
+#define R(a,b,c,d,e,f,i) \
+	R_##f(a,b,c,d,e,i)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+	movdqu (4*(i))(RDATA), tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+	pshufb BSWAP_REG, tmp0; \
+	movdqa tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0) \
+	paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+	movdqa tmp0, WK(i&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+	movdqa W_m12, W; \
+	palignr $8, W_m16, W; \
+	movdqa W_m04, tmp0; \
+	psrldq $4, tmp0; \
+	pxor W_m08, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+	pxor W_m16, tmp0; \
+	pxor tmp0, W; \
+	movdqa W, tmp1; \
+	movdqa W, tmp0; \
+	pslldq $12, tmp1;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+	psrld $31, W; \
+	pslld $1, tmp0; \
+	por W, tmp0; \
+	movdqa tmp1, W; \
+	psrld $30, tmp1; \
+	pslld $2, W;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+	pxor W, tmp0; \
+	pxor tmp1, tmp0; \
+	movdqa tmp0, W; \
+	paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; \
+	movdqa tmp0, WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+	movdqa W_m04, tmp0; \
+	pxor W_m28, W; \
+	palignr $8, W_m08, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+	pxor W_m16, W; \
+	pxor tmp0, W; \
+	movdqa W, tmp0;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+	psrld $30, W; \
+	pslld $2, tmp0; \
+	por W, tmp0;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
+	movdqa tmp0, W; \
+	paddd (.LK_XMM + ((i)/20)*16) RIP, tmp0; \
+	movdqa tmp0, WK((i)&~3);
+
+#define CLEAR_REG(reg) pxor reg, reg;
+
+
+/*
+ * Transform 64 bytes (16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data)
+ */
+.text
+.globl _gcry_sha1_transform_amd64_ssse3
+.type _gcry_sha1_transform_amd64_ssse3,@function
+.align 16
+_gcry_sha1_transform_amd64_ssse3:
+  /* input:
+   *	%rdi: ctx, CTX
+   *	%rsi: data (64 bytes)
+   *	%rdx: ...
+   */
+
+  movq %rdi, RSTATE;
+  movq %rsi, RDATA;
+  pushq %rbx;
+  pushq %rbp;
+
+  movq %rsp, ROLDSTACK;
+
+  subq $(16*4), %rsp;
+  andq $(~31), %rsp;
+
+  /* Get the values of the chaining variables. */
+  movl state_h0(RSTATE), a;
+  movl state_h1(RSTATE), b;
+  movl state_h2(RSTATE), c;
+  movl state_h3(RSTATE), d;
+  movl state_h4(RSTATE), e;
+
+  movdqa .Lbswap_shufb_ctl RIP, BSWAP_REG;
+
+  /* Precalc 0-15. */
+  W_PRECALC_00_15_0(0, W0, Wtmp0);
+  W_PRECALC_00_15_1(1, W0, Wtmp0);
+  W_PRECALC_00_15_2(2, W0, Wtmp0);
+  W_PRECALC_00_15_3(3, W0, Wtmp0);
+  W_PRECALC_00_15_0(4, W7, Wtmp0);
+  W_PRECALC_00_15_1(5, W7, Wtmp0);
+  W_PRECALC_00_15_2(6, W7, Wtmp0);
+  W_PRECALC_00_15_3(7, W7, Wtmp0);
+  W_PRECALC_00_15_0(8, W6, Wtmp0);
+  W_PRECALC_00_15_1(9, W6, Wtmp0);
+  W_PRECALC_00_15_2(10, W6, Wtmp0);
+  W_PRECALC_00_15_3(11, W6, Wtmp0);
+  W_PRECALC_00_15_0(12, W5, Wtmp0);
+  W_PRECALC_00_15_1(13, W5, Wtmp0);
+  W_PRECALC_00_15_2(14, W5, Wtmp0);
+  W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+  /* Transform 0-15 + Precalc 16-31. */
+  R( a, b, c, d, e, F1,  0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+  R( e, a, b, c, d, F1,  1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+  R( d, e, a, b, c, F1,  2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+  R( c, d, e, a, b, F1,  3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+  R( b, c, d, e, a, F1,  4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+  R( a, b, c, d, e, F1,  5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+  R( e, a, b, c, d, F1,  6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+  R( d, e, a, b, c, F1,  7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+  R( c, d, e, a, b, F1,  8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+  R( b, c, d, e, a, F1,  9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+  R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+  R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+  R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+  R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+  R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+  R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+
+  /* Transform 16-63 + Precalc 32-79. */
+  R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+  R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+  R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+  R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+  R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+  R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+  R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+  R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+  R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+  R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+  R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+  R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+  R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+  R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+  R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+  R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+  R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+  R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+  R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+  R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
+  R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+  R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+  R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+  R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
+  R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+  R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+  R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+  R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
+  R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+  R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+  R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+  R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
+  R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+  R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+  R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+  R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
+  R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+  R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+  R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+  R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
+  R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+  R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+  R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+  R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
+  R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+  R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+  R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+  R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
+
+  /* Transform 64-79 + Clear XMM registers. */
+  R( b, c, d, e, a, F4, 64 ); CLEAR_REG(BSWAP_REG);
+  R( a, b, c, d, e, F4, 65 ); CLEAR_REG(Wtmp0);
+  R( e, a, b, c, d, F4, 66 ); CLEAR_REG(Wtmp1);
+  R( d, e, a, b, c, F4, 67 ); CLEAR_REG(W0);
+  R( c, d, e, a, b, F4, 68 ); CLEAR_REG(W1);
+  R( b, c, d, e, a, F4, 69 ); CLEAR_REG(W2);
+  R( a, b, c, d, e, F4, 70 ); CLEAR_REG(W3);
+  R( e, a, b, c, d, F4, 71 ); CLEAR_REG(W4);
+  R( d, e, a, b, c, F4, 72 ); CLEAR_REG(W5);
+  R( c, d, e, a, b, F4, 73 ); CLEAR_REG(W6);
+  R( b, c, d, e, a, F4, 74 ); CLEAR_REG(W7);
+  R( a, b, c, d, e, F4, 75 );
+  R( e, a, b, c, d, F4, 76 );
+  R( d, e, a, b, c, F4, 77 );
+  R( c, d, e, a, b, F4, 78 );
+  R( b, c, d, e, a, F4, 79 );
+
+  /* Update the chaining variables. */
+  addl state_h0(RSTATE), a;
+  addl state_h1(RSTATE), b;
+  addl state_h2(RSTATE), c;
+  addl state_h3(RSTATE), d;
+  addl state_h4(RSTATE), e;
+
+  movl a, state_h0(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl d, state_h3(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  movq ROLDSTACK, %rsp;
+
+  popq %rbp;
+  popq %rbx;
+
+  /* burn_stack */
+  movl $(16*4 + 2*8 + 31), %eax;
+
+  ret;
+
+#endif
+#endif
diff --git a/cipher/sha1-ssse3-amd64.c b/cipher/sha1-ssse3-amd64.c
deleted file mode 100644
index 1342235..0000000
--- a/cipher/sha1-ssse3-amd64.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/* sha1-ssse3-amd64.c - Intel SSSE3 accelerated SHA-1 transform function
- * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
- *
- * Based on sha1.c:
- *  Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/*
- * Intel SSSE3 accelerated SHA-1 implementation based on white paper:
- *  "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
- *  http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
- */
-
-#ifdef __x86_64__
-#include <config.h>
-
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1)
-
-#ifdef HAVE_STDINT_H
-# include <stdint.h> /* uintptr_t */
-#elif defined(HAVE_INTTYPES_H)
-# include <inttypes.h>
-#else
-/* In this case, uintptr_t is provided by config.h. */
-#endif
-
-#include "bithelp.h"
-
-
-/* Helper macro to force alignment to 16 bytes.  */
-#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
-# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
-#else
-# define ATTR_ALIGNED_16
-#endif
-
-
-typedef struct
-{
-  u32           h0,h1,h2,h3,h4;
-} SHA1_STATE;
-
-
-/* Round function macros. */
-#define K1  0x5A827999L
-#define K2  0x6ED9EBA1L
-#define K3  0x8F1BBCDCL
-#define K4  0xCA62C1D6L
-#define F1(x,y,z)   ( z ^ ( x & ( y ^ z ) ) )
-#define F2(x,y,z)   ( x ^ y ^ z )
-#define F3(x,y,z)   ( ( x & y ) | ( z & ( x | y ) ) )
-#define F4(x,y,z)   ( x ^ y ^ z )
-#define R(a,b,c,d,e,f,wk)  do { e += rol( a, 5 )	\
-				      + f( b, c, d )	\
-				      + wk;	 	\
-				 b = rol( b, 30 );	\
-			       } while(0)
-
-#define WK(i) (wk[i & 15])
-
-
-static const u32 K_XMM[4][4] ATTR_ALIGNED_16 =
-  {
-    { K1, K1, K1, K1 },
-    { K2, K2, K2, K2 },
-    { K3, K3, K3, K3 },
-    { K4, K4, K4, K4 },
-  };
-static const u32 bswap_shufb_ctl[4] ATTR_ALIGNED_16 =
-  { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f };
-
-
-/*
- * Transform 64 bytes (16 32-bit words) at DATA.
- */
-unsigned int
-_gcry_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data)
-{
-  SHA1_STATE *state = ctx;
-  register u32 a, b, c, d, e; /* Local copies of the chaining variables.  */
-  byte wk_unaligned[4*16+15];  /* The array we work on. */
-  u32 *wk = (u32 *)(wk_unaligned
-                    + ((16 - ((uintptr_t)wk_unaligned & 15)) & 15));
-
-  /* Get the values of the chaining variables. */
-  a = state->h0;
-  b = state->h1;
-  c = state->h2;
-  d = state->h3;
-  e = state->h4;
-
-#define Wtmp0 "xmm0"
-#define Wtmp1 "xmm1"
-
-#define W0 "xmm2"
-#define W1 "xmm3"
-#define W2 "xmm4"
-#define W3 "xmm5"
-#define W4 "xmm6"
-#define W5 "xmm7"
-#define W6 "xmm8"
-#define W7 "xmm9"
-
-#define BSWAP_REG "xmm10"
-
-  __asm__ volatile ("movdqa %[bswap], %%"BSWAP_REG";\n\t"
-                    :: [bswap] "m" (bswap_shufb_ctl[0]));
-
-#define W_PRECALC_00_15_0(i, W, tmp0) \
-  __asm__ volatile ("movdqu %[data], %%"tmp0";\n\t" \
-                    ::[data] "m" (*(data+4*(i))));
-
-#define W_PRECALC_00_15_1(i, W, tmp0) \
-  __asm__ volatile ("pshufb %%"BSWAP_REG", %%"tmp0";\n\t" \
-                    "movdqa %%"tmp0", %%"W";\n\t" \
-                    ::: "cc");
-
-#define W_PRECALC_00_15_2(i, W, tmp0) \
-  __asm__ volatile ("paddd %[k_xmm], %%"tmp0";\n\t" \
-                    ::[k_xmm] "m" (K_XMM[i / 20][0]));
-
-#define W_PRECALC_00_15_3(i, W, tmp0) \
-  __asm__ volatile ("movdqa %%"tmp0", %[wk];\n\t" \
-                    :[wk] "=m" (WK(i&~3)));
-
-  /* Precalc 0-15. */
-  W_PRECALC_00_15_0(0, W0, Wtmp0);
-  W_PRECALC_00_15_1(1, W0, Wtmp0);
-  W_PRECALC_00_15_2(2, W0, Wtmp0);
-  W_PRECALC_00_15_3(3, W0, Wtmp0);
-  W_PRECALC_00_15_0(4, W7, Wtmp0);
-  W_PRECALC_00_15_1(5, W7, Wtmp0);
-  W_PRECALC_00_15_2(6, W7, Wtmp0);
-  W_PRECALC_00_15_3(7, W7, Wtmp0);
-  W_PRECALC_00_15_0(8, W6, Wtmp0);
-  W_PRECALC_00_15_1(9, W6, Wtmp0);
-  W_PRECALC_00_15_2(10, W6, Wtmp0);
-  W_PRECALC_00_15_3(11, W6, Wtmp0);
-  W_PRECALC_00_15_0(12, W5, Wtmp0);
-  W_PRECALC_00_15_1(13, W5, Wtmp0);
-  W_PRECALC_00_15_2(14, W5, Wtmp0);
-  W_PRECALC_00_15_3(15, W5, Wtmp0);
-
-#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
-  __asm__ volatile ("movdqa %%"W_m12", %%"W";\n\t" \
-                    "palignr $8, %%"W_m16", %%"W";\n\t" \
-                    "movdqa %%"W_m04", %%"tmp0";\n\t" \
-                    "psrldq $4, %%"tmp0";\n\t" \
-                    "pxor %%"W_m08", %%"W";\n\t" \
-                    :::"cc");
-
-#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
-  __asm__ volatile ("pxor %%"W_m16", %%"tmp0";\n\t" \
-                    "pxor %%"tmp0", %%"W";\n\t" \
-                    "movdqa %%"W", %%"tmp1";\n\t" \
-                    "movdqa %%"W", %%"tmp0";\n\t" \
-                    "pslldq $12, %%"tmp1";\n\t" \
-                    :::"cc");
-
-#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
-  __asm__ volatile ("psrld $31, %%"W";\n\t" \
-                    "pslld $1, %%"tmp0";\n\t" \
-                    "por %%"W", %%"tmp0";\n\t" \
-                    "movdqa %%"tmp1", %%"W";\n\t" \
-                    "psrld $30, %%"tmp1";\n\t" \
-                    "pslld $2, %%"W";\n\t" \
-                    :::"cc");
-
-#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
-  __asm__ volatile ("pxor %%"W", %%"tmp0";\n\t" \
-                    "pxor %%"tmp1", %%"tmp0";\n\t" \
-                    "movdqa %%"tmp0", %%"W";\n\t" \
-                    "paddd %[k_xmm], %%"tmp0";\n\t" \
-                    "movdqa %%"tmp0", %[wk];\n\t" \
-                    : [wk] "=m" (WK(i&~3)) \
-                    : [k_xmm] "m" (K_XMM[i / 20][0]));
-
-  /* Transform 0-15 + Precalc 16-31. */
-  R( a, b, c, d, e, F1, WK( 0) ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
-  R( e, a, b, c, d, F1, WK( 1) ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
-  R( d, e, a, b, c, F1, WK( 2) ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
-  R( c, d, e, a, b, F1, WK( 3) ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
-  R( b, c, d, e, a, F1, WK( 4) ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
-  R( a, b, c, d, e, F1, WK( 5) ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
-  R( e, a, b, c, d, F1, WK( 6) ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
-  R( d, e, a, b, c, F1, WK( 7) ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
-  R( c, d, e, a, b, F1, WK( 8) ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
-  R( b, c, d, e, a, F1, WK( 9) ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
-  R( a, b, c, d, e, F1, WK(10) ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
-  R( e, a, b, c, d, F1, WK(11) ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
-  R( d, e, a, b, c, F1, WK(12) ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
-  R( c, d, e, a, b, F1, WK(13) ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
-  R( b, c, d, e, a, F1, WK(14) ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
-  R( a, b, c, d, e, F1, WK(15) ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
-
-#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
-  __asm__ volatile ("movdqa %%"W_m04", %%"tmp0";\n\t" \
-                    "pxor %%"W_m28", %%"W";\n\t" \
-                    "palignr $8, %%"W_m08", %%"tmp0";\n\t" \
-                    :::"cc");
-
-#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
-  __asm__ volatile ("pxor %%"W_m16", %%"W";\n\t" \
-                    "pxor %%"tmp0", %%"W";\n\t" \
-                    "movdqa %%"W", %%"tmp0";\n\t" \
-                    :::"cc");
-
-#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
-  __asm__ volatile ("psrld $30, %%"W";\n\t" \
-                    "pslld $2, %%"tmp0";\n\t" \
-                    "por %%"W", %%"tmp0";\n\t" \
-                    :::"cc");
-
-#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, W_m28, tmp0) \
-  __asm__ volatile ("movdqa %%"tmp0", %%"W";\n\t" \
-                    "paddd %[k_xmm], %%"tmp0";\n\t" \
-                    "movdqa %%"tmp0", %[wk];\n\t" \
-                    : [wk] "=m" (WK(i&~3)) \
-                    : [k_xmm] "m" (K_XMM[i / 20][0]));
-
-  /* Transform 16-63 + Precalc 32-79. */
-  R( e, a, b, c, d, F1, WK(16) ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
-  R( d, e, a, b, c, F1, WK(17) ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
-  R( c, d, e, a, b, F1, WK(18) ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
-  R( b, c, d, e, a, F1, WK(19) ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
-  R( a, b, c, d, e, F2, WK(20) ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
-  R( e, a, b, c, d, F2, WK(21) ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
-  R( d, e, a, b, c, F2, WK(22) ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
-  R( c, d, e, a, b, F2, WK(23) ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
-  R( b, c, d, e, a, F2, WK(24) ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
-  R( a, b, c, d, e, F2, WK(25) ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
-  R( e, a, b, c, d, F2, WK(26) ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
-  R( d, e, a, b, c, F2, WK(27) ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
-  R( c, d, e, a, b, F2, WK(28) ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
-  R( b, c, d, e, a, F2, WK(29) ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
-  R( a, b, c, d, e, F2, WK(30) ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
-  R( e, a, b, c, d, F2, WK(31) ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
-  R( d, e, a, b, c, F2, WK(32) ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
-  R( c, d, e, a, b, F2, WK(33) ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
-  R( b, c, d, e, a, F2, WK(34) ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
-  R( a, b, c, d, e, F2, WK(35) ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, W2, W3, Wtmp0);
-  R( e, a, b, c, d, F2, WK(36) ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
-  R( d, e, a, b, c, F2, WK(37) ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
-  R( c, d, e, a, b, F2, WK(38) ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
-  R( b, c, d, e, a, F2, WK(39) ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, W1, W2, Wtmp0);
-  R( a, b, c, d, e, F3, WK(40) ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
-  R( e, a, b, c, d, F3, WK(41) ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
-  R( d, e, a, b, c, F3, WK(42) ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
-  R( c, d, e, a, b, F3, WK(43) ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, W0, W1, Wtmp0);
-  R( b, c, d, e, a, F3, WK(44) ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
-  R( a, b, c, d, e, F3, WK(45) ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
-  R( e, a, b, c, d, F3, WK(46) ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
-  R( d, e, a, b, c, F3, WK(47) ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, W7, W0, Wtmp0);
-  R( c, d, e, a, b, F3, WK(48) ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
-  R( b, c, d, e, a, F3, WK(49) ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
-  R( a, b, c, d, e, F3, WK(50) ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
-  R( e, a, b, c, d, F3, WK(51) ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, W6, W7, Wtmp0);
-  R( d, e, a, b, c, F3, WK(52) ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
-  R( c, d, e, a, b, F3, WK(53) ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
-  R( b, c, d, e, a, F3, WK(54) ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
-  R( a, b, c, d, e, F3, WK(55) ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, W5, W6, Wtmp0);
-  R( e, a, b, c, d, F3, WK(56) ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
-  R( d, e, a, b, c, F3, WK(57) ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
-  R( c, d, e, a, b, F3, WK(58) ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
-  R( b, c, d, e, a, F3, WK(59) ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, W4, W5, Wtmp0);
-  R( a, b, c, d, e, F4, WK(60) ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
-  R( e, a, b, c, d, F4, WK(61) ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
-  R( d, e, a, b, c, F4, WK(62) ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
-  R( c, d, e, a, b, F4, WK(63) ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, W3, W4, Wtmp0);
-
-#define CLEAR_REG(reg) __asm__ volatile ("pxor %%"reg", %%"reg";\n\t":::"cc");
-
-  /* Transform 64-79 + Clear XMM registers. */
-  R( b, c, d, e, a, F4, WK(64) ); CLEAR_REG(BSWAP_REG);
-  R( a, b, c, d, e, F4, WK(65) ); CLEAR_REG(Wtmp0);
-  R( e, a, b, c, d, F4, WK(66) ); CLEAR_REG(Wtmp1);
-  R( d, e, a, b, c, F4, WK(67) ); CLEAR_REG(W0);
-  R( c, d, e, a, b, F4, WK(68) ); CLEAR_REG(W1);
-  R( b, c, d, e, a, F4, WK(69) ); CLEAR_REG(W2);
-  R( a, b, c, d, e, F4, WK(70) ); CLEAR_REG(W3);
-  R( e, a, b, c, d, F4, WK(71) ); CLEAR_REG(W4);
-  R( d, e, a, b, c, F4, WK(72) ); CLEAR_REG(W5);
-  R( c, d, e, a, b, F4, WK(73) ); CLEAR_REG(W6);
-  R( b, c, d, e, a, F4, WK(74) ); CLEAR_REG(W7);
-  R( a, b, c, d, e, F4, WK(75) );
-  R( e, a, b, c, d, F4, WK(76) );
-  R( d, e, a, b, c, F4, WK(77) );
-  R( c, d, e, a, b, F4, WK(78) );
-  R( b, c, d, e, a, F4, WK(79) );
-
-  /* Update the chaining variables. */
-  state->h0 += a;
-  state->h1 += b;
-  state->h2 += c;
-  state->h3 += d;
-  state->h4 += e;
-
-  return /* burn_stack */ 84+15;
-}
-
-#endif
-#endif

-----------------------------------------------------------------------

Summary of changes:
 cipher/Makefile.am        |    2 +-
 cipher/sha1-ssse3-amd64.S |  378 +++++++++++++++++++++++++++++++++++++++++++++
 cipher/sha1-ssse3-amd64.c |  319 --------------------------------------
 3 files changed, 379 insertions(+), 320 deletions(-)
 create mode 100644 cipher/sha1-ssse3-amd64.S
 delete mode 100644 cipher/sha1-ssse3-amd64.c


hooks/post-receive
-- 
The GNU crypto library
http://git.gnupg.org


_______________________________________________
Gnupg-commits mailing list
Gnupg-commits at gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits

