[git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-35-gc3d60ac

by Jussi Kivilinna cvs at cvs.gnupg.org
Fri Jan 12 18:36:54 CET 2018


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  c3d60acc3ab5c6d60c2258882175bf31351cc998 (commit)
       via  a518b6680ea80a4325731028545a701c1d71fc02 (commit)
       via  135250e3060e79be698d4f36a819aa8a880789f8 (commit)
      from  a00c5b2988cea256c7823a76ce601febf02c790f (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit c3d60acc3ab5c6d60c2258882175bf31351cc998
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Jan 6 23:21:44 2018 +0200

    rijndael-ssse3: call assembly functions directly
    
    * cipher/rijndael-ssse3-amd64-asm.S (_gcry_aes_ssse3_enc_preload)
    (_gcry_aes_ssse3_dec_preload, _gcry_aes_ssse3_encrypt_core)
    (_gcry_aes_ssse3_decrypt_core, _gcry_aes_ssse3_schedule_core): Add
    ENTER_SYSV_FUNC_PARAMS_* at function entry and EXIT_SYSV_FUNC at exit.
    (_gcry_aes_ssse3_encrypt_core, _gcry_aes_ssse3_decrypt_core): Change
    input parameters to RDI and RSI registers.
    * cipher/rijndael-ssse3-amd64.c (_gcry_aes_ssse3_encrypt_core)
    (_gcry_aes_ssse3_decrypt_core, _gcry_aes_ssse3_schedule_core): Add parameters
    for function prototypes.
    (PUSH_STACK_PTR, POP_STACK_PTR): Remove.
    (vpaes_ssse3_prepare_enc, vpaes_ssse3_prepare_dec)
    (_gcry_aes_ssse3_do_setkey, _gcry_aes_ssse3_prepare_decryption)
    (do_vpaes_ssse3_enc, do_vpaes_ssse3_dec): Remove inline assembly to
    call functions, and call directly instead.
    --
    
    Instead of using inline assembly to call assembly functions in
    AES SSSE3 implementation, change assembly functions so that they
    can be called directly instead.
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/rijndael-ssse3-amd64-asm.S b/cipher/rijndael-ssse3-amd64-asm.S
index 3ae55e8..ffce5df 100644
--- a/cipher/rijndael-ssse3-amd64-asm.S
+++ b/cipher/rijndael-ssse3-amd64-asm.S
@@ -40,11 +40,7 @@
     (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
      defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-# define ELF(...)
-#else
-# define ELF(...) __VA_ARGS__
-#endif
+#include "asm-common-amd64.h"
 
 .text
 
@@ -54,6 +50,7 @@
 ELF(.type _gcry_aes_ssse3_enc_preload,@function)
 .globl _gcry_aes_ssse3_enc_preload
 _gcry_aes_ssse3_enc_preload:
+	ENTER_SYSV_FUNC_PARAMS_0_4
 	lea	.Laes_consts(%rip), %rax
 	movdqa	          (%rax), %xmm9  # 0F
 	movdqa	.Lk_inv   (%rax), %xmm10 # inv
@@ -62,6 +59,7 @@ _gcry_aes_ssse3_enc_preload:
 	movdqa	.Lk_sb1+16(%rax), %xmm12 # sb1t
 	movdqa	.Lk_sb2   (%rax), %xmm15 # sb2u
 	movdqa	.Lk_sb2+16(%rax), %xmm14 # sb2t
+	EXIT_SYSV_FUNC
 	ret
 ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
 
@@ -71,6 +69,7 @@ ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
 ELF(.type _gcry_aes_ssse3_dec_preload,@function)
 .globl _gcry_aes_ssse3_dec_preload
 _gcry_aes_ssse3_dec_preload:
+	ENTER_SYSV_FUNC_PARAMS_0_4
 	lea	.Laes_consts(%rip), %rax
 	movdqa	          (%rax), %xmm9   # 0F
 	movdqa	.Lk_inv   (%rax), %xmm10  # inv
@@ -80,6 +79,7 @@ _gcry_aes_ssse3_dec_preload:
 	movdqa	.Lk_dsbd   (%rax), %xmm15 # sbdu
 	movdqa	.Lk_dsbb   (%rax), %xmm14 # sbbu
 	movdqa	.Lk_dsbe   (%rax), %xmm8  # sbeu
+	EXIT_SYSV_FUNC
 	ret
 ELF(.size _gcry_aes_ssse3_dec_preload,.-_gcry_aes_ssse3_dec_preload)
 
@@ -98,11 +98,11 @@ ELF(.size _gcry_aes_ssse3_dec_preload,.-_gcry_aes_ssse3_dec_preload)
 ##  Inputs:
 ##     %xmm0 = input
 ##     %xmm9-%xmm15 as in .Laes_preheat
-##    (%rdx) = scheduled keys
-##     %rax  = nrounds - 1
+##    (%rdi) = scheduled keys
+##     %rsi  = nrounds
 ##
 ##  Output in %xmm0
-##  Clobbers  %xmm1-%xmm4, %r9, %r11, %rax, %rcx
+##  Clobbers  %xmm1-%xmm4, %r9, %r11, %rax, %rcx, %rdx
 ##  Preserves %xmm6 - %xmm7 so you get some local vectors
 ##
 ##
@@ -111,6 +111,9 @@ ELF(.type _gcry_aes_ssse3_encrypt_core,@function)
 .globl _gcry_aes_ssse3_encrypt_core
 _gcry_aes_ssse3_encrypt_core:
 _aes_encrypt_core:
+	ENTER_SYSV_FUNC_PARAMS_0_4
+	mov	%rdi,	%rdx
+	leaq	-1(%rsi), %rax
 	lea	.Laes_consts(%rip), %rcx
 	leaq	.Lk_mc_backward(%rcx), %rdi
 	mov	$16,	%rsi
@@ -185,6 +188,7 @@ _aes_encrypt_core:
 	pshufb  %xmm3,	%xmm0	# 0 = sb1t
 	pxor	%xmm4,	%xmm0	# 0 = A
 	pshufb	.Lk_sr(%rsi,%rcx), %xmm0
+	EXIT_SYSV_FUNC
 	ret
 ELF(.size _aes_encrypt_core,.-_aes_encrypt_core)
 
@@ -198,8 +202,11 @@ ELF(.size _aes_encrypt_core,.-_aes_encrypt_core)
 ELF(.type _gcry_aes_ssse3_decrypt_core,@function)
 _gcry_aes_ssse3_decrypt_core:
 _aes_decrypt_core:
+	ENTER_SYSV_FUNC_PARAMS_0_4
+	mov	%rdi,	%rdx
 	lea	.Laes_consts(%rip), %rcx
-	movl	%eax,	%esi
+	subl	$1,	%esi
+	movl	%esi,   %eax
 	shll	$4,	%esi
 	xorl	$48,	%esi
 	andl	$48,	%esi
@@ -288,6 +295,7 @@ _aes_decrypt_core:
 	pshufb  %xmm3,	%xmm0	# 0 = sb1t
 	pxor	%xmm4,	%xmm0	# 0 = A
 	pshufb	.Lk_sr(%rsi,%rcx), %xmm0
+	EXIT_SYSV_FUNC
 	ret
 ELF(.size _aes_decrypt_core,.-_aes_decrypt_core)
 
@@ -306,6 +314,8 @@ _aes_schedule_core:
 	# rsi = size in bits
 	# rdx = buffer
 	# rcx = direction.  0=encrypt, 1=decrypt
+	# r8 = rotoffs
+	ENTER_SYSV_FUNC_PARAMS_5
 
 	# load the tables
 	lea	.Laes_consts(%rip), %r10
@@ -659,8 +669,9 @@ _aes_schedule_core:
 	pxor	%xmm6,  %xmm6
 	pxor	%xmm7,  %xmm7
 	pxor	%xmm8,  %xmm8
+	EXIT_SYSV_FUNC
 	ret
-ELF(.size _aes_schedule_core,.-_aes_schedule_core)
+ELF(.size _gcry_aes_ssse3_schedule_core,.-_gcry_aes_ssse3_schedule_core)
 
 ########################################################
 ##                                                    ##
diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c
index da5339e..98660ec 100644
--- a/cipher/rijndael-ssse3-amd64.c
+++ b/cipher/rijndael-ssse3-amd64.c
@@ -58,13 +58,14 @@
 
 
 /* Assembly functions in rijndael-ssse3-amd64-asm.S. Note that these
-   have custom calling convention and need to be called from assembly
-   blocks, not directly. */
+   have custom calling convention (additional XMM parameters). */
 extern void _gcry_aes_ssse3_enc_preload(void);
 extern void _gcry_aes_ssse3_dec_preload(void);
-extern void _gcry_aes_ssse3_schedule_core(void);
-extern void _gcry_aes_ssse3_encrypt_core(void);
-extern void _gcry_aes_ssse3_decrypt_core(void);
+extern void _gcry_aes_ssse3_schedule_core(const void *key, u64 keybits,
+					  void *buffer, u64 decrypt,
+					  u64 rotoffs);
+extern void _gcry_aes_ssse3_encrypt_core(const void *key, u64 nrounds);
+extern void _gcry_aes_ssse3_decrypt_core(const void *key, u64 nrounds);
 
 
 
@@ -110,8 +111,6 @@ extern void _gcry_aes_ssse3_decrypt_core(void);
                   : \
                   : "r" (ssse3_state) \
                   : "memory" )
-# define PUSH_STACK_PTR
-# define POP_STACK_PTR
 #else
 # define SSSE3_STATE_SIZE 1
 # define vpaes_ssse3_prepare() (void)ssse3_state
@@ -126,31 +125,15 @@ extern void _gcry_aes_ssse3_decrypt_core(void);
                   "pxor	%%xmm7,  %%xmm7 \n\t" \
                   "pxor	%%xmm8,  %%xmm8 \n\t" \
                   ::: "memory" )
-/* Old GCC versions use red-zone of AMD64 SYSV ABI and stack pointer is
- * not properly adjusted for assembly block. Therefore stack pointer
- * needs to be manually corrected. */
-# define PUSH_STACK_PTR "subq $128, %%rsp;\n\t"
-# define POP_STACK_PTR  "addq $128, %%rsp;\n\t"
 #endif
 
 #define vpaes_ssse3_prepare_enc() \
     vpaes_ssse3_prepare(); \
-    asm volatile (PUSH_STACK_PTR \
-                  "callq *%q[core] \n\t" \
-                  POP_STACK_PTR \
-                  : \
-                  : [core] "r" (_gcry_aes_ssse3_enc_preload) \
-                  : "rax", "cc", "memory" )
+    _gcry_aes_ssse3_enc_preload();
 
 #define vpaes_ssse3_prepare_dec() \
     vpaes_ssse3_prepare(); \
-    asm volatile (PUSH_STACK_PTR \
-                  "callq *%q[core] \n\t" \
-                  POP_STACK_PTR \
-                  : \
-                  : [core] "r" (_gcry_aes_ssse3_dec_preload) \
-                  : "rax", "cc", "memory" )
-
+    _gcry_aes_ssse3_dec_preload();
 
 
 void
@@ -161,23 +144,7 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
 
   vpaes_ssse3_prepare();
 
-  asm volatile ("leaq %q[key], %%rdi"			"\n\t"
-                "movl %[bits], %%esi"			"\n\t"
-                "leaq %[buf], %%rdx"			"\n\t"
-                "movl %[dir], %%ecx"			"\n\t"
-                "movl %[rotoffs], %%r8d"		"\n\t"
-                PUSH_STACK_PTR
-                "callq *%q[core]"			"\n\t"
-                POP_STACK_PTR
-                :
-                : [core] "r" (&_gcry_aes_ssse3_schedule_core),
-                  [key] "m" (*key),
-                  [bits] "g" (keybits),
-                  [buf] "m" (ctx->keyschenc32[0][0]),
-                  [dir] "g" (0),
-                  [rotoffs] "g" (48)
-                : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi",
-                  "cc", "memory");
+  _gcry_aes_ssse3_schedule_core(key, keybits, &ctx->keyschenc32[0][0], 0, 48);
 
   /* Save key for setting up decryption. */
   if (keybits > 192)
@@ -216,23 +183,9 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
 
   vpaes_ssse3_prepare();
 
-  asm volatile ("leaq %q[key], %%rdi"			"\n\t"
-                "movl %[bits], %%esi"			"\n\t"
-                "leaq %[buf], %%rdx"			"\n\t"
-                "movl %[dir], %%ecx"			"\n\t"
-                "movl %[rotoffs], %%r8d"		"\n\t"
-                PUSH_STACK_PTR
-                "callq *%q[core]"			"\n\t"
-                POP_STACK_PTR
-                :
-                : [core] "r" (_gcry_aes_ssse3_schedule_core),
-                  [key] "m" (ctx->keyschdec32[0][0]),
-                  [bits] "g" (keybits),
-                  [buf] "m" (ctx->keyschdec32[ctx->rounds][0]),
-                  [dir] "g" (1),
-                  [rotoffs] "g" ((keybits == 192) ? 0 : 32)
-                : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi",
-                  "cc", "memory");
+  _gcry_aes_ssse3_schedule_core(&ctx->keyschdec32[0][0], keybits,
+				&ctx->keyschdec32[ctx->rounds][0], 1,
+				(keybits == 192) ? 0 : 32);
 
   vpaes_ssse3_cleanup();
 }
@@ -243,15 +196,7 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
 static inline void
 do_vpaes_ssse3_enc (const RIJNDAEL_context *ctx, unsigned int nrounds)
 {
-  unsigned int middle_rounds = nrounds - 1;
-  const void *keysched = ctx->keyschenc32;
-
-  asm volatile (PUSH_STACK_PTR
-		"callq *%q[core]"			"\n\t"
-		POP_STACK_PTR
-		: "+a" (middle_rounds), "+d" (keysched)
-		: [core] "r" (_gcry_aes_ssse3_encrypt_core)
-		: "rcx", "rsi", "rdi", "cc", "memory");
+  _gcry_aes_ssse3_encrypt_core(ctx->keyschenc32, nrounds);
 }
 
 
@@ -260,15 +205,7 @@ do_vpaes_ssse3_enc (const RIJNDAEL_context *ctx, unsigned int nrounds)
 static inline void
 do_vpaes_ssse3_dec (const RIJNDAEL_context *ctx, unsigned int nrounds)
 {
-  unsigned int middle_rounds = nrounds - 1;
-  const void *keysched = ctx->keyschdec32;
-
-  asm volatile (PUSH_STACK_PTR
-		"callq *%q[core]"			"\n\t"
-		POP_STACK_PTR
-		: "+a" (middle_rounds), "+d" (keysched)
-		: [core] "r" (_gcry_aes_ssse3_decrypt_core)
-		: "rcx", "rsi", "cc", "memory");
+  _gcry_aes_ssse3_decrypt_core(ctx->keyschdec32, nrounds);
 }
 
 

commit a518b6680ea80a4325731028545a701c1d71fc02
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Jan 6 22:19:56 2018 +0200

    Move AMD64 MS to SysV calling convention conversion to assembly side
    
    * cipher/Makefile.am: Add 'asm-common-amd64.h'.
    * cipher/asm-common-amd64.h: New.
    * cipher/blowfish-amd64.S: Add ENTER_SYSV_FUNC_* and EXIT_SYSV_FUNC for
    each global function from 'asm-common-amd64.h'.
    * cipher/cast5-amd64.S: Ditto.
    * cipher/des-amd64.S: Ditto.
    * cipher/rijndael-amd64.S: Ditto.
    * cipher/twofish-amd64.S: Ditto.
    * cipher/arcfour-amd64.S: Ditto.
    * cipher/blowfish.c [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]
    (call_sysv_fn): Remove.
    * cipher/cast5.c [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]
    (call_sysv_fn): Remove.
    * cipher/twofish.c [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]
    (call_sysv_fn, call_sysv_fn5, call_sysv_fn6): Remove.
    * cipher/rijndael.c (do_encrypt, do_decrypt)
    [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Remove assembly block for
    calling SysV ABI function.
    * cipher/arcfour.c [USE_AMD64_ASM] (encrypt_stream): Ditto.
    --
    
    Old approach was to convert MS ABI to SysV ABI calling convention
    for AMD64 assembly functions at caller side. This patch moves
    calling convention conversion to assembly/callee side.
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 3c4eae0..bba815b 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -61,6 +61,7 @@ dsa-common.c rsa-common.c \
 sha1.h
 
 EXTRA_libcipher_la_SOURCES = \
+asm-common-amd64.h \
 arcfour.c arcfour-amd64.S \
 blowfish.c blowfish-amd64.S blowfish-arm.S \
 cast5.c cast5-amd64.S cast5-arm.S \
diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
index 2e52ea0..c08f345 100644
--- a/cipher/arcfour-amd64.S
+++ b/cipher/arcfour-amd64.S
@@ -18,17 +18,14 @@
 #if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
 
 .text
 .align 16
 .globl _gcry_arcfour_amd64
 ELF(.type _gcry_arcfour_amd64,@function)
 _gcry_arcfour_amd64:
+	ENTER_SYSV_FUNC_PARAMS_0_4
 	push	%rbp
 	push	%rbx
 	mov	%rdi,		%rbp	# key = ARG(key)
@@ -96,6 +93,7 @@ _gcry_arcfour_amd64:
 	movb	%dl,		(4*256+4)(%rbp)	# key->x = x
 	pop	%rbx
 	pop	%rbp
+	EXIT_SYSV_FUNC
 	ret
 .L__gcry_arcfour_amd64_end:
 ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)
diff --git a/cipher/arcfour.c b/cipher/arcfour.c
index 44e8ef4..085df9b 100644
--- a/cipher/arcfour.c
+++ b/cipher/arcfour.c
@@ -54,21 +54,7 @@ static void
 encrypt_stream (void *context,
                 byte *outbuf, const byte *inbuf, size_t length)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  const void *fn = _gcry_arcfour_amd64;
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  asm volatile ("callq *%0\n\t"
-                : "+a" (fn),
-                  "+D" (context),
-                  "+S" (length),
-                  "+d" (inbuf),
-                  "+c" (outbuf)
-                :
-                : "cc", "memory", "r8", "r9", "r10", "r11");
-#else
   _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
-#endif
 }
 
 #else /*!USE_AMD64_ASM*/
diff --git a/cipher/asm-common-amd64.h b/cipher/asm-common-amd64.h
new file mode 100644
index 0000000..7eb4264
--- /dev/null
+++ b/cipher/asm-common-amd64.h
@@ -0,0 +1,90 @@
+/* asm-common-amd64.h  -  Common macros for AMD64 assembly
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_COMMON_AMD64_H
+#define GCRY_ASM_COMMON_AMD64_H
+
+#include <config.h>
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+#ifdef __PIC__
+#  define rRIP (%rip)
+#else
+#  define rRIP
+#endif
+
+#ifdef __PIC__
+#  define RIP %rip
+#else
+#  define RIP
+#endif
+
+#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
+#  define GET_EXTERN_POINTER(name, reg) movabsq $name, reg
+#else
+#  ifdef __code_model_large__
+#    define GET_EXTERN_POINTER(name, reg) \
+	       pushq %r15; \
+	       pushq %r14; \
+	    1: leaq 1b(%rip), reg; \
+	       movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \
+	       movabsq $name at GOT, %r15; \
+	       addq %r14, reg; \
+	       popq %r14; \
+	       movq (reg, %r15), reg; \
+	       popq %r15;
+#  else
+#    define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
+#  endif
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ENTER_SYSV_FUNC_PARAMS_0_4 \
+	pushq %rdi; \
+	pushq %rsi; \
+	movq %rcx, %rdi; \
+	movq %rdx, %rsi; \
+	movq %r8, %rdx; \
+	movq %r9, %rcx; \
+
+# define ENTER_SYSV_FUNC_PARAMS_5 \
+	ENTER_SYSV_FUNC_PARAMS_0_4; \
+	movq 0x38(%rsp), %r8;
+
+# define ENTER_SYSV_FUNC_PARAMS_6 \
+	ENTER_SYSV_FUNC_PARAMS_5; \
+	movq 0x40(%rsp), %r9;
+
+# define EXIT_SYSV_FUNC \
+	popq %rsi; \
+	popq %rdi;
+#else
+# define ENTER_SYSV_FUNC_PARAMS_0_4
+# define ENTER_SYSV_FUNC_PARAMS_5
+# define ENTER_SYSV_FUNC_PARAMS_6
+# define EXIT_SYSV_FUNC
+#endif
+
+#endif /* GCRY_ASM_COMMON_AMD64_H */
diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index 21b63fc..02d3b71 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
@@ -24,11 +24,7 @@
     (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
      defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
 
 .text
 
@@ -165,6 +161,8 @@ _gcry_blowfish_amd64_do_encrypt:
 	 *	%rsi: u32 *ret_xl
 	 *	%rdx: u32 *ret_xr
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	movl (%rdx), RX0d;
 	shlq $32, RX0;
 	movl (%rsi), RT3d;
@@ -178,6 +176,7 @@ _gcry_blowfish_amd64_do_encrypt:
 	shrq $32, RX0;
 	movl RX0d, (RX2);
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
 
@@ -191,6 +190,7 @@ _gcry_blowfish_amd64_encrypt_block:
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
 
 	movq %rsi, %r10;
 
@@ -202,6 +202,7 @@ _gcry_blowfish_amd64_encrypt_block:
 	movq %r10, RIO;
 	write_block();
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
 
@@ -215,6 +216,8 @@ _gcry_blowfish_amd64_decrypt_block:
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	movq %rbp, %r11;
 
 	movq %rsi, %r10;
@@ -238,6 +241,7 @@ _gcry_blowfish_amd64_decrypt_block:
 
 	movq %r11, %rbp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
 
@@ -392,6 +396,8 @@ _gcry_blowfish_amd64_ctr_enc:
 	 *	%rdx: src (4 blocks)
 	 *	%rcx: iv (big endian, 64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
@@ -436,6 +442,7 @@ _gcry_blowfish_amd64_ctr_enc:
 	popq %rbx;
 	popq %rbp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
 
@@ -449,6 +456,8 @@ _gcry_blowfish_amd64_cbc_dec:
 	 *	%rdx: src (4 blocks)
 	 *	%rcx: iv (64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
@@ -484,6 +493,7 @@ _gcry_blowfish_amd64_cbc_dec:
 	popq %rbx;
 	popq %rbp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
 
@@ -497,6 +507,8 @@ _gcry_blowfish_amd64_cfb_dec:
 	 *	%rdx: src (4 blocks)
 	 *	%rcx: iv (64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
@@ -534,6 +546,8 @@ _gcry_blowfish_amd64_cfb_dec:
 	popq %r12;
 	popq %rbx;
 	popq %rbp;
+
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
 
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index a3fc26c..724d64e 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -281,87 +281,43 @@ extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out,
 extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out,
 					 const byte *in, byte *iv);
 
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-static inline void
-call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
-              const void *arg3, const void *arg4)
-{
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  asm volatile ("callq *%0\n\t"
-                : "+a" (fn),
-                  "+D" (arg1),
-                  "+S" (arg2),
-                  "+d" (arg3),
-                  "+c" (arg4)
-                :
-                : "cc", "memory", "r8", "r9", "r10", "r11");
-}
-#endif
-
 static void
 do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_blowfish_amd64_do_encrypt, bc, ret_xl, ret_xr, NULL);
-#else
   _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr);
-#endif
 }
 
 static void
 do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_blowfish_amd64_encrypt_block, context, outbuf, inbuf,
-                NULL);
-#else
   _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);
-#endif
 }
 
 static void
 do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_blowfish_amd64_decrypt_block, context, outbuf, inbuf,
-                NULL);
-#else
   _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf);
-#endif
 }
 
 static inline void
 blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in,
                        byte *ctr)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_blowfish_amd64_ctr_enc, ctx, out, in, ctr);
-#else
   _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr);
-#endif
 }
 
 static inline void
 blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
                        byte *iv)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_blowfish_amd64_cbc_dec, ctx, out, in, iv);
-#else
   _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv);
-#endif
 }
 
 static inline void
 blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
                        byte *iv)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_blowfish_amd64_cfb_dec, ctx, out, in, iv);
-#else
   _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv);
-#endif
 }
 
 static unsigned int
diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index c04015a..1a1d43f 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
@@ -23,30 +23,7 @@
 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
      defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5)
 
-#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
-#  define GET_EXTERN_POINTER(name, reg) movabsq $name, reg
-#else
-#  ifdef __code_model_large__
-#    define GET_EXTERN_POINTER(name, reg) \
-	       pushq %r15; \
-	       pushq %r14; \
-	    1: leaq 1b(%rip), reg; \
-	       movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \
-	       movabsq $name@GOT, %r15; \
-	       addq %r14, reg; \
-	       popq %r14; \
-	       movq (reg, %r15), reg; \
-	       popq %r15;
-#  else
-#    define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
-#  endif
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
 
 .text
 
@@ -206,6 +183,8 @@ _gcry_cast5_amd64_encrypt_block:
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	pushq %rbp;
 	pushq %rbx;
 
@@ -233,6 +212,8 @@ _gcry_cast5_amd64_encrypt_block:
 
 	popq %rbx;
 	popq %rbp;
+
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
 
@@ -246,6 +227,8 @@ _gcry_cast5_amd64_decrypt_block:
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	pushq %rbp;
 	pushq %rbx;
 
@@ -273,6 +256,8 @@ _gcry_cast5_amd64_decrypt_block:
 
 	popq %rbx;
 	popq %rbp;
+
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
 
@@ -444,6 +429,7 @@ _gcry_cast5_amd64_ctr_enc:
 	 *	%rdx: src (8 blocks)
 	 *	%rcx: iv (big endian, 64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
 
 	pushq %rbp;
 	pushq %rbx;
@@ -489,6 +475,8 @@ _gcry_cast5_amd64_ctr_enc:
 	popq %r12;
 	popq %rbx;
 	popq %rbp;
+
+	EXIT_SYSV_FUNC
 	ret
 ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
 
@@ -502,6 +490,7 @@ _gcry_cast5_amd64_cbc_dec:
 	 *	%rdx: src (8 blocks)
 	 *	%rcx: iv (64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
 
 	pushq %rbp;
 	pushq %rbx;
@@ -542,6 +531,8 @@ _gcry_cast5_amd64_cbc_dec:
 	popq %r12;
 	popq %rbx;
 	popq %rbp;
+
+	EXIT_SYSV_FUNC
 	ret;
 
 ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
@@ -556,6 +547,7 @@ _gcry_cast5_amd64_cfb_dec:
 	 *	%rdx: src (8 blocks)
 	 *	%rcx: iv (64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
 
 	pushq %rbp;
 	pushq %rbx;
@@ -597,6 +589,8 @@ _gcry_cast5_amd64_cfb_dec:
 	popq %r12;
 	popq %rbx;
 	popq %rbp;
+
+	EXIT_SYSV_FUNC
 	ret;
 
 ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;)
diff --git a/cipher/cast5.c b/cipher/cast5.c
index 94dcee7..d23882b 100644
--- a/cipher/cast5.c
+++ b/cipher/cast5.c
@@ -373,72 +373,34 @@ extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out,
 extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out,
 				      const byte *in, byte *iv);
 
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-static inline void
-call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
-              const void *arg3, const void *arg4)
-{
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  asm volatile ("callq *%0\n\t"
-                : "+a" (fn),
-                  "+D" (arg1),
-                  "+S" (arg2),
-                  "+d" (arg3),
-                  "+c" (arg4)
-                :
-                : "cc", "memory", "r8", "r9", "r10", "r11");
-}
-#endif
-
 static void
 do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_cast5_amd64_encrypt_block, context, outbuf, inbuf, NULL);
-#else
   _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf);
-#endif
 }
 
 static void
 do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_cast5_amd64_decrypt_block, context, outbuf, inbuf, NULL);
-#else
   _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf);
-#endif
 }
 
 static void
 cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_cast5_amd64_ctr_enc, ctx, out, in, ctr);
-#else
   _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr);
-#endif
 }
 
 static void
 cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_cast5_amd64_cbc_dec, ctx, out, in, iv);
-#else
   _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv);
-#endif
 }
 
 static void
 cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_cast5_amd64_cfb_dec, ctx, out, in, iv);
-#else
   _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv);
-#endif
 }
 
 static unsigned int
diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
index 1b7cfba..f25573d 100644
--- a/cipher/des-amd64.S
+++ b/cipher/des-amd64.S
@@ -23,17 +23,7 @@
 #if defined(USE_DES) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 
-#ifdef __PIC__
-#  define RIP (%rip)
-#else
-#  define RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
 
 .text
 
@@ -200,6 +190,8 @@ _gcry_3des_amd64_crypt_block:
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
@@ -208,7 +200,7 @@ _gcry_3des_amd64_crypt_block:
 	pushq %r15;
 	pushq %rsi; /*dst*/
 
-	leaq .L_s1 RIP, SBOXES;
+	leaq .L_s1 rRIP, SBOXES;
 
 	read_block(%rdx, RL0, RR0);
 	initial_permutation(RL0, RR0);
@@ -277,6 +269,7 @@ _gcry_3des_amd64_crypt_block:
 	popq %rbx;
 	popq %rbp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
 
@@ -473,7 +466,7 @@ _gcry_3des_amd64_crypt_blk3:
 	 *  RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks
 	 */
 
-	leaq .L_s1 RIP, SBOXES;
+	leaq .L_s1 rRIP, SBOXES;
 
 	initial_permutation3(RL, RR);
 
@@ -547,6 +540,7 @@ _gcry_3des_amd64_cbc_dec:
 	 *	%rdx: src (3 blocks)
 	 *	%rcx: iv (64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
 
 	pushq %rbp;
 	pushq %rbx;
@@ -610,6 +604,7 @@ _gcry_3des_amd64_cbc_dec:
 	popq %rbx;
 	popq %rbp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
 
@@ -623,6 +618,7 @@ _gcry_3des_amd64_ctr_enc:
 	 *	%rdx: src (3 blocks)
 	 *	%rcx: iv (64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
 
 	pushq %rbp;
 	pushq %rbx;
@@ -688,6 +684,7 @@ _gcry_3des_amd64_ctr_enc:
 	popq %rbx;
 	popq %rbp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
 
@@ -701,6 +698,8 @@ _gcry_3des_amd64_cfb_dec:
 	 *	%rdx: src (3 blocks)
 	 *	%rcx: iv (64bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
@@ -763,6 +762,8 @@ _gcry_3des_amd64_cfb_dec:
 	popq %r12;
 	popq %rbx;
 	popq %rbp;
+
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
 
diff --git a/cipher/des.c b/cipher/des.c
index 5c99f50..7801b08 100644
--- a/cipher/des.c
+++ b/cipher/des.c
@@ -772,23 +772,6 @@ extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out,
 
 #define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *))
 
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-static inline void
-call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
-              const void *arg3, const void *arg4)
-{
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  asm volatile ("callq *%0\n\t"
-                : "+a" (fn),
-                  "+D" (arg1),
-                  "+S" (arg2),
-                  "+d" (arg3),
-                  "+c" (arg4)
-                :
-                : "cc", "memory", "r8", "r9", "r10", "r11");
-}
-#endif
 
 /*
  * Electronic Codebook Mode Triple-DES encryption/decryption of data
@@ -803,11 +786,7 @@ tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
 
   keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
 
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_3des_amd64_crypt_block, keys, to, from, NULL);
-#else
   _gcry_3des_amd64_crypt_block(keys, to, from);
-#endif
 
   return 0;
 }
@@ -815,31 +794,19 @@ tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
 static inline void
 tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_3des_amd64_ctr_enc, keys, out, in, ctr);
-#else
   _gcry_3des_amd64_ctr_enc(keys, out, in, ctr);
-#endif
 }
 
 static inline void
 tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_3des_amd64_cbc_dec, keys, out, in, iv);
-#else
   _gcry_3des_amd64_cbc_dec(keys, out, in, iv);
-#endif
 }
 
 static inline void
 tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn (_gcry_3des_amd64_cfb_dec, keys, out, in, iv);
-#else
   _gcry_3des_amd64_cfb_dec(keys, out, in, iv);
-#endif
 }
 
 #else /*USE_AMD64_ASM*/
diff --git a/cipher/rijndael-amd64.S b/cipher/rijndael-amd64.S
index b149e94..798ff51 100644
--- a/cipher/rijndael-amd64.S
+++ b/cipher/rijndael-amd64.S
@@ -23,17 +23,7 @@
 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
      defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_AES)
 
-#ifdef __PIC__
-#  define RIP (%rip)
-#else
-#  define RIP
-#endif
-
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
+#include "asm-common-amd64.h"
 
 .text
 
@@ -222,6 +212,8 @@ _gcry_aes_amd64_encrypt_block:
 	 *	%ecx: number of rounds.. 10, 12 or 14
 	 *	%r8:  encryption tables
 	 */
+	ENTER_SYSV_FUNC_PARAMS_5
+
 	subq $(5 * 8), %rsp;
 	movq %rsi, (0 * 8)(%rsp);
 	movl %ecx, (1 * 8)(%rsp);
@@ -265,6 +257,8 @@ _gcry_aes_amd64_encrypt_block:
 	addq $(5 * 8), %rsp;
 
 	movl $(6 * 8), %eax;
+
+	EXIT_SYSV_FUNC
 	ret;
 
 .align 4
@@ -382,6 +376,8 @@ _gcry_aes_amd64_decrypt_block:
 	 *	%ecx: number of rounds.. 10, 12 or 14
 	 *	%r8:  decryption tables
 	 */
+	ENTER_SYSV_FUNC_PARAMS_5
+
 	subq $(5 * 8), %rsp;
 	movq %rsi, (0 * 8)(%rsp);
 	movl %ecx, (1 * 8)(%rsp);
@@ -426,6 +422,8 @@ _gcry_aes_amd64_decrypt_block:
 	addq $(5 * 8), %rsp;
 
 	movl $(6 * 8), %eax;
+
+	EXIT_SYSV_FUNC
 	ret;
 
 .align 4
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 548bfa0..df1363f 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -740,27 +740,8 @@ do_encrypt (const RIJNDAEL_context *ctx,
             unsigned char *bx, const unsigned char *ax)
 {
 #ifdef USE_AMD64_ASM
-# ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
   return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds,
 				       encT);
-# else
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  const void *key = ctx->keyschenc;
-  uintptr_t rounds = ctx->rounds;
-  uintptr_t ret;
-  asm volatile ("movq %[encT], %%r8\n\t"
-                "callq *%[ret]\n\t"
-                : [ret] "=a" (ret),
-                  "+D" (key),
-                  "+S" (bx),
-                  "+d" (ax),
-                  "+c" (rounds)
-                : "0" (_gcry_aes_amd64_encrypt_block),
-                  [encT] "r" (encT)
-                : "cc", "memory", "r8", "r9", "r10", "r11");
-  return ret;
-# endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */
 #elif defined(USE_ARM_ASM)
   return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, encT);
 #else
@@ -1123,27 +1104,8 @@ do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
             const unsigned char *ax)
 {
 #ifdef USE_AMD64_ASM
-# ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
   return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds,
 				       &dec_tables);
-# else
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  const void *key = ctx->keyschdec;
-  uintptr_t rounds = ctx->rounds;
-  uintptr_t ret;
-  asm volatile ("movq %[dectabs], %%r8\n\t"
-                "callq *%[ret]\n\t"
-                : [ret] "=a" (ret),
-                  "+D" (key),
-                  "+S" (bx),
-                  "+d" (ax),
-                  "+c" (rounds)
-                : "0" (_gcry_aes_amd64_decrypt_block),
-                  [dectabs] "r" (&dec_tables)
-                : "cc", "memory", "r8", "r9", "r10", "r11");
-  return ret;
-# endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */
 #elif defined(USE_ARM_ASM)
   return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds,
 				     &dec_tables);
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index aa964e0..7a83646 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -23,17 +23,7 @@
 #if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH)
 
-#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
-# define ELF(...) __VA_ARGS__
-#else
-# define ELF(...) /*_*/
-#endif
-
-#ifdef __PIC__
-#  define RIP %rip
-#else
-#  define RIP
-#endif
+#include "asm-common-amd64.h"
 
 .text
 
@@ -181,6 +171,8 @@ _gcry_twofish_amd64_encrypt_block:
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	subq $(3 * 8), %rsp;
 	movq %rsi, (0 * 8)(%rsp);
 	movq %rbp, (1 * 8)(%rsp);
@@ -211,6 +203,7 @@ _gcry_twofish_amd64_encrypt_block:
 	movq (1 * 8)(%rsp), %rbp;
 	addq $(3 * 8), %rsp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
 
@@ -224,6 +217,8 @@ _gcry_twofish_amd64_decrypt_block:
 	 *	%rsi: dst
 	 *	%rdx: src
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	subq $(3 * 8), %rsp;
 	movq %rsi, (0 * 8)(%rsp);
 	movq %rbp, (1 * 8)(%rsp);
@@ -254,6 +249,7 @@ _gcry_twofish_amd64_decrypt_block:
 	movq (1 * 8)(%rsp), %rbp;
 	addq $(3 * 8), %rsp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
 
@@ -530,6 +526,8 @@ _gcry_twofish_amd64_ctr_enc:
 	 *	%rdx: src (3 blocks)
 	 *	%rcx: iv (big endian, 128bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	subq $(8 * 8), %rsp;
 	movq %rbp, (0 * 8)(%rsp);
 	movq %rbx, (1 * 8)(%rsp);
@@ -599,6 +597,7 @@ _gcry_twofish_amd64_ctr_enc:
 	movq (5 * 8)(%rsp), %r15;
 	addq $(8 * 8), %rsp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
 
@@ -612,6 +611,8 @@ _gcry_twofish_amd64_cbc_dec:
 	 *	%rdx: src (3 blocks)
 	 *	%rcx: iv (128bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	subq $(9 * 8), %rsp;
 	movq %rbp, (0 * 8)(%rsp);
 	movq %rbx, (1 * 8)(%rsp);
@@ -665,6 +666,7 @@ _gcry_twofish_amd64_cbc_dec:
 	movq (5 * 8)(%rsp), %r15;
 	addq $(9 * 8), %rsp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
 
@@ -678,6 +680,8 @@ _gcry_twofish_amd64_cfb_dec:
 	 *	%rdx: src (3 blocks)
 	 *	%rcx: iv (128bit)
 	 */
+	ENTER_SYSV_FUNC_PARAMS_0_4
+
 	subq $(8 * 8), %rsp;
 	movq %rbp, (0 * 8)(%rsp);
 	movq %rbx, (1 * 8)(%rsp);
@@ -731,6 +735,7 @@ _gcry_twofish_amd64_cfb_dec:
 	movq (5 * 8)(%rsp), %r15;
 	addq $(8 * 8), %rsp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
 
@@ -746,6 +751,8 @@ _gcry_twofish_amd64_ocb_enc:
 	 *	%r8 : checksum
 	 *	%r9 : L pointers (void *L[3])
 	 */
+	ENTER_SYSV_FUNC_PARAMS_6
+
 	subq $(8 * 8), %rsp;
 	movq %rbp, (0 * 8)(%rsp);
 	movq %rbx, (1 * 8)(%rsp);
@@ -838,6 +845,7 @@ _gcry_twofish_amd64_ocb_enc:
 	movq (5 * 8)(%rsp), %r15;
 	addq $(8 * 8), %rsp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)
 
@@ -853,6 +861,8 @@ _gcry_twofish_amd64_ocb_dec:
 	 *	%r8 : checksum
 	 *	%r9 : L pointers (void *L[3])
 	 */
+	ENTER_SYSV_FUNC_PARAMS_6
+
 	subq $(8 * 8), %rsp;
 	movq %rbp, (0 * 8)(%rsp);
 	movq %rbx, (1 * 8)(%rsp);
@@ -953,6 +963,7 @@ _gcry_twofish_amd64_ocb_dec:
 	movq (5 * 8)(%rsp), %r15;
 	addq $(8 * 8), %rsp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)
 
@@ -967,6 +978,8 @@ _gcry_twofish_amd64_ocb_auth:
 	 *	%rcx: checksum
 	 *	%r8 : L pointers (void *L[3])
 	 */
+	ENTER_SYSV_FUNC_PARAMS_5
+
 	subq $(8 * 8), %rsp;
 	movq %rbp, (0 * 8)(%rsp);
 	movq %rbx, (1 * 8)(%rsp);
@@ -1039,6 +1052,7 @@ _gcry_twofish_amd64_ocb_auth:
 	movq (5 * 8)(%rsp), %r15;
 	addq $(8 * 8), %rsp;
 
+	EXIT_SYSV_FUNC
 	ret;
 ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;)
 
diff --git a/cipher/twofish.c b/cipher/twofish.c
index 942e8d4..48feaae 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -829,145 +829,58 @@ extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx,
 					 const byte *abuf, byte *offset,
 					 byte *checksum, const u64 Ls[3]);
 
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-static inline void
-call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
-              const void *arg3, const void *arg4)
-{
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  asm volatile ("callq *%0\n\t"
-                : "+a" (fn),
-                  "+D" (arg1),
-                  "+S" (arg2),
-                  "+d" (arg3),
-                  "+c" (arg4)
-                :
-                : "cc", "memory", "r8", "r9", "r10", "r11");
-}
-
-static inline void
-call_sysv_fn5 (const void *fn, const void *arg1, const void *arg2,
-               const void *arg3, const void *arg4, const void *arg5)
-{
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  asm volatile ("movq %[arg5], %%r8\n\t"
-		"callq *%0\n\t"
-		: "+a" (fn),
-		  "+D" (arg1),
-		  "+S" (arg2),
-		  "+d" (arg3),
-		  "+c" (arg4)
-		: [arg5] "g" (arg5)
-		: "cc", "memory", "r8", "r9", "r10", "r11");
-}
-
-static inline void
-call_sysv_fn6 (const void *fn, const void *arg1, const void *arg2,
-               const void *arg3, const void *arg4, const void *arg5,
-	       const void *arg6)
-{
-  /* Call SystemV ABI function without storing non-volatile XMM registers,
-   * as target function does not use vector instruction sets. */
-  asm volatile ("movq %[arg5], %%r8\n\t"
-		"movq %[arg6], %%r9\n\t"
-		"callq *%0\n\t"
-		: "+a" (fn),
-		  "+D" (arg1),
-		  "+S" (arg2),
-		  "+d" (arg3),
-		  "+c" (arg4)
-		: [arg5] "g" (arg5),
-		  [arg6] "g" (arg6)
-		: "cc", "memory", "r8", "r9", "r10", "r11");
-}
-#endif
-
 static inline void
 twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn(_gcry_twofish_amd64_encrypt_block, c, out, in, NULL);
-#else
   _gcry_twofish_amd64_encrypt_block(c, out, in);
-#endif
 }
 
 static inline void
 twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn(_gcry_twofish_amd64_decrypt_block, c, out, in, NULL);
-#else
   _gcry_twofish_amd64_decrypt_block(c, out, in);
-#endif
 }
 
 static inline void
 twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in,
                       byte *ctr)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn(_gcry_twofish_amd64_ctr_enc, c, out, in, ctr);
-#else
   _gcry_twofish_amd64_ctr_enc(c, out, in, ctr);
-#endif
 }
 
 static inline void
 twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in,
                       byte *iv)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn(_gcry_twofish_amd64_cbc_dec, c, out, in, iv);
-#else
   _gcry_twofish_amd64_cbc_dec(c, out, in, iv);
-#endif
 }
 
 static inline void
 twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
                       byte *iv)
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn(_gcry_twofish_amd64_cfb_dec, c, out, in, iv);
-#else
   _gcry_twofish_amd64_cfb_dec(c, out, in, iv);
-#endif
 }
 
 static inline void
 twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in,
 		      byte *offset, byte *checksum, const u64 Ls[3])
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn6(_gcry_twofish_amd64_ocb_enc, ctx, out, in, offset, checksum, Ls);
-#else
   _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls);
-#endif
 }
 
 static inline void
 twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in,
 		      byte *offset, byte *checksum, const u64 Ls[3])
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn6(_gcry_twofish_amd64_ocb_dec, ctx, out, in, offset, checksum, Ls);
-#else
   _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls);
-#endif
 }
 
 static inline void
 twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf,
 		       byte *offset, byte *checksum, const u64 Ls[3])
 {
-#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
-  call_sysv_fn5(_gcry_twofish_amd64_ocb_auth, ctx, abuf, offset, checksum, Ls);
-#else
   _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls);
-#endif
 }
 
 #elif defined(USE_ARM_ASM)

commit 135250e3060e79be698d4f36a819aa8a880789f8
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Jan 6 20:26:52 2018 +0200

    Make BMI2 inline assembly check more robust
    
    * configure.ac (gcry_cv_gcc_inline_asm_bmi2): New assembly test.
    --
    
    Use actual assembly snippets from keccak.c to check that compiler
    has proper support for used BMI2 instructions.
    
    GnuPG-bug-id: 3408
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/configure.ac b/configure.ac
index 42cd4c2..aaf3c82 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1403,8 +1403,15 @@ AC_CACHE_CHECK([whether GCC inline assembler supports BMI2 instructions],
         else
           gcry_cv_gcc_inline_asm_bmi2=no
           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-          [[void a(void) {
-              __asm__("rorxl \$23, %%eax, %%edx\\n\\t":::"memory");
+          [[unsigned int a(unsigned int x, unsigned int y) {
+              unsigned int tmp1, tmp2;
+              asm ("rorxl %2, %1, %0"
+                   : "=r" (tmp1)
+                   : "rm0" (x), "J" (32 - ((23) & 31)));
+              asm ("andnl %2, %1, %0"
+                   : "=r" (tmp2)
+                   : "r0" (x), "rm" (y));
+              return tmp1 + tmp2;
             }]])],
           [gcry_cv_gcc_inline_asm_bmi2=yes])
         fi])

-----------------------------------------------------------------------

Summary of changes:
 cipher/Makefile.am                |  1 +
 cipher/arcfour-amd64.S            |  8 ++--
 cipher/arcfour.c                  | 14 ------
 cipher/asm-common-amd64.h         | 90 ++++++++++++++++++++++++++++++++++++++
 cipher/blowfish-amd64.S           | 24 ++++++++---
 cipher/blowfish.c                 | 44 -------------------
 cipher/cast5-amd64.S              | 42 ++++++++----------
 cipher/cast5.c                    | 38 ----------------
 cipher/des-amd64.S                | 27 ++++++------
 cipher/des.c                      | 33 --------------
 cipher/rijndael-amd64.S           | 20 ++++-----
 cipher/rijndael-ssse3-amd64-asm.S | 31 ++++++++-----
 cipher/rijndael-ssse3-amd64.c     | 91 ++++++---------------------------------
 cipher/rijndael.c                 | 38 ----------------
 cipher/twofish-amd64.S            | 36 +++++++++++-----
 cipher/twofish.c                  | 87 -------------------------------------
 configure.ac                      | 11 ++++-
 17 files changed, 223 insertions(+), 412 deletions(-)
 create mode 100644 cipher/asm-common-amd64.h


hooks/post-receive
-- 
The GNU crypto library
http://git.gnupg.org




More information about the Gnupg-commits mailing list