[PATCH 08/10] Enable AMD64 Salsa20 implementation on WIN64
Jussi Kivilinna
jussi.kivilinna at iki.fi
Thu May 14 13:11:39 CEST 2015
* cipher/salsa20-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/salsa20.c (USE_AMD64): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_AMD64] (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
(_gcry_salsa20_amd64_keysetup, _gcry_salsa20_amd64_ivsetup)
(_gcry_salsa20_amd64_encrypt_blocks): Add ASM_FUNC_ABI.
[USE_AMD64] (salsa20_core): Add ASM_EXTRA_STACK.
(salsa20_do_encrypt_stream) [USE_AMD64]: Add ASM_EXTRA_STACK.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/salsa20-amd64.S | 17 ++++++++++++-----
cipher/salsa20.c | 26 +++++++++++++++++++++-----
2 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S
index 7046dbb..470c32a 100644
--- a/cipher/salsa20-amd64.S
+++ b/cipher/salsa20-amd64.S
@@ -25,13 +25,20 @@
#ifdef __x86_64
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SALSA20)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
.text
.align 8
.globl _gcry_salsa20_amd64_keysetup
-.type _gcry_salsa20_amd64_keysetup, at function;
+ELF(.type _gcry_salsa20_amd64_keysetup, at function;)
_gcry_salsa20_amd64_keysetup:
movl 0(%rsi),%r8d
movl 4(%rsi),%r9d
@@ -83,7 +90,7 @@ _gcry_salsa20_amd64_keysetup:
.align 8
.globl _gcry_salsa20_amd64_ivsetup
-.type _gcry_salsa20_amd64_ivsetup, at function;
+ELF(.type _gcry_salsa20_amd64_ivsetup, at function;)
_gcry_salsa20_amd64_ivsetup:
movl 0(%rsi),%r8d
movl 4(%rsi),%esi
@@ -97,7 +104,7 @@ _gcry_salsa20_amd64_ivsetup:
.align 8
.globl _gcry_salsa20_amd64_encrypt_blocks
-.type _gcry_salsa20_amd64_encrypt_blocks, at function;
+ELF(.type _gcry_salsa20_amd64_encrypt_blocks, at function;)
_gcry_salsa20_amd64_encrypt_blocks:
/*
* Modifications to original implementation:
@@ -918,7 +925,7 @@ _gcry_salsa20_amd64_encrypt_blocks:
add $64,%rdi
add $64,%rsi
jmp .L_bytes_are_64_128_or_192
-.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;
+ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
#endif /*defined(USE_SALSA20)*/
#endif /*__x86_64*/
diff --git a/cipher/salsa20.c b/cipher/salsa20.c
index d75fe51..fa3d23b 100644
--- a/cipher/salsa20.c
+++ b/cipher/salsa20.c
@@ -43,7 +43,8 @@
/* USE_AMD64 indicates whether to compile with AMD64 code. */
#undef USE_AMD64
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64 1
#endif
@@ -118,12 +119,25 @@ static const char *selftest (void);
#ifdef USE_AMD64
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
/* AMD64 assembly implementations of Salsa20. */
-void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits);
-void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv);
+void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits)
+ ASM_FUNC_ABI;
+void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv)
+ ASM_FUNC_ABI;
unsigned int
_gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst,
- size_t len, int rounds);
+ size_t len, int rounds) ASM_FUNC_ABI;
static void
salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
@@ -141,7 +155,8 @@ static unsigned int
salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
{
memset(dst, 0, SALSA20_BLOCK_SIZE);
- return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds);
+ return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds)
+ + ASM_EXTRA_STACK;
}
#else /* USE_AMD64 */
@@ -418,6 +433,7 @@ salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
size_t nblocks = length / SALSA20_BLOCK_SIZE;
burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
nblocks, rounds);
+ burn += ASM_EXTRA_STACK;
length -= SALSA20_BLOCK_SIZE * nblocks;
outbuf += SALSA20_BLOCK_SIZE * nblocks;
inbuf += SALSA20_BLOCK_SIZE * nblocks;
More information about the Gcrypt-devel
mailing list