[PATCH 09/10] Enable AMD64 Serpent implementations on WIN64

Jussi Kivilinna jussi.kivilinna at iki.fi
Thu May 14 13:11:44 CEST 2015


* cipher/serpent-avx2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/serpent-sse2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/serpent.c (USE_SSE2, USE_AVX2): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_SSE2 || USE_AVX2] (ASM_FUNC_ABI): New.
(_gcry_serpent_sse2_ctr_enc, _gcry_serpent_sse2_cbc_dec)
(_gcry_serpent_sse2_cfb_dec, _gcry_serpent_avx2_ctr_enc)
(_gcry_serpent_avx2_cbc_dec, _gcry_serpent_avx2_cfb_dec): Add
ASM_FUNC_ABI.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/serpent-avx2-amd64.S |   29 ++++++++++++++++++-----------
 cipher/serpent-sse2-amd64.S |   29 ++++++++++++++++++-----------
 cipher/serpent.c            |   30 ++++++++++++++++++++++--------
 3 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 03d29ae..3f59f06 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -20,9 +20,16 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SERPENT) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \
     defined(ENABLE_AVX2_SUPPORT)
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 #ifdef __PIC__
 #  define RIP (%rip)
 #else
@@ -404,7 +411,7 @@
 .text
 
 .align 8
-.type   __serpent_enc_blk16,@function;
+ELF(.type   __serpent_enc_blk16,@function;)
 __serpent_enc_blk16:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -489,10 +496,10 @@ __serpent_enc_blk16:
 	transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
 
 	ret;
-.size __serpent_enc_blk16,.-__serpent_enc_blk16;
+ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
 
 .align 8
-.type   __serpent_dec_blk16,@function;
+ELF(.type   __serpent_dec_blk16,@function;)
 __serpent_dec_blk16:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -579,7 +586,7 @@ __serpent_dec_blk16:
 	transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
 
 	ret;
-.size __serpent_dec_blk16,.-__serpent_dec_blk16;
+ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
 
 #define inc_le128(x, minus_one, tmp) \
 	vpcmpeqq minus_one, x, tmp; \
@@ -589,7 +596,7 @@ __serpent_dec_blk16:
 
 .align 8
 .globl _gcry_serpent_avx2_ctr_enc
-.type   _gcry_serpent_avx2_ctr_enc,@function;
+ELF(.type   _gcry_serpent_avx2_ctr_enc,@function;)
 _gcry_serpent_avx2_ctr_enc:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -695,11 +702,11 @@ _gcry_serpent_avx2_ctr_enc:
 	vzeroall;
 
 	ret
-.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;
+ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
 
 .align 8
 .globl _gcry_serpent_avx2_cbc_dec
-.type   _gcry_serpent_avx2_cbc_dec,@function;
+ELF(.type   _gcry_serpent_avx2_cbc_dec,@function;)
 _gcry_serpent_avx2_cbc_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -746,11 +753,11 @@ _gcry_serpent_avx2_cbc_dec:
 	vzeroall;
 
 	ret
-.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;
+ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
 
 .align 8
 .globl _gcry_serpent_avx2_cfb_dec
-.type   _gcry_serpent_avx2_cfb_dec,@function;
+ELF(.type   _gcry_serpent_avx2_cfb_dec,@function;)
 _gcry_serpent_avx2_cfb_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -799,7 +806,7 @@ _gcry_serpent_avx2_cfb_dec:
 	vzeroall;
 
 	ret
-.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;
+ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
 
 .data
 .align 16
diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S
index 395f660..adbf4e2 100644
--- a/cipher/serpent-sse2-amd64.S
+++ b/cipher/serpent-sse2-amd64.S
@@ -20,7 +20,14 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SERPENT)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 #ifdef __PIC__
 #  define RIP (%rip)
@@ -427,7 +434,7 @@
 .text
 
 .align 8
-.type   __serpent_enc_blk8,@function;
+ELF(.type   __serpent_enc_blk8,@function;)
 __serpent_enc_blk8:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -512,10 +519,10 @@ __serpent_enc_blk8:
 	transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
 
 	ret;
-.size __serpent_enc_blk8,.-__serpent_enc_blk8;
+ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
 
 .align 8
-.type   __serpent_dec_blk8,@function;
+ELF(.type   __serpent_dec_blk8,@function;)
 __serpent_dec_blk8:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -602,11 +609,11 @@ __serpent_dec_blk8:
 	transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
 
 	ret;
-.size __serpent_dec_blk8,.-__serpent_dec_blk8;
+ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
 
 .align 8
 .globl _gcry_serpent_sse2_ctr_enc
-.type   _gcry_serpent_sse2_ctr_enc,@function;
+ELF(.type   _gcry_serpent_sse2_ctr_enc,@function;)
 _gcry_serpent_sse2_ctr_enc:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -732,11 +739,11 @@ _gcry_serpent_sse2_ctr_enc:
 	pxor RNOT, RNOT;
 
 	ret
-.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;
+ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
 
 .align 8
 .globl _gcry_serpent_sse2_cbc_dec
-.type   _gcry_serpent_sse2_cbc_dec,@function;
+ELF(.type   _gcry_serpent_sse2_cbc_dec,@function;)
 _gcry_serpent_sse2_cbc_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -793,11 +800,11 @@ _gcry_serpent_sse2_cbc_dec:
 	pxor RNOT, RNOT;
 
 	ret
-.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;
+ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
 
 .align 8
 .globl _gcry_serpent_sse2_cfb_dec
-.type   _gcry_serpent_sse2_cfb_dec,@function;
+ELF(.type   _gcry_serpent_sse2_cfb_dec,@function;)
 _gcry_serpent_sse2_cfb_dec:
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -857,7 +864,7 @@ _gcry_serpent_sse2_cfb_dec:
 	pxor RNOT, RNOT;
 
 	ret
-.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;
+ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
 
 #endif /*defined(USE_SERPENT)*/
 #endif /*__x86_64*/
diff --git a/cipher/serpent.c b/cipher/serpent.c
index 0be49da..7d0e112 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -34,13 +34,15 @@
 
 /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
 #undef USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSE2 1
 #endif
 
 /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
 #undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # if defined(ENABLE_AVX2_SUPPORT)
 #  define USE_AVX2 1
 # endif
@@ -86,6 +88,18 @@ typedef struct serpent_context
 } serpent_context_t;
 
 
+/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and
+ * additional stack to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#if defined(USE_SSE2) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+#  define ASM_FUNC_ABI
+# endif
+#endif
+
+
 #ifdef USE_SSE2
 /* Assembler implementations of Serpent using SSE2.  Process 8 block in
    parallel.
@@ -93,17 +107,17 @@ typedef struct serpent_context
 extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *ctr);
+				       unsigned char *ctr) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *iv);
+				       unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *iv);
+				       unsigned char *iv) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX2
@@ -113,17 +127,17 @@ extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
 extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *ctr);
+				       unsigned char *ctr) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *iv);
+				       unsigned char *iv) ASM_FUNC_ABI;
 
 extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
 				       unsigned char *out,
 				       const unsigned char *in,
-				       unsigned char *iv);
+				       unsigned char *iv) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_NEON




More information about the Gcrypt-devel mailing list