[PATCH 07/10] Enable AMD64 Poly1305 implementations on WIN64

Jussi Kivilinna jussi.kivilinna at iki.fi
Thu May 14 13:11:34 CEST 2015


* cipher/poly1305-avx2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/poly1305-sse2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/poly1305-internal.h (POLY1305_SYSV_FUNC_ABI): New.
(POLY1305_USE_SSE2, POLY1305_USE_AVX2): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(OPS_FUNC_ABI): New.
(poly1305_ops_t): Use OPS_FUNC_ABI.
* cipher/poly1305.c (_gcry_poly1305_amd64_sse2_init_ext)
(_gcry_poly1305_amd64_sse2_finish_ext)
(_gcry_poly1305_amd64_sse2_blocks, _gcry_poly1305_amd64_avx2_init_ext)
(_gcry_poly1305_amd64_avx2_finish_ext)
(_gcry_poly1305_amd64_avx2_blocks, _gcry_poly1305_armv7_neon_init_ext)
(_gcry_poly1305_armv7_neon_finish_ext)
(_gcry_poly1305_armv7_neon_blocks, poly1305_init_ext_ref32)
(poly1305_blocks_ref32, poly1305_finish_ext_ref32)
(poly1305_init_ext_ref8, poly1305_blocks_ref8)
(poly1305_finish_ext_ref8): Use OPS_FUNC_ABI.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/poly1305-avx2-amd64.S |   22 +++++++++++++++-------
 cipher/poly1305-internal.h   |   27 ++++++++++++++++++++++-----
 cipher/poly1305-sse2-amd64.S |   22 +++++++++++++++-------
 cipher/poly1305.c            |   33 ++++++++++++++++++---------------
 4 files changed, 70 insertions(+), 34 deletions(-)

diff --git a/cipher/poly1305-avx2-amd64.S b/cipher/poly1305-avx2-amd64.S
index 0ba7e76..9362a5a 100644
--- a/cipher/poly1305-avx2-amd64.S
+++ b/cipher/poly1305-avx2-amd64.S
@@ -25,15 +25,23 @@
 
 #include <config.h>
 
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(ENABLE_AVX2_SUPPORT)
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
 .text
 
 
 .align 8
 .globl _gcry_poly1305_amd64_avx2_init_ext
-.type  _gcry_poly1305_amd64_avx2_init_ext,@function;
+ELF(.type  _gcry_poly1305_amd64_avx2_init_ext,@function;)
 _gcry_poly1305_amd64_avx2_init_ext:
 .Lpoly1305_init_ext_avx2_local:
 	xor %edx, %edx
@@ -391,12 +399,12 @@ _gcry_poly1305_amd64_avx2_init_ext:
 	popq %r13
 	popq %r12
 	ret
-.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;)
 
 
 .align 8
 .globl _gcry_poly1305_amd64_avx2_blocks
-.type  _gcry_poly1305_amd64_avx2_blocks,@function;
+ELF(.type  _gcry_poly1305_amd64_avx2_blocks,@function;)
 _gcry_poly1305_amd64_avx2_blocks:
 .Lpoly1305_blocks_avx2_local:
 	vzeroupper
@@ -717,12 +725,12 @@ _gcry_poly1305_amd64_avx2_blocks:
 	leave
 	addq $8, %rax
 	ret
-.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;
+ELF(.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;)
 
 
 .align 8
 .globl _gcry_poly1305_amd64_avx2_finish_ext
-.type  _gcry_poly1305_amd64_avx2_finish_ext,@function;
+ELF(.type  _gcry_poly1305_amd64_avx2_finish_ext,@function;)
 _gcry_poly1305_amd64_avx2_finish_ext:
 .Lpoly1305_finish_ext_avx2_local:
 	vzeroupper
@@ -949,6 +957,6 @@ _gcry_poly1305_amd64_avx2_finish_ext:
 	popq %rbp
 	addq $(8*5), %rax
 ret
-.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;)
 
 #endif
diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h
index dfc0c04..bcbe5df 100644
--- a/cipher/poly1305-internal.h
+++ b/cipher/poly1305-internal.h
@@ -44,24 +44,30 @@
 #define POLY1305_REF_ALIGNMENT sizeof(void *)
 
 
+#undef POLY1305_SYSV_FUNC_ABI
+
 /* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
 #undef POLY1305_USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define POLY1305_USE_SSE2 1
 # define POLY1305_SSE2_BLOCKSIZE 32
 # define POLY1305_SSE2_STATESIZE 248
 # define POLY1305_SSE2_ALIGNMENT 16
+# define POLY1305_SYSV_FUNC_ABI 1
 #endif
 
 
 /* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
 #undef POLY1305_USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(ENABLE_AVX2_SUPPORT)
 # define POLY1305_USE_AVX2 1
 # define POLY1305_AVX2_BLOCKSIZE 64
 # define POLY1305_AVX2_STATESIZE 328
 # define POLY1305_AVX2_ALIGNMENT 32
+# define POLY1305_SYSV_FUNC_ABI 1
 #endif
 
 
@@ -112,6 +118,17 @@
 #endif
 
 
+/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and
+ * additional stack space to store XMM6-XMM15 are needed. */
+#undef OPS_FUNC_ABI
+#if defined(POLY1305_SYSV_FUNC_ABI) && \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define OPS_FUNC_ABI __attribute__((sysv_abi))
+#else
+# define OPS_FUNC_ABI
+#endif
+
+
 typedef struct poly1305_key_s
 {
   byte b[POLY1305_KEYLEN];
@@ -121,10 +138,10 @@ typedef struct poly1305_key_s
 typedef struct poly1305_ops_s
 {
   size_t block_size;
-  void (*init_ext) (void *ctx, const poly1305_key_t * key);
-  unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes);
+  void (*init_ext) (void *ctx, const poly1305_key_t * key) OPS_FUNC_ABI;
+  unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes) OPS_FUNC_ABI;
   unsigned int (*finish_ext) (void *ctx, const byte * m, size_t remaining,
-			      byte mac[POLY1305_TAGLEN]);
+			      byte mac[POLY1305_TAGLEN]) OPS_FUNC_ABI;
 } poly1305_ops_t;
 
 
diff --git a/cipher/poly1305-sse2-amd64.S b/cipher/poly1305-sse2-amd64.S
index 106b119..219eb07 100644
--- a/cipher/poly1305-sse2-amd64.S
+++ b/cipher/poly1305-sse2-amd64.S
@@ -25,14 +25,22 @@
 
 #include <config.h>
 
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 
 .text
 
 
 .align 8
 .globl _gcry_poly1305_amd64_sse2_init_ext
-.type  _gcry_poly1305_amd64_sse2_init_ext,@function;
+ELF(.type  _gcry_poly1305_amd64_sse2_init_ext,@function;)
 _gcry_poly1305_amd64_sse2_init_ext:
 .Lpoly1305_init_ext_x86_local:
 	xor %edx, %edx
@@ -273,12 +281,12 @@ _gcry_poly1305_amd64_sse2_init_ext:
 	popq %r13
 	popq %r12
 	ret
-.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;)
 
 
 .align 8
 .globl _gcry_poly1305_amd64_sse2_finish_ext
-.type  _gcry_poly1305_amd64_sse2_finish_ext,@function;
+ELF(.type  _gcry_poly1305_amd64_sse2_finish_ext,@function;)
 _gcry_poly1305_amd64_sse2_finish_ext:
 .Lpoly1305_finish_ext_x86_local:
 	pushq %rbp
@@ -424,12 +432,12 @@ _gcry_poly1305_amd64_sse2_finish_ext:
 	popq %rbp
 	addq $8, %rax
 	ret
-.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;)
 
 
 .align 8
 .globl _gcry_poly1305_amd64_sse2_blocks
-.type  _gcry_poly1305_amd64_sse2_blocks,@function;
+ELF(.type  _gcry_poly1305_amd64_sse2_blocks,@function;)
 _gcry_poly1305_amd64_sse2_blocks:
 .Lpoly1305_blocks_x86_local:
 	pushq %rbp
@@ -1030,6 +1038,6 @@ _gcry_poly1305_amd64_sse2_blocks:
 	pxor %xmm8, %xmm8
 	pxor %xmm0, %xmm0
 	ret
-.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;
+ELF(.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;)
 
 #endif
diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index 28dbbf8..1adf0e7 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -40,12 +40,13 @@ static const char *selftest (void);
 
 #ifdef POLY1305_USE_SSE2
 
-void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key)
+                                       OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m,
 						  size_t remaining,
-						  byte mac[16]);
+						  byte mac[16]) OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m,
-					      size_t bytes);
+					      size_t bytes) OPS_FUNC_ABI;
 
 static const poly1305_ops_t poly1305_amd64_sse2_ops = {
   POLY1305_SSE2_BLOCKSIZE,
@@ -59,12 +60,13 @@ static const poly1305_ops_t poly1305_amd64_sse2_ops = {
 
 #ifdef POLY1305_USE_AVX2
 
-void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key)
+                                       OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_amd64_avx2_finish_ext(void *state, const byte *m,
 						  size_t remaining,
-						  byte mac[16]);
+						  byte mac[16]) OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_amd64_avx2_blocks(void *ctx, const byte *m,
-					      size_t bytes);
+					      size_t bytes) OPS_FUNC_ABI;
 
 static const poly1305_ops_t poly1305_amd64_avx2_ops = {
   POLY1305_AVX2_BLOCKSIZE,
@@ -78,12 +80,13 @@ static const poly1305_ops_t poly1305_amd64_avx2_ops = {
 
 #ifdef POLY1305_USE_NEON
 
-void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key)
+                                       OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m,
 						  size_t remaining,
-						  byte mac[16]);
+						  byte mac[16]) OPS_FUNC_ABI;
 unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m,
-					      size_t bytes);
+					      size_t bytes) OPS_FUNC_ABI;
 
 static const poly1305_ops_t poly1305_armv7_neon_ops = {
   POLY1305_NEON_BLOCKSIZE,
@@ -110,7 +113,7 @@ typedef struct poly1305_state_ref32_s
 } poly1305_state_ref32_t;
 
 
-static void
+static OPS_FUNC_ABI void
 poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
 {
   poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
@@ -142,7 +145,7 @@ poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
 }
 
 
-static unsigned int
+static OPS_FUNC_ABI unsigned int
 poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
 {
   poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
@@ -230,7 +233,7 @@ poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
 }
 
 
-static unsigned int
+static OPS_FUNC_ABI unsigned int
 poly1305_finish_ext_ref32 (void *state, const byte * m,
 			   size_t remaining, byte mac[POLY1305_TAGLEN])
 {
@@ -370,7 +373,7 @@ typedef struct poly1305_state_ref8_t
 } poly1305_state_ref8_t;
 
 
-static void
+static OPS_FUNC_ABI void
 poly1305_init_ext_ref8 (void *state, const poly1305_key_t * key)
 {
   poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
@@ -471,7 +474,7 @@ poly1305_freeze_ref8 (byte h[17])
 }
 
 
-static unsigned int
+static OPS_FUNC_ABI unsigned int
 poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
 {
   poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
@@ -519,7 +522,7 @@ poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
 }
 
 
-static unsigned int
+static OPS_FUNC_ABI unsigned int
 poly1305_finish_ext_ref8 (void *state, const byte * m, size_t remaining,
 			  byte mac[POLY1305_TAGLEN])
 {




More information about the Gcrypt-devel mailing list