[PATCH 07/10] Enable AMD64 Poly1305 implementations on WIN64
Jussi Kivilinna
jussi.kivilinna at iki.fi
Thu May 14 13:11:34 CEST 2015
* cipher/poly1305-avx2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/poly1305-sse2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/poly1305-internal.h (POLY1305_SYSV_FUNC_ABI): New.
(POLY1305_USE_SSE2, POLY1305_USE_AVX2): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(OPS_FUNC_ABI): New.
(poly1305_ops_t): Use OPS_FUNC_ABI.
* cipher/poly1305.c (_gcry_poly1305_amd64_sse2_init_ext)
(_gcry_poly1305_amd64_sse2_finish_ext)
(_gcry_poly1305_amd64_sse2_blocks, _gcry_poly1305_amd64_avx2_init_ext)
(_gcry_poly1305_amd64_avx2_finish_ext)
(_gcry_poly1305_amd64_avx2_blocks, _gcry_poly1305_armv7_neon_init_ext)
(_gcry_poly1305_armv7_neon_finish_ext)
(_gcry_poly1305_armv7_neon_blocks, poly1305_init_ext_ref32)
(poly1305_blocks_ref32, poly1305_finish_ext_ref32)
(poly1305_init_ext_ref8, poly1305_blocks_ref8)
(poly1305_finish_ext_ref8): Use OPS_FUNC_ABI.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/poly1305-avx2-amd64.S | 22 +++++++++++++++-------
cipher/poly1305-internal.h | 27 ++++++++++++++++++++++-----
cipher/poly1305-sse2-amd64.S | 22 +++++++++++++++-------
cipher/poly1305.c | 33 ++++++++++++++++++---------------
4 files changed, 70 insertions(+), 34 deletions(-)
diff --git a/cipher/poly1305-avx2-amd64.S b/cipher/poly1305-avx2-amd64.S
index 0ba7e76..9362a5a 100644
--- a/cipher/poly1305-avx2-amd64.S
+++ b/cipher/poly1305-avx2-amd64.S
@@ -25,15 +25,23 @@
#include <config.h>
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AVX2_SUPPORT)
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
.text
.align 8
.globl _gcry_poly1305_amd64_avx2_init_ext
-.type _gcry_poly1305_amd64_avx2_init_ext,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_init_ext,@function;)
_gcry_poly1305_amd64_avx2_init_ext:
.Lpoly1305_init_ext_avx2_local:
xor %edx, %edx
@@ -391,12 +399,12 @@ _gcry_poly1305_amd64_avx2_init_ext:
popq %r13
popq %r12
ret
-.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;)
.align 8
.globl _gcry_poly1305_amd64_avx2_blocks
-.type _gcry_poly1305_amd64_avx2_blocks,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_blocks,@function;)
_gcry_poly1305_amd64_avx2_blocks:
.Lpoly1305_blocks_avx2_local:
vzeroupper
@@ -717,12 +725,12 @@ _gcry_poly1305_amd64_avx2_blocks:
leave
addq $8, %rax
ret
-.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;
+ELF(.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;)
.align 8
.globl _gcry_poly1305_amd64_avx2_finish_ext
-.type _gcry_poly1305_amd64_avx2_finish_ext,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_finish_ext,@function;)
_gcry_poly1305_amd64_avx2_finish_ext:
.Lpoly1305_finish_ext_avx2_local:
vzeroupper
@@ -949,6 +957,6 @@ _gcry_poly1305_amd64_avx2_finish_ext:
popq %rbp
addq $(8*5), %rax
ret
-.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;)
#endif
diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h
index dfc0c04..bcbe5df 100644
--- a/cipher/poly1305-internal.h
+++ b/cipher/poly1305-internal.h
@@ -44,24 +44,30 @@
#define POLY1305_REF_ALIGNMENT sizeof(void *)
+#undef POLY1305_SYSV_FUNC_ABI
+
/* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
#undef POLY1305_USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define POLY1305_USE_SSE2 1
# define POLY1305_SSE2_BLOCKSIZE 32
# define POLY1305_SSE2_STATESIZE 248
# define POLY1305_SSE2_ALIGNMENT 16
+# define POLY1305_SYSV_FUNC_ABI 1
#endif
/* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
#undef POLY1305_USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AVX2_SUPPORT)
# define POLY1305_USE_AVX2 1
# define POLY1305_AVX2_BLOCKSIZE 64
# define POLY1305_AVX2_STATESIZE 328
# define POLY1305_AVX2_ALIGNMENT 32
+# define POLY1305_SYSV_FUNC_ABI 1
#endif
@@ -112,6 +118,17 @@
#endif
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef OPS_FUNC_ABI
+#if defined(POLY1305_SYSV_FUNC_ABI) && \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define OPS_FUNC_ABI __attribute__((sysv_abi))
+#else
+# define OPS_FUNC_ABI
+#endif
+
+
typedef struct poly1305_key_s
{
byte b[POLY1305_KEYLEN];
@@ -121,10 +138,10 @@ typedef struct poly1305_key_s
typedef struct poly1305_ops_s
{
size_t block_size;
- void (*init_ext) (void *ctx, const poly1305_key_t * key);
- unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes);
+ void (*init_ext) (void *ctx, const poly1305_key_t * key) OPS_FUNC_ABI;
+ unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes) OPS_FUNC_ABI;
unsigned int (*finish_ext) (void *ctx, const byte * m, size_t remaining,
- byte mac[POLY1305_TAGLEN]);
+ byte mac[POLY1305_TAGLEN]) OPS_FUNC_ABI;
} poly1305_ops_t;
diff --git a/cipher/poly1305-sse2-amd64.S b/cipher/poly1305-sse2-amd64.S
index 106b119..219eb07 100644
--- a/cipher/poly1305-sse2-amd64.S
+++ b/cipher/poly1305-sse2-amd64.S
@@ -25,14 +25,22 @@
#include <config.h>
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
.text
.align 8
.globl _gcry_poly1305_amd64_sse2_init_ext
-.type _gcry_poly1305_amd64_sse2_init_ext,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_init_ext,@function;)
_gcry_poly1305_amd64_sse2_init_ext:
.Lpoly1305_init_ext_x86_local:
xor %edx, %edx
@@ -273,12 +281,12 @@ _gcry_poly1305_amd64_sse2_init_ext:
popq %r13
popq %r12
ret
-.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;)
.align 8
.globl _gcry_poly1305_amd64_sse2_finish_ext
-.type _gcry_poly1305_amd64_sse2_finish_ext,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_finish_ext,@function;)
_gcry_poly1305_amd64_sse2_finish_ext:
.Lpoly1305_finish_ext_x86_local:
pushq %rbp
@@ -424,12 +432,12 @@ _gcry_poly1305_amd64_sse2_finish_ext:
popq %rbp
addq $8, %rax
ret
-.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;)
.align 8
.globl _gcry_poly1305_amd64_sse2_blocks
-.type _gcry_poly1305_amd64_sse2_blocks,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_blocks,@function;)
_gcry_poly1305_amd64_sse2_blocks:
.Lpoly1305_blocks_x86_local:
pushq %rbp
@@ -1030,6 +1038,6 @@ _gcry_poly1305_amd64_sse2_blocks:
pxor %xmm8, %xmm8
pxor %xmm0, %xmm0
ret
-.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;
+ELF(.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;)
#endif
diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index 28dbbf8..1adf0e7 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -40,12 +40,13 @@ static const char *selftest (void);
#ifdef POLY1305_USE_SSE2
-void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_amd64_sse2_ops = {
POLY1305_SSE2_BLOCKSIZE,
@@ -59,12 +60,13 @@ static const poly1305_ops_t poly1305_amd64_sse2_ops = {
#ifdef POLY1305_USE_AVX2
-void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_avx2_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_avx2_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_amd64_avx2_ops = {
POLY1305_AVX2_BLOCKSIZE,
@@ -78,12 +80,13 @@ static const poly1305_ops_t poly1305_amd64_avx2_ops = {
#ifdef POLY1305_USE_NEON
-void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_armv7_neon_ops = {
POLY1305_NEON_BLOCKSIZE,
@@ -110,7 +113,7 @@ typedef struct poly1305_state_ref32_s
} poly1305_state_ref32_t;
-static void
+static OPS_FUNC_ABI void
poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
{
poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
@@ -142,7 +145,7 @@ poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
{
poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
@@ -230,7 +233,7 @@ poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_finish_ext_ref32 (void *state, const byte * m,
size_t remaining, byte mac[POLY1305_TAGLEN])
{
@@ -370,7 +373,7 @@ typedef struct poly1305_state_ref8_t
} poly1305_state_ref8_t;
-static void
+static OPS_FUNC_ABI void
poly1305_init_ext_ref8 (void *state, const poly1305_key_t * key)
{
poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
@@ -471,7 +474,7 @@ poly1305_freeze_ref8 (byte h[17])
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
{
poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
@@ -519,7 +522,7 @@ poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_finish_ext_ref8 (void *state, const byte * m, size_t remaining,
byte mac[POLY1305_TAGLEN])
{
More information about the Gcrypt-devel
mailing list