[PATCH 8/8] Enable AES/AES-NI, AES/SSSE3 and GCM/PCLMUL implementations on WIN64
Jussi Kivilinna
jussi.kivilinna at iki.fi
Fri May 1 19:40:09 CEST 2015
* cipher/cipher-gcm-intel-pclmul.c (_gcry_ghash_setup_intel_pclmul)
(_gcry_ghash_intel_pclmul) [__WIN64__]: Store non-volatile vector
registers before use and restore after.
* cipher/cipher-internal.h (GCM_USE_INTEL_PCLMUL): Remove dependency
on !defined(__WIN64__).
* cipher/rijndael-aesni.c [__WIN64__] (aesni_prepare_2_6_variable,
aesni_prepare, aesni_prepare_2_6, aesni_cleanup)
(aesni_cleanup_2_6): New.
[!__WIN64__] (aesni_prepare_2_6_variable, aesni_prepare_2_6): New.
(_gcry_aes_aesni_do_setkey, _gcry_aes_aesni_cbc_enc)
(_gcry_aes_aesni_ctr_enc, _gcry_aes_aesni_cfb_dec)
(_gcry_aes_aesni_cbc_dec, aesni_ocb_enc, aesni_ocb_dec)
(_gcry_aes_aesni_ocb_auth): Use 'aesni_prepare_2_6'.
* cipher/rijndael-internal.h (USE_SSSE3): Enable if
HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS or
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS.
(USE_AESNI): Remove dependency on !defined(__WIN64__).
* cipher/rijndael-ssse3-amd64.c [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]
(vpaes_ssse3_prepare, vpaes_ssse3_cleanup): New.
[!HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (vpaes_ssse3_prepare): New.
(vpaes_ssse3_prepare_enc, vpaes_ssse3_prepare_dec): Use
'vpaes_ssse3_prepare'.
(_gcry_aes_ssse3_do_setkey, _gcry_aes_ssse3_prepare_decryption): Use
'vpaes_ssse3_prepare' and 'vpaes_ssse3_cleanup'.
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (X): Add masking macro to
exclude '.type' and '.size' markers from assembly code, as they are
not supported on WIN64/COFF objects.
* configure.ac (gcry_cv_gcc_attribute_ms_abi)
(gcry_cv_gcc_attribute_sysv_abi, gcry_cv_gcc_default_abi_is_ms_abi)
(gcry_cv_gcc_default_abi_is_sysv_abi)
(gcry_cv_gcc_win64_platform_as_ok): New checks.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/cipher-gcm-intel-pclmul.c | 72 +++++++++++++++++++++++++
cipher/cipher-internal.h | 4 -
cipher/rijndael-aesni.c | 73 +++++++++++++++++++++-----
cipher/rijndael-internal.h | 9 +--
cipher/rijndael-ssse3-amd64.c | 94 ++++++++++++++++++++++++++-------
configure.ac | 108 +++++++++++++++++++++++++++++++++++++-
6 files changed, 317 insertions(+), 43 deletions(-)
diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c
index 79648ce..a327249 100644
--- a/cipher/cipher-gcm-intel-pclmul.c
+++ b/cipher/cipher-gcm-intel-pclmul.c
@@ -249,6 +249,17 @@ void
_gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
{
u64 tmp[2];
+#if defined(__x86_64__) && defined(__WIN64__)
+ char win64tmp[3 * 16];
+
+ /* XMM6-XMM8 need to be restored after use. */
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+ "movdqu %%xmm7, 1*16(%0)\n\t"
+ "movdqu %%xmm8, 2*16(%0)\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory");
+#endif
/* Swap endianness of hsub. */
tmp[0] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 8);
@@ -285,6 +296,21 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
: [h_234] "r" (c->u_mode.gcm.gcm_table)
: "memory");
+#ifdef __WIN64__
+ /* Clear/restore used registers. */
+ asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm5\n\t"
+ "movdqu 0*16(%0), %%xmm6\n\t"
+ "movdqu 1*16(%0), %%xmm7\n\t"
+ "movdqu 2*16(%0), %%xmm8\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory");
+#else
/* Clear used registers. */
asm volatile( "pxor %%xmm0, %%xmm0\n\t"
"pxor %%xmm1, %%xmm1\n\t"
@@ -297,6 +323,7 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
"pxor %%xmm8, %%xmm8\n\t"
::: "cc" );
#endif
+#endif
wipememory (tmp, sizeof(tmp));
}
@@ -309,10 +336,30 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
{ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+#ifdef __WIN64__
+ char win64tmp[10 * 16];
+#endif
if (nblocks == 0)
return 0;
+#ifdef __WIN64__
+ /* XMM6-XMM15 need to be restored after use. */
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+ "movdqu %%xmm7, 1*16(%0)\n\t"
+ "movdqu %%xmm8, 2*16(%0)\n\t"
+ "movdqu %%xmm9, 3*16(%0)\n\t"
+ "movdqu %%xmm10, 4*16(%0)\n\t"
+ "movdqu %%xmm11, 5*16(%0)\n\t"
+ "movdqu %%xmm12, 6*16(%0)\n\t"
+ "movdqu %%xmm13, 7*16(%0)\n\t"
+ "movdqu %%xmm14, 8*16(%0)\n\t"
+ "movdqu %%xmm15, 9*16(%0)\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory" );
+#endif
+
/* Preload hash and H1. */
asm volatile ("movdqu %[hash], %%xmm1\n\t"
"movdqa %[hsub], %%xmm0\n\t"
@@ -353,6 +400,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
}
while (nblocks >= 4);
+#ifndef __WIN64__
/* Clear used x86-64/XMM registers. */
asm volatile( "pxor %%xmm8, %%xmm8\n\t"
"pxor %%xmm9, %%xmm9\n\t"
@@ -363,6 +411,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
"pxor %%xmm14, %%xmm14\n\t"
"pxor %%xmm15, %%xmm15\n\t"
::: "cc" );
+#endif
}
#endif
@@ -385,6 +434,28 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
: [hash] "=m" (*result)
: [be_mask] "m" (*be_mask));
+#ifdef __WIN64__
+ /* Clear/restore used registers. */
+ asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm5\n\t"
+ "movdqu 0*16(%0), %%xmm6\n\t"
+ "movdqu 1*16(%0), %%xmm7\n\t"
+ "movdqu 2*16(%0), %%xmm8\n\t"
+ "movdqu 3*16(%0), %%xmm9\n\t"
+ "movdqu 4*16(%0), %%xmm10\n\t"
+ "movdqu 5*16(%0), %%xmm11\n\t"
+ "movdqu 6*16(%0), %%xmm12\n\t"
+ "movdqu 7*16(%0), %%xmm13\n\t"
+ "movdqu 8*16(%0), %%xmm14\n\t"
+ "movdqu 9*16(%0), %%xmm15\n\t"
+ :
+ : "r" (win64tmp)
+ : "memory" );
+#else
/* Clear used registers. */
asm volatile( "pxor %%xmm0, %%xmm0\n\t"
"pxor %%xmm1, %%xmm1\n\t"
@@ -395,6 +466,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
"pxor %%xmm6, %%xmm6\n\t"
"pxor %%xmm7, %%xmm7\n\t"
::: "cc" );
+#endif
return 0;
}
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 693f218..e20ea56 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -67,9 +67,7 @@
#if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
# if __GNUC__ >= 4
-# ifndef __WIN64__
-# define GCM_USE_INTEL_PCLMUL 1
-# endif
+# define GCM_USE_INTEL_PCLMUL 1
# endif
# endif
#endif /* GCM_USE_INTEL_PCLMUL */
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 147679f..910bc68 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -49,24 +49,54 @@ typedef struct u128_s { u32 a, b, c, d; } u128_t;
the use of these macros. There purpose is to make sure that the
SSE regsiters are cleared and won't reveal any information about
the key or the data. */
-#define aesni_prepare() do { } while (0)
-#define aesni_cleanup() \
- do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
- "pxor %%xmm1, %%xmm1\n" :: ); \
- } while (0)
-#define aesni_cleanup_2_6() \
- do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \
- "pxor %%xmm3, %%xmm3\n" \
- "pxor %%xmm4, %%xmm4\n" \
- "pxor %%xmm5, %%xmm5\n" \
- "pxor %%xmm6, %%xmm6\n":: ); \
- } while (0)
-
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define aesni_prepare_2_6_variable char win64tmp[16]
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_6() \
+ do { asm volatile ("movdqu %%xmm6, %0\n\t" \
+ : "=m" (*win64tmp) \
+ : \
+ : "memory"); \
+ } while (0)
+# define aesni_cleanup() \
+ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
+ "pxor %%xmm1, %%xmm1\n" :: ); \
+ } while (0)
+# define aesni_cleanup_2_6() \
+ do { asm volatile ("movdqu %0, %%xmm6\n\t" \
+ "pxor %%xmm2, %%xmm2\n" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ : \
+ : "m" (*win64tmp) \
+ : "memory"); \
+ } while (0)
+#else
+# define aesni_prepare_2_6_variable
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_6() do { } while (0)
+# define aesni_cleanup() \
+ do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \
+ "pxor %%xmm1, %%xmm1\n" :: ); \
+ } while (0)
+# define aesni_cleanup_2_6() \
+ do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \
+ "pxor %%xmm3, %%xmm3\n" \
+ "pxor %%xmm4, %%xmm4\n" \
+ "pxor %%xmm5, %%xmm5\n" \
+ "pxor %%xmm6, %%xmm6\n":: ); \
+ } while (0)
+#endif
void
_gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key)
{
+ aesni_prepare_2_6_variable;
+
aesni_prepare();
+ aesni_prepare_2_6();
if (ctx->rounds < 12)
{
@@ -999,7 +1029,10 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
const unsigned char *inbuf, unsigned char *iv,
size_t nblocks, int cbc_mac)
{
+ aesni_prepare_2_6_variable;
+
aesni_prepare ();
+ aesni_prepare_2_6();
asm volatile ("movdqu %[iv], %%xmm5\n\t"
: /* No output */
@@ -1044,8 +1077,10 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
{
static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
{ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+ aesni_prepare_2_6_variable;
aesni_prepare ();
+ aesni_prepare_2_6();
asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */
"movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */
@@ -1095,7 +1130,10 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
const unsigned char *inbuf, unsigned char *iv,
size_t nblocks)
{
+ aesni_prepare_2_6_variable;
+
aesni_prepare ();
+ aesni_prepare_2_6();
asm volatile ("movdqu %[iv], %%xmm6\n\t"
: /* No output */
@@ -1177,7 +1215,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
const unsigned char *inbuf, unsigned char *iv,
size_t nblocks)
{
+ aesni_prepare_2_6_variable;
+
aesni_prepare ();
+ aesni_prepare_2_6();
asm volatile
("movdqu %[iv], %%xmm5\n\t" /* use xmm5 as fast IV storage */
@@ -1331,8 +1372,10 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
u64 n = c->u_mode.ocb.data_nblocks;
+ aesni_prepare_2_6_variable;
aesni_prepare ();
+ aesni_prepare_2_6 ();
/* Preload Offset and Checksum */
asm volatile ("movdqu %[iv], %%xmm5\n\t"
@@ -1473,8 +1516,10 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
u64 n = c->u_mode.ocb.data_nblocks;
+ aesni_prepare_2_6_variable;
aesni_prepare ();
+ aesni_prepare_2_6 ();
/* Preload Offset and Checksum */
asm volatile ("movdqu %[iv], %%xmm5\n\t"
@@ -1625,8 +1670,10 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
RIJNDAEL_context *ctx = (void *)&c->context.c;
const unsigned char *abuf = abuf_arg;
u64 n = c->u_mode.ocb.aad_nblocks;
+ aesni_prepare_2_6_variable;
aesni_prepare ();
+ aesni_prepare_2_6 ();
/* Preload Offset and Sum */
asm volatile ("movdqu %[iv], %%xmm5\n\t"
diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index bd247a9..33ca53f 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -44,8 +44,9 @@
#endif
/* USE_SSSE3 indicates whether to use SSSE3 code. */
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
- defined(HAVE_GCC_INLINE_ASM_SSSE3)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
@@ -75,9 +76,7 @@
#ifdef ENABLE_AESNI_SUPPORT
# if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
# if __GNUC__ >= 4
-# ifndef __WIN64__
-# define USE_AESNI 1
-# endif
+# define USE_AESNI 1
# endif
# endif
#endif /* ENABLE_AESNI_SUPPORT */
diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c
index 3f1b352..21438dc 100644
--- a/cipher/rijndael-ssse3-amd64.c
+++ b/cipher/rijndael-ssse3-amd64.c
@@ -61,7 +61,60 @@
the use of these macros. There purpose is to make sure that the
SSE registers are cleared and won't reveal any information about
the key or the data. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define vpaes_ssse3_prepare() \
+ char win64tmp[16 * 10]; \
+ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" \
+ "movdqu %%xmm7, 1*16(%0)\n\t" \
+ "movdqu %%xmm8, 2*16(%0)\n\t" \
+ "movdqu %%xmm9, 3*16(%0)\n\t" \
+ "movdqu %%xmm10, 4*16(%0)\n\t" \
+ "movdqu %%xmm11, 5*16(%0)\n\t" \
+ "movdqu %%xmm12, 6*16(%0)\n\t" \
+ "movdqu %%xmm13, 7*16(%0)\n\t" \
+ "movdqu %%xmm14, 8*16(%0)\n\t" \
+ "movdqu %%xmm15, 9*16(%0)\n\t" \
+ : \
+ : "r" (win64tmp) \
+ : "memory" )
+# define vpaes_ssse3_cleanup() \
+ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \
+ "pxor %%xmm1, %%xmm1 \n\t" \
+ "pxor %%xmm2, %%xmm2 \n\t" \
+ "pxor %%xmm3, %%xmm3 \n\t" \
+ "pxor %%xmm4, %%xmm4 \n\t" \
+ "pxor %%xmm5, %%xmm5 \n\t" \
+ "movdqu 0*16(%0), %%xmm6 \n\t" \
+ "movdqu 1*16(%0), %%xmm7 \n\t" \
+ "movdqu 2*16(%0), %%xmm8 \n\t" \
+ "movdqu 3*16(%0), %%xmm9 \n\t" \
+ "movdqu 4*16(%0), %%xmm10 \n\t" \
+ "movdqu 5*16(%0), %%xmm11 \n\t" \
+ "movdqu 6*16(%0), %%xmm12 \n\t" \
+ "movdqu 7*16(%0), %%xmm13 \n\t" \
+ "movdqu 8*16(%0), %%xmm14 \n\t" \
+ "movdqu 9*16(%0), %%xmm15 \n\t" \
+ : \
+ : "r" (win64tmp) \
+ : "memory" )
+#else
+# define vpaes_ssse3_prepare() /*_*/
+# define vpaes_ssse3_cleanup() \
+ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \
+ "pxor %%xmm1, %%xmm1 \n\t" \
+ "pxor %%xmm2, %%xmm2 \n\t" \
+ "pxor %%xmm3, %%xmm3 \n\t" \
+ "pxor %%xmm4, %%xmm4 \n\t" \
+ "pxor %%xmm5, %%xmm5 \n\t" \
+ "pxor %%xmm6, %%xmm6 \n\t" \
+ "pxor %%xmm7, %%xmm7 \n\t" \
+ "pxor %%xmm8, %%xmm8 \n\t" \
+ ::: "memory" )
+#endif
+
#define vpaes_ssse3_prepare_enc(const_ptr) \
+ vpaes_ssse3_prepare(); \
asm volatile ("lea .Laes_consts(%%rip), %q0 \n\t" \
"movdqa (%q0), %%xmm9 # 0F \n\t" \
"movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \
@@ -75,6 +128,7 @@
: "memory" )
#define vpaes_ssse3_prepare_dec(const_ptr) \
+ vpaes_ssse3_prepare(); \
asm volatile ("lea .Laes_consts(%%rip), %q0 \n\t" \
"movdqa (%q0), %%xmm9 # 0F \n\t" \
"movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \
@@ -88,17 +142,6 @@
: \
: "memory" )
-#define vpaes_ssse3_cleanup() \
- asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \
- "pxor %%xmm1, %%xmm1 \n\t" \
- "pxor %%xmm2, %%xmm2 \n\t" \
- "pxor %%xmm3, %%xmm3 \n\t" \
- "pxor %%xmm4, %%xmm4 \n\t" \
- "pxor %%xmm5, %%xmm5 \n\t" \
- "pxor %%xmm6, %%xmm6 \n\t" \
- "pxor %%xmm7, %%xmm7 \n\t" \
- "pxor %%xmm8, %%xmm8 \n\t" \
- ::: "memory" )
void
@@ -106,6 +149,8 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
{
unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
+ vpaes_ssse3_prepare();
+
asm volatile ("leaq %q[key], %%rdi" "\n\t"
"movl %[bits], %%esi" "\n\t"
"leaq %[buf], %%rdx" "\n\t"
@@ -121,6 +166,8 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
: "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi",
"cc", "memory");
+ vpaes_ssse3_cleanup();
+
/* Save key for setting up decryption. */
memcpy(&ctx->keyschdec32[0][0], key, keybits / 8);
}
@@ -132,6 +179,8 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
{
unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
+ vpaes_ssse3_prepare();
+
asm volatile ("leaq %q[key], %%rdi" "\n\t"
"movl %[bits], %%esi" "\n\t"
"leaq %[buf], %%rdx" "\n\t"
@@ -146,6 +195,8 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
[rotoffs] "g" ((keybits == 192) ? 0 : 32)
: "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi",
"cc", "memory");
+
+ vpaes_ssse3_cleanup();
}
@@ -465,6 +516,11 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
}
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define X(...)
+#else
+# define X(...) __VA_ARGS__
+#endif
asm (
"\n\t" "##"
@@ -494,7 +550,7 @@ asm (
"\n\t" "##"
"\n\t" "##"
"\n\t" ".align 16"
- "\n\t" ".type _aes_encrypt_core, at function"
+X("\n\t" ".type _aes_encrypt_core, at function")
"\n\t" "_aes_encrypt_core:"
"\n\t" " leaq .Lk_mc_backward(%rcx), %rdi"
"\n\t" " mov $16, %rsi"
@@ -570,7 +626,7 @@ asm (
"\n\t" " pxor %xmm4, %xmm0 # 0 = A"
"\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0"
"\n\t" " ret"
- "\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core"
+X("\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core")
"\n\t" "##"
"\n\t" "## Decryption core"
@@ -578,7 +634,7 @@ asm (
"\n\t" "## Same API as encryption core."
"\n\t" "##"
"\n\t" ".align 16"
- "\n\t" ".type _aes_decrypt_core, at function"
+X("\n\t" ".type _aes_decrypt_core, at function")
"\n\t" "_aes_decrypt_core:"
"\n\t" " movl %eax, %esi"
"\n\t" " shll $4, %esi"
@@ -670,7 +726,7 @@ asm (
"\n\t" " pxor %xmm4, %xmm0 # 0 = A"
"\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0"
"\n\t" " ret"
- "\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core"
+X("\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core")
"\n\t" "########################################################"
"\n\t" "## ##"
@@ -679,7 +735,7 @@ asm (
"\n\t" "########################################################"
"\n\t" ".align 16"
- "\n\t" ".type _aes_schedule_core, at function"
+X("\n\t" ".type _aes_schedule_core, at function")
"\n\t" "_aes_schedule_core:"
"\n\t" " # rdi = key"
"\n\t" " # rsi = size in bits"
@@ -1039,7 +1095,7 @@ asm (
"\n\t" " pxor %xmm7, %xmm7"
"\n\t" " pxor %xmm8, %xmm8"
"\n\t" " ret"
- "\n\t" ".size _aes_schedule_core,.-_aes_schedule_core"
+X("\n\t" ".size _aes_schedule_core,.-_aes_schedule_core")
"\n\t" "########################################################"
"\n\t" "## ##"
@@ -1048,7 +1104,7 @@ asm (
"\n\t" "########################################################"
"\n\t" ".align 16"
- "\n\t" ".type _aes_consts, at object"
+X("\n\t" ".type _aes_consts, at object")
"\n\t" ".Laes_consts:"
"\n\t" "_aes_consts:"
"\n\t" " # s0F"
@@ -1226,7 +1282,7 @@ asm (
"\n\t" " .quad 0xC7AA6DB9D4943E2D"
"\n\t" " .quad 0x12D7560F93441D00"
"\n\t" " .quad 0xCA4B8159D8C58E9C"
- "\n\t" ".size _aes_consts,.-_aes_consts"
+X("\n\t" ".size _aes_consts,.-_aes_consts")
);
#endif /* USE_SSSE3 */
diff --git a/configure.ac b/configure.ac
index 594209f..0f16175 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1127,6 +1127,93 @@ fi
#### ####
#############################################
+
+# The following tests depend on warnings causing the compile to fail, so set
+# -Werror temporarily.
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+
+
+#
+# Check whether compiler supports 'ms_abi' function attribute.
+#
+AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute],
+ [gcry_cv_gcc_attribute_ms_abi],
+ [gcry_cv_gcc_attribute_ms_abi=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[int __attribute__ ((ms_abi)) proto(int);]])],
+ [gcry_cv_gcc_attribute_ms_abi=yes])])
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1,
+ [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute])
+fi
+
+
+#
+# Check whether compiler supports 'sysv_abi' function attribute.
+#
+AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute],
+ [gcry_cv_gcc_attribute_sysv_abi],
+ [gcry_cv_gcc_attribute_sysv_abi=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[int __attribute__ ((sysv_abi)) proto(int);]])],
+ [gcry_cv_gcc_attribute_sysv_abi=yes])])
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1,
+ [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute])
+fi
+
+
+#
+# Check whether default calling convention is 'ms_abi'.
+#
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+ AC_CACHE_CHECK([whether default calling convention is 'ms_abi'],
+ [gcry_cv_gcc_default_abi_is_ms_abi],
+ [gcry_cv_gcc_default_abi_is_ms_abi=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[void *test(void) {
+ void *(*def_func)(void) = test;
+ void *__attribute__((ms_abi))(*msabi_func)(void);
+ /* warning on SysV abi targets, passes on Windows based targets */
+ msabi_func = def_func;
+ return msabi_func;
+ }]])],
+ [gcry_cv_gcc_default_abi_is_ms_abi=yes])])
+ if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1,
+ [Defined if default calling convention is 'ms_abi'])
+ fi
+fi
+
+
+#
+# Check whether default calling convention is 'sysv_abi'.
+#
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+ AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'],
+ [gcry_cv_gcc_default_abi_is_sysv_abi],
+ [gcry_cv_gcc_default_abi_is_sysv_abi=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[void *test(void) {
+ void *(*def_func)(void) = test;
+ void *__attribute__((sysv_abi))(*sysvabi_func)(void);
+ /* warning on MS ABI targets, passes on SysV ABI targets */
+ sysvabi_func = def_func;
+ return sysvabi_func;
+ }]])],
+ [gcry_cv_gcc_default_abi_is_sysv_abi=yes])])
+ if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1,
+ [Defined if default calling convention is 'sysv_abi'])
+ fi
+fi
+
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
#
# Check whether GCC inline assembler supports SSSE3 instructions
# This is required for the AES-NI instructions.
@@ -1281,9 +1368,6 @@ if test $amd64_as_feature_detection = yes; then
[[__asm__(
/* Test if '.type' and '.size' are supported. */
/* These work only on ELF targets. */
- /* TODO: add COFF (mingw64, cygwin64) support to assembly
- * implementations. Mingw64/cygwin64 also require additional
- * work because they use different calling convention. */
"asmfunc:\n\t"
".size asmfunc,.-asmfunc;\n\t"
".type asmfunc, at function;\n\t"
@@ -1299,6 +1383,24 @@ if test $amd64_as_feature_detection = yes; then
AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1,
[Defined if underlying assembler is compatible with amd64 assembly implementations])
fi
+ if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" &&
+ test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" &&
+ test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then
+ AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations],
+ [gcry_cv_gcc_win64_platform_as_ok],
+ [gcry_cv_gcc_win64_platform_as_ok=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[__asm__(
+ ".globl asmfunc\n\t"
+ "asmfunc:\n\t"
+ "xorq \$(1234), %rbp;\n\t"
+ );]])],
+ [gcry_cv_gcc_win64_platform_as_ok=yes])])
+ if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then
+ AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1,
+ [Defined if underlying assembler is compatible with WIN64 assembly implementations])
+ fi
+ fi
fi
More information about the Gcrypt-devel
mailing list