[PATCH 8/8] Enable AES/AES-NI, AES/SSSE3 and GCM/PCLMUL implementations on WIN64

Jussi Kivilinna jussi.kivilinna@iki.fi
Fri May 1 19:40:09 CEST 2015


* cipher/cipher-gcm-intel-pclmul.c (_gcry_ghash_setup_intel_pclmul)
(_gcry_ghash_intel_pclmul) [__WIN64__]: Store non-volatile vector
registers before use and restore after.
* cipher/cipher-internal.h (GCM_USE_INTEL_PCLMUL): Remove dependency
on !defined(__WIN64__).
* cipher/rijndael-aesni.c [__WIN64__] (aesni_prepare_2_6_variable,
aesni_prepare, aesni_prepare_2_6, aesni_cleanup)
(aesni_cleanup_2_6): New.
[!__WIN64__] (aesni_prepare_2_6_variable, aesni_prepare_2_6): New.
(_gcry_aes_aesni_do_setkey, _gcry_aes_aesni_cbc_enc)
(_gcry_aes_aesni_ctr_enc, _gcry_aes_aesni_cfb_dec, _gcry_aes_aesni_cbc_dec)
(aesni_ocb_enc, aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Use
'aesni_prepare_2_6'.
* cipher/rijndael-internal.h (USE_SSSE3): Enable if
HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS or
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS.
(USE_AESNI): Remove dependency on !defined(__WIN64__).
* cipher/rijndael-ssse3-amd64.c [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]
(vpaes_ssse3_prepare, vpaes_ssse3_cleanup): New.
[!HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (vpaes_ssse3_prepare): New.
(vpaes_ssse3_prepare_enc, vpaes_ssse3_prepare_dec): Use
'vpaes_ssse3_prepare'.
(_gcry_aes_ssse3_do_setkey, _gcry_aes_ssse3_prepare_decryption): Use
'vpaes_ssse3_prepare' and 'vpaes_ssse3_cleanup'.
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (X): Add masking macro to
exclude '.type' and '.size' markers from assembly code, as they are
not supported in WIN64/COFF objects.
* configure.ac (gcry_cv_gcc_attribute_ms_abi)
(gcry_cv_gcc_attribute_sysv_abi, gcry_cv_gcc_default_abi_is_ms_abi)
(gcry_cv_gcc_default_abi_is_sysv_abi)
(gcry_cv_gcc_win64_platform_as_ok): New checks.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 cipher/cipher-gcm-intel-pclmul.c |   72 +++++++++++++++++++++++++
 cipher/cipher-internal.h         |    4 -
 cipher/rijndael-aesni.c          |   73 +++++++++++++++++++++-----
 cipher/rijndael-internal.h       |    9 +--
 cipher/rijndael-ssse3-amd64.c    |   94 ++++++++++++++++++++++++++-------
 configure.ac                     |  108 +++++++++++++++++++++++++++++++++++++-
 6 files changed, 317 insertions(+), 43 deletions(-)
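
Note (not part of the patch, added for review context): every hunk below
follows the same pattern, because the Windows x64 calling convention
treats XMM6-XMM15 as callee-saved registers, while the SysV AMD64 ABI
leaves all XMM registers caller-saved.  A minimal C sketch of that
save/restore idiom is shown here; the function name and the 'win64tmp'
buffer are hypothetical and only illustrate the pattern used by the
real code:

  static void use_xmm6_as_scratch (void)
  {
  #if defined(__x86_64__) && defined(__WIN64__)
    char win64tmp[16];

    /* Spill callee-saved XMM6 before inline assembly clobbers it. */
    asm volatile ("movdqu %%xmm6, %0" : "=m" (*win64tmp) : : "memory");
  #endif

    /* ... inline assembly that uses XMM6 as scratch would go here ... */

  #if defined(__x86_64__) && defined(__WIN64__)
    /* Restore the caller's XMM6 value afterwards. */
    asm volatile ("movdqu %0, %%xmm6" : : "m" (*win64tmp) : "memory");
  #else
    /* On SysV targets clearing the register is enough. */
    asm volatile ("pxor %%xmm6, %%xmm6" : : : "cc");
  #endif
  }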

diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c
index 79648ce..a327249 100644
--- a/cipher/cipher-gcm-intel-pclmul.c
+++ b/cipher/cipher-gcm-intel-pclmul.c
@@ -249,6 +249,17 @@ void
 _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
 {
   u64 tmp[2];
+#if defined(__x86_64__) && defined(__WIN64__)
+  char win64tmp[3 * 16];
+
+  /* XMM6-XMM8 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+                "movdqu %%xmm7, 1*16(%0)\n\t"
+                "movdqu %%xmm8, 2*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory");
+#endif
 
   /* Swap endianness of hsub. */
   tmp[0] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 8);
@@ -285,6 +296,21 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
                 : [h_234] "r" (c->u_mode.gcm.gcm_table)
                 : "memory");
 
+#ifdef __WIN64__
+  /* Clear/restore used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "movdqu 0*16(%0), %%xmm6\n\t"
+                "movdqu 1*16(%0), %%xmm7\n\t"
+                "movdqu 2*16(%0), %%xmm8\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory");
+#else
   /* Clear used registers. */
   asm volatile( "pxor %%xmm0, %%xmm0\n\t"
                 "pxor %%xmm1, %%xmm1\n\t"
@@ -297,6 +323,7 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
                 "pxor %%xmm8, %%xmm8\n\t"
                 ::: "cc" );
 #endif
+#endif
 
   wipememory (tmp, sizeof(tmp));
 }
@@ -309,10 +336,30 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
   static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
     { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
   const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+#ifdef __WIN64__
+  char win64tmp[10 * 16];
+#endif
 
   if (nblocks == 0)
     return 0;
 
+#ifdef __WIN64__
+  /* XMM6-XMM15 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6,  0*16(%0)\n\t"
+                "movdqu %%xmm7,  1*16(%0)\n\t"
+                "movdqu %%xmm8,  2*16(%0)\n\t"
+                "movdqu %%xmm9,  3*16(%0)\n\t"
+                "movdqu %%xmm10, 4*16(%0)\n\t"
+                "movdqu %%xmm11, 5*16(%0)\n\t"
+                "movdqu %%xmm12, 6*16(%0)\n\t"
+                "movdqu %%xmm13, 7*16(%0)\n\t"
+                "movdqu %%xmm14, 8*16(%0)\n\t"
+                "movdqu %%xmm15, 9*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#endif
+
   /* Preload hash and H1. */
   asm volatile ("movdqu %[hash], %%xmm1\n\t"
                 "movdqa %[hsub], %%xmm0\n\t"
@@ -353,6 +400,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
         }
       while (nblocks >= 4);
 
+#ifndef __WIN64__
       /* Clear used x86-64/XMM registers. */
       asm volatile( "pxor %%xmm8, %%xmm8\n\t"
                     "pxor %%xmm9, %%xmm9\n\t"
@@ -363,6 +411,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
                     "pxor %%xmm14, %%xmm14\n\t"
                     "pxor %%xmm15, %%xmm15\n\t"
                     ::: "cc" );
+#endif
     }
 #endif
 
@@ -385,6 +434,28 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
                 : [hash] "=m" (*result)
                 : [be_mask] "m" (*be_mask));
 
+#ifdef __WIN64__
+  /* Clear/restore used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "movdqu 0*16(%0), %%xmm6\n\t"
+                "movdqu 1*16(%0), %%xmm7\n\t"
+                "movdqu 2*16(%0), %%xmm8\n\t"
+                "movdqu 3*16(%0), %%xmm9\n\t"
+                "movdqu 4*16(%0), %%xmm10\n\t"
+                "movdqu 5*16(%0), %%xmm11\n\t"
+                "movdqu 6*16(%0), %%xmm12\n\t"
+                "movdqu 7*16(%0), %%xmm13\n\t"
+                "movdqu 8*16(%0), %%xmm14\n\t"
+                "movdqu 9*16(%0), %%xmm15\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#else
   /* Clear used registers. */
   asm volatile( "pxor %%xmm0, %%xmm0\n\t"
                 "pxor %%xmm1, %%xmm1\n\t"
@@ -395,6 +466,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
                 "pxor %%xmm6, %%xmm6\n\t"
                 "pxor %%xmm7, %%xmm7\n\t"
                 ::: "cc" );
+#endif
 
   return 0;
 }
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 693f218..e20ea56 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -67,9 +67,7 @@
 #if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
 # if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
 #  if __GNUC__ >= 4
-#   ifndef __WIN64__
-#    define GCM_USE_INTEL_PCLMUL 1
-#   endif
+#   define GCM_USE_INTEL_PCLMUL 1
 #  endif
 # endif
 #endif /* GCM_USE_INTEL_PCLMUL */
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 147679f..910bc68 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -49,24 +49,54 @@ typedef struct u128_s { u32 a, b, c, d; } u128_t;
    the use of these macros.  There purpose is to make sure that the
    SSE regsiters are cleared and won't reveal any information about
    the key or the data.  */
-#define aesni_prepare() do { } while (0)
-#define aesni_cleanup()                                                \
-  do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
-                     "pxor %%xmm1, %%xmm1\n" :: );                     \
-  } while (0)
-#define aesni_cleanup_2_6()                                            \
-  do { asm volatile ("pxor %%xmm2, %%xmm2\n\t"                         \
-                     "pxor %%xmm3, %%xmm3\n"                           \
-                     "pxor %%xmm4, %%xmm4\n"                           \
-                     "pxor %%xmm5, %%xmm5\n"                           \
-                     "pxor %%xmm6, %%xmm6\n":: );                      \
-  } while (0)
-
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define aesni_prepare_2_6_variable char win64tmp[16]
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_6()                                            \
+   do { asm volatile ("movdqu %%xmm6, %0\n\t"                           \
+                      : "=m" (*win64tmp)                                \
+                      :                                                 \
+                      : "memory");                                      \
+   } while (0)
+# define aesni_cleanup()                                                \
+   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
+                      "pxor %%xmm1, %%xmm1\n" :: );                     \
+   } while (0)
+# define aesni_cleanup_2_6()                                            \
+   do { asm volatile ("movdqu %0,   %%xmm6\n\t"                         \
+                      "pxor %%xmm2, %%xmm2\n"                           \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      :                                                 \
+                      : "m" (*win64tmp)                                 \
+                      : "memory");                                      \
+   } while (0)
+#else
+# define aesni_prepare_2_6_variable
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_6() do { } while (0)
+# define aesni_cleanup()                                                \
+   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
+                      "pxor %%xmm1, %%xmm1\n" :: );                     \
+   } while (0)
+# define aesni_cleanup_2_6()                                            \
+   do { asm volatile ("pxor %%xmm2, %%xmm2\n\t"                         \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      "pxor %%xmm6, %%xmm6\n":: );                      \
+   } while (0)
+#endif
 
 void
 _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key)
 {
+  aesni_prepare_2_6_variable;
+
   aesni_prepare();
+  aesni_prepare_2_6();
 
   if (ctx->rounds < 12)
     {
@@ -999,7 +1029,10 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
                          const unsigned char *inbuf, unsigned char *iv,
                          size_t nblocks, int cbc_mac)
 {
+  aesni_prepare_2_6_variable;
+
   aesni_prepare ();
+  aesni_prepare_2_6();
 
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
                 : /* No output */
@@ -1044,8 +1077,10 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
 {
   static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
     { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  aesni_prepare_2_6_variable;
 
   aesni_prepare ();
+  aesni_prepare_2_6();
 
   asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */
                 "movdqa %[ctr], %%xmm5\n\t"  /* Preload CTR */
@@ -1095,7 +1130,10 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
                          const unsigned char *inbuf, unsigned char *iv,
                          size_t nblocks)
 {
+  aesni_prepare_2_6_variable;
+
   aesni_prepare ();
+  aesni_prepare_2_6();
 
   asm volatile ("movdqu %[iv], %%xmm6\n\t"
                 : /* No output */
@@ -1177,7 +1215,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 			 const unsigned char *inbuf, unsigned char *iv,
 			 size_t nblocks)
 {
+  aesni_prepare_2_6_variable;
+
   aesni_prepare ();
+  aesni_prepare_2_6();
 
   asm volatile
     ("movdqu %[iv], %%xmm5\n\t"	/* use xmm5 as fast IV storage */
@@ -1331,8 +1372,10 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   u64 n = c->u_mode.ocb.data_nblocks;
+  aesni_prepare_2_6_variable;
 
   aesni_prepare ();
+  aesni_prepare_2_6 ();
 
   /* Preload Offset and Checksum */
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
@@ -1473,8 +1516,10 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   u64 n = c->u_mode.ocb.data_nblocks;
+  aesni_prepare_2_6_variable;
 
   aesni_prepare ();
+  aesni_prepare_2_6 ();
 
   /* Preload Offset and Checksum */
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
@@ -1625,8 +1670,10 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   u64 n = c->u_mode.ocb.aad_nblocks;
+  aesni_prepare_2_6_variable;
 
   aesni_prepare ();
+  aesni_prepare_2_6 ();
 
   /* Preload Offset and Sum */
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index bd247a9..33ca53f 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -44,8 +44,9 @@
 #endif
 
 /* USE_SSSE3 indicates whether to use SSSE3 code. */
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 #  define USE_SSSE3 1
 #endif
 
@@ -75,9 +76,7 @@
 #ifdef ENABLE_AESNI_SUPPORT
 # if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
 #  if __GNUC__ >= 4
-#   ifndef __WIN64__
-#    define USE_AESNI 1
-#   endif
+#   define USE_AESNI 1
 #  endif
 # endif
 #endif /* ENABLE_AESNI_SUPPORT */
diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c
index 3f1b352..21438dc 100644
--- a/cipher/rijndael-ssse3-amd64.c
+++ b/cipher/rijndael-ssse3-amd64.c
@@ -61,7 +61,60 @@
   the use of these macros.  There purpose is to make sure that the
   SSE registers are cleared and won't reveal any information about
   the key or the data.  */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define vpaes_ssse3_prepare() \
+    char win64tmp[16 * 10]; \
+    asm volatile ("movdqu %%xmm6,  0*16(%0)\n\t" \
+                  "movdqu %%xmm7,  1*16(%0)\n\t" \
+                  "movdqu %%xmm8,  2*16(%0)\n\t" \
+                  "movdqu %%xmm9,  3*16(%0)\n\t" \
+                  "movdqu %%xmm10, 4*16(%0)\n\t" \
+                  "movdqu %%xmm11, 5*16(%0)\n\t" \
+                  "movdqu %%xmm12, 6*16(%0)\n\t" \
+                  "movdqu %%xmm13, 7*16(%0)\n\t" \
+                  "movdqu %%xmm14, 8*16(%0)\n\t" \
+                  "movdqu %%xmm15, 9*16(%0)\n\t" \
+                  : \
+                  : "r" (win64tmp) \
+                  : "memory" )
+# define vpaes_ssse3_cleanup() \
+    asm volatile ("pxor	%%xmm0,  %%xmm0 \n\t" \
+                  "pxor	%%xmm1,  %%xmm1 \n\t" \
+                  "pxor	%%xmm2,  %%xmm2 \n\t" \
+                  "pxor	%%xmm3,  %%xmm3 \n\t" \
+                  "pxor	%%xmm4,  %%xmm4 \n\t" \
+                  "pxor	%%xmm5,  %%xmm5 \n\t" \
+                  "movdqu 0*16(%0), %%xmm6 \n\t" \
+                  "movdqu 1*16(%0), %%xmm7 \n\t" \
+                  "movdqu 2*16(%0), %%xmm8 \n\t" \
+                  "movdqu 3*16(%0), %%xmm9 \n\t" \
+                  "movdqu 4*16(%0), %%xmm10 \n\t" \
+                  "movdqu 5*16(%0), %%xmm11 \n\t" \
+                  "movdqu 6*16(%0), %%xmm12 \n\t" \
+                  "movdqu 7*16(%0), %%xmm13 \n\t" \
+                  "movdqu 8*16(%0), %%xmm14 \n\t" \
+                  "movdqu 9*16(%0), %%xmm15 \n\t" \
+                  : \
+                  : "r" (win64tmp) \
+                  : "memory" )
+#else
+# define vpaes_ssse3_prepare() /*_*/
+# define vpaes_ssse3_cleanup() \
+    asm volatile ("pxor	%%xmm0,  %%xmm0 \n\t" \
+                  "pxor	%%xmm1,  %%xmm1 \n\t" \
+                  "pxor	%%xmm2,  %%xmm2 \n\t" \
+                  "pxor	%%xmm3,  %%xmm3 \n\t" \
+                  "pxor	%%xmm4,  %%xmm4 \n\t" \
+                  "pxor	%%xmm5,  %%xmm5 \n\t" \
+                  "pxor	%%xmm6,  %%xmm6 \n\t" \
+                  "pxor	%%xmm7,  %%xmm7 \n\t" \
+                  "pxor	%%xmm8,  %%xmm8 \n\t" \
+                  ::: "memory" )
+#endif
+
 #define vpaes_ssse3_prepare_enc(const_ptr) \
+    vpaes_ssse3_prepare(); \
     asm volatile ("lea	.Laes_consts(%%rip), %q0 \n\t" \
                   "movdqa	          (%q0), %%xmm9  # 0F \n\t" \
                   "movdqa	.Lk_inv   (%q0), %%xmm10 # inv \n\t" \
@@ -75,6 +128,7 @@
                   : "memory" )
 
 #define vpaes_ssse3_prepare_dec(const_ptr) \
+    vpaes_ssse3_prepare(); \
     asm volatile ("lea	.Laes_consts(%%rip), %q0 \n\t" \
                   "movdqa	          (%q0), %%xmm9  # 0F \n\t" \
                   "movdqa	.Lk_inv   (%q0), %%xmm10 # inv \n\t" \
@@ -88,17 +142,6 @@
                   : \
                   : "memory" )
 
-#define vpaes_ssse3_cleanup() \
-    asm volatile ("pxor	%%xmm0,  %%xmm0 \n\t" \
-                  "pxor	%%xmm1,  %%xmm1 \n\t" \
-                  "pxor	%%xmm2,  %%xmm2 \n\t" \
-                  "pxor	%%xmm3,  %%xmm3 \n\t" \
-                  "pxor	%%xmm4,  %%xmm4 \n\t" \
-                  "pxor	%%xmm5,  %%xmm5 \n\t" \
-                  "pxor	%%xmm6,  %%xmm6 \n\t" \
-                  "pxor	%%xmm7,  %%xmm7 \n\t" \
-                  "pxor	%%xmm8,  %%xmm8 \n\t" \
-                  ::: "memory" )
 
 
 void
@@ -106,6 +149,8 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
 {
   unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
 
+  vpaes_ssse3_prepare();
+
   asm volatile ("leaq %q[key], %%rdi"			"\n\t"
                 "movl %[bits], %%esi"			"\n\t"
                 "leaq %[buf], %%rdx"			"\n\t"
@@ -121,6 +166,8 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
                 : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi",
                   "cc", "memory");
 
+  vpaes_ssse3_cleanup();
+
   /* Save key for setting up decryption. */
   memcpy(&ctx->keyschdec32[0][0], key, keybits / 8);
 }
@@ -132,6 +179,8 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
 {
   unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
 
+  vpaes_ssse3_prepare();
+
   asm volatile ("leaq %q[key], %%rdi"			"\n\t"
                 "movl %[bits], %%esi"			"\n\t"
                 "leaq %[buf], %%rdx"			"\n\t"
@@ -146,6 +195,8 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
                   [rotoffs] "g" ((keybits == 192) ? 0 : 32)
                 : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi",
                   "cc", "memory");
+
+  vpaes_ssse3_cleanup();
 }
 
 
@@ -465,6 +516,11 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define X(...)
+#else
+# define X(...) __VA_ARGS__
+#endif
 
 asm (
   "\n\t" "##"
@@ -494,7 +550,7 @@ asm (
   "\n\t" "##"
   "\n\t" "##"
   "\n\t" ".align 16"
-  "\n\t" ".type _aes_encrypt_core, at function"
+X("\n\t" ".type _aes_encrypt_core, at function")
   "\n\t" "_aes_encrypt_core:"
   "\n\t" "	leaq	.Lk_mc_backward(%rcx), %rdi"
   "\n\t" "	mov	$16,	%rsi"
@@ -570,7 +626,7 @@ asm (
   "\n\t" "	pxor	%xmm4,	%xmm0	# 0 = A"
   "\n\t" "	pshufb	.Lk_sr(%rsi,%rcx), %xmm0"
   "\n\t" "	ret"
-  "\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core"
+X("\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core")
 
   "\n\t" "##"
   "\n\t" "##  Decryption core"
@@ -578,7 +634,7 @@ asm (
   "\n\t" "##  Same API as encryption core."
   "\n\t" "##"
   "\n\t" ".align 16"
-  "\n\t" ".type _aes_decrypt_core, at function"
+X("\n\t" ".type _aes_decrypt_core, at function")
   "\n\t" "_aes_decrypt_core:"
   "\n\t" "	movl	%eax,	%esi"
   "\n\t" "	shll	$4,	%esi"
@@ -670,7 +726,7 @@ asm (
   "\n\t" "	pxor	%xmm4,	%xmm0	# 0 = A"
   "\n\t" "	pshufb	.Lk_sr(%rsi,%rcx), %xmm0"
   "\n\t" "	ret"
-  "\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core"
+X("\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core")
 
   "\n\t" "########################################################"
   "\n\t" "##                                                    ##"
@@ -679,7 +735,7 @@ asm (
   "\n\t" "########################################################"
 
   "\n\t" ".align 16"
-  "\n\t" ".type _aes_schedule_core, at function"
+X("\n\t" ".type _aes_schedule_core, at function")
   "\n\t" "_aes_schedule_core:"
   "\n\t" "	# rdi = key"
   "\n\t" "	# rsi = size in bits"
@@ -1039,7 +1095,7 @@ asm (
   "\n\t" "	pxor	%xmm7,  %xmm7"
   "\n\t" "	pxor	%xmm8,  %xmm8"
   "\n\t" "	ret"
-  "\n\t" ".size _aes_schedule_core,.-_aes_schedule_core"
+X("\n\t" ".size _aes_schedule_core,.-_aes_schedule_core")
 
   "\n\t" "########################################################"
   "\n\t" "##                                                    ##"
@@ -1048,7 +1104,7 @@ asm (
   "\n\t" "########################################################"
 
   "\n\t" ".align 16"
-  "\n\t" ".type _aes_consts, at object"
+X("\n\t" ".type _aes_consts, at object")
   "\n\t" ".Laes_consts:"
   "\n\t" "_aes_consts:"
   "\n\t" "	# s0F"
@@ -1226,7 +1282,7 @@ asm (
   "\n\t" "	.quad	0xC7AA6DB9D4943E2D"
   "\n\t" "	.quad	0x12D7560F93441D00"
   "\n\t" "	.quad	0xCA4B8159D8C58E9C"
-  "\n\t" ".size _aes_consts,.-_aes_consts"
+X("\n\t" ".size _aes_consts,.-_aes_consts")
 );
 
 #endif /* USE_SSSE3 */
diff --git a/configure.ac b/configure.ac
index 594209f..0f16175 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1127,6 +1127,93 @@ fi
 ####                                     ####
 #############################################
 
+
+# Following tests depend on warnings to cause compile to fail, so set -Werror
+# temporarily.
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+
+
+#
+# Check whether compiler supports 'ms_abi' function attribute.
+#
+AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute],
+       [gcry_cv_gcc_attribute_ms_abi],
+       [gcry_cv_gcc_attribute_ms_abi=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[int __attribute__ ((ms_abi)) proto(int);]])],
+          [gcry_cv_gcc_attribute_ms_abi=yes])])
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1,
+     [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute])
+fi
+
+
+#
+# Check whether compiler supports 'sysv_abi' function attribute.
+#
+AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute],
+       [gcry_cv_gcc_attribute_sysv_abi],
+       [gcry_cv_gcc_attribute_sysv_abi=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[int __attribute__ ((sysv_abi)) proto(int);]])],
+          [gcry_cv_gcc_attribute_sysv_abi=yes])])
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1,
+     [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute])
+fi
+
+
+#
+# Check whether default calling convention is 'ms_abi'.
+#
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+   AC_CACHE_CHECK([whether default calling convention is 'ms_abi'],
+          [gcry_cv_gcc_default_abi_is_ms_abi],
+          [gcry_cv_gcc_default_abi_is_ms_abi=no
+           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+             [[void *test(void) {
+                 void *(*def_func)(void) = test;
+                 void *__attribute__((ms_abi))(*msabi_func)(void);
+                 /* warning on SysV abi targets, passes on Windows based targets */
+                 msabi_func = def_func;
+                 return msabi_func;
+             }]])],
+             [gcry_cv_gcc_default_abi_is_ms_abi=yes])])
+   if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then
+      AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1,
+        [Defined if default calling convention is 'ms_abi'])
+   fi
+fi
+
+
+#
+# Check whether default calling convention is 'sysv_abi'.
+#
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+   AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'],
+          [gcry_cv_gcc_default_abi_is_sysv_abi],
+          [gcry_cv_gcc_default_abi_is_sysv_abi=no
+           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+             [[void *test(void) {
+                 void *(*def_func)(void) = test;
+                 void *__attribute__((sysv_abi))(*sysvabi_func)(void);
+                 /* warning on MS ABI targets, passes on SysV ABI targets */
+                 sysvabi_func = def_func;
+                 return sysvabi_func;
+             }]])],
+             [gcry_cv_gcc_default_abi_is_sysv_abi=yes])])
+   if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then
+      AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1,
+        [Defined if default calling convention is 'sysv_abi'])
+   fi
+fi
+
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
 #
 # Check whether GCC inline assembler supports SSSE3 instructions
 # This is required for the AES-NI instructions.
@@ -1281,9 +1368,6 @@ if test $amd64_as_feature_detection = yes; then
           [[__asm__(
                 /* Test if '.type' and '.size' are supported.  */
                 /* These work only on ELF targets. */
-		/* TODO: add COFF (mingw64, cygwin64) support to assembly
-                 * implementations.  Mingw64/cygwin64 also require additional
-                 * work because they use different calling convention. */
 		"asmfunc:\n\t"
                 ".size asmfunc,.-asmfunc;\n\t"
                 ".type asmfunc, at function;\n\t"
@@ -1299,6 +1383,24 @@ if test $amd64_as_feature_detection = yes; then
      AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1,
               [Defined if underlying assembler is compatible with amd64 assembly implementations])
   fi
+  if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" &&
+     test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" &&
+     test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then
+    AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations],
+      [gcry_cv_gcc_win64_platform_as_ok],
+      [gcry_cv_gcc_win64_platform_as_ok=no
+      AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+        [[__asm__(
+              ".globl asmfunc\n\t"
+              "asmfunc:\n\t"
+              "xorq \$(1234), %rbp;\n\t"
+          );]])],
+        [gcry_cv_gcc_win64_platform_as_ok=yes])])
+    if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then
+      AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1,
+                [Defined if underlying assembler is compatible with WIN64 assembly implementations])
+    fi
+  fi
 fi
 
 
