[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-207-g66129b3

by Jussi Kivilinna cvs@cvs.gnupg.org
Sun May 3 10:41:00 CEST 2015


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  66129b3334a5aa54ff8a97981507e4704f759571 (commit)
       via  8422d5d699265b960bd1ca837044ee052fc5b614 (commit)
       via  1089a13073c26a9a456e43ec38d937e6ee7f4077 (commit)
       via  022959099644f64df5f2a83ade21159864f64837 (commit)
       via  e433676a899fa0d274d40547166b03c7c8bd8e78 (commit)
       via  4e09aaa36d151c3312019724a77fc09aa345b82f (commit)
       via  460355f23e770637d29e3af7b998a957a2b5bc88 (commit)
       via  6c21cf5fed1ad430fa41445eac2350802bc8aaed (commit)
       via  9cf224322007d90193d4910f0da6e0e29ce01d70 (commit)
       via  d5a7e00b6b222566a5650639ef29684b047c1909 (commit)
       via  0cdd24456b33defc7f8176fa82ab694fbc284385 (commit)
       via  f701954555340a503f6e52cc18d58b0c515427b7 (commit)
       via  e78560a4b717f7154f910a8ce4128de152f586da (commit)
      from  e886e4f5e73fe6a9f9191f5155852ce5d8bb88fe (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 66129b3334a5aa54ff8a97981507e4704f759571
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Sat May 2 13:27:06 2015 +0300

    Enable AMD64 AES implementation for WIN64
    
    * cipher/rijndael-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/rijndael-internal.h (USE_AMD64_ASM): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    * cipher/rijndael.c (do_encrypt, do_decrypt)
    [USE_AMD64_ASM && !HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS]: Use
    assembly block to call AMD64 assembly encrypt/decrypt function.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>

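The ELF() macro introduced across this series is what lets a single .S
file serve both ELF and COFF toolchains: assembler directives such as
.type and .size exist only on ELF, so they are routed through a variadic
macro that expands to nothing when only the WIN64 assembler is
available.  Condensed from the hunks below:

#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
# define ELF(...) __VA_ARGS__   /* ELF target: emit the directive as-is */
#else
# define ELF(...) /*_*/         /* COFF target: drop ELF-only lines */
#endif

ELF(.type  _gcry_aes_amd64_encrypt_block,@function;)
_gcry_aes_amd64_encrypt_block:
	/* ... function body unchanged ... */
ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;)
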
diff --git a/cipher/rijndael-amd64.S b/cipher/rijndael-amd64.S
index 24c555a..b149e94 100644
--- a/cipher/rijndael-amd64.S
+++ b/cipher/rijndael-amd64.S
@@ -20,7 +20,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_AES)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_AES)
 
 #ifdef __PIC__
 #  define RIP (%rip)
@@ -28,6 +29,12 @@
 #  define RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .text
 
 /* table macros */
@@ -205,7 +212,7 @@
 
 .align 8
 .globl _gcry_aes_amd64_encrypt_block
-.type   _gcry_aes_amd64_encrypt_block,@function;
+ELF(.type   _gcry_aes_amd64_encrypt_block,@function;)
 
 _gcry_aes_amd64_encrypt_block:
 	/* input:
@@ -279,7 +286,7 @@ _gcry_aes_amd64_encrypt_block:
 	lastencround(11);
 
 	jmp .Lenc_done;
-.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;
+ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;)
 
 #define do_decround(next_r) \
 	do16bit_shr(16, mov, RA, Dsize, D0, RNA, D0, RNB, RT0, RT1); \
@@ -365,7 +372,7 @@ _gcry_aes_amd64_encrypt_block:
 
 .align 8
 .globl _gcry_aes_amd64_decrypt_block
-.type   _gcry_aes_amd64_decrypt_block,@function;
+ELF(.type   _gcry_aes_amd64_decrypt_block,@function;)
 
 _gcry_aes_amd64_decrypt_block:
 	/* input:
@@ -440,7 +447,7 @@ _gcry_aes_amd64_decrypt_block:
 	decround(9);
 
 	jmp .Ldec_tail;
-.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;
+ELF(.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;)
 
 #endif /*USE_AES*/
 #endif /*__x86_64*/
diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index 33ca53f..6641728 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -39,7 +39,8 @@
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index ade41c9..7ebf329 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -665,8 +665,25 @@ do_encrypt (const RIJNDAEL_context *ctx,
             unsigned char *bx, const unsigned char *ax)
 {
 #ifdef USE_AMD64_ASM
+# ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
   return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds,
 				       encT);
+# else
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  uintptr_t ret;
+  asm ("movq %[encT], %%r8\n\t"
+       "callq *%[ret]\n\t"
+       : [ret] "=a" (ret)
+       : "0" (_gcry_aes_amd64_encrypt_block),
+         "D" (ctx->keyschenc),
+         "S" (bx),
+         "d" (ax),
+         "c" (ctx->rounds),
+         [encT] "r" (encT)
+       : "cc", "memory", "r8", "r9", "r10", "r11");
+  return ret;
+# endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */
 #elif defined(USE_ARM_ASM)
   return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds, encT);
 #else
@@ -1008,8 +1025,25 @@ do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
             const unsigned char *ax)
 {
 #ifdef USE_AMD64_ASM
+# ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
   return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds,
 				       &dec_tables);
+# else
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  uintptr_t ret;
+  asm ("movq %[dectabs], %%r8\n\t"
+       "callq *%[ret]\n\t"
+       : [ret] "=a" (ret)
+       : "0" (_gcry_aes_amd64_decrypt_block),
+         "D" (ctx->keyschdec),
+         "S" (bx),
+         "d" (ax),
+         "c" (ctx->rounds),
+         [dectabs] "r" (&dec_tables)
+       : "cc", "memory", "r8", "r9", "r10", "r11");
+  return ret;
+# endif /* HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS */
 #elif defined(USE_ARM_ASM)
   return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds,
 				     &dec_tables);

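The inline assembly blocks added to do_encrypt and do_decrypt above are
the cheap alternative to an __attribute__((sysv_abi)) prototype: the
rijndael AMD64 routine touches only general-purpose registers, so
letting GCC perform a full ABI switch would spill XMM6-XMM15 around
every call for nothing.  Instead the C code loads the SysV argument
registers itself and declares the remaining SysV-volatile GPRs
clobbered.  A minimal sketch of the same pattern for a generic
five-argument callee, under the same assumption that the callee uses no
vector registers (call_sysv_5args, fn and a..e are illustrative names,
not libgcrypt API):

#include <stdint.h>

static inline uintptr_t
call_sysv_5args (uintptr_t (*fn) (void), void *a, void *b,
                 const void *c, int d, const void *e)
{
  uintptr_t ret;
  asm ("movq %[arg5], %%r8\n\t"    /* r8:  5th SysV argument */
       "callq *%[ret]\n\t"
       : [ret] "=a" (ret)          /* rax: call target in, result out */
       : "0" (fn),
         "D" (a),                  /* rdi: 1st SysV argument */
         "S" (b),                  /* rsi: 2nd */
         "d" (c),                  /* rdx: 3rd */
         "c" (d),                  /* rcx: 4th */
         [arg5] "r" (e)
       : "cc", "memory", "r8", "r9", "r10", "r11");
  return ret;
}
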
commit 8422d5d699265b960bd1ca837044ee052fc5b614
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Sat May 2 13:26:46 2015 +0300

    Enable AMD64 Whirlpool implementation for WIN64
    
    * cipher/whirlpool-sse2-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/whirlpool.c (USE_AMD64_ASM): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [USE_AMD64_ASM] (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
    [USE_AMD64_ASM] (_gcry_whirlpool_transform_amd64): Add ASM_FUNC_ABI to
    prototype.
    [USE_AMD64_ASM] (whirlpool_transform): Add ASM_EXTRA_STACK to stack
    burn value.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>

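Where the previous commit hand-rolled the cross-ABI call (the rijndael
assembly avoids vector registers), the Whirlpool transform does use SSE
registers, so this commit instead tags the prototype with
__attribute__((sysv_abi)) and lets GCC generate the ABI transition.  On
WIN64 that transition spills the ten callee-saved registers XMM6..XMM15
(10 * 16 = 160 bytes) into the caller's frame; ASM_EXTRA_STACK exists so
that this extra, potentially sensitive stack area is counted into the
stack burn value.  Condensed from the whirlpool.c hunk below:

#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
# define ASM_FUNC_ABI    __attribute__((sysv_abi)) /* GCC does the switch */
# define ASM_EXTRA_STACK (10 * 16)  /* XMM6..XMM15 spill area, 160 bytes */
#else
# define ASM_FUNC_ABI               /* native SysV: no conversion needed */
# define ASM_EXTRA_STACK 0
#endif
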
diff --git a/cipher/whirlpool-sse2-amd64.S b/cipher/whirlpool-sse2-amd64.S
index d0bcf2d..e98b831 100644
--- a/cipher/whirlpool-sse2-amd64.S
+++ b/cipher/whirlpool-sse2-amd64.S
@@ -20,7 +20,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_WHIRLPOOL)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_WHIRLPOOL)
 
 #ifdef __PIC__
 #  define RIP %rip
@@ -28,6 +29,12 @@
 #  define RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .text
 
 /* look-up table offsets on RTAB */
@@ -157,7 +164,7 @@
 
 .align 8
 .globl _gcry_whirlpool_transform_amd64
-.type  _gcry_whirlpool_transform_amd64,@function;
+ELF(.type  _gcry_whirlpool_transform_amd64,@function;)
 
 _gcry_whirlpool_transform_amd64:
 	/* input:
@@ -329,7 +336,7 @@ _gcry_whirlpool_transform_amd64:
 .Lskip:
 	movl $(STACK_MAX + 8), %eax;
 	ret;
-.size _gcry_whirlpool_transform_amd64,.-_gcry_whirlpool_transform_amd64;
+ELF(.size _gcry_whirlpool_transform_amd64,.-_gcry_whirlpool_transform_amd64;)
 
 #endif
 #endif
diff --git a/cipher/whirlpool.c b/cipher/whirlpool.c
index 2732f63..5f224a1 100644
--- a/cipher/whirlpool.c
+++ b/cipher/whirlpool.c
@@ -42,7 +42,8 @@
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64_ASM 1
 #endif
 
@@ -1192,9 +1193,17 @@ whirlpool_init (void *ctx, unsigned int flags)
 
 #ifdef USE_AMD64_ASM
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
 extern unsigned int
 _gcry_whirlpool_transform_amd64(u64 *state, const unsigned char *data,
-	size_t nblks, const struct whirlpool_tables_s *tables);
+    size_t nblks, const struct whirlpool_tables_s *tables) ASM_FUNC_ABI;
 
 static unsigned int
 whirlpool_transform (void *ctx, const unsigned char *data, size_t nblks)
@@ -1202,7 +1211,7 @@ whirlpool_transform (void *ctx, const unsigned char *data, size_t nblks)
   whirlpool_context_t *context = ctx;
 
   return _gcry_whirlpool_transform_amd64(
-		context->hash_state, data, nblks, &tab);
+		context->hash_state, data, nblks, &tab) + ASM_EXTRA_STACK;
 }
 
 #else /* USE_AMD64_ASM */

commit 1089a13073c26a9a456e43ec38d937e6ee7f4077
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Sat May 2 13:05:12 2015 +0300

    Enable AMD64 SHA512 implementations for WIN64
    
    * cipher/sha512-avx-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/sha512-avx2-bmi2-amd64.S: Ditto.
    * cipher/sha512-ssse3-amd64.S: Ditto.
    * cipher/sha512.c (USE_SSSE3, USE_AVX, USE_AVX2): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [USE_SSSE3 || USE_AVX || USE_AVX2] (ASM_FUNC_ABI)
    (ASM_EXTRA_STACK): New.
    (_gcry_sha512_transform_amd64_ssse3, _gcry_sha512_transform_amd64_avx)
    (_gcry_sha512_transform_amd64_avx2): Add ASM_FUNC_ABI to
    prototypes.
    (transform): Add ASM_EXTRA_STACK to stack burn value.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>

diff --git a/cipher/sha512-avx-amd64.S b/cipher/sha512-avx-amd64.S
index 3449b87..699c271 100644
--- a/cipher/sha512-avx-amd64.S
+++ b/cipher/sha512-avx-amd64.S
@@ -41,7 +41,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA512)
 
@@ -51,6 +52,12 @@
 #  define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 .text
@@ -259,7 +266,7 @@ frame_size = ((frame_GPRSAVE) + (frame_GPRSAVE_size))
 ; L is the message length in SHA512 blocks
 */
 .globl _gcry_sha512_transform_amd64_avx
-.type _gcry_sha512_transform_amd64_avx,@function;
+ELF(.type _gcry_sha512_transform_amd64_avx,@function;)
 .align 16
 _gcry_sha512_transform_amd64_avx:
 	xor eax, eax
diff --git a/cipher/sha512-avx2-bmi2-amd64.S b/cipher/sha512-avx2-bmi2-amd64.S
index d6301f3..02f95af 100644
--- a/cipher/sha512-avx2-bmi2-amd64.S
+++ b/cipher/sha512-avx2-bmi2-amd64.S
@@ -43,7 +43,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
     defined(USE_SHA512)
@@ -54,6 +55,12 @@
 #  define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 .text
@@ -596,7 +603,7 @@ rotate_Ys
 ; L is the message length in SHA512 blocks
 */
 .globl _gcry_sha512_transform_amd64_avx2
-.type _gcry_sha512_transform_amd64_avx2,@function;
+ELF(.type _gcry_sha512_transform_amd64_avx2,@function;)
 .align 16
 _gcry_sha512_transform_amd64_avx2:
 	xor eax, eax
diff --git a/cipher/sha512-ssse3-amd64.S b/cipher/sha512-ssse3-amd64.S
index 4c80baa..c721bcf 100644
--- a/cipher/sha512-ssse3-amd64.S
+++ b/cipher/sha512-ssse3-amd64.S
@@ -44,7 +44,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA512)
 
@@ -54,6 +55,12 @@
 #  define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 .text
@@ -261,7 +268,7 @@ frame_size = ((frame_GPRSAVE) + (frame_GPRSAVE_size))
 ; L is the message length in SHA512 blocks.
 */
 .globl _gcry_sha512_transform_amd64_ssse3
-.type _gcry_sha512_transform_amd64_ssse3,@function;
+ELF(.type _gcry_sha512_transform_amd64_ssse3,@function;)
 .align 16
 _gcry_sha512_transform_amd64_ssse3:
 	xor eax, eax
diff --git a/cipher/sha512.c b/cipher/sha512.c
index 5a6af80..029f8f0 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -68,27 +68,31 @@
 
 /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
 #undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSSE3 1
 #endif
 
 
 /* USE_AVX indicates whether to compile with Intel AVX code. */
 #undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX 1
 #endif
 
 
 /* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
 #undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX2 1
 #endif
 
@@ -543,6 +547,21 @@ transform_blk (SHA512_STATE *hd, const unsigned char *data)
 }
 
 
+/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
 #ifdef USE_ARM_NEON_ASM
 void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
 					const unsigned char *data,
@@ -551,17 +570,20 @@ void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
 
 #ifdef USE_SSSE3
 unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
-					        void *state, size_t num_blks);
+                                                void *state,
+                                                size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX
 unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
-					      void *state, size_t num_blks);
+                                              void *state,
+                                              size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX2
 unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
-					       void *state, size_t num_blks);
+                                               void *state,
+                                               size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 
@@ -574,19 +596,19 @@ transform (void *context, const unsigned char *data, size_t nblks)
 #ifdef USE_AVX2
   if (ctx->use_avx2)
     return _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_AVX
   if (ctx->use_avx)
     return _gcry_sha512_transform_amd64_avx (data, &ctx->state, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_SSSE3
   if (ctx->use_ssse3)
     return _gcry_sha512_transform_amd64_ssse3 (data, &ctx->state, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_ARM_NEON_ASM
@@ -607,6 +629,14 @@ transform (void *context, const unsigned char *data, size_t nblks)
     }
   while (--nblks);
 
+#ifdef ASM_EXTRA_STACK
+  /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
+   *  the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
+   *  here too.
+   */
+  burn += ASM_EXTRA_STACK;
+#endif
+
   return burn;
 }
 

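The value returned by transform() is a stack-burn estimate: an upper
bound on the stack bytes that may hold key- or state-dependent data and
that the caller will wipe afterwards.  A sketch of how the WIN64 total
above is composed, assuming 8-byte pointers (example_burn is an
illustrative name, not libgcrypt code):

static unsigned int
example_burn (unsigned int reported_by_asm)
{
  return reported_by_asm         /* frame used inside the .S routine   */
         + 4 * sizeof (void *)   /* 32 bytes of call/frame overhead    */
         + ASM_EXTRA_STACK;      /* 160 bytes: XMM6..XMM15 spill slots */
}
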
commit 022959099644f64df5f2a83ade21159864f64837
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Sat May 2 13:05:02 2015 +0300

    Enable AMD64 SHA256 implementations for WIN64
    
    * cipher/sha256-avx-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/sha256-avx2-bmi2-amd64.S: Ditto.
    * cipher/sha256-ssse3-amd64.S: Ditto.
    * cipher/sha256.c (USE_SSSE3, USE_AVX, USE_AVX2): Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [USE_SSSE3 || USE_AVX || USE_AVX2] (ASM_FUNC_ABI)
    (ASM_EXTRA_STACK): New.
    (_gcry_sha256_transform_amd64_ssse3, _gcry_sha256_transform_amd64_avx)
    (_gcry_sha256_transform_amd64_avx2): Add ASM_FUNC_ABI to prototypes.
    (transform): Add ASM_EXTRA_STACK to stack burn value.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>

diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S
index 3912db7..8bf26bd 100644
--- a/cipher/sha256-avx-amd64.S
+++ b/cipher/sha256-avx-amd64.S
@@ -54,7 +54,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA256)
 
@@ -64,6 +65,12 @@
 #  define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 #define	VMOVDQ vmovdqu /* assume buffers not aligned */
@@ -370,7 +377,7 @@ rotate_Xs
 */
 .text
 .globl _gcry_sha256_transform_amd64_avx
-.type  _gcry_sha256_transform_amd64_avx,@function;
+ELF(.type  _gcry_sha256_transform_amd64_avx,@function;)
 .align 16
 _gcry_sha256_transform_amd64_avx:
 	vzeroupper
diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S
index 09df711..74b6063 100644
--- a/cipher/sha256-avx2-bmi2-amd64.S
+++ b/cipher/sha256-avx2-bmi2-amd64.S
@@ -54,7 +54,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
     defined(USE_SHA256)
@@ -65,6 +66,12 @@
 #  define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 #define	VMOVDQ vmovdqu /* ; assume buffers not aligned  */
@@ -555,7 +562,7 @@ rotate_Xs
 */
 .text
 .globl _gcry_sha256_transform_amd64_avx2
-.type _gcry_sha256_transform_amd64_avx2,@function
+ELF(.type _gcry_sha256_transform_amd64_avx2,@function)
 .align 32
 _gcry_sha256_transform_amd64_avx2:
 	push	rbx
diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S
index 80b1cec..9ec87e4 100644
--- a/cipher/sha256-ssse3-amd64.S
+++ b/cipher/sha256-ssse3-amd64.S
@@ -55,7 +55,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA256)
 
@@ -65,6 +66,12 @@
 #  define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 #define	MOVDQ movdqu /* assume buffers not aligned */
@@ -376,7 +383,7 @@ rotate_Xs
 */
 .text
 .globl _gcry_sha256_transform_amd64_ssse3
-.type  _gcry_sha256_transform_amd64_ssse3,@function;
+ELF(.type  _gcry_sha256_transform_amd64_ssse3,@function;)
 .align 16
 _gcry_sha256_transform_amd64_ssse3:
 	push	rbx
diff --git a/cipher/sha256.c b/cipher/sha256.c
index d3af172..59ffa43 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -49,25 +49,29 @@
 
 /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
 #undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSSE3 1
 #endif
 
 /* USE_AVX indicates whether to compile with Intel AVX code. */
 #undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX 1
 #endif
 
 /* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
 #undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX2 1
 #endif
 
@@ -322,19 +326,37 @@ transform_blk (void *ctx, const unsigned char *data)
 #undef R
 
 
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
 #ifdef USE_SSSE3
 unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
-					        u32 state[8], size_t num_blks);
+                                                u32 state[8],
+                                                size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX
 unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
-					      u32 state[8], size_t num_blks);
+                                              u32 state[8],
+                                              size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX2
 unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
-					       u32 state[8], size_t num_blks);
+                                               u32 state[8],
+                                               size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 
@@ -347,19 +369,19 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
 #ifdef USE_AVX2
   if (hd->use_avx2)
     return _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_AVX
   if (hd->use_avx)
     return _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_SSSE3
   if (hd->use_ssse3)
     return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
   do
@@ -369,6 +391,14 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
     }
   while (--nblks);
 
+#ifdef ASM_EXTRA_STACK
+  /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
+   *  the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
+   *  here too.
+   */
+  burn += ASM_EXTRA_STACK;
+#endif
+
   return burn;
 }
 

commit e433676a899fa0d274d40547166b03c7c8bd8e78
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Sat May 2 12:57:07 2015 +0300

    Enable AMD64 SHA1 implementations for WIN64
    
    * cipher/sha1-avx-amd64.S: Enable when
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    (ELF): New macro to mask lines with ELF specific commands.
    * cipher/sha1-avx-bmi2-amd64.S: Ditto.
    * cipher/sha1-ssse3-amd64.S: Ditto.
    * cipher/sha1.c (USE_SSSE3, USE_AVX, USE_BMI2): Enable
    when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
    [USE_SSSE3 || USE_AVX || USE_BMI2] (ASM_FUNC_ABI)
    (ASM_EXTRA_STACK): New.
    (_gcry_sha1_transform_amd64_ssse3, _gcry_sha1_transform_amd64_avx)
    (_gcry_sha1_transform_amd64_avx_bmi2): Add ASM_FUNC_ABI to
    prototypes.
    (transform): Add ASM_EXTRA_STACK to stack burn value.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>

diff --git a/cipher/sha1-avx-amd64.S b/cipher/sha1-avx-amd64.S
index 6bec389..062a45b 100644
--- a/cipher/sha1-avx-amd64.S
+++ b/cipher/sha1-avx-amd64.S
@@ -29,7 +29,8 @@
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_GCC_INLINE_ASM_BMI2) && \
     defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(USE_SHA1)
 
@@ -40,6 +41,13 @@
 #endif
 
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
 /* Context structure */
 
 #define state_h0 0
@@ -209,7 +217,7 @@
  */
 .text
 .globl _gcry_sha1_transform_amd64_avx
-.type _gcry_sha1_transform_amd64_avx,@function
+ELF(.type _gcry_sha1_transform_amd64_avx,@function)
 .align 16
 _gcry_sha1_transform_amd64_avx:
   /* input:
diff --git a/cipher/sha1-avx-bmi2-amd64.S b/cipher/sha1-avx-bmi2-amd64.S
index cd5af5b..22bcbb3 100644
--- a/cipher/sha1-avx-bmi2-amd64.S
+++ b/cipher/sha1-avx-bmi2-amd64.S
@@ -29,7 +29,8 @@
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_GCC_INLINE_ASM_BMI2) && \
     defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
 
@@ -40,6 +41,13 @@
 #endif
 
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
 /* Context structure */
 
 #define state_h0 0
@@ -206,7 +214,7 @@
  */
 .text
 .globl _gcry_sha1_transform_amd64_avx_bmi2
-.type _gcry_sha1_transform_amd64_avx_bmi2,@function
+ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function)
 .align 16
 _gcry_sha1_transform_amd64_avx_bmi2:
   /* input:
diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S
index 226988d..98a19e6 100644
--- a/cipher/sha1-ssse3-amd64.S
+++ b/cipher/sha1-ssse3-amd64.S
@@ -29,7 +29,8 @@
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1)
 
 #ifdef __PIC__
@@ -39,6 +40,13 @@
 #endif
 
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
 /* Context structure */
 
 #define state_h0 0
@@ -220,7 +228,7 @@
  */
 .text
 .globl _gcry_sha1_transform_amd64_ssse3
-.type _gcry_sha1_transform_amd64_ssse3,@function
+ELF(.type _gcry_sha1_transform_amd64_ssse3,@function)
 .align 16
 _gcry_sha1_transform_amd64_ssse3:
   /* input:
diff --git a/cipher/sha1.c b/cipher/sha1.c
index 6ccf0e8..eb42883 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -45,22 +45,26 @@
 
 /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
 #undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSSE3 1
 #endif
 
 /* USE_AVX indicates whether to compile with Intel AVX code. */
 #undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX 1
 #endif
 
 /* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
 #undef USE_BMI2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX) && defined(HAVE_GCC_INLINE_ASM_BMI2)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_BMI2 1
 #endif
 
@@ -287,22 +291,37 @@ transform_blk (void *ctx, const unsigned char *data)
 }
 
 
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
 #ifdef USE_SSSE3
 unsigned int
 _gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
-                                  size_t nblks);
+                                  size_t nblks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX
 unsigned int
 _gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
-                                 size_t nblks);
+                                 size_t nblks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_BMI2
 unsigned int
 _gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
-                                     size_t nblks);
+                                     size_t nblks) ASM_FUNC_ABI;
 #endif
 
 
@@ -315,17 +334,17 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
 #ifdef USE_BMI2
   if (hd->use_bmi2)
     return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 #ifdef USE_AVX
   if (hd->use_avx)
     return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 #ifdef USE_SSSE3
   if (hd->use_ssse3)
     return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 #ifdef USE_NEON
   if (hd->use_neon)
@@ -340,6 +359,14 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
     }
   while (--nblks);
 
+#ifdef ASM_EXTRA_STACK
+  /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
+   *  the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
+   *  here too.
+   */
+  burn += ASM_EXTRA_STACK;
+#endif
+
   return burn;
 }
 

commit 4e09aaa36d151c3312019724a77fc09aa345b82f
Author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date:   Wed Apr 29 18:18:07 2015 +0300

    Enable AES/AES-NI, AES/SSSE3 and GCM/PCLMUL implementations on WIN64
    
    * cipher/cipher-gcm-intel-pclmul.c (_gcry_ghash_setup_intel_pclmul)
    (_gcry_ghash_intel_pclmul) [__WIN64__]: Store non-volatile vector
    registers before use and restore after.
    * cipher/cipher-internal.h (GCM_USE_INTEL_PCLMUL): Remove dependency
    on !defined(__WIN64__).
    * cipher/rijndael-aesni.c [__WIN64__] (aesni_prepare_2_6_variable,
    aesni_prepare, aesni_prepare_2_6, aesni_cleanup)
    (aesni_cleanup_2_6): New.
    [!__WIN64__] (aesni_prepare_2_6_variable, aesni_prepare_2_6): New.
    (_gcry_aes_aesni_do_setkey, _gcry_aes_aesni_cbc_enc)
    (_gcry_aes_aesni_ctr_enc, _gcry_aes_aesni_cfb_dec, _gcry_aes_aesni_cbc_dec)
    (_gcry_aes_aesni_ocb_crypt, _gcry_aes_aesni_ocb_auth): Use
    'aesni_prepare_2_6'.
    * cipher/rijndael-internal.h (USE_SSSE3): Enable if
    HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS or
    HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS.
    (USE_AESNI): Remove dependency on !defined(__WIN64__)
    * cipher/rijndael-ssse3-amd64.c [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]
    (vpaes_ssse3_prepare, vpaes_ssse3_cleanup): New.
    [!HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (vpaes_ssse3_prepare): New.
    (vpaes_ssse3_prepare_enc, vpaes_ssse3_prepare_dec): Use
    'vpaes_ssse3_prepare'.
    (_gcry_aes_ssse3_do_setkey, _gcry_aes_ssse3_prepare_decryption): Use
    'vpaes_ssse3_prepare' and 'vpaes_ssse3_cleanup'.
    [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (X): Add masking macro to
    exclude '.type' and '.size' markers from assembly code, as they are
    not supported on WIN64/COFF objects.
    * configure.ac (gcry_cv_gcc_attribute_ms_abi)
    (gcry_cv_gcc_attribute_sysv_abi, gcry_cv_gcc_default_abi_is_ms_abi)
    (gcry_cv_gcc_default_abi_is_sysv_abi)
    (gcry_cv_gcc_win64_platform_as_ok): New checks.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>

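Unlike the standalone .S modules of the earlier commits, the PCLMUL and
AES-NI code is inline assembly inside C functions compiled with the
native (ms_abi) convention, so no generated prologue will preserve the
WIN64 callee-saved XMM6-XMM15 for it.  The hunks below therefore
bracket each function with explicit spills to a local buffer, and on
the way out wipe the volatile registers while restoring the saved ones.
A minimal two-register sketch of the pattern (the buffer name is
illustrative; the real code saves three to ten registers):

#ifdef __WIN64__
  char win64tmp[2 * 16];          /* space for xmm6 and xmm7 */

  /* Save the callee-saved registers about to be clobbered; movdqu is
   * used because the char buffer has no 16-byte alignment guarantee. */
  asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
                "movdqu %%xmm7, 1*16(%0)\n\t"
                : : "r" (win64tmp) : "memory");
#endif

  /* ... computation clobbering xmm0..xmm7 ... */

#ifdef __WIN64__
  /* Wipe the volatile registers, restore the callee-saved ones. */
  asm volatile ("pxor %%xmm0, %%xmm0\n\t"
                "pxor %%xmm1, %%xmm1\n\t"
                "movdqu 0*16(%0), %%xmm6\n\t"
                "movdqu 1*16(%0), %%xmm7\n\t"
                : : "r" (win64tmp) : "memory");
#endif
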
diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c
index 79648ce..a327249 100644
--- a/cipher/cipher-gcm-intel-pclmul.c
+++ b/cipher/cipher-gcm-intel-pclmul.c
@@ -249,6 +249,17 @@ void
 _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
 {
   u64 tmp[2];
+#if defined(__x86_64__) && defined(__WIN64__)
+  char win64tmp[3 * 16];
+
+  /* XMM6-XMM8 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+                "movdqu %%xmm7, 1*16(%0)\n\t"
+                "movdqu %%xmm8, 2*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory");
+#endif
 
   /* Swap endianness of hsub. */
   tmp[0] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 8);
@@ -285,6 +296,21 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
                 : [h_234] "r" (c->u_mode.gcm.gcm_table)
                 : "memory");
 
+#ifdef __WIN64__
+  /* Clear/restore used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "movdqu 0*16(%0), %%xmm6\n\t"
+                "movdqu 1*16(%0), %%xmm7\n\t"
+                "movdqu 2*16(%0), %%xmm8\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory");
+#else
   /* Clear used registers. */
   asm volatile( "pxor %%xmm0, %%xmm0\n\t"
                 "pxor %%xmm1, %%xmm1\n\t"
@@ -297,6 +323,7 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
                 "pxor %%xmm8, %%xmm8\n\t"
                 ::: "cc" );
 #endif
+#endif
 
   wipememory (tmp, sizeof(tmp));
 }
@@ -309,10 +336,30 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
   static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
     { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
   const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+#ifdef __WIN64__
+  char win64tmp[10 * 16];
+#endif
 
   if (nblocks == 0)
     return 0;
 
+#ifdef __WIN64__
+  /* XMM6-XMM15 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6,  0*16(%0)\n\t"
+                "movdqu %%xmm7,  1*16(%0)\n\t"
+                "movdqu %%xmm8,  2*16(%0)\n\t"
+                "movdqu %%xmm9,  3*16(%0)\n\t"
+                "movdqu %%xmm10, 4*16(%0)\n\t"
+                "movdqu %%xmm11, 5*16(%0)\n\t"
+                "movdqu %%xmm12, 6*16(%0)\n\t"
+                "movdqu %%xmm13, 7*16(%0)\n\t"
+                "movdqu %%xmm14, 8*16(%0)\n\t"
+                "movdqu %%xmm15, 9*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#endif
+
   /* Preload hash and H1. */
   asm volatile ("movdqu %[hash], %%xmm1\n\t"
                 "movdqa %[hsub], %%xmm0\n\t"
@@ -353,6 +400,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
         }
       while (nblocks >= 4);
 
+#ifndef __WIN64__
       /* Clear used x86-64/XMM registers. */
       asm volatile( "pxor %%xmm8, %%xmm8\n\t"
                     "pxor %%xmm9, %%xmm9\n\t"
@@ -363,6 +411,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
                     "pxor %%xmm14, %%xmm14\n\t"
                     "pxor %%xmm15, %%xmm15\n\t"
                     ::: "cc" );
+#endif
     }
 #endif
 
@@ -385,6 +434,28 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
                 : [hash] "=m" (*result)
                 : [be_mask] "m" (*be_mask));
 
+#ifdef __WIN64__
+  /* Clear/restore used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "movdqu 0*16(%0), %%xmm6\n\t"
+                "movdqu 1*16(%0), %%xmm7\n\t"
+                "movdqu 2*16(%0), %%xmm8\n\t"
+                "movdqu 3*16(%0), %%xmm9\n\t"
+                "movdqu 4*16(%0), %%xmm10\n\t"
+                "movdqu 5*16(%0), %%xmm11\n\t"
+                "movdqu 6*16(%0), %%xmm12\n\t"
+                "movdqu 7*16(%0), %%xmm13\n\t"
+                "movdqu 8*16(%0), %%xmm14\n\t"
+                "movdqu 9*16(%0), %%xmm15\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#else
   /* Clear used registers. */
   asm volatile( "pxor %%xmm0, %%xmm0\n\t"
                 "pxor %%xmm1, %%xmm1\n\t"
@@ -395,6 +466,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
                 "pxor %%xmm6, %%xmm6\n\t"
                 "pxor %%xmm7, %%xmm7\n\t"
                 ::: "cc" );
+#endif
 
   return 0;
 }
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 693f218..e20ea56 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -67,9 +67,7 @@
 #if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
 # if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
 #  if __GNUC__ >= 4
-#   ifndef __WIN64__
-#    define GCM_USE_INTEL_PCLMUL 1
-#   endif
+#   define GCM_USE_INTEL_PCLMUL 1
 #  endif
 # endif
 #endif /* GCM_USE_INTEL_PCLMUL */
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 147679f..910bc68 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -49,24 +49,54 @@ typedef struct u128_s { u32 a, b, c, d; } u128_t;
    the use of these macros.  Their purpose is to make sure that the
    SSE registers are cleared and won't reveal any information about
    the key or the data.  */
-#define aesni_prepare() do { } while (0)
-#define aesni_cleanup()                                                \
-  do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
-                     "pxor %%xmm1, %%xmm1\n" :: );                     \
-  } while (0)
-#define aesni_cleanup_2_6()                                            \
-  do { asm volatile ("pxor %%xmm2, %%xmm2\n\t"                         \
-                     "pxor %%xmm3, %%xmm3\n"                           \
-                     "pxor %%xmm4, %%xmm4\n"                           \
-                     "pxor %%xmm5, %%xmm5\n"                           \
-                     "pxor %%xmm6, %%xmm6\n":: );                      \
-  } while (0)
-
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define aesni_prepare_2_6_variable char win64tmp[16]
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_6()                                            \
+   do { asm volatile ("movdqu %%xmm6, %0\n\t"                           \
+                      : "=m" (*win64tmp)                                \
+                      :                                                 \
+                      : "memory");                                      \
+   } while (0)
+# define aesni_cleanup()                                                \
+   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
+                      "pxor %%xmm1, %%xmm1\n" :: );                     \
+   } while (0)
+# define aesni_cleanup_2_6()                                            \
+   do { asm volatile ("movdqu %0,   %%xmm6\n\t"                         \
+                      "pxor %%xmm2, %%xmm2\n"                           \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      :                                                 \
+                      : "m" (*win64tmp)                                 \
+                      : "memory");                                      \
+   } while (0)
+#else
+# define aesni_prepare_2_6_variable
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_6() do { } while (0)
+# define aesni_cleanup()                                                \
+   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
+                      "pxor %%xmm1, %%xmm1\n" :: );                     \
+   } while (0)
+# define aesni_cleanup_2_6()                                            \
+   do { asm volatile ("pxor %%xmm2, %%xmm2\n\t"                         \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      "pxor %%xmm6, %%xmm6\n":: );                      \
+   } while (0)
+#endif
 
 void
 _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key)
 {
+  aesni_prepare_2_6_variable;
+
   aesni_prepare();
+  aesni_prepare_2_6();
 
   if (ctx->rounds < 12)
     {
@@ -999,7 +1029,10 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
                          const unsigned char *inbuf, unsigned char *iv,
                          size_t nblocks, int cbc_mac)
 {
+  aesni_prepare_2_6_variable;
+
   aesni_prepare ();
+  aesni_prepare_2_6();
 
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
                 : /* No output */
@@ -1044,8 +1077,10 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
 {
   static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
     { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  aesni_prepare_2_6_variable;
 
   aesni_prepare ();
+  aesni_prepare_2_6();
 
   asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */
                 "movdqa %[ctr], %%xmm5\n\t"  /* Preload CTR */
@@ -1095,7 +1130,10 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
                          const unsigned char *inbuf, unsigned char *iv,
                          size_t nblocks)
 {
+  aesni_prepare_2_6_variable;
+
   aesni_prepare ();
+  aesni_prepare_2_6();
 
   asm volatile ("movdqu %[iv], %%xmm6\n\t"
                 : /* No output */
@@ -1177,7 +1215,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 			 const unsigned char *inbuf, unsigned char *iv,
 			 size_t nblocks)
 {
+  aesni_prepare_2_6_variable;
+
   aesni_prepare ();
+  aesni_prepare_2_6();
 
   asm volatile
     ("movdqu %[iv], %%xmm5\n\t"	/* use xmm5 as fast IV storage */
@@ -1331,8 +1372,10 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   u64 n = c->u_mode.ocb.data_nblocks;
+  aesni_prepare_2_6_variable;
 
   aesni_prepare ();
+  aesni_prepare_2_6 ();
 
   /* Preload Offset and Checksum */
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
@@ -1473,8 +1516,10 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
   u64 n = c->u_mode.ocb.data_nblocks;
+  aesni_prepare_2_6_variable;
 
   aesni_prepare ();
+  aesni_prepare_2_6 ();
 
   /* Preload Offset and Checksum */
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
@@ -1625,8 +1670,10 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   u64 n = c->u_mode.ocb.aad_nblocks;
+  aesni_prepare_2_6_variable;
 
   aesni_prepare ();
+  aesni_prepare_2_6 ();
 
   /* Preload Offset and Sum */
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
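
The AES-NI helpers above only ever touch XMM0..XMM6, so the WIN64 cost
is a single 16-byte slot for XMM6.  aesni_prepare_2_6_variable expands
to that local declaration on WIN64 (and to nothing elsewhere), which is
why each function gains both a declaration line and a prepare call.  A
condensed usage sketch (example_aesni_user is an illustrative name, not
libgcrypt code):

static void
example_aesni_user (void)
{
  aesni_prepare_2_6_variable;   /* WIN64: declares char win64tmp[16] */

  aesni_prepare ();
  aesni_prepare_2_6 ();         /* WIN64: movdqu %xmm6 -> win64tmp */

  /* ... AES-NI work in xmm0..xmm6 ... */

  aesni_cleanup ();             /* wipe xmm0 and xmm1 */
  aesni_cleanup_2_6 ();         /* wipe xmm2..xmm5; WIN64: restore xmm6 */
}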
diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index bd247a9..33ca53f 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -44,8 +44,9 @@
 #endif
 
 /* USE_SSSE3 indicates whether to use SSSE3 code. */
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 #  define USE_SSSE3 1
 #endif
 
@@ -75,9 +76,7 @@
 #ifdef ENABLE_AESNI_SUPPORT
 # if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
 #  if __GNUC__ >= 4
-#   ifndef __WIN64__
-#    define USE_AESNI 1
-#   endif
+#   define USE_AESNI 1
 #  endif
 # endif
 #endif /* ENABLE_AESNI_SUPPORT */
diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c
index 3f1b352..21438dc 100644
--- a/cipher/rijndael-ssse3-amd64.c
+++ b/cipher/rijndael-ssse3-amd64.c
@@ -61,7 +61,60 @@
  the use of these macros.  Their purpose is to make sure that the
  SSE registers are cleared and won't reveal any information about
   the key or the data.  */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define vpaes_ssse3_prepare() \
+    char win64tmp[16 * 10]; \
+    asm volatile ("movdqu %%xmm6,  0*16(%0)\n\t" \
+                  "movdqu %%xmm7,  1*16(%0)\n\t" \
+                  "movdqu %%xmm8,  2*16(%0)\n\t" \
+                  "movdqu %%xmm9,  3*16(%0)\n\t" \
+                  "movdqu %%xmm10, 4*16(%0)\n\t" \
+                  "movdqu %%xmm11, 5*16(%0)\n\t" \
+                  "movdqu %%xmm12, 6*16(%0)\n\t" \
+                  "movdqu %%xmm13, 7*16(%0)\n\t" \
+                  "movdqu %%xmm14, 8*16(%0)\n\t" \
+                  "movdqu %%xmm15, 9*16(%0)\n\t" \
+                  : \
+                  : "r" (win64tmp) \
+                  : "memory" )
+# define vpaes_ssse3_cleanup() \
+    asm volatile ("pxor	%%xmm0,  %%xmm0 \n\t" \
+                  "pxor	%%xmm1,  %%xmm1 \n\t" \
+                  "pxor	%%xmm2,  %%xmm2 \n\t" \
+                  "pxor	%%xmm3,  %%xmm3 \n\t" \
+                  "pxor	%%xmm4,  %%xmm4 \n\t" \
+                  "pxor	%%xmm5,  %%xmm5 \n\t" \
+                  "movdqu 0*16(%0), %%xmm6 \n\t" \
+                  "movdqu 1*16(%0), %%xmm7 \n\t" \
+                  "movdqu 2*16(%0), %%xmm8 \n\t" \
+                  "movdqu 3*16(%0), %%xmm9 \n\t" \
+                  "movdqu 4*16(%0), %%xmm10 \n\t" \
+                  "movdqu 5*16(%0), %%xmm11 \n\t" \
+                  "movdqu 6*16(%0), %%xmm12 \n\t" \
+                  "movdqu 7*16(%0), %%xmm13 \n\t" \
+                  "movdqu 8*16(%0), %%xmm14 \n\t" \
+                  "movdqu 9*16(%0), %%xmm15 \n\t" \
+                  : \
+                  : "r" (win64tmp) \
+                  : "memory" )
+#else
+# define vpaes_ssse3_prepare() /*_*/
+# define vpaes_ssse3_cleanup() \
+    asm volatile ("pxor	%%xmm0,  %%xmm0 \n\t" \
+                  "pxor	%%xmm1,  %%xmm1 \n\t" \
+                  "pxor	%%xmm2,  %%xmm2 \n\t" \
+                  "pxor	%%xmm3,  %%xmm3 \n\t" \
+                  "pxor	%%xmm4,  %%xmm4 \n\t" \
+                  "pxor	%%xmm5,  %%xmm5 \n\t" \
+                  "pxor	%%xmm6,  %%xmm6 \n\t" \
+                  "pxor	%%xmm7,  %%xmm7 \n\t" \
+                  "pxor	%%xmm8,  %%xmm8 \n\t" \
+                  ::: "memory" )
+#endif
+
 #define vpaes_ssse3_prepare_enc(const_ptr) \
+    vpaes_ssse3_prepare(); \
     asm volatile ("lea	.Laes_consts(%%rip), %q0 \n\t" \
                   "movdqa	          (%q0), %%xmm9  # 0F \n\t" \
                   "movdqa	.Lk_inv   (%q0), %%xmm10 # inv \n\t" \
@@ -75,6 +128,7 @@
                   : "memory" )
 
 #define vpaes_ssse3_prepare_dec(const_ptr) \
+    vpaes_ssse3_prepare(); \
     asm volatile ("lea	.Laes_consts(%%rip), %q0 \n\t" \
                   "movdqa	          (%q0), %%xmm9  # 0F \n\t" \
                   "movdqa	.Lk_inv   (%q0), %%xmm10 # inv \n\t" \
@@ -88,17 +142,6 @@
                   : \
                   : "memory" )
 
-#define vpaes_ssse3_cleanup() \
-    asm volatile ("pxor	%%xmm0,  %%xmm0 \n\t" \
-                  "pxor	%%xmm1,  %%xmm1 \n\t" \
-                  "pxor	%%xmm2,  %%xmm2 \n\t" \
-                  "pxor	%%xmm3,  %%xmm3 \n\t" \
-                  "pxor	%%xmm4,  %%xmm4 \n\t" \
-                  "pxor	%%xmm5,  %%xmm5 \n\t" \
-                  "pxor	%%xmm6,  %%xmm6 \n\t" \
-                  "pxor	%%xmm7,  %%xmm7 \n\t" \
-                  "pxor	%%xmm8,  %%xmm8 \n\t" \
-                  ::: "memory" )
 
 
 void
@@ -106,6 +149,8 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
 {
   unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
 
+  vpaes_ssse3_prepare();
+
   asm volatile ("leaq %q[key], %%rdi"			"\n\t"
                 "movl %[bits], %%esi"			"\n\t"
                 "leaq %[buf], %%rdx"			"\n\t"
@@ -121,6 +166,8 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
                 : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi",
                   "cc", "memory");
 
+  vpaes_ssse3_cleanup();
+
   /* Save key for setting up decryption. */
   memcpy(&ctx->keyschdec32[0][0], key, keybits / 8);
 }
@@ -132,6 +179,8 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
 {
   unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
 
+  vpaes_ssse3_prepare();
+
   asm volatile ("leaq %q[key], %%rdi"			"\n\t"
                 "movl %[bits], %%esi"			"\n\t"
                 "leaq %[buf], %%rdx"			"\n\t"
@@ -146,6 +195,8 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
                   [rotoffs] "g" ((keybits == 192) ? 0 : 32)
                 : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi",
                   "cc", "memory");
+
+  vpaes_ssse3_cleanup();
 }
 
 
@@ -465,6 +516,11 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define X(...)
+#else
+# define X(...) __VA_ARGS__
+#endif
 
 asm (
   "\n\t" "##"
@@ -494,7 +550,7 @@ asm (
   "\n\t" "##"
   "\n\t" "##"
   "\n\t" ".align 16"
-  "\n\t" ".type _aes_encrypt_core,@function"
+X("\n\t" ".type _aes_encrypt_core,@function")
   "\n\t" "_aes_encrypt_core:"
   "\n\t" "	leaq	.Lk_mc_backward(%rcx), %rdi"
   "\n\t" "	mov	$16,	%rsi"
@@ -570,7 +626,7 @@ asm (
   "\n\t" "	pxor	%xmm4,	%xmm0	# 0 = A"
   "\n\t" "	pshufb	.Lk_sr(%rsi,%rcx), %xmm0"
   "\n\t" "	ret"
-  "\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core"
+X("\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core")
 
   "\n\t" "##"
   "\n\t" "##  Decryption core"
@@ -578,7 +634,7 @@ asm (
   "\n\t" "##  Same API as encryption core."
   "\n\t" "##"
   "\n\t" ".align 16"
-  "\n\t" ".type _aes_decrypt_core,@function"
+X("\n\t" ".type _aes_decrypt_core,@function")
   "\n\t" "_aes_decrypt_core:"
   "\n\t" "	movl	%eax,	%esi"
   "\n\t" "	shll	$4,	%esi"
@@ -670,7 +726,7 @@ asm (
   "\n\t" "	pxor	%xmm4,	%xmm0	# 0 = A"
   "\n\t" "	pshufb	.Lk_sr(%rsi,%rcx), %xmm0"
   "\n\t" "	ret"
-  "\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core"
+X("\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core")
 
   "\n\t" "########################################################"
   "\n\t" "##                                                    ##"
@@ -679,7 +735,7 @@ asm (
   "\n\t" "########################################################"
 
   "\n\t" ".align 16"
-  "\n\t" ".type _aes_schedule_core,@function"
+X("\n\t" ".type _aes_schedule_core,@function")
   "\n\t" "_aes_schedule_core:"
   "\n\t" "	# rdi = key"
   "\n\t" "	# rsi = size in bits"
@@ -1039,7 +1095,7 @@ asm (
   "\n\t" "	pxor	%xmm7,  %xmm7"
   "\n\t" "	pxor	%xmm8,  %xmm8"
   "\n\t" "	ret"
-  "\n\t" ".size _aes_schedule_core,.-_aes_schedule_core"
+X("\n\t" ".size _aes_schedule_core,.-_aes_schedule_core")
 
   "\n\t" "########################################################"
   "\n\t" "##                                                    ##"
@@ -1048,7 +1104,7 @@ asm (
   "\n\t" "########################################################"
 
   "\n\t" ".align 16"
-  "\n\t" ".type _aes_consts,@object"
+X("\n\t" ".type _aes_consts,@object")
   "\n\t" ".Laes_consts:"
   "\n\t" "_aes_consts:"
   "\n\t" "	# s0F"
@@ -1226,7 +1282,7 @@ asm (
   "\n\t" "	.quad	0xC7AA6DB9D4943E2D"
   "\n\t" "	.quad	0x12D7560F93441D00"
   "\n\t" "	.quad	0xCA4B8159D8C58E9C"
-  "\n\t" ".size _aes_consts,.-_aes_consts"
+X("\n\t" ".size _aes_consts,.-_aes_consts")
 );
 
 #endif /* USE_SSSE3 */
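
The configure.ac additions that follow detect the toolchain's calling
conventions without running any code: each probe assigns a default-ABI
function pointer to an explicitly attributed one, which draws a warning
exactly when the two conventions differ, and the temporarily added
-Werror turns that warning into a compile failure.  A clean compile
therefore identifies the default ABI.  Condensed from the ms_abi probe
below:

/* Compiles cleanly only when the default calling convention already is
 * ms_abi; on SysV-ABI targets the pointer assignment warns and, with
 * -Werror, fails. */
void *test (void)
{
  void *(*def_func) (void) = test;
  void *__attribute__((ms_abi)) (*msabi_func) (void);
  msabi_func = def_func;
  return msabi_func;
}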
diff --git a/configure.ac b/configure.ac
index 594209f..0f16175 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1127,6 +1127,93 @@ fi
 ####                                     ####
 #############################################
 
+
+# Following tests depend on warnings to cause compile to fail, so set -Werror
+# temporarily.
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+
+
+#
+# Check whether compiler supports 'ms_abi' function attribute.
+#
+AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute],
+       [gcry_cv_gcc_attribute_ms_abi],
+       [gcry_cv_gcc_attribute_ms_abi=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[int __attribute__ ((ms_abi)) proto(int);]])],
+          [gcry_cv_gcc_attribute_ms_abi=yes])])
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1,
+     [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute])
+fi
+
+
+#
+# Check whether compiler supports 'sysv_abi' function attribute.
+#
+AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute],
+       [gcry_cv_gcc_attribute_sysv_abi],
+       [gcry_cv_gcc_attribute_sysv_abi=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[int __attribute__ ((sysv_abi)) proto(int);]])],
+          [gcry_cv_gcc_attribute_sysv_abi=yes])])
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1,
+     [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute])
+fi
+
+
+#
+# Check whether default calling convention is 'ms_abi'.
+#
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+   AC_CACHE_CHECK([whether default calling convention is 'ms_abi'],
+          [gcry_cv_gcc_default_abi_is_ms_abi],
+          [gcry_cv_gcc_default_abi_is_ms_abi=no
+           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+             [[void *test(void) {
+                 void *(*def_func)(void) = test;
+                 void *__attribute__((ms_abi))(*msabi_func)(void);
+                 /* warns on SysV ABI targets, passes on Windows-based targets */
+                 msabi_func = def_func;
+                 return msabi_func;
+             }]])],
+             [gcry_cv_gcc_default_abi_is_ms_abi=yes])])
+   if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then
+      AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1,
+        [Defined if default calling convention is 'ms_abi'])
+   fi
+fi
+
+
+#
+# Check whether default calling convention is 'sysv_abi'.
+#
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+   AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'],
+          [gcry_cv_gcc_default_abi_is_sysv_abi],
+          [gcry_cv_gcc_default_abi_is_sysv_abi=no
+           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+             [[void *test(void) {
+                 void *(*def_func)(void) = test;
+                 void *__attribute__((sysv_abi))(*sysvabi_func)(void);
+                 /* warns on MS ABI targets, passes on SysV ABI targets */
+                 sysvabi_func = def_func;
+                 return sysvabi_func;
+             }]])],
+             [gcry_cv_gcc_default_abi_is_sysv_abi=yes])])
+   if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then
+      AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1,
+        [Defined if default calling convention is 'sysv_abi'])
+   fi
+fi
+
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
 #
 # Check whether GCC inline assembler supports SSSE3 instructions
 # This is required for the AES-NI instructions.
@@ -1281,9 +1368,6 @@ if test $amd64_as_feature_detection = yes; then
           [[__asm__(
                 /* Test if '.type' and '.size' are supported.  */
                 /* These work only on ELF targets. */
-		/* TODO: add COFF (mingw64, cygwin64) support to assembly
-                 * implementations.  Mingw64/cygwin64 also require additional
-                 * work because they use different calling convention. */
 		"asmfunc:\n\t"
                 ".size asmfunc,.-asmfunc;\n\t"
                 ".type asmfunc, at function;\n\t"
@@ -1299,6 +1383,24 @@ if test $amd64_as_feature_detection = yes; then
      AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1,
               [Defined if underlying assembler is compatible with amd64 assembly implementations])
   fi
+  if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" &&
+     test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" &&
+     test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then
+    AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations],
+      [gcry_cv_gcc_win64_platform_as_ok],
+      [gcry_cv_gcc_win64_platform_as_ok=no
+      AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+        [[__asm__(
+              ".globl asmfunc\n\t"
+              "asmfunc:\n\t"
+              "xorq \$(1234), %rbp;\n\t"
+          );]])],
+        [gcry_cv_gcc_win64_platform_as_ok=yes])])
+    if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then
+      AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1,
+                [Defined if underlying assembler is compatible with WIN64 assembly implementations])
+    fi
+  fi
 fi
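
These configure checks work by promoting warnings to errors: assigning a
function pointer of the default calling convention to one carrying an
explicit ABI attribute draws a GCC warning exactly when the two
conventions differ, so with -Werror in effect AC_COMPILE_IFELSE reports
failure on the mismatching target.  A hedged sketch of how the resulting
config.h macros can be consumed from C (the ASM_FUNC_ABI wrapper and the
declaration are illustrative, not part of this patch):

    #include <config.h>

    /* On mingw64 the default convention is the MS ABI, but the existing
     * AMD64 assembly expects SysV argument registers; force SysV on the
     * assembly prototypes when the compiler supports it.  */
    #if defined(HAVE_GCC_ATTRIBUTE_SYSV_ABI) && \
        defined(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI)
    # define ASM_FUNC_ABI __attribute__((sysv_abi))
    #else
    # define ASM_FUNC_ABI
    #endif

    extern void ASM_FUNC_ABI _gcry_example_asm_fn (void *dst, const void *src);
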
 
 

commit 460355f23e770637d29e3af7b998a957a2b5bc88
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Wed Apr 29 18:18:07 2015 +0300

    Add W64 support for mpi amd64 assembly
    
    * acinclude.m4 (GNUPG_SYS_SYMBOL_UNDERSCORE): Set
    'ac_cv_sys_symbol_underscore=no' on MinGW-w64.
    * mpi/amd64/func_abi.h: New.
    * mpi/amd64/mpih-add1.S (_gcry_mpih_add_n): Add FUNC_ENTRY and FUNC_EXIT.
    * mpi/amd64/mpih-lshift.S (_gcry_mpih_lshift): Ditto.
    * mpi/amd64/mpih-mul1.S (_gcry_mpih_mul_1): Ditto.
    * mpi/amd64/mpih-mul2.S (_gcry_mpih_addmul_1): Ditto.
    * mpi/amd64/mpih-mul3.S (_gcry_mpih_submul_1): Ditto.
    * mpi/amd64/mpih-rshift.S (_gcry_mpih_rshift): Ditto.
    * mpi/amd64/mpih-sub1.S (_gcry_mpih_sub_n): Ditto.
    * mpi/config.links [host=x86_64-*mingw*]: Enable assembly modules.
    [host=x86_64-*-*]: Append mpi/amd64/func_abi.h to mpi/asm-syntax.h.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/acinclude.m4 b/acinclude.m4
index 0791b84..764efd4 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -101,9 +101,12 @@ AC_DEFUN([GNUPG_CHECK_GNUMAKE],
 AC_DEFUN([GNUPG_SYS_SYMBOL_UNDERSCORE],
 [tmp_do_check="no"
 case "${host}" in
-    *-mingw32*)
+    i?86-*-mingw32*)
         ac_cv_sys_symbol_underscore=yes
         ;;
+    x86_64-*-mingw32*)
+        ac_cv_sys_symbol_underscore=no
+        ;;
     i386-emx-os2 | i[3456]86-pc-os2*emx | i386-pc-msdosdjgpp)
         ac_cv_sys_symbol_underscore=yes
         ;;
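
Background on the underscore switch: 32-bit Windows toolchains decorate C
symbols with a leading underscore, while 64-bit Windows, like ELF x86_64,
does not, so the mpi assembly must emit whichever name the C side links
against.  A hedged reconstruction of the kind of macro this cache value
ends up driving (C_SYMBOL_NAME is used by the mpi assembly below; this
particular definition is illustrative, not quoted from syntax.h):

    #ifdef WITH_SYMBOL_UNDERSCORE   /* set when symbols are underscored */
    # define C_SYMBOL_NAME(name) _##name
    #else
    # define C_SYMBOL_NAME(name) name
    #endif
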
diff --git a/mpi/amd64/func_abi.h b/mpi/amd64/func_abi.h
new file mode 100644
index 0000000..ce44674
--- /dev/null
+++ b/mpi/amd64/func_abi.h
@@ -0,0 +1,19 @@
+#ifdef USE_MS_ABI
+ /* Store registers and move the first four input arguments from the
+  * MS ABI to the SysV ABI.  */
+ #define FUNC_ENTRY() \
+	pushq %rsi; \
+	pushq %rdi; \
+	movq %rdx, %rsi; \
+	movq %rcx, %rdi; \
+	movq %r8, %rdx; \
+	movq %r9, %rcx;
+
+ /* Restore registers.  */
+ #define FUNC_EXIT() \
+	popq %rdi; \
+	popq %rsi;
+#else
+ #define FUNC_ENTRY() /**/
+ #define FUNC_EXIT() /**/
+#endif
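
For reference, the translation implemented above: the MS x64 ABI passes
the first four integer arguments in %rcx, %rdx, %r8, %r9 and treats
%rsi/%rdi as callee-saved, while the SysV AMD64 ABI passes them in %rdi,
%rsi, %rdx, %rcx.  FUNC_ENTRY therefore saves %rsi/%rdi and moves each
MS-ABI argument register into its SysV position; FUNC_EXIT restores the
saved registers.  A hedged usage sketch (function name hypothetical):

    .text
    	.globl C_SYMBOL_NAME(_gcry_mpih_example)
    C_SYMBOL_NAME(_gcry_mpih_example:)
    	FUNC_ENTRY()
    	/* The body may now assume SysV registers: arg1 in %rdi,
    	   arg2 in %rsi, arg3 in %rdx, arg4 in %rcx.  */
    	movq	%rdi, %rax
    	FUNC_EXIT()
    	ret
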
diff --git a/mpi/amd64/mpih-add1.S b/mpi/amd64/mpih-add1.S
index f0ec89c..6a90262 100644
--- a/mpi/amd64/mpih-add1.S
+++ b/mpi/amd64/mpih-add1.S
@@ -43,6 +43,7 @@
 .text
 	.globl C_SYMBOL_NAME(_gcry_mpih_add_n)
 C_SYMBOL_NAME(_gcry_mpih_add_n:)
+	FUNC_ENTRY()
 	leaq	(%rsi,%rcx,8), %rsi
 	leaq	(%rdi,%rcx,8), %rdi
 	leaq	(%rdx,%rcx,8), %rdx
@@ -59,5 +60,6 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:)
 
 	movq	%rcx, %rax		/* zero %rax */
 	adcq	%rax, %rax
+	FUNC_EXIT()
 	ret
 	
\ No newline at end of file
diff --git a/mpi/amd64/mpih-lshift.S b/mpi/amd64/mpih-lshift.S
index e87dd1a..9e8979b 100644
--- a/mpi/amd64/mpih-lshift.S
+++ b/mpi/amd64/mpih-lshift.S
@@ -42,6 +42,7 @@
 .text
 	.globl C_SYMBOL_NAME(_gcry_mpih_lshift)
 C_SYMBOL_NAME(_gcry_mpih_lshift:)
+	FUNC_ENTRY()
 	movq	-8(%rsi,%rdx,8), %mm7
 	movd	%ecx, %mm1
 	movl	$64, %eax
@@ -74,4 +75,5 @@ C_SYMBOL_NAME(_gcry_mpih_lshift:)
 .Lende:	psllq	%mm1, %mm2
 	movq	%mm2, (%rdi)
 	emms
+	FUNC_EXIT()
 	ret
diff --git a/mpi/amd64/mpih-mul1.S b/mpi/amd64/mpih-mul1.S
index 54b0ab4..67ab47e 100644
--- a/mpi/amd64/mpih-mul1.S
+++ b/mpi/amd64/mpih-mul1.S
@@ -46,6 +46,7 @@
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_mul_1)
 C_SYMBOL_NAME(_gcry_mpih_mul_1:)
 
+	FUNC_ENTRY()
 	movq	%rdx, %r11
 	leaq	(%rsi,%rdx,8), %rsi
 	leaq	(%rdi,%rdx,8), %rdi
@@ -62,4 +63,5 @@ C_SYMBOL_NAME(_gcry_mpih_mul_1:)
 	jne	.Loop
 
 	movq	%r8, %rax
+	FUNC_EXIT()
 	ret
diff --git a/mpi/amd64/mpih-mul2.S b/mpi/amd64/mpih-mul2.S
index a332a1d..1aa4fa0 100644
--- a/mpi/amd64/mpih-mul2.S
+++ b/mpi/amd64/mpih-mul2.S
@@ -41,6 +41,7 @@
 	TEXT
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_addmul_1)
 C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
+	FUNC_ENTRY()
 	movq	%rdx, %r11
 	leaq	(%rsi,%rdx,8), %rsi
 	leaq	(%rdi,%rdx,8), %rdi
@@ -61,4 +62,5 @@ C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
 	jne	.Loop
 
 	movq	%r8, %rax
+	FUNC_EXIT()
 	ret
diff --git a/mpi/amd64/mpih-mul3.S b/mpi/amd64/mpih-mul3.S
index 4d458a7..bc41c4e 100644
--- a/mpi/amd64/mpih-mul3.S
+++ b/mpi/amd64/mpih-mul3.S
@@ -42,7 +42,7 @@
 	TEXT
 	GLOBL	C_SYMBOL_NAME(_gcry_mpih_submul_1)
 C_SYMBOL_NAME(_gcry_mpih_submul_1:)
-
+	FUNC_ENTRY()
 	movq	%rdx, %r11
 	leaq	(%rsi,%r11,8), %rsi
 	leaq	(%rdi,%r11,8), %rdi
@@ -63,4 +63,5 @@ C_SYMBOL_NAME(_gcry_mpih_submul_1:)
 	jne	.Loop
 
 	movq	%r8, %rax
+	FUNC_EXIT()
 	ret
diff --git a/mpi/amd64/mpih-rshift.S b/mpi/amd64/mpih-rshift.S
index 4cfc8f6..311b85b 100644
--- a/mpi/amd64/mpih-rshift.S
+++ b/mpi/amd64/mpih-rshift.S
@@ -42,6 +42,7 @@
 .text
 	.globl C_SYMBOL_NAME(_gcry_mpih_rshift)
 C_SYMBOL_NAME(_gcry_mpih_rshift:)
+	FUNC_ENTRY()
 	movq	(%rsi), %mm7
 	movd	%ecx, %mm1
 	movl	$64, %eax
@@ -77,4 +78,5 @@ C_SYMBOL_NAME(_gcry_mpih_rshift:)
 .Lende:	psrlq	%mm1, %mm2
 	movq	%mm2, -8(%rdi)
 	emms
+	FUNC_EXIT()
 	ret
diff --git a/mpi/amd64/mpih-sub1.S b/mpi/amd64/mpih-sub1.S
index b3609b0..ccf6496 100644
--- a/mpi/amd64/mpih-sub1.S
+++ b/mpi/amd64/mpih-sub1.S
@@ -42,6 +42,7 @@
 .text
 	.globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
 C_SYMBOL_NAME(_gcry_mpih_sub_n:)
+	FUNC_ENTRY()
 	leaq	(%rsi,%rcx,8), %rsi
 	leaq	(%rdi,%rcx,8), %rdi
 	leaq	(%rdx,%rcx,8), %rdx
@@ -58,4 +59,5 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:)
 
 	movq	%rcx, %rax		/* zero %rax */
 	adcq	%rax, %rax
+	FUNC_EXIT()
 	ret
diff --git a/mpi/config.links b/mpi/config.links
index d71918a..2fb5e8a 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -129,17 +129,22 @@ case "${host}" in
     x86_64-apple-darwin*)
 	echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
+	cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
 	path="amd64"
         mpi_cpu_arch="x86"
 	;;
     x86_64-*mingw32*)
-	echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
-	path=""
-	mpi_cpu_arch="x86"
+	echo '#define USE_MS_ABI' >>./mpi/asm-syntax.h
+	echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
+	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
+	cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
+	path="amd64"
+        mpi_cpu_arch="x86"
         ;;
     x86_64-*-*)
 	echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h
+	cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
 	path="amd64"
         mpi_cpu_arch="x86"
 	;;
@@ -314,7 +319,7 @@ case "${host}" in
 	echo '#define ELF_SYNTAX'               >>./mpi/asm-syntax.h
 	cat   $srcdir/mpi/powerpc32/syntax.h	>>./mpi/asm-syntax.h
 	path="powerpc32"
-       mpi_cpu_arch="ppc"
+	mpi_cpu_arch="ppc"
 	;;
 
     rs6000-*-aix[456789]*    | \

commit 6c21cf5fed1ad430fa41445eac2350802bc8aaed
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Fri May 1 19:15:34 2015 +0300

    DES: Silence compiler warnings on Windows
    
    * cipher/des.c (working_memcmp): Make pointer arguments 'const void *'.
    --
    
    The following warnings were seen on a Windows target build:
    
    des.c: In function 'is_weak_key':
    des.c:1019:40: warning: pointer targets in passing argument 1 of 'working_memcmp' differ in signedness [-Wpointer-sign]
           if ( !(cmp_result=working_memcmp(work, weak_keys[middle], 8)) )
                                            ^
    des.c:149:1: note: expected 'const char *' but argument is of type 'unsigned char *'
     working_memcmp( const char *a, const char *b, size_t n )
     ^
    des.c:1019:46: warning: pointer targets in passing argument 2 of 'working_memcmp' differ in signedness [-Wpointer-sign]
           if ( !(cmp_result=working_memcmp(work, weak_keys[middle], 8)) )
                                                  ^
    des.c:149:1: note: expected 'const char *' but argument is of type 'unsigned char *'
     working_memcmp( const char *a, const char *b, size_t n )
     ^
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/des.c b/cipher/des.c
index bc2a474..d4863d1 100644
--- a/cipher/des.c
+++ b/cipher/des.c
@@ -146,8 +146,10 @@
  * depending on whether characters are signed or not.
  */
 static int
-working_memcmp( const char *a, const char *b, size_t n )
+working_memcmp( const void *_a, const void *_b, size_t n )
 {
+    const char *a = _a;
+    const char *b = _b;
     for( ; n; n--, a++, b++ )
 	if( *a != *b )
 	    return (int)(*(byte*)a) - (int)(*(byte*)b);
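
A minimal standalone illustration of why the 'const void *' signature
silences -Wpointer-sign (example code, not from the patch): any object
pointer converts implicitly to 'const void *', whereas passing an
'unsigned char *' where 'const char *' is expected trips the warning.

    #include <stddef.h>

    static int
    cmp_bytes (const void *_a, const void *_b, size_t n)
    {
      const unsigned char *a = _a;
      const unsigned char *b = _b;

      for (; n; n--, a++, b++)
        if (*a != *b)
          return (int)*a - (int)*b;
      return 0;
    }

    int
    main (void)
    {
      unsigned char work[8] = "weakkey";
      const char *ref = "weakkey";
      return cmp_bytes (work, ref, 8);  /* clean under -Wall -Wpointer-sign */
    }
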

commit 9cf224322007d90193d4910f0da6e0e29ce01d70
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Wed Apr 29 18:18:07 2015 +0300

    Cast pointers to integers using uintptr_t instead of long

diff --git a/cipher/cipher.c b/cipher/cipher.c
index d1550c0..7a29824 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -481,11 +481,11 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
           size_t off = 0;
 
 #ifdef NEED_16BYTE_ALIGNED_CONTEXT
-          if ( ((unsigned long)h & 0x0f) )
+          if ( ((uintptr_t)h & 0x0f) )
             {
               /* The malloced block is not aligned on a 16 byte
                  boundary.  Correct for this.  */
-              off = 16 - ((unsigned long)h & 0x0f);
+              off = 16 - ((uintptr_t)h & 0x0f);
               h = (void*)((char*)h + off);
             }
 #endif /*NEED_16BYTE_ALIGNED_CONTEXT*/
diff --git a/cipher/md.c b/cipher/md.c
index 9fef555..3ab46ef 100644
--- a/cipher/md.c
+++ b/cipher/md.c
@@ -1148,7 +1148,7 @@ md_stop_debug( gcry_md_hd_t md )
 
 #ifdef HAVE_U64_TYPEDEF
   {  /* a kludge to pull in the __muldi3 for Solaris */
-    volatile u32 a = (u32)(ulong)md;
+    volatile u32 a = (u32)(uintptr_t)md;
     volatile u64 b = 42;
     volatile u64 c;
     c = a * b;
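
The cipher.c hunk above is the classic over-allocate-and-align idiom; the
point of the change is that on Win64 (an LLP64 platform) 'unsigned long'
is only 32 bits wide, so 'uintptr_t' is the correct type for pointer
arithmetic of this kind.  A hedged standalone sketch (helper name
hypothetical):

    #include <stdint.h>
    #include <stdlib.h>

    /* Return a 16-byte aligned pointer inside a freshly malloced block;
     * the caller frees *block, not the returned pointer.  */
    static void *
    alloc_aligned16 (size_t size, void **block)
    {
      char *h = malloc (size + 15);
      size_t off = 0;

      if (!h)
        return NULL;
      if ((uintptr_t)h & 0x0f)
        off = 16 - ((uintptr_t)h & 0x0f);
      *block = h;
      return h + off;
    }
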

commit d5a7e00b6b222566a5650639ef29684b047c1909
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Wed Apr 29 18:18:07 2015 +0300

    Fix rndhw for 64-bit Windows build
    
    * configure.ac: Add sizeof check for 'void *'.
    * random/rndhw.c (poll_padlock): Check for SIZEOF_VOID_P == 8
    instead of defined(__LP64__).
    (RDRAND_LONG): Check for SIZEOF_UNSIGNED_LONG == 8 instead of
    defined(__LP64__).
    --
    
    __LP64__ is not predefined for 64-bit mingw64-gcc, which caused the
    wrong assembly code to be selected.  Do the selection based on type
    sizes instead, to support x86_64, x32 and win64 properly.
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/configure.ac b/configure.ac
index 555ad1e..594209f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -344,6 +344,7 @@ AC_CHECK_SIZEOF(unsigned short, 2)
 AC_CHECK_SIZEOF(unsigned int, 4)
 AC_CHECK_SIZEOF(unsigned long, 4)
 AC_CHECK_SIZEOF(unsigned long long, 0)
+AC_CHECK_SIZEOF(void *, 0)
 
 AC_TYPE_UINTPTR_T
 
diff --git a/random/rndhw.c b/random/rndhw.c
index e625512..8e50751 100644
--- a/random/rndhw.c
+++ b/random/rndhw.c
@@ -69,7 +69,7 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
   nbytes = 0;
   while (nbytes < 64)
     {
-#if defined(__x86_64__) && defined(__LP64__)
+#if defined(__x86_64__) && SIZEOF_VOID_P == 8
       asm volatile
         ("movq %1, %%rdi\n\t"         /* Set buffer.  */
          "xorq %%rdx, %%rdx\n\t"      /* Request up to 8 bytes.  */
@@ -123,7 +123,7 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
 #ifdef USE_DRNG
 # define RDRAND_RETRY_LOOPS	10
 # define RDRAND_INT	".byte 0x0f,0xc7,0xf0"
-# if defined(__x86_64__) && defined(__LP64__)
+# if defined(__x86_64__) && SIZEOF_UNSIGNED_LONG == 8
 #  define RDRAND_LONG	".byte 0x48,0x0f,0xc7,0xf0"
 # else
 #  define RDRAND_LONG	RDRAND_INT
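
For context: x86_64 Linux is LP64 ('long' is 64 bits, __LP64__ defined),
mingw-w64 is LLP64 ('long' is 32 bits, __LP64__ not defined) even though
the 64-bit registers are there, and x32 is ILP32 on 64-bit hardware; the
configure-detected type sizes therefore pick the encoding where __LP64__
cannot.  The 0x48 byte is the REX.W prefix selecting the 64-bit %rax
destination.  A hedged sketch of driving such an opcode string from GCC
inline assembly (retry loop omitted; SIZEOF_UNSIGNED_LONG is assumed to
come from the AC_CHECK_SIZEOF result in config.h):

    #include <config.h>

    #if defined(__x86_64__) && SIZEOF_UNSIGNED_LONG == 8
    # define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"  /* rdrand %rax */
    #else
    # define RDRAND_LONG ".byte 0x0f,0xc7,0xf0"       /* rdrand %eax */
    #endif

    static int
    rdrand_long (unsigned long *v)
    {
      unsigned char ok;

      asm volatile (RDRAND_LONG "\n\t"
                    "setc %1"
                    : "=a" (*v), "=qm" (ok)
                    :
                    : "cc");
      return ok;   /* CF=1: a random value was delivered */
    }
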

commit 0cdd24456b33defc7f8176fa82ab694fbc284385
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Wed Apr 29 18:18:07 2015 +0300

    Prepare random/rndw32.c fast poll for 64-bit Windows
    
    * random/rndw32.c (_gcry_rndw32_gather_random_fast) [ADD]: Rename to
    ADDINT.
    (_gcry_rndw32_gather_random_fast): Add ADDPTR.
    (_gcry_rndw32_gather_random_fast): Disable entropy gathering from
    GetQueueStatus(QS_ALLEVENTS).
    (_gcry_rndw32_gather_random_fast): Change minimumWorkingSetSize and
    maximumWorkingSetSize to SIZE_T from DWORD.
    (_gcry_rndw32_gather_random_fast): Only add lower 32-bits of
    minimumWorkingSetSize and maximumWorkingSetSize to random poll.
    (_gcry_rndw32_gather_random_fast) [__WIN64__]: Read TSC directly
    using intrinsic.
    --
    
    Introduce entropy-gatherer changes for the 64-bit Windows platform, as
    done in the cryptlib fast poll:
     - Change the ADD macro to ADDPTR/ADDINT to handle pointer values.
       ADDPTR discards the high 32 bits of 64-bit pointer values.
     - Change minimum/maximumWorkingSetSize to the SIZE_T type to avoid
       stack corruption on 64-bit systems; only the low 32 bits are used
       for entropy.
     - Use the __rdtsc() intrinsic on 64-bit (as the TSC is always
       available).
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/random/rndw32.c b/random/rndw32.c
index c495131..4ab1bca 100644
--- a/random/rndw32.c
+++ b/random/rndw32.c
@@ -826,39 +826,47 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t,
      cursor position for last message, 1 ms time for last message,
      handle of window with clipboard open, handle of process heap,
      handle of procs window station, types of events in input queue,
-     and milliseconds since Windows was started.  */
+     and milliseconds since Windows was started.  On 64-bit platforms
+     some of these return values are pointers and thus 64 bits wide.
+     We discard the upper 32 bits of those values.  */
 
   {
     byte buffer[20*sizeof(ulong)], *bufptr;
 
     bufptr = buffer;
-#define ADD(f)  do { ulong along = (ulong)(f);                  \
-                     memcpy (bufptr, &along, sizeof (along) );  \
-                     bufptr += sizeof (along);                  \
-                   } while (0)
-
-    ADD ( GetActiveWindow ());
-    ADD ( GetCapture ());
-    ADD ( GetClipboardOwner ());
-    ADD ( GetClipboardViewer ());
-    ADD ( GetCurrentProcess ());
-    ADD ( GetCurrentProcessId ());
-    ADD ( GetCurrentThread ());
-    ADD ( GetCurrentThreadId ());
-    ADD ( GetDesktopWindow ());
-    ADD ( GetFocus ());
-    ADD ( GetInputState ());
-    ADD ( GetMessagePos ());
-    ADD ( GetMessageTime ());
-    ADD ( GetOpenClipboardWindow ());
-    ADD ( GetProcessHeap ());
-    ADD ( GetProcessWindowStation ());
-    ADD ( GetQueueStatus (QS_ALLEVENTS));
-    ADD ( GetTickCount ());
+#define ADDINT(f)  do { ulong along = (ulong)(f);                  \
+                        memcpy (bufptr, &along, sizeof (along) );  \
+                        bufptr += sizeof (along);                  \
+                      } while (0)
+#define ADDPTR(f)  do { void *aptr = (f);                          \
+                        ADDINT((SIZE_T)aptr);                      \
+                      } while (0)
+
+    ADDPTR ( GetActiveWindow ());
+    ADDPTR ( GetCapture ());
+    ADDPTR ( GetClipboardOwner ());
+    ADDPTR ( GetClipboardViewer ());
+    ADDPTR ( GetCurrentProcess ());
+    ADDINT ( GetCurrentProcessId ());
+    ADDPTR ( GetCurrentThread ());
+    ADDINT ( GetCurrentThreadId ());
+    ADDPTR ( GetDesktopWindow ());
+    ADDPTR ( GetFocus ());
+    ADDINT ( GetInputState ());
+    ADDINT ( GetMessagePos ());
+    ADDINT ( GetMessageTime ());
+    ADDPTR ( GetOpenClipboardWindow ());
+    ADDPTR ( GetProcessHeap ());
+    ADDPTR ( GetProcessWindowStation ());
+    /* The following function sometimes stops returning events and thus
+       cannot be used as an entropy source.  */
+    /*ADDINT ( GetQueueStatus (QS_ALLEVENTS));*/
+    ADDINT ( GetTickCount ());
 
     gcry_assert ( bufptr-buffer < sizeof (buffer) );
     (*add) ( buffer, bufptr-buffer, origin );
-#undef ADD
+#undef ADDINT
+#undef ADDPTR
   }
 
   /* Get multiword system information: Current caret position, current
@@ -888,7 +896,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t,
   {
     HANDLE handle;
     FILETIME creationTime, exitTime, kernelTime, userTime;
-    DWORD minimumWorkingSetSize, maximumWorkingSetSize;
+    SIZE_T minimumWorkingSetSize, maximumWorkingSetSize;
 
     handle = GetCurrentThread ();
     GetThreadTimes (handle, &creationTime, &exitTime,
@@ -910,10 +918,9 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t,
        process.  */
     GetProcessWorkingSetSize (handle, &minimumWorkingSetSize,
                               &maximumWorkingSetSize);
-    (*add) ( &minimumWorkingSetSize,
-             sizeof (minimumWorkingSetSize), origin );
-    (*add) ( &maximumWorkingSetSize,
-             sizeof (maximumWorkingSetSize), origin );
+    /* On 64-bit systems, discard the high 32 bits. */
+    (*add) ( &minimumWorkingSetSize, sizeof (int), origin );
+    (*add) ( &maximumWorkingSetSize, sizeof (int), origin );
   }
 
 
@@ -961,7 +968,20 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t,
 
      To make things unambiguous, we detect a CPU new enough to call RDTSC
      directly by checking for CPUID capabilities, and fall back to QPC if
-     this isn't present.  */
+     this isn't present.
+
+     On AMD64, the TSC is always available and an intrinsic is provided
+     for accessing it.  */
+#ifdef __WIN64__
+    {
+      unsigned __int64 aint64;
+
+      /* Note: cryptlib does not discard the upper 32 bits of the TSC on
+       * WIN64, but does on WIN32.  Is this correct?  */
+      aint64 = __rdtsc();
+      (*add) (&aint64, sizeof(aint64), origin);
+    }
+#else
 #ifdef __GNUC__
 /*   FIXME: We would need to implement the CPU feature tests first.  */
 /*   if (cpu_has_feature_rdtsc) */
@@ -990,6 +1010,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t,
           (*add) (&aword, sizeof (aword), origin );
         }
     }
+#endif /*__WIN64__*/
 
 
 }
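
A minimal standalone sketch of the ADDPTR/ADDINT split introduced above
(Win32 API; the real buffer bookkeeping is omitted and the helper name is
hypothetical; 'buf' must hold at least 2*sizeof(unsigned long) bytes):

    #include <windows.h>
    #include <string.h>

    static size_t
    fast_poll_sample (unsigned char *buf)
    {
      unsigned char *bufptr = buf;
      unsigned long along;
      void *aptr;

      aptr  = GetProcessHeap ();            /* HANDLE: 64 bits on Win64 */
      along = (unsigned long)(SIZE_T)aptr;  /* keep only the low 32 bits */
      memcpy (bufptr, &along, sizeof along);
      bufptr += sizeof along;

      along = GetCurrentThreadId ();        /* DWORD: always 32 bits */
      memcpy (bufptr, &along, sizeof along);
      bufptr += sizeof along;

      return (size_t)(bufptr - buf);
    }
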

commit f701954555340a503f6e52cc18d58b0c515427b7
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Wed Apr 29 18:18:07 2015 +0300

    Disable GCM and AES-NI assembly implementations for WIN64
    
    * cipher/cipher-internal.h (GCM_USE_INTEL_PCLMUL): Do not enable when
    __WIN64__ defined.
    * cipher/rijndael-internal.h (USE_AESNI): Ditto.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index e20ea56..693f218 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -67,7 +67,9 @@
 #if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
 # if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
 #  if __GNUC__ >= 4
-#   define GCM_USE_INTEL_PCLMUL 1
+#   ifndef __WIN64__
+#    define GCM_USE_INTEL_PCLMUL 1
+#   endif
 #  endif
 # endif
 #endif /* GCM_USE_INTEL_PCLMUL */
diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index 854980b..bd247a9 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -75,7 +75,9 @@
 #ifdef ENABLE_AESNI_SUPPORT
 # if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
 #  if __GNUC__ >= 4
-#   define USE_AESNI 1
+#   ifndef __WIN64__
+#    define USE_AESNI 1
+#   endif
 #  endif
 # endif
 #endif /* ENABLE_AESNI_SUPPORT */
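
A hedged background note (the commit message does not spell out the
motivation): the SysV AMD64 ABI treats every %xmm register as
call-clobbered, while the MS x64 ABI makes %xmm6..%xmm15 callee-saved, so
inline assembly written against SysV register conventions cannot simply
be switched on for WIN64 without first saving those registers.  The guard
pattern itself, condensed to the x86_64 case:

    #if defined(ENABLE_AESNI_SUPPORT) && defined(__x86_64__) && \
        __GNUC__ >= 4 && !defined(__WIN64__)
    # define USE_AESNI 1
    #endif
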

commit e78560a4b717f7154f910a8ce4128de152f586da
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Wed Apr 29 18:18:07 2015 +0300

    Disable building mpi assembly routines on WIN64
    
    * mpi/config.links: Disable assembly for host 'x86_64-*mingw32*'.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/mpi/config.links b/mpi/config.links
index f44299d..d71918a 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -132,6 +132,11 @@ case "${host}" in
 	path="amd64"
         mpi_cpu_arch="x86"
 	;;
+    x86_64-*mingw32*)
+	echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
+	path=""
+	mpi_cpu_arch="x86"
+        ;;
     x86_64-*-*)
 	echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
 	cat  $srcdir/mpi/i386/syntax.h	    >>./mpi/asm-syntax.h

-----------------------------------------------------------------------

Summary of changes:
 acinclude.m4                     |   5 +-
 cipher/cipher-gcm-intel-pclmul.c |  72 ++++++++++++++++++++++++++
 cipher/cipher.c                  |   4 +-
 cipher/des.c                     |   4 +-
 cipher/md.c                      |   2 +-
 cipher/rijndael-aesni.c          |  73 +++++++++++++++++++++-----
 cipher/rijndael-amd64.S          |  17 ++++--
 cipher/rijndael-internal.h       |   8 +--
 cipher/rijndael-ssse3-amd64.c    |  94 ++++++++++++++++++++++++++-------
 cipher/rijndael.c                |  34 ++++++++++++
 cipher/sha1-avx-amd64.S          |  12 ++++-
 cipher/sha1-avx-bmi2-amd64.S     |  12 ++++-
 cipher/sha1-ssse3-amd64.S        |  12 ++++-
 cipher/sha1.c                    |  51 +++++++++++++-----
 cipher/sha256-avx-amd64.S        |  11 +++-
 cipher/sha256-avx2-bmi2-amd64.S  |  11 +++-
 cipher/sha256-ssse3-amd64.S      |  11 +++-
 cipher/sha256.c                  |  60 +++++++++++++++------
 cipher/sha512-avx-amd64.S        |  11 +++-
 cipher/sha512-avx2-bmi2-amd64.S  |  11 +++-
 cipher/sha512-ssse3-amd64.S      |  11 +++-
 cipher/sha512.c                  |  60 +++++++++++++++------
 cipher/whirlpool-sse2-amd64.S    |  13 +++--
 cipher/whirlpool.c               |  15 ++++--
 configure.ac                     | 109 +++++++++++++++++++++++++++++++++++++--
 mpi/amd64/func_abi.h             |  19 +++++++
 mpi/amd64/mpih-add1.S            |   2 +
 mpi/amd64/mpih-lshift.S          |   2 +
 mpi/amd64/mpih-mul1.S            |   2 +
 mpi/amd64/mpih-mul2.S            |   2 +
 mpi/amd64/mpih-mul3.S            |   3 +-
 mpi/amd64/mpih-rshift.S          |   2 +
 mpi/amd64/mpih-sub1.S            |   2 +
 mpi/config.links                 |  12 ++++-
 random/rndhw.c                   |   4 +-
 random/rndw32.c                  |  83 ++++++++++++++++++-----------
 36 files changed, 707 insertions(+), 149 deletions(-)
 create mode 100644 mpi/amd64/func_abi.h


hooks/post-receive
-- 
The GNU crypto library
http://git.gnupg.org

