[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-226-g9b0c6c8
by Jussi Kivilinna
cvs at cvs.gnupg.org
Sun May 17 15:17:44 CEST 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".
The branch, master has been updated
via 9b0c6c8141ae9bd056392a3f6b5704b505fc8501 (commit)
via eb0ed576893b6c7990dbcb568510f831d246cea6 (commit)
via 12bc93ca8187b8061c2e705427ef22f5a71d29b0 (commit)
via 8d7de4dbf7732c6eb9e9853ad7c19c89075ace6f (commit)
via b65e9e71d5ee992db5c96793c6af999545daad28 (commit)
via 9597cfddf03c467825da152be5ca0d12a8c30d88 (commit)
via 6a6646df80386204675d8b149ab60e74d7ca124c (commit)
via 9a4fb3709864bf3e3918800d44ff576590cd4e92 (commit)
via e05682093ffb003b589a697428d918d755ac631d (commit)
via c46b015bedba7ce0db68929bd33a86a54ab3d919 (commit)
via ee8fc4edcb3466b03246c8720b90731bf274ff1d (commit)
from bac42c68b069f17abcca810a21439c7233815747 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 9b0c6c8141ae9bd056392a3f6b5704b505fc8501
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Thu May 14 13:07:34 2015 +0300
Enable AMD64 Twofish implementation on WIN64
* cipher/twofish-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/twofish.c (USE_AMD64_ASM): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
(twofish_amd64_encrypt_block, twofish_amd64_decrypt_block)
(twofish_amd64_ctr_enc, twofish_amd64_cbc_dec)
(twofish_amd64_cfb_dec): New wrapper functions for AMD64
assembly functions.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index a225307..ea88b94 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -20,7 +20,14 @@
#ifdef __x86_64
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_TWOFISH)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
#ifdef __PIC__
# define RIP %rip
@@ -166,7 +173,7 @@
.align 8
.globl _gcry_twofish_amd64_encrypt_block
-.type _gcry_twofish_amd64_encrypt_block,@function;
+ELF(.type _gcry_twofish_amd64_encrypt_block,@function;)
_gcry_twofish_amd64_encrypt_block:
/* input:
@@ -205,11 +212,11 @@ _gcry_twofish_amd64_encrypt_block:
addq $(3 * 8), %rsp;
ret;
-.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;
+ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
.align 8
.globl _gcry_twofish_amd64_decrypt_block
-.type _gcry_twofish_amd64_decrypt_block,@function;
+ELF(.type _gcry_twofish_amd64_decrypt_block,@function;)
_gcry_twofish_amd64_decrypt_block:
/* input:
@@ -248,7 +255,7 @@ _gcry_twofish_amd64_decrypt_block:
addq $(3 * 8), %rsp;
ret;
-.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;
+ELF(.size _gcry_twofish_amd64_decrypt_block,.-_gcry_twofish_amd64_decrypt_block;)
#undef CTX
@@ -462,7 +469,7 @@ _gcry_twofish_amd64_decrypt_block:
outunpack3(RAB, 2);
.align 8
-.type __twofish_enc_blk3,@function;
+ELF(.type __twofish_enc_blk3,@function;)
__twofish_enc_blk3:
/* input:
@@ -485,10 +492,10 @@ __twofish_enc_blk3:
outunpack_enc3();
ret;
-.size __twofish_enc_blk3,.-__twofish_enc_blk3;
+ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)
.align 8
-.type __twofish_dec_blk3,@function;
+ELF(.type __twofish_dec_blk3,@function;)
__twofish_dec_blk3:
/* input:
@@ -511,11 +518,11 @@ __twofish_dec_blk3:
outunpack_dec3();
ret;
-.size __twofish_dec_blk3,.-__twofish_dec_blk3;
+ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)
.align 8
.globl _gcry_twofish_amd64_ctr_enc
-.type _gcry_twofish_amd64_ctr_enc,@function;
+ELF(.type _gcry_twofish_amd64_ctr_enc,@function;)
_gcry_twofish_amd64_ctr_enc:
/* input:
* %rdi: ctx, CTX
@@ -593,11 +600,11 @@ _gcry_twofish_amd64_ctr_enc:
addq $(8 * 8), %rsp;
ret;
-.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;
+ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
.align 8
.globl _gcry_twofish_amd64_cbc_dec
-.type _gcry_twofish_amd64_cbc_dec,@function;
+ELF(.type _gcry_twofish_amd64_cbc_dec,@function;)
_gcry_twofish_amd64_cbc_dec:
/* input:
* %rdi: ctx, CTX
@@ -659,11 +666,11 @@ _gcry_twofish_amd64_cbc_dec:
addq $(9 * 8), %rsp;
ret;
-.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;
+ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
.align 8
.globl _gcry_twofish_amd64_cfb_dec
-.type _gcry_twofish_amd64_cfb_dec,@function;
+ELF(.type _gcry_twofish_amd64_cfb_dec,@function;)
_gcry_twofish_amd64_cfb_dec:
/* input:
* %rdi: ctx, CTX
@@ -725,7 +732,7 @@ _gcry_twofish_amd64_cfb_dec:
addq $(8 * 8), %rsp;
ret;
-.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;
+ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
#endif /*USE_TWOFISH*/
#endif /*__x86_64*/
diff --git a/cipher/twofish.c b/cipher/twofish.c
index ecd76e3..ce83fad 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -53,7 +53,8 @@
/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64_ASM 1
#endif
@@ -754,6 +755,77 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
const byte *in, byte *iv);
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+ const void *arg3, const void *arg4)
+{
+ /* Call SystemV ABI function without storing non-volatile XMM registers,
+ * as target function does not use vector instruction sets. */
+ asm volatile ("callq *%0\n\t"
+ : "+a" (fn),
+ "+D" (arg1),
+ "+S" (arg2),
+ "+d" (arg3),
+ "+c" (arg4)
+ :
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
+static inline void
+twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn(_gcry_twofish_amd64_encrypt_block, c, out, in, NULL);
+#else
+ _gcry_twofish_amd64_encrypt_block(c, out, in);
+#endif
+}
+
+static inline void
+twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn(_gcry_twofish_amd64_decrypt_block, c, out, in, NULL);
+#else
+ _gcry_twofish_amd64_decrypt_block(c, out, in);
+#endif
+}
+
+static inline void
+twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in,
+ byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn(_gcry_twofish_amd64_ctr_enc, c, out, in, ctr);
+#else
+ _gcry_twofish_amd64_ctr_enc(c, out, in, ctr);
+#endif
+}
+
+static inline void
+twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in,
+ byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn(_gcry_twofish_amd64_cbc_dec, c, out, in, iv);
+#else
+ _gcry_twofish_amd64_cbc_dec(c, out, in, iv);
+#endif
+}
+
+static inline void
+twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
+ byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn(_gcry_twofish_amd64_cfb_dec, c, out, in, iv);
+#else
+ _gcry_twofish_amd64_cfb_dec(c, out, in, iv);
+#endif
+}
+
#elif defined(USE_ARM_ASM)
/* Assembly implementations of Twofish. */
@@ -833,7 +905,7 @@ static unsigned int
twofish_encrypt (void *context, byte *out, const byte *in)
{
TWOFISH_context *ctx = context;
- _gcry_twofish_amd64_encrypt_block(ctx, out, in);
+ twofish_amd64_encrypt_block(ctx, out, in);
return /*burn_stack*/ (4*sizeof (void*));
}
@@ -900,7 +972,7 @@ static unsigned int
twofish_decrypt (void *context, byte *out, const byte *in)
{
TWOFISH_context *ctx = context;
- _gcry_twofish_amd64_decrypt_block(ctx, out, in);
+ twofish_amd64_decrypt_block(ctx, out, in);
return /*burn_stack*/ (4*sizeof (void*));
}
@@ -980,7 +1052,7 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
- _gcry_twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+ twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
nblocks -= 3;
outbuf += 3 * TWOFISH_BLOCKSIZE;
@@ -1038,7 +1110,7 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
- _gcry_twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+ twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
nblocks -= 3;
outbuf += 3 * TWOFISH_BLOCKSIZE;
@@ -1087,7 +1159,7 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
- _gcry_twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+ twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
nblocks -= 3;
outbuf += 3 * TWOFISH_BLOCKSIZE;
commit eb0ed576893b6c7990dbcb568510f831d246cea6
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Thu May 14 13:07:48 2015 +0300
Enable AMD64 Serpent implementations on WIN64
* cipher/serpent-avx2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/serpent-sse2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/serpent.c (USE_SSE2, USE_AVX2): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_SSE2 || USE_AVX2] (ASM_FUNC_ABI): New.
(_gcry_serpent_sse2_ctr_enc, _gcry_serpent_sse2_cbc_dec)
(_gcry_serpent_sse2_cfb_dec, _gcry_serpent_avx2_ctr_enc)
(_gcry_serpent_avx2_cbc_dec, _gcry_serpent_avx2_cfb_dec): Add
ASM_FUNC_ABI.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 03d29ae..3f59f06 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -20,9 +20,16 @@
#ifdef __x86_64
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SERPENT) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \
defined(ENABLE_AVX2_SUPPORT)
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
#ifdef __PIC__
# define RIP (%rip)
#else
@@ -404,7 +411,7 @@
.text
.align 8
-.type __serpent_enc_blk16,@function;
+ELF(.type __serpent_enc_blk16,@function;)
__serpent_enc_blk16:
/* input:
* %rdi: ctx, CTX
@@ -489,10 +496,10 @@ __serpent_enc_blk16:
transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
ret;
-.size __serpent_enc_blk16,.-__serpent_enc_blk16;
+ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
.align 8
-.type __serpent_dec_blk16,@function;
+ELF(.type __serpent_dec_blk16,@function;)
__serpent_dec_blk16:
/* input:
* %rdi: ctx, CTX
@@ -579,7 +586,7 @@ __serpent_dec_blk16:
transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
ret;
-.size __serpent_dec_blk16,.-__serpent_dec_blk16;
+ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
@@ -589,7 +596,7 @@ __serpent_dec_blk16:
.align 8
.globl _gcry_serpent_avx2_ctr_enc
-.type _gcry_serpent_avx2_ctr_enc,@function;
+ELF(.type _gcry_serpent_avx2_ctr_enc,@function;)
_gcry_serpent_avx2_ctr_enc:
/* input:
* %rdi: ctx, CTX
@@ -695,11 +702,11 @@ _gcry_serpent_avx2_ctr_enc:
vzeroall;
ret
-.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;
+ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
.align 8
.globl _gcry_serpent_avx2_cbc_dec
-.type _gcry_serpent_avx2_cbc_dec,@function;
+ELF(.type _gcry_serpent_avx2_cbc_dec,@function;)
_gcry_serpent_avx2_cbc_dec:
/* input:
* %rdi: ctx, CTX
@@ -746,11 +753,11 @@ _gcry_serpent_avx2_cbc_dec:
vzeroall;
ret
-.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;
+ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
.align 8
.globl _gcry_serpent_avx2_cfb_dec
-.type _gcry_serpent_avx2_cfb_dec,@function;
+ELF(.type _gcry_serpent_avx2_cfb_dec,@function;)
_gcry_serpent_avx2_cfb_dec:
/* input:
* %rdi: ctx, CTX
@@ -799,7 +806,7 @@ _gcry_serpent_avx2_cfb_dec:
vzeroall;
ret
-.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;
+ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
.data
.align 16
diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S
index 395f660..adbf4e2 100644
--- a/cipher/serpent-sse2-amd64.S
+++ b/cipher/serpent-sse2-amd64.S
@@ -20,7 +20,14 @@
#ifdef __x86_64
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SERPENT)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
#ifdef __PIC__
# define RIP (%rip)
@@ -427,7 +434,7 @@
.text
.align 8
-.type __serpent_enc_blk8,@function;
+ELF(.type __serpent_enc_blk8,@function;)
__serpent_enc_blk8:
/* input:
* %rdi: ctx, CTX
@@ -512,10 +519,10 @@ __serpent_enc_blk8:
transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
ret;
-.size __serpent_enc_blk8,.-__serpent_enc_blk8;
+ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
.align 8
-.type __serpent_dec_blk8,@function;
+ELF(.type __serpent_dec_blk8,@function;)
__serpent_dec_blk8:
/* input:
* %rdi: ctx, CTX
@@ -602,11 +609,11 @@ __serpent_dec_blk8:
transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
ret;
-.size __serpent_dec_blk8,.-__serpent_dec_blk8;
+ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
.align 8
.globl _gcry_serpent_sse2_ctr_enc
-.type _gcry_serpent_sse2_ctr_enc,@function;
+ELF(.type _gcry_serpent_sse2_ctr_enc,@function;)
_gcry_serpent_sse2_ctr_enc:
/* input:
* %rdi: ctx, CTX
@@ -732,11 +739,11 @@ _gcry_serpent_sse2_ctr_enc:
pxor RNOT, RNOT;
ret
-.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;
+ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
.align 8
.globl _gcry_serpent_sse2_cbc_dec
-.type _gcry_serpent_sse2_cbc_dec,@function;
+ELF(.type _gcry_serpent_sse2_cbc_dec,@function;)
_gcry_serpent_sse2_cbc_dec:
/* input:
* %rdi: ctx, CTX
@@ -793,11 +800,11 @@ _gcry_serpent_sse2_cbc_dec:
pxor RNOT, RNOT;
ret
-.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;
+ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
.align 8
.globl _gcry_serpent_sse2_cfb_dec
-.type _gcry_serpent_sse2_cfb_dec,@function;
+ELF(.type _gcry_serpent_sse2_cfb_dec,@function;)
_gcry_serpent_sse2_cfb_dec:
/* input:
* %rdi: ctx, CTX
@@ -857,7 +864,7 @@ _gcry_serpent_sse2_cfb_dec:
pxor RNOT, RNOT;
ret
-.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;
+ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
#endif /*defined(USE_SERPENT)*/
#endif /*__x86_64*/
diff --git a/cipher/serpent.c b/cipher/serpent.c
index 0be49da..7d0e112 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -34,13 +34,15 @@
/* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
#undef USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSE2 1
#endif
/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
#undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# if defined(ENABLE_AVX2_SUPPORT)
# define USE_AVX2 1
# endif
@@ -86,6 +88,18 @@ typedef struct serpent_context
} serpent_context_t;
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#if defined(USE_SSE2) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+# define ASM_FUNC_ABI
+# endif
+#endif
+
+
#ifdef USE_SSE2
/* Assembler implementations of Serpent using SSE2. Process 8 block in
parallel.
@@ -93,17 +107,17 @@ typedef struct serpent_context
extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *ctr);
+ unsigned char *ctr) ASM_FUNC_ABI;
extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *iv);
+ unsigned char *iv) ASM_FUNC_ABI;
extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *iv);
+ unsigned char *iv) ASM_FUNC_ABI;
#endif
#ifdef USE_AVX2
@@ -113,17 +127,17 @@ extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *ctr);
+ unsigned char *ctr) ASM_FUNC_ABI;
extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *iv);
+ unsigned char *iv) ASM_FUNC_ABI;
extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *iv);
+ unsigned char *iv) ASM_FUNC_ABI;
#endif
#ifdef USE_NEON
commit 12bc93ca8187b8061c2e705427ef22f5a71d29b0
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Thu May 14 12:37:21 2015 +0300
Enable AMD64 Salsa20 implementation on WIN64
* cipher/salsa20-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/salsa20.c (USE_AMD64): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_AMD64] (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
(_gcry_salsa20_amd64_keysetup, _gcry_salsa20_amd64_ivsetup)
(_gcry_salsa20_amd64_encrypt_blocks): Add ASM_FUNC_ABI.
[USE_AMD64] (salsa20_core): Add ASM_EXTRA_STACK.
(salsa20_do_encrypt_stream) [USE_AMD64]: Add ASM_EXTRA_STACK.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S
index 7046dbb..470c32a 100644
--- a/cipher/salsa20-amd64.S
+++ b/cipher/salsa20-amd64.S
@@ -25,13 +25,20 @@
#ifdef __x86_64
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SALSA20)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
.text
.align 8
.globl _gcry_salsa20_amd64_keysetup
-.type _gcry_salsa20_amd64_keysetup,@function;
+ELF(.type _gcry_salsa20_amd64_keysetup,@function;)
_gcry_salsa20_amd64_keysetup:
movl 0(%rsi),%r8d
movl 4(%rsi),%r9d
@@ -83,7 +90,7 @@ _gcry_salsa20_amd64_keysetup:
.align 8
.globl _gcry_salsa20_amd64_ivsetup
-.type _gcry_salsa20_amd64_ivsetup,@function;
+ELF(.type _gcry_salsa20_amd64_ivsetup,@function;)
_gcry_salsa20_amd64_ivsetup:
movl 0(%rsi),%r8d
movl 4(%rsi),%esi
@@ -97,7 +104,7 @@ _gcry_salsa20_amd64_ivsetup:
.align 8
.globl _gcry_salsa20_amd64_encrypt_blocks
-.type _gcry_salsa20_amd64_encrypt_blocks,@function;
+ELF(.type _gcry_salsa20_amd64_encrypt_blocks,@function;)
_gcry_salsa20_amd64_encrypt_blocks:
/*
* Modifications to original implementation:
@@ -918,7 +925,7 @@ _gcry_salsa20_amd64_encrypt_blocks:
add $64,%rdi
add $64,%rsi
jmp .L_bytes_are_64_128_or_192
-.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;
+ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
#endif /*defined(USE_SALSA20)*/
#endif /*__x86_64*/
diff --git a/cipher/salsa20.c b/cipher/salsa20.c
index d75fe51..fa3d23b 100644
--- a/cipher/salsa20.c
+++ b/cipher/salsa20.c
@@ -43,7 +43,8 @@
/* USE_AMD64 indicates whether to compile with AMD64 code. */
#undef USE_AMD64
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64 1
#endif
@@ -118,12 +119,25 @@ static const char *selftest (void);
#ifdef USE_AMD64
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
/* AMD64 assembly implementations of Salsa20. */
-void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits);
-void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv);
+void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits)
+ ASM_FUNC_ABI;
+void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv)
+ ASM_FUNC_ABI;
unsigned int
_gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst,
- size_t len, int rounds);
+ size_t len, int rounds) ASM_FUNC_ABI;
static void
salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
@@ -141,7 +155,8 @@ static unsigned int
salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
{
memset(dst, 0, SALSA20_BLOCK_SIZE);
- return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds);
+ return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds)
+ + ASM_EXTRA_STACK;
}
#else /* USE_AMD64 */
@@ -418,6 +433,7 @@ salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
size_t nblocks = length / SALSA20_BLOCK_SIZE;
burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
nblocks, rounds);
+ burn += ASM_EXTRA_STACK;
length -= SALSA20_BLOCK_SIZE * nblocks;
outbuf += SALSA20_BLOCK_SIZE * nblocks;
inbuf += SALSA20_BLOCK_SIZE * nblocks;
commit 8d7de4dbf7732c6eb9e9853ad7c19c89075ace6f
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Thu May 14 12:39:39 2015 +0300
Enable AMD64 Poly1305 implementations on WIN64
* cipher/poly1305-avx2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/poly1305-sse2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/poly1305-internal.h (POLY1305_SYSV_FUNC_ABI): New.
(POLY1305_USE_SSE2, POLY1305_USE_AVX2): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(OPS_FUNC_ABI): New.
(poly1305_ops_t): Use OPS_FUNC_ABI.
* cipher/poly1305.c (_gcry_poly1305_amd64_sse2_init_ext)
(_gcry_poly1305_amd64_sse2_finish_ext)
(_gcry_poly1305_amd64_sse2_blocks, _gcry_poly1305_amd64_avx2_init_ext)
(_gcry_poly1305_amd64_avx2_finish_ext)
(_gcry_poly1305_amd64_avx2_blocks, _gcry_poly1305_armv7_neon_init_ext)
(_gcry_poly1305_armv7_neon_finish_ext)
(_gcry_poly1305_armv7_neon_blocks, poly1305_init_ext_ref32)
(poly1305_blocks_ref32, poly1305_finish_ext_ref32)
(poly1305_init_ext_ref8, poly1305_blocks_ref8)
(poly1305_finish_ext_ref8): Use OPS_FUNC_ABI.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/poly1305-avx2-amd64.S b/cipher/poly1305-avx2-amd64.S
index 0ba7e76..9362a5a 100644
--- a/cipher/poly1305-avx2-amd64.S
+++ b/cipher/poly1305-avx2-amd64.S
@@ -25,15 +25,23 @@
#include <config.h>
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AVX2_SUPPORT)
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
.text
.align 8
.globl _gcry_poly1305_amd64_avx2_init_ext
-.type _gcry_poly1305_amd64_avx2_init_ext,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_init_ext,@function;)
_gcry_poly1305_amd64_avx2_init_ext:
.Lpoly1305_init_ext_avx2_local:
xor %edx, %edx
@@ -391,12 +399,12 @@ _gcry_poly1305_amd64_avx2_init_ext:
popq %r13
popq %r12
ret
-.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;)
.align 8
.globl _gcry_poly1305_amd64_avx2_blocks
-.type _gcry_poly1305_amd64_avx2_blocks,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_blocks,@function;)
_gcry_poly1305_amd64_avx2_blocks:
.Lpoly1305_blocks_avx2_local:
vzeroupper
@@ -717,12 +725,12 @@ _gcry_poly1305_amd64_avx2_blocks:
leave
addq $8, %rax
ret
-.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;
+ELF(.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;)
.align 8
.globl _gcry_poly1305_amd64_avx2_finish_ext
-.type _gcry_poly1305_amd64_avx2_finish_ext,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_finish_ext,@function;)
_gcry_poly1305_amd64_avx2_finish_ext:
.Lpoly1305_finish_ext_avx2_local:
vzeroupper
@@ -949,6 +957,6 @@ _gcry_poly1305_amd64_avx2_finish_ext:
popq %rbp
addq $(8*5), %rax
ret
-.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;)
#endif
diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h
index dfc0c04..bcbe5df 100644
--- a/cipher/poly1305-internal.h
+++ b/cipher/poly1305-internal.h
@@ -44,24 +44,30 @@
#define POLY1305_REF_ALIGNMENT sizeof(void *)
+#undef POLY1305_SYSV_FUNC_ABI
+
/* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
#undef POLY1305_USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define POLY1305_USE_SSE2 1
# define POLY1305_SSE2_BLOCKSIZE 32
# define POLY1305_SSE2_STATESIZE 248
# define POLY1305_SSE2_ALIGNMENT 16
+# define POLY1305_SYSV_FUNC_ABI 1
#endif
/* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
#undef POLY1305_USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AVX2_SUPPORT)
# define POLY1305_USE_AVX2 1
# define POLY1305_AVX2_BLOCKSIZE 64
# define POLY1305_AVX2_STATESIZE 328
# define POLY1305_AVX2_ALIGNMENT 32
+# define POLY1305_SYSV_FUNC_ABI 1
#endif
@@ -112,6 +118,17 @@
#endif
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef OPS_FUNC_ABI
+#if defined(POLY1305_SYSV_FUNC_ABI) && \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define OPS_FUNC_ABI __attribute__((sysv_abi))
+#else
+# define OPS_FUNC_ABI
+#endif
+
+
typedef struct poly1305_key_s
{
byte b[POLY1305_KEYLEN];
@@ -121,10 +138,10 @@ typedef struct poly1305_key_s
typedef struct poly1305_ops_s
{
size_t block_size;
- void (*init_ext) (void *ctx, const poly1305_key_t * key);
- unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes);
+ void (*init_ext) (void *ctx, const poly1305_key_t * key) OPS_FUNC_ABI;
+ unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes) OPS_FUNC_ABI;
unsigned int (*finish_ext) (void *ctx, const byte * m, size_t remaining,
- byte mac[POLY1305_TAGLEN]);
+ byte mac[POLY1305_TAGLEN]) OPS_FUNC_ABI;
} poly1305_ops_t;
diff --git a/cipher/poly1305-sse2-amd64.S b/cipher/poly1305-sse2-amd64.S
index 106b119..219eb07 100644
--- a/cipher/poly1305-sse2-amd64.S
+++ b/cipher/poly1305-sse2-amd64.S
@@ -25,14 +25,22 @@
#include <config.h>
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
.text
.align 8
.globl _gcry_poly1305_amd64_sse2_init_ext
-.type _gcry_poly1305_amd64_sse2_init_ext,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_init_ext,@function;)
_gcry_poly1305_amd64_sse2_init_ext:
.Lpoly1305_init_ext_x86_local:
xor %edx, %edx
@@ -273,12 +281,12 @@ _gcry_poly1305_amd64_sse2_init_ext:
popq %r13
popq %r12
ret
-.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;)
.align 8
.globl _gcry_poly1305_amd64_sse2_finish_ext
-.type _gcry_poly1305_amd64_sse2_finish_ext,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_finish_ext,@function;)
_gcry_poly1305_amd64_sse2_finish_ext:
.Lpoly1305_finish_ext_x86_local:
pushq %rbp
@@ -424,12 +432,12 @@ _gcry_poly1305_amd64_sse2_finish_ext:
popq %rbp
addq $8, %rax
ret
-.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;)
.align 8
.globl _gcry_poly1305_amd64_sse2_blocks
-.type _gcry_poly1305_amd64_sse2_blocks,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_blocks,@function;)
_gcry_poly1305_amd64_sse2_blocks:
.Lpoly1305_blocks_x86_local:
pushq %rbp
@@ -1030,6 +1038,6 @@ _gcry_poly1305_amd64_sse2_blocks:
pxor %xmm8, %xmm8
pxor %xmm0, %xmm0
ret
-.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;
+ELF(.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;)
#endif
diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index 28dbbf8..1adf0e7 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -40,12 +40,13 @@ static const char *selftest (void);
#ifdef POLY1305_USE_SSE2
-void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_amd64_sse2_ops = {
POLY1305_SSE2_BLOCKSIZE,
@@ -59,12 +60,13 @@ static const poly1305_ops_t poly1305_amd64_sse2_ops = {
#ifdef POLY1305_USE_AVX2
-void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_avx2_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_avx2_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_amd64_avx2_ops = {
POLY1305_AVX2_BLOCKSIZE,
@@ -78,12 +80,13 @@ static const poly1305_ops_t poly1305_amd64_avx2_ops = {
#ifdef POLY1305_USE_NEON
-void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_armv7_neon_ops = {
POLY1305_NEON_BLOCKSIZE,
@@ -110,7 +113,7 @@ typedef struct poly1305_state_ref32_s
} poly1305_state_ref32_t;
-static void
+static OPS_FUNC_ABI void
poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
{
poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
@@ -142,7 +145,7 @@ poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
{
poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
@@ -230,7 +233,7 @@ poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_finish_ext_ref32 (void *state, const byte * m,
size_t remaining, byte mac[POLY1305_TAGLEN])
{
@@ -370,7 +373,7 @@ typedef struct poly1305_state_ref8_t
} poly1305_state_ref8_t;
-static void
+static OPS_FUNC_ABI void
poly1305_init_ext_ref8 (void *state, const poly1305_key_t * key)
{
poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
@@ -471,7 +474,7 @@ poly1305_freeze_ref8 (byte h[17])
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
{
poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
@@ -519,7 +522,7 @@ poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_finish_ext_ref8 (void *state, const byte * m, size_t remaining,
byte mac[POLY1305_TAGLEN])
{
commit b65e9e71d5ee992db5c96793c6af999545daad28
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Thu May 14 10:31:18 2015 +0300
Enable AMD64 3DES implementation on WIN64
* cipher/des-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/des.c (USE_AMD64_ASM): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
(tripledes_ecb_crypt) [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call
assembly function through 'call_sysv_fn'.
(tripledes_amd64_ctr_enc, tripledes_amd64_cbc_dec)
(tripledes_amd64_cfb_dec): New wrapper functions for bulk
assembly functions.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
index e8b2c56..307d211 100644
--- a/cipher/des-amd64.S
+++ b/cipher/des-amd64.S
@@ -20,7 +20,8 @@
#ifdef __x86_64
#include <config.h>
-#if defined(USE_DES) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_DES) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
#ifdef __PIC__
# define RIP (%rip)
@@ -28,6 +29,12 @@
# define RIP
#endif
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
.text
#define s1 0
@@ -185,7 +192,7 @@
.align 8
.globl _gcry_3des_amd64_crypt_block
-.type _gcry_3des_amd64_crypt_block,@function;
+ELF(.type _gcry_3des_amd64_crypt_block,@function;)
_gcry_3des_amd64_crypt_block:
/* input:
@@ -271,7 +278,7 @@ _gcry_3des_amd64_crypt_block:
popq %rbp;
ret;
-.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;
+ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
/***********************************************************************
* 3-way 3DES
@@ -458,7 +465,7 @@ _gcry_3des_amd64_crypt_block:
movl right##d, 4(io);
.align 8
-.type _gcry_3des_amd64_crypt_blk3,@function;
+ELF(.type _gcry_3des_amd64_crypt_blk3,@function;)
_gcry_3des_amd64_crypt_blk3:
/* input:
* %rdi: round keys, CTX
@@ -528,11 +535,11 @@ _gcry_3des_amd64_crypt_blk3:
final_permutation3(RR, RL);
ret;
-.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;
+ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
.align 8
.globl _gcry_3des_amd64_cbc_dec
-.type _gcry_3des_amd64_cbc_dec,@function;
+ELF(.type _gcry_3des_amd64_cbc_dec,@function;)
_gcry_3des_amd64_cbc_dec:
/* input:
* %rdi: ctx, CTX
@@ -604,11 +611,11 @@ _gcry_3des_amd64_cbc_dec:
popq %rbp;
ret;
-.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;
+ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
.align 8
.globl _gcry_3des_amd64_ctr_enc
-.type _gcry_3des_amd64_ctr_enc,@function;
+ELF(.type _gcry_3des_amd64_ctr_enc,@function;)
_gcry_3des_amd64_ctr_enc:
/* input:
* %rdi: ctx, CTX
@@ -682,11 +689,11 @@ _gcry_3des_amd64_ctr_enc:
popq %rbp;
ret;
-.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;
+ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
.align 8
.globl _gcry_3des_amd64_cfb_dec
-.type _gcry_3des_amd64_cfb_dec,@function;
+ELF(.type _gcry_3des_amd64_cfb_dec,@function;)
_gcry_3des_amd64_cfb_dec:
/* input:
* %rdi: ctx, CTX
@@ -757,7 +764,7 @@ _gcry_3des_amd64_cfb_dec:
popq %rbx;
popq %rbp;
ret;
-.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;
+ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
.data
.align 16
diff --git a/cipher/des.c b/cipher/des.c
index d4863d1..be62763 100644
--- a/cipher/des.c
+++ b/cipher/des.c
@@ -127,7 +127,8 @@
/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64_ASM 1
#endif
@@ -771,6 +772,24 @@ extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out,
#define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *))
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+ const void *arg3, const void *arg4)
+{
+ /* Call SystemV ABI function without storing non-volatile XMM registers,
+ * as target function does not use vector instruction sets. */
+ asm volatile ("callq *%0\n\t"
+ : "+a" (fn),
+ "+D" (arg1),
+ "+S" (arg2),
+ "+d" (arg3),
+ "+c" (arg4)
+ :
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
/*
* Electronic Codebook Mode Triple-DES encryption/decryption of data
* according to 'mode'. Sometimes this mode is named 'EDE' mode
@@ -784,11 +803,45 @@ tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_3des_amd64_crypt_block, keys, to, from, NULL);
+#else
_gcry_3des_amd64_crypt_block(keys, to, from);
+#endif
return 0;
}
+static inline void
+tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_3des_amd64_ctr_enc, keys, out, in, ctr);
+#else
+ _gcry_3des_amd64_ctr_enc(keys, out, in, ctr);
+#endif
+}
+
+static inline void
+tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_3des_amd64_cbc_dec, keys, out, in, iv);
+#else
+ _gcry_3des_amd64_cbc_dec(keys, out, in, iv);
+#endif
+}
+
+static inline void
+tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_3des_amd64_cfb_dec, keys, out, in, iv);
+#else
+ _gcry_3des_amd64_cfb_dec(keys, out, in, iv);
+#endif
+}
+
#else /*USE_AMD64_ASM*/
#define TRIPLEDES_ECB_BURN_STACK 32
@@ -871,7 +924,7 @@ _gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
- _gcry_3des_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr);
+ tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr);
nblocks -= 3;
outbuf += 3 * DES_BLOCKSIZE;
@@ -926,7 +979,7 @@ _gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
- _gcry_3des_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv);
+ tripledes_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv);
nblocks -= 3;
outbuf += 3 * DES_BLOCKSIZE;
@@ -974,7 +1027,7 @@ _gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
- _gcry_3des_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
+ tripledes_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
nblocks -= 3;
outbuf += 3 * DES_BLOCKSIZE;
commit 9597cfddf03c467825da152be5ca0d12a8c30d88
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Tue May 5 21:02:43 2015 +0300
Enable AMD64 ChaCha20 implementations on WIN64
* cipher/chacha20-avx2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/chacha20-sse2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/chacha20-ssse3-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/chacha20.c (USE_SSE2, USE_SSSE3, USE_AVX2): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
(chacha20_blocks_t, _gcry_chacha20_amd64_sse2_blocks)
(_gcry_chacha20_amd64_ssse3_blocks, _gcry_chacha20_amd64_avx2_blocks)
(_gcry_chacha20_armv7_neon_blocks, chacha20_blocks): Add ASM_FUNC_ABI.
(chacha20_core): Add ASM_EXTRA_STACK.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/chacha20-avx2-amd64.S b/cipher/chacha20-avx2-amd64.S
index 1f33de8..12bed35 100644
--- a/cipher/chacha20-avx2-amd64.S
+++ b/cipher/chacha20-avx2-amd64.S
@@ -26,7 +26,8 @@
#ifdef __x86_64__
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AVX2_SUPPORT) && USE_CHACHA20
#ifdef __PIC__
@@ -35,11 +36,17 @@
# define RIP
#endif
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
.text
.align 8
.globl _gcry_chacha20_amd64_avx2_blocks
-.type _gcry_chacha20_amd64_avx2_blocks,@function;
+ELF(.type _gcry_chacha20_amd64_avx2_blocks,@function;)
_gcry_chacha20_amd64_avx2_blocks:
.Lchacha_blocks_avx2_local:
vzeroupper
@@ -938,7 +945,7 @@ _gcry_chacha20_amd64_avx2_blocks:
vzeroall
movl $(63 + 512), %eax
ret
-.size _gcry_chacha20_amd64_avx2_blocks,.-_gcry_chacha20_amd64_avx2_blocks;
+ELF(.size _gcry_chacha20_amd64_avx2_blocks,.-_gcry_chacha20_amd64_avx2_blocks;)
.data
.align 16
diff --git a/cipher/chacha20-sse2-amd64.S b/cipher/chacha20-sse2-amd64.S
index 4811f40..2b9842c 100644
--- a/cipher/chacha20-sse2-amd64.S
+++ b/cipher/chacha20-sse2-amd64.S
@@ -26,13 +26,20 @@
#ifdef __x86_64__
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && USE_CHACHA20
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && USE_CHACHA20
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
.text
.align 8
.globl _gcry_chacha20_amd64_sse2_blocks
-.type _gcry_chacha20_amd64_sse2_blocks,@function;
+ELF(.type _gcry_chacha20_amd64_sse2_blocks,@function;)
_gcry_chacha20_amd64_sse2_blocks:
.Lchacha_blocks_sse2_local:
pushq %rbx
@@ -646,7 +653,7 @@ _gcry_chacha20_amd64_sse2_blocks:
pxor %xmm8, %xmm8
pxor %xmm0, %xmm0
ret
-.size _gcry_chacha20_amd64_sse2_blocks,.-_gcry_chacha20_amd64_sse2_blocks;
+ELF(.size _gcry_chacha20_amd64_sse2_blocks,.-_gcry_chacha20_amd64_sse2_blocks;)
#endif /*defined(USE_CHACHA20)*/
#endif /*__x86_64*/
diff --git a/cipher/chacha20-ssse3-amd64.S b/cipher/chacha20-ssse3-amd64.S
index 50c2ff8..a1a843f 100644
--- a/cipher/chacha20-ssse3-amd64.S
+++ b/cipher/chacha20-ssse3-amd64.S
@@ -26,7 +26,8 @@
#ifdef __x86_64__
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(HAVE_GCC_INLINE_ASM_SSSE3) && USE_CHACHA20
#ifdef __PIC__
@@ -35,11 +36,17 @@
# define RIP
#endif
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
.text
.align 8
.globl _gcry_chacha20_amd64_ssse3_blocks
-.type _gcry_chacha20_amd64_ssse3_blocks,@function;
+ELF(.type _gcry_chacha20_amd64_ssse3_blocks,@function;)
_gcry_chacha20_amd64_ssse3_blocks:
.Lchacha_blocks_ssse3_local:
pushq %rbx
@@ -614,7 +621,7 @@ _gcry_chacha20_amd64_ssse3_blocks:
pxor %xmm8, %xmm8
pxor %xmm0, %xmm0
ret
-.size _gcry_chacha20_amd64_ssse3_blocks,.-_gcry_chacha20_amd64_ssse3_blocks;
+ELF(.size _gcry_chacha20_amd64_ssse3_blocks,.-_gcry_chacha20_amd64_ssse3_blocks;)
.data
.align 16;
diff --git a/cipher/chacha20.c b/cipher/chacha20.c
index 2eaeffd..e25e239 100644
--- a/cipher/chacha20.c
+++ b/cipher/chacha20.c
@@ -50,20 +50,23 @@
/* USE_SSE2 indicates whether to compile with Intel SSE2 code. */
#undef USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSE2 1
#endif
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(HAVE_GCC_INLINE_ASM_SSSE3)
# define USE_SSSE3 1
#endif
/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
#undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AVX2_SUPPORT)
# define USE_AVX2 1
#endif
@@ -82,8 +85,23 @@
struct CHACHA20_context_s;
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if (defined(USE_SSE2) || defined(USE_SSSE3) || defined(USE_AVX2)) && \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
+
typedef unsigned int (* chacha20_blocks_t)(u32 *state, const byte *src,
- byte *dst, size_t bytes);
+ byte *dst,
+ size_t bytes) ASM_FUNC_ABI;
typedef struct CHACHA20_context_s
{
@@ -97,28 +115,32 @@ typedef struct CHACHA20_context_s
#ifdef USE_SSE2
unsigned int _gcry_chacha20_amd64_sse2_blocks(u32 *state, const byte *in,
- byte *out, size_t bytes);
+ byte *out,
+ size_t bytes) ASM_FUNC_ABI;
#endif /* USE_SSE2 */
#ifdef USE_SSSE3
unsigned int _gcry_chacha20_amd64_ssse3_blocks(u32 *state, const byte *in,
- byte *out, size_t bytes);
+ byte *out,
+ size_t bytes) ASM_FUNC_ABI;
#endif /* USE_SSSE3 */
#ifdef USE_AVX2
unsigned int _gcry_chacha20_amd64_avx2_blocks(u32 *state, const byte *in,
- byte *out, size_t bytes);
+ byte *out,
+ size_t bytes) ASM_FUNC_ABI;
#endif /* USE_AVX2 */
#ifdef USE_NEON
unsigned int _gcry_chacha20_armv7_neon_blocks(u32 *state, const byte *in,
- byte *out, size_t bytes);
+ byte *out,
+ size_t bytes) ASM_FUNC_ABI;
#endif /* USE_NEON */
@@ -141,7 +163,7 @@ static const char *selftest (void);
#ifndef USE_SSE2
-static unsigned int
+ASM_FUNC_ABI static unsigned int
chacha20_blocks (u32 *state, const byte *src, byte *dst, size_t bytes)
{
u32 pad[CHACHA20_INPUT_LENGTH];
@@ -269,7 +291,8 @@ chacha20_blocks (u32 *state, const byte *src, byte *dst, size_t bytes)
static unsigned int
chacha20_core(u32 *dst, struct CHACHA20_context_s *ctx)
{
- return ctx->blocks(ctx->input, NULL, (byte *)dst, CHACHA20_BLOCK_SIZE);
+ return ctx->blocks(ctx->input, NULL, (byte *)dst, CHACHA20_BLOCK_SIZE)
+ + ASM_EXTRA_STACK;
}
commit 6a6646df80386204675d8b149ab60e74d7ca124c
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Tue May 5 20:46:10 2015 +0300
Enable AMD64 CAST5 implementation on WIN64
* cipher/cast5-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(RIP): Remove.
(GET_EXTERN_POINTER): Use 'leaq' version on WIN64.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/cast5.c (USE_AMD64_ASM): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
(do_encrypt_block, do_decrypt_block)
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call assembly
function through 'call_sysv_fn'.
(cast5_amd64_ctr_enc, cast5_amd64_cbc_dec)
(cast5_amd64_cfb_dec): New wrapper functions for bulk
assembly functions.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index 41fbb74..a5f078e 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
@@ -20,14 +20,19 @@
#ifdef __x86_64
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_CAST5)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5)
-#ifdef __PIC__
-# define RIP %rip
+#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
+# define GET_EXTERN_POINTER(name, reg) leaq name, reg
+#else
# define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
#else
-# define RIP
-# define GET_EXTERN_POINTER(name, reg) leaq name, reg
+# define ELF(...) /*_*/
#endif
.text
@@ -180,7 +185,7 @@
.align 8
.globl _gcry_cast5_amd64_encrypt_block
-.type _gcry_cast5_amd64_encrypt_block,@function;
+ELF(.type _gcry_cast5_amd64_encrypt_block,@function;)
_gcry_cast5_amd64_encrypt_block:
/* input:
@@ -216,11 +221,11 @@ _gcry_cast5_amd64_encrypt_block:
popq %rbx;
popq %rbp;
ret;
-.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;
+ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
.align 8
.globl _gcry_cast5_amd64_decrypt_block
-.type _gcry_cast5_amd64_decrypt_block,@function;
+ELF(.type _gcry_cast5_amd64_decrypt_block,@function;)
_gcry_cast5_amd64_decrypt_block:
/* input:
@@ -256,7 +261,7 @@ _gcry_cast5_amd64_decrypt_block:
popq %rbx;
popq %rbp;
ret;
-.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;
+ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
/**********************************************************************
4-way cast5, four blocks parallel
@@ -359,7 +364,7 @@ _gcry_cast5_amd64_decrypt_block:
rorq $32, d;
.align 8
-.type __cast5_enc_blk4,@function;
+ELF(.type __cast5_enc_blk4,@function;)
__cast5_enc_blk4:
/* input:
@@ -384,10 +389,10 @@ __cast5_enc_blk4:
outbswap_block4(RLR0, RLR1, RLR2, RLR3);
ret;
-.size __cast5_enc_blk4,.-__cast5_enc_blk4;
+ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
.align 8
-.type __cast5_dec_blk4,@function;
+ELF(.type __cast5_dec_blk4,@function;)
__cast5_dec_blk4:
/* input:
@@ -414,11 +419,11 @@ __cast5_dec_blk4:
outbswap_block4(RLR0, RLR1, RLR2, RLR3);
ret;
-.size __cast5_dec_blk4,.-__cast5_dec_blk4;
+ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
.align 8
.globl _gcry_cast5_amd64_ctr_enc
-.type _gcry_cast5_amd64_ctr_enc,@function;
+ELF(.type _gcry_cast5_amd64_ctr_enc,@function;)
_gcry_cast5_amd64_ctr_enc:
/* input:
* %rdi: ctx, CTX
@@ -472,11 +477,11 @@ _gcry_cast5_amd64_ctr_enc:
popq %rbx;
popq %rbp;
ret
-.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;
+ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
.align 8
.globl _gcry_cast5_amd64_cbc_dec
-.type _gcry_cast5_amd64_cbc_dec,@function;
+ELF(.type _gcry_cast5_amd64_cbc_dec,@function;)
_gcry_cast5_amd64_cbc_dec:
/* input:
* %rdi: ctx, CTX
@@ -526,11 +531,11 @@ _gcry_cast5_amd64_cbc_dec:
popq %rbp;
ret;
-.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;
+ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
.align 8
.globl _gcry_cast5_amd64_cfb_dec
-.type _gcry_cast5_amd64_cfb_dec,@function;
+ELF(.type _gcry_cast5_amd64_cfb_dec,@function;)
_gcry_cast5_amd64_cfb_dec:
/* input:
* %rdi: ctx, CTX
@@ -581,7 +586,7 @@ _gcry_cast5_amd64_cfb_dec:
popq %rbp;
ret;
-.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;
+ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;)
#endif /*defined(USE_CAST5)*/
#endif /*__x86_64*/
diff --git a/cipher/cast5.c b/cipher/cast5.c
index 115e1e6..94dcee7 100644
--- a/cipher/cast5.c
+++ b/cipher/cast5.c
@@ -48,7 +48,8 @@
/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64_ASM 1
#endif
@@ -372,16 +373,72 @@ extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out,
extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out,
const byte *in, byte *iv);
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+ const void *arg3, const void *arg4)
+{
+ /* Call SystemV ABI function without storing non-volatile XMM registers,
+ * as target function does not use vector instruction sets. */
+ asm volatile ("callq *%0\n\t"
+ : "+a" (fn),
+ "+D" (arg1),
+ "+S" (arg2),
+ "+d" (arg3),
+ "+c" (arg4)
+ :
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
static void
do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_cast5_amd64_encrypt_block, context, outbuf, inbuf, NULL);
+#else
_gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf);
+#endif
}
static void
do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_cast5_amd64_decrypt_block, context, outbuf, inbuf, NULL);
+#else
_gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf);
+#endif
+}
+
+static void
+cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_cast5_amd64_ctr_enc, ctx, out, in, ctr);
+#else
+ _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr);
+#endif
+}
+
+static void
+cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_cast5_amd64_cbc_dec, ctx, out, in, iv);
+#else
+ _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv);
+#endif
+}
+
+static void
+cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_cast5_amd64_cfb_dec, ctx, out, in, iv);
+#else
+ _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv);
+#endif
}
static unsigned int
@@ -396,7 +453,7 @@ static unsigned int
decrypt_block (void *context, byte *outbuf, const byte *inbuf)
{
CAST5_context *c = (CAST5_context *) context;
- _gcry_cast5_amd64_decrypt_block (c, outbuf, inbuf);
+ do_decrypt_block (c, outbuf, inbuf);
return /*burn_stack*/ (2*8);
}
@@ -582,7 +639,7 @@ _gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+ cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
nblocks -= 4;
outbuf += 4 * CAST5_BLOCKSIZE;
@@ -651,7 +708,7 @@ _gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+ cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
nblocks -= 4;
outbuf += 4 * CAST5_BLOCKSIZE;
@@ -710,7 +767,7 @@ _gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+ cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
nblocks -= 4;
outbuf += 4 * CAST5_BLOCKSIZE;
commit 9a4fb3709864bf3e3918800d44ff576590cd4e92
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Thu May 14 13:33:07 2015 +0300
Enable AMD64 Camellia implementations on WIN64
* cipher/camellia-aesni-avx-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/camellia-aesni-avx2-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/camellia-glue.c (USE_AESNI_AVX, USE_AESNI_AVX2): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_AESNI_AVX || USE_AESNI_AVX2] (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
(_gcry_camellia_aesni_avx_ctr_enc, _gcry_camellia_aesni_avx_cbc_dec)
(_gcry_camellia_aesni_avx_cfb_dec, _gcry_camellia_aesni_avx_keygen)
(_gcry_camellia_aesni_avx2_ctr_enc, _gcry_camellia_aesni_avx2_cbc_dec)
(_gcry_camellia_aesni_avx2_cfb_dec): Add ASM_FUNC_ABI.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 6d157a7..c047a21 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -20,7 +20,8 @@
#ifdef __x86_64
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
#ifdef __PIC__
@@ -29,6 +30,12 @@
# define RIP
#endif
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
#define CAMELLIA_TABLE_BYTE_LEN 272
/* struct CAMELLIA_context: */
@@ -769,7 +776,7 @@
.text
.align 8
-.type __camellia_enc_blk16,@function;
+ELF(.type __camellia_enc_blk16,@function;)
__camellia_enc_blk16:
/* input:
@@ -853,10 +860,10 @@ __camellia_enc_blk16:
%xmm15, %rax, %rcx, 24);
jmp .Lenc_done;
-.size __camellia_enc_blk16,.-__camellia_enc_blk16;
+ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
.align 8
-.type __camellia_dec_blk16,@function;
+ELF(.type __camellia_dec_blk16,@function;)
__camellia_dec_blk16:
/* input:
@@ -938,7 +945,7 @@ __camellia_dec_blk16:
((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24;
-.size __camellia_dec_blk16,.-__camellia_dec_blk16;
+ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
@@ -948,7 +955,7 @@ __camellia_dec_blk16:
.align 8
.globl _gcry_camellia_aesni_avx_ctr_enc
-.type _gcry_camellia_aesni_avx_ctr_enc,@function;
+ELF(.type _gcry_camellia_aesni_avx_ctr_enc,@function;)
_gcry_camellia_aesni_avx_ctr_enc:
/* input:
@@ -1062,11 +1069,11 @@ _gcry_camellia_aesni_avx_ctr_enc:
leave;
ret;
-.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;
+ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
.align 8
.globl _gcry_camellia_aesni_avx_cbc_dec
-.type _gcry_camellia_aesni_avx_cbc_dec,@function;
+ELF(.type _gcry_camellia_aesni_avx_cbc_dec,@function;)
_gcry_camellia_aesni_avx_cbc_dec:
/* input:
@@ -1130,11 +1137,11 @@ _gcry_camellia_aesni_avx_cbc_dec:
leave;
ret;
-.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;
+ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
.align 8
.globl _gcry_camellia_aesni_avx_cfb_dec
-.type _gcry_camellia_aesni_avx_cfb_dec,@function;
+ELF(.type _gcry_camellia_aesni_avx_cfb_dec,@function;)
_gcry_camellia_aesni_avx_cfb_dec:
/* input:
@@ -1202,7 +1209,7 @@ _gcry_camellia_aesni_avx_cfb_dec:
leave;
ret;
-.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;
+ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
/*
* IN:
@@ -1309,7 +1316,7 @@ _gcry_camellia_aesni_avx_cfb_dec:
.text
.align 8
-.type __camellia_avx_setup128,@function;
+ELF(.type __camellia_avx_setup128,@function;)
__camellia_avx_setup128:
/* input:
* %rdi: ctx, CTX; subkey storage at key_table(CTX)
@@ -1650,10 +1657,10 @@ __camellia_avx_setup128:
vzeroall;
ret;
-.size __camellia_avx_setup128,.-__camellia_avx_setup128;
+ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
.align 8
-.type __camellia_avx_setup256,@function;
+ELF(.type __camellia_avx_setup256,@function;)
__camellia_avx_setup256:
/* input:
@@ -2127,11 +2134,11 @@ __camellia_avx_setup256:
vzeroall;
ret;
-.size __camellia_avx_setup256,.-__camellia_avx_setup256;
+ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
.align 8
.globl _gcry_camellia_aesni_avx_keygen
-.type _gcry_camellia_aesni_avx_keygen,@function;
+ELF(.type _gcry_camellia_aesni_avx_keygen,@function;)
_gcry_camellia_aesni_avx_keygen:
/* input:
@@ -2159,7 +2166,7 @@ _gcry_camellia_aesni_avx_keygen:
vpor %xmm2, %xmm1, %xmm1;
jmp __camellia_avx_setup256;
-.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;
+ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;)
#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
#endif /*__x86_64*/
diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S
index 25f48bc..a3fa229 100644
--- a/cipher/camellia-aesni-avx2-amd64.S
+++ b/cipher/camellia-aesni-avx2-amd64.S
@@ -20,7 +20,8 @@
#ifdef __x86_64
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
#ifdef __PIC__
@@ -29,6 +30,12 @@
# define RIP
#endif
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
#define CAMELLIA_TABLE_BYTE_LEN 272
/* struct CAMELLIA_context: */
@@ -748,7 +755,7 @@
.text
.align 8
-.type __camellia_enc_blk32,@function;
+ELF(.type __camellia_enc_blk32,@function;)
__camellia_enc_blk32:
/* input:
@@ -832,10 +839,10 @@ __camellia_enc_blk32:
%ymm15, %rax, %rcx, 24);
jmp .Lenc_done;
-.size __camellia_enc_blk32,.-__camellia_enc_blk32;
+ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;)
.align 8
-.type __camellia_dec_blk32,@function;
+ELF(.type __camellia_dec_blk32,@function;)
__camellia_dec_blk32:
/* input:
@@ -917,7 +924,7 @@ __camellia_dec_blk32:
((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24;
-.size __camellia_dec_blk32,.-__camellia_dec_blk32;
+ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;)
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
@@ -927,7 +934,7 @@ __camellia_dec_blk32:
.align 8
.globl _gcry_camellia_aesni_avx2_ctr_enc
-.type _gcry_camellia_aesni_avx2_ctr_enc,@function;
+ELF(.type _gcry_camellia_aesni_avx2_ctr_enc,@function;)
_gcry_camellia_aesni_avx2_ctr_enc:
/* input:
@@ -1111,11 +1118,11 @@ _gcry_camellia_aesni_avx2_ctr_enc:
leave;
ret;
-.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;
+ELF(.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;)
.align 8
.globl _gcry_camellia_aesni_avx2_cbc_dec
-.type _gcry_camellia_aesni_avx2_cbc_dec,@function;
+ELF(.type _gcry_camellia_aesni_avx2_cbc_dec,@function;)
_gcry_camellia_aesni_avx2_cbc_dec:
/* input:
@@ -1183,11 +1190,11 @@ _gcry_camellia_aesni_avx2_cbc_dec:
leave;
ret;
-.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;
+ELF(.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;)
.align 8
.globl _gcry_camellia_aesni_avx2_cfb_dec
-.type _gcry_camellia_aesni_avx2_cfb_dec,@function;
+ELF(.type _gcry_camellia_aesni_avx2_cfb_dec,@function;)
_gcry_camellia_aesni_avx2_cfb_dec:
/* input:
@@ -1257,7 +1264,7 @@ _gcry_camellia_aesni_avx2_cfb_dec:
leave;
ret;
-.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;
+ELF(.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;)
#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)*/
#endif /*__x86_64*/
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index f18d135..5032321 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -75,7 +75,8 @@
/* USE_AESNI inidicates whether to compile with Intel AES-NI/AVX code. */
#undef USE_AESNI_AVX
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
-# if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AESNI_AVX 1
# endif
#endif
@@ -83,7 +84,8 @@
/* USE_AESNI_AVX2 inidicates whether to compile with Intel AES-NI/AVX2 code. */
#undef USE_AESNI_AVX2
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
-# if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AESNI_AVX2 1
# endif
#endif
@@ -100,6 +102,20 @@ typedef struct
#endif /*USE_AESNI_AVX2*/
} CAMELLIA_context;
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
#ifdef USE_AESNI_AVX
/* Assembler implementations of Camellia using AES-NI and AVX. Process data
in 16 block same time.
@@ -107,21 +123,21 @@ typedef struct
extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *ctr);
+ unsigned char *ctr) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *iv);
+ unsigned char *iv) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *iv);
+ unsigned char *iv) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
const unsigned char *key,
- unsigned int keylen);
+ unsigned int keylen) ASM_FUNC_ABI;
#endif
#ifdef USE_AESNI_AVX2
@@ -131,17 +147,17 @@ extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *ctr);
+ unsigned char *ctr) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *iv);
+ unsigned char *iv) ASM_FUNC_ABI;
extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx,
unsigned char *out,
const unsigned char *in,
- unsigned char *iv);
+ unsigned char *iv) ASM_FUNC_ABI;
#endif
static const char *selftest(void);
@@ -318,7 +334,7 @@ _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
if (did_use_aesni_avx2)
{
int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
- 2 * sizeof(void *);
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
if (burn_stack_depth < avx2_burn_stack_depth)
burn_stack_depth = avx2_burn_stack_depth;
@@ -347,8 +363,11 @@ _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
if (did_use_aesni_avx)
{
- if (burn_stack_depth < 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *))
- burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *);
+ int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx_burn_stack_depth)
+ burn_stack_depth = avx_burn_stack_depth;
}
/* Use generic code to handle smaller chunks... */
@@ -409,7 +428,7 @@ _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
if (did_use_aesni_avx2)
{
int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
- 2 * sizeof(void *);
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;;
if (burn_stack_depth < avx2_burn_stack_depth)
burn_stack_depth = avx2_burn_stack_depth;
@@ -437,8 +456,11 @@ _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
if (did_use_aesni_avx)
{
- if (burn_stack_depth < 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *))
- burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *);
+ int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx_burn_stack_depth)
+ burn_stack_depth = avx_burn_stack_depth;
}
/* Use generic code to handle smaller chunks... */
@@ -491,7 +513,7 @@ _gcry_camellia_cfb_dec(void *context, unsigned char *iv,
if (did_use_aesni_avx2)
{
int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
- 2 * sizeof(void *);
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
if (burn_stack_depth < avx2_burn_stack_depth)
burn_stack_depth = avx2_burn_stack_depth;
@@ -519,8 +541,11 @@ _gcry_camellia_cfb_dec(void *context, unsigned char *iv,
if (did_use_aesni_avx)
{
- if (burn_stack_depth < 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *))
- burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *);
+ int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+ 2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+ if (burn_stack_depth < avx_burn_stack_depth)
+ burn_stack_depth = avx_burn_stack_depth;
}
/* Use generic code to handle smaller chunks... */
commit e05682093ffb003b589a697428d918d755ac631d
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Sun May 3 17:28:40 2015 +0300
Enable AMD64 Blowfish implementation on WIN64
* cipher/blowfish-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/blowfish.c (USE_AMD64_ASM): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
(do_encrypt, do_encrypt_block, do_decrypt_block)
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call assembly
function through 'call_sysv_fn'.
(blowfish_amd64_ctr_enc, blowfish_amd64_cbc_dec)
(blowfish_amd64_cfb_dec): New wrapper functions for bulk
assembly functions.
..
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index 87b676f..21b63fc 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
@@ -20,7 +20,15 @@
#ifdef __x86_64
#include <config.h>
-#if defined(USE_BLOWFISH) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_BLOWFISH) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
.text
@@ -120,7 +128,7 @@
movq RX0, (RIO);
.align 8
-.type __blowfish_enc_blk1,@function;
+ELF(.type __blowfish_enc_blk1,@function;)
__blowfish_enc_blk1:
/* input:
@@ -145,11 +153,11 @@ __blowfish_enc_blk1:
movq %r11, %rbp;
ret;
-.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
+ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
.align 8
.globl _gcry_blowfish_amd64_do_encrypt
-.type _gcry_blowfish_amd64_do_encrypt,@function;
+ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;)
_gcry_blowfish_amd64_do_encrypt:
/* input:
@@ -171,11 +179,11 @@ _gcry_blowfish_amd64_do_encrypt:
movl RX0d, (RX2);
ret;
-.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;
+ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
.align 8
.globl _gcry_blowfish_amd64_encrypt_block
-.type _gcry_blowfish_amd64_encrypt_block,@function;
+ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;)
_gcry_blowfish_amd64_encrypt_block:
/* input:
@@ -195,11 +203,11 @@ _gcry_blowfish_amd64_encrypt_block:
write_block();
ret;
-.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;
+ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
.align 8
.globl _gcry_blowfish_amd64_decrypt_block
-.type _gcry_blowfish_amd64_decrypt_block,@function;
+ELF(.type _gcry_blowfish_amd64_decrypt_block,@function;)
_gcry_blowfish_amd64_decrypt_block:
/* input:
@@ -231,7 +239,7 @@ _gcry_blowfish_amd64_decrypt_block:
movq %r11, %rbp;
ret;
-.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;
+ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
/**********************************************************************
4-way blowfish, four blocks parallel
@@ -319,7 +327,7 @@ _gcry_blowfish_amd64_decrypt_block:
bswapq RX3;
.align 8
-.type __blowfish_enc_blk4,@function;
+ELF(.type __blowfish_enc_blk4,@function;)
__blowfish_enc_blk4:
/* input:
@@ -343,10 +351,10 @@ __blowfish_enc_blk4:
outbswap_block4();
ret;
-.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;
+ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
.align 8
-.type __blowfish_dec_blk4,@function;
+ELF(.type __blowfish_dec_blk4,@function;)
__blowfish_dec_blk4:
/* input:
@@ -372,11 +380,11 @@ __blowfish_dec_blk4:
outbswap_block4();
ret;
-.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;
+ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
.align 8
.globl _gcry_blowfish_amd64_ctr_enc
-.type _gcry_blowfish_amd64_ctr_enc,@function;
+ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;)
_gcry_blowfish_amd64_ctr_enc:
/* input:
* %rdi: ctx, CTX
@@ -429,11 +437,11 @@ _gcry_blowfish_amd64_ctr_enc:
popq %rbp;
ret;
-.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;
+ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
.align 8
.globl _gcry_blowfish_amd64_cbc_dec
-.type _gcry_blowfish_amd64_cbc_dec,@function;
+ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;)
_gcry_blowfish_amd64_cbc_dec:
/* input:
* %rdi: ctx, CTX
@@ -477,11 +485,11 @@ _gcry_blowfish_amd64_cbc_dec:
popq %rbp;
ret;
-.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;
+ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
.align 8
.globl _gcry_blowfish_amd64_cfb_dec
-.type _gcry_blowfish_amd64_cfb_dec,@function;
+ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;)
_gcry_blowfish_amd64_cfb_dec:
/* input:
* %rdi: ctx, CTX
@@ -527,7 +535,7 @@ _gcry_blowfish_amd64_cfb_dec:
popq %rbx;
popq %rbp;
ret;
-.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;
+ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
#endif /*defined(USE_BLOWFISH)*/
#endif /*__x86_64*/
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index ae470d8..a3fc26c 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -45,7 +45,8 @@
/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
(BLOWFISH_ROUNDS == 16)
# define USE_AMD64_ASM 1
#endif
@@ -280,22 +281,87 @@ extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out,
extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out,
const byte *in, byte *iv);
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+ const void *arg3, const void *arg4)
+{
+ /* Call SystemV ABI function without storing non-volatile XMM registers,
+ * as target function does not use vector instruction sets. */
+ asm volatile ("callq *%0\n\t"
+ : "+a" (fn),
+ "+D" (arg1),
+ "+S" (arg2),
+ "+d" (arg3),
+ "+c" (arg4)
+ :
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
static void
do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_do_encrypt, bc, ret_xl, ret_xr, NULL);
+#else
_gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr);
+#endif
}
static void
do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_encrypt_block, context, outbuf, inbuf,
+ NULL);
+#else
_gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);
+#endif
}
static void
do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_decrypt_block, context, outbuf, inbuf,
+ NULL);
+#else
_gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf);
+#endif
+}
+
+static inline void
+blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_ctr_enc, ctx, out, in, ctr);
+#else
+ _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr);
+#endif
+}
+
+static inline void
+blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_cbc_dec, ctx, out, in, iv);
+#else
+ _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv);
+#endif
+}
+
+static inline void
+blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+ byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ call_sysv_fn (_gcry_blowfish_amd64_cfb_dec, ctx, out, in, iv);
+#else
+ _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv);
+#endif
}
static unsigned int
@@ -605,7 +671,7 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+ blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
nblocks -= 4;
outbuf += 4 * BLOWFISH_BLOCKSIZE;
@@ -674,7 +740,7 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+ blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
nblocks -= 4;
outbuf += 4 * BLOWFISH_BLOCKSIZE;
@@ -734,7 +800,7 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Process data in 4 block chunks. */
while (nblocks >= 4)
{
- _gcry_blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+ blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
nblocks -= 4;
outbuf += 4 * BLOWFISH_BLOCKSIZE;
commit c46b015bedba7ce0db68929bd33a86a54ab3d919
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Sun May 3 17:06:56 2015 +0300
Enable AMD64 arcfour implementation on WIN64
* cipher/arcfour-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/arcfour.c (USE_AMD64_ASM): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(do_encrypt, do_decrypt) [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Use
assembly block to call AMD64 assembly function.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
index 8b8031a..2e52ea0 100644
--- a/cipher/arcfour-amd64.S
+++ b/cipher/arcfour-amd64.S
@@ -15,12 +15,19 @@
#ifdef __x86_64__
#include <config.h>
-#if defined(USE_ARCFOUR) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
.text
.align 16
.globl _gcry_arcfour_amd64
-.type _gcry_arcfour_amd64,@function
+ELF(.type _gcry_arcfour_amd64,@function)
_gcry_arcfour_amd64:
push %rbp
push %rbx
@@ -91,7 +98,7 @@ _gcry_arcfour_amd64:
pop %rbp
ret
.L__gcry_arcfour_amd64_end:
-.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64
+ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)
#endif
#endif
diff --git a/cipher/arcfour.c b/cipher/arcfour.c
index 27537bf..44e8ef4 100644
--- a/cipher/arcfour.c
+++ b/cipher/arcfour.c
@@ -33,7 +33,8 @@
/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
#undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AMD64_ASM 1
#endif
@@ -53,7 +54,21 @@ static void
encrypt_stream (void *context,
byte *outbuf, const byte *inbuf, size_t length)
{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+ const void *fn = _gcry_arcfour_amd64;
+ /* Call SystemV ABI function without storing non-volatile XMM registers,
+ * as target function does not use vector instruction sets. */
+ asm volatile ("callq *%0\n\t"
+ : "+a" (fn),
+ "+D" (context),
+ "+S" (length),
+ "+d" (inbuf),
+ "+c" (outbuf)
+ :
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+#else
_gcry_arcfour_amd64 (context, length, inbuf, outbuf );
+#endif
}
#else /*!USE_AMD64_ASM*/
commit ee8fc4edcb3466b03246c8720b90731bf274ff1d
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Thu May 14 10:02:51 2015 +0300
Update documentation for Poly1305-ChaCha20 AEAD, RFC-7539
* cipher/cipher-poly1305.c: Add RFC-7539 to header.
* doc/gcrypt.texi: Update Poly1305 AEAD documentation with mention of
RFC-7539; Drop Salsa from supported stream ciphers for Poly1305 AEAD.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/cipher-poly1305.c b/cipher/cipher-poly1305.c
index f283333..965a7b6 100644
--- a/cipher/cipher-poly1305.c
+++ b/cipher/cipher-poly1305.c
@@ -1,4 +1,4 @@
-/* cipher-pol1305.c - Poly1305 based AEAD cipher mode
+/* cipher-poly1305.c - Poly1305 based AEAD cipher mode, RFC-7539
* Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
* This file is part of Libgcrypt.
diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi
index 8683ca8..ab4f685 100644
--- a/doc/gcrypt.texi
+++ b/doc/gcrypt.texi
@@ -1643,9 +1643,10 @@ Associated Data (AEAD) block cipher mode, which is specified in
'NIST Special Publication 800-38D'.
@item GCRY_CIPHER_MODE_POLY1305
-@cindex Poly1305 based AEAD mode
-Poly1305 is an Authenticated Encryption with Associated Data (AEAD)
-mode, which can be used with ChaCha20 and Salsa20 stream ciphers.
+@cindex Poly1305 based AEAD mode with ChaCha20
+This mode implements the Poly1305 Authenticated Encryption with Associated
+Data (AEAD) mode according to RFC-7539. This mode can be used with ChaCha20
+stream cipher.
@item GCRY_CIPHER_MODE_OCB
@cindex OCB, OCB3
@@ -1687,7 +1688,7 @@ and the according constants. Note that some modes are incompatible
with some algorithms - in particular, stream mode
(@code{GCRY_CIPHER_MODE_STREAM}) only works with stream ciphers.
Poly1305 AEAD mode (@code{GCRY_CIPHER_MODE_POLY1305}) only works with
-ChaCha and Salsa stream ciphers. The block cipher modes
+ChaCha20 stream cipher. The block cipher modes
(@code{GCRY_CIPHER_MODE_ECB}, @code{GCRY_CIPHER_MODE_CBC},
@code{GCRY_CIPHER_MODE_CFB}, @code{GCRY_CIPHER_MODE_OFB} and
@code{GCRY_CIPHER_MODE_CTR}) will work with any block cipher
-----------------------------------------------------------------------
Summary of changes:
cipher/arcfour-amd64.S | 13 ++++--
cipher/arcfour.c | 17 +++++++-
cipher/blowfish-amd64.S | 46 ++++++++++++---------
cipher/blowfish.c | 74 +++++++++++++++++++++++++++++++--
cipher/camellia-aesni-avx-amd64.S | 41 +++++++++++--------
cipher/camellia-aesni-avx2-amd64.S | 29 ++++++++-----
cipher/camellia-glue.c | 61 +++++++++++++++++++--------
cipher/cast5-amd64.S | 43 ++++++++++---------
cipher/cast5.c | 67 +++++++++++++++++++++++++++---
cipher/chacha20-avx2-amd64.S | 13 ++++--
cipher/chacha20-sse2-amd64.S | 13 ++++--
cipher/chacha20-ssse3-amd64.S | 13 ++++--
cipher/chacha20.c | 43 ++++++++++++++-----
cipher/cipher-poly1305.c | 2 +-
cipher/des-amd64.S | 29 ++++++++-----
cipher/des.c | 61 +++++++++++++++++++++++++--
cipher/poly1305-avx2-amd64.S | 22 ++++++----
cipher/poly1305-internal.h | 27 +++++++++---
cipher/poly1305-sse2-amd64.S | 22 ++++++----
cipher/poly1305.c | 33 ++++++++-------
cipher/salsa20-amd64.S | 17 +++++---
cipher/salsa20.c | 26 +++++++++---
cipher/serpent-avx2-amd64.S | 29 ++++++++-----
cipher/serpent-sse2-amd64.S | 29 ++++++++-----
cipher/serpent.c | 30 ++++++++++----
cipher/twofish-amd64.S | 37 ++++++++++-------
cipher/twofish.c | 84 +++++++++++++++++++++++++++++++++++---
doc/gcrypt.texi | 9 ++--
28 files changed, 699 insertions(+), 231 deletions(-)
hooks/post-receive
--
The GNU crypto library
http://git.gnupg.org
More information about the Gnupg-commits
mailing list