[git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-12-g7547898
by Jussi Kivilinna
cvs at cvs.gnupg.org
Mon Dec 30 14:13:59 CET 2013
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".
The branch, master has been updated
via 7547898109c72a97e3102b2a045ee4fdb2aa40bf (commit)
via a05be441d8cd89b90d8d58e3a343a436dae377d0 (commit)
from bbcb12187afb1756cb27296166b57fa19ee45d4d (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 7547898109c72a97e3102b2a045ee4fdb2aa40bf
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Mon Dec 30 15:10:13 2013 +0200
Add AMD64 assembly implementation for arcfour
* cipher/Makefile.am: Add 'arcfour-amd64.S'.
* cipher/arcfour-amd64.S: New.
* cipher/arcfour.c (USE_AMD64_ASM): New.
[USE_AMD64_ASM] (ARCFOUR_context, _gcry_arcfour_amd64)
(encrypt_stream): New.
* configure.ac [host=x86_64]: Add 'arcfour-amd64.lo'.
--
Patch adds Marc Bevand's public-domain AMD64 assembly implementation of RC4 to
libgcrypt. Original implementation is at:
http://www.zorinaq.com/papers/rc4-amd64.html
Benchmarks on Intel i5-4570 (3200 MHz):
New:
ARCFOUR | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 1.29 ns/B 737.7 MiB/s 4.14 c/B
STREAM dec | 1.31 ns/B 730.6 MiB/s 4.18 c/B
Old (C-language):
ARCFOUR | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 2.09 ns/B 457.4 MiB/s 6.67 c/B
STREAM dec | 2.09 ns/B 457.2 MiB/s 6.68 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 98c6254..15400e5 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -56,7 +56,7 @@ dsa-common.c rsa-common.c \
rmd.h
EXTRA_libcipher_la_SOURCES = \
-arcfour.c \
+arcfour.c arcfour-amd64.S \
blowfish.c blowfish-amd64.S \
cast5.c cast5-amd64.S cast5-arm.S \
crc.c \
diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
new file mode 100644
index 0000000..c32cd6f
--- /dev/null
+++ b/cipher/arcfour-amd64.S
@@ -0,0 +1,97 @@
+/*
+** RC4 implementation optimized for AMD64.
+**
+** Author: Marc Bevand <bevand_m (at) epita.fr>
+** Licence: I hereby disclaim the copyright on this code and place it
+** in the public domain.
+**
+** The throughput achieved by this code is about 320 MBytes/sec, on
+** a 1.8 GHz AMD Opteron (rev C0) processor.
+**
+** 2013/12/20 <jussi.kivilinna at iki.fi>:
+** - Integrated to libgcrypt
+** - 4.18 cycles/byte on Intel i5-4570
+*/
+
+#ifdef __x86_64__
+#include <config.h>
+#if defined(USE_ARCFOUR) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+
+.text
+.align 16
+.globl _gcry_arcfour_amd64
+.type _gcry_arcfour_amd64,@function
+_gcry_arcfour_amd64:
+ /* Arguments on entry: %rdi = key (state), %rsi = len, %rdx = in, %rcx = out. */
+ /* State layout used here: 256 4-byte S-box entries, then x at 4*256, y at 4*256+4. */
+ push %rbp
+ push %rbx
+ mov %rdi, %rbp # key = ARG(key)
+ mov %rsi, %rbx # rbx = ARG(len)
+ mov %rdx, %rsi # in = ARG(in)
+ mov %rcx, %rdi # out = ARG(out)
+ mov (4*256)(%rbp), %ecx # x = key->x
+ mov (4*256+4)(%rbp),%edx # y = key->y
+ inc %rcx # x++
+ and $255, %rcx # x &= 0xff
+ lea -8(%rbx,%rsi), %rbx # rbx = in+len-8
+ mov %rbx, %r9 # tmp = in+len-8
+ mov (%rbp,%rcx,4), %eax # tx = d[x]
+ cmp %rsi, %rbx # cmp in with in+len-8
+ jl .Lend # jump if (in+len-8 < in)
+
+.Lstart:
+ add $8, %rsi # increment in
+ add $8, %rdi # increment out
+
+ # generate the next 8 bytes of the rc4 stream into %r8
+ mov $8, %r11 # byte counter
+1: add %al, %dl # y += tx
+ mov (%rbp,%rdx,4), %ebx # ty = d[y]
+ mov %ebx, (%rbp,%rcx,4) # d[x] = ty
+ add %al, %bl # val = ty + tx
+ mov %eax, (%rbp,%rdx,4) # d[y] = tx
+ inc %cl # x++ (NEXT ROUND)
+ mov (%rbp,%rcx,4), %eax # tx = d[x] (NEXT ROUND)
+ shl $8, %r8 # make room; 8 rounds shift out all stale bits
+ movb (%rbp,%rbx,4), %r8b # val = d[val]
+ dec %r11b
+ jnz 1b
+
+ # xor 8 bytes
+ bswap %r8 # first generated byte must end up at the lowest address
+ xor -8(%rsi), %r8
+ cmp %r9, %rsi # cmp in+len-8 with in
+ mov %r8, -8(%rdi)
+ jle .Lstart # jump if (in <= in+len-8)
+
+.Lend:
+ add $8, %r9 # tmp = in+len
+
+ # handle the last bytes, one by one
+1: cmp %rsi, %r9 # cmp in with in+len
+ jle .Lfinished # jump if (in+len <= in)
+ add %al, %dl # y += tx
+ mov (%rbp,%rdx,4), %ebx # ty = d[y]
+ mov %ebx, (%rbp,%rcx,4) # d[x] = ty
+ add %al, %bl # val = ty + tx
+ mov %eax, (%rbp,%rdx,4) # d[y] = tx
+ inc %cl # x++ (NEXT ROUND)
+ mov (%rbp,%rcx,4), %eax # tx = d[x] (NEXT ROUND)
+ movb (%rbp,%rbx,4), %r8b # val = d[val]
+ xor (%rsi), %r8b # xor 1 byte
+ movb %r8b, (%rdi)
+ inc %rsi # in++
+ inc %rdi # out++
+ jmp 1b
+
+.Lfinished:
+ dec %rcx # x-- (undo the look-ahead increment)
+ /* Store each index back to the slot it was loaded from, so that the next call continues the same keystream. */
+ movb %cl, (4*256)(%rbp) # key->x = x
+ movb %dl, (4*256+4)(%rbp) # key->y = y
+ pop %rbx
+ pop %rbp
+ ret
+.L__gcry_arcfour_amd64_end:
+.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64
+
+#endif
+#endif
diff --git a/cipher/arcfour.c b/cipher/arcfour.c
index d692c84..27537bf 100644
--- a/cipher/arcfour.c
+++ b/cipher/arcfour.c
@@ -31,8 +31,33 @@
#include "g10lib.h"
#include "cipher.h"
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# define USE_AMD64_ASM 1
+#endif
+
static const char *selftest(void);
+#ifdef USE_AMD64_ASM
+
+/* Cipher state handed to _gcry_arcfour_amd64.  The assembly addresses
+   this structure by fixed byte offsets (table entries with a stride
+   of 4, indices at 4*256 and 4*256+4), so the member order and the
+   4-byte member width must stay as they are.  */
+typedef struct {
+ u32 sbox[256]; /* Permutation table; one 4-byte slot per entry. */
+ u32 idx_i, idx_j; /* Stream indices, loaded by the assembly as x and y. */
+} ARCFOUR_context;
+
+/* Assembly routine from arcfour-amd64.S.  XORs LEN bytes read from
+   INDATA with the keystream generated from the state at KEY, writes
+   the result to OUTDATA and stores the advanced indices back into
+   the state.  */
+void _gcry_arcfour_amd64(void *key, size_t len, const byte *indata,
+ byte *outdata);
+
+/* Process LENGTH bytes from INBUF into OUTBUF using the state in
+   CONTEXT.  Only forwards to the assembly routine; note that the
+   argument order differs (length first, then input, then output).  */
+static void
+encrypt_stream (void *context,
+ byte *outbuf, const byte *inbuf, size_t length)
+{
+ _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
+}
+
+#else /*!USE_AMD64_ASM*/
+
typedef struct {
byte sbox[256];
int idx_i, idx_j;
@@ -96,6 +121,8 @@ encrypt_stream (void *context,
_gcry_burn_stack (64);
}
+#endif /*!USE_AMD64_ASM*/
+
static gcry_err_code_t
do_arcfour_setkey (void *context, const byte *key, unsigned int keylen)
diff --git a/configure.ac b/configure.ac
index 8b43d9a..1715161 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1505,6 +1505,13 @@ LIST_MEMBER(arcfour, $enabled_ciphers)
if test "$found" = "1"; then
GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo"
AC_DEFINE(USE_ARCFOUR, 1, [Defined if this module should be included])
+
+ case "${host}" in
+ x86_64-*-*)
+ # Build with the assembly implementation
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour-amd64.lo"
+ ;;
+ esac
fi
LIST_MEMBER(blowfish, $enabled_ciphers)
commit a05be441d8cd89b90d8d58e3a343a436dae377d0
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Mon Dec 30 15:10:13 2013 +0200
Parse /proc/cpuinfo for ARM HW features
* src/hwf-arm.c [__linux__] (HAS_PROC_CPUINFO)
(detect_arm_proc_cpuinfo): New.
(_gcry_hwf_detect_arm) [HAS_PROC_CPUINFO]: Check '/proc/cpuinfo' for
HW features.
--
Some Linux platforms (read: Android) block read access to '/proc/self/auxv',
which prevents NEON HW detection. Patch adds alternative check which parses
'/proc/cpuinfo', which should be accessible to Android applications.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/src/hwf-arm.c b/src/hwf-arm.c
index 8071894..dbbb607 100644
--- a/src/hwf-arm.c
+++ b/src/hwf-arm.c
@@ -32,6 +32,7 @@
#endif
#undef HAS_SYS_AT_HWCAP
+#undef HAS_PROC_CPUINFO
#ifdef __linux__
#define HAS_SYS_AT_HWCAP 1
@@ -94,6 +95,54 @@ detect_arm_at_hwcap(void)
return features;
}
+#define HAS_PROC_CPUINFO 1
+
+/* Detect ARM hardware features by parsing the "Features" line found
+   in the first bytes of '/proc/cpuinfo'.
+
+   Returns a bit mask of HWF_ARM_* flags; 0 if the file cannot be read
+   or no known feature is listed.  The result of the first successful
+   read is cached in static storage and returned by later calls; a
+   failed open or read is not cached and is retried on the next call.  */
+static unsigned int
+detect_arm_proc_cpuinfo(void)
+{
+  char buf[1024]; /* large enough */
+  char *str_features, *str_eol, *str_neon;
+  FILE *f;
+  size_t readlen;
+  static int cpuinfo_initialized = 0;
+  static unsigned int stored_cpuinfo_features;
+
+  if (cpuinfo_initialized)
+    return stored_cpuinfo_features;
+
+  f = fopen("/proc/cpuinfo", "r");
+  if (!f)
+    return 0;
+
+  /* fread returns a size_t and can never exceed the requested count;
+     reserve one byte so the data can always be NUL terminated.  */
+  readlen = fread (buf, 1, sizeof(buf) - 1, f);
+  fclose (f);
+  if (readlen == 0)
+    return 0;
+
+  buf[readlen] = '\0';
+
+  cpuinfo_initialized = 1;
+  stored_cpuinfo_features = 0;
+
+  /* Find features line. */
+  str_features = strstr(buf, "Features");
+  if (!str_features)
+    return stored_cpuinfo_features;
+
+  /* Terminate the string at the end of the features line so that the
+     search below cannot run into the following lines.  */
+  str_eol = strchr(str_features, '\n');
+  if (str_eol)
+    *str_eol = '\0';
+
+  /* Check for NEON.  Every " neon" occurrence is examined so that a
+     longer flag name sharing this prefix cannot hide a later exact
+     match.  */
+  for (str_neon = strstr(str_features, " neon"); str_neon;
+       str_neon = strstr(str_neon + 5, " neon"))
+    {
+      if (str_neon[5] == ' ' || str_neon[5] == '\0')
+        {
+          stored_cpuinfo_features |= HWF_ARM_NEON;
+          break;
+        }
+    }
+
+  return stored_cpuinfo_features;
+}
+
#endif /* __linux__ */
unsigned int
@@ -103,8 +152,10 @@ _gcry_hwf_detect_arm (void)
#if defined (HAS_SYS_AT_HWCAP)
ret |= detect_arm_at_hwcap ();
-#else
- ret |= 0;
+#endif
+
+#if defined (HAS_PROC_CPUINFO)
+ ret |= detect_arm_proc_cpuinfo ();
#endif
#if defined(__ARM_NEON__) && defined(ENABLE_NEON_SUPPORT)
-----------------------------------------------------------------------
Summary of changes:
cipher/Makefile.am | 2 +-
cipher/arcfour-amd64.S | 97 ++++++++++++++++++++++++++++++++++++++++++++++++
cipher/arcfour.c | 27 ++++++++++++++
configure.ac | 7 ++++
src/hwf-arm.c | 55 ++++++++++++++++++++++++++-
5 files changed, 185 insertions(+), 3 deletions(-)
create mode 100644 cipher/arcfour-amd64.S
hooks/post-receive
--
The GNU crypto library
http://git.gnupg.org
More information about the Gnupg-commits
mailing list