[git] GCRYPT - branch, master, updated. libgcrypt-1.5.0-194-gda327ae
by Jussi Kivilinna
cvs at cvs.gnupg.org
Tue Aug 20 12:11:09 CEST 2013
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".
The branch, master has been updated
via da327aef3fe24fdf98fffbc8aea69de42ed12456 (commit)
via 151f1e518be2d16bed748ba832384b0472ddcf9b (commit)
via c030e33533fb819afe195eff5f89ec39863b1fbc (commit)
via 796dda37b957b20dba391343937c6325a8c8b288 (commit)
via 79895b9459b9bf8c60cb7abf09d5bf16ed0cf6e3 (commit)
from cafadc1e4fb97581262b0081ba251e05613d4394 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit da327aef3fe24fdf98fffbc8aea69de42ed12456
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Sat Aug 17 13:41:03 2013 +0300
mpi: add ARMv6 assembly
* mpi/armv6/mpi-asm-defs.h: New.
* mpi/armv6/mpih-add1.S: New.
* mpi/armv6/mpih-mul1.S: New.
* mpi/armv6/mpih-mul2.S: New.
* mpi/armv6/mpih-mul3.S: New.
* mpi/armv6/mpih-sub1.S: New.
* mpi/config.links [arm]: Enable ARMv6 assembly.
--
Add mpi assembly for ARMv6 (or later). These are partly based on ARM assembly
found in GMP 4.2.1.
Old vs new (Cortex-A8, 1 GHz):
Algorithm generate 100*sign 100*verify
------------------------------------------------
ECDSA 192 bit 1.14x 1.10x 1.13x
ECDSA 224 bit 1.11x 1.12x 1.12x
ECDSA 256 bit 1.20x 1.13x 1.14x
ECDSA 384 bit 1.13x 1.21x 1.21x
ECDSA 521 bit 1.17x 1.20x 1.22x
Algorithm generate 100*sign 100*verify
------------------------------------------------
RSA 1024 bit - 1.31x 1.60x
RSA 2048 bit - 1.41x 1.47x
RSA 3072 bit - 1.50x 1.63x
RSA 4096 bit - 1.50x 1.57x
Algorithm generate 100*sign 100*verify
------------------------------------------------
DSA 1024/160 - 1.39x 1.38x
DSA 2048/224 - 1.50x 1.51x
DSA 3072/256 - 1.59x 1.64x
NEW:
Algorithm generate 100*sign 100*verify
------------------------------------------------
ECDSA 192 bit 70ms 1750ms 3170ms
ECDSA 224 bit 90ms 2210ms 4250ms
ECDSA 256 bit 100ms 2710ms 5170ms
ECDSA 384 bit 230ms 5670ms 11040ms
ECDSA 521 bit 540ms 13370ms 25870ms
Algorithm generate 100*sign 100*verify
------------------------------------------------
RSA 1024 bit 360ms 2200ms 50ms
RSA 2048 bit 2770ms 11900ms 150ms
RSA 3072 bit 6680ms 32530ms 270ms
RSA 4096 bit 10320ms 69440ms 460ms
Algorithm generate 100*sign 100*verify
------------------------------------------------
DSA 1024/160 - 990ms 910ms
DSA 2048/224 - 3830ms 3410ms
DSA 3072/256 - 8270ms 7030ms
OLD:
Algorithm generate 100*sign 100*verify
------------------------------------------------
ECDSA 192 bit 80ms 1920ms 3580ms
ECDSA 224 bit 100ms 2470ms 4760ms
ECDSA 256 bit 120ms 3050ms 5870ms
ECDSA 384 bit 260ms 6840ms 13330ms
ECDSA 521 bit 630ms 16080ms 31500ms
Algorithm generate 100*sign 100*verify
------------------------------------------------
RSA 1024 bit 450ms 2890ms 80ms
RSA 2048 bit 2320ms 16760ms 220ms
RSA 3072 bit 26300ms 48650ms 440ms
RSA 4096 bit 15700ms 103910ms 720ms
Algorithm generate 100*sign 100*verify
------------------------------------------------
DSA 1024/160 - 1380ms 1260ms
DSA 2048/224 - 5740ms 5140ms
DSA 3072/256 - 13130ms 11510ms
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/mpi/armv6/mpi-asm-defs.h b/mpi/armv6/mpi-asm-defs.h
new file mode 100644
index 0000000..047d1f5
--- /dev/null
+++ b/mpi/armv6/mpi-asm-defs.h
@@ -0,0 +1,4 @@
+/* This file defines some basic constants for the MPI machinery. We
+ * need to define the types on a per-CPU basis, so it is done with
+ * this file here. */
+#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG)
diff --git a/mpi/armv6/mpih-add1.S b/mpi/armv6/mpih-add1.S
new file mode 100644
index 0000000..60ea4c3
--- /dev/null
+++ b/mpi/armv6/mpih-add1.S
@@ -0,0 +1,76 @@
+/* ARMv6 add_n -- Add two limb vectors of the same length > 0 and store
+ * sum in a third limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_add_n( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_ptr_t s2_ptr, %r2
+ * mpi_size_t size) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_add_n
+.type _gcry_mpih_add_n,%function
+_gcry_mpih_add_n:
+ push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr}; /* save work registers and return address */
+ cmn %r0, #0; /* clear carry flag */
+
+ tst %r3, #3; /* is size a multiple of 4? (C is left unchanged by this immediate) */
+ beq .Large_loop; /* yes: skip the single-limb loop */
+
+.Loop: /* one limb per pass, until the remaining size is a multiple of 4 */
+ ldr %r4, [%r1], #4; /* r4 = *s1_ptr++ */
+ sub %r3, #1; /* size--; plain sub, so the carry flag is not disturbed */
+ ldr %lr, [%r2], #4; /* lr = *s2_ptr++ */
+ adcs %r4, %lr; /* r4 = r4 + lr + carry; carry out feeds the next limb */
+ tst %r3, #3; /* sets Z for the bne below */
+ str %r4, [%r0], #4; /* *res_ptr++ = r4 */
+ bne .Loop;
+
+ teq %r3, #0; /* all limbs done already? */
+ beq .Lend;
+
+.Large_loop: /* four limbs per pass */
+ ldm %r1!, {%r4, %r6, %r8, %r10}; /* next four limbs of s1 */
+ ldm %r2!, {%r5, %r7, %r9, %lr}; /* next four limbs of s2 */
+ sub %r3, #4; /* size -= 4, flags untouched */
+ adcs %r4, %r5; /* carry ripples through the four additions */
+ adcs %r6, %r7;
+ adcs %r8, %r9;
+ adcs %r10, %lr;
+ teq %r3, #0; /* sets Z for the bne below */
+ stm %r0!, {%r4, %r6, %r8, %r10}; /* store the four sum limbs */
+ bne .Large_loop;
+
+.Lend:
+ adc %r0, %r3, #0; /* r3 is 0 here, so the return value is the final carry */
+ pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc}; /* restore registers and return */
+.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;
diff --git a/mpi/armv6/mpih-mul1.S b/mpi/armv6/mpih-mul1.S
new file mode 100644
index 0000000..ae19a15
--- /dev/null
+++ b/mpi/armv6/mpih-mul1.S
@@ -0,0 +1,80 @@
+/* ARMv6 mul_1 -- Multiply a limb vector with a limb and store the result in
+ * a second limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_size_t s1_size, %r2
+ * mpi_limb_t s2_limb) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_mul_1
+.type _gcry_mpih_mul_1,%function
+_gcry_mpih_mul_1:
+ push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %lr}; /* save work registers and return address */
+ mov %r4, #0; /* r4 = carry limb passed from one product to the next */
+
+ tst %r2, #3; /* is s1_size a multiple of 4? */
+ beq .Large_loop; /* yes: skip the single-limb loop */
+
+.Loop: /* one limb per pass, until the remaining size is a multiple of 4 */
+ ldr %r5, [%r1], #4; /* r5 = *s1_ptr++ */
+ mov %lr, #0; /* high word of the accumulator starts at 0 */
+ umlal %r4, %lr, %r5, %r3; /* lr:r4 = r5 * s2_limb + r4 */
+ sub %r2, #1; /* s1_size-- */
+ str %r4, [%r0], #4; /* *res_ptr++ = low word */
+ tst %r2, #3; /* sets Z for the bne below */
+ mov %r4, %lr; /* high word becomes the next carry limb */
+ bne .Loop;
+
+ teq %r2, #0; /* all limbs done already? */
+ beq .Lend;
+
+.Large_loop: /* four limbs per pass; each product's high word seeds the next accumulator */
+ mov %r9, #0;
+ ldm %r1!, {%r5, %r6, %r7, %r8}; /* next four limbs of s1 */
+ mov %r10, #0;
+ umlal %r4, %r9, %r5, %r3; /* r9:r4 = r5 * s2_limb + r4 */
+ mov %r11, #0;
+ umlal %r9, %r10, %r6, %r3; /* r10:r9 = r6 * s2_limb + r9 */
+ mov %lr, #0;
+ umlal %r10, %r11, %r7, %r3; /* r11:r10 = r7 * s2_limb + r10 */
+ subs %r2, #4; /* s1_size -= 4; Z set here is what the bne below tests */
+ umlal %r11, %lr, %r8, %r3; /* lr:r11 = r8 * s2_limb + r11 */
+ stm %r0!, {%r4, %r9, %r10, %r11}; /* store the four result limbs */
+ mov %r4, %lr; /* carry limb for the next pass */
+ bne .Large_loop;
+
+.Lend:
+ mov %r0, %r4; /* return the final carry limb */
+ pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %pc}; /* restore registers and return */
+.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;
diff --git a/mpi/armv6/mpih-mul2.S b/mpi/armv6/mpih-mul2.S
new file mode 100644
index 0000000..02f7c07
--- /dev/null
+++ b/mpi/armv6/mpih-mul2.S
@@ -0,0 +1,94 @@
+/* ARMv6 mul_2 -- Multiply a limb vector with a limb and add the result to
+ * a second limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_size_t s1_size, %r2
+ * mpi_limb_t s2_limb) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_addmul_1
+.type _gcry_mpih_addmul_1,%function
+_gcry_mpih_addmul_1:
+ push {%r4, %r5, %r6, %r8, %r10, %lr}; /* save work registers and return address */
+ mov %lr, #0; /* lr = high word carried from the previous product */
+ cmn %r0, #0; /* clear carry flag */
+
+ tst %r2, #3; /* is s1_size a multiple of 4? */
+ beq .Large_loop; /* yes: skip the single-limb loop */
+.Loop: /* one limb per pass, until the remaining size is a multiple of 4 */
+ ldr %r5, [%r1], #4; /* r5 = *s1_ptr++ */
+ ldr %r4, [%r0]; /* r4 = *res_ptr (written back below) */
+ sub %r2, #1; /* s1_size--; plain sub keeps the carry flag */
+ adcs %r4, %lr; /* r4 += previous high word + carry */
+ mov %lr, #0;
+ umlal %r4, %lr, %r5, %r3; /* lr:r4 = r5 * s2_limb + r4 */
+ tst %r2, #3; /* sets Z for the bne below */
+ str %r4, [%r0], #4; /* *res_ptr++ = low word */
+ bne .Loop;
+
+ teq %r2, #0; /* all limbs done already? */
+ beq .Lend;
+
+.Large_loop: /* four limbs per pass, same step as .Loop repeated for r4, r6, r8, r10 */
+ ldm %r0, {%r4, %r6, %r8, %r10}; /* four limbs of res (no writeback; stm below advances res_ptr) */
+ ldr %r5, [%r1], #4; /* r5 = *s1_ptr++ */
+
+ sub %r2, #4; /* s1_size -= 4, flags untouched */
+ adcs %r4, %lr;
+ mov %lr, #0;
+ umlal %r4, %lr, %r5, %r3; /* lr:r4 = r5 * s2_limb + r4 */
+
+ ldr %r5, [%r1], #4;
+ adcs %r6, %lr;
+ mov %lr, #0;
+ umlal %r6, %lr, %r5, %r3; /* lr:r6 = r5 * s2_limb + r6 */
+
+ ldr %r5, [%r1], #4;
+ adcs %r8, %lr;
+ mov %lr, #0;
+ umlal %r8, %lr, %r5, %r3; /* lr:r8 = r5 * s2_limb + r8 */
+
+ ldr %r5, [%r1], #4;
+ adcs %r10, %lr;
+ mov %lr, #0;
+ umlal %r10, %lr, %r5, %r3; /* lr:r10 = r5 * s2_limb + r10 */
+
+ teq %r2, #0; /* sets Z for the bne below */
+ stm %r0!, {%r4, %r6, %r8, %r10}; /* store the four updated limbs */
+ bne .Large_loop;
+
+.Lend:
+ adc %r0, %lr, #0; /* return last high word plus the pending carry */
+ pop {%r4, %r5, %r6, %r8, %r10, %pc}; /* restore registers and return */
+.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;
diff --git a/mpi/armv6/mpih-mul3.S b/mpi/armv6/mpih-mul3.S
new file mode 100644
index 0000000..e42fc30
--- /dev/null
+++ b/mpi/armv6/mpih-mul3.S
@@ -0,0 +1,97 @@
+/* ARMv6 mul_3 -- Multiply a limb vector with a limb and subtract the result
+ * from a second limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_size_t s1_size, %r2
+ * mpi_limb_t s2_limb) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_submul_1
+.type _gcry_mpih_submul_1,%function
+_gcry_mpih_submul_1:
+ push {%r4, %r5, %r6, %r8, %r9, %r10, %lr}; /* save work registers and return address */
+ mov %lr, #0; /* lr = high word carried from the previous product */
+ cmp %r0, #0; /* prepare carry flag for sbc */
+
+ tst %r2, #3; /* is s1_size a multiple of 4? */
+ beq .Large_loop; /* yes: skip the single-limb loop */
+.Loop: /* one limb per pass, until the remaining size is a multiple of 4 */
+ ldr %r5, [%r1], #4; /* r5 = *s1_ptr++ */
+ mov %r4, %lr; /* start the accumulator from the previous high word */
+ mov %lr, #0;
+ ldr %r6, [%r0]; /* r6 = *res_ptr */
+ umlal %r4, %lr, %r5, %r3; /* lr:r4 = r5 * s2_limb + r4 */
+ sub %r2, #1; /* s1_size--; plain sub keeps the carry flag */
+ sbcs %r4, %r6, %r4; /* r4 = r6 - r4 - borrow (borrow = carry clear) */
+ tst %r2, #3; /* sets Z for the bne below */
+ str %r4, [%r0], #4; /* *res_ptr++ = r4 */
+ bne .Loop;
+
+ teq %r2, #0; /* all limbs done already? */
+ beq .Lend;
+
+.Large_loop: /* four limbs per pass; lr and r9 alternate as low/high accumulator words */
+ ldr %r5, [%r1], #4; /* r5 = *s1_ptr++ */
+ ldm %r0, {%r4, %r6, %r8, %r10}; /* four limbs of res (no writeback; stm below advances res_ptr) */
+
+ mov %r9, #0;
+ umlal %lr, %r9, %r5, %r3; /* r9:lr = r5 * s2_limb + lr */
+ ldr %r5, [%r1], #4;
+ sbcs %r4, %r4, %lr; /* r4 -= low word + borrow */
+
+ mov %lr, #0;
+ umlal %r9, %lr, %r5, %r3; /* lr:r9 = r5 * s2_limb + r9 */
+ ldr %r5, [%r1], #4;
+ sbcs %r6, %r6, %r9;
+
+ mov %r9, #0;
+ umlal %lr, %r9, %r5, %r3; /* r9:lr = r5 * s2_limb + lr */
+ ldr %r5, [%r1], #4;
+ sbcs %r8, %r8, %lr;
+
+ mov %lr, #0;
+ umlal %r9, %lr, %r5, %r3; /* lr:r9 = r5 * s2_limb + r9 */
+ sub %r2, #4; /* s1_size -= 4, flags untouched */
+ sbcs %r10, %r10, %r9;
+
+ teq %r2, #0; /* sets Z for the bne below */
+ stm %r0!, {%r4, %r6, %r8, %r10}; /* store the four updated limbs */
+ bne .Large_loop;
+
+.Lend:
+ it cc
+ movcc %r2, #1; /* r2 is 0 here; make it 1 if a borrow is still pending */
+ add %r0, %lr, %r2; /* return last high word plus the pending borrow */
+ pop {%r4, %r5, %r6, %r8, %r9, %r10, %pc}; /* restore registers and return */
+.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;
diff --git a/mpi/armv6/mpih-sub1.S b/mpi/armv6/mpih-sub1.S
new file mode 100644
index 0000000..77d05eb
--- /dev/null
+++ b/mpi/armv6/mpih-sub1.S
@@ -0,0 +1,77 @@
+/* ARMv6 sub_n -- Subtract two limb vectors of the same length > 0 and store
+ * difference in a third limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_ptr_t s2_ptr, %r2
+ * mpi_size_t size) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_sub_n
+.type _gcry_mpih_sub_n,%function
+_gcry_mpih_sub_n:
+ push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr}; /* save work registers and return address */
+ cmp %r0, #0; /* prepare carry flag for sub */
+
+ tst %r3, #3; /* is size a multiple of 4? */
+ beq .Large_loop; /* yes: skip the single-limb loop */
+
+.Loop: /* one limb per pass, until the remaining size is a multiple of 4 */
+ ldr %r4, [%r1], #4; /* r4 = *s1_ptr++ */
+ sub %r3, #1; /* size--; plain sub keeps the carry flag */
+ ldr %lr, [%r2], #4; /* lr = *s2_ptr++ */
+ sbcs %r4, %lr; /* r4 = r4 - lr - borrow (borrow = carry clear) */
+ tst %r3, #3; /* sets Z for the bne below */
+ str %r4, [%r0], #4; /* *res_ptr++ = r4 */
+ bne .Loop;
+
+ teq %r3, #0; /* all limbs done already? */
+ beq .Lend;
+
+.Large_loop: /* four limbs per pass */
+ ldm %r1!, {%r4, %r6, %r8, %r10}; /* next four limbs of s1 */
+ sub %r3, #4; /* size -= 4, flags untouched */
+ ldm %r2!, {%r5, %r7, %r9, %lr}; /* next four limbs of s2 */
+ sbcs %r4, %r5; /* borrow ripples through the four subtractions */
+ sbcs %r6, %r7;
+ sbcs %r8, %r9;
+ sbcs %r10, %lr;
+ teq %r3, #0; /* sets Z for the bne below */
+ stm %r0!, {%r4, %r6, %r8, %r10}; /* store the four difference limbs */
+ bne .Large_loop;
+
+.Lend:
+ sbc %r0, %r3, #0; /* r3 is 0 here: r0 = 0 if no borrow, -1 if borrow */
+ neg %r0, %r0; /* return the borrow as 0 or 1 */
+ pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc}; /* restore registers and return */
+.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;
diff --git a/mpi/config.links b/mpi/config.links
index bcc6e3e..f300255 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -136,6 +136,21 @@ case "${host}" in
mpi_extra_modules="udiv-qrnnd"
mpi_cpu_arch="alpha"
;;
+ arm*-*-*)
+ if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then # assembler check passed
+ if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then # compiler targets ARMv6 or newer
+ echo '/* configured for armv6 */' >>./mpi/asm-syntax.h
+ path="armv6"
+ mpi_cpu_arch="armv6"
+ else # pre-ARMv6 target: no assembly directory selected
+ echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
+ path=""
+ fi
+ else # assembler check failed: no assembly directory selected
+ echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
+ path=""
+ fi
+ ;;
hppa7000*-*-*)
echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h
path="hppa1.1 hppa"
commit 151f1e518be2d16bed748ba832384b0472ddcf9b
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Sat Aug 17 13:41:46 2013 +0300
Move ARMv6 detection to configure.ac
* cipher/blowfish-armv6.S: Replace __ARM_ARCH >= 6 checks with
HAVE_ARM_ARCH_V6.
* cipher/blowfish.c: Ditto.
* cipher/camellia-armv6.S: Ditto.
* cipher/camellia.h: Ditto.
* cipher/cast5-armv6.S: Ditto.
* cipher/cast5.c: Ditto.
* cipher/rijndael-armv6.S: Ditto.
* cipher/rijndael.c: Ditto.
* configure.ac: Add HAVE_ARM_ARCH_V6 check.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/blowfish-armv6.S b/cipher/blowfish-armv6.S
index b11d27f..eea879f 100644
--- a/cipher/blowfish-armv6.S
+++ b/cipher/blowfish-armv6.S
@@ -20,14 +20,7 @@
#include <config.h>
-#if defined(__arm__) && defined(__ARMEL__) && \
- ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
- || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__))
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index fe4e280..2806433 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -52,14 +52,7 @@
/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
#undef USE_ARMV6_ASM
-#if defined(__arm__) && defined(__ARMEL__) && \
- ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
- || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__))
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
# if (BLOWFISH_ROUNDS == 16) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
# define USE_ARMV6_ASM 1
# endif
diff --git a/cipher/camellia-armv6.S b/cipher/camellia-armv6.S
index 769db02..3544754 100644
--- a/cipher/camellia-armv6.S
+++ b/cipher/camellia-armv6.S
@@ -20,14 +20,7 @@
#include <config.h>
-#if defined(__arm__) && defined(__ARMEL__) && \
- ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
- || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__))
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
diff --git a/cipher/camellia.h b/cipher/camellia.h
index 48f9160..72f2d1f 100644
--- a/cipher/camellia.h
+++ b/cipher/camellia.h
@@ -32,14 +32,7 @@
#include <config.h>
/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
# undef USE_ARMV6_ASM
-# if defined(__arm__) && defined(__ARMEL__) && \
- ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
- || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__))
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
# define USE_ARMV6_ASM 1
# endif
diff --git a/cipher/cast5-armv6.S b/cipher/cast5-armv6.S
index e4b2339..038fc4f 100644
--- a/cipher/cast5-armv6.S
+++ b/cipher/cast5-armv6.S
@@ -20,14 +20,7 @@
#include <config.h>
-#if defined(__arm__) && defined(__ARMEL__) && \
- ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
- || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__))
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
diff --git a/cipher/cast5.c b/cipher/cast5.c
index 9e7b50f..4377c28 100644
--- a/cipher/cast5.c
+++ b/cipher/cast5.c
@@ -53,14 +53,7 @@
/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
#undef USE_ARMV6_ASM
-#if defined(__arm__) && defined(__ARMEL__) && \
- ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
- || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__))
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
# define USE_ARMV6_ASM 1
# endif
diff --git a/cipher/rijndael-armv6.S b/cipher/rijndael-armv6.S
index e778a94..bbbfb0e 100644
--- a/cipher/rijndael-armv6.S
+++ b/cipher/rijndael-armv6.S
@@ -20,14 +20,7 @@
#include <config.h>
-#if defined(__arm__) && defined(__ARMEL__) && \
- ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
- || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__))
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 8855e0c..314f106 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -69,14 +69,7 @@
/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
#undef USE_ARMV6_ASM
-#if defined(__arm__) && defined(__ARMEL__) && \
- ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
- || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
- || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
- || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
- || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__))
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
# define USE_ARMV6_ASM 1
# endif
diff --git a/configure.ac b/configure.ac
index c33f36b..f20d0a1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -965,6 +965,29 @@ fi
#
+# Check whether compiler is configured for ARMv6 or newer architecture
+# (result is cached in gcry_cv_cc_arm_arch_is_v6; "yes" defines HAVE_ARM_ARCH_V6)
+AC_CACHE_CHECK([whether compiler is configured for ARMv6 or newer architecture],
+ [gcry_cv_cc_arm_arch_is_v6],
+ [AC_EGREP_CPP(yes,
+ [#if defined(__arm__) && \
+ ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
+ || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
+ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7EM__))
+ yes
+ #endif
+ ], gcry_cv_cc_arm_arch_is_v6=yes, gcry_cv_cc_arm_arch_is_v6=no)])
+if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then
+ AC_DEFINE(HAVE_ARM_ARCH_V6,1,
+ [Defined if ARM architecture is v6 or newer])
+fi
+
+
+#
# Check whether GCC assembler supports features needed for our ARM
# implementations
#
commit c030e33533fb819afe195eff5f89ec39863b1fbc
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Sat Aug 17 10:09:33 2013 +0300
Add optimized wipememory for ARM
* src/g10lib.h [__arm__] (fast_wipememory2_unaligned_head)
(fast_wipememory2): New macros.
--
The previous patch, which removed the _gcry_burn_stack optimization, causes
burn_stack to take over 30% of CPU time when looping 'benchmark cipher blowfish'
on ARM/Cortex-A8. Optimizing wipememory2 for ARM helps the situation a lot.
Old vs new (Cortex-A8):
ECB/Stream CBC CFB OFB CTR
--------------- --------------- --------------- --------------- ---------------
IDEA 1.20x 1.18x 1.16x 1.15x 1.16x 1.18x 1.18x 1.16x 1.16x 1.17x
3DES 1.14x 1.14x 1.12x 1.13x 1.12x 1.13x 1.12x 1.13x 1.13x 1.15x
CAST5 1.66x 1.67x 1.43x 1.00x 1.48x 1.00x 1.44x 1.44x 1.04x 0.96x
BLOWFISH 1.56x 1.66x 1.47x 1.00x 1.54x 1.05x 1.44x 1.47x 1.00x 1.00x
AES 1.52x 1.42x 1.04x 1.00x 1.00x 1.00x 1.38x 1.37x 1.00x 1.00x
AES192 1.36x 1.36x 1.00x 1.00x 1.00x 1.04x 1.26x 1.22x 1.00x 1.04x
AES256 1.32x 1.31x 1.03x 1.00x 1.00x 1.00x 1.24x 1.30x 1.03x 0.97x
TWOFISH 1.31x 1.26x 1.23x 1.00x 1.25x 1.00x 1.24x 1.23x 1.00x 1.03x
ARCFOUR 1.05x 0.96x
DES 1.31x 1.33x 1.26x 1.29x 1.28x 1.29x 1.26x 1.29x 1.27x 1.29x
TWOFISH128 1.27x 1.24x 1.23x 1.00x 1.28x 1.00x 1.21x 1.26x 0.97x 1.06x
SERPENT128 1.19x 1.19x 1.15x 1.00x 1.14x 1.00x 1.17x 1.17x 0.98x 1.00x
SERPENT192 1.19x 1.24x 1.17x 1.00x 1.14x 1.00x 1.15x 1.17x 1.00x 1.00x
SERPENT256 1.16x 1.19x 1.17x 1.00x 1.14x 1.00x 1.15x 1.15x 1.00x 1.00x
RFC2268_40 1.00x 0.99x 1.00x 1.01x 1.00x 1.00x 1.03x 1.00x 1.01x 1.00x
SEED 1.20x 1.20x 1.18x 1.17x 1.17x 1.19x 1.18x 1.16x 1.19x 1.19x
CAMELLIA128 1.38x 1.34x 1.31x 1.00x 1.31x 1.00x 1.29x 1.32x 1.00x 1.00x
CAMELLIA192 1.27x 1.27x 1.23x 1.00x 1.25x 1.03x 1.20x 1.23x 1.00x 1.00x
CAMELLIA256 1.27x 1.27x 1.26x 1.00x 1.25x 1.03x 1.20x 1.23x 1.00x 1.00x
SALSA20 1.04x 1.00x
(Note: bulk encryption/decryption do burn_stack after full buffer processing,
instead of after each block.)
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/src/g10lib.h b/src/g10lib.h
index e6d20e9..198ab38 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -248,7 +248,7 @@ void _gcry_burn_stack (int bytes);
#define wipememory(_ptr,_len) wipememory2(_ptr,0,_len)
-/* Optimized fast_wipememory2 for i386 and x86-64 architechtures. Maybe leave
+/* Optimized fast_wipememory2 for i386, x86-64 and arm architectures. May leave
tail bytes unhandled, in which case tail bytes are handled by wipememory2.
*/
#if defined(__x86_64__) && __GNUC__ >= 4
@@ -283,6 +283,38 @@ void _gcry_burn_stack (int bytes);
_vptr += 4; \
} while (_vlen >= 4); \
} while (0)
+#elif defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) && \
+ __GNUC__ >= 4
+/* ARM: the head macro word-aligns _vptr unless unaligned word stores are supported. */
+#ifdef __ARM_FEATURE_UNALIGNED
+#define fast_wipememory2_unaligned_head(_ptr,_set,_len) /*do nothing*/
+#else
+#define fast_wipememory2_unaligned_head(_vptr,_vset,_vlen) do { \
+ while((size_t)(_vptr)&3 && _vlen) \
+ { *_vptr=(_vset); _vptr++; _vlen--; } \
+ } while(0)
+#endif
+/* Fills 8 bytes per pass; advances _vptr and leaves _vlen (0..7) for the caller. */
+#define fast_wipememory2(_vptr,_vset,_vlen) do { \
+ unsigned long _vset4 = _vset; \
+ fast_wipememory2_unaligned_head(_vptr,_vset,_vlen); \
+ if (_vlen < 8) \
+ break; \
+ _vset4 *= 0x01010101; /* copy the (byte-sized) fill value into each byte */ \
+ asm volatile( \
+ /* STR, not STM: STM needs a word-aligned address, STR may not. */ \
+ "1:;\n\t" \
+ "str %[set], [%[ptr]], #4;\n\t" \
+ "str %[set], [%[ptr]], #4;\n\t" \
+ "cmp %[end], %[ptr];\n\t" \
+ "bne 1b;\n\t" \
+ : [ptr] "=r" (_vptr) \
+ : [set] "r" (_vset4), \
+ [end] "r" (_vptr+(_vlen&(~0x7))), \
+ "0" (_vptr) \
+ : "memory", "cc"); \
+ _vlen &= 0x7; \
+ } while (0)
#else
#define fast_wipememory2(_ptr,_set,_len)
#endif
commit 796dda37b957b20dba391343937c6325a8c8b288
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Fri Aug 16 19:44:55 2013 +0300
cipher: bufhelp: allow unaligned memory accesses on ARM
* cipher/bufhelp.h [__arm__ && __ARM_FEATURE_UNALIGNED]: Enable
BUFHELP_FAST_UNALIGNED_ACCESS.
--
Newer ARM systems support unaligned memory accesses and, from GCC 4.7 onwards,
this is indicated by the __ARM_FEATURE_UNALIGNED macro.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index 1c173e2..d829cf1 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -29,7 +29,8 @@
#endif
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || \
+ (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED))
/* These architectures are able of unaligned memory accesses and can
handle those fast.
*/
commit 79895b9459b9bf8c60cb7abf09d5bf16ed0cf6e3
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Sat Aug 17 10:48:36 2013 +0300
Remove burn_stack optimization
* src/misc.c (_gcry_burn_stack): Remove SIZEOF_UNSIGNED_LONG == 4 or 8
optimization.
--
At least GCC 4.6 on Debian Wheezy (armhf) generates wrong code for burn_stack,
causing the recursive structure to be transformed into an iterative one without
updating the stack pointer between iterations. Therefore only the first 64 bytes
of stack get zeroed. This appears to be fixed in GCC 4.7, but let's play it safe
and remove this optimization.
A better approach would probably be to add architecture-specific assembly
routine(s) that replace this generic function.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/src/misc.c b/src/misc.c
index 67c2e80..2d9c73a 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -290,35 +290,10 @@ _gcry_log_printhex (const char *text, const void *buffer, size_t length)
void
_gcry_burn_stack (int bytes)
{
-#if SIZEOF_UNSIGNED_LONG == 4 || SIZEOF_UNSIGNED_LONG == 8
- /* Optimized burn_stack for 32-bit and 64-bit architectures. In addition
- to loop unrolling, compiler sees that writes are within 'buf' and
- generation of stack-protection code is avoided. */
- volatile unsigned long buf[64 / SIZEOF_UNSIGNED_LONG];
-
- buf[0] = 0;
- buf[1] = 0;
- buf[2] = 0;
- buf[3] = 0;
- buf[4] = 0;
- buf[5] = 0;
- buf[6] = 0;
- buf[7] = 0;
-# if SIZEOF_UNSIGNED_LONG == 4
- buf[8] = 0;
- buf[9] = 0;
- buf[10] = 0;
- buf[11] = 0;
- buf[12] = 0;
- buf[13] = 0;
- buf[14] = 0;
- buf[15] = 0;
-# endif
-#else
char buf[64];
wipememory (buf, sizeof buf);
-#endif
+
bytes -= sizeof buf;
if (bytes > 0)
_gcry_burn_stack (bytes);
-----------------------------------------------------------------------
Summary of changes:
cipher/blowfish-armv6.S | 9 +---
cipher/blowfish.c | 9 +---
cipher/bufhelp.h | 3 +-
cipher/camellia-armv6.S | 9 +---
cipher/camellia.h | 9 +---
cipher/cast5-armv6.S | 9 +---
cipher/cast5.c | 9 +---
cipher/rijndael-armv6.S | 9 +---
cipher/rijndael.c | 9 +---
configure.ac | 23 ++++++++
mpi/{generic => armv6}/mpi-asm-defs.h | 6 --
mpi/armv6/mpih-add1.S | 76 ++++++++++++++++++++++++++
mpi/armv6/mpih-mul1.S | 80 +++++++++++++++++++++++++++
mpi/armv6/mpih-mul2.S | 94 ++++++++++++++++++++++++++++++++
mpi/armv6/mpih-mul3.S | 97 +++++++++++++++++++++++++++++++++
mpi/armv6/mpih-sub1.S | 77 ++++++++++++++++++++++++++
mpi/config.links | 15 +++++
src/g10lib.h | 34 +++++++++++-
src/misc.c | 27 +---------
19 files changed, 506 insertions(+), 98 deletions(-)
copy mpi/{generic => armv6}/mpi-asm-defs.h (97%)
create mode 100644 mpi/armv6/mpih-add1.S
create mode 100644 mpi/armv6/mpih-mul1.S
create mode 100644 mpi/armv6/mpih-mul2.S
create mode 100644 mpi/armv6/mpih-mul3.S
create mode 100644 mpi/armv6/mpih-sub1.S
hooks/post-receive
--
The GNU crypto library
http://git.gnupg.org
More information about the Gnupg-commits
mailing list