[PATCH 5/5] mpi: add ARMv6 assembly
Jussi Kivilinna
jussi.kivilinna at iki.fi
Mon Aug 19 11:16:16 CEST 2013
* mpi/armv6/mpi-asm-defs.h: New.
* mpi/armv6/mpih-add1.S: New.
* mpi/armv6/mpih-mul1.S: New.
* mpi/armv6/mpih-mul2.S: New.
* mpi/armv6/mpih-mul3.S: New.
* mpi/armv6/mpih-sub1.S: New.
* mpi/config.links [arm]: Enable ARMv6 assembly.
--
Add mpi assembly for ARMv6 (or later). These are partly based on ARM assembly
found in GMP 4.2.1.
Speed-up, old time / new time (Cortex-A8, 1 GHz):
Algorithm generate 100*sign 100*verify
------------------------------------------------
ECDSA 192 bit 1.14x 1.10x 1.13x
ECDSA 224 bit 1.11x 1.12x 1.12x
ECDSA 256 bit 1.20x 1.13x 1.14x
ECDSA 384 bit 1.13x 1.21x 1.21x
ECDSA 521 bit 1.17x 1.20x 1.22x
Algorithm generate 100*sign 100*verify
------------------------------------------------
RSA 1024 bit - 1.31x 1.60x
RSA 2048 bit - 1.41x 1.47x
RSA 3072 bit - 1.50x 1.63x
RSA 4096 bit - 1.50x 1.57x
Algorithm generate 100*sign 100*verify
------------------------------------------------
DSA 1024/160 - 1.39x 1.38x
DSA 2048/224 - 1.50x 1.51x
DSA 3072/256 - 1.59x 1.64x
NEW:
Algorithm generate 100*sign 100*verify
------------------------------------------------
ECDSA 192 bit 70ms 1750ms 3170ms
ECDSA 224 bit 90ms 2210ms 4250ms
ECDSA 256 bit 100ms 2710ms 5170ms
ECDSA 384 bit 230ms 5670ms 11040ms
ECDSA 521 bit 540ms 13370ms 25870ms
Algorithm generate 100*sign 100*verify
------------------------------------------------
RSA 1024 bit 360ms 2200ms 50ms
RSA 2048 bit 2770ms 11900ms 150ms
RSA 3072 bit 6680ms 32530ms 270ms
RSA 4096 bit 10320ms 69440ms 460ms
Algorithm generate 100*sign 100*verify
------------------------------------------------
DSA 1024/160 - 990ms 910ms
DSA 2048/224 - 3830ms 3410ms
DSA 3072/256 - 8270ms 7030ms
OLD:
Algorithm generate 100*sign 100*verify
------------------------------------------------
ECDSA 192 bit 80ms 1920ms 3580ms
ECDSA 224 bit 100ms 2470ms 4760ms
ECDSA 256 bit 120ms 3050ms 5870ms
ECDSA 384 bit 260ms 6840ms 13330ms
ECDSA 521 bit 630ms 16080ms 31500ms
Algorithm generate 100*sign 100*verify
------------------------------------------------
RSA 1024 bit 450ms 2890ms 80ms
RSA 2048 bit 2320ms 16760ms 220ms
RSA 3072 bit 26300ms 48650ms 440ms
RSA 4096 bit 15700ms 103910ms 720ms
Algorithm generate 100*sign 100*verify
------------------------------------------------
DSA 1024/160 - 1380ms 1260ms
DSA 2048/224 - 5740ms 5140ms
DSA 3072/256 - 13130ms 11510ms
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
mpi/armv6/mpi-asm-defs.h | 10 +++++
mpi/armv6/mpih-add1.S | 76 ++++++++++++++++++++++++++++++++++++
mpi/armv6/mpih-mul1.S | 80 ++++++++++++++++++++++++++++++++++++++
mpi/armv6/mpih-mul2.S | 94 +++++++++++++++++++++++++++++++++++++++++++++
mpi/armv6/mpih-mul3.S | 97 ++++++++++++++++++++++++++++++++++++++++++++++
mpi/armv6/mpih-sub1.S | 77 +++++++++++++++++++++++++++++++++++++
mpi/config.links | 15 +++++++
7 files changed, 449 insertions(+)
create mode 100644 mpi/armv6/mpi-asm-defs.h
create mode 100644 mpi/armv6/mpih-add1.S
create mode 100644 mpi/armv6/mpih-mul1.S
create mode 100644 mpi/armv6/mpih-mul2.S
create mode 100644 mpi/armv6/mpih-mul3.S
create mode 100644 mpi/armv6/mpih-sub1.S
diff --git a/mpi/armv6/mpi-asm-defs.h b/mpi/armv6/mpi-asm-defs.h
new file mode 100644
index 0000000..13424e2
--- /dev/null
+++ b/mpi/armv6/mpi-asm-defs.h
@@ -0,0 +1,10 @@
+/* Basic per-CPU constants for the MPI machinery (ARMv6 variant).
+ * BYTES_PER_MPI_LIMB is the size of one MPI limb in bytes; here it is
+ * taken from SIZEOF_UNSIGNED_LONG, which is not defined in this file. */
+#define BYTES_PER_MPI_LIMB (SIZEOF_UNSIGNED_LONG)
+
+
+
+
+
+
diff --git a/mpi/armv6/mpih-add1.S b/mpi/armv6/mpih-add1.S
new file mode 100644
index 0000000..60ea4c3
--- /dev/null
+++ b/mpi/armv6/mpih-add1.S
@@ -0,0 +1,76 @@
+/* ARMv6 add_n -- Add two limb vectors of the same length > 0 and store
+ * sum in a third limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_add_n( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_ptr_t s2_ptr, %r2
+ * mpi_size_t size) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_add_n
+.type _gcry_mpih_add_n,%function
+_gcry_mpih_add_n: /* res[i] = s1[i] + s2[i] for `size` limbs; returns the final carry (0 or 1) in %r0 */
+ push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr}; /* scratch registers used below; %lr is reused as data and popped into %pc on return */
+ cmn %r0, #0; /* clear carry flag */
+
+ tst %r3, #3;
+ beq .Large_loop; /* size is a multiple of 4: go straight to the 4-limb loop */
+
+.Loop: /* one limb per pass until the remaining count is a multiple of 4 */
+ ldr %r4, [%r1], #4; /* next s1 limb, post-increment pointer */
+ sub %r3, #1; /* plain sub (not subs), so the carry chain is not disturbed */
+ ldr %lr, [%r2], #4; /* next s2 limb */
+ adcs %r4, %lr; /* %r4 += %lr + C, C = carry out */
+ tst %r3, #3; /* Z = remaining count is a multiple of 4; C from adcs must survive to the next adcs/adc */
+ str %r4, [%r0], #4;
+ bne .Loop;
+
+ teq %r3, #0;
+ beq .Lend; /* nothing left for the 4-limb loop */
+
+.Large_loop: /* four limbs per pass */
+ ldm %r1!, {%r4, %r6, %r8, %r10}; /* four s1 limbs */
+ ldm %r2!, {%r5, %r7, %r9, %lr}; /* four s2 limbs */
+ sub %r3, #4; /* no flag update: C still holds the carry from the previous pass */
+ adcs %r4, %r5;
+ adcs %r6, %r7;
+ adcs %r8, %r9;
+ adcs %r10, %lr;
+ teq %r3, #0; /* loop condition; C is consumed by the next adcs or by adc at .Lend */
+ stm %r0!, {%r4, %r6, %r8, %r10};
+ bne .Large_loop;
+
+.Lend:
+ adc %r0, %r3, #0; /* %r3 is 0 here, so %r0 = C */
+ pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc}; /* restore registers and return */
+.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;
diff --git a/mpi/armv6/mpih-mul1.S b/mpi/armv6/mpih-mul1.S
new file mode 100644
index 0000000..ae19a15
--- /dev/null
+++ b/mpi/armv6/mpih-mul1.S
@@ -0,0 +1,80 @@
+/* ARMv6 mul_1 -- Multiply a limb vector with a limb and store the result in
+ * a second limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_size_t s1_size, %r2
+ * mpi_limb_t s2_limb) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_mul_1
+.type _gcry_mpih_mul_1,%function
+_gcry_mpih_mul_1: /* res[i] = low limb of (s1[i] * s2_limb + carry) for s1_size limbs; returns the last carry limb in %r0 */
+ push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %lr};
+ mov %r4, #0; /* %r4 = carry limb fed into the next product, initially 0 */
+
+ tst %r2, #3;
+ beq .Large_loop; /* s1_size is a multiple of 4: skip the single-limb loop */
+
+.Loop: /* one limb per pass until the remaining count is a multiple of 4 */
+ ldr %r5, [%r1], #4;
+ mov %lr, #0;
+ umlal %r4, %lr, %r5, %r3; /* %lr:%r4 = %r5 * %r3 + %r4 */
+ sub %r2, #1;
+ str %r4, [%r0], #4; /* low half is the result limb */
+ tst %r2, #3;
+ mov %r4, %lr; /* high half becomes the next carry; mov leaves the tst result intact */
+ bne .Loop;
+
+ teq %r2, #0;
+ beq .Lend;
+
+.Large_loop: /* four limbs per pass; each product's high half is the next product's addend */
+ mov %r9, #0;
+ ldm %r1!, {%r5, %r6, %r7, %r8};
+ mov %r10, #0;
+ umlal %r4, %r9, %r5, %r3; /* %r9:%r4 = %r5 * %r3 + %r4 */
+ mov %r11, #0;
+ umlal %r9, %r10, %r6, %r3; /* %r10:%r9 = %r6 * %r3 + %r9 */
+ mov %lr, #0;
+ umlal %r10, %r11, %r7, %r3; /* %r11:%r10 = %r7 * %r3 + %r10 */
+ subs %r2, #4; /* sets Z for the bne below; the instructions in between do not update flags */
+ umlal %r11, %lr, %r8, %r3; /* %lr:%r11 = %r8 * %r3 + %r11 */
+ stm %r0!, {%r4, %r9, %r10, %r11};
+ mov %r4, %lr; /* carry limb for the next pass / return value */
+ bne .Large_loop;
+
+.Lend:
+ mov %r0, %r4; /* return the final carry limb */
+ pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %pc};
+.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;
diff --git a/mpi/armv6/mpih-mul2.S b/mpi/armv6/mpih-mul2.S
new file mode 100644
index 0000000..02f7c07
--- /dev/null
+++ b/mpi/armv6/mpih-mul2.S
@@ -0,0 +1,94 @@
+/* ARMv6 mul_2 -- Multiply a limb vector with a limb and add the result to
+ * a second limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_size_t s1_size, %r2
+ * mpi_limb_t s2_limb) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_addmul_1
+.type _gcry_mpih_addmul_1,%function
+_gcry_mpih_addmul_1: /* res[i] += s1[i] * s2_limb for s1_size limbs; returns the carry limb in %r0 */
+ push {%r4, %r5, %r6, %r8, %r10, %lr};
+ mov %lr, #0; /* %lr = high limb of the previous product, initially 0 */
+ cmn %r0, #0; /* clear carry flag */
+
+ tst %r2, #3;
+ beq .Large_loop; /* s1_size is a multiple of 4: skip the single-limb loop */
+.Loop: /* one limb per pass until the remaining count is a multiple of 4 */
+ ldr %r5, [%r1], #4; /* next s1 limb */
+ ldr %r4, [%r0]; /* current res limb (pointer advanced by the str below) */
+ sub %r2, #1;
+ adcs %r4, %lr; /* res limb + previous high limb + C; C = carry out for the next pass */
+ mov %lr, #0;
+ umlal %r4, %lr, %r5, %r3; /* %lr:%r4 = %r5 * %r3 + %r4; does not touch C */
+ tst %r2, #3; /* loop condition; C from adcs must survive to the next adcs/adc */
+ str %r4, [%r0], #4;
+ bne .Loop;
+
+ teq %r2, #0;
+ beq .Lend;
+
+.Large_loop: /* four limbs per pass, same add-then-multiply-accumulate step repeated */
+ ldm %r0, {%r4, %r6, %r8, %r10}; /* four res limbs, no writeback; stm below advances %r0 */
+ ldr %r5, [%r1], #4;
+
+ sub %r2, #4;
+ adcs %r4, %lr;
+ mov %lr, #0;
+ umlal %r4, %lr, %r5, %r3;
+
+ ldr %r5, [%r1], #4;
+ adcs %r6, %lr;
+ mov %lr, #0;
+ umlal %r6, %lr, %r5, %r3;
+
+ ldr %r5, [%r1], #4;
+ adcs %r8, %lr;
+ mov %lr, #0;
+ umlal %r8, %lr, %r5, %r3;
+
+ ldr %r5, [%r1], #4;
+ adcs %r10, %lr;
+ mov %lr, #0;
+ umlal %r10, %lr, %r5, %r3;
+
+ teq %r2, #0;
+ stm %r0!, {%r4, %r6, %r8, %r10};
+ bne .Large_loop;
+
+.Lend:
+ adc %r0, %lr, #0; /* return last high limb plus the pending carry flag */
+ pop {%r4, %r5, %r6, %r8, %r10, %pc};
+.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;
diff --git a/mpi/armv6/mpih-mul3.S b/mpi/armv6/mpih-mul3.S
new file mode 100644
index 0000000..e42fc30
--- /dev/null
+++ b/mpi/armv6/mpih-mul3.S
@@ -0,0 +1,97 @@
+/* ARMv6 mul_3 -- Multiply a limb vector with a limb and subtract the result
+ * from a second limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_size_t s1_size, %r2
+ * mpi_limb_t s2_limb) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_submul_1
+.type _gcry_mpih_submul_1,%function
+_gcry_mpih_submul_1: /* res[i] -= s1[i] * s2_limb for s1_size limbs; returns the borrow limb in %r0 */
+ push {%r4, %r5, %r6, %r8, %r9, %r10, %lr};
+ mov %lr, #0; /* %lr = high limb of the previous product, initially 0 */
+ cmp %r0, #0; /* prepare carry flag for sbc */
+
+ tst %r2, #3;
+ beq .Large_loop; /* s1_size is a multiple of 4: skip the single-limb loop */
+.Loop: /* one limb per pass until the remaining count is a multiple of 4 */
+ ldr %r5, [%r1], #4; /* next s1 limb */
+ mov %r4, %lr; /* previous high limb becomes the addend of this product */
+ mov %lr, #0;
+ ldr %r6, [%r0]; /* current res limb */
+ umlal %r4, %lr, %r5, %r3; /* %lr:%r4 = %r5 * %r3 + %r4 */
+ sub %r2, #1;
+ sbcs %r4, %r6, %r4; /* res limb - product low limb - pending borrow */
+ tst %r2, #3; /* loop condition; C from sbcs must survive to the next sbcs */
+ str %r4, [%r0], #4;
+ bne .Loop;
+
+ teq %r2, #0;
+ beq .Lend;
+
+.Large_loop: /* four limbs per pass; %lr and %r9 alternate as low/high halves of the product */
+ ldr %r5, [%r1], #4;
+ ldm %r0, {%r4, %r6, %r8, %r10}; /* four res limbs, no writeback; stm below advances %r0 */
+
+ mov %r9, #0;
+ umlal %lr, %r9, %r5, %r3; /* %r9:%lr = %r5 * %r3 + %lr */
+ ldr %r5, [%r1], #4;
+ sbcs %r4, %r4, %lr;
+
+ mov %lr, #0;
+ umlal %r9, %lr, %r5, %r3; /* %lr:%r9 = %r5 * %r3 + %r9 */
+ ldr %r5, [%r1], #4;
+ sbcs %r6, %r6, %r9;
+
+ mov %r9, #0;
+ umlal %lr, %r9, %r5, %r3; /* %r9:%lr = %r5 * %r3 + %lr */
+ ldr %r5, [%r1], #4;
+ sbcs %r8, %r8, %lr;
+
+ mov %lr, #0;
+ umlal %r9, %lr, %r5, %r3; /* %lr:%r9 = %r5 * %r3 + %r9; %lr carries into the next pass */
+ sub %r2, #4;
+ sbcs %r10, %r10, %r9;
+
+ teq %r2, #0;
+ stm %r0!, {%r4, %r6, %r8, %r10};
+ bne .Large_loop;
+
+.Lend:
+ it cc /* IT prefix for the conditional mov below; NOTE(review): presumably only significant for Thumb-2 assembly -- confirm */
+ movcc %r2, #1; /* %r2 is 0 here; becomes 1 when C is clear, i.e. a borrow is pending */
+ add %r0, %lr, %r2; /* return last high limb plus the pending borrow */
+ pop {%r4, %r5, %r6, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;
diff --git a/mpi/armv6/mpih-sub1.S b/mpi/armv6/mpih-sub1.S
new file mode 100644
index 0000000..77d05eb
--- /dev/null
+++ b/mpi/armv6/mpih-sub1.S
@@ -0,0 +1,77 @@
+/* ARMv6 sub_n -- Subtract two limb vectors of the same length > 0 and store
+ * difference in a third limb vector.
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, %r0
+ * mpi_ptr_t s1_ptr, %r1
+ * mpi_ptr_t s2_ptr, %r2
+ * mpi_size_t size) %r3
+ */
+
+.text
+
+.globl _gcry_mpih_sub_n
+.type _gcry_mpih_sub_n,%function
+_gcry_mpih_sub_n: /* res[i] = s1[i] - s2[i] for `size` limbs; returns the final borrow (0 or 1) in %r0 */
+ push {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr}; /* scratch registers used below; %lr is reused as data and popped into %pc on return */
+ cmp %r0, #0; /* set carry flag (C set = no borrow) for the first sbcs */
+
+ tst %r3, #3;
+ beq .Large_loop; /* size is a multiple of 4: go straight to the 4-limb loop */
+
+.Loop: /* one limb per pass until the remaining count is a multiple of 4 */
+ ldr %r4, [%r1], #4; /* next s1 limb, post-increment pointer */
+ sub %r3, #1; /* plain sub (not subs), so the borrow chain is not disturbed */
+ ldr %lr, [%r2], #4; /* next s2 limb */
+ sbcs %r4, %lr; /* %r4 = %r4 - %lr - pending borrow; C updated */
+ tst %r3, #3; /* loop condition; C from sbcs must survive to the next sbcs/sbc */
+ str %r4, [%r0], #4;
+ bne .Loop;
+
+ teq %r3, #0;
+ beq .Lend; /* nothing left for the 4-limb loop */
+
+.Large_loop: /* four limbs per pass */
+ ldm %r1!, {%r4, %r6, %r8, %r10}; /* four s1 limbs */
+ sub %r3, #4; /* no flag update: C still holds the borrow state from the previous pass */
+ ldm %r2!, {%r5, %r7, %r9, %lr}; /* four s2 limbs */
+ sbcs %r4, %r5;
+ sbcs %r6, %r7;
+ sbcs %r8, %r9;
+ sbcs %r10, %lr;
+ teq %r3, #0;
+ stm %r0!, {%r4, %r6, %r8, %r10};
+ bne .Large_loop;
+
+.Lend:
+ sbc %r0, %r3, #0; /* %r3 is 0 here: %r0 = 0 with no borrow, -1 with a borrow */
+ neg %r0, %r0; /* turn -1/0 into the 1/0 borrow return value */
+ pop {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc}; /* restore registers and return */
+.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;
diff --git a/mpi/config.links b/mpi/config.links
index bcc6e3e..f300255 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -136,6 +136,21 @@ case "${host}" in
mpi_extra_modules="udiv-qrnnd"
mpi_cpu_arch="alpha"
;;
+ arm*-*-*) # any host triplet beginning with "arm"
+ if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then # variable is set outside this hunk; presumably a configure check of the ARM assembler
+ if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then # set outside this hunk; presumably true when the compiler targets ARMv6 or later
+ echo '/* configured for armv6 */' >>./mpi/asm-syntax.h
+ path="armv6" # use the assembly sources under mpi/armv6
+ mpi_cpu_arch="armv6"
+ else
+ echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
+ path="" # no assembly directory; mpi_cpu_arch is left untouched here
+ fi
+ else
+ echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
+ path="" # same fallback as above when the assembler check did not pass
+ fi
+ ;;
hppa7000*-*-*)
echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h
path="hppa1.1 hppa"
More information about the Gcrypt-devel
mailing list