[git] GCRYPT - branch, master, updated. libgcrypt-1.5.0-329-g2fd83fa
by Dmitry Eremin-Solenikov
cvs at cvs.gnupg.org
Wed Oct 23 17:51:07 CEST 2013
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".
The branch, master has been updated
via 2fd83faa876d0be91ab7884b1a9eaa7793559eb9 (commit)
via 0b39fce7e3ce6761d6bd5195d093ec6857edb7c2 (commit)
via 10bf6a7e16ed193f90d2749970a420f00d1d3320 (commit)
from 164eb8c85d773ef4f0939115ec45f5e4b47c1700 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 2fd83faa876d0be91ab7884b1a9eaa7793559eb9
Author: Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
Date: Wed Oct 23 18:36:10 2013 +0300
Enable assembler optimizations on earlier ARM cores
* cipher/blowfish-armv6.S => cipher/blowfish-arm.S: adapt to pre-armv6 CPUs.
* cipher/blowfish.c: enable assembly on armv4/armv5 little-endian CPUs.
* cipher/camellia-armv6.S => cipher/camellia-arm.S: adapt to pre-armv6 CPUs.
* cipher/camellia.c, cipher/camellia-glue.c: enable assembly on armv4/armv5
little-endian CPUs.
* cipher/cast5-armv6.S => cipher/cast5-arm.S: adapt to pre-armv6 CPUs.
* cipher/cast5.c: enable assembly on armv4/armv5 little-endian CPUs.
* cipher/rijndael-armv6.S => cipher/rijndael-arm.S: adapt to pre-armv6 CPUs.
* cipher/rijndael.c: enable assembly on armv4/armv5 little-endian CPUs.
* cipher/twofish-armv6.S => cipher/twofish-arm.S: adapt to pre-armv6 CPUs.
* cipher/twofish.c: enable assembly on armv4/armv5 little-endian CPUs.
--
Our ARMv6-optimized assembly code can easily be adapted to earlier CPUs.
The only incompatible part is the 'rev' instruction used for byte swapping,
which is only available from ARMv6 on. On pre-ARMv6 CPUs it is replaced
with a series of four instructions.
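For reference, a minimal C sketch of what that four-instruction sequence
(the eor/mov/bic/eor chain visible in the hunks below) computes; the helper
name is illustrative only, and the shift-or pairs stand in for the ARM
'ror'-shifted operands:

  #include <stdint.h>

  /* Pre-ARMv6 byte swap, step for step as in the assembly.
   * (Illustrative name; not a function in the patch.) */
  static uint32_t host_to_be_armv5 (uint32_t x)
  {
    uint32_t t = x ^ ((x >> 16) | (x << 16)); /* eor rtmp, reg, reg, ror #16 */
    t >>= 8;                                  /* mov rtmp, rtmp, lsr #8      */
    t &= ~(uint32_t)0xff00;                   /* bic rtmp, rtmp, #65280      */
    return t ^ ((x >> 8) | (x << 24));        /* eor reg, rtmp, reg, ror #8  */
  }
  /* Example: host_to_be_armv5 (0x11223344) == 0x44332211. */

On a little-endian core this turns a host-order word into big-endian order
without 'rev', at the cost of one scratch register, which is why the
host_to_be/be_to_host macros throughout the diff gain an rtmp argument.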
Compare:
Before:
                 ECB/Stream         CBC             CFB             OFB             CTR
              --------------- --------------- --------------- --------------- ---------------
 AES            620ms  610ms   650ms  680ms   620ms  630ms   660ms  660ms   630ms  630ms
 CAMELLIA128    720ms  720ms   780ms  790ms   770ms  760ms   780ms  780ms   770ms  760ms
 CAMELLIA256    910ms  910ms   970ms  970ms   960ms  950ms   970ms  970ms   960ms  950ms
 CAST5          820ms  820ms   930ms  920ms   890ms  860ms   930ms  920ms   880ms  890ms
 BLOWFISH       550ms  560ms   650ms  660ms   630ms  600ms   660ms  650ms   610ms  620ms
After:
                 ECB/Stream         CBC             CFB             OFB             CTR
              --------------- --------------- --------------- --------------- ---------------
 AES            130ms  140ms   180ms  200ms   160ms  170ms   190ms  200ms   170ms  170ms
 CAMELLIA128    150ms  160ms   210ms  220ms   200ms  190ms   210ms  220ms   190ms  190ms
 CAMELLIA256    180ms  180ms   260ms  240ms   240ms  230ms   250ms  250ms   230ms  230ms
 CAST5          170ms  160ms   270ms  120ms   240ms  130ms   260ms  270ms   130ms  120ms
 BLOWFISH       160ms  150ms   260ms  110ms   230ms  120ms   250ms  260ms   110ms  120ms
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
[ jk: in camellia.h and twofish.c, USE_ARMV6_ASM => USE_ARM_ASM ]
[ jk: fix blowfish-arm.S when __ARM_FEATURE_UNALIGNED defined ]
[ jk: in twofish.S remove defined(HAVE_ARM_ARCH_V6) ]
[ jk: ARMv6 => ARM in comments ]
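The net effect on the C side is that every cipher now gates its ARM assembly
on little-endian mode plus a compatible assembler rather than on ARMv6. The
pattern, taken from the cast5.c/rijndael.c hunks below (blowfish.c adds a
BLOWFISH_ROUNDS == 16 check on top), is:

  /* USE_ARM_ASM indicates whether to use ARM assembly code. */
  #undef USE_ARM_ASM
  #if defined(__ARMEL__)                        /* little-endian ARM        */
  # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS   /* assembler can build .S   */
  #  define USE_ARM_ASM 1
  # endif
  #endif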
diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index e6b1745..d7db933 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -56,7 +56,7 @@ rmd.h
EXTRA_libcipher_la_SOURCES = \
arcfour.c \
blowfish.c blowfish-amd64.S \
-cast5.c cast5-amd64.S cast5-armv6.S \
+cast5.c cast5-amd64.S cast5-arm.S \
crc.c \
des.c \
dsa.c \
@@ -68,7 +68,7 @@ gost28147.c gost.h \
gostr3411-94.c \
md4.c \
md5.c \
-rijndael.c rijndael-tables.h rijndael-amd64.S rijndael-armv6.S \
+rijndael.c rijndael-tables.h rijndael-amd64.S rijndael-arm.S \
rmd160.c \
rsa.c \
salsa20.c \
@@ -81,10 +81,10 @@ sha512.c sha512-armv7-neon.S \
stribog.c \
tiger.c \
whirlpool.c \
-twofish.c twofish-amd64.S twofish-armv6.S \
+twofish.c twofish-amd64.S twofish-arm.S \
rfc2268.c \
camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \
- camellia-aesni-avx2-amd64.S camellia-armv6.S
+ camellia-aesni-avx2-amd64.S camellia-arm.S
if ENABLE_O_FLAG_MUNGING
o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g'
diff --git a/cipher/blowfish-armv6.S b/cipher/blowfish-arm.S
similarity index 78%
rename from cipher/blowfish-armv6.S
rename to cipher/blowfish-arm.S
index eea879f..43090d7 100644
--- a/cipher/blowfish-armv6.S
+++ b/cipher/blowfish-arm.S
@@ -1,4 +1,4 @@
-/* blowfish-armv6.S - ARM assembly implementation of Blowfish cipher
+/* blowfish-arm.S - ARM assembly implementation of Blowfish cipher
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
@@ -20,7 +20,7 @@
#include <config.h>
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+#if defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
@@ -97,20 +97,33 @@
#define str_unaligned_host str_unaligned_le
/* bswap on little-endian */
- #define host_to_be(reg) \
+#ifdef HAVE_ARM_ARCH_V6
+ #define host_to_be(reg, rtmp) \
rev reg, reg;
- #define be_to_host(reg) \
+ #define be_to_host(reg, rtmp) \
rev reg, reg;
#else
+ #define host_to_be(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+ #define be_to_host(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+#endif
+#else
#define ldr_unaligned_host ldr_unaligned_be
#define str_unaligned_host str_unaligned_be
/* nop on big-endian */
- #define host_to_be(reg) /*_*/
- #define be_to_host(reg) /*_*/
+ #define host_to_be(reg, rtmp) /*_*/
+ #define be_to_host(reg, rtmp) /*_*/
#endif
-#define host_to_host(x) /*_*/
+#define host_to_host(x, y) /*_*/
/***********************************************************************
* 1-way blowfish
@@ -159,31 +172,31 @@
F(RL0, RR0); \
F(RR0, RL0);
-#define read_block_aligned(rin, offs, l0, r0, convert) \
+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \
ldr l0, [rin, #((offs) + 0)]; \
ldr r0, [rin, #((offs) + 4)]; \
- convert(l0); \
- convert(r0);
+ convert(l0, rtmp); \
+ convert(r0, rtmp);
-#define write_block_aligned(rout, offs, l0, r0, convert) \
- convert(l0); \
- convert(r0); \
+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \
+ convert(l0, rtmp); \
+ convert(r0, rtmp); \
str l0, [rout, #((offs) + 0)]; \
str r0, [rout, #((offs) + 4)];
#ifdef __ARM_FEATURE_UNALIGNED
/* unaligned word reads allowed */
#define read_block(rin, offs, l0, r0, rtmp0) \
- read_block_aligned(rin, offs, l0, r0, host_to_be)
+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0)
#define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \
- write_block_aligned(rout, offs, r0, l0, be_to_host)
+ write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0)
#define read_block_host(rin, offs, l0, r0, rtmp0) \
- read_block_aligned(rin, offs, l0, r0, host_to_host)
+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0)
#define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \
- write_block_aligned(rout, offs, r0, l0, host_to_host)
+ write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0)
#else
/* need to handle unaligned reads by byte reads */
#define read_block(rin, offs, l0, r0, rtmp0) \
@@ -193,7 +206,7 @@
ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \
b 2f; \
1:;\
- read_block_aligned(rin, offs, l0, r0, host_to_be); \
+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \
2:;
#define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \
@@ -203,7 +216,7 @@
str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \
b 2f; \
1:;\
- write_block_aligned(rout, offs, l0, r0, be_to_host); \
+ write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \
2:;
#define read_block_host(rin, offs, l0, r0, rtmp0) \
@@ -213,7 +226,7 @@
ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \
b 2f; \
1:;\
- read_block_aligned(rin, offs, l0, r0, host_to_host); \
+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \
2:;
#define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \
@@ -259,10 +272,10 @@ __blowfish_enc_blk1:
.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
.align 8
-.globl _gcry_blowfish_armv6_do_encrypt
-.type _gcry_blowfish_armv6_do_encrypt,%function;
+.globl _gcry_blowfish_arm_do_encrypt
+.type _gcry_blowfish_arm_do_encrypt,%function;
-_gcry_blowfish_armv6_do_encrypt:
+_gcry_blowfish_arm_do_encrypt:
/* input:
* %r0: ctx, CTX
* %r1: u32 *ret_xl
@@ -280,13 +293,13 @@ _gcry_blowfish_armv6_do_encrypt:
str RL0, [%r2];
pop {%r4-%r11, %ip, %pc};
-.size _gcry_blowfish_armv6_do_encrypt,.-_gcry_blowfish_armv6_do_encrypt;
+.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt;
.align 3
-.global _gcry_blowfish_armv6_encrypt_block
-.type _gcry_blowfish_armv6_encrypt_block,%function;
+.global _gcry_blowfish_arm_encrypt_block
+.type _gcry_blowfish_arm_encrypt_block,%function;
-_gcry_blowfish_armv6_encrypt_block:
+_gcry_blowfish_arm_encrypt_block:
/* input:
* %r0: ctx, CTX
* %r1: dst
@@ -301,13 +314,13 @@ _gcry_blowfish_armv6_encrypt_block:
write_block(%r1, 0, RR0, RL0, RT0, RT1);
pop {%r4-%r11, %ip, %pc};
-.size _gcry_blowfish_armv6_encrypt_block,.-_gcry_blowfish_armv6_encrypt_block;
+.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block;
.align 3
-.global _gcry_blowfish_armv6_decrypt_block
-.type _gcry_blowfish_armv6_decrypt_block,%function;
+.global _gcry_blowfish_arm_decrypt_block
+.type _gcry_blowfish_arm_decrypt_block,%function;
-_gcry_blowfish_armv6_decrypt_block:
+_gcry_blowfish_arm_decrypt_block:
/* input:
* %r0: ctx, CTX
* %r1: dst
@@ -336,7 +349,7 @@ _gcry_blowfish_armv6_decrypt_block:
write_block(%r1, 0, RR0, RL0, RT0, RT1);
pop {%r4-%r11, %ip, %pc};
-.size _gcry_blowfish_armv6_decrypt_block,.-_gcry_blowfish_armv6_decrypt_block;
+.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block;
/***********************************************************************
* 2-way blowfish
@@ -441,22 +454,22 @@ _gcry_blowfish_armv6_decrypt_block:
#define round_dec2(n, load_next_key) \
F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1);
-#define read_block2_aligned(rin, l0, r0, l1, r1, convert) \
+#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \
ldr l0, [rin, #(0)]; \
ldr r0, [rin, #(4)]; \
- convert(l0); \
+ convert(l0, rtmp); \
ldr l1, [rin, #(8)]; \
- convert(r0); \
+ convert(r0, rtmp); \
ldr r1, [rin, #(12)]; \
- convert(l1); \
- convert(r1);
+ convert(l1, rtmp); \
+ convert(r1, rtmp);
-#define write_block2_aligned(rout, l0, r0, l1, r1, convert) \
- convert(l0); \
- convert(r0); \
- convert(l1); \
+#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \
+ convert(l0, rtmp); \
+ convert(r0, rtmp); \
+ convert(l1, rtmp); \
str l0, [rout, #(0)]; \
- convert(r1); \
+ convert(r1, rtmp); \
str r0, [rout, #(4)]; \
str l1, [rout, #(8)]; \
str r1, [rout, #(12)];
@@ -464,16 +477,16 @@ _gcry_blowfish_armv6_decrypt_block:
#ifdef __ARM_FEATURE_UNALIGNED
/* unaligned word reads allowed */
#define read_block2(rin, l0, r0, l1, r1, rtmp0) \
- read_block2_aligned(rin, l0, r0, l1, r1, host_to_be)
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0)
#define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
- write_block2_aligned(rout, l0, r0, l1, r1, be_to_host)
+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0)
#define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
- read_block2_aligned(rin, l0, r0, l1, r1, host_to_host)
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0)
#define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
- write_block2_aligned(rout, l0, r0, l1, r1, host_to_host)
+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0)
#else
/* need to handle unaligned reads by byte reads */
#define read_block2(rin, l0, r0, l1, r1, rtmp0) \
@@ -485,7 +498,7 @@ _gcry_blowfish_armv6_decrypt_block:
ldr_unaligned_be(r1, rin, 12, rtmp0); \
b 2f; \
1:;\
- read_block2_aligned(rin, l0, r0, l1, r1, host_to_be); \
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \
2:;
#define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
@@ -497,7 +510,7 @@ _gcry_blowfish_armv6_decrypt_block:
str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \
b 2f; \
1:;\
- write_block2_aligned(rout, l0, r0, l1, r1, be_to_host); \
+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \
2:;
#define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
@@ -509,7 +522,7 @@ _gcry_blowfish_armv6_decrypt_block:
ldr_unaligned_host(r1, rin, 12, rtmp0); \
b 2f; \
1:;\
- read_block2_aligned(rin, l0, r0, l1, r1, host_to_host); \
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \
2:;
#define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
@@ -521,21 +534,21 @@ _gcry_blowfish_armv6_decrypt_block:
str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \
b 2f; \
1:;\
- write_block2_aligned(rout, l0, r0, l1, r1, host_to_host); \
+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \
2:;
#endif
.align 3
-.type _gcry_blowfish_armv6_enc_blk2,%function;
+.type _gcry_blowfish_arm_enc_blk2,%function;
-_gcry_blowfish_armv6_enc_blk2:
+_gcry_blowfish_arm_enc_blk2:
/* input:
* preloaded: CTX
* [RL0, RR0], [RL1, RR1]: src
* output:
* [RR0, RL0], [RR1, RL1]: dst
*/
- push {%lr};
+ push {RT0,%lr};
add CTXs2, CTXs0, #(s2 - s0);
mov RMASK, #(0xff << 2); /* byte mask */
@@ -550,19 +563,19 @@ _gcry_blowfish_armv6_enc_blk2:
round_enc2(14, next_key);
round_enc2(16, dummy);
- host_to_be(RR0);
- host_to_be(RL0);
- host_to_be(RR1);
- host_to_be(RL1);
+ host_to_be(RR0, RT0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR1, RT0);
+ host_to_be(RL1, RT0);
- pop {%pc};
-.size _gcry_blowfish_armv6_enc_blk2,.-_gcry_blowfish_armv6_enc_blk2;
+ pop {RT0,%pc};
+.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2;
.align 3
-.globl _gcry_blowfish_armv6_cfb_dec;
-.type _gcry_blowfish_armv6_cfb_dec,%function;
+.globl _gcry_blowfish_arm_cfb_dec;
+.type _gcry_blowfish_arm_cfb_dec,%function;
-_gcry_blowfish_armv6_cfb_dec:
+_gcry_blowfish_arm_cfb_dec:
/* input:
* %r0: CTX
* %r1: dst (2 blocks)
@@ -575,15 +588,15 @@ _gcry_blowfish_armv6_cfb_dec:
/* Load input (iv/%r3 is aligned, src/%r2 might not be) */
ldm %r3, {RL0, RR0};
- host_to_be(RL0);
- host_to_be(RR0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR0, RT0);
read_block(%r2, 0, RL1, RR1, RT0);
/* Update IV, load src[1] and save to iv[0] */
read_block_host(%r2, 8, %r5, %r6, RT0);
stm %lr, {%r5, %r6};
- bl _gcry_blowfish_armv6_enc_blk2;
+ bl _gcry_blowfish_arm_enc_blk2;
/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
/* %r1: dst, %r0: %src */
@@ -599,13 +612,13 @@ _gcry_blowfish_armv6_cfb_dec:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_blowfish_armv6_cfb_dec,.-_gcry_blowfish_armv6_cfb_dec;
+.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec;
.align 3
-.globl _gcry_blowfish_armv6_ctr_enc;
-.type _gcry_blowfish_armv6_ctr_enc,%function;
+.globl _gcry_blowfish_arm_ctr_enc;
+.type _gcry_blowfish_arm_ctr_enc,%function;
-_gcry_blowfish_armv6_ctr_enc:
+_gcry_blowfish_arm_ctr_enc:
/* input:
* %r0: CTX
* %r1: dst (2 blocks)
@@ -617,7 +630,7 @@ _gcry_blowfish_armv6_ctr_enc:
mov %lr, %r3;
/* Load IV (big => host endian) */
- read_block_aligned(%lr, 0, RL0, RR0, be_to_host);
+ read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0);
/* Construct IVs */
adds RR1, RR0, #1; /* +1 */
@@ -626,9 +639,9 @@ _gcry_blowfish_armv6_ctr_enc:
adc %r5, RL1, #0;
/* Store new IV (host => big-endian) */
- write_block_aligned(%lr, 0, %r5, %r6, host_to_be);
+ write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0);
- bl _gcry_blowfish_armv6_enc_blk2;
+ bl _gcry_blowfish_arm_enc_blk2;
/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
/* %r1: dst, %r0: %src */
@@ -644,12 +657,12 @@ _gcry_blowfish_armv6_ctr_enc:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_blowfish_armv6_ctr_enc,.-_gcry_blowfish_armv6_ctr_enc;
+.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc;
.align 3
-.type _gcry_blowfish_armv6_dec_blk2,%function;
+.type _gcry_blowfish_arm_dec_blk2,%function;
-_gcry_blowfish_armv6_dec_blk2:
+_gcry_blowfish_arm_dec_blk2:
/* input:
* preloaded: CTX
* [RL0, RR0], [RL1, RR1]: src
@@ -669,20 +682,20 @@ _gcry_blowfish_armv6_dec_blk2:
round_dec2(3, next_key);
round_dec2(1, dummy);
- host_to_be(RR0);
- host_to_be(RL0);
- host_to_be(RR1);
- host_to_be(RL1);
+ host_to_be(RR0, RT0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR1, RT0);
+ host_to_be(RL1, RT0);
b .Ldec_cbc_tail;
.ltorg
-.size _gcry_blowfish_armv6_dec_blk2,.-_gcry_blowfish_armv6_dec_blk2;
+.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2;
.align 3
-.globl _gcry_blowfish_armv6_cbc_dec;
-.type _gcry_blowfish_armv6_cbc_dec,%function;
+.globl _gcry_blowfish_arm_cbc_dec;
+.type _gcry_blowfish_arm_cbc_dec,%function;
-_gcry_blowfish_armv6_cbc_dec:
+_gcry_blowfish_arm_cbc_dec:
/* input:
* %r0: CTX
* %r1: dst (2 blocks)
@@ -695,7 +708,7 @@ _gcry_blowfish_armv6_cbc_dec:
/* dec_blk2 is only used by cbc_dec, jump directly in/out instead
* of function call. */
- b _gcry_blowfish_armv6_dec_blk2;
+ b _gcry_blowfish_arm_dec_blk2;
.Ldec_cbc_tail:
/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
@@ -724,7 +737,7 @@ _gcry_blowfish_armv6_cbc_dec:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_blowfish_armv6_cbc_dec,.-_gcry_blowfish_armv6_cbc_dec;
+.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec;
#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/
#endif /*__ARM_ARCH >= 6*/
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index 2f739c8..ed4e901 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -50,11 +50,11 @@
# define USE_AMD64_ASM 1
#endif
-/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
-#undef USE_ARMV6_ASM
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
# if (BLOWFISH_ROUNDS == 16) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
-# define USE_ARMV6_ASM 1
+# define USE_ARM_ASM 1
# endif
#endif
@@ -314,44 +314,44 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf)
return /*burn_stack*/ (2*8);
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
/* Assembly implementations of Blowfish. */
-extern void _gcry_blowfish_armv6_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
+extern void _gcry_blowfish_arm_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
u32 *ret_xr);
-extern void _gcry_blowfish_armv6_encrypt_block(BLOWFISH_context *c, byte *out,
+extern void _gcry_blowfish_arm_encrypt_block(BLOWFISH_context *c, byte *out,
const byte *in);
-extern void _gcry_blowfish_armv6_decrypt_block(BLOWFISH_context *c, byte *out,
+extern void _gcry_blowfish_arm_decrypt_block(BLOWFISH_context *c, byte *out,
const byte *in);
/* These assembly implementations process two blocks in parallel. */
-extern void _gcry_blowfish_armv6_ctr_enc(BLOWFISH_context *ctx, byte *out,
+extern void _gcry_blowfish_arm_ctr_enc(BLOWFISH_context *ctx, byte *out,
const byte *in, byte *ctr);
-extern void _gcry_blowfish_armv6_cbc_dec(BLOWFISH_context *ctx, byte *out,
+extern void _gcry_blowfish_arm_cbc_dec(BLOWFISH_context *ctx, byte *out,
const byte *in, byte *iv);
-extern void _gcry_blowfish_armv6_cfb_dec(BLOWFISH_context *ctx, byte *out,
+extern void _gcry_blowfish_arm_cfb_dec(BLOWFISH_context *ctx, byte *out,
const byte *in, byte *iv);
static void
do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
{
- _gcry_blowfish_armv6_do_encrypt (bc, ret_xl, ret_xr);
+ _gcry_blowfish_arm_do_encrypt (bc, ret_xl, ret_xr);
}
static void
do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
{
- _gcry_blowfish_armv6_encrypt_block (context, outbuf, inbuf);
+ _gcry_blowfish_arm_encrypt_block (context, outbuf, inbuf);
}
static void
do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
{
- _gcry_blowfish_armv6_decrypt_block (context, outbuf, inbuf);
+ _gcry_blowfish_arm_decrypt_block (context, outbuf, inbuf);
}
static unsigned int
@@ -370,7 +370,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf)
return /*burn_stack*/ (10*4);
}
-#else /*USE_ARMV6_ASM*/
+#else /*USE_ARM_ASM*/
#if BLOWFISH_ROUNDS != 16
static inline u32
@@ -580,7 +580,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf)
return /*burn_stack*/ (64);
}
-#endif /*!USE_AMD64_ASM&&!USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM&&!USE_ARM_ASM*/
/* Bulk encryption of complete blocks in CTR mode. This function is only
@@ -615,12 +615,12 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
/* TODO: use caching instead? */
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
{
/* Process data in 2 block chunks. */
while (nblocks >= 2)
{
- _gcry_blowfish_armv6_ctr_enc(ctx, outbuf, inbuf, ctr);
+ _gcry_blowfish_arm_ctr_enc(ctx, outbuf, inbuf, ctr);
nblocks -= 2;
outbuf += 2 * BLOWFISH_BLOCKSIZE;
@@ -683,12 +683,12 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
{
/* Process data in 2 block chunks. */
while (nblocks >= 2)
{
- _gcry_blowfish_armv6_cbc_dec(ctx, outbuf, inbuf, iv);
+ _gcry_blowfish_arm_cbc_dec(ctx, outbuf, inbuf, iv);
nblocks -= 2;
outbuf += 2 * BLOWFISH_BLOCKSIZE;
@@ -746,12 +746,12 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
{
/* Process data in 2 block chunks. */
while (nblocks >= 2)
{
- _gcry_blowfish_armv6_cfb_dec(ctx, outbuf, inbuf, iv);
+ _gcry_blowfish_arm_cfb_dec(ctx, outbuf, inbuf, iv);
nblocks -= 2;
outbuf += 2 * BLOWFISH_BLOCKSIZE;
diff --git a/cipher/camellia-armv6.S b/cipher/camellia-arm.S
similarity index 93%
rename from cipher/camellia-armv6.S
rename to cipher/camellia-arm.S
index 3544754..820c46e 100644
--- a/cipher/camellia-armv6.S
+++ b/cipher/camellia-arm.S
@@ -1,4 +1,4 @@
-/* camellia-armv6.S - ARM assembly implementation of Camellia cipher
+/* camellia-arm.S - ARM assembly implementation of Camellia cipher
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
@@ -20,7 +20,7 @@
#include <config.h>
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+#if defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
@@ -73,44 +73,56 @@
strb rtmp0, [rdst, #((offs) + 0)];
#ifdef __ARMEL__
- /* bswap on little-endian */
- #define host_to_be(reg) \
+#ifdef HAVE_ARM_ARCH_V6
+ #define host_to_be(reg, rtmp) \
rev reg, reg;
- #define be_to_host(reg) \
+ #define be_to_host(reg, rtmp) \
rev reg, reg;
#else
+ #define host_to_be(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+ #define be_to_host(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+#endif
+#else
/* nop on big-endian */
- #define host_to_be(reg) /*_*/
- #define be_to_host(reg) /*_*/
+ #define host_to_be(reg, rtmp) /*_*/
+ #define be_to_host(reg, rtmp) /*_*/
#endif
-#define ldr_input_aligned_be(rin, a, b, c, d) \
+#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \
ldr a, [rin, #0]; \
ldr b, [rin, #4]; \
- be_to_host(a); \
+ be_to_host(a, rtmp); \
ldr c, [rin, #8]; \
- be_to_host(b); \
+ be_to_host(b, rtmp); \
ldr d, [rin, #12]; \
- be_to_host(c); \
- be_to_host(d);
+ be_to_host(c, rtmp); \
+ be_to_host(d, rtmp);
-#define str_output_aligned_be(rout, a, b, c, d) \
- be_to_host(a); \
- be_to_host(b); \
+#define str_output_aligned_be(rout, a, b, c, d, rtmp) \
+ be_to_host(a, rtmp); \
+ be_to_host(b, rtmp); \
str a, [rout, #0]; \
- be_to_host(c); \
+ be_to_host(c, rtmp); \
str b, [rout, #4]; \
- be_to_host(d); \
+ be_to_host(d, rtmp); \
str c, [rout, #8]; \
str d, [rout, #12];
#ifdef __ARM_FEATURE_UNALIGNED
/* unaligned word reads/writes allowed */
#define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \
- ldr_input_aligned_be(rin, ra, rb, rc, rd)
+ ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp)
#define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
- str_output_aligned_be(rout, ra, rb, rc, rd)
+ str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0)
#else
/* need to handle unaligned reads/writes by byte reads */
#define ldr_input_be(rin, ra, rb, rc, rd, rtmp0) \
@@ -122,7 +134,7 @@
ldr_unaligned_be(rd, rin, 12, rtmp0); \
b 2f; \
1:;\
- ldr_input_aligned_be(rin, ra, rb, rc, rd); \
+ ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp0); \
2:;
#define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
@@ -134,7 +146,7 @@
str_unaligned_be(rd, rout, 12, rtmp0, rtmp1); \
b 2f; \
1:;\
- str_output_aligned_be(rout, ra, rb, rc, rd); \
+ str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0); \
2:;
#endif
@@ -240,10 +252,10 @@
str_output_be(%r1, YL, YR, XL, XR, RT0, RT1);
.align 3
-.global _gcry_camellia_armv6_encrypt_block
-.type _gcry_camellia_armv6_encrypt_block,%function;
+.global _gcry_camellia_arm_encrypt_block
+.type _gcry_camellia_arm_encrypt_block,%function;
-_gcry_camellia_armv6_encrypt_block:
+_gcry_camellia_arm_encrypt_block:
/* input:
* %r0: keytable
* %r1: dst
@@ -285,13 +297,13 @@ _gcry_camellia_armv6_encrypt_block:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_camellia_armv6_encrypt_block,.-_gcry_camellia_armv6_encrypt_block;
+.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;
.align 3
-.global _gcry_camellia_armv6_decrypt_block
-.type _gcry_camellia_armv6_decrypt_block,%function;
+.global _gcry_camellia_arm_decrypt_block
+.type _gcry_camellia_arm_decrypt_block,%function;
-_gcry_camellia_armv6_decrypt_block:
+_gcry_camellia_arm_decrypt_block:
/* input:
* %r0: keytable
* %r1: dst
@@ -330,7 +342,7 @@ _gcry_camellia_armv6_decrypt_block:
b .Ldec_128;
.ltorg
-.size _gcry_camellia_armv6_decrypt_block,.-_gcry_camellia_armv6_decrypt_block;
+.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;
.data
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 29cb7a5..e6d4029 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -193,14 +193,14 @@ camellia_setkey(void *c, const byte *key, unsigned keylen)
return 0;
}
-#ifdef USE_ARMV6_ASM
+#ifdef USE_ARM_ASM
/* Assembly implementations of CAST5. */
-extern void _gcry_camellia_armv6_encrypt_block(const KEY_TABLE_TYPE keyTable,
+extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable,
byte *outbuf, const byte *inbuf,
const int keybits);
-extern void _gcry_camellia_armv6_decrypt_block(const KEY_TABLE_TYPE keyTable,
+extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable,
byte *outbuf, const byte *inbuf,
const int keybits);
@@ -209,7 +209,7 @@ static void Camellia_EncryptBlock(const int keyBitLength,
const KEY_TABLE_TYPE keyTable,
unsigned char *cipherText)
{
- _gcry_camellia_armv6_encrypt_block(keyTable, cipherText, plaintext,
+ _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext,
keyBitLength);
}
@@ -218,7 +218,7 @@ static void Camellia_DecryptBlock(const int keyBitLength,
const KEY_TABLE_TYPE keyTable,
unsigned char *plaintext)
{
- _gcry_camellia_armv6_decrypt_block(keyTable, plaintext, cipherText,
+ _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText,
keyBitLength);
}
@@ -240,7 +240,7 @@ camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
}
-#else /*USE_ARMV6_ASM*/
+#else /*USE_ARM_ASM*/
static unsigned int
camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
@@ -276,7 +276,7 @@ camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
}
-#endif /*!USE_ARMV6_ASM*/
+#endif /*!USE_ARM_ASM*/
/* Bulk encryption of complete blocks in CTR mode. This function is only
intended for the bulk encryption feature of cipher.c. CTR is expected to be
diff --git a/cipher/camellia.c b/cipher/camellia.c
index 03510a3..9067246 100644
--- a/cipher/camellia.c
+++ b/cipher/camellia.c
@@ -861,7 +861,7 @@ void camellia_setup192(const unsigned char *key, u32 *subkey)
}
-#ifndef USE_ARMV6_ASM
+#ifndef USE_ARM_ASM
/**
* Stuff related to camellia encryption/decryption
*
@@ -1321,7 +1321,7 @@ void camellia_decrypt256(const u32 *subkey, u32 *blocks)
return;
}
-#endif /*!USE_ARMV6_ASM*/
+#endif /*!USE_ARM_ASM*/
/***
@@ -1349,7 +1349,7 @@ void Camellia_Ekeygen(const int keyBitLength,
}
-#ifndef USE_ARMV6_ASM
+#ifndef USE_ARM_ASM
void Camellia_EncryptBlock(const int keyBitLength,
const unsigned char *plaintext,
const KEY_TABLE_TYPE keyTable,
@@ -1410,4 +1410,4 @@ void Camellia_DecryptBlock(const int keyBitLength,
PUTU32(plaintext + 8, tmp[2]);
PUTU32(plaintext + 12, tmp[3]);
}
-#endif /*!USE_ARMV6_ASM*/
+#endif /*!USE_ARM_ASM*/
diff --git a/cipher/camellia.h b/cipher/camellia.h
index 72f2d1f..d0e3c18 100644
--- a/cipher/camellia.h
+++ b/cipher/camellia.h
@@ -30,11 +30,11 @@
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
-/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
-# undef USE_ARMV6_ASM
-# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+# undef USE_ARM_ASM
+# if defined(__ARMEL__)
# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
-# define USE_ARMV6_ASM 1
+# define USE_ARM_ASM 1
# endif
# endif
#endif
@@ -70,7 +70,7 @@ void Camellia_Ekeygen(const int keyBitLength,
const unsigned char *rawKey,
KEY_TABLE_TYPE keyTable);
-#ifndef USE_ARMV6_ASM
+#ifndef USE_ARM_ASM
void Camellia_EncryptBlock(const int keyBitLength,
const unsigned char *plaintext,
const KEY_TABLE_TYPE keyTable,
diff --git a/cipher/cast5-armv6.S b/cipher/cast5-arm.S
similarity index 81%
rename from cipher/cast5-armv6.S
rename to cipher/cast5-arm.S
index 038fc4f..ce7fa93 100644
--- a/cipher/cast5-armv6.S
+++ b/cipher/cast5-arm.S
@@ -1,4 +1,4 @@
-/* cast5-armv6.S - ARM assembly implementation of CAST5 cipher
+/* cast5-arm.S - ARM assembly implementation of CAST5 cipher
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
@@ -20,7 +20,7 @@
#include <config.h>
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+#if defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
@@ -99,20 +99,33 @@
#define str_unaligned_host str_unaligned_le
/* bswap on little-endian */
- #define host_to_be(reg) \
+#ifdef HAVE_ARM_ARCH_V6
+ #define host_to_be(reg, rtmp) \
rev reg, reg;
- #define be_to_host(reg) \
+ #define be_to_host(reg, rtmp) \
rev reg, reg;
#else
+ #define host_to_be(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+ #define be_to_host(reg, rtmp) \
+ eor rtmp, reg, reg, ror #16; \
+ mov rtmp, rtmp, lsr #8; \
+ bic rtmp, rtmp, #65280; \
+ eor reg, rtmp, reg, ror #8;
+#endif
+#else
#define ldr_unaligned_host ldr_unaligned_be
#define str_unaligned_host str_unaligned_be
/* nop on big-endian */
- #define host_to_be(reg) /*_*/
- #define be_to_host(reg) /*_*/
+ #define host_to_be(reg, rtmp) /*_*/
+ #define be_to_host(reg, rtmp) /*_*/
#endif
-#define host_to_host(x) /*_*/
+#define host_to_host(x, y) /*_*/
/**********************************************************************
1-way cast5
@@ -167,31 +180,31 @@
#define dec_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
Fx(n, rl, rr, 1, loadkm, shiftkr, loadkr)
-#define read_block_aligned(rin, offs, l0, r0, convert) \
+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \
ldr l0, [rin, #((offs) + 0)]; \
ldr r0, [rin, #((offs) + 4)]; \
- convert(l0); \
- convert(r0);
+ convert(l0, rtmp); \
+ convert(r0, rtmp);
-#define write_block_aligned(rout, offs, l0, r0, convert) \
- convert(l0); \
- convert(r0); \
+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \
+ convert(l0, rtmp); \
+ convert(r0, rtmp); \
str l0, [rout, #((offs) + 0)]; \
str r0, [rout, #((offs) + 4)];
#ifdef __ARM_FEATURE_UNALIGNED
/* unaligned word reads allowed */
#define read_block(rin, offs, l0, r0, rtmp0) \
- read_block_aligned(rin, offs, l0, r0, host_to_be)
+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0)
#define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \
- write_block_aligned(rout, offs, r0, l0, be_to_host)
+ write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0)
#define read_block_host(rin, offs, l0, r0, rtmp0) \
- read_block_aligned(rin, offs, l0, r0, host_to_host)
+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0)
#define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \
- write_block_aligned(rout, offs, r0, l0, host_to_host)
+ write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0)
#else
/* need to handle unaligned reads by byte reads */
#define read_block(rin, offs, l0, r0, rtmp0) \
@@ -201,7 +214,7 @@
ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \
b 2f; \
1:;\
- read_block_aligned(rin, offs, l0, r0, host_to_be); \
+ read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \
2:;
#define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \
@@ -211,7 +224,7 @@
str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \
b 2f; \
1:;\
- write_block_aligned(rout, offs, l0, r0, be_to_host); \
+ write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \
2:;
#define read_block_host(rin, offs, l0, r0, rtmp0) \
@@ -221,7 +234,7 @@
ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \
b 2f; \
1:;\
- read_block_aligned(rin, offs, l0, r0, host_to_host); \
+ read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \
2:;
#define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \
@@ -231,15 +244,15 @@
str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \
b 2f; \
1:;\
- write_block_aligned(rout, offs, l0, r0, host_to_host); \
+ write_block_aligned(rout, offs, l0, r0, host_to_host, rtmp0); \
2:;
#endif
.align 3
-.globl _gcry_cast5_armv6_encrypt_block
-.type _gcry_cast5_armv6_encrypt_block,%function;
+.globl _gcry_cast5_arm_encrypt_block
+.type _gcry_cast5_arm_encrypt_block,%function;
-_gcry_cast5_armv6_encrypt_block:
+_gcry_cast5_arm_encrypt_block:
/* input:
* %r0: CTX
* %r1: dst
@@ -279,13 +292,13 @@ _gcry_cast5_armv6_encrypt_block:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_cast5_armv6_encrypt_block,.-_gcry_cast5_armv6_encrypt_block;
+.size _gcry_cast5_arm_encrypt_block,.-_gcry_cast5_arm_encrypt_block;
.align 3
-.globl _gcry_cast5_armv6_decrypt_block
-.type _gcry_cast5_armv6_decrypt_block,%function;
+.globl _gcry_cast5_arm_decrypt_block
+.type _gcry_cast5_arm_decrypt_block,%function;
-_gcry_cast5_armv6_decrypt_block:
+_gcry_cast5_arm_decrypt_block:
/* input:
* %r0: CTX
* %r1: dst
@@ -325,7 +338,7 @@ _gcry_cast5_armv6_decrypt_block:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_cast5_armv6_decrypt_block,.-_gcry_cast5_armv6_decrypt_block;
+.size _gcry_cast5_arm_decrypt_block,.-_gcry_cast5_arm_decrypt_block;
/**********************************************************************
2-way cast5
@@ -391,22 +404,22 @@ _gcry_cast5_armv6_decrypt_block:
#define dec_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 1, loadkm, shiftkr, loadkr)
-#define read_block2_aligned(rin, l0, r0, l1, r1, convert) \
+#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \
ldr l0, [rin, #(0)]; \
ldr r0, [rin, #(4)]; \
- convert(l0); \
+ convert(l0, rtmp); \
ldr l1, [rin, #(8)]; \
- convert(r0); \
+ convert(r0, rtmp); \
ldr r1, [rin, #(12)]; \
- convert(l1); \
- convert(r1);
+ convert(l1, rtmp); \
+ convert(r1, rtmp);
-#define write_block2_aligned(rout, l0, r0, l1, r1, convert) \
- convert(l0); \
- convert(r0); \
- convert(l1); \
+#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \
+ convert(l0, rtmp); \
+ convert(r0, rtmp); \
+ convert(l1, rtmp); \
str l0, [rout, #(0)]; \
- convert(r1); \
+ convert(r1, rtmp); \
str r0, [rout, #(4)]; \
str l1, [rout, #(8)]; \
str r1, [rout, #(12)];
@@ -414,16 +427,16 @@ _gcry_cast5_armv6_decrypt_block:
#ifdef __ARM_FEATURE_UNALIGNED
/* unaligned word reads allowed */
#define read_block2(rin, l0, r0, l1, r1, rtmp0) \
- read_block2_aligned(rin, l0, r0, l1, r1, host_to_be)
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0)
#define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
- write_block2_aligned(rout, l0, r0, l1, r1, be_to_host)
+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0)
#define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
- read_block2_aligned(rin, l0, r0, l1, r1, host_to_host)
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0)
#define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
- write_block2_aligned(rout, l0, r0, l1, r1, host_to_host)
+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0)
#else
/* need to handle unaligned reads by byte reads */
#define read_block2(rin, l0, r0, l1, r1, rtmp0) \
@@ -435,7 +448,7 @@ _gcry_cast5_armv6_decrypt_block:
ldr_unaligned_be(r1, rin, 12, rtmp0); \
b 2f; \
1:;\
- read_block2_aligned(rin, l0, r0, l1, r1, host_to_be); \
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \
2:;
#define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
@@ -447,7 +460,7 @@ _gcry_cast5_armv6_decrypt_block:
str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \
b 2f; \
1:;\
- write_block2_aligned(rout, l0, r0, l1, r1, be_to_host); \
+ write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \
2:;
#define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
@@ -459,7 +472,7 @@ _gcry_cast5_armv6_decrypt_block:
ldr_unaligned_host(r1, rin, 12, rtmp0); \
b 2f; \
1:;\
- read_block2_aligned(rin, l0, r0, l1, r1, host_to_host); \
+ read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \
2:;
#define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
@@ -471,14 +484,14 @@ _gcry_cast5_armv6_decrypt_block:
str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \
b 2f; \
1:;\
- write_block2_aligned(rout, l0, r0, l1, r1, host_to_host); \
+ write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \
2:;
#endif
.align 3
-.type _gcry_cast5_armv6_enc_blk2,%function;
+.type _gcry_cast5_arm_enc_blk2,%function;
-_gcry_cast5_armv6_enc_blk2:
+_gcry_cast5_arm_enc_blk2:
/* input:
* preloaded: CTX
* [RL0, RR0], [RL1, RR1]: src
@@ -510,20 +523,20 @@ _gcry_cast5_armv6_enc_blk2:
enc_round2(14, F3, RL, RR, load_km, shift_kr, dummy);
enc_round2(15, F1, RR, RL, dummy, dummy, dummy);
- host_to_be(RR0);
- host_to_be(RL0);
- host_to_be(RR1);
- host_to_be(RL1);
+ host_to_be(RR0, RT0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR1, RT0);
+ host_to_be(RL1, RT0);
pop {%pc};
.ltorg
-.size _gcry_cast5_armv6_enc_blk2,.-_gcry_cast5_armv6_enc_blk2;
+.size _gcry_cast5_arm_enc_blk2,.-_gcry_cast5_arm_enc_blk2;
.align 3
-.globl _gcry_cast5_armv6_cfb_dec;
-.type _gcry_cast5_armv6_cfb_dec,%function;
+.globl _gcry_cast5_arm_cfb_dec;
+.type _gcry_cast5_arm_cfb_dec,%function;
-_gcry_cast5_armv6_cfb_dec:
+_gcry_cast5_arm_cfb_dec:
/* input:
* %r0: CTX
* %r1: dst (2 blocks)
@@ -536,15 +549,15 @@ _gcry_cast5_armv6_cfb_dec:
/* Load input (iv/%r3 is aligned, src/%r2 might not be) */
ldm %r3, {RL0, RR0};
- host_to_be(RL0);
- host_to_be(RR0);
+ host_to_be(RL0, RT1);
+ host_to_be(RR0, RT1);
read_block(%r2, 0, RL1, RR1, %ip);
/* Update IV, load src[1] and save to iv[0] */
read_block_host(%r2, 8, %r5, %r6, %r7);
stm %lr, {%r5, %r6};
- bl _gcry_cast5_armv6_enc_blk2;
+ bl _gcry_cast5_arm_enc_blk2;
/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
/* %r0: dst, %r1: %src */
@@ -560,13 +573,13 @@ _gcry_cast5_armv6_cfb_dec:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_cast5_armv6_cfb_dec,.-_gcry_cast5_armv6_cfb_dec;
+.size _gcry_cast5_arm_cfb_dec,.-_gcry_cast5_arm_cfb_dec;
.align 3
-.globl _gcry_cast5_armv6_ctr_enc;
-.type _gcry_cast5_armv6_ctr_enc,%function;
+.globl _gcry_cast5_arm_ctr_enc;
+.type _gcry_cast5_arm_ctr_enc,%function;
-_gcry_cast5_armv6_ctr_enc:
+_gcry_cast5_arm_ctr_enc:
/* input:
* %r0: CTX
* %r1: dst (2 blocks)
@@ -578,7 +591,7 @@ _gcry_cast5_armv6_ctr_enc:
mov %lr, %r3;
/* Load IV (big => host endian) */
- read_block_aligned(%lr, 0, RL0, RR0, be_to_host);
+ read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT1);
/* Construct IVs */
adds RR1, RR0, #1; /* +1 */
@@ -587,9 +600,9 @@ _gcry_cast5_armv6_ctr_enc:
adc %r5, RL1, #0;
/* Store new IV (host => big-endian) */
- write_block_aligned(%lr, 0, %r5, %r6, host_to_be);
+ write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT1);
- bl _gcry_cast5_armv6_enc_blk2;
+ bl _gcry_cast5_arm_enc_blk2;
/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
/* %r0: dst, %r1: %src */
@@ -605,12 +618,12 @@ _gcry_cast5_armv6_ctr_enc:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_cast5_armv6_ctr_enc,.-_gcry_cast5_armv6_ctr_enc;
+.size _gcry_cast5_arm_ctr_enc,.-_gcry_cast5_arm_ctr_enc;
.align 3
-.type _gcry_cast5_armv6_dec_blk2,%function;
+.type _gcry_cast5_arm_dec_blk2,%function;
-_gcry_cast5_armv6_dec_blk2:
+_gcry_cast5_arm_dec_blk2:
/* input:
* preloaded: CTX
* [RL0, RR0], [RL1, RR1]: src
@@ -641,20 +654,20 @@ _gcry_cast5_armv6_dec_blk2:
dec_round2(1, F2, RL, RR, load_km, shift_kr, dummy);
dec_round2(0, F1, RR, RL, dummy, dummy, dummy);
- host_to_be(RR0);
- host_to_be(RL0);
- host_to_be(RR1);
- host_to_be(RL1);
+ host_to_be(RR0, RT0);
+ host_to_be(RL0, RT0);
+ host_to_be(RR1, RT0);
+ host_to_be(RL1, RT0);
b .Ldec_cbc_tail;
.ltorg
-.size _gcry_cast5_armv6_dec_blk2,.-_gcry_cast5_armv6_dec_blk2;
+.size _gcry_cast5_arm_dec_blk2,.-_gcry_cast5_arm_dec_blk2;
.align 3
-.globl _gcry_cast5_armv6_cbc_dec;
-.type _gcry_cast5_armv6_cbc_dec,%function;
+.globl _gcry_cast5_arm_cbc_dec;
+.type _gcry_cast5_arm_cbc_dec,%function;
-_gcry_cast5_armv6_cbc_dec:
+_gcry_cast5_arm_cbc_dec:
/* input:
* %r0: CTX
* %r1: dst (2 blocks)
@@ -667,7 +680,7 @@ _gcry_cast5_armv6_cbc_dec:
/* dec_blk2 is only used by cbc_dec, jump directly in/out instead
* of function call. */
- b _gcry_cast5_armv6_dec_blk2;
+ b _gcry_cast5_arm_dec_blk2;
.Ldec_cbc_tail:
/* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
@@ -696,7 +709,7 @@ _gcry_cast5_armv6_cbc_dec:
pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_cast5_armv6_cbc_dec,.-_gcry_cast5_armv6_cbc_dec;
+.size _gcry_cast5_arm_cbc_dec,.-_gcry_cast5_arm_cbc_dec;
#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/
#endif /*__ARM_ARCH >= 6*/
diff --git a/cipher/cast5.c b/cipher/cast5.c
index 92d9af8..8c016d7 100644
--- a/cipher/cast5.c
+++ b/cipher/cast5.c
@@ -52,11 +52,11 @@
# define USE_AMD64_ASM 1
#endif
-/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
-#undef USE_ARMV6_ASM
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
-# define USE_ARMV6_ASM 1
+# define USE_ARM_ASM 1
# endif
#endif
@@ -65,7 +65,7 @@
typedef struct {
u32 Km[16];
byte Kr[16];
-#ifdef USE_ARMV6_ASM
+#ifdef USE_ARM_ASM
u32 Kr_arm_enc[16 / sizeof(u32)];
u32 Kr_arm_dec[16 / sizeof(u32)];
#endif
@@ -400,35 +400,35 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf)
return /*burn_stack*/ (2*8);
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
-/* ARMv6 assembly implementations of CAST5. */
-extern void _gcry_cast5_armv6_encrypt_block(CAST5_context *c, byte *outbuf,
+/* ARM assembly implementations of CAST5. */
+extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf,
const byte *inbuf);
-extern void _gcry_cast5_armv6_decrypt_block(CAST5_context *c, byte *outbuf,
+extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf,
const byte *inbuf);
/* These assembly implementations process two blocks in parallel. */
-extern void _gcry_cast5_armv6_ctr_enc(CAST5_context *ctx, byte *out,
+extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out,
const byte *in, byte *ctr);
-extern void _gcry_cast5_armv6_cbc_dec(CAST5_context *ctx, byte *out,
+extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out,
const byte *in, byte *iv);
-extern void _gcry_cast5_armv6_cfb_dec(CAST5_context *ctx, byte *out,
+extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out,
const byte *in, byte *iv);
static void
do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
{
- _gcry_cast5_armv6_encrypt_block (context, outbuf, inbuf);
+ _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf);
}
static void
do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
{
- _gcry_cast5_armv6_decrypt_block (context, outbuf, inbuf);
+ _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf);
}
static unsigned int
@@ -447,7 +447,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf)
return /*burn_stack*/ (10*4);
}
-#else /*USE_ARMV6_ASM*/
+#else /*USE_ARM_ASM*/
#define F1(D,m,r) ( (I = ((m) + (D))), (I=rol(I,(r))), \
(((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) )
@@ -556,7 +556,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf)
return /*burn_stack*/ (20+4*sizeof(void*));
}
-#endif /*!USE_ARMV6_ASM*/
+#endif /*!USE_ARM_ASM*/
/* Bulk encryption of complete blocks in CTR mode. This function is only
@@ -592,12 +592,12 @@ _gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
/* TODO: use caching instead? */
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
{
/* Process data in 2 block chunks. */
while (nblocks >= 2)
{
- _gcry_cast5_armv6_ctr_enc(ctx, outbuf, inbuf, ctr);
+ _gcry_cast5_arm_ctr_enc(ctx, outbuf, inbuf, ctr);
nblocks -= 2;
outbuf += 2 * CAST5_BLOCKSIZE;
@@ -660,12 +660,12 @@ _gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
{
/* Process data in 2 block chunks. */
while (nblocks >= 2)
{
- _gcry_cast5_armv6_cbc_dec(ctx, outbuf, inbuf, iv);
+ _gcry_cast5_arm_cbc_dec(ctx, outbuf, inbuf, iv);
nblocks -= 2;
outbuf += 2 * CAST5_BLOCKSIZE;
@@ -722,12 +722,12 @@ _gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
{
/* Process data in 2 block chunks. */
while (nblocks >= 2)
{
- _gcry_cast5_armv6_cfb_dec(ctx, outbuf, inbuf, iv);
+ _gcry_cast5_arm_cfb_dec(ctx, outbuf, inbuf, iv);
nblocks -= 2;
outbuf += 2 * CAST5_BLOCKSIZE;
@@ -936,7 +936,7 @@ do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen )
for(i=0; i < 16; i++ )
c->Kr[i] = k[i] & 0x1f;
-#ifdef USE_ARMV6_ASM
+#ifdef USE_ARM_ASM
for (i = 0; i < 4; i++)
{
byte Kr_arm[4];
diff --git a/cipher/rijndael-armv6.S b/cipher/rijndael-arm.S
similarity index 98%
rename from cipher/rijndael-armv6.S
rename to cipher/rijndael-arm.S
index bbbfb0e..2a747bf 100644
--- a/cipher/rijndael-armv6.S
+++ b/cipher/rijndael-arm.S
@@ -1,4 +1,4 @@
-/* rijndael-armv6.S - ARM assembly implementation of AES cipher
+/* rijndael-arm.S - ARM assembly implementation of AES cipher
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
@@ -20,7 +20,7 @@
#include <config.h>
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+#if defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
@@ -211,10 +211,10 @@
addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
.align 3
-.global _gcry_aes_armv6_encrypt_block
-.type _gcry_aes_armv6_encrypt_block,%function;
+.global _gcry_aes_arm_encrypt_block
+.type _gcry_aes_arm_encrypt_block,%function;
-_gcry_aes_armv6_encrypt_block:
+_gcry_aes_arm_encrypt_block:
/* input:
* %r0: keysched, CTX
* %r1: dst
@@ -324,7 +324,7 @@ _gcry_aes_armv6_encrypt_block:
lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD);
b .Lenc_done;
-.size _gcry_aes_armv6_encrypt_block,.-_gcry_aes_armv6_encrypt_block;
+.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;
#define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \
@@ -465,10 +465,10 @@ _gcry_aes_armv6_encrypt_block:
addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
.align 3
-.global _gcry_aes_armv6_decrypt_block
-.type _gcry_aes_armv6_decrypt_block,%function;
+.global _gcry_aes_arm_decrypt_block
+.type _gcry_aes_arm_decrypt_block,%function;
-_gcry_aes_armv6_decrypt_block:
+_gcry_aes_arm_decrypt_block:
/* input:
* %r0: keysched, CTX
* %r1: dst
@@ -573,7 +573,7 @@ _gcry_aes_armv6_decrypt_block:
decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
b .Ldec_tail;
-.size _gcry_aes_armv6_encrypt_block,.-_gcry_aes_armv6_encrypt_block;
+.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;
.data
@@ -850,4 +850,4 @@ _gcry_aes_armv6_decrypt_block:
.long 0x745c6c48, 0x0000000c, 0x4257b8d0, 0x0000007d
#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/
-#endif /*__ARM_ARCH >= 6*/
+#endif /*__ARMEL__ */
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 85c1a41..e9bb4f6 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -67,11 +67,11 @@
# define USE_AMD64_ASM 1
#endif
-/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
-#undef USE_ARMV6_ASM
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
-# define USE_ARMV6_ASM 1
+# define USE_ARM_ASM 1
# endif
#endif
@@ -123,18 +123,18 @@ extern void _gcry_aes_amd64_decrypt_block(const void *keysched_dec,
int rounds);
#endif /*USE_AMD64_ASM*/
-#ifdef USE_ARMV6_ASM
-/* ARMv6 assembly implementations of AES */
-extern void _gcry_aes_armv6_encrypt_block(const void *keysched_enc,
+#ifdef USE_ARM_ASM
+/* ARM assembly implementations of AES */
+extern void _gcry_aes_arm_encrypt_block(const void *keysched_enc,
unsigned char *out,
const unsigned char *in,
int rounds);
-extern void _gcry_aes_armv6_decrypt_block(const void *keysched_dec,
+extern void _gcry_aes_arm_decrypt_block(const void *keysched_dec,
unsigned char *out,
const unsigned char *in,
int rounds);
-#endif /*USE_ARMV6_ASM*/
+#endif /*USE_ARM_ASM*/
@@ -567,8 +567,8 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx,
{
#ifdef USE_AMD64_ASM
_gcry_aes_amd64_encrypt_block(ctx->keyschenc, b, a, ctx->rounds);
-#elif defined(USE_ARMV6_ASM)
- _gcry_aes_armv6_encrypt_block(ctx->keyschenc, b, a, ctx->rounds);
+#elif defined(USE_ARM_ASM)
+ _gcry_aes_arm_encrypt_block(ctx->keyschenc, b, a, ctx->rounds);
#else
#define rk (ctx->keyschenc)
int rounds = ctx->rounds;
@@ -651,7 +651,7 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx,
*((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]);
*((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]);
#undef rk
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
}
@@ -659,7 +659,7 @@ static void
do_encrypt (const RIJNDAEL_context *ctx,
unsigned char *bx, const unsigned char *ax)
{
-#if !defined(USE_AMD64_ASM) && !defined(USE_ARMV6_ASM)
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
/* BX and AX are not necessary correctly aligned. Thus we might
need to copy them here. We try to align to a 16 bytes. */
if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
@@ -680,7 +680,7 @@ do_encrypt (const RIJNDAEL_context *ctx,
memcpy (bx, b.b, 16);
}
else
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
{
do_encrypt_aligned (ctx, bx, ax);
}
@@ -1694,8 +1694,8 @@ do_decrypt_aligned (RIJNDAEL_context *ctx,
{
#ifdef USE_AMD64_ASM
_gcry_aes_amd64_decrypt_block(ctx->keyschdec, b, a, ctx->rounds);
-#elif defined(USE_ARMV6_ASM)
- _gcry_aes_armv6_decrypt_block(ctx->keyschdec, b, a, ctx->rounds);
+#elif defined(USE_ARM_ASM)
+ _gcry_aes_arm_decrypt_block(ctx->keyschdec, b, a, ctx->rounds);
#else
#define rk (ctx->keyschdec)
int rounds = ctx->rounds;
@@ -1779,7 +1779,7 @@ do_decrypt_aligned (RIJNDAEL_context *ctx,
*((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]);
*((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]);
#undef rk
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
}
@@ -1794,7 +1794,7 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax)
ctx->decryption_prepared = 1;
}
-#if !defined(USE_AMD64_ASM) && !defined(USE_ARMV6_ASM)
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
/* BX and AX are not necessary correctly aligned. Thus we might
need to copy them here. We try to align to a 16 bytes. */
if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
@@ -1815,7 +1815,7 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax)
memcpy (bx, b.b, 16);
}
else
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
{
do_decrypt_aligned (ctx, bx, ax);
}
diff --git a/cipher/twofish-armv6.S b/cipher/twofish-arm.S
similarity index 92%
rename from cipher/twofish-armv6.S
rename to cipher/twofish-arm.S
index b76ab37..ee22f56 100644
--- a/cipher/twofish-armv6.S
+++ b/cipher/twofish-arm.S
@@ -1,4 +1,4 @@
-/* twofish-armv6.S - ARM assembly implementation of Twofish cipher
+/* twofish-arm.S - ARM assembly implementation of Twofish cipher
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
@@ -20,7 +20,7 @@
#include <config.h>
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+#if defined(__ARMEL__)
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
.text
@@ -257,10 +257,10 @@
ror1(RD);
.align 3
-.global _gcry_twofish_armv6_encrypt_block
-.type _gcry_twofish_armv6_encrypt_block,%function;
+.global _gcry_twofish_arm_encrypt_block
+.type _gcry_twofish_arm_encrypt_block,%function;
-_gcry_twofish_armv6_encrypt_block:
+_gcry_twofish_arm_encrypt_block:
/* input:
* %r0: ctx
* %r1: dst
@@ -303,16 +303,15 @@ _gcry_twofish_armv6_encrypt_block:
str_output_le(%r1, RC, RD, RA, RB, RT0, RT1);
- pop {%r4-%r11, %ip, %lr};
- bx %lr;
+ pop {%r4-%r11, %ip, %pc};
.ltorg
-.size _gcry_twofish_armv6_encrypt_block,.-_gcry_twofish_armv6_encrypt_block;
+.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;
.align 3
-.global _gcry_twofish_armv6_decrypt_block
-.type _gcry_twofish_armv6_decrypt_block,%function;
+.global _gcry_twofish_arm_decrypt_block
+.type _gcry_twofish_arm_decrypt_block,%function;
-_gcry_twofish_armv6_decrypt_block:
+_gcry_twofish_arm_decrypt_block:
/* input:
* %r0: ctx
* %r1: dst
@@ -357,9 +356,8 @@ _gcry_twofish_armv6_decrypt_block:
str_output_le(%r1, RA, RB, RC, RD, RT0, RT1);
- pop {%r4-%r11, %ip, %lr};
- bx %lr;
-.size _gcry_twofish_armv6_decrypt_block,.-_gcry_twofish_armv6_decrypt_block;
+ pop {%r4-%r11, %ip, %pc};
+.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;
#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/
-#endif /*__ARM_ARCH >= 6*/
+#endif /*__ARMEL__*/
diff --git a/cipher/twofish.c b/cipher/twofish.c
index d2cabbe..086df76 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -57,11 +57,11 @@
# define USE_AMD64_ASM 1
#endif
-/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
-#undef USE_ARMV6_ASM
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
-# define USE_ARMV6_ASM 1
+# define USE_ARM_ASM 1
# endif
#endif
@@ -754,16 +754,16 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
const byte *in, byte *iv);
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
/* Assembly implementations of Twofish. */
-extern void _gcry_twofish_armv6_encrypt_block(const TWOFISH_context *c,
+extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c,
byte *out, const byte *in);
-extern void _gcry_twofish_armv6_decrypt_block(const TWOFISH_context *c,
+extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c,
byte *out, const byte *in);
-#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
/* Macros to compute the g() function in the encryption and decryption
* rounds. G1 is the straight g() function; G2 includes the 8-bit
@@ -837,17 +837,17 @@ twofish_encrypt (void *context, byte *out, const byte *in)
return /*burn_stack*/ (4*sizeof (void*));
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
static unsigned int
twofish_encrypt (void *context, byte *out, const byte *in)
{
TWOFISH_context *ctx = context;
- _gcry_twofish_armv6_encrypt_block(ctx, out, in);
+ _gcry_twofish_arm_encrypt_block(ctx, out, in);
return /*burn_stack*/ (4*sizeof (void*));
}
-#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
static void
do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
@@ -889,7 +889,7 @@ twofish_encrypt (void *context, byte *out, const byte *in)
return /*burn_stack*/ (24+3*sizeof (void*));
}
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
/* Decrypt one block. in and out may be the same. */
@@ -904,17 +904,17 @@ twofish_decrypt (void *context, byte *out, const byte *in)
return /*burn_stack*/ (4*sizeof (void*));
}
-#elif defined(USE_ARMV6_ASM)
+#elif defined(USE_ARM_ASM)
static unsigned int
twofish_decrypt (void *context, byte *out, const byte *in)
{
TWOFISH_context *ctx = context;
- _gcry_twofish_armv6_decrypt_block(ctx, out, in);
+ _gcry_twofish_arm_decrypt_block(ctx, out, in);
return /*burn_stack*/ (4*sizeof (void*));
}
-#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
static void
do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
@@ -957,7 +957,7 @@ twofish_decrypt (void *context, byte *out, const byte *in)
return /*burn_stack*/ (24+3*sizeof (void*));
}
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
diff --git a/configure.ac b/configure.ac
index a1ffdc7..5b7ba0d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1424,7 +1424,7 @@ if test "$found" = "1" ; then
;;
arm*-*-*)
# Build with the assembly implementation
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-armv6.lo"
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-arm.lo"
;;
esac
fi
@@ -1441,7 +1441,7 @@ if test "$found" = "1" ; then
;;
arm*-*-*)
# Build with the assembly implementation
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-armv6.lo"
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-arm.lo"
;;
esac
fi
@@ -1464,7 +1464,7 @@ if test "$found" = "1" ; then
;;
arm*-*-*)
# Build with the assembly implementation
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv6.lo"
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-arm.lo"
;;
esac
fi
@@ -1481,7 +1481,7 @@ if test "$found" = "1" ; then
;;
arm*-*-*)
# Build with the assembly implementation
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-armv6.lo"
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-arm.lo"
;;
esac
fi
@@ -1524,7 +1524,7 @@ if test "$found" = "1" ; then
case "${host}" in
arm*-*-*)
# Build with the assembly implementation
- GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-armv6.lo"
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-arm.lo"
;;
esac
commit 0b39fce7e3ce6761d6bd5195d093ec6857edb7c2
Author: Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
Date: Wed Oct 23 18:36:10 2013 +0300
mpi: enable assembler on all arm architectures
* mpi/config.links: remove check for arm >= v6
* mpi/armv6 => mpi/arm: rename directory to reflect that it is generic
enough
--
The MPI ARM assembly does not depend on the CPU being ARMv6. Verified on PXA255:
Before:
Algorithm       generate   100*sign  100*verify
------------------------------------------------
RSA 1024 bit      3990ms    57980ms      1680ms
RSA 2048 bit     59620ms   389430ms      5690ms
RSA 3072 bit    535850ms  1223200ms     12000ms
RSA 4096 bit    449350ms  2707370ms     20050ms
After:
Algorithm       generate   100*sign  100*verify
------------------------------------------------
RSA 1024 bit      2190ms    13730ms       320ms
RSA 2048 bit     12750ms    67640ms       810ms
RSA 3072 bit    110520ms   166100ms      1350ms
RSA 4096 bit    100870ms   357560ms      2170ms
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
[ jk: ARMv6 => ARM in header comments ]
diff --git a/mpi/armv6/mpi-asm-defs.h b/mpi/arm/mpi-asm-defs.h
similarity index 100%
rename from mpi/armv6/mpi-asm-defs.h
rename to mpi/arm/mpi-asm-defs.h
diff --git a/mpi/armv6/mpih-add1.S b/mpi/arm/mpih-add1.S
similarity index 93%
rename from mpi/armv6/mpih-add1.S
rename to mpi/arm/mpih-add1.S
index 60ea4c3..de6d5ed 100644
--- a/mpi/armv6/mpih-add1.S
+++ b/mpi/arm/mpih-add1.S
@@ -1,5 +1,5 @@
-/* ARMv6 add_n -- Add two limb vectors of the same length > 0 and store
- * sum in a third limb vector.
+/* ARM add_n -- Add two limb vectors of the same length > 0 and store
+ * sum in a third limb vector.
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
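The header comment renamed above describes what add_n computes: the sum of two equal-length limb vectors, with the carry returned to the caller. For reference, here is a plain-C model of such a routine -- a sketch under our own names and a 32-bit limb type, not libgcrypt's internal definitions; the assembly version does the same work with adds/adcs chains.

#include <stddef.h>
#include <stdint.h>

typedef uint32_t limb_t;        /* 32-bit limbs, as on ARM; name is ours */

/* Reference add_n: rp[] = s1[] + s2[] over n limbs, returns the carry-out. */
static limb_t
ref_add_n (limb_t *rp, const limb_t *s1, const limb_t *s2, size_t n)
{
  limb_t cy = 0;

  for (size_t i = 0; i < n; i++)
    {
      limb_t s = s1[i] + s2[i];
      limb_t c = (s < s1[i]);   /* carry out of the first addition */
      rp[i] = s + cy;
      cy = c | (rp[i] < s);     /* plus carry from adding the carry-in */
    }
  return cy;
}

The speedups in the table above come largely from running this kind of carry chain (and the corresponding multiply-accumulate loops) in registers instead of portable C.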
diff --git a/mpi/armv6/mpih-mul1.S b/mpi/arm/mpih-mul1.S
similarity index 94%
rename from mpi/armv6/mpih-mul1.S
rename to mpi/arm/mpih-mul1.S
index 0aa41ef..9e6f361 100644
--- a/mpi/armv6/mpih-mul1.S
+++ b/mpi/arm/mpih-mul1.S
@@ -1,5 +1,5 @@
-/* ARMv6 mul_1 -- Multiply a limb vector with a limb and store the result in
- * a second limb vector.
+/* ARM mul_1 -- Multiply a limb vector with a limb and store the result in
+ * a second limb vector.
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
diff --git a/mpi/armv6/mpih-mul2.S b/mpi/arm/mpih-mul2.S
similarity index 94%
rename from mpi/armv6/mpih-mul2.S
rename to mpi/arm/mpih-mul2.S
index a7eb8a1..2063be5 100644
--- a/mpi/armv6/mpih-mul2.S
+++ b/mpi/arm/mpih-mul2.S
@@ -1,5 +1,5 @@
-/* ARMv6 mul_2 -- Multiply a limb vector with a limb and add the result to
- * a second limb vector.
+/* ARM mul_2 -- Multiply a limb vector with a limb and add the result to
+ * a second limb vector.
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
diff --git a/mpi/armv6/mpih-mul3.S b/mpi/arm/mpih-mul3.S
similarity index 94%
rename from mpi/armv6/mpih-mul3.S
rename to mpi/arm/mpih-mul3.S
index 034929e..be2c5e6 100644
--- a/mpi/armv6/mpih-mul3.S
+++ b/mpi/arm/mpih-mul3.S
@@ -1,5 +1,5 @@
-/* ARMv6 mul_3 -- Multiply a limb vector with a limb and subtract the result
- * from a second limb vector.
+/* ARM mul_3 -- Multiply a limb vector with a limb and subtract the result
+ * from a second limb vector.
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
diff --git a/mpi/armv6/mpih-sub1.S b/mpi/arm/mpih-sub1.S
similarity index 93%
rename from mpi/armv6/mpih-sub1.S
rename to mpi/arm/mpih-sub1.S
index 77d05eb..a573eff 100644
--- a/mpi/armv6/mpih-sub1.S
+++ b/mpi/arm/mpih-sub1.S
@@ -1,5 +1,5 @@
-/* ARMv6 sub_n -- Subtract two limb vectors of the same length > 0 and store
- * sum in a third limb vector.
+/* ARM sub_n -- Subtract two limb vectors of the same length > 0 and store
+ * sum in a third limb vector.
*
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
*
diff --git a/mpi/config.links b/mpi/config.links
index 9fb4f10..90d1077 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -138,14 +138,9 @@ case "${host}" in
;;
arm*-*-*)
if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then
- if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then
- echo '/* configured for armv6 */' >>./mpi/asm-syntax.h
- path="armv6"
- mpi_cpu_arch="arm"
- else
- echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
- path=""
- fi
+ echo '/* configured for arm */' >>./mpi/asm-syntax.h
+ path="arm"
+ mpi_cpu_arch="arm"
else
echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
path=""
commit 10bf6a7e16ed193f90d2749970a420f00d1d3320
Author: Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
Date: Wed Oct 23 18:36:10 2013 +0300
Correct ARM assembly test in configure.ac
* configure.ac: correct the HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS test to
require neither ARMv6 nor Thumb mode. Our assembly code now works
even on ARMv4.
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
diff --git a/configure.ac b/configure.ac
index 69cfbd2..a1ffdc7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1109,11 +1109,10 @@ AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly implementat
[[__asm__(
/* Test if assembler supports UAL syntax. */
".syntax unified\n\t"
- ".thumb\n\t" /* thumb-2 in UAL, thumb-1 otherwise. */
- ".code 16\n\t"
+ ".arm\n\t" /* our assembly code is in ARM mode */
/* Following causes error if assembler ignored '.syntax unified'. */
"asmfunc:\n\t"
- "add.w %r0, %r4, %r8, ror #12;\n\t"
+ "add %r0, %r0, %r4, ror #12;\n\t"
/* Test if '.type' and '.size' are supported. */
".size asmfunc,.-asmfunc;\n\t"
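To make the probe's logic explicit: add.w is a Thumb-2-only wide encoding, so the old .thumb/.code 16 version effectively demanded a Thumb-2-capable assembler, whereas the plain add with a ror #12 shifted operand assembles in ARM mode on every architecture version -- matching what the (now ARM-mode) cipher and MPI code actually needs. Roughly the same check can be run by hand as a one-file compile test; this is an approximation with invented file and label names, not the exact configure fragment.

/* asm-probe.c -- approximate standalone version of the configure probe.
 * Build test: arm-linux-gnueabi-gcc -c asm-probe.c; success means the
 * assembler handles UAL syntax, ARM mode, and .type/.size. */
__asm__(
  /* Test if assembler supports UAL syntax. */
  ".syntax unified\n\t"
  ".arm\n\t"                          /* our assembly code is in ARM mode */
  ".text\n\t"
  "asmfunc:\n\t"
  "  add %r0, %r0, %r4, ror #12;\n\t" /* shifted-operand add, valid ARM */
  /* Test if '.type' and '.size' are supported. */
  ".type asmfunc,%function;\n\t"
  ".size asmfunc,.-asmfunc;\n\t"
);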
-----------------------------------------------------------------------
Summary of changes:
 cipher/Makefile.am                          |   8 +-
 cipher/{blowfish-armv6.S => blowfish-arm.S} | 183 ++++++++++++++-------------
 cipher/blowfish.c                           |  44 +++----
 cipher/{camellia-armv6.S => camellia-arm.S} |  70 +++++-----
 cipher/camellia-glue.c                      |  14 +-
 cipher/camellia.c                           |   8 +-
 cipher/camellia.h                           |  10 +-
 cipher/{cast5-armv6.S => cast5-arm.S}       | 173 +++++++++++++------------
 cipher/cast5.c                              |  46 +++----
 cipher/{rijndael-armv6.S => rijndael-arm.S} |  22 ++--
 cipher/rijndael.c                           |  38 +++---
 cipher/{twofish-armv6.S => twofish-arm.S}   |  28 ++--
 cipher/twofish.c                            |  32 ++---
 configure.ac                                |  15 +--
 mpi/{armv6 => arm}/mpi-asm-defs.h           |   0
 mpi/{armv6 => arm}/mpih-add1.S              |   4 +-
 mpi/{armv6 => arm}/mpih-mul1.S              |   4 +-
 mpi/{armv6 => arm}/mpih-mul2.S              |   4 +-
 mpi/{armv6 => arm}/mpih-mul3.S              |   4 +-
 mpi/{armv6 => arm}/mpih-sub1.S              |   4 +-
 mpi/config.links                            |  11 +-
 21 files changed, 376 insertions(+), 346 deletions(-)
rename cipher/{blowfish-armv6.S => blowfish-arm.S} (78%)
rename cipher/{camellia-armv6.S => camellia-arm.S} (93%)
rename cipher/{cast5-armv6.S => cast5-arm.S} (81%)
rename cipher/{rijndael-armv6.S => rijndael-arm.S} (98%)
rename cipher/{twofish-armv6.S => twofish-arm.S} (92%)
rename mpi/{armv6 => arm}/mpi-asm-defs.h (100%)
rename mpi/{armv6 => arm}/mpih-add1.S (93%)
rename mpi/{armv6 => arm}/mpih-mul1.S (94%)
rename mpi/{armv6 => arm}/mpih-mul2.S (94%)
rename mpi/{armv6 => arm}/mpih-mul3.S (94%)
rename mpi/{armv6 => arm}/mpih-sub1.S (93%)
hooks/post-receive
--
The GNU crypto library
http://git.gnupg.org