[git] GCRYPT - branch, master, updated. libgcrypt-1.7.1-17-g1111d31
by Jussi Kivilinna
cvs at cvs.gnupg.org
Fri Jul 8 12:31:13 CEST 2016
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".
The branch, master has been updated
via 1111d311fd6452abd4080d1072c75ddb1b5a3dd1 (commit)
via 496790940753226f96b731a43d950bd268acd97a (commit)
from cb79630ec567a5f2e03e5f863cda168faa7b8cc8 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 1111d311fd6452abd4080d1072c75ddb1b5a3dd1
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Fri Jul 8 01:22:58 2016 +0300
Fix unaligned accesses with ldm/stm in ChaCha20 and Poly1305 ARM/NEON
* cipher/chacha20-armv7-neon.S (UNALIGNED_STMIA8)
(UNALIGNED_LDMIA4): New.
(_gcry_chacha20_armv7_neon_blocks): Use new helper macros instead of
ldm/stm instructions directly.
* cipher/poly1305-armv7-neon.S (UNALIGNED_LDMIA2)
(UNALIGNED_LDMIA4): New.
(_gcry_poly1305_armv7_neon_init_ext, _gcry_poly1305_armv7_neon_blocks)
(_gcry_poly1305_armv7_neon_finish_ext): Use new helper macros instead
of ldm instruction directly.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/cipher/chacha20-armv7-neon.S b/cipher/chacha20-armv7-neon.S
index 1a395ba..4d3340b 100644
--- a/cipher/chacha20-armv7-neon.S
+++ b/cipher/chacha20-armv7-neon.S
@@ -33,6 +33,40 @@
.fpu neon
.arm
+#define UNALIGNED_STMIA8(ptr, l0, l1, l2, l3, l4, l5, l6, l7) /* store the eight words l0..l7 at ptr and advance ptr by 32; ptr need not be word-aligned; clobbers the condition flags */ \
+ tst ptr, #3; /* a set bit among the low two address bits means ptr is not word-aligned */ \
+ beq 1f; /* aligned: use the plain stmia at 1: */ \
+ vpush {d0-d3}; /* save the NEON registers borrowed as staging space */ \
+ vmov s0, l0; \
+ vmov s1, l1; \
+ vmov s2, l2; \
+ vmov s3, l3; \
+ vmov s4, l4; \
+ vmov s5, l5; \
+ vmov s6, l6; \
+ vmov s7, l7; /* s0-s7 overlay d0-d3, which now hold the eight words */ \
+ vst1.32 {d0-d3}, [ptr]; /* 32-byte NEON store, no writeback */ \
+ add ptr, #32; /* advance ptr by hand to match the stmia ptr! path */ \
+ vpop {d0-d3}; /* restore the caller's d0-d3 */ \
+ b 2f; \
+ 1: stmia ptr!, {l0-l7}; /* range form names only l0 and l7, so l0..l7 are expected to be consecutive registers */ \
+ 2: ;
+
+#define UNALIGNED_LDMIA4(ptr, l0, l1, l2, l3) /* load four words from ptr into l0..l3 and advance ptr by 16; ptr need not be word-aligned; clobbers the condition flags */ \
+ tst ptr, #3; /* a set bit among the low two address bits means ptr is not word-aligned */ \
+ beq 1f; /* aligned: take the plain ldmia at 1: instead of the NEON detour */ \
+ vpush {d0-d1}; /* save the NEON registers borrowed as staging space */ \
+ vld1.32 {d0-d1}, [ptr]; /* 16-byte NEON load, no writeback */ \
+ add ptr, #16; /* advance ptr by hand to match the ldmia ptr! path */ \
+ vmov l0, s0; \
+ vmov l1, s1; \
+ vmov l2, s2; \
+ vmov l3, s3; /* s0-s3 overlay d0-d1, which held the four loaded words */ \
+ vpop {d0-d1}; /* restore the caller's d0-d1 */ \
+ b 2f; \
+ 1: ldmia ptr!, {l0-l3}; /* range form names only l0 and l3, so l0..l3 are expected to be consecutive registers */ \
+ 2: ;
+
.text
.globl _gcry_chacha20_armv7_neon_blocks
@@ -352,7 +386,8 @@ _gcry_chacha20_armv7_neon_blocks:
add r7, r7, r11
vadd.i32 q11, q11, q14
beq .Lchacha_blocks_neon_nomessage11
- ldmia r12!, {r8-r11}
+ UNALIGNED_LDMIA4(r12, r8, r9, r10, r11)
+ tst r12, r12
eor r0, r0, r8
eor r1, r1, r9
eor r2, r2, r10
@@ -367,7 +402,8 @@ _gcry_chacha20_armv7_neon_blocks:
add r12, r12, #16
eor r7, r7, r11
.Lchacha_blocks_neon_nomessage11:
- stmia r14!, {r0-r7}
+ UNALIGNED_STMIA8(r14, r0, r1, r2, r3, r4, r5, r6, r7)
+ tst r12, r12
ldm sp, {r0-r7}
ldr r8, [sp, #(64 +32)]
ldr r9, [sp, #(64 +36)]
@@ -391,7 +427,8 @@ _gcry_chacha20_armv7_neon_blocks:
tst r12, r12
str r9, [sp, #(64 +52)]
beq .Lchacha_blocks_neon_nomessage12
- ldmia r12!, {r8-r11}
+ UNALIGNED_LDMIA4(r12, r8, r9, r10, r11)
+ tst r12, r12
eor r0, r0, r8
eor r1, r1, r9
eor r2, r2, r10
@@ -406,7 +443,8 @@ _gcry_chacha20_armv7_neon_blocks:
add r12, r12, #16
eor r7, r7, r11
.Lchacha_blocks_neon_nomessage12:
- stmia r14!, {r0-r7}
+ UNALIGNED_STMIA8(r14, r0, r1, r2, r3, r4, r5, r6, r7)
+ tst r12, r12
beq .Lchacha_blocks_neon_nomessage13
vld1.32 {q12,q13}, [r12]!
vld1.32 {q14,q15}, [r12]!
@@ -613,7 +651,8 @@ _gcry_chacha20_armv7_neon_blocks:
tst r12, r12
add r7, r7, r11
beq .Lchacha_blocks_neon_nomessage21
- ldmia r12!, {r8-r11}
+ UNALIGNED_LDMIA4(r12, r8, r9, r10, r11)
+ tst r12, r12
eor r0, r0, r8
eor r1, r1, r9
eor r2, r2, r10
@@ -628,7 +667,7 @@ _gcry_chacha20_armv7_neon_blocks:
add r12, r12, #16
eor r7, r7, r11
.Lchacha_blocks_neon_nomessage21:
- stmia r14!, {r0-r7}
+ UNALIGNED_STMIA8(r14, r0, r1, r2, r3, r4, r5, r6, r7)
ldm sp, {r0-r7}
ldr r8, [sp, #(64 +32)]
ldr r9, [sp, #(64 +36)]
@@ -652,7 +691,8 @@ _gcry_chacha20_armv7_neon_blocks:
tst r12, r12
str r9, [sp, #(64 +52)]
beq .Lchacha_blocks_neon_nomessage22
- ldmia r12!, {r8-r11}
+ UNALIGNED_LDMIA4(r12, r8, r9, r10, r11)
+ tst r12, r12
eor r0, r0, r8
eor r1, r1, r9
eor r2, r2, r10
@@ -667,7 +707,7 @@ _gcry_chacha20_armv7_neon_blocks:
add r12, r12, #16
eor r7, r7, r11
.Lchacha_blocks_neon_nomessage22:
- stmia r14!, {r0-r7}
+ UNALIGNED_STMIA8(r14, r0, r1, r2, r3, r4, r5, r6, r7)
str r12, [sp, #48]
str r14, [sp, #40]
ldr r3, [sp, #52]
diff --git a/cipher/poly1305-armv7-neon.S b/cipher/poly1305-armv7-neon.S
index b1554ed..13cb4a5 100644
--- a/cipher/poly1305-armv7-neon.S
+++ b/cipher/poly1305-armv7-neon.S
@@ -46,6 +46,32 @@
# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
#endif
+#define UNALIGNED_LDMIA2(ptr, l0, l1) /* load two words from ptr into l0 and l1 and advance ptr by 8; ptr need not be word-aligned; clobbers the condition flags */ \
+ tst ptr, #3; /* a set bit among the low two address bits means ptr is not word-aligned */ \
+ beq 1f; /* aligned: use the plain ldmia at 1: */ \
+ vpush {d0}; /* save the NEON register borrowed as staging space */ \
+ vld1.32 {d0}, [ptr]!; /* 8-byte NEON load; the ! writeback advances ptr */ \
+ vmov l0, s0; \
+ vmov l1, s1; /* s0 and s1 overlay d0, which held the two loaded words */ \
+ vpop {d0}; /* restore the caller's d0 */ \
+ b 2f; \
+ 1: ldmia ptr!, {l0-l1}; \
+ 2: ;
+
+#define UNALIGNED_LDMIA4(ptr, l0, l1, l2, l3) /* load four words from ptr into l0..l3 and advance ptr by 16; ptr need not be word-aligned; clobbers the condition flags */ \
+ tst ptr, #3; /* a set bit among the low two address bits means ptr is not word-aligned */ \
+ beq 1f; /* aligned: use the plain ldmia at 1: */ \
+ vpush {d0-d1}; /* save the NEON registers borrowed as staging space */ \
+ vld1.32 {d0-d1}, [ptr]!; /* 16-byte NEON load; the ! writeback advances ptr */ \
+ vmov l0, s0; \
+ vmov l1, s1; \
+ vmov l2, s2; \
+ vmov l3, s3; /* s0-s3 overlay d0-d1, which held the four loaded words */ \
+ vpop {d0-d1}; /* restore the caller's d0-d1 */ \
+ b 2f; \
+ 1: ldmia ptr!, {l0-l3}; /* range form names only l0 and l3, so l0..l3 are expected to be consecutive registers */ \
+ 2: ;
+
.text
.p2align 2
@@ -64,7 +90,7 @@ _gcry_poly1305_armv7_neon_init_ext:
mov r14, r2
and r2, r2, r2
moveq r14, #-1
- ldmia r1!, {r2-r5}
+ UNALIGNED_LDMIA4(r1, r2, r3, r4, r5)
GET_DATA_POINTER(r7,.Lpoly1305_init_constants_neon,r8)
mov r6, r2
mov r8, r2, lsr #26
@@ -175,7 +201,7 @@ _gcry_poly1305_armv7_neon_init_ext:
eor r6, r6, r6
stmia r0!, {r2-r6}
stmia r0!, {r2-r6}
- ldmia r1!, {r2-r5}
+ UNALIGNED_LDMIA4(r1, r2, r3, r4, r5)
stmia r0, {r2-r6}
add sp, sp, #32
ldmfd sp!, {r4-r11, lr}
@@ -286,7 +312,7 @@ _gcry_poly1305_armv7_neon_blocks:
vmov d14, d12
vmul.i32 q6, q5, d0[0]
.Lpoly1305_blocks_neon_mainloop:
- ldmia r0!, {r2-r5}
+ UNALIGNED_LDMIA4(r0, r2, r3, r4, r5)
vmull.u32 q0, d25, d12[0]
mov r7, r2, lsr #26
vmlal.u32 q0, d24, d12[1]
@@ -302,7 +328,7 @@ _gcry_poly1305_armv7_neon_blocks:
orr r4, r8, r4, lsl #12
orr r5, r9, r5, lsl #18
vmlal.u32 q1, d24, d13[0]
- ldmia r0!, {r7-r10}
+ UNALIGNED_LDMIA4(r0, r7, r8, r9, r10)
vmlal.u32 q1, d23, d13[1]
mov r1, r7, lsr #26
vmlal.u32 q1, d22, d14[0]
@@ -344,7 +370,7 @@ _gcry_poly1305_armv7_neon_blocks:
vmlal.u32 q4, d21, d11[1]
vld1.64 {d21-d24}, [r14, :256]!
vld1.64 {d25}, [r14, :64]
- ldmia r0!, {r2-r5}
+ UNALIGNED_LDMIA4(r0, r2, r3, r4, r5)
vmlal.u32 q0, d25, d26
mov r7, r2, lsr #26
vmlal.u32 q0, d24, d27
@@ -360,7 +386,7 @@ _gcry_poly1305_armv7_neon_blocks:
orr r4, r8, r4, lsl #12
orr r5, r9, r5, lsl #18
vmlal.u32 q1, d24, d28
- ldmia r0!, {r7-r10}
+ UNALIGNED_LDMIA4(r0, r7, r8, r9, r10)
vmlal.u32 q1, d23, d29
mov r1, r7, lsr #26
vmlal.u32 q1, d22, d20
@@ -643,7 +669,7 @@ _gcry_poly1305_armv7_neon_finish_ext:
.Lpoly1305_finish_ext_neon_skip16:
tst r7, #8
beq .Lpoly1305_finish_ext_neon_skip8
- ldmia r1!, {r10-r11}
+ UNALIGNED_LDMIA2(r1, r10, r11)
stmia r9!, {r10-r11}
.Lpoly1305_finish_ext_neon_skip8:
tst r7, #4
commit 496790940753226f96b731a43d950bd268acd97a
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date: Sun Jul 3 18:39:40 2016 +0300
bench-slope: add unaligned buffer mode
* tests/bench-slope.c (unaligned_mode): New.
(do_slope_benchmark): Unalign buffer if unaligned mode is enabled.
(print_help, main): Add '--unaligned' parameter.
--
Patch adds --unaligned parameter to allow measurement of unaligned
buffer overhead.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
diff --git a/tests/bench-slope.c b/tests/bench-slope.c
index d97494c..cdd0fa6 100644
--- a/tests/bench-slope.c
+++ b/tests/bench-slope.c
@@ -42,6 +42,7 @@
static int verbose;
static int csv_mode;
+static int unaligned_mode;
static int num_measurement_repetitions;
/* CPU Ghz value provided by user, allows constructing cycles/byte and other
@@ -411,12 +412,14 @@ do_slope_benchmark (struct bench_obj *obj)
obj->max_bufsize < 1 || obj->min_bufsize > obj->max_bufsize)
goto err_free;
- real_buffer = malloc (obj->max_bufsize + 128);
+ real_buffer = malloc (obj->max_bufsize + 128 + unaligned_mode);
if (!real_buffer)
goto err_free;
/* Get aligned buffer */
buffer = real_buffer;
buffer += 128 - ((real_buffer - (unsigned char *) 0) & (128 - 1));
+ if (unaligned_mode)
+ buffer += unaligned_mode; /* Make buffer unaligned */
for (i = 0; i < obj->max_bufsize; i++)
buffer[i] = 0x55 ^ (-i);
@@ -1748,6 +1751,7 @@ print_help (void)
" for benchmarking.",
" --repetitions <n> Use N repetitions (default "
STR2(NUM_MEASUREMENT_REPETITIONS) ")",
+ " --unaligned Use unaligned input buffers.",
" --csv Use CSV output format",
NULL
};
@@ -1832,6 +1836,12 @@ main (int argc, char **argv)
argc--;
argv++;
}
+ else if (!strcmp (*argv, "--unaligned"))
+ {
+ unaligned_mode = 1;
+ argc--;
+ argv++;
+ }
else if (!strcmp (*argv, "--disable-hwf"))
{
argc--;
-----------------------------------------------------------------------
Summary of changes:
cipher/chacha20-armv7-neon.S | 56 +++++++++++++++++++++++++++++++++++++-------
cipher/poly1305-armv7-neon.S | 40 +++++++++++++++++++++++++------
tests/bench-slope.c | 12 +++++++++-
3 files changed, 92 insertions(+), 16 deletions(-)
hooks/post-receive
--
The GNU crypto library
http://git.gnupg.org
More information about the Gnupg-commits
mailing list