[git] GCRYPT - branch, master, updated. libgcrypt-1.5.0-143-g0bdf26e

by Jussi Kivilinna cvs at cvs.gnupg.org
Fri May 24 14:04:46 CEST 2013


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  0bdf26eea8cdbffefe7e37578f8f896c4f5f5275 (commit)
       via  ab8fc70b5f0c396a5bc941267f59166e860b8c5d (commit)
      from  6deb0ccdf718a0670f80e6762a3842caf76437d6 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 0bdf26eea8cdbffefe7e37578f8f896c4f5f5275
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Fri May 24 12:43:29 2013 +0300

    cast5: add amd64 assembly implementation
    
    * cipher/Makefile.am: Add 'cast5-amd64.S'.
    * cipher/cast5-amd64.S: New file.
    * cipher/cast5.c (USE_AMD64_ASM): New macro.
    (_gcry_cast5_s1to4): Merge arrays s1, s2, s3, s4 to single array to
    simplify access from assembly implementation.
    (s1, s2, s3, s4): New macros pointing to subarrays in
    _gcry_cast5_s1to4.
    [USE_AMD64_ASM] (_gcry_cast5_amd64_encrypt_block)
    (_gcry_cast5_amd64_decrypt_block, _gcry_cast5_amd64_ctr_enc)
    (_gcry_cast5_amd64_cbc_dec, _gcry_cast5_amd64_cfb_dec): New prototypes.
    [USE_AMD64_ASM] (do_encrypt_block, do_decrypt_block, encrypt_block)
    (decrypt_block): New functions.
    (_gcry_cast5_ctr_enc, _gcry_cast5_cbc_dec, _gcry_cast5_cfb_dec)
    (selftest_ctr, selftest_cbc, selftest_cfb): New functions.
    (selftest): Call new bulk selftests.
    * cipher/cipher.c (gcry_cipher_open) [USE_CAST5]: Register CAST5 bulk
    functions for ctr-enc, cbc-dec and cfb-dec.
    * configure.ac (cast5) [x86_64]: Add 'cast5-amd64.lo'.
    * src/cipher.h (_gcry_cast5_ctr_enc, _gcry_cast5_cbc_dec)
    (_gcry_cast5_cfb_dec): New prototypes.
    --
    
    Provides non-parallel implementations for a small speed-up and 4-way parallel
    implementations that get accelerated on `out-of-order' CPUs.
    
    Speed old vs. new on AMD Phenom II X6 1055T:
                    ECB/Stream         CBC             CFB             OFB             CTR
                 --------------- --------------- --------------- --------------- ---------------
    CAST5         1.23x   1.22x   1.21x   2.86x   1.21x   2.83x   1.22x   1.17x   2.73x   2.73x
    
    Speed old vs. new on Intel Core i5-2450M (Sandy-Bridge):
                    ECB/Stream         CBC             CFB             OFB             CTR
                 --------------- --------------- --------------- --------------- ---------------
    CAST5         1.00x   1.04x   1.06x   2.56x   1.06x   2.37x   1.03x   1.01x   2.43x   2.41x
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 69f1e6d..1e2696f 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -54,7 +54,7 @@ rmd.h
 EXTRA_libcipher_la_SOURCES = \
 arcfour.c \
 blowfish.c \
-cast5.c \
+cast5.c cast5-amd64.S \
 crc.c \
 des.c \
 dsa.c \
diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
new file mode 100644
index 0000000..c3007d3
--- /dev/null
+++ b/cipher/cast5-amd64.S
@@ -0,0 +1,587 @@
+/* cast5-amd64.S  -  AMD64 assembly implementation of CAST5 cipher
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(USE_CAST5)
+
+#ifdef __PIC__
+#  define RIP %rip
+#  define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
+#else
+#  define RIP
+#  define GET_EXTERN_POINTER(name, reg) leaq name, reg
+#endif
+
+.text
+
+.extern _gcry_cast5_s1to4;
+
+#define s1 0
+#define s2 (s1 + (4 * 256))
+#define s3 (s2 + (4 * 256))
+#define s4 (s3 + (4 * 256))
+
+/* structure of CAST5_context: */
+#define Km 0
+#define Kr (Km + (16 * 4))
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+#define RTAB %r8
+
+#define RLR0 %r9
+#define RLR1 %r10
+#define RLR2 %r11
+#define RLR3 %r12
+
+#define RLR0d %r9d
+#define RLR1d %r10d
+#define RLR2d %r11d
+#define RLR3d %r12d
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %dh
+
+#define RKR %rcx
+#define RKRd %ecx
+#define RKRbl %cl
+
+#define RT0 %rbp
+#define RT1 %rsi
+
+#define RT0d %ebp
+#define RT1d %esi
+
+#define RKM0d %r13d
+#define RKM1d %r14d
+
+/***********************************************************************
+ * 1-way cast5
+ ***********************************************************************/
+#define dummy(x)
+
+#define shr_kr(none) \
+	shrq $8,			RKR;
+
+#define F(km, load_next_kr, op0, op1, op2, op3) \
+	op0 ## l RLR0d,			km ## d; \
+	roll RKRbl,			km ## d; \
+	rorq $32,			RLR0; \
+	movzbl km ## bh,		RT0d; \
+	movzbl km ## bl,		RT1d; \
+	roll $16,			km ## d; \
+	movl s1(RTAB,RT0,4),		RT0d; \
+	op1 ## l s2(RTAB,RT1,4),	RT0d; \
+	load_next_kr(kr_next); \
+	movzbl km ## bh,		RT1d; \
+	movzbl km ## bl,		km ## d; \
+	op2 ## l s3(RTAB,RT1,4),	RT0d; \
+	op3 ## l s4(RTAB,km,4),		RT0d; \
+	xorq RT0,			RLR0;
+
+#define F1(km, load_next_kr) \
+	F(##km, load_next_kr, add, xor, sub, add)
+#define F2(km, load_next_kr) \
+	F(##km, load_next_kr, xor, sub, add, xor)
+#define F3(km, load_next_kr) \
+	F(##km, load_next_kr, sub, add, xor, sub)
+
+#define get_round_km(n, km) \
+	movl Km+4*(n)(CTX), 		km;
+
+#define get_round_kr_enc(n) \
+	movq $0x1010101010101010,	RKR; \
+	\
+	/* merge rorl rk and rorl $16 */ \
+	xorq Kr+(n)(CTX),		RKR;
+
+#define get_round_kr_dec(n) \
+	movq $0x1010101010101010,	RKR; \
+	\
+	/* merge rorl rk and rorl $16 */ \
+	xorq Kr+(n - 7)(CTX),		RKR; \
+	bswapq				RKR;
+
+#define round_enc(n, FA, FB, fn1, fn2) \
+	get_round_km(n + 1, RX2d); \
+	FA(RX0, fn1); \
+	get_round_km(n + 2, RX0d); \
+	FB(RX2, fn2);
+
+#define round_enc_last(n, FXA, FXB) \
+	get_round_km(n + 1, RX2d); \
+	\
+	FXA(RX0, shr_kr); \
+	FXB(RX2, dummy);
+
+#define round_enc_1(n, FA, FB) \
+	round_enc(n, FA, FB, shr_kr, shr_kr)
+
+#define round_enc_2(n, FA, FB) \
+	round_enc(n, FA, FB, shr_kr, dummy)
+
+#define round_dec(n, FA, FB, fn1, fn2) \
+	get_round_km(n - 1, RX2d); \
+	FA(RX0, fn1); \
+	get_round_km(n - 2, RX0d); \
+	FB(RX2, fn2);
+
+#define round_dec_last(n, FXA, FXB) \
+	get_round_km(n - 1, RX2d); \
+	FXA(RX0, shr_kr); \
+	FXB(RX2, dummy);
+
+#define round_dec_1(n, FA, FB) \
+	round_dec(n, FA, FB, shr_kr, shr_kr)
+
+#define round_dec_2(n, FA, FB) \
+	round_dec(n, FA, FB, shr_kr, dummy)
+
+#define read_block() \
+	movq (RIO), 		RLR0; \
+	bswapq 			RLR0;
+
+#define write_block() \
+	bswapq 			RLR0; \
+	rorq $32,		RLR0; \
+	movq RLR0, 		(RIO);
+
+.align 8
+.global _gcry_cast5_amd64_encrypt_block
+.type   _gcry_cast5_amd64_encrypt_block,@function;
+
+_gcry_cast5_amd64_encrypt_block:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+	pushq %rbp;
+	pushq %rbx;
+
+	movq %rsi, %r10;
+
+	GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+	movq %rdx, RIO;
+	read_block();
+
+	get_round_km(0, RX0d);
+	get_round_kr_enc(0);
+	round_enc_1(0, F1, F2);
+	round_enc_1(2, F3, F1);
+	round_enc_1(4, F2, F3);
+	round_enc_2(6, F1, F2);
+	get_round_kr_enc(8);
+	round_enc_1(8, F3, F1);
+	round_enc_1(10, F2, F3);
+	round_enc_1(12, F1, F2);
+	round_enc_last(14, F3, F1);
+
+	movq %r10, RIO;
+	write_block();
+
+	popq %rbx;
+	popq %rbp;
+	ret;
+.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;
+
+.align 8
+.global _gcry_cast5_amd64_decrypt_block
+.type   _gcry_cast5_amd64_decrypt_block,@function;
+
+_gcry_cast5_amd64_decrypt_block:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+	pushq %rbp;
+	pushq %rbx;
+
+	movq %rsi, %r10;
+
+	GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+	movq %rdx, RIO;
+	read_block();
+
+	get_round_km(15, RX0d);
+	get_round_kr_dec(15);
+	round_dec_1(15, F1, F3);
+	round_dec_1(13, F2, F1);
+	round_dec_1(11, F3, F2);
+	round_dec_2(9, F1, F3);
+	get_round_kr_dec(7);
+	round_dec_1(7, F2, F1);
+	round_dec_1(5, F3, F2);
+	round_dec_1(3, F1, F3);
+	round_dec_last(1, F2, F1);
+
+	movq %r10, RIO;
+	write_block();
+
+	popq %rbx;
+	popq %rbp;
+	ret;
+.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;
+
+/**********************************************************************
+  4-way cast5, four blocks parallel
+ **********************************************************************/
+#define F_tail(rlr, rx, op1, op2, op3) \
+	movzbl rx ## bh,		RT0d; \
+	movzbl rx ## bl,		RT1d; \
+	roll $16,			rx ## d; \
+	movl s1(RTAB,RT0,4),		RT0d; \
+	op1 ## l s2(RTAB,RT1,4),	RT0d; \
+	movzbl rx ## bh,		RT1d; \
+	movzbl rx ## bl,		rx ## d; \
+	op2 ## l s3(RTAB,RT1,4),	RT0d; \
+	op3 ## l s4(RTAB,rx,4),		RT0d; \
+	xorq RT0,			rlr;
+
+#define F4(km, load_next_kr, op0, op1, op2, op3) \
+	movl km,			RX0d; \
+	op0 ## l RLR0d,			RX0d; \
+	roll RKRbl,			RX0d; \
+	rorq $32,			RLR0; \
+	\
+	movl km,			RX1d; \
+	op0 ## l RLR1d,			RX1d; \
+	roll RKRbl,			RX1d; \
+	rorq $32,			RLR1; \
+	\
+	movl km,			RX2d; \
+	op0 ## l RLR2d,			RX2d; \
+	roll RKRbl,			RX2d; \
+	rorq $32,			RLR2; \
+	\
+	F_tail(RLR0, RX0, op1, op2, op3); \
+	F_tail(RLR1, RX1, op1, op2, op3); \
+	F_tail(RLR2, RX2, op1, op2, op3); \
+	\
+	movl km,			RX0d; \
+	op0 ## l RLR3d,			RX0d; \
+	roll RKRbl,			RX0d; \
+	load_next_kr();			\
+	rorq $32,			RLR3; \
+	\
+	F_tail(RLR3, RX0, op1, op2, op3);
+
+#define F4_1(km, load_next_kr) \
+	F4(km, load_next_kr, add, xor, sub, add)
+#define F4_2(km, load_next_kr) \
+	F4(km, load_next_kr, xor, sub, add, xor)
+#define F4_3(km, load_next_kr) \
+	F4(km, load_next_kr, sub, add, xor, sub)
+
+#define round_enc4(n, FA, FB, fn1, fn2) \
+	get_round_km(n + 1, RKM1d); \
+	FA(RKM0d, fn1); \
+	get_round_km(n + 2, RKM0d); \
+	FB(RKM1d, fn2);
+
+#define round_enc_last4(n, FXA, FXB) \
+	get_round_km(n + 1, RKM1d); \
+	FXA(RKM0d, shr_kr); \
+	FXB(RKM1d, dummy);
+
+#define round_enc4_1(n, FA, FB) \
+	round_enc4(n, FA, FB, shr_kr, shr_kr);
+
+#define round_enc4_2(n, FA, FB) \
+	round_enc4(n, FA, FB, shr_kr, dummy);
+
+#define round_dec4(n, FA, FB, fn1, fn2) \
+	get_round_km(n - 1, RKM1d); \
+	FA(RKM0d, fn1); \
+	get_round_km(n - 2, RKM0d); \
+	FB(RKM1d, fn2);
+
+#define round_dec_last4(n, FXA, FXB) \
+	get_round_km(n - 1, RKM1d); \
+	FXA(RKM0d, shr_kr); \
+	FXB(RKM1d, dummy);
+
+#define round_dec4_1(n, FA, FB) \
+	round_dec4(n, FA, FB, shr_kr, shr_kr);
+
+#define round_dec4_2(n, FA, FB) \
+	round_dec4(n, FA, FB, shr_kr, dummy);
+
+#define inbswap_block4(a, b, c, d) \
+	bswapq 			a; \
+	bswapq 			b; \
+	bswapq 			c; \
+	bswapq 			d;
+
+#define outbswap_block4(a, b, c, d) \
+	bswapq 			a; \
+	bswapq 			b; \
+	bswapq 			c; \
+	bswapq 			d; \
+	rorq $32,		a; \
+	rorq $32,		b; \
+	rorq $32,		c; \
+	rorq $32,		d;
+
+.align 8
+.type   __cast5_enc_blk4,@function;
+
+__cast5_enc_blk4:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	RLR0,RLR1,RLR2,RLR3: four input plaintext blocks
+	 * output:
+	 *	RLR0,RLR1,RLR2,RLR3: four output ciphertext blocks
+	 */
+	GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+	get_round_km(0, RKM0d);
+	get_round_kr_enc(0);
+	round_enc4_1(0, F4_1, F4_2);
+	round_enc4_1(2, F4_3, F4_1);
+	round_enc4_1(4, F4_2, F4_3);
+	round_enc4_2(6, F4_1, F4_2);
+	get_round_kr_enc(8);
+	round_enc4_1(8, F4_3, F4_1);
+	round_enc4_1(10, F4_2, F4_3);
+	round_enc4_1(12, F4_1, F4_2);
+	round_enc_last4(14, F4_3, F4_1);
+
+	outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+	ret;
+.size __cast5_enc_blk4,.-__cast5_enc_blk4;
+
+.align 8
+.type   __cast5_dec_blk4,@function;
+
+__cast5_dec_blk4:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	RLR0,RLR1,RLR2,RLR3: four input ciphertext blocks
+	 * output:
+	 *	RLR0,RLR1,RLR2,RLR3: four output plaintext blocks
+	 */
+	GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+	inbswap_block4(RLR0, RLR1, RLR2, RLR3);
+
+	get_round_km(15, RKM0d);
+	get_round_kr_dec(15);
+	round_dec4_1(15, F4_1, F4_3);
+	round_dec4_1(13, F4_2, F4_1);
+	round_dec4_1(11, F4_3, F4_2);
+	round_dec4_2(9, F4_1, F4_3);
+	get_round_kr_dec(7);
+	round_dec4_1(7, F4_2, F4_1);
+	round_dec4_1(5, F4_3, F4_2);
+	round_dec4_1(3, F4_1, F4_3);
+	round_dec_last4(1, F4_2, F4_1);
+
+	outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+	ret;
+.size __cast5_dec_blk4,.-__cast5_dec_blk4;
+
+.align 8
+.global _gcry_cast5_amd64_ctr_enc
+.type   _gcry_cast5_amd64_ctr_enc,@function;
+_gcry_cast5_amd64_ctr_enc:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (4 blocks)
+	 *	%rdx: src (4 blocks)
+	 *	%rcx: iv (big endian, 64bit)
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+
+	pushq %rsi;
+	pushq %rdx;
+
+	/* load IV and byteswap */
+	movq (%rcx), RX0;
+	bswapq RX0;
+	movq RX0, RLR0;
+
+	/* construct IVs */
+	leaq 1(RX0), RLR1;
+	leaq 2(RX0), RLR2;
+	leaq 3(RX0), RLR3;
+	leaq 4(RX0), RX0;
+	bswapq RX0;
+
+	/* store new IV */
+	movq RX0, (%rcx);
+
+	call __cast5_enc_blk4;
+
+	popq %r14; /*src*/
+	popq %r13; /*dst*/
+
+	/* XOR key-stream with plaintext */
+	xorq 0 * 8(%r14), RLR0;
+	xorq 1 * 8(%r14), RLR1;
+	xorq 2 * 8(%r14), RLR2;
+	xorq 3 * 8(%r14), RLR3;
+	movq RLR0, 0 * 8(%r13);
+	movq RLR1, 1 * 8(%r13);
+	movq RLR2, 2 * 8(%r13);
+	movq RLR3, 3 * 8(%r13);
+
+	popq %r14;
+	popq %r13;
+	popq %r12;
+	popq %rbx;
+	popq %rbp;
+	ret
+.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;
+
+.align 8
+.global _gcry_cast5_amd64_cbc_dec
+.type   _gcry_cast5_amd64_cbc_dec,@function;
+_gcry_cast5_amd64_cbc_dec:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (4 blocks)
+	 *	%rdx: src (4 blocks)
+	 *	%rcx: iv (64bit)
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+
+	pushq %rcx;
+	pushq %rsi;
+	pushq %rdx;
+
+	/* load input */
+	movq 0 * 8(%rdx), RLR0;
+	movq 1 * 8(%rdx), RLR1;
+	movq 2 * 8(%rdx), RLR2;
+	movq 3 * 8(%rdx), RLR3;
+
+	call __cast5_dec_blk4;
+
+	popq RX0; /*src*/
+	popq RX1; /*dst*/
+	popq RX2; /*iv*/
+
+	movq 3 * 8(RX0), %r14;
+	xorq      (RX2), RLR0;
+	xorq 0 * 8(RX0), RLR1;
+	xorq 1 * 8(RX0), RLR2;
+	xorq 2 * 8(RX0), RLR3;
+	movq %r14, (RX2); /* store new IV */
+
+	movq RLR0, 0 * 8(RX1);
+	movq RLR1, 1 * 8(RX1);
+	movq RLR2, 2 * 8(RX1);
+	movq RLR3, 3 * 8(RX1);
+
+	popq %r14;
+	popq %r13;
+	popq %r12;
+	popq %rbx;
+	popq %rbp;
+	ret;
+
+.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;
+
+.align 8
+.global _gcry_cast5_amd64_cfb_dec
+.type   _gcry_cast5_amd64_cfb_dec,@function;
+_gcry_cast5_amd64_cfb_dec:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (4 blocks)
+	 *	%rdx: src (4 blocks)
+	 *	%rcx: iv (64bit)
+	 */
+
+	pushq %rbp;
+	pushq %rbx;
+	pushq %r12;
+	pushq %r13;
+	pushq %r14;
+
+	pushq %rsi;
+	pushq %rdx;
+
+	/* Load input */
+	movq (%rcx), RLR0;
+	movq 0 * 8(%rdx), RLR1;
+	movq 1 * 8(%rdx), RLR2;
+	movq 2 * 8(%rdx), RLR3;
+
+	inbswap_block4(RLR0, RLR1, RLR2, RLR3);
+
+	/* Update IV */
+	movq 3 * 8(%rdx), %rdx;
+	movq %rdx, (%rcx);
+
+	call __cast5_enc_blk4;
+
+	popq %rdx; /*src*/
+	popq %rcx; /*dst*/
+
+	xorq 0 * 8(%rdx), RLR0;
+	xorq 1 * 8(%rdx), RLR1;
+	xorq 2 * 8(%rdx), RLR2;
+	xorq 3 * 8(%rdx), RLR3;
+	movq RLR0, 0 * 8(%rcx);
+	movq RLR1, 1 * 8(%rcx);
+	movq RLR2, 2 * 8(%rcx);
+	movq RLR3, 3 * 8(%rcx);
+
+	popq %r14;
+	popq %r13;
+	popq %r12;
+	popq %rbx;
+	popq %rbp;
+	ret;
+
+.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;
+
+#endif /*defined(USE_CAST5)*/
+#endif /*__x86_64*/
diff --git a/cipher/cast5.c b/cipher/cast5.c
index 9905f5c..8c5664d 100644
--- a/cipher/cast5.c
+++ b/cipher/cast5.c
@@ -42,6 +42,14 @@
 #include "g10lib.h"
 #include "types.h"
 #include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__)
+# define USE_AMD64_ASM 1
+#endif
 
 #define CAST5_BLOCKSIZE 8
 
@@ -56,8 +64,12 @@ static void decrypt_block (void *c, byte *outbuf, const byte *inbuf);
 
 
 
+#define s1 _gcry_cast5_s1to4[0]
+#define s2 _gcry_cast5_s1to4[1]
+#define s3 _gcry_cast5_s1to4[2]
+#define s4 _gcry_cast5_s1to4[3]
 
-static const u32 s1[256] = {
+const u32 _gcry_cast5_s1to4[4][256] = { {
 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949,
 0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e,
 0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d,
@@ -90,8 +102,7 @@ static const u32 s1[256] = {
 0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e,
 0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9,
 0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf
-};
-static const u32 s2[256] = {
+}, {
 0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651,
 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3,
 0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb,
@@ -124,8 +135,7 @@ static const u32 s2[256] = {
 0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa,
 0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9,
 0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1
-};
-static const u32 s3[256] = {
+}, {
 0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90,
 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5,
 0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e,
@@ -158,8 +168,7 @@ static const u32 s3[256] = {
 0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d,
 0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a,
 0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783
-};
-static const u32 s4[256] = {
+}, {
 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1,
 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf,
 0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15,
@@ -192,7 +201,7 @@ static const u32 s4[256] = {
 0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04,
 0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 0xb657c34d, 0x4edfd282,
 0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2
-};
+} };
 static const u32 s5[256] = {
 0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f,
 0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a,
@@ -331,6 +340,53 @@ static const u32 s8[256] = {
 };
 
 
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementations of CAST5. */
+extern void _gcry_cast5_amd64_encrypt_block(CAST5_context *c, byte *outbuf,
+					    const byte *inbuf);
+
+extern void _gcry_cast5_amd64_decrypt_block(CAST5_context *c, byte *outbuf,
+					    const byte *inbuf);
+
+/* These assembly implementations process four blocks in parallel. */
+extern void _gcry_cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out,
+				      const byte *in, byte *ctr);
+
+extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out,
+				      const byte *in, byte *iv);
+
+extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out,
+				      const byte *in, byte *iv);
+
+static void
+do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+  _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf);
+}
+
+static void
+do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+  _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf);
+}
+
+static void encrypt_block (void *context , byte *outbuf, const byte *inbuf)
+{
+  CAST5_context *c = (CAST5_context *) context;
+  do_encrypt_block (c, outbuf, inbuf);
+  _gcry_burn_stack (2*8);
+}
+
+static void decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+  CAST5_context *c = (CAST5_context *) context;
+  _gcry_cast5_amd64_decrypt_block (c, outbuf, inbuf);
+  _gcry_burn_stack (2*8);
+}
+
+#else /*USE_AMD64_ASM*/
+
 #if defined(__GNUC__) && defined(__i386__)
 static inline u32
 rol(int n, u32 x)
@@ -463,6 +519,201 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf)
   _gcry_burn_stack (20+4*sizeof(void*));
 }
 
+#endif /*!USE_AMD64_ASM*/
+
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size CAST5_BLOCKSIZE. */
+void
+_gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+		    const void *inbuf_arg, unsigned int nblocks)
+{
+  CAST5_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[CAST5_BLOCKSIZE];
+  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE;
+
+  int i;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 8 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        _gcry_cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+        nblocks -= 4;
+        outbuf += 4 * CAST5_BLOCKSIZE;
+        inbuf  += 4 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+    /* TODO: use caching instead? */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. */
+      do_encrypt_block(ctx, tmpbuf, ctr);
+      /* XOR the input with the encrypted counter and store in output.  */
+      buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE);
+      outbuf += CAST5_BLOCKSIZE;
+      inbuf  += CAST5_BLOCKSIZE;
+      /* Increment the counter.  */
+      for (i = CAST5_BLOCKSIZE; i > 0; i--)
+        {
+          ctr[i-1]++;
+          if (ctr[i-1])
+            break;
+        }
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+void
+_gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+		    const void *inbuf_arg, unsigned int nblocks)
+{
+  CAST5_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[CAST5_BLOCKSIZE];
+  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 8 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        _gcry_cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 4;
+        outbuf += 4 * CAST5_BLOCKSIZE;
+        inbuf  += 4 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* We need to save INBUF away because it may be identical to
+         OUTBUF.  */
+      memcpy(savebuf, inbuf, CAST5_BLOCKSIZE);
+
+      do_decrypt_block (ctx, outbuf, inbuf);
+
+      buf_xor(outbuf, outbuf, iv, CAST5_BLOCKSIZE);
+      memcpy(iv, savebuf, CAST5_BLOCKSIZE);
+      inbuf += CAST5_BLOCKSIZE;
+      outbuf += CAST5_BLOCKSIZE;
+    }
+
+  wipememory(savebuf, sizeof(savebuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+void
+_gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+		    const void *inbuf_arg, unsigned int nblocks)
+{
+  CAST5_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 2 * CAST5_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 8 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        _gcry_cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 4;
+        outbuf += 4 * CAST5_BLOCKSIZE;
+        inbuf  += 4 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      do_encrypt_block(ctx, iv, iv);
+      buf_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE);
+      outbuf += CAST5_BLOCKSIZE;
+      inbuf  += CAST5_BLOCKSIZE;
+    }
+
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Run the self-tests for CAST5-CTR, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 4+1;
+  const int blocksize = CAST5_BLOCKSIZE;
+  const int context_size = sizeof(CAST5_context);
+
+  return _gcry_selftest_helper_ctr("CAST5", &cast_setkey,
+           &encrypt_block, &_gcry_cast5_ctr_enc, nblocks, blocksize,
+	   context_size);
+}
+
+
+/* Run the self-tests for CAST5-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = CAST5_BLOCKSIZE;
+  const int context_size = sizeof(CAST5_context);
+
+  return _gcry_selftest_helper_cbc("CAST5", &cast_setkey,
+           &encrypt_block, &_gcry_cast5_cbc_dec, nblocks, blocksize,
+	   context_size);
+}
+
+
+/* Run the self-tests for CAST5-CFB, tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = CAST5_BLOCKSIZE;
+  const int context_size = sizeof(CAST5_context);
+
+  return _gcry_selftest_helper_cfb("CAST5", &cast_setkey,
+           &encrypt_block, &_gcry_cast5_cfb_dec, nblocks, blocksize,
+	   context_size);
+}
+
 
 static const char*
 selftest(void)
@@ -473,6 +724,7 @@ selftest(void)
     byte plain[8] = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF };
     byte cipher[8]= { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 };
     byte buffer[8];
+    const char *r;
 
     cast_setkey( &c, key, 16 );
     encrypt_block( &c, buffer, plain );
@@ -507,6 +759,16 @@ selftest(void)
 
     }
 #endif
+
+    if ( (r = selftest_cbc ()) )
+      return r;
+
+    if ( (r = selftest_cfb ()) )
+      return r;
+
+    if ( (r = selftest_ctr ()) )
+      return r;
+
     return NULL;
 }
 
diff --git a/cipher/cipher.c b/cipher/cipher.c
index 652d795..79ca755 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -718,6 +718,13 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
               h->bulk.ctr_enc = _gcry_aes_ctr_enc;
               break;
 #endif /*USE_AES*/
+#ifdef USE_CAST5
+	    case GCRY_CIPHER_CAST5:
+              h->bulk.cfb_dec = _gcry_cast5_cfb_dec;
+              h->bulk.cbc_dec = _gcry_cast5_cbc_dec;
+              h->bulk.ctr_enc = _gcry_cast5_ctr_enc;
+              break;
+#endif /*USE_CAST5*/
 #ifdef USE_CAMELLIA
 	    case GCRY_CIPHER_CAMELLIA128:
 	    case GCRY_CIPHER_CAMELLIA192:
diff --git a/configure.ac b/configure.ac
index 3fec8bc..113c71f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1195,6 +1195,13 @@ LIST_MEMBER(cast5, $enabled_ciphers)
 if test "$found" = "1" ; then
    GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo"
    AC_DEFINE(USE_CAST5, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-amd64.lo"
+      ;;
+   esac
 fi
 
 LIST_MEMBER(des, $enabled_ciphers)
diff --git a/src/cipher.h b/src/cipher.h
index 9d6cc01..1742003 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -95,6 +95,19 @@ void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
                         void *outbuf_arg, const void *inbuf_arg,
                         unsigned int nblocks);
 
+/*-- cast5.c --*/
+void _gcry_cast5_cfb_dec (void *context, unsigned char *iv,
+			  void *outbuf_arg, const void *inbuf_arg,
+			  unsigned int nblocks);
+
+void _gcry_cast5_cbc_dec (void *context, unsigned char *iv,
+			  void *outbuf_arg, const void *inbuf_arg,
+			  unsigned int nblocks);
+
+void _gcry_cast5_ctr_enc (void *context, unsigned char *ctr,
+			  void *outbuf_arg, const void *inbuf_arg,
+			  unsigned int nblocks);
+
 /*-- camellia-glue.c --*/
 void _gcry_camellia_ctr_enc (void *context, unsigned char *ctr,
                              void *outbuf_arg, const void *inbuf_arg,

commit ab8fc70b5f0c396a5bc941267f59166e860b8c5d
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Fri May 24 12:43:24 2013 +0300

    cipher-selftest: make selftest work with any block-size
    
    * cipher/cipher-selftest.c (_gcry_selftest_helper_cbc_128)
    (_gcry_selftest_helper_cfb_128, _gcry_selftest_helper_ctr_128): Renamed
    functions from '<name>_128' to '<name>'.
    (_gcry_selftest_helper_cbc, _gcry_selftest_helper_cfb)
    (_gcry_selftest_helper_ctr): Make work with different block sizes.
    * cipher/cipher-selftest.h (_gcry_selftest_helper_cbc_128)
    (_gcry_selftest_helper_cfb_128, _gcry_selftest_helper_ctr_128): Renamed
    prototypes from '<name>_128' to '<name>'.
    * cipher/camellia-glue.c (selftest_ctr_128, selftest_cbc_128)
    (selftest_cfb_128): Change to use new function names.
    * cipher/rijndael.c (selftest_ctr_128, selftest_cbc_128)
    (selftest_cfb_128): Change to use new function names.
    * cipher/serpent.c (selftest_ctr_128, selftest_cbc_128)
    (selftest_cfb_128): Change to use new function names.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index f9bbb33..4163e82 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -374,7 +374,7 @@ selftest_ctr_128 (void)
   const int blocksize = CAMELLIA_BLOCK_SIZE;
   const int context_size = sizeof(CAMELLIA_context);
 
-  return _gcry_selftest_helper_ctr_128("CAMELLIA", &camellia_setkey,
+  return _gcry_selftest_helper_ctr("CAMELLIA", &camellia_setkey,
            &camellia_encrypt, &_gcry_camellia_ctr_enc, nblocks, blocksize,
 	   context_size);
 }
@@ -388,7 +388,7 @@ selftest_cbc_128 (void)
   const int blocksize = CAMELLIA_BLOCK_SIZE;
   const int context_size = sizeof(CAMELLIA_context);
 
-  return _gcry_selftest_helper_cbc_128("CAMELLIA", &camellia_setkey,
+  return _gcry_selftest_helper_cbc("CAMELLIA", &camellia_setkey,
            &camellia_encrypt, &_gcry_camellia_cbc_dec, nblocks, blocksize,
 	   context_size);
 }
@@ -402,7 +402,7 @@ selftest_cfb_128 (void)
   const int blocksize = CAMELLIA_BLOCK_SIZE;
   const int context_size = sizeof(CAMELLIA_context);
 
-  return _gcry_selftest_helper_cfb_128("CAMELLIA", &camellia_setkey,
+  return _gcry_selftest_helper_cfb("CAMELLIA", &camellia_setkey,
            &camellia_encrypt, &_gcry_camellia_cfb_dec, nblocks, blocksize,
 	   context_size);
 }
diff --git a/cipher/cipher-selftest.c b/cipher/cipher-selftest.c
index 41eb405..17742e3 100644
--- a/cipher/cipher-selftest.c
+++ b/cipher/cipher-selftest.c
@@ -44,15 +44,14 @@
 #endif
 
 
-/* Run the self-tests for <block cipher>-CBC-128, tests bulk CBC
+/* Run the self-tests for <block cipher>-CBC-<block size>, tests bulk CBC
    decryption.  Returns NULL on success. */
 const char *
-_gcry_selftest_helper_cbc_128 (const char *cipher,
-                               gcry_cipher_setkey_t setkey_func,
-			       gcry_cipher_encrypt_t encrypt_one,
-			       gcry_cipher_bulk_cbc_dec_t bulk_cbc_dec,
-			       const int nblocks, const int blocksize,
-			       const int context_size)
+_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey_func,
+			   gcry_cipher_encrypt_t encrypt_one,
+			   gcry_cipher_bulk_cbc_dec_t bulk_cbc_dec,
+			   const int nblocks, const int blocksize,
+			   const int context_size)
 {
   int i, offs;
   unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem;
@@ -63,7 +62,8 @@ _gcry_selftest_helper_cbc_128 (const char *cipher,
       0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22
     };
 
-  /* Allocate buffers, align elements to 16 bytes.  */
+  /* Allocate buffers, align first two elements to 16 bytes and latter to
+     block size.  */
   ctx_aligned_size = context_size + 15;
   ctx_aligned_size -= ctx_aligned_size & 0xf;
 
@@ -97,24 +97,25 @@ _gcry_selftest_helper_cbc_128 (const char *cipher,
 
   /* CBC decrypt.  */
   bulk_cbc_dec (ctx, iv2, plaintext2, ciphertext, 1);
-  if (memcmp (plaintext2, plaintext, 16))
+  if (memcmp (plaintext2, plaintext, blocksize))
     {
       gcry_free (mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CBC test failed (plaintext mismatch)", cipher);
+              "%s-CBC-%d test failed (plaintext mismatch)", cipher,
+	      blocksize * 8);
 #endif
-      return "selftest for 128 bit CBC failed - see syslog for details";
+      return "selftest for CBC failed - see syslog for details";
     }
 
-  if (memcmp (iv2, iv, 16))
+  if (memcmp (iv2, iv, blocksize))
     {
       gcry_free (mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CBC test failed (IV mismatch)", cipher);
+              "%s-CBC-%d test failed (IV mismatch)", cipher, blocksize * 8);
 #endif
-      return "selftest for 128 bit CBC failed - see syslog for details";
+      return "selftest for CBC failed - see syslog for details";
     }
 
   /* Test parallelized code paths */
@@ -140,35 +141,34 @@ _gcry_selftest_helper_cbc_128 (const char *cipher,
       gcry_free (mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CBC test failed (plaintext mismatch, parallel path)",
-	      cipher);
+              "%s-CBC-%d test failed (plaintext mismatch, parallel path)",
+	      cipher, blocksize * 8);
 #endif
-      return "selftest for 128 bit CBC failed - see syslog for details";
+      return "selftest for CBC failed - see syslog for details";
     }
   if (memcmp (iv2, iv, blocksize))
     {
       gcry_free (mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CBC test failed (IV mismatch, parallel path)",
-	      cipher);
+              "%s-CBC-%d test failed (IV mismatch, parallel path)",
+	      cipher, blocksize * 8);
 #endif
-      return "selftest for 128 bit CBC failed - see syslog for details";
+      return "selftest for CBC failed - see syslog for details";
     }
 
   gcry_free (mem);
   return NULL;
 }
 
-/* Run the self-tests for <block cipher>-CFB-128, tests bulk CFB
+/* Run the self-tests for <block cipher>-CFB-<block size>, tests bulk CFB
    decryption.  Returns NULL on success. */
 const char *
-_gcry_selftest_helper_cfb_128 (const char *cipher,
-			       gcry_cipher_setkey_t setkey_func,
-			       gcry_cipher_encrypt_t encrypt_one,
-			       gcry_cipher_bulk_cfb_dec_t bulk_cfb_dec,
-			       const int nblocks, const int blocksize,
-			       const int context_size)
+_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey_func,
+			   gcry_cipher_encrypt_t encrypt_one,
+			   gcry_cipher_bulk_cfb_dec_t bulk_cfb_dec,
+			   const int nblocks, const int blocksize,
+			   const int context_size)
 {
   int i, offs;
   unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem;
@@ -179,7 +179,8 @@ _gcry_selftest_helper_cfb_128 (const char *cipher,
       0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x33
     };
 
-  /* Allocate buffers, align elements to 16 bytes.  */
+  /* Allocate buffers, align first two elements to 16 bytes and latter to
+     block size.  */
   ctx_aligned_size = context_size + 15;
   ctx_aligned_size -= ctx_aligned_size & 0xf;
 
@@ -217,9 +218,10 @@ _gcry_selftest_helper_cfb_128 (const char *cipher,
       gcry_free(mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CFB test failed (plaintext mismatch)", cipher);
+              "%s-CFB-%d test failed (plaintext mismatch)", cipher,
+	      blocksize * 8);
 #endif
-      return "selftest for 128 bit CFB failed - see syslog for details";
+      return "selftest for CFB failed - see syslog for details";
     }
 
   if (memcmp(iv2, iv, blocksize))
@@ -227,9 +229,9 @@ _gcry_selftest_helper_cfb_128 (const char *cipher,
       gcry_free(mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CFB test failed (IV mismatch)", cipher);
+              "%s-CFB-%d test failed (IV mismatch)", cipher, blocksize * 8);
 #endif
-      return "selftest for 128 bit CFB failed - see syslog for details";
+      return "selftest for CFB failed - see syslog for details";
     }
 
   /* Test parallelized code paths */
@@ -254,34 +256,34 @@ _gcry_selftest_helper_cfb_128 (const char *cipher,
       gcry_free(mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CFB test failed (plaintext mismatch, parallel path)",
-              cipher);
+              "%s-CFB-%d test failed (plaintext mismatch, parallel path)",
+              cipher, blocksize * 8);
 #endif
-      return "selftest for 128 bit CFB failed - see syslog for details";
+      return "selftest for CFB failed - see syslog for details";
     }
   if (memcmp(iv2, iv, blocksize))
     {
       gcry_free(mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CFB test failed (IV mismatch, parallel path)", cipher);
+              "%s-CFB-%d test failed (IV mismatch, parallel path)", cipher,
+	      blocksize * 8);
 #endif
-      return "selftest for 128 bit CFB failed - see syslog for details";
+      return "selftest for CFB failed - see syslog for details";
     }
 
   gcry_free(mem);
   return NULL;
 }
 
-/* Run the self-tests for <block cipher>-CTR-128, tests IV increment of bulk CTR
-   encryption.  Returns NULL on success. */
+/* Run the self-tests for <block cipher>-CTR-<block size>, tests IV increment
+   of bulk CTR encryption.  Returns NULL on success. */
 const char *
-_gcry_selftest_helper_ctr_128 (const char *cipher,
-                               gcry_cipher_setkey_t setkey_func,
-			       gcry_cipher_encrypt_t encrypt_one,
-			       gcry_cipher_bulk_ctr_enc_t bulk_ctr_enc,
-			       const int nblocks, const int blocksize,
-			       const int context_size)
+_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey_func,
+			   gcry_cipher_encrypt_t encrypt_one,
+			   gcry_cipher_bulk_ctr_enc_t bulk_ctr_enc,
+			   const int nblocks, const int blocksize,
+			   const int context_size)
 {
   int i, j, offs, diff;
   unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem;
@@ -292,7 +294,8 @@ _gcry_selftest_helper_ctr_128 (const char *cipher,
       0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
     };
 
-  /* Allocate buffers, align elements to 16 bytes.  */
+  /* Allocate buffers, align first two elements to 16 bytes and latter to
+     block size.  */
   ctx_aligned_size = context_size + 15;
   ctx_aligned_size -= ctx_aligned_size & 0xf;
 
@@ -337,9 +340,10 @@ _gcry_selftest_helper_ctr_128 (const char *cipher,
       gcry_free (mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CTR test failed (plaintext mismatch)", cipher);
+              "%s-CTR-%d test failed (plaintext mismatch)", cipher,
+	      blocksize * 8);
 #endif
-      return "selftest for 128 bit CTR failed - see syslog for details";
+      return "selftest for CTR failed - see syslog for details";
     }
 
   if (memcmp (iv2, iv, blocksize))
@@ -347,9 +351,10 @@ _gcry_selftest_helper_ctr_128 (const char *cipher,
       gcry_free (mem);
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s-128-CTR test failed (IV mismatch)", cipher);
+              "%s-CTR-%d test failed (IV mismatch)", cipher,
+	      blocksize * 8);
 #endif
-      return "selftest for 128 bit CTR failed - see syslog for details";
+      return "selftest for CTR failed - see syslog for details";
     }
 
   /* Test parallelized code paths */
@@ -385,19 +390,20 @@ _gcry_selftest_helper_ctr_128 (const char *cipher,
         gcry_free (mem);
 #ifdef HAVE_SYSLOG
         syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-                "%s-128-CTR test failed (plaintext mismatch, diff: %d)", cipher,
-		diff);
+                "%s-CTR-%d test failed (plaintext mismatch, diff: %d)", cipher,
+		blocksize * 8, diff);
 #endif
-        return "selftest for 128 bit CTR failed - see syslog for details";
+        return "selftest for CTR failed - see syslog for details";
       }
     if (memcmp(iv2, iv, blocksize))
       {
         gcry_free (mem);
 #ifdef HAVE_SYSLOG
         syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-                "%s-128-CTR test failed (IV mismatch, diff: %d)", cipher, diff);
+                "%s-CTR-%d test failed (IV mismatch, diff: %d)", cipher,
+		blocksize * 8, diff);
 #endif
-        return "selftest for 128 bit CTR failed - see syslog for details";
+        return "selftest for CTR failed - see syslog for details";
       }
   }
 
diff --git a/cipher/cipher-selftest.h b/cipher/cipher-selftest.h
index 30bc251..41d77af 100644
--- a/cipher/cipher-selftest.h
+++ b/cipher/cipher-selftest.h
@@ -42,26 +42,26 @@ typedef void (*gcry_cipher_bulk_ctr_enc_t)(void *context, unsigned char *iv,
 
 /* Helper function for bulk CBC decryption selftest */
 const char *
-_gcry_selftest_helper_cbc_128 (const char *cipher, gcry_cipher_setkey_t setkey,
-			       gcry_cipher_encrypt_t encrypt_one,
-			       gcry_cipher_bulk_cbc_dec_t bulk_cbc_dec,
-			       const int nblocks, const int blocksize,
-			       const int context_size);
+_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey,
+			   gcry_cipher_encrypt_t encrypt_one,
+			   gcry_cipher_bulk_cbc_dec_t bulk_cbc_dec,
+			   const int nblocks, const int blocksize,
+			   const int context_size);
 
 /* Helper function for bulk CFB decryption selftest */
 const char *
-_gcry_selftest_helper_cfb_128 (const char *cipher, gcry_cipher_setkey_t setkey,
-			       gcry_cipher_encrypt_t encrypt_one,
-			       gcry_cipher_bulk_cfb_dec_t bulk_cfb_dec,
-			       const int nblocks, const int blocksize,
-			       const int context_size);
+_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey,
+			   gcry_cipher_encrypt_t encrypt_one,
+			   gcry_cipher_bulk_cfb_dec_t bulk_cfb_dec,
+			   const int nblocks, const int blocksize,
+			   const int context_size);
 
 /* Helper function for bulk CTR encryption selftest */
 const char *
-_gcry_selftest_helper_ctr_128 (const char *cipher, gcry_cipher_setkey_t setkey,
-			       gcry_cipher_encrypt_t encrypt_one,
-			       gcry_cipher_bulk_ctr_enc_t bulk_ctr_enc,
-			       const int nblocks, const int blocksize,
-			       const int context_size);
+_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey,
+			   gcry_cipher_encrypt_t encrypt_one,
+			   gcry_cipher_bulk_ctr_enc_t bulk_ctr_enc,
+			   const int nblocks, const int blocksize,
+			   const int context_size);
 
 #endif /*G10_SELFTEST_HELP_H*/
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 9f075ff..ac2fcfb 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -2168,7 +2168,7 @@ selftest_ctr_128 (void)
   const int blocksize = BLOCKSIZE;
   const int context_size = sizeof(RIJNDAEL_context);
 
-  return _gcry_selftest_helper_ctr_128("AES", &rijndael_setkey,
+  return _gcry_selftest_helper_ctr("AES", &rijndael_setkey,
            &rijndael_encrypt, &_gcry_aes_ctr_enc, nblocks, blocksize,
 	   context_size);
 }
@@ -2183,7 +2183,7 @@ selftest_cbc_128 (void)
   const int blocksize = BLOCKSIZE;
   const int context_size = sizeof(RIJNDAEL_context);
 
-  return _gcry_selftest_helper_cbc_128("AES", &rijndael_setkey,
+  return _gcry_selftest_helper_cbc("AES", &rijndael_setkey,
            &rijndael_encrypt, &_gcry_aes_cbc_dec, nblocks, blocksize,
 	   context_size);
 }
@@ -2198,7 +2198,7 @@ selftest_cfb_128 (void)
   const int blocksize = BLOCKSIZE;
   const int context_size = sizeof(RIJNDAEL_context);
 
-  return _gcry_selftest_helper_cfb_128("AES", &rijndael_setkey,
+  return _gcry_selftest_helper_cfb("AES", &rijndael_setkey,
            &rijndael_encrypt, &_gcry_aes_cfb_dec, nblocks, blocksize,
 	   context_size);
 }
diff --git a/cipher/serpent.c b/cipher/serpent.c
index 95ac7c1..c72951e 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -997,7 +997,7 @@ selftest_ctr_128 (void)
   const int blocksize = sizeof(serpent_block_t);
   const int context_size = sizeof(serpent_context_t);
 
-  return _gcry_selftest_helper_ctr_128("SERPENT", &serpent_setkey,
+  return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey,
            &serpent_encrypt, &_gcry_serpent_ctr_enc, nblocks, blocksize,
 	   context_size);
 }
@@ -1012,7 +1012,7 @@ selftest_cbc_128 (void)
   const int blocksize = sizeof(serpent_block_t);
   const int context_size = sizeof(serpent_context_t);
 
-  return _gcry_selftest_helper_cbc_128("SERPENT", &serpent_setkey,
+  return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey,
            &serpent_encrypt, &_gcry_serpent_cbc_dec, nblocks, blocksize,
 	   context_size);
 }
@@ -1027,7 +1027,7 @@ selftest_cfb_128 (void)
   const int blocksize = sizeof(serpent_block_t);
   const int context_size = sizeof(serpent_context_t);
 
-  return _gcry_selftest_helper_cfb_128("SERPENT", &serpent_setkey,
+  return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey,
            &serpent_encrypt, &_gcry_serpent_cfb_dec, nblocks, blocksize,
 	   context_size);
 }

-----------------------------------------------------------------------

Summary of changes:
 cipher/Makefile.am       |    2 +-
 cipher/camellia-glue.c   |    6 +-
 cipher/cast5-amd64.S     |  587 ++++++++++++++++++++++++++++++++++++++++++++++
 cipher/cast5.c           |  278 +++++++++++++++++++++-
 cipher/cipher-selftest.c |  116 +++++-----
 cipher/cipher-selftest.h |   30 ++--
 cipher/cipher.c          |    7 +
 cipher/rijndael.c        |    6 +-
 cipher/serpent.c         |    6 +-
 configure.ac             |    7 +
 src/cipher.h             |   13 +
 11 files changed, 970 insertions(+), 88 deletions(-)
 create mode 100644 cipher/cast5-amd64.S


hooks/post-receive
-- 
The GNU crypto library
http://git.gnupg.org




More information about the Gnupg-commits mailing list