[PATCH 3/6] Add bulk OCB for Twofish AMD64 implementation

Jussi Kivilinna jussi.kivilinna at iki.fi
Mon Jul 27 11:04:25 CEST 2015


* cipher/cipher.c (_gcry_cipher_open_internal): Setup OCB bulk
functions for Twofish.
* cipher/twofish-amd64.S: Add OCB assembly functions.
* cipher/twofish.c (_gcry_twofish_amd64_ocb_enc)
(_gcry_twofish_amd64_ocb_dec, _gcry_twofish_amd64_ocb_auth): New
prototypes.
(call_sysv_fn5, call_sysv_fn6, twofish_amd64_ocb_enc)
(twofish_amd64_ocb_dec, twofish_amd64_ocb_auth, get_l)
(_gcry_twofish_ocb_crypt, _gcry_twofish_ocb_auth): New.
* src/cipher.h (_gcry_twofish_ocb_crypt)
(_gcry_twofish_ocb_auth): New.
* tests/basic.c (check_ocb_cipher): Add test-vector for Twofish.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/cipher.c        |    2 
 cipher/twofish-amd64.S |  310 ++++++++++++++++++++++++++++++++++++++++++++++++
 cipher/twofish.c       |  259 ++++++++++++++++++++++++++++++++++++++++
 src/cipher.h           |    5 +
 tests/basic.c          |   20 ++-
 5 files changed, 588 insertions(+), 8 deletions(-)

diff --git a/cipher/cipher.c b/cipher/cipher.c
index 2d2b0ad..8483c5f 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -561,6 +561,8 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
               h->bulk.cbc_dec = _gcry_twofish_cbc_dec;
               h->bulk.cfb_dec = _gcry_twofish_cfb_dec;
               h->bulk.ctr_enc = _gcry_twofish_ctr_enc;
+              h->bulk.ocb_crypt = _gcry_twofish_ocb_crypt;
+              h->bulk.ocb_auth  = _gcry_twofish_ocb_auth;
               break;
 #endif /*USE_TWOFISH*/
 
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index ea88b94..aa964e0 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -1,6 +1,6 @@
 /* twofish-amd64.S  -  AMD64 assembly implementation of Twofish cipher
  *
- * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna at iki.fi>
  *
  * This file is part of Libgcrypt.
  *
@@ -734,5 +734,313 @@ _gcry_twofish_amd64_cfb_dec:
 	ret;
 ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
 
+.align 8
+.globl _gcry_twofish_amd64_ocb_enc
+ELF(.type   _gcry_twofish_amd64_ocb_enc,@function;)
+_gcry_twofish_amd64_ocb_enc:
+	/* OCB encryption of three blocks; input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (3 blocks)
+	 *	%rdx: src (3 blocks)
+	 *	%rcx: offset
+	 *	%r8 : checksum
+	 *	%r9 : L pointers (void *L[3])
+	 */
+	subq $(8 * 8), %rsp; /* slots 0-5: saved registers, slot 6: dst */
+	movq %rbp, (0 * 8)(%rsp);
+	movq %rbx, (1 * 8)(%rsp);
+	movq %r12, (2 * 8)(%rsp);
+	movq %r13, (3 * 8)(%rsp);
+	movq %r14, (4 * 8)(%rsp);
+	movq %r15, (5 * 8)(%rsp);
+
+	movq %rsi, (6 * 8)(%rsp); /* dst is reloaded from here after the call */
+	movq %rdx, RX0; /* RX0 = src */
+	movq %rcx, RX1; /* RX1 = offset */
+	movq %r8, RX2; /* RX2 = checksum */
+	movq %r9, RY0; /* RY0 = L pointer array */
+	movq %rsi, RY1; /* RY1 = dst, used below to hold each Offset_i */
+
+	/* Load offset */
+	movq (0 * 8)(RX1), RT0;
+	movq (1 * 8)(RX1), RT1;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq (RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (0 * 8)(RX0), RAB0;
+	movq (1 * 8)(RX0), RCD0;
+	/* Store Offset_i (into dst; the cipher output is xored onto it later) */
+	movq RT0, (0 * 8)(RY1);
+	movq RT1, (1 * 8)(RY1);
+	/* Checksum_i = Checksum_{i-1} xor P_i  */
+	xor RAB0, (0 * 8)(RX2);
+	xor RCD0, (1 * 8)(RX2);
+	/* PX_i = P_i xor Offset_i */
+	xorq RT0, RAB0;
+	xorq RT1, RCD0;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq 8(RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (2 * 8)(RX0), RAB1;
+	movq (3 * 8)(RX0), RCD1;
+	/* Store Offset_i (into dst; the cipher output is xored onto it later) */
+	movq RT0, (2 * 8)(RY1);
+	movq RT1, (3 * 8)(RY1);
+	/* Checksum_i = Checksum_{i-1} xor P_i  */
+	xor RAB1, (0 * 8)(RX2);
+	xor RCD1, (1 * 8)(RX2);
+	/* PX_i = P_i xor Offset_i */
+	xorq RT0, RAB1;
+	xorq RT1, RCD1;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq 16(RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (4 * 8)(RX0), RAB2;
+	movq (5 * 8)(RX0), RCD2;
+	/* Store Offset_i (into dst; the cipher output is xored onto it later) */
+	movq RT0, (4 * 8)(RY1);
+	movq RT1, (5 * 8)(RY1);
+	/* Checksum_i = Checksum_{i-1} xor P_i  */
+	xor RAB2, (0 * 8)(RX2);
+	xor RCD2, (1 * 8)(RX2);
+	/* PX_i = P_i xor Offset_i */
+	xorq RT0, RAB2;
+	xorq RT1, RCD2;
+
+	/* Store offset (the offset of the third block is the new running offset) */
+	movq RT0, (0 * 8)(RX1);
+	movq RT1, (1 * 8)(RX1);
+
+	/* CX_i = ENCIPHER(K, PX_i)  */
+	call __twofish_enc_blk3;
+
+	movq (6 * 8)(%rsp), RX1; /*dst*/
+
+	/* C_i = CX_i xor Offset_i (result is read as RCDn first, then RABn) */
+	xorq RCD0, (0 * 8)(RX1);
+	xorq RAB0, (1 * 8)(RX1);
+	xorq RCD1, (2 * 8)(RX1);
+	xorq RAB1, (3 * 8)(RX1);
+	xorq RCD2, (4 * 8)(RX1);
+	xorq RAB2, (5 * 8)(RX1);
+
+	movq (0 * 8)(%rsp), %rbp;
+	movq (1 * 8)(%rsp), %rbx;
+	movq (2 * 8)(%rsp), %r12;
+	movq (3 * 8)(%rsp), %r13;
+	movq (4 * 8)(%rsp), %r14;
+	movq (5 * 8)(%rsp), %r15;
+	addq $(8 * 8), %rsp;
+
+	ret;
+ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)
+
+.align 8
+.globl _gcry_twofish_amd64_ocb_dec
+ELF(.type   _gcry_twofish_amd64_ocb_dec,@function;)
+_gcry_twofish_amd64_ocb_dec:
+	/* OCB decryption of three blocks; input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (3 blocks)
+	 *	%rdx: src (3 blocks)
+	 *	%rcx: offset
+	 *	%r8 : checksum
+	 *	%r9 : L pointers (void *L[3])
+	 */
+	subq $(8 * 8), %rsp; /* slots 0-5: saved registers, 6: dst, 7: checksum */
+	movq %rbp, (0 * 8)(%rsp);
+	movq %rbx, (1 * 8)(%rsp);
+	movq %r12, (2 * 8)(%rsp);
+	movq %r13, (3 * 8)(%rsp);
+	movq %r14, (4 * 8)(%rsp);
+	movq %r15, (5 * 8)(%rsp);
+
+	movq %rsi, (6 * 8)(%rsp); /* dst is reloaded from here after the call */
+	movq %r8,  (7 * 8)(%rsp); /* checksum is only updated after the call */
+	movq %rdx, RX0; /* RX0 = src */
+	movq %rcx, RX1; /* RX1 = offset */
+	movq %r9, RY0; /* RY0 = L pointer array */
+	movq %rsi, RY1; /* RY1 = dst, used below to hold each Offset_i */
+
+	/* Load offset */
+	movq (0 * 8)(RX1), RT0;
+	movq (1 * 8)(RX1), RT1;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq (RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (0 * 8)(RX0), RAB0;
+	movq (1 * 8)(RX0), RCD0;
+	/* Store Offset_i (into dst; the cipher output is xored onto it later) */
+	movq RT0, (0 * 8)(RY1);
+	movq RT1, (1 * 8)(RY1);
+	/* CX_i = C_i xor Offset_i */
+	xorq RT0, RAB0;
+	xorq RT1, RCD0;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq 8(RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (2 * 8)(RX0), RAB1;
+	movq (3 * 8)(RX0), RCD1;
+	/* Store Offset_i (into dst; the cipher output is xored onto it later) */
+	movq RT0, (2 * 8)(RY1);
+	movq RT1, (3 * 8)(RY1);
+	/* CX_i = C_i xor Offset_i */
+	xorq RT0, RAB1;
+	xorq RT1, RCD1;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq 16(RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (4 * 8)(RX0), RAB2;
+	movq (5 * 8)(RX0), RCD2;
+	/* Store Offset_i (into dst; the cipher output is xored onto it later) */
+	movq RT0, (4 * 8)(RY1);
+	movq RT1, (5 * 8)(RY1);
+	/* CX_i = C_i xor Offset_i */
+	xorq RT0, RAB2;
+	xorq RT1, RCD2;
+
+	/* Store offset (the offset of the third block is the new running offset) */
+	movq RT0, (0 * 8)(RX1);
+	movq RT1, (1 * 8)(RX1);
+
+	/* PX_i = DECIPHER(K, CX_i)  */
+	call __twofish_dec_blk3;
+
+	movq (7 * 8)(%rsp), RX2; /*checksum*/
+	movq (6 * 8)(%rsp), RX1; /*dst*/
+
+	/* Load checksum */
+	movq (0 * 8)(RX2), RT0;
+	movq (1 * 8)(RX2), RT1;
+
+	/* P_i = PX_i xor Offset_i (result is read as RCDn first, then RABn) */
+	xorq RCD0, (0 * 8)(RX1);
+	xorq RAB0, (1 * 8)(RX1);
+	xorq RCD1, (2 * 8)(RX1);
+	xorq RAB1, (3 * 8)(RX1);
+	xorq RCD2, (4 * 8)(RX1);
+	xorq RAB2, (5 * 8)(RX1);
+
+	/* Checksum_i = Checksum_{i-1} xor P_i (P_i is read back from dst) */
+	xorq (0 * 8)(RX1), RT0;
+	xorq (1 * 8)(RX1), RT1;
+	xorq (2 * 8)(RX1), RT0;
+	xorq (3 * 8)(RX1), RT1;
+	xorq (4 * 8)(RX1), RT0;
+	xorq (5 * 8)(RX1), RT1;
+
+	/* Store checksum */
+	movq RT0, (0 * 8)(RX2);
+	movq RT1, (1 * 8)(RX2);
+
+	movq (0 * 8)(%rsp), %rbp;
+	movq (1 * 8)(%rsp), %rbx;
+	movq (2 * 8)(%rsp), %r12;
+	movq (3 * 8)(%rsp), %r13;
+	movq (4 * 8)(%rsp), %r14;
+	movq (5 * 8)(%rsp), %r15;
+	addq $(8 * 8), %rsp;
+
+	ret;
+ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)
+
+.align 8
+.globl _gcry_twofish_amd64_ocb_auth
+ELF(.type   _gcry_twofish_amd64_ocb_auth,@function;)
+_gcry_twofish_amd64_ocb_auth:
+	/* OCB authentication of three blocks; input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: abuf (3 blocks)
+	 *	%rdx: offset
+	 *	%rcx: checksum
+	 *	%r8 : L pointers (void *L[3])
+	 */
+	subq $(8 * 8), %rsp; /* slots 0-5: saved registers, slot 6: checksum */
+	movq %rbp, (0 * 8)(%rsp);
+	movq %rbx, (1 * 8)(%rsp);
+	movq %r12, (2 * 8)(%rsp);
+	movq %r13, (3 * 8)(%rsp);
+	movq %r14, (4 * 8)(%rsp);
+	movq %r15, (5 * 8)(%rsp);
+
+	movq %rcx, (6 * 8)(%rsp); /* checksum is only updated after the call */
+	movq %rsi, RX0; /* RX0 = abuf */
+	movq %rdx, RX1; /* RX1 = offset */
+	movq %r8, RY0; /* RY0 = L pointer array */
+
+	/* Load offset */
+	movq (0 * 8)(RX1), RT0;
+	movq (1 * 8)(RX1), RT1;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq (RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (0 * 8)(RX0), RAB0;
+	movq (1 * 8)(RX0), RCD0;
+	/* PX_i = P_i xor Offset_i */
+	xorq RT0, RAB0;
+	xorq RT1, RCD0;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq 8(RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (2 * 8)(RX0), RAB1;
+	movq (3 * 8)(RX0), RCD1;
+	/* PX_i = P_i xor Offset_i */
+	xorq RT0, RAB1;
+	xorq RT1, RCD1;
+
+	/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	movq 16(RY0), RY2;
+	xorq (0 * 8)(RY2), RT0;
+	xorq (1 * 8)(RY2), RT1;
+	movq (4 * 8)(RX0), RAB2;
+	movq (5 * 8)(RX0), RCD2;
+	/* PX_i = P_i xor Offset_i */
+	xorq RT0, RAB2;
+	xorq RT1, RCD2;
+
+	/* Store offset (the offset of the third block is the new running offset) */
+	movq RT0, (0 * 8)(RX1);
+	movq RT1, (1 * 8)(RX1);
+
+	/* C_i = ENCIPHER(K, PX_i)  */
+	call __twofish_enc_blk3;
+
+	movq (6 * 8)(%rsp), RX1; /*checksum*/
+
+	/* Checksum_i = C_i xor Checksum_i (fold the three C_i together first) */
+	xorq RCD0, RCD1;
+	xorq RAB0, RAB1;
+	xorq RCD1, RCD2;
+	xorq RAB1, RAB2;
+	xorq RCD2, (0 * 8)(RX1);
+	xorq RAB2, (1 * 8)(RX1);
+
+	movq (0 * 8)(%rsp), %rbp;
+	movq (1 * 8)(%rsp), %rbx;
+	movq (2 * 8)(%rsp), %r12;
+	movq (3 * 8)(%rsp), %r13;
+	movq (4 * 8)(%rsp), %r14;
+	movq (5 * 8)(%rsp), %r15;
+	addq $(8 * 8), %rsp;
+
+	ret;
+ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;)
+
 #endif /*USE_TWOFISH*/
 #endif /*__x86_64*/
diff --git a/cipher/twofish.c b/cipher/twofish.c
index ce83fad..9b9c35f 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -45,6 +45,7 @@
 #include "g10lib.h"
 #include "cipher.h"
 #include "bufhelp.h"
+#include "cipher-internal.h"
 #include "cipher-selftest.h"
 
 
@@ -755,6 +756,18 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
 extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
 					const byte *in, byte *iv);
 
+extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out,
+					const byte *in, byte *offset,
+					byte *checksum, const void *Ls[3]);
+
+extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out,
+					const byte *in, byte *offset,
+					byte *checksum, const void *Ls[3]);
+
+extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx,
+					 const byte *abuf, byte *offset,
+					 byte *checksum, const void *Ls[3]);
+
 #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
 static inline void
 call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
@@ -771,6 +784,43 @@ call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
                 :
                 : "cc", "memory", "r8", "r9", "r10", "r11");
 }
+
+static inline void
+call_sysv_fn5 (const void *fn, const void *arg1, const void *arg2,
+               const void *arg3, const void *arg4, const void *arg5)
+{
+  /* Call five-argument SystemV ABI function without storing non-volatile XMM
+   * registers, as target function does not use vector instruction sets. */
+  asm volatile ("movq %[arg5], %%r8\n\t" /* fifth argument goes in r8 */
+		"callq *%0\n\t" /* indirect call through operand 0 (fn) */
+		: "+a" (fn), /* rax; read-write as the callee may change it */
+		  "+D" (arg1), /* rdi */
+		  "+S" (arg2), /* rsi */
+		  "+d" (arg3), /* rdx */
+		  "+c" (arg4) /* rcx */
+		: [arg5] "g" (arg5)
+		: "cc", "memory", "r8", "r9", "r10", "r11");
+}
+
+static inline void
+call_sysv_fn6 (const void *fn, const void *arg1, const void *arg2,
+               const void *arg3, const void *arg4, const void *arg5,
+	       const void *arg6)
+{
+  /* Call six-argument SystemV ABI function without storing non-volatile XMM
+   * registers, as target function does not use vector instruction sets. */
+  asm volatile ("movq %[arg5], %%r8\n\t" /* fifth argument goes in r8 */
+		"movq %[arg6], %%r9\n\t" /* sixth argument goes in r9 */
+		"callq *%0\n\t" /* indirect call through operand 0 (fn) */
+		: "+a" (fn), /* rax; read-write as the callee may change it */
+		  "+D" (arg1), /* rdi */
+		  "+S" (arg2), /* rsi */
+		  "+d" (arg3), /* rdx */
+		  "+c" (arg4) /* rcx */
+		: [arg5] "g" (arg5),
+		  [arg6] "g" (arg6)
+		: "cc", "memory", "r8", "r9", "r10", "r11");
+}
 #endif
 
 static inline void
@@ -826,6 +876,39 @@ twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
 #endif
 }
 
+static inline void
+twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in,
+		      byte *offset, byte *checksum, const void *Ls[3])
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS /* go through the SysV call shim */
+  call_sysv_fn6(_gcry_twofish_amd64_ocb_enc, ctx, out, in, offset, checksum, Ls);
+#else /* call the assembly routine directly */
+  _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls);
+#endif /* HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS */
+}
+
+static inline void
+twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in,
+		      byte *offset, byte *checksum, const void *Ls[3])
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS /* go through the SysV call shim */
+  call_sysv_fn6(_gcry_twofish_amd64_ocb_dec, ctx, out, in, offset, checksum, Ls);
+#else /* call the assembly routine directly */
+  _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls);
+#endif /* HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS */
+}
+
+static inline void
+twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf,
+		       byte *offset, byte *checksum, const void *Ls[3])
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS /* go through the SysV call shim */
+  call_sysv_fn5(_gcry_twofish_amd64_ocb_auth, ctx, abuf, offset, checksum, Ls);
+#else /* call the assembly routine directly */
+  _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls);
+#endif /* HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS */
+}
+
 #elif defined(USE_ARM_ASM)
 
 /* Assembly implementations of Twofish. */
@@ -1188,6 +1271,182 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
   _gcry_burn_stack(burn_stack_depth);
 }
 
+static inline const unsigned char *
+get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i)
+{
+  unsigned int ntz = _gcry_ctz64 (i); /* selects L_{ntz(i)} for block index i */
+
+  if (ntz < OCB_L_TABLE_SIZE)
+      return c->u_mode.ocb.L[ntz]; /* entry comes from the handle's L table */
+  else
+      return _gcry_cipher_ocb_get_l (c, l_tmp, i); /* generic helper, given l_tmp */
+}
+
+/* Bulk OCB encryption (ENCRYPT != 0) or decryption of NBLOCKS complete blocks. */
+void
+_gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+			const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  TWOFISH_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char l_tmp[TWOFISH_BLOCKSIZE];
+  const unsigned char *l;
+  unsigned int burn, burn_stack_depth = 0;
+  u64 blkn = c->u_mode.ocb.data_nblocks; /* data blocks processed so far */
+
+#ifdef USE_AMD64_ASM
+  {
+    const void *Ls[3];
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+	/* l_tmp will be used only every 65536-th block. */
+	Ls[0] = get_l(c, l_tmp, blkn + 1);
+	Ls[1] = get_l(c, l_tmp, blkn + 2);
+	Ls[2] = get_l(c, l_tmp, blkn + 3);
+	blkn += 3;
+
+	if (encrypt)
+	  twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+				Ls);
+	else
+	  twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+				Ls);
+
+	nblocks -= 3;
+	outbuf += 3 * TWOFISH_BLOCKSIZE;
+	inbuf  += 3 * TWOFISH_BLOCKSIZE;
+
+	burn = 8 * sizeof(void*); /* the assembly routines reserve 8 stack slots */
+	if (burn > burn_stack_depth)
+	  burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  if (encrypt)
+    {
+      for (; nblocks; nblocks--)
+	{
+	  l = get_l(c, l_tmp, ++blkn);
+
+	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	  buf_xor_1 (c->u_iv.iv, l, TWOFISH_BLOCKSIZE);
+	  buf_cpy (l_tmp, inbuf, TWOFISH_BLOCKSIZE);
+	  /* Checksum_i = Checksum_{i-1} xor P_i  */
+	  buf_xor_1 (c->u_ctr.ctr, l_tmp, TWOFISH_BLOCKSIZE);
+	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+	  buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE);
+	  burn = twofish_encrypt(ctx, l_tmp, l_tmp);
+	  if (burn > burn_stack_depth)
+	    burn_stack_depth = burn;
+	  buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE);
+	  buf_cpy (outbuf, l_tmp, TWOFISH_BLOCKSIZE);
+
+	  inbuf += TWOFISH_BLOCKSIZE;
+	  outbuf += TWOFISH_BLOCKSIZE;
+	}
+    }
+  else
+    {
+      for (; nblocks; nblocks--)
+	{
+	  l = get_l(c, l_tmp, ++blkn);
+
+	  /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+	  buf_xor_1 (c->u_iv.iv, l, TWOFISH_BLOCKSIZE);
+	  buf_cpy (l_tmp, inbuf, TWOFISH_BLOCKSIZE);
+	  /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+	  buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE);
+	  burn = twofish_decrypt(ctx, l_tmp, l_tmp);
+	  if (burn > burn_stack_depth)
+	    burn_stack_depth = burn;
+	  buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE);
+	  /* Checksum_i = Checksum_{i-1} xor P_i  */
+	  buf_xor_1 (c->u_ctr.ctr, l_tmp, TWOFISH_BLOCKSIZE);
+	  buf_cpy (outbuf, l_tmp, TWOFISH_BLOCKSIZE);
+
+	  inbuf += TWOFISH_BLOCKSIZE;
+	  outbuf += TWOFISH_BLOCKSIZE;
+	}
+    }
+
+  c->u_mode.ocb.data_nblocks = blkn; /* write back the updated block count */
+
+  wipememory(&l_tmp, sizeof(l_tmp)); /* l_tmp held block data in the loops above */
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+}
+
+/* Bulk authentication of NBLOCKS complete blocks from ABUF_ARG in OCB mode. */
+void
+_gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+			size_t nblocks)
+{
+  TWOFISH_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  unsigned char l_tmp[TWOFISH_BLOCKSIZE];
+  const unsigned char *l;
+  unsigned int burn, burn_stack_depth = 0;
+  u64 blkn = c->u_mode.ocb.aad_nblocks; /* AAD blocks processed so far */
+
+#ifdef USE_AMD64_ASM
+  {
+    const void *Ls[3];
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+	/* l_tmp will be used only every 65536-th block. */
+	Ls[0] = get_l(c, l_tmp, blkn + 1);
+	Ls[1] = get_l(c, l_tmp, blkn + 2);
+	Ls[2] = get_l(c, l_tmp, blkn + 3);
+	blkn += 3;
+
+	twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+			      c->u_mode.ocb.aad_sum, Ls);
+
+	nblocks -= 3;
+	abuf += 3 * TWOFISH_BLOCKSIZE;
+
+	burn = 8 * sizeof(void*); /* the assembly routine reserves 8 stack slots */
+	if (burn > burn_stack_depth)
+	  burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for (; nblocks; nblocks--)
+    {
+      l = get_l(c, l_tmp, ++blkn);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      buf_xor_1 (c->u_mode.ocb.aad_offset, l, TWOFISH_BLOCKSIZE);
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, TWOFISH_BLOCKSIZE);
+      burn = twofish_encrypt(ctx, l_tmp, l_tmp);
+      if (burn > burn_stack_depth)
+	burn_stack_depth = burn;
+      buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, TWOFISH_BLOCKSIZE);
+
+      abuf += TWOFISH_BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.aad_nblocks = blkn; /* write back the updated block count */
+
+  wipememory(&l_tmp, sizeof(l_tmp)); /* l_tmp held block data in the loop above */
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+}
+
 

 
 /* Run the self-tests for TWOFISH-CTR, tests IV increment of bulk CTR
diff --git a/src/cipher.h b/src/cipher.h
index a0aac51..1a66f6d 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -217,6 +217,11 @@ void _gcry_twofish_cbc_dec (void *context, unsigned char *iv,
 void _gcry_twofish_cfb_dec (void *context, unsigned char *iv,
                             void *outbuf_arg, const void *inbuf_arg,
                             size_t nblocks);
+void _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+			      const void *inbuf_arg, size_t nblocks,
+			      int encrypt);
+void _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+			     size_t nblocks);
 
 /*-- dsa.c --*/
 void _gcry_register_pk_dsa_progress (gcry_handler_progress_t cbc, void *cb_data);
diff --git a/tests/basic.c b/tests/basic.c
index e3f4bfd..124df55 100644
--- a/tests/basic.c
+++ b/tests/basic.c
@@ -3330,20 +3330,26 @@ check_ocb_cipher (void)
 
   /* Check large buffer encryption/decryption. */
   check_ocb_cipher_largebuf(GCRY_CIPHER_AES, 16,
-                            "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8"
-                            "\x33\xfd\x7a\x4f\x42\x60\x5d\x20");
+			    "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8"
+			    "\x33\xfd\x7a\x4f\x42\x60\x5d\x20");
   check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32,
-                            "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d"
-                            "\xfe\x96\x67\xc9\xc8\x41\x03\x51");
+			    "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d"
+			    "\xfe\x96\x67\xc9\xc8\x41\x03\x51");
   check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA128, 16,
-                            "\x28\x23\x38\x45\x2b\xfd\x42\x45"
+			    "\x28\x23\x38\x45\x2b\xfd\x42\x45"
 			    "\x43\x64\x7e\x67\x7f\xf4\x8b\xcd");
   check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA192, 24,
-                            "\xee\xca\xe5\x39\x27\x2d\x33\xe7"
+			    "\xee\xca\xe5\x39\x27\x2d\x33\xe7"
 			    "\x79\x74\xb0\x1d\x37\x12\xd5\x6c");
   check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA256, 32,
-                            "\x39\x39\xd0\x2d\x05\x68\x74\xee"
+			    "\x39\x39\xd0\x2d\x05\x68\x74\xee"
 			    "\x18\x6b\xea\x3d\x0b\xd3\x58\xae");
+  check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 16,
+			    "\x63\xe3\x0e\xb9\x11\x6f\x14\xba"
+			    "\x79\xe4\xa7\x9e\xad\x3c\x02\x0c");
+  check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 32,
+			    "\xf6\xd4\xfe\x4e\x50\x85\x13\x59"
+			    "\x69\x0e\x4c\x67\x3e\xdd\x47\x90");
 }
 
 




More information about the Gcrypt-devel mailing list