[PATCH 1/2] Add OCB bulk crypt/auth functions for AES/AES-NI

Jussi Kivilinna <jussi.kivilinna at iki.fi>
Sun Apr 26 13:47:25 CEST 2015


* cipher/cipher-internal.h (gcry_cipher_handle): Add bulk.ocb_crypt
and bulk.ocb_auth.
(_gcry_cipher_ocb_get_l): New prototype.
* cipher/cipher-ocb.c (get_l): Rename to ...
(_gcry_cipher_ocb_get_l): ... this.
(_gcry_cipher_ocb_authenticate, ocb_crypt): Use bulk function when
available.
* cipher/cipher.c (_gcry_cipher_open_internal): Setup OCB bulk
functions for AES.
* cipher/rijndael-aesni.c (get_l, aesni_ocb_enc, aes_ocb_dec)
(_gcry_aes_aesni_ocb_crypt, _gcry_aes_aesni_ocb_auth): New.
* cipher/rijndael.c [USE_AESNI] (_gcry_aes_aesni_ocb_crypt)
(_gcry_aes_aesni_ocb_auth): New prototypes.
(_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth): New.
* src/cipher.h (_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth): New
prototypes.
* tests/basic.c (check_ocb_cipher_largebuf): New.
(check_ocb_cipher): Add large buffer encryption/decryption test.
--

This patch adds bulk encryption/decryption/authentication code for
AES-NI accelerated AES in OCB mode.
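
For reference, the per-block OCB operations that the new bulk functions
batch four at a time are, in scalar form, as follows (an illustrative
sketch with made-up names, not code from this patch):

  /* One OCB encryption step.  ENCIPH and KEY stand in for the block
     cipher and its expanded key; all buffers are 16 bytes.  */
  static void
  ocb_enc_block (void (*enciph)(void *key, unsigned char *dst,
                                const unsigned char *src),
                 void *key, unsigned char offset[16],
                 unsigned char checksum[16], const unsigned char l_i[16],
                 unsigned char *out, const unsigned char *in)
  {
    int j;
    for (j = 0; j < 16; j++)
      {
        offset[j] ^= l_i[j];    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
        checksum[j] ^= in[j];   /* Checksum_i = Checksum_{i-1} xor P_i */
        out[j] = in[j] ^ offset[j];
      }
    enciph (key, out, out);     /* ENCIPHER(K, P_i xor Offset_i) */
    for (j = 0; j < 16; j++)
      out[j] ^= offset[j];      /* C_i = Offset_i xor ENCIPHER(...) */
  }

The AES-NI code below keeps Offset and Checksum pinned in xmm registers
across four such steps and runs the four block-cipher calls in parallel.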

Benchmark on Intel Core i5-4570 (3200 MHz, turbo off):

Before:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        OCB enc |      2.12 ns/B     449.7 MiB/s      6.79 c/B
        OCB dec |      2.12 ns/B     449.6 MiB/s      6.79 c/B
       OCB auth |      2.07 ns/B     459.9 MiB/s      6.64 c/B

After:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        OCB enc |     0.292 ns/B    3262.5 MiB/s     0.935 c/B
        OCB dec |     0.297 ns/B    3212.2 MiB/s     0.950 c/B
       OCB auth |     0.260 ns/B    3666.1 MiB/s     0.832 c/B
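
These figures match the output format of tests/bench-slope; they were
presumably collected with something along the lines of the following
(the exact invocation is a guess, it is not stated in the mail):

  $ tests/bench-slope --cpu-mhz 3200 cipher aes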

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/cipher-internal.h |    5 
 cipher/cipher-ocb.c      |   84 +++++---
 cipher/cipher.c          |    2 
 cipher/rijndael-aesni.c  |  483 ++++++++++++++++++++++++++++++++++++++++++++++
 cipher/rijndael.c        |  161 +++++++++++++++
 src/cipher.h             |    4 
 tests/basic.c            |  174 +++++++++++++++++
 7 files changed, 884 insertions(+), 29 deletions(-)

diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 50b0324..e20ea56 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -128,6 +128,9 @@ struct gcry_cipher_handle
     void (*ctr_enc)(void *context, unsigned char *iv,
                     void *outbuf_arg, const void *inbuf_arg,
                     size_t nblocks);
+    void (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg,
+                      const void *inbuf_arg, size_t nblocks, int encrypt);
+    void (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks);
   } bulk;
 
 
@@ -440,6 +443,8 @@ gcry_err_code_t _gcry_cipher_ocb_get_tag
 gcry_err_code_t _gcry_cipher_ocb_check_tag
 /*           */ (gcry_cipher_hd_t c,
                  const unsigned char *intag, size_t taglen);
+const unsigned char *_gcry_cipher_ocb_get_l
+/*           */ (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n);
 
 
 #endif /*G10_CIPHER_INTERNAL_H*/
diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index 62e79bb..bc6fd87 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -115,8 +115,8 @@ bit_copy (unsigned char *d, const unsigned char *s,
    every 65536-th block.  L_TMP is a helper buffer of size
    OCB_BLOCK_LEN which is used to hold the computation if not taken
    from the table.  */
-static const unsigned char *
-get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
+const unsigned char *
+_gcry_cipher_ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
 {
   int ntz = _gcry_ctz64 (n);
 
@@ -257,6 +257,15 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
   if (!abuflen)
     return 0;
 
+  /* Use a bulk method if available.  */
+  if (abuflen >= OCB_BLOCK_LEN && c->bulk.ocb_auth)
+    {
+      size_t nblks = abuflen / OCB_BLOCK_LEN;
+      c->bulk.ocb_auth (c, abuf, nblks);
+      abuf += nblks * OCB_BLOCK_LEN;
+      abuflen -= nblks * OCB_BLOCK_LEN;
+    }
+
   /* Hash all full blocks.  */
   while (abuflen >= OCB_BLOCK_LEN)
     {
@@ -264,7 +273,8 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       buf_xor_1 (c->u_mode.ocb.aad_offset,
-                 get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks), OCB_BLOCK_LEN);
+                 _gcry_cipher_ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks),
+                 OCB_BLOCK_LEN);
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
       buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
       c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
@@ -341,40 +351,56 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
   else if ((inbuflen % OCB_BLOCK_LEN))
     return GPG_ERR_INV_LENGTH;  /* We support only full blocks for now.  */
 
-  if (encrypt)
+  /* Use a bulk method if available.  */
+  if (nblks && c->bulk.ocb_crypt)
     {
-      /* Checksum_i = Checksum_{i-1} xor P_i  */
-      ocb_checksum (c->u_ctr.ctr, inbuf, nblks);
+      c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+      inbuf  += nblks * OCB_BLOCK_LEN;
+      outbuf += nblks * OCB_BLOCK_LEN;
+      inbuflen -= nblks * OCB_BLOCK_LEN;
+      outbuflen -= nblks * OCB_BLOCK_LEN;
+      nblks = 0;
     }
 
-  /* Encrypt all full blocks.  */
-  while (inbuflen >= OCB_BLOCK_LEN)
+  if (nblks)
     {
-      c->u_mode.ocb.data_nblocks++;
+      gcry_cipher_encrypt_t crypt_fn =
+          encrypt ? c->spec->encrypt : c->spec->decrypt;
 
-      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-      buf_xor_1 (c->u_iv.iv,
-                 get_l (c, l_tmp, c->u_mode.ocb.data_nblocks), OCB_BLOCK_LEN);
-      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
-      buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
       if (encrypt)
-        nburn = c->spec->encrypt (&c->context.c, outbuf, outbuf);
-      else
-        nburn = c->spec->decrypt (&c->context.c, outbuf, outbuf);
-      burn = nburn > burn ? nburn : burn;
-      buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+        {
+          /* Checksum_i = Checksum_{i-1} xor P_i  */
+          ocb_checksum (c->u_ctr.ctr, inbuf, nblks);
+        }
 
-      inbuf += OCB_BLOCK_LEN;
-      inbuflen -= OCB_BLOCK_LEN;
-      outbuf += OCB_BLOCK_LEN;
-      outbuflen =- OCB_BLOCK_LEN;
-    }
+      /* Encrypt all full blocks.  */
+      while (inbuflen >= OCB_BLOCK_LEN)
+        {
+          c->u_mode.ocb.data_nblocks++;
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_iv.iv,
+                     _gcry_cipher_ocb_get_l (c, l_tmp,
+                                             c->u_mode.ocb.data_nblocks),
+                     OCB_BLOCK_LEN);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+          buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+          nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+          inbuf += OCB_BLOCK_LEN;
+          inbuflen -= OCB_BLOCK_LEN;
+          outbuf += OCB_BLOCK_LEN;
+          outbuflen -= OCB_BLOCK_LEN;
+        }
 
-  if (!encrypt)
-    {
-      /* Checksum_i = Checksum_{i-1} xor P_i  */
-      ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks);
-     }
+      if (!encrypt)
+        {
+          /* Checksum_i = Checksum_{i-1} xor P_i  */
+          ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks);
+        }
+    }
 
   /* Encrypt final partial block.  Note that we expect INBUFLEN to be
      shorter than OCB_BLOCK_LEN (see above).  */
diff --git a/cipher/cipher.c b/cipher/cipher.c
index 0a13fe6..6e1173f 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -510,6 +510,8 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
               h->bulk.cbc_enc = _gcry_aes_cbc_enc;
               h->bulk.cbc_dec = _gcry_aes_cbc_dec;
               h->bulk.ctr_enc = _gcry_aes_ctr_enc;
+              h->bulk.ocb_crypt = _gcry_aes_ocb_crypt;
+              h->bulk.ocb_auth  = _gcry_aes_ocb_auth;
               break;
 #endif /*USE_AES*/
 #ifdef USE_BLOWFISH
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 3c367ce..9a81602 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -29,6 +29,7 @@
 #include "bufhelp.h"
 #include "cipher-selftest.h"
 #include "rijndael-internal.h"
+#include "./cipher-internal.h"
 
 
 #ifdef USE_AESNI
@@ -1251,4 +1252,486 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
   aesni_cleanup_2_6 ();
 }
 
+
+static inline const unsigned char *
+get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
+       unsigned char *ctr)
+{
+  const unsigned char *l;
+  unsigned int ntz;
+
+  if (i & 0xffffffffU)
+    {
+      asm ("rep;bsf %k[low], %k[ntz]\n\t"
+           : [ntz] "=r" (ntz)
+           : [low] "r" (i & 0xffffffffU)
+           : "cc");
+    }
+  else
+    {
+      if (OCB_L_TABLE_SIZE < 32)
+        {
+          ntz = 32;
+        }
+      else if (i)
+        {
+          asm ("rep;bsf %k[high], %k[ntz]\n\t"
+               : [ntz] "=r" (ntz)
+               : [high] "r" (i >> 32)
+               : "cc");
+          ntz += 32;
+        }
+      else
+        {
+          ntz = 64;
+        }
+    }
+
+  if (ntz < OCB_L_TABLE_SIZE)
+    {
+      l = c->u_mode.ocb.L[ntz];
+    }
+  else
+    {
+      /* Store Offset & Checksum before calling external function */
+      asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+                    "movdqu %%xmm6, %[ctr]\n\t"
+                    : [iv] "=m" (*iv),
+                      [ctr] "=m" (*ctr)
+                    :
+                    : "memory" );
+
+      l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
+
+      /* Restore Offset & Checksum */
+      asm volatile ("movdqu %[iv], %%xmm5\n\t"
+                    "movdqu %[ctr], %%xmm6\n\t"
+                    : /* No output */
+                    : [iv] "m" (*iv),
+                      [ctr] "m" (*ctr)
+                    : "memory" );
+    }
+
+  return l;
+}
+
+
+static void
+aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+               const void *inbuf_arg, size_t nblocks)
+{
+  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  u64 n = c->u_mode.ocb.data_nblocks;
+
+  aesni_prepare ();
+
+  /* Preload Offset and Checksum */
+  asm volatile ("movdqu %[iv], %%xmm5\n\t"
+                "movdqu %[ctr], %%xmm6\n\t"
+                : /* No output */
+                : [iv] "m" (*c->u_iv.iv),
+                  [ctr] "m" (*c->u_ctr.ctr)
+                : "memory" );
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
+    {
+      const unsigned char *l[4];
+
+      /* l_tmp will be used only every 65536-th block. */
+      l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l[1] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l[2] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l[3] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+                    "movdqu %[inbuf0], %%xmm1\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm1,    %%xmm6\n\t"
+                    "pxor   %%xmm5,    %%xmm1\n\t"
+                    "movdqu %%xmm5,    %[outbuf0]\n\t"
+                    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+                    : [l0] "m" (*l[0]),
+                      [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+                    "movdqu %[inbuf1], %%xmm2\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm2,    %%xmm6\n\t"
+                    "pxor   %%xmm5,    %%xmm2\n\t"
+                    "movdqu %%xmm5,    %[outbuf1]\n\t"
+                    : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+                    : [l1] "m" (*l[1]),
+                      [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+                    "movdqu %[inbuf2], %%xmm3\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm3,    %%xmm6\n\t"
+                    "pxor   %%xmm5,    %%xmm3\n\t"
+                    "movdqu %%xmm5,    %[outbuf2]\n\t"
+                    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+                    : [l2] "m" (*l[2]),
+                      [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+                    "movdqu %[inbuf3], %%xmm4\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm4,    %%xmm6\n\t"
+                    "pxor   %%xmm5,    %%xmm4\n\t"
+                    :
+                    : [l3] "m" (*l[3]),
+                      [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+                    : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+                    "pxor   %%xmm0,    %%xmm1\n\t"
+                    "movdqu %%xmm1,    %[outbuf0]\n\t"
+                    "movdqu %[outbuf1],%%xmm0\n\t"
+                    "pxor   %%xmm0,    %%xmm2\n\t"
+                    "movdqu %%xmm2,    %[outbuf1]\n\t"
+                    "movdqu %[outbuf2],%%xmm0\n\t"
+                    "pxor   %%xmm0,    %%xmm3\n\t"
+                    "movdqu %%xmm3,    %[outbuf2]\n\t"
+                    "pxor   %%xmm5,    %%xmm4\n\t"
+                    "movdqu %%xmm4,    %[outbuf3]\n\t"
+                    : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+                      [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+                      [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+                      [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+                    :
+                    : "memory" );
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
+    }
+  for ( ;nblocks; nblocks-- )
+    {
+      const unsigned char *l;
+
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm0,   %%xmm6\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.data_nblocks = n;
+  asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+                "movdqu %%xmm6, %[ctr]\n\t"
+                : [iv] "=m" (*c->u_iv.iv),
+                  [ctr] "=m" (*c->u_ctr.ctr)
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_6 ();
+
+  wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
+static void
+aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+               const void *inbuf_arg, size_t nblocks)
+{
+  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  u64 n = c->u_mode.ocb.data_nblocks;
+
+  aesni_prepare ();
+
+  /* Preload Offset and Checksum */
+  asm volatile ("movdqu %[iv], %%xmm5\n\t"
+                "movdqu %[ctr], %%xmm6\n\t"
+                : /* No output */
+                : [iv] "m" (*c->u_iv.iv),
+                  [ctr] "m" (*c->u_ctr.ctr)
+                : "memory" );
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
+    {
+      const unsigned char *l[4];
+
+      /* l_tmp will be used only every 65536-th block. */
+      l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l[1] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l[2] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l[3] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+                    "movdqu %[inbuf0], %%xmm1\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm5,    %%xmm1\n\t"
+                    "movdqu %%xmm5,    %[outbuf0]\n\t"
+                    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+                    : [l0] "m" (*l[0]),
+                      [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+                    "movdqu %[inbuf1], %%xmm2\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm5,    %%xmm2\n\t"
+                    "movdqu %%xmm5,    %[outbuf1]\n\t"
+                    : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+                    : [l1] "m" (*l[1]),
+                      [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+                    "movdqu %[inbuf2], %%xmm3\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm5,    %%xmm3\n\t"
+                    "movdqu %%xmm5,    %[outbuf2]\n\t"
+                    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+                    : [l2] "m" (*l[2]),
+                      [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+                    "movdqu %[inbuf3], %%xmm4\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm5,    %%xmm4\n\t"
+                    :
+                    : [l3] "m" (*l[3]),
+                      [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+                    : "memory" );
+
+      do_aesni_dec_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+                    "pxor   %%xmm0,    %%xmm1\n\t"
+                    "movdqu %%xmm1,    %[outbuf0]\n\t"
+                    "movdqu %[outbuf1],%%xmm0\n\t"
+                    "pxor   %%xmm0,    %%xmm2\n\t"
+                    "movdqu %%xmm2,    %[outbuf1]\n\t"
+                    "movdqu %[outbuf2],%%xmm0\n\t"
+                    "pxor   %%xmm0,    %%xmm3\n\t"
+                    "movdqu %%xmm3,    %[outbuf2]\n\t"
+                    "pxor   %%xmm5,    %%xmm4\n\t"
+                    "movdqu %%xmm4,    %[outbuf3]\n\t"
+                    "pxor   %%xmm1,    %%xmm6\n\t"
+                    "pxor   %%xmm2,    %%xmm6\n\t"
+                    "pxor   %%xmm3,    %%xmm6\n\t"
+                    "pxor   %%xmm4,    %%xmm6\n\t"
+                    : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+                      [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+                      [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+                      [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+                    :
+                    : "memory" );
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
+    }
+  for ( ;nblocks; nblocks-- )
+    {
+      const unsigned char *l;
+
+      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_dec (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "pxor   %%xmm0, %%xmm6\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.data_nblocks = n;
+  asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+                "movdqu %%xmm6, %[ctr]\n\t"
+                : [iv] "=m" (*c->u_iv.iv),
+                  [ctr] "=m" (*c->u_ctr.ctr)
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_6 ();
+
+  wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
+void
+_gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+                          const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  if (encrypt)
+    aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
+  else
+    aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+}
+
+
+void
+_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                          size_t nblocks)
+{
+  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  u64 n = c->u_mode.ocb.aad_nblocks;
+
+  aesni_prepare ();
+
+  /* Preload Offset and Sum */
+  asm volatile ("movdqu %[iv], %%xmm5\n\t"
+                "movdqu %[ctr], %%xmm6\n\t"
+                : /* No output */
+                : [iv] "m" (*c->u_mode.ocb.aad_offset),
+                  [ctr] "m" (*c->u_mode.ocb.aad_sum)
+                : "memory" );
+
+  for ( ;nblocks > 3 ; nblocks -= 4 )
+    {
+      const unsigned char *l[4];
+
+      /* l_tmp will be used only every 65536-th block. */
+      l[0] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                   c->u_mode.ocb.aad_sum);
+      l[1] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                   c->u_mode.ocb.aad_sum);
+      l[2] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                   c->u_mode.ocb.aad_sum);
+      l[3] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                   c->u_mode.ocb.aad_sum);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+                    "movdqu %[abuf0],  %%xmm1\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm5,    %%xmm1\n\t"
+                    :
+                    : [l0] "m" (*l[0]),
+                      [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm0\n\t"
+                    "movdqu %[abuf1],  %%xmm2\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm5,    %%xmm2\n\t"
+                    :
+                    : [l1] "m" (*l[1]),
+                      [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l2],     %%xmm0\n\t"
+                    "movdqu %[abuf2],  %%xmm3\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm5,    %%xmm3\n\t"
+                    :
+                    : [l2] "m" (*l[2]),
+                      [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
+                    : "memory" );
+      asm volatile ("movdqu %[l3],     %%xmm0\n\t"
+                    "movdqu %[abuf3],  %%xmm4\n\t"
+                    "pxor   %%xmm0,    %%xmm5\n\t"
+                    "pxor   %%xmm5,    %%xmm4\n\t"
+                    :
+                    : [l3] "m" (*l[3]),
+                      [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+                    : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("pxor   %%xmm1,   %%xmm6\n\t"
+                    "pxor   %%xmm2,   %%xmm6\n\t"
+                    "pxor   %%xmm3,   %%xmm6\n\t"
+                    "pxor   %%xmm4,   %%xmm6\n\t"
+                    :
+                    :
+                    : "memory" );
+
+      abuf += 4*BLOCKSIZE;
+    }
+  for ( ;nblocks; nblocks-- )
+    {
+      const unsigned char *l;
+
+      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+                c->u_mode.ocb.aad_sum);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[abuf],  %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [abuf] "m" (*abuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm0,   %%xmm6\n\t"
+                    :
+                    :
+                    : "memory" );
+
+      abuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.aad_nblocks = n;
+  asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+                "movdqu %%xmm6, %[ctr]\n\t"
+                : [iv] "=m" (*c->u_mode.ocb.aad_offset),
+                  [ctr] "=m" (*c->u_mode.ocb.aad_sum)
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_6 ();
+
+  wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
 #endif /* USE_AESNI */
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index a481e6f..ade41c9 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -48,6 +48,7 @@
 #include "bufhelp.h"
 #include "cipher-selftest.h"
 #include "rijndael-internal.h"
+#include "./cipher-internal.h"
 
 
 #ifdef USE_AMD64_ASM
@@ -97,6 +98,11 @@ extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx,
                                      unsigned char *outbuf,
                                      const unsigned char *inbuf,
                                      unsigned char *iv, size_t nblocks);
+extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                       const void *inbuf_arg, size_t nblocks,
+                                       int encrypt);
+extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                      size_t nblocks);
 #endif
 
 #ifdef USE_SSSE3
@@ -1150,6 +1156,161 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
 
 
 

+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+void
+_gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                     const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int burn_depth = 0;
+
+  if (encrypt)
+    {
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+    }
+  else
+    {
+      check_decryption_preparation (ctx);
+
+      if (ctx->prefetch_dec_fn)
+        ctx->prefetch_dec_fn();
+    }
+
+  if (0)
+    ;
+#ifdef USE_AESNI
+  else if (ctx->use_aesni)
+    {
+      _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
+      burn_depth = 0;
+    }
+#endif /*USE_AESNI*/
+  else if (encrypt)
+    {
+      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+      rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+      for ( ;nblocks; nblocks-- )
+        {
+          u64 i = ++c->u_mode.ocb.data_nblocks;
+          unsigned int ntz = _gcry_ctz64 (i);
+          const unsigned char *l;
+
+          if (ntz < OCB_L_TABLE_SIZE)
+              l = c->u_mode.ocb.L[ntz];
+          else
+              l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          /* Checksum_i = Checksum_{i-1} xor P_i  */
+          buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+
+          inbuf += BLOCKSIZE;
+          outbuf += BLOCKSIZE;
+        }
+    }
+  else
+    {
+      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+      rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
+
+      for ( ;nblocks; nblocks-- )
+        {
+          u64 i = ++c->u_mode.ocb.data_nblocks;
+          unsigned int ntz = _gcry_ctz64 (i);
+          const unsigned char *l;
+
+          if (ntz < OCB_L_TABLE_SIZE)
+              l = c->u_mode.ocb.L[ntz];
+          else
+              l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          /* Checksum_i = Checksum_{i-1} xor P_i  */
+          buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+
+          inbuf += BLOCKSIZE;
+          outbuf += BLOCKSIZE;
+        }
+    }
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+/* Bulk authentication of complete blocks in OCB mode. */
+void
+_gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  unsigned int burn_depth = 0;
+
+  if (ctx->prefetch_enc_fn)
+    ctx->prefetch_enc_fn();
+
+  if (0)
+    ;
+#ifdef USE_AESNI
+  else if (ctx->use_aesni)
+    {
+      _gcry_aes_aesni_ocb_auth (c, abuf, nblocks);
+      burn_depth = 0;
+    }
+#endif /*USE_AESNI*/
+  else
+    {
+      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+      rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+      for ( ;nblocks; nblocks-- )
+        {
+          u64 i = ++c->u_mode.ocb.aad_nblocks;
+          unsigned int ntz = _gcry_ctz64 (i);
+          const unsigned char *l;
+
+          if (ntz < OCB_L_TABLE_SIZE)
+              l = c->u_mode.ocb.L[ntz];
+          else
+              l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          buf_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE);
+          burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE);
+
+          abuf += BLOCKSIZE;
+        }
+
+      wipememory(&l_tmp, sizeof(l_tmp));
+    }
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+

 /* Run the self-tests for AES 128.  Returns NULL on success. */
 static const char*
 selftest_basic_128 (void)
diff --git a/src/cipher.h b/src/cipher.h
index f4f6cc4..7ad0b2c 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -135,6 +135,10 @@ void _gcry_aes_cbc_dec (void *context, unsigned char *iv,
 void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
                         void *outbuf_arg, const void *inbuf_arg,
                         size_t nblocks);
+void _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                         const void *inbuf_arg, size_t nblocks, int encrypt);
+void _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                         size_t nblocks);
 
 /*-- blowfish.c --*/
 void _gcry_blowfish_cfb_dec (void *context, unsigned char *iv,
diff --git a/tests/basic.c b/tests/basic.c
index 6ebc056..1175b38 100644
--- a/tests/basic.c
+++ b/tests/basic.c
@@ -3153,6 +3153,172 @@ do_check_ocb_cipher (int inplace)
 
 
 static void
+check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
+{
+  static const unsigned char key[32] =
+        "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";
+  static const unsigned char nonce[12] =
+        "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x00\x01\x02\x03";
+  const size_t buflen = 1024 * 1024 * 2 + 32;
+  unsigned char *inbuf;
+  unsigned char *outbuf;
+  gpg_error_t err = 0;
+  gcry_cipher_hd_t hde, hdd;
+  unsigned char tag[16];
+  int i;
+
+  inbuf = xmalloc(buflen);
+  if (!inbuf)
+    {
+      fail ("out-of-memory\n");
+      return;
+    }
+  outbuf = xmalloc(buflen);
+  if (!outbuf)
+    {
+      fail ("out-of-memory\n");
+      xfree(inbuf);
+      return;
+    }
+
+  for (i = 0; i < buflen; i++)
+    inbuf[i] = 'a';
+
+  err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0);
+  if (!err)
+    err = gcry_cipher_open (&hdd, algo, GCRY_CIPHER_MODE_OCB, 0);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_open failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      goto out_free;
+    }
+
+  err = gcry_cipher_setkey (hde, key, keylen);
+  if (!err)
+    err = gcry_cipher_setkey (hdd, key, keylen);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_setkey failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      gcry_cipher_close (hde);
+      gcry_cipher_close (hdd);
+      goto out_free;
+    }
+
+  err = gcry_cipher_setiv (hde, nonce, 12);
+  if (!err)
+    err = gcry_cipher_setiv (hdd, nonce, 12);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_setiv failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      gcry_cipher_close (hde);
+      gcry_cipher_close (hdd);
+      goto out_free;
+    }
+
+  err = gcry_cipher_authenticate (hde, inbuf, buflen);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_authenticate failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      gcry_cipher_close (hde);
+      gcry_cipher_close (hdd);
+      goto out_free;
+    }
+
+  err = gcry_cipher_final (hde);
+  if (!err)
+    {
+      err = gcry_cipher_encrypt (hde, outbuf, buflen, inbuf, buflen);
+    }
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_encrypt failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      gcry_cipher_close (hde);
+      gcry_cipher_close (hdd);
+      goto out_free;
+    }
+
+  /* Check that the tag matches. */
+  err = gcry_cipher_gettag (hde, tag, 16);
+  if (err)
+    {
+      fail ("cipher_ocb, gcry_cipher_gettag failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+    }
+  if (memcmp (tagexpect, tag, 16))
+    {
+      mismatch (tagexpect, 16, tag, 16);
+      fail ("cipher-ocb, encrypt tag mismatch (large, algo %d)\n", algo);
+    }
+
+  err = gcry_cipher_authenticate (hdd, inbuf, buflen);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_authenticate failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      gcry_cipher_close (hde);
+      gcry_cipher_close (hdd);
+      goto out_free;
+    }
+
+  /* Now for the decryption.  */
+  err = gcry_cipher_final (hdd);
+  if (!err)
+    {
+      err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0);
+    }
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_decrypt (large, algo %d) failed: %s\n",
+            algo, gpg_strerror (err));
+      gcry_cipher_close (hde);
+      gcry_cipher_close (hdd);
+      goto out_free;
+    }
+
+  /* We still have TAG from the encryption.  */
+  err = gcry_cipher_checktag (hdd, tag, 16);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_checktag failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+    }
+
+  /* Check that the decrypt output matches the original plaintext.  */
+  if (memcmp (inbuf, outbuf, buflen))
+    {
+      /*mismatch (inbuf, buflen, outbuf, buflen);*/
+      fail ("cipher-ocb, decrypt data mismatch (large, algo %d)\n", algo);
+    }
+
+  /* Check that gettag also works for decryption.  */
+  err = gcry_cipher_gettag (hdd, tag, 16);
+  if (err)
+    {
+      fail ("cipher_ocb, decrypt gettag failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+    }
+  if (memcmp (tagexpect, tag, 16))
+    {
+      mismatch (tagexpect, 16, tag, 16);
+      fail ("cipher-ocb, decrypt tag mismatch (large, algo %d)\n", algo);
+    }
+
+  gcry_cipher_close (hde);
+  gcry_cipher_close (hdd);
+
+out_free:
+  xfree(outbuf);
+  xfree(inbuf);
+}
+
+
+static void
 check_ocb_cipher (void)
 {
   /* Check OCB cipher with separate destination and source buffers for
@@ -3161,6 +3327,14 @@ check_ocb_cipher (void)
 
   /* Check OCB cipher with inplace encrypt/decrypt. */
   do_check_ocb_cipher(1);
+
+  /* Check large buffer encryption/decryption. */
+  check_ocb_cipher_largebuf(GCRY_CIPHER_AES, 16,
+                            "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8"
+                            "\x33\xfd\x7a\x4f\x42\x60\x5d\x20");
+  check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32,
+                            "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d"
+                            "\xfe\x96\x67\xc9\xc8\x41\x03\x51");
 }
 
 
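For completeness: applications pick up the new bulk path transparently
through the regular OCB API.  A minimal encrypt-and-tag sketch mirroring
the new test above (KEY, NONCE, AAD and the data buffers are assumed to
be set up by the caller; error handling omitted):

  gcry_cipher_hd_t hd;
  unsigned char tag[16];

  gcry_cipher_open (&hd, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_OCB, 0);
  gcry_cipher_setkey (hd, key, 16);
  gcry_cipher_setiv (hd, nonce, 12);
  gcry_cipher_authenticate (hd, aad, aadlen);
  gcry_cipher_final (hd);
  gcry_cipher_encrypt (hd, outbuf, buflen, inbuf, buflen);
  gcry_cipher_gettag (hd, tag, 16);
  gcry_cipher_close (hd);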