[PATCH 2/3] OCB: Move large L handling from bottom to upper level

Jussi Kivilinna jussi.kivilinna at iki.fi
Mon Dec 5 15:14:29 CET 2016


* cipher/cipher-ocb.c (_gcry_cipher_ocb_get_l): Remove.
(ocb_get_L_big): New.
(_gcry_cipher_ocb_authenticate): L-big handling done in upper
processing loop, so that lower level never sees the case where
'aad_nblocks % 65536 == 0'; Add missing stack burn.
(ocb_aad_finalize): Add missing stack burn.
(ocb_crypt): L-big handling done in upper processing loop, so that
lower level never sees the case where 'data_nblocks % 65536 == 0'.
* cipher/cipher-internal.h (_gcry_cipher_ocb_get_l): Remove.
(ocb_get_l): Remove 'l_tmp' usage and simplify since input
is more limited now, 'N is not multiple of 65536'.
* cipher/rijndael-aesni.c (get_l): Remove.
(aesni_ocb_enc, aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Remove
l_tmp; Use 'ocb_get_l'.
* cipher/rijndael-ssse3-amd64.c (get_l): Remove.
(ssse3_ocb_enc, ssse3_ocb_dec, _gcry_aes_ssse3_ocb_auth): Remove
l_tmp; Use 'ocb_get_l'.
* cipher/camellia-glue.c: Remove OCB l_tmp usage.
* cipher/rijndael-armv8-ce.c: Ditto.
* cipher/rijndael.c: Ditto.
* cipher/serpent.c: Ditto.
* cipher/twofish.c: Ditto.
--

Move large L value generation to up-most level to simplify lower level
ocb_get_l for greater performance and simpler implementation. This helps
implementing OCB in assembly as 'ocb_get_l' no longer has function call
on slow-path.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/camellia-glue.c        |   18 +--
 cipher/cipher-internal.h      |   36 +++--
 cipher/cipher-ocb.c           |  271 +++++++++++++++++++++++++++++------------
 cipher/rijndael-aesni.c       |   96 +--------------
 cipher/rijndael-armv8-ce.c    |   20 +--
 cipher/rijndael-ssse3-amd64.c |   96 ---------------
 cipher/rijndael.c             |    6 -
 cipher/serpent.c              |   24 +---
 cipher/twofish.c              |   20 +--
 9 files changed, 248 insertions(+), 339 deletions(-)

diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 1be35c9..7687094 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -619,7 +619,6 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   CAMELLIA_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char l_tmp[CAMELLIA_BLOCK_SIZE];
   int burn_stack_depth;
   u64 blkn = c->u_mode.ocb.data_nblocks;
 
@@ -664,9 +663,8 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	  /* Process data in 32 block chunks. */
 	  while (nblocks >= 32)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 32;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 32);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
 
 	      if (encrypt)
 		_gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -725,9 +723,8 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      if (encrypt)
 		_gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -759,8 +756,6 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
   c->u_mode.ocb.data_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
@@ -776,7 +771,6 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
   CAMELLIA_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
-  unsigned char l_tmp[CAMELLIA_BLOCK_SIZE];
   int burn_stack_depth;
   u64 blkn = c->u_mode.ocb.aad_nblocks;
 
@@ -818,9 +812,8 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	  /* Process data in 32 block chunks. */
 	  while (nblocks >= 32)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 32;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 32);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
 
 	      _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
 						 c->u_mode.ocb.aad_offset,
@@ -875,9 +868,8 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
 						c->u_mode.ocb.aad_offset,
@@ -905,8 +897,6 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
   c->u_mode.ocb.aad_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 01352f3..7204d48 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -459,28 +459,28 @@ gcry_err_code_t _gcry_cipher_ocb_get_tag
 gcry_err_code_t _gcry_cipher_ocb_check_tag
 /*           */ (gcry_cipher_hd_t c,
                  const unsigned char *intag, size_t taglen);
-const unsigned char *_gcry_cipher_ocb_get_l
-/*           */ (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n);
 
 
-/* Inline version of _gcry_cipher_ocb_get_l, with hard-coded fast paths for
-   most common cases.  */
+/* Return the L-value for block N.  Note: 'cipher_ocb.c' ensures that N
+ * will never be multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can
+ * be directly passed to _gcry_ctz() function and resulting index will
+ * never overflow the table.  */
 static inline const unsigned char *
-ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
+ocb_get_l (gcry_cipher_hd_t c, u64 n)
 {
-  if (n & 1)
-    return c->u_mode.ocb.L[0];
-  else if (n & 2)
-    return c->u_mode.ocb.L[1];
-  else
-    {
-      unsigned int ntz = _gcry_ctz64 (n);
-
-      if (ntz < OCB_L_TABLE_SIZE)
-	return c->u_mode.ocb.L[ntz];
-      else
-	return _gcry_cipher_ocb_get_l (c, l_tmp, n);
-    }
+  unsigned long ntz;
+
+#if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4)
+  /* Assumes that N != 0. */
+  asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+        : [ntz] "=r" (ntz)
+        : [low] "r" ((unsigned long)n)
+        : "cc");
+#else
+  ntz = _gcry_ctz (n);
+#endif
+
+  return c->u_mode.ocb.L[ntz];
 }
 
 #endif /*G10_CIPHER_INTERNAL_H*/
diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index d1f01d5..db42aaf 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -109,25 +109,17 @@ bit_copy (unsigned char *d, const unsigned char *s,
 }
 
 
-/* Return the L-value for block N.  In most cases we use the table;
-   only if the lower OCB_L_TABLE_SIZE bits of N are zero we need to
-   compute it.  With a table size of 16 we need to this this only
-   every 65536-th block.  L_TMP is a helper buffer of size
-   OCB_BLOCK_LEN which is used to hold the computation if not taken
-   from the table.  */
-const unsigned char *
-_gcry_cipher_ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
+/* Get L_big value for block N, where N is multiple of 65536. */
+static void
+ocb_get_L_big (gcry_cipher_hd_t c, u64 n, unsigned char *l_buf)
 {
   int ntz = _gcry_ctz64 (n);
 
-  if (ntz < OCB_L_TABLE_SIZE)
-    return c->u_mode.ocb.L[ntz];
+  gcry_assert(ntz >= OCB_L_TABLE_SIZE);
 
-  double_block_cpy (l_tmp, c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
+  double_block_cpy (l_buf, c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
   for (ntz -= OCB_L_TABLE_SIZE; ntz; ntz--)
-    double_block (l_tmp);
-
-  return l_tmp;
+    double_block (l_buf);
 }
 
 
@@ -241,7 +233,11 @@ gcry_err_code_t
 _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
                                size_t abuflen)
 {
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
   unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
 
   /* Check that a nonce and thus a key has been set and that we have
      not yet computed the tag.  We also return an error if the aad has
@@ -264,14 +260,24 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
         {
           c->u_mode.ocb.aad_nblocks++;
 
+          if ((c->u_mode.ocb.aad_nblocks % table_maxblks) == 0)
+            {
+              /* Table overflow, L needs to be generated. */
+              ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+            }
+          else
+            {
+              buf_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                       OCB_BLOCK_LEN);
+            }
+
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset,
-                     ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks),
-                     OCB_BLOCK_LEN);
+          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
           buf_xor (l_tmp, c->u_mode.ocb.aad_offset,
                    c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
-          c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
           buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
           c->u_mode.ocb.aad_nleftover = 0;
@@ -279,40 +285,83 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
     }
 
   if (!abuflen)
-    return 0;
-
-  /* Use a bulk method if available.  */
-  if (abuflen >= OCB_BLOCK_LEN && c->bulk.ocb_auth)
     {
-      size_t nblks;
-      size_t nleft;
-      size_t ndone;
+      if (burn > 0)
+        _gcry_burn_stack (burn + 4*sizeof(void*));
 
-      nblks = abuflen / OCB_BLOCK_LEN;
-      nleft = c->bulk.ocb_auth (c, abuf, nblks);
-      ndone = nblks - nleft;
-
-      abuf += ndone * OCB_BLOCK_LEN;
-      abuflen -= ndone * OCB_BLOCK_LEN;
-      nblks = nleft;
+      return 0;
     }
 
-  /* Hash all full blocks.  */
+  /* Full blocks handling. */
   while (abuflen >= OCB_BLOCK_LEN)
     {
-      c->u_mode.ocb.aad_nblocks++;
+      size_t nblks = abuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
 
-      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-      buf_xor_1 (c->u_mode.ocb.aad_offset,
-                 ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks),
-                 OCB_BLOCK_LEN);
-      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-      buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
-      c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
-      buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+      /* Check how many blocks to process till table overflow. */
+      nmaxblks = (c->u_mode.ocb.aad_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+      if (nmaxblks == 0)
+        {
+          /* Table overflow, generate L and process one block. */
+          c->u_mode.ocb.aad_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry loop again. Next overflow will
+           * happen after 65535 blocks. */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_auth)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_auth (c, abuf, nblks);
+          ndone = nblks - nleft;
+
+          abuf += ndone * OCB_BLOCK_LEN;
+          abuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
+        }
+
+      /* Hash all full blocks.  */
+      while (nblks)
+        {
+          c->u_mode.ocb.aad_nblocks++;
+
+          gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_mode.ocb.aad_offset,
+                     ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                     OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
-      abuf += OCB_BLOCK_LEN;
-      abuflen -= OCB_BLOCK_LEN;
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+        }
     }
 
   /* Store away the remaining data.  */
@@ -321,6 +370,9 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
     c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover++] = *abuf;
   gcry_assert (!abuflen);
 
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
   return 0;
 }
 
@@ -330,6 +382,8 @@ static void
 ocb_aad_finalize (gcry_cipher_hd_t c)
 {
   unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
 
   /* Check that a nonce and thus a key has been set and that we have
      not yet computed the tag.  We also skip this if the aad has been
@@ -352,7 +406,8 @@ ocb_aad_finalize (gcry_cipher_hd_t c)
       l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
       buf_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
       /* Sum = Sum_m xor ENCIPHER(K, CipherInput)  */
-      c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+      nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+      burn = nburn > burn ? nburn : burn;
       buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
       c->u_mode.ocb.aad_nleftover = 0;
@@ -361,6 +416,9 @@ ocb_aad_finalize (gcry_cipher_hd_t c)
   /* Mark AAD as finalized so that gcry_cipher_ocb_authenticate can
    * return an erro when called again.  */
   c->u_mode.ocb.aad_finalized = 1;
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
 }
 
 
@@ -387,10 +445,13 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
            unsigned char *outbuf, size_t outbuflen,
            const unsigned char *inbuf, size_t inbuflen)
 {
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
   unsigned char l_tmp[OCB_BLOCK_LEN];
   unsigned int burn = 0;
   unsigned int nburn;
-  size_t nblks = inbuflen / OCB_BLOCK_LEN;
+  gcry_cipher_encrypt_t crypt_fn =
+      encrypt ? c->spec->encrypt : c->spec->decrypt;
 
   /* Check that a nonce and thus a key has been set and that we are
      not yet in end of data state. */
@@ -407,58 +468,112 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
   else if ((inbuflen % OCB_BLOCK_LEN))
     return GPG_ERR_INV_LENGTH;  /* We support only full blocks for now.  */
 
-  /* Use a bulk method if available.  */
-  if (nblks && c->bulk.ocb_crypt)
-    {
-      size_t nleft;
-      size_t ndone;
-
-      nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
-      ndone = nblks - nleft;
-
-      inbuf += ndone * OCB_BLOCK_LEN;
-      outbuf += ndone * OCB_BLOCK_LEN;
-      inbuflen -= ndone * OCB_BLOCK_LEN;
-      outbuflen -= ndone * OCB_BLOCK_LEN;
-      nblks = nleft;
-    }
-
-  if (nblks)
+  /* Full blocks handling. */
+  while (inbuflen >= OCB_BLOCK_LEN)
     {
-      gcry_cipher_encrypt_t crypt_fn =
-          encrypt ? c->spec->encrypt : c->spec->decrypt;
+      size_t nblks = inbuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
 
-      if (encrypt)
-        {
-          /* Checksum_i = Checksum_{i-1} xor P_i  */
-          ocb_checksum (c->u_ctr.ctr, inbuf, nblks);
-        }
+      /* Check how many blocks to process till table overflow. */
+      nmaxblks = (c->u_mode.ocb.data_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
 
-      /* Encrypt all full blocks.  */
-      while (inbuflen >= OCB_BLOCK_LEN)
+      if (nmaxblks == 0)
         {
+          /* Table overflow, generate L and process one block. */
           c->u_mode.ocb.data_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.data_nblocks, l_tmp);
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, 1);
+            }
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_iv.iv,
-                     ocb_get_l (c, l_tmp, c->u_mode.ocb.data_nblocks),
-                     OCB_BLOCK_LEN);
+          buf_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
           buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
           nburn = crypt_fn (&c->context.c, outbuf, outbuf);
           burn = nburn > burn ? nburn : burn;
           buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
 
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, outbuf, 1);
+            }
+
           inbuf += OCB_BLOCK_LEN;
           inbuflen -= OCB_BLOCK_LEN;
           outbuf += OCB_BLOCK_LEN;
           outbuflen =- OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry loop again. Next overflow will
+           * happen after 65535 blocks. */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_crypt)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+          ndone = nblks - nleft;
+
+          inbuf += ndone * OCB_BLOCK_LEN;
+          outbuf += ndone * OCB_BLOCK_LEN;
+          inbuflen -= ndone * OCB_BLOCK_LEN;
+          outbuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
         }
 
-      if (!encrypt)
+      if (nblks)
         {
-          /* Checksum_i = Checksum_{i-1} xor P_i  */
-          ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks);
+          size_t nblks_chksum = nblks;
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, nblks_chksum);
+            }
+
+          /* Encrypt all full blocks.  */
+          while (nblks)
+            {
+              c->u_mode.ocb.data_nblocks++;
+
+              gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
+
+              /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+              buf_xor_1 (c->u_iv.iv,
+                         ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+                         OCB_BLOCK_LEN);
+              /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+              buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+              nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+              burn = nburn > burn ? nburn : burn;
+              buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+              inbuf += OCB_BLOCK_LEN;
+              inbuflen -= OCB_BLOCK_LEN;
+              outbuf += OCB_BLOCK_LEN;
+              outbuflen -= OCB_BLOCK_LEN;
+              nblks--;
+            }
+
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr,
+                            outbuf - nblks_chksum * OCB_BLOCK_LEN,
+                            nblks_chksum);
+            }
         }
     }
 
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 8b28b3a..7852e19 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -1331,74 +1331,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 
-static inline const unsigned char *
-get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
-       unsigned char *ctr)
-{
-  const unsigned char *l;
-  unsigned int ntz;
-
-  if (i & 0xffffffffU)
-    {
-      asm ("rep;bsf %k[low], %k[ntz]\n\t"
-           : [ntz] "=r" (ntz)
-           : [low] "r" (i & 0xffffffffU)
-           : "cc");
-    }
-  else
-    {
-      if (OCB_L_TABLE_SIZE < 32)
-        {
-          ntz = 32;
-        }
-      else if (i)
-        {
-          asm ("rep;bsf %k[high], %k[ntz]\n\t"
-               : [ntz] "=r" (ntz)
-               : [high] "r" (i >> 32)
-               : "cc");
-          ntz += 32;
-        }
-      else
-        {
-          ntz = 64;
-        }
-    }
-
-  if (ntz < OCB_L_TABLE_SIZE)
-    {
-      l = c->u_mode.ocb.L[ntz];
-    }
-  else
-    {
-      /* Store Offset & Checksum before calling external function */
-      asm volatile ("movdqu %%xmm5, %[iv]\n\t"
-                    "movdqu %%xmm6, %[ctr]\n\t"
-                    : [iv] "=m" (*iv),
-                      [ctr] "=m" (*ctr)
-                    :
-                    : "memory" );
-
-      l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
-
-      /* Restore Offset & Checksum */
-      asm volatile ("movdqu %[iv], %%xmm5\n\t"
-                    "movdqu %[ctr], %%xmm6\n\t"
-                    : /* No output */
-                    : [iv] "m" (*iv),
-                      [ctr] "m" (*ctr)
-                    : "memory" );
-    }
-
-  return l;
-}
-
-
 static void
 aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
@@ -1420,7 +1356,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks && n % 4; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -1449,9 +1385,8 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      /* l_tmp will be used only every 65536-th block. */
       n += 4;
-      l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -1522,7 +1457,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -1559,8 +1494,6 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 
   aesni_cleanup ();
   aesni_cleanup_2_6 ();
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 
@@ -1568,7 +1501,6 @@ static void
 aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
@@ -1589,7 +1521,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks && n % 4; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -1618,9 +1550,8 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      /* l_tmp will be used only every 65536-th block. */
       n += 4;
-      l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -1691,7 +1622,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -1728,8 +1659,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 
   aesni_cleanup ();
   aesni_cleanup_2_6 ();
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 
@@ -1748,7 +1677,6 @@ void
 _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                           size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   u64 n = c->u_mode.ocb.aad_nblocks;
@@ -1768,8 +1696,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   for ( ;nblocks && n % 4; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
-                c->u_mode.ocb.aad_sum);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -1794,10 +1721,8 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      /* l_tmp will be used only every 65536-th block. */
       n += 4;
-      l = get_l(c, l_tmp.x1, n, c->u_mode.ocb.aad_offset,
-		c->u_mode.ocb.aad_sum);
+      l = ocb_get_l(c, n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -1849,8 +1774,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   for ( ;nblocks; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
-                c->u_mode.ocb.aad_sum);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -1883,8 +1807,6 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   aesni_cleanup ();
   aesni_cleanup_2_6 ();
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 
diff --git a/cipher/rijndael-armv8-ce.c b/cipher/rijndael-armv8-ce.c
index bed4066..1bf74da 100644
--- a/cipher/rijndael-armv8-ce.c
+++ b/cipher/rijndael-armv8-ce.c
@@ -336,7 +336,6 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   u64 blkn = c->u_mode.ocb.data_nblocks;
   u64 blkn_offs = blkn - blkn % 32;
   unsigned int n = 32 - blkn % 32;
-  unsigned char l_tmp[16];
   void *Ls[32];
   void **l;
   size_t i;
@@ -364,9 +363,8 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       /* Process data in 32 block chunks. */
       while (nblocks >= 32)
         {
-          /* l_tmp will be used only every 65536-th block. */
           blkn_offs += 32;
-          *l = (void *)ocb_get_l(c, l_tmp, blkn_offs);
+          *l = (void *)ocb_get_l(c, blkn_offs);
 
           crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, 32,
                     nrounds);
@@ -378,13 +376,13 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 
       if (nblocks && l < &Ls[nblocks])
         {
-          *l = (void *)ocb_get_l(c, l_tmp, 32 + blkn_offs);
+          *l = (void *)ocb_get_l(c, 32 + blkn_offs);
         }
     }
   else
     {
       for (i = 0; i < nblocks; i++)
-        Ls[i] = (void *)ocb_get_l(c, l_tmp, ++blkn);
+        Ls[i] = (void *)ocb_get_l(c, ++blkn);
     }
 
   if (nblocks)
@@ -392,8 +390,6 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, nblocks,
                nrounds);
     }
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 void
@@ -407,7 +403,6 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
   u64 blkn = c->u_mode.ocb.aad_nblocks;
   u64 blkn_offs = blkn - blkn % 32;
   unsigned int n = 32 - blkn % 32;
-  unsigned char l_tmp[16];
   void *Ls[32];
   void **l;
   size_t i;
@@ -435,9 +430,8 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
       /* Process data in 32 block chunks. */
       while (nblocks >= 32)
         {
-          /* l_tmp will be used only every 65536-th block. */
           blkn_offs += 32;
-          *l = (void *)ocb_get_l(c, l_tmp, blkn_offs);
+          *l = (void *)ocb_get_l(c, blkn_offs);
 
           _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
                                       c->u_mode.ocb.aad_sum, Ls, 32, nrounds);
@@ -448,13 +442,13 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
 
       if (nblocks && l < &Ls[nblocks])
         {
-          *l = (void *)ocb_get_l(c, l_tmp, 32 + blkn_offs);
+          *l = (void *)ocb_get_l(c, 32 + blkn_offs);
         }
     }
   else
     {
       for (i = 0; i < nblocks; i++)
-        Ls[i] = (void *)ocb_get_l(c, l_tmp, ++blkn);
+        Ls[i] = (void *)ocb_get_l(c, ++blkn);
     }
 
   if (nblocks)
@@ -462,8 +456,6 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
       _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
                                   c->u_mode.ocb.aad_sum, Ls, nblocks, nrounds);
     }
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 #endif /* USE_ARM_CE */
diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c
index 937d868..a8e89d4 100644
--- a/cipher/rijndael-ssse3-amd64.c
+++ b/cipher/rijndael-ssse3-amd64.c
@@ -527,92 +527,10 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 
-static inline const unsigned char *
-get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
-       unsigned char *ctr, const void **aes_const_ptr,
-       byte ssse3_state[SSSE3_STATE_SIZE], int encrypt)
-{
-  const unsigned char *l;
-  unsigned int ntz;
-
-  if (i & 1)
-    return c->u_mode.ocb.L[0];
-  else if (i & 2)
-    return c->u_mode.ocb.L[1];
-  else if (i & 0xffffffffU)
-    {
-      asm ("rep;bsf %k[low], %k[ntz]\n\t"
-           : [ntz] "=r" (ntz)
-           : [low] "r" (i & 0xffffffffU)
-           : "cc");
-    }
-  else
-    {
-      if (OCB_L_TABLE_SIZE < 32)
-        {
-          ntz = 32;
-        }
-      else if (i)
-        {
-          asm ("rep;bsf %k[high], %k[ntz]\n\t"
-               : [ntz] "=r" (ntz)
-               : [high] "r" (i >> 32)
-               : "cc");
-          ntz += 32;
-        }
-      else
-        {
-          ntz = 64;
-        }
-    }
-
-  if (ntz < OCB_L_TABLE_SIZE)
-    {
-      l = c->u_mode.ocb.L[ntz];
-    }
-  else
-    {
-      /* Store Offset & Checksum before calling external function */
-      asm volatile ("movdqu %%xmm7, %[iv]\n\t"
-                    "movdqu %%xmm6, %[ctr]\n\t"
-                    : [iv] "=m" (*iv),
-                      [ctr] "=m" (*ctr)
-                    :
-                    : "memory" );
-
-      /* Restore SSSE3 state. */
-      vpaes_ssse3_cleanup();
-
-      l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
-
-      /* Save SSSE3 state. */
-      if (encrypt)
-	{
-	  vpaes_ssse3_prepare_enc (*aes_const_ptr);
-	}
-      else
-	{
-	  vpaes_ssse3_prepare_dec (*aes_const_ptr);
-	}
-
-      /* Restore Offset & Checksum */
-      asm volatile ("movdqu %[iv], %%xmm7\n\t"
-                    "movdqu %[ctr], %%xmm6\n\t"
-                    : /* No output */
-                    : [iv] "m" (*iv),
-                      [ctr] "m" (*ctr)
-                    : "memory" );
-    }
-
-  return l;
-}
-
-
 static void
 ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
@@ -635,8 +553,7 @@ ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       const unsigned char *l;
 
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr, &aes_const_ptr,
-		ssse3_state, 1);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -671,7 +588,6 @@ ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                 :
                 : "memory" );
 
-  wipememory(&l_tmp, sizeof(l_tmp));
   vpaes_ssse3_cleanup ();
 }
 
@@ -679,7 +595,6 @@ static void
 ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
@@ -702,8 +617,7 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       const unsigned char *l;
 
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr, &aes_const_ptr,
-		ssse3_state, 0);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -738,7 +652,6 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                 :
                 : "memory" );
 
-  wipememory(&l_tmp, sizeof(l_tmp));
   vpaes_ssse3_cleanup ();
 }
 
@@ -758,7 +671,6 @@ void
 _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                           size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   u64 n = c->u_mode.ocb.aad_nblocks;
@@ -780,8 +692,7 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       const unsigned char *l;
 
-      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
-                c->u_mode.ocb.aad_sum, &aes_const_ptr, ssse3_state, 1);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -812,7 +723,6 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                 :
                 : "memory" );
 
-  wipememory(&l_tmp, sizeof(l_tmp));
   vpaes_ssse3_cleanup ();
 }
 
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index cc6a722..66ea0f3 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1353,7 +1353,7 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
-          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
@@ -1378,7 +1378,7 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
-          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
@@ -1445,7 +1445,7 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.aad_nblocks;
-          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
diff --git a/cipher/serpent.c b/cipher/serpent.c
index ef19d3b..ea4b8ed 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -1235,7 +1235,6 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   serpent_context_t *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char l_tmp[sizeof(serpent_block_t)];
   int burn_stack_depth = 2 * sizeof (serpent_block_t);
   u64 blkn = c->u_mode.ocb.data_nblocks;
 #else
@@ -1275,9 +1274,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      if (encrypt)
 		_gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1327,9 +1325,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	/* Process data in 8 block chunks. */
 	while (nblocks >= 8)
 	  {
-	    /* l_tmp will be used only every 65536-th block. */
 	    blkn += 8;
-	    *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 8);
+	    *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
 
 	    if (encrypt)
 	      _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1378,9 +1375,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 8;
-	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+	      *l = ocb_get_l(c, blkn - blkn % 8);
 
 	      if (encrypt)
 		_gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1410,8 +1406,6 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   c->u_mode.ocb.data_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
@@ -1427,7 +1421,6 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   serpent_context_t *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
-  unsigned char l_tmp[sizeof(serpent_block_t)];
   int burn_stack_depth = 2 * sizeof(serpent_block_t);
   u64 blkn = c->u_mode.ocb.aad_nblocks;
 #else
@@ -1465,9 +1458,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					  c->u_mode.ocb.aad_sum, Ls);
@@ -1512,9 +1504,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	/* Process data in 8 block chunks. */
 	while (nblocks >= 8)
 	  {
-	    /* l_tmp will be used only every 65536-th block. */
 	    blkn += 8;
-	    *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 8);
+	    *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
 
 	    _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					c->u_mode.ocb.aad_sum, Ls);
@@ -1558,9 +1549,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 8;
-	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+	      *l = ocb_get_l(c, blkn - blkn % 8);
 
 	      _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					  c->u_mode.ocb.aad_sum, Ls);
@@ -1585,8 +1575,6 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   c->u_mode.ocb.aad_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
diff --git a/cipher/twofish.c b/cipher/twofish.c
index 7a4d26a..55f6fb9 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -1261,7 +1261,6 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   TWOFISH_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char l_tmp[TWOFISH_BLOCKSIZE];
   unsigned int burn, burn_stack_depth = 0;
   u64 blkn = c->u_mode.ocb.data_nblocks;
 
@@ -1273,10 +1272,9 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-	/* l_tmp will be used only every 65536-th block. */
-	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 1);
-	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 2);
-	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 3);
+	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
 	blkn += 3;
 
 	if (encrypt)
@@ -1300,8 +1298,6 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 
   c->u_mode.ocb.data_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #else
@@ -1322,7 +1318,6 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #ifdef USE_AMD64_ASM
   TWOFISH_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
-  unsigned char l_tmp[TWOFISH_BLOCKSIZE];
   unsigned int burn, burn_stack_depth = 0;
   u64 blkn = c->u_mode.ocb.aad_nblocks;
 
@@ -1334,10 +1329,9 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-	/* l_tmp will be used only every 65536-th block. */
-	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 1);
-	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 2);
-	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 3);
+	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
 	blkn += 3;
 
 	twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
@@ -1356,8 +1350,6 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   c->u_mode.ocb.aad_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #else




More information about the Gcrypt-devel mailing list