[git] GCRYPT - branch, master, updated. libgcrypt-1.7.3-40-gc83d0d2

by Jussi Kivilinna cvs at cvs.gnupg.org
Sat Dec 10 12:13:48 CET 2016


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  c83d0d2a26059cf471d09f5cb8e7fc5d76c4907b (commit)
       via  2b7b227b8a0bd5ff286258bc187782efac180a7e (commit)
       via  5c418e597f0f20a546d953161695e6caf1f57689 (commit)
       via  2d2e5286d53e1f62fe040dff4c6e01961f00afe2 (commit)
       via  161d339f48c03be7fd0f4249d730f7f1767ef8e4 (commit)
       via  0b03b658bebc69a84d87ef13f9b60a27b0c42305 (commit)
      from  e232c12efac539b2fd2a4472259693b4364a349e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit c83d0d2a26059cf471d09f5cb8e7fc5d76c4907b
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Dec 10 12:29:12 2016 +0200

    hwfeatures: add 'all' for disabling all hardware features
    
    * .gitignore: Add 'tests/basic-disable-all-hwf'.
    * configure.ac: Ditto.
    * tests/Makefile.am: Ditto.
    * src/hwfeatures.c (_gcry_disable_hw_feature): Match 'all' for
    masking all HW features off.
    (parse_hwf_deny_file): Use '_gcry_disable_hw_feature' for matching.
    * tests/basic-disable-all-hwf.in: New.
    --
    
    Also add a new test that runs 'basic' with all HWF disabled.  With
    current assembly implementations and build servers using new CPUs,
    the generic implementations are no longer exercised enough, and
    compiler problems might go unnoticed.
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
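
Since 'parse_hwf_deny_file' now routes through '_gcry_disable_hw_feature',
the new 'all' keyword works both from the HWF deny file and from
application code.  A minimal sketch, assuming the GCRYCTL_DISABLE_HWF
control (which dispatches to _gcry_disable_hw_feature) and that it is
issued before initialization, since feature detection runs from
gcry_check_version:

    #include <gcrypt.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* "all" sets disabled_hw_features = ~0, masking every HW flag.
         This must happen before gcry_check_version triggers the
         hardware feature detection.  */
      gcry_control (GCRYCTL_DISABLE_HWF, "all", NULL);

      if (!gcry_check_version (GCRYPT_VERSION))
        {
          fputs ("libgcrypt version mismatch\n", stderr);
          return 1;
        }
      gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

      /* From here on only the generic C implementations are used.  */
      return 0;
    }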

diff --git a/.gitignore b/.gitignore
index 3cd83a2..5d481aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,6 +73,7 @@ tests/ac-data
 tests/ac-schemes
 tests/aeswrap
 tests/basic
+tests/basic-disable-all-hwf
 tests/bench-slope
 tests/benchmark
 tests/curves
diff --git a/configure.ac b/configure.ac
index 17ff407..91562a9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2555,6 +2555,7 @@ src/versioninfo.rc
 tests/Makefile
 ])
 AC_CONFIG_FILES([tests/hashtest-256g], [chmod +x tests/hashtest-256g])
+AC_CONFIG_FILES([tests/basic-disable-all-hwf], [chmod +x tests/basic-disable-all-hwf])
 AC_OUTPUT
 
 
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index 07221e8..99aba34 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -83,6 +83,12 @@ _gcry_disable_hw_feature (const char *name)
 {
   int i;
 
+  if (!strcmp(name, "all"))
+    {
+      disabled_hw_features = ~0;
+      return 0;
+    }
+
   for (i=0; i < DIM (hwflist); i++)
     if (!strcmp (hwflist[i].desc, name))
       {
@@ -159,15 +165,7 @@ parse_hwf_deny_file (void)
       if (!*p || *p == '#')
         continue;
 
-      for (i=0; i < DIM (hwflist); i++)
-        {
-          if (!strcmp (hwflist[i].desc, p))
-            {
-              disabled_hw_features |= hwflist[i].flag;
-              break;
-            }
-        }
-      if (i == DIM (hwflist))
+      if (_gcry_disable_hw_feature (p) == GPG_ERR_INV_NAME)
         {
 #ifdef HAVE_SYSLOG
           syslog (LOG_USER|LOG_WARNING,
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 374e72e..db51cbd 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -26,7 +26,7 @@ tests_bin = \
 
 tests_bin_last = benchmark bench-slope
 
-tests_sh =
+tests_sh = basic-disable-all-hwf
 
 tests_sh_last = hashtest-256g
 
@@ -58,7 +58,8 @@ noinst_HEADERS = t-common.h
 EXTRA_DIST = README rsa-16k.key cavs_tests.sh cavs_driver.pl \
 	     pkcs1v2-oaep.h pkcs1v2-pss.h pkcs1v2-v15c.h pkcs1v2-v15s.h \
 	     t-ed25519.inp stopwatch.h hashtest-256g.in \
-	     sha3-224.h sha3-256.h sha3-384.h sha3-512.h
+	     sha3-224.h sha3-256.h sha3-384.h sha3-512.h \
+	     basic-disable-all-hwf.in
 
 LDADD = $(standard_ldadd) $(GPG_ERROR_LIBS)
 t_lock_LDADD = $(standard_ldadd) $(GPG_ERROR_MT_LIBS)
diff --git a/tests/basic-disable-all-hwf.in b/tests/basic-disable-all-hwf.in
new file mode 100644
index 0000000..1f0a4de
--- /dev/null
+++ b/tests/basic-disable-all-hwf.in
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+echo "      now running 'basic' test with all hardware features disabled."
+exec ./basic@EXEEXT@ --disable-hwf all

commit 2b7b227b8a0bd5ff286258bc187782efac180a7e
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Dec 10 12:29:12 2016 +0200

    tests/hashtest-256g: add missing executable extension for Win32
    
    * tests/hashtest-256g.in: Add @EXEEXT@.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
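
For reference: Autoconf substitutes EXEEXT as '.exe' when targeting
Win32 and as an empty string elsewhere, so the generated script runs
'./hashtest.exe --gigs 256 ...' on Windows and keeps running
'./hashtest --gigs 256 ...' everywhere else.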

diff --git a/tests/hashtest-256g.in b/tests/hashtest-256g.in
index e897c54..92b1c1b 100755
--- a/tests/hashtest-256g.in
+++ b/tests/hashtest-256g.in
@@ -4,4 +4,4 @@ algos="SHA1 SHA256 SHA512"
 
 test "@RUN_LARGE_DATA_TESTS@" = yes || exit 77
 echo "      now running 256 GiB tests for $algos - this takes looong"
-exec ./hashtest --gigs 256 $algos
+exec ./hashtest@EXEEXT@ --gigs 256 $algos

commit 5c418e597f0f20a546d953161695e6caf1f57689
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Dec 10 12:29:12 2016 +0200

    OCB ARM CE: Move ocb_get_l handling to assembly part
    
    * cipher/rijndael-armv8-aarch32-ce.S: Add OCB 'L_{ntz(i)}' calculation.
    * cipher/rijndael-armv8-aarch64-ce.S: Ditto.
    * cipher/rijndael-armv8-ce.c (_gcry_aes_ocb_enc_armv8_ce)
    (_gcry_aes_ocb_dec_armv8_ce, _gcry_aes_ocb_auth_armv8_ce)
    (ocb_crypt_fn_t): Update arguments.
    (_gcry_aes_armv8_ce_ocb_crypt, _gcry_aes_armv8_ce_ocb_auth): Remove
    the 'ocb_get_l' handling and the splitting of input into 32-block
    chunks; instead pass full buffers to the assembly.
    --
    
    Performance on Cortex-A53 (AArch32):
    
    Before:
     AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
            OCB enc |      1.63 ns/B     583.8 MiB/s      1.88 c/B
            OCB dec |      1.67 ns/B     572.1 MiB/s      1.92 c/B
           OCB auth |      1.33 ns/B     717.1 MiB/s      1.53 c/B
    
    After (~12% faster):
     AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
            OCB enc |      1.47 ns/B     650.2 MiB/s      1.69 c/B
            OCB dec |      1.48 ns/B     644.5 MiB/s      1.70 c/B
           OCB auth |      1.19 ns/B     798.2 MiB/s      1.38 c/B
    
    Performance on Cortex-A53 (AArch64):
    
    Before:
     AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
            OCB enc |      1.29 ns/B     738.5 MiB/s      1.49 c/B
            OCB dec |      1.32 ns/B     723.5 MiB/s      1.52 c/B
           OCB auth |      1.15 ns/B     827.0 MiB/s      1.33 c/B
    
    After (~8% faster):
     AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
            OCB enc |      1.21 ns/B     789.1 MiB/s      1.39 c/B
            OCB dec |      1.21 ns/B     789.2 MiB/s      1.39 c/B
           OCB auth |      1.10 ns/B     867.0 MiB/s      1.27 c/B
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
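
The assembly changes below replace the precomputed pointer array with an
inline ntz computation: for a nonzero block counter i, ntz(i) equals
clz(bit-reverse(i)), which is exactly what the 'rbit; clz' pairs
compute, and each L-table entry is a 16-byte block, hence the 'lsl #4'
scaling.  An equivalent C sketch (helper names hypothetical, GCC/Clang
builtins assumed):

    #include <stddef.h>
    #include <stdint.h>

    static inline unsigned int
    ntz32 (uint32_t i)
    {
      /* Mirrors the 'rbit rX, rY; clz rX, rX' sequence.  Only defined
         for i != 0; the loops increment the block counter before use
         and the caller never passes a counter that wraps to zero.  */
      return (unsigned int)__builtin_ctz (i);
    }

    static inline const unsigned char *
    ocb_l_entry (const unsigned char *L_table, uint32_t blkn)
    {
      /* 'add rX, r5, rX, lsl #4': index the 16-byte L entries.  */
      return L_table + ((size_t)ntz32 (blkn) << 4);
    }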

diff --git a/cipher/rijndael-armv8-aarch32-ce.S b/cipher/rijndael-armv8-aarch32-ce.S
index bf68f20..f375f67 100644
--- a/cipher/rijndael-armv8-aarch32-ce.S
+++ b/cipher/rijndael-armv8-aarch32-ce.S
@@ -1021,9 +1021,10 @@ _gcry_aes_ctr_enc_armv8_ce:
  *                                  const unsigned char *inbuf,
  *                                  unsigned char *offset,
  *                                  unsigned char *checksum,
- *                                  void **Ls,
+ *                                  unsigned char *L_table,
  *                                  size_t nblocks,
- *                                  unsigned int nrounds);
+ *                                  unsigned int nrounds,
+ *                                  unsigned int blkn);
  */
 
 .align 3
@@ -1039,6 +1040,7 @@ _gcry_aes_ocb_enc_armv8_ce:
    *    %st+4: Ls => r5
    *    %st+8: nblocks => r6  (0 < nblocks <= 32)
    *    %st+12: nrounds => r7
+   *    %st+16: blkn => lr
    */
 
   vpush {q4-q7}
@@ -1047,6 +1049,7 @@ _gcry_aes_ocb_enc_armv8_ce:
   ldr r4, [sp, #(104+0)]
   ldr r5, [sp, #(104+4)]
   ldr r6, [sp, #(104+8)]
+  ldr lr, [sp, #(104+16)]
 
   cmp r7, #12
   vld1.8 {q0}, [r3] /* load offset */
@@ -1059,6 +1062,7 @@ _gcry_aes_ocb_enc_armv8_ce:
 #define OCB_ENC(bits, ...) \
   .Locb_enc_entry_##bits: \
     cmp r6, #4; \
+    add lr, #1; \
     blo .Locb_enc_loop_##bits; \
     \
   .Locb_enc_loop4_##bits: \
@@ -1067,7 +1071,23 @@ _gcry_aes_ocb_enc_armv8_ce:
     /* Checksum_i = Checksum_{i-1} xor P_i  */ \
     /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
     \
-    ldm r5!, {r8, r9, r10, r11}; \
+    add r9, lr, #1; \
+    add r10, lr, #2; \
+    add r11, lr, #3; \
+    rbit r8, lr; \
+    add lr, lr, #4; \
+    rbit r9, r9; \
+    rbit r10, r10; \
+    rbit r11, r11; \
+    clz r8, r8; /* ntz(i+0) */ \
+    clz r9, r9; /* ntz(i+1) */ \
+    clz r10, r10; /* ntz(i+2) */ \
+    clz r11, r11; /* ntz(i+3) */ \
+    add r8, r5, r8, lsl #4; \
+    add r9, r5, r9, lsl #4; \
+    add r10, r5, r10, lsl #4; \
+    add r11, r5, r11, lsl #4; \
+    \
     sub r6, #4; \
     \
     vld1.8 {q9}, [r8];     /* load L_{ntz(i+0)} */ \
@@ -1120,7 +1140,11 @@ _gcry_aes_ocb_enc_armv8_ce:
     /* Checksum_i = Checksum_{i-1} xor P_i  */ \
     /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
     \
-    ldr r8, [r5], #4; \
+    rbit r8, lr; \
+    add lr, #1; \
+    clz r8, r8; /* ntz(i) */ \
+    add r8, r5, r8, lsl #4; \
+    \
     vld1.8 {q1}, [r2]!; /* load plaintext */ \
     vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \
     vld1.8 {q3}, [r4]; /* load checksum */ \
@@ -1171,9 +1195,10 @@ _gcry_aes_ocb_enc_armv8_ce:
  *                                  const unsigned char *inbuf,
  *                                  unsigned char *offset,
  *                                  unsigned char *checksum,
- *                                  void **Ls,
+ *                                  unsigned char *L_table,
  *                                  size_t nblocks,
- *                                  unsigned int nrounds);
+ *                                  unsigned int nrounds,
+ *                                  unsigned int blkn);
  */
 
 .align 3
@@ -1189,6 +1214,7 @@ _gcry_aes_ocb_dec_armv8_ce:
    *    %st+4: Ls => r5
    *    %st+8: nblocks => r6  (0 < nblocks <= 32)
    *    %st+12: nrounds => r7
+   *    %st+16: blkn => lr
    */
 
   vpush {q4-q7}
@@ -1197,6 +1223,7 @@ _gcry_aes_ocb_dec_armv8_ce:
   ldr r4, [sp, #(104+0)]
   ldr r5, [sp, #(104+4)]
   ldr r6, [sp, #(104+8)]
+  ldr lr, [sp, #(104+16)]
 
   cmp r7, #12
   vld1.8 {q0}, [r3] /* load offset */
@@ -1209,6 +1236,7 @@ _gcry_aes_ocb_dec_armv8_ce:
 #define OCB_DEC(bits, ...) \
   .Locb_dec_entry_##bits: \
     cmp r6, #4; \
+    add lr, #1; \
     blo .Locb_dec_loop_##bits; \
     \
   .Locb_dec_loop4_##bits: \
@@ -1217,7 +1245,23 @@ _gcry_aes_ocb_dec_armv8_ce:
     /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */ \
     /* Checksum_i = Checksum_{i-1} xor P_i  */ \
     \
-    ldm r5!, {r8, r9, r10, r11}; \
+    add r9, lr, #1; \
+    add r10, lr, #2; \
+    add r11, lr, #3; \
+    rbit r8, lr; \
+    add lr, lr, #4; \
+    rbit r9, r9; \
+    rbit r10, r10; \
+    rbit r11, r11; \
+    clz r8, r8; /* ntz(i+0) */ \
+    clz r9, r9; /* ntz(i+1) */ \
+    clz r10, r10; /* ntz(i+2) */ \
+    clz r11, r11; /* ntz(i+3) */ \
+    add r8, r5, r8, lsl #4; \
+    add r9, r5, r9, lsl #4; \
+    add r10, r5, r10, lsl #4; \
+    add r11, r5, r11, lsl #4; \
+    \
     sub r6, #4; \
     \
     vld1.8 {q9}, [r8];     /* load L_{ntz(i+0)} */ \
@@ -1270,7 +1314,11 @@ _gcry_aes_ocb_dec_armv8_ce:
     /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */ \
     /* Checksum_i = Checksum_{i-1} xor P_i  */ \
     \
-    ldr r8, [r5], #4; \
+    rbit r8, lr; \
+    add lr, #1; \
+    clz r8, r8; /* ntz(i) */ \
+    add r8, r5, r8, lsl #4; \
+    \
     vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \
     vld1.8 {q1}, [r2]!; /* load ciphertext */ \
     subs r6, #1; \
@@ -1320,9 +1368,10 @@ _gcry_aes_ocb_dec_armv8_ce:
  *                                   const unsigned char *abuf,
  *                                   unsigned char *offset,
  *                                   unsigned char *checksum,
- *                                   void **Ls,
+ *                                   unsigned char *L_table,
  *                                   size_t nblocks,
- *                                   unsigned int nrounds);
+ *                                   unsigned int nrounds,
+ *                                   unsigned int blkn);
  */
 
 .align 3
@@ -1337,6 +1386,7 @@ _gcry_aes_ocb_auth_armv8_ce:
    *    %st+0: Ls => r5
    *    %st+4: nblocks => r6  (0 < nblocks <= 32)
    *    %st+8: nrounds => r7
+   *    %st+12: blkn => lr
    */
 
   vpush {q4-q7}
@@ -1344,6 +1394,7 @@ _gcry_aes_ocb_auth_armv8_ce:
   ldr r7, [sp, #(104+8)]
   ldr r5, [sp, #(104+0)]
   ldr r6, [sp, #(104+4)]
+  ldr lr, [sp, #(104+12)]
 
   cmp r7, #12
   vld1.8 {q0}, [r2] /* load offset */
@@ -1356,6 +1407,7 @@ _gcry_aes_ocb_auth_armv8_ce:
 #define OCB_AUTH(bits, ...) \
   .Locb_auth_entry_##bits: \
     cmp r6, #4; \
+    add lr, #1; \
     blo .Locb_auth_loop_##bits; \
     \
   .Locb_auth_loop4_##bits: \
@@ -1363,7 +1415,23 @@ _gcry_aes_ocb_auth_armv8_ce:
     /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
     /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */ \
     \
-    ldm r5!, {r8, r9, r10, r11}; \
+    add r9, lr, #1; \
+    add r10, lr, #2; \
+    add r11, lr, #3; \
+    rbit r8, lr; \
+    add lr, lr, #4; \
+    rbit r9, r9; \
+    rbit r10, r10; \
+    rbit r11, r11; \
+    clz r8, r8; /* ntz(i+0) */ \
+    clz r9, r9; /* ntz(i+1) */ \
+    clz r10, r10; /* ntz(i+2) */ \
+    clz r11, r11; /* ntz(i+3) */ \
+    add r8, r5, r8, lsl #4; \
+    add r9, r5, r9, lsl #4; \
+    add r10, r5, r10, lsl #4; \
+    add r11, r5, r11, lsl #4; \
+    \
     sub r6, #4; \
     \
     vld1.8 {q9}, [r8];     /* load L_{ntz(i+0)} */ \
@@ -1401,8 +1469,12 @@ _gcry_aes_ocb_auth_armv8_ce:
     /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
     /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */ \
     \
-    ldr r8, [r5], #4; \
-    vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \
+    rbit r8, lr; \
+    add lr, #1; \
+    clz r8, r8; /* ntz(i) */ \
+    add r8, r5, r8, lsl #4; \
+    \
+    vld1.8 {q2}, [r8];  /* load L_{ntz(i)} */ \
     vld1.8 {q1}, [r1]!; /* load aadtext */ \
     subs r6, #1; \
     veor q0, q0, q2; \
diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S
index 21d0aec..1ebb363 100644
--- a/cipher/rijndael-armv8-aarch64-ce.S
+++ b/cipher/rijndael-armv8-aarch64-ce.S
@@ -28,23 +28,6 @@
 .text
 
 
-#if (SIZEOF_VOID_P == 4)
-  #define ptr8   w8
-  #define ptr9   w9
-  #define ptr10  w10
-  #define ptr11  w11
-  #define ptr_sz 4
-#elif (SIZEOF_VOID_P == 8)
-  #define ptr8   x8
-  #define ptr9   x9
-  #define ptr10  x10
-  #define ptr11  x11
-  #define ptr_sz 8
-#else
-  #error "missing SIZEOF_VOID_P"
-#endif
-
-
 #define GET_DATA_POINTER(reg, name) \
 	adrp    reg, :got:name ; \
 	ldr     reg, [reg, #:got_lo12:name] ;
@@ -855,9 +838,10 @@ _gcry_aes_cfb_dec_armv8_ce:
  *                                  const unsigned char *inbuf,
  *                                  unsigned char *offset,
  *                                  unsigned char *checksum,
- *                                  void **Ls,
+ *                                  unsigned char *L_table,
  *                                  size_t nblocks,
- *                                  unsigned int nrounds);
+ *                                  unsigned int nrounds,
+ *                                  unsigned int blkn);
  */
 
 .align 3
@@ -870,11 +854,13 @@ _gcry_aes_ocb_enc_armv8_ce:
    *    x2: inbuf
    *    x3: offset
    *    x4: checksum
-   *    x5: Ls
+   *    x5: Ltable
    *    x6: nblocks (0 < nblocks <= 32)
    *    w7: nrounds
+   *    %st+0: blkn => w12
    */
 
+  ldr w12, [sp]
   ld1 {v0.16b}, [x3] /* load offset */
   ld1 {v16.16b}, [x4] /* load checksum */
 
@@ -886,6 +872,7 @@ _gcry_aes_ocb_enc_armv8_ce:
 #define OCB_ENC(bits, ...) \
   .Locb_enc_entry_##bits: \
     cmp x6, #4; \
+    add x12, x12, #1; \
     b.lo .Locb_enc_loop_##bits; \
     \
   .Locb_enc_loop4_##bits: \
@@ -894,10 +881,24 @@ _gcry_aes_ocb_enc_armv8_ce:
     /* Checksum_i = Checksum_{i-1} xor P_i  */ \
     /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
     \
-    ldp ptr8, ptr9, [x5], #(ptr_sz*2); \
+    add w9, w12, #1; \
+    add w10, w12, #2; \
+    add w11, w12, #3; \
+    rbit w8, w12; \
+    add w12, w12, #4; \
+    rbit w9, w9; \
+    rbit w10, w10; \
+    rbit w11, w11; \
+    clz w8, w8; /* ntz(i+0) */ \
+    clz w9, w9; /* ntz(i+1) */ \
+    clz w10, w10; /* ntz(i+2) */ \
+    clz w11, w11; /* ntz(i+3) */ \
+    add x8, x5, x8, lsl #4; \
+    ld1 {v1.16b-v4.16b}, [x2], #64;   /* load P_i+<0-3> */ \
+    add x9, x5, x9, lsl #4; \
+    add x10, x5, x10, lsl #4; \
+    add x11, x5, x11, lsl #4; \
     \
-    ld1 {v1.16b-v4.16b}, [x2], #64; /* load P_i+<0-3> */ \
-    ldp ptr10, ptr11, [x5], #(ptr_sz*2); \
     sub x6, x6, #4; \
     \
     ld1 {v5.16b}, [x8];               /* load L_{ntz(i+0)} */ \
@@ -940,7 +941,11 @@ _gcry_aes_ocb_enc_armv8_ce:
     /* Checksum_i = Checksum_{i-1} xor P_i  */ \
     /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
     \
-    ldr ptr8, [x5], #(ptr_sz); \
+    rbit x8, x12; \
+    add x12, x12, #1; \
+    clz x8, x8; /* ntz(i) */ \
+    add x8, x5, x8, lsl #4; \
+    \
     ld1 {v1.16b}, [x2], #16; /* load plaintext */ \
     ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \
     sub x6, x6, #1; \
@@ -983,9 +988,10 @@ _gcry_aes_ocb_enc_armv8_ce:
  *                                  const unsigned char *inbuf,
  *                                  unsigned char *offset,
  *                                  unsigned char *checksum,
- *                                  void **Ls,
+ *                                  unsigned char *L_table,
  *                                  size_t nblocks,
- *                                  unsigned int nrounds);
+ *                                  unsigned int nrounds,
+ *                                  unsigned int blkn);
  */
 
 .align 3
@@ -998,11 +1004,13 @@ _gcry_aes_ocb_dec_armv8_ce:
    *    x2: inbuf
    *    x3: offset
    *    x4: checksum
-   *    x5: Ls
+   *    x5: Ltable
    *    x6: nblocks (0 < nblocks <= 32)
    *    w7: nrounds
+   *    %st+0: blkn => w12
    */
 
+  ldr w12, [sp]
   ld1 {v0.16b}, [x3] /* load offset */
   ld1 {v16.16b}, [x4] /* load checksum */
 
@@ -1014,6 +1022,7 @@ _gcry_aes_ocb_dec_armv8_ce:
 #define OCB_DEC(bits) \
   .Locb_dec_entry_##bits: \
     cmp x6, #4; \
+    add w12, w12, #1; \
     b.lo .Locb_dec_loop_##bits; \
     \
   .Locb_dec_loop4_##bits: \
@@ -1022,10 +1031,24 @@ _gcry_aes_ocb_dec_armv8_ce:
     /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */ \
     /* Checksum_i = Checksum_{i-1} xor P_i  */ \
     \
-    ldp ptr8, ptr9, [x5], #(ptr_sz*2); \
+    add w9, w12, #1; \
+    add w10, w12, #2; \
+    add w11, w12, #3; \
+    rbit w8, w12; \
+    add w12, w12, #4; \
+    rbit w9, w9; \
+    rbit w10, w10; \
+    rbit w11, w11; \
+    clz w8, w8; /* ntz(i+0) */ \
+    clz w9, w9; /* ntz(i+1) */ \
+    clz w10, w10; /* ntz(i+2) */ \
+    clz w11, w11; /* ntz(i+3) */ \
+    add x8, x5, x8, lsl #4; \
+    ld1 {v1.16b-v4.16b}, [x2], #64;   /* load C_i+<0-3> */ \
+    add x9, x5, x9, lsl #4; \
+    add x10, x5, x10, lsl #4; \
+    add x11, x5, x11, lsl #4; \
     \
-    ld1 {v1.16b-v4.16b}, [x2], #64; /* load C_i+<0-3> */ \
-    ldp ptr10, ptr11, [x5], #(ptr_sz*2); \
     sub x6, x6, #4; \
     \
     ld1 {v5.16b}, [x8];               /* load L_{ntz(i+0)} */ \
@@ -1068,7 +1091,11 @@ _gcry_aes_ocb_dec_armv8_ce:
     /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */ \
     /* Checksum_i = Checksum_{i-1} xor P_i  */ \
     \
-    ldr ptr8, [x5], #(ptr_sz); \
+    rbit w8, w12; \
+    add w12, w12, #1; \
+    clz w8, w8; /* ntz(i) */ \
+    add x8, x5, x8, lsl #4; \
+    \
     ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \
     ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \
     sub x6, x6, #1; \
@@ -1110,9 +1137,10 @@ _gcry_aes_ocb_dec_armv8_ce:
  *                                   const unsigned char *abuf,
  *                                   unsigned char *offset,
  *                                   unsigned char *checksum,
- *                                   void **Ls,
+ *                                   unsigned char *L_table,
  *                                   size_t nblocks,
- *                                   unsigned int nrounds);
+ *                                   unsigned int nrounds,
+ *                                   unsigned int blkn);
  */
 
 .align 3
@@ -1124,10 +1152,12 @@ _gcry_aes_ocb_auth_armv8_ce:
    *    x1: abuf
    *    x2: offset => x3
    *    x3: checksum => x4
-   *    x4: Ls => x5
+   *    x4: Ltable => x5
    *    x5: nblocks => x6  (0 < nblocks <= 32)
    *    w6: nrounds => w7
+   *    w7: blkn => w12
    */
+  mov x12, x7
   mov x7, x6
   mov x6, x5
   mov x5, x4
@@ -1145,6 +1175,7 @@ _gcry_aes_ocb_auth_armv8_ce:
 #define OCB_AUTH(bits) \
   .Locb_auth_entry_##bits: \
     cmp x6, #4; \
+    add w12, w12, #1; \
     b.lo .Locb_auth_loop_##bits; \
     \
   .Locb_auth_loop4_##bits: \
@@ -1152,10 +1183,24 @@ _gcry_aes_ocb_auth_armv8_ce:
     /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
     /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */ \
     \
-    ldp ptr8, ptr9, [x5], #(ptr_sz*2); \
+    add w9, w12, #1; \
+    add w10, w12, #2; \
+    add w11, w12, #3; \
+    rbit w8, w12; \
+    add w12, w12, #4; \
+    rbit w9, w9; \
+    rbit w10, w10; \
+    rbit w11, w11; \
+    clz w8, w8; /* ntz(i+0) */ \
+    clz w9, w9; /* ntz(i+1) */ \
+    clz w10, w10; /* ntz(i+2) */ \
+    clz w11, w11; /* ntz(i+3) */ \
+    add x8, x5, x8, lsl #4; \
+    ld1 {v1.16b-v4.16b}, [x1], #64;   /* load A_i+<0-3> */ \
+    add x9, x5, x9, lsl #4; \
+    add x10, x5, x10, lsl #4; \
+    add x11, x5, x11, lsl #4; \
     \
-    ld1 {v1.16b-v4.16b}, [x1], #64; /* load A_i+<0-3> */ \
-    ldp ptr10, ptr11, [x5], #(ptr_sz*2); \
     sub x6, x6, #4; \
     \
     ld1 {v5.16b}, [x8];               /* load L_{ntz(i+0)} */ \
@@ -1192,7 +1237,11 @@ _gcry_aes_ocb_auth_armv8_ce:
     /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
     /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */ \
     \
-    ldr ptr8, [x5], #(ptr_sz); \
+    rbit w8, w12; \
+    add w12, w12, #1; \
+    clz w8, w8; /* ntz(i) */ \
+    add x8, x5, x8, lsl #4; \
+    \
     ld1 {v1.16b}, [x1], #16; /* load aadtext */ \
     ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \
     sub x6, x6, #1; \
diff --git a/cipher/rijndael-armv8-ce.c b/cipher/rijndael-armv8-ce.c
index 1bf74da..334cf68 100644
--- a/cipher/rijndael-armv8-ce.c
+++ b/cipher/rijndael-armv8-ce.c
@@ -80,30 +80,33 @@ extern void _gcry_aes_ocb_enc_armv8_ce (const void *keysched,
                                         const unsigned char *inbuf,
                                         unsigned char *offset,
                                         unsigned char *checksum,
-                                        void **Ls,
+                                        unsigned char *L_table,
                                         size_t nblocks,
-                                        unsigned int nrounds);
+                                        unsigned int nrounds,
+                                        unsigned int blkn);
 extern void _gcry_aes_ocb_dec_armv8_ce (const void *keysched,
                                         unsigned char *outbuf,
                                         const unsigned char *inbuf,
                                         unsigned char *offset,
                                         unsigned char *checksum,
-                                        void **Ls,
+                                        unsigned char *L_table,
                                         size_t nblocks,
-                                        unsigned int nrounds);
+                                        unsigned int nrounds,
+                                        unsigned int blkn);
 extern void _gcry_aes_ocb_auth_armv8_ce (const void *keysched,
                                          const unsigned char *abuf,
                                          unsigned char *offset,
                                          unsigned char *checksum,
-                                         void **Ls,
+                                         unsigned char *L_table,
                                          size_t nblocks,
-                                         unsigned int nrounds);
+                                         unsigned int nrounds,
+                                         unsigned int blkn);
 
 typedef void (*ocb_crypt_fn_t) (const void *keysched, unsigned char *outbuf,
                                 const unsigned char *inbuf,
                                 unsigned char *offset, unsigned char *checksum,
-                                void **Ls, size_t nblocks,
-                                unsigned int nrounds);
+                                unsigned char *L_table, size_t nblocks,
+                                unsigned int nrounds, unsigned int blkn);
 
 void
 _gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key)
@@ -334,62 +337,11 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   const unsigned char *inbuf = inbuf_arg;
   unsigned int nrounds = ctx->rounds;
   u64 blkn = c->u_mode.ocb.data_nblocks;
-  u64 blkn_offs = blkn - blkn % 32;
-  unsigned int n = 32 - blkn % 32;
-  void *Ls[32];
-  void **l;
-  size_t i;
 
   c->u_mode.ocb.data_nblocks = blkn + nblocks;
 
-  if (nblocks >= 32)
-    {
-      for (i = 0; i < 32; i += 8)
-        {
-          Ls[(i + 0 + n) % 32] = (void *)c->u_mode.ocb.L[0];
-          Ls[(i + 1 + n) % 32] = (void *)c->u_mode.ocb.L[1];
-          Ls[(i + 2 + n) % 32] = (void *)c->u_mode.ocb.L[0];
-          Ls[(i + 3 + n) % 32] = (void *)c->u_mode.ocb.L[2];
-          Ls[(i + 4 + n) % 32] = (void *)c->u_mode.ocb.L[0];
-          Ls[(i + 5 + n) % 32] = (void *)c->u_mode.ocb.L[1];
-          Ls[(i + 6 + n) % 32] = (void *)c->u_mode.ocb.L[0];
-        }
-
-      Ls[(7 + n) % 32] = (void *)c->u_mode.ocb.L[3];
-      Ls[(15 + n) % 32] = (void *)c->u_mode.ocb.L[4];
-      Ls[(23 + n) % 32] = (void *)c->u_mode.ocb.L[3];
-      l = &Ls[(31 + n) % 32];
-
-      /* Process data in 32 block chunks. */
-      while (nblocks >= 32)
-        {
-          blkn_offs += 32;
-          *l = (void *)ocb_get_l(c, blkn_offs);
-
-          crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, 32,
-                    nrounds);
-
-          nblocks -= 32;
-          outbuf += 32 * 16;
-          inbuf  += 32 * 16;
-        }
-
-      if (nblocks && l < &Ls[nblocks])
-        {
-          *l = (void *)ocb_get_l(c, 32 + blkn_offs);
-        }
-    }
-  else
-    {
-      for (i = 0; i < nblocks; i++)
-        Ls[i] = (void *)ocb_get_l(c, ++blkn);
-    }
-
-  if (nblocks)
-    {
-      crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, nblocks,
-               nrounds);
-    }
+  crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+           c->u_mode.ocb.L[0], nblocks, nrounds, (unsigned int)blkn);
 }
 
 void
@@ -401,61 +353,12 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
   const unsigned char *abuf = abuf_arg;
   unsigned int nrounds = ctx->rounds;
   u64 blkn = c->u_mode.ocb.aad_nblocks;
-  u64 blkn_offs = blkn - blkn % 32;
-  unsigned int n = 32 - blkn % 32;
-  void *Ls[32];
-  void **l;
-  size_t i;
 
   c->u_mode.ocb.aad_nblocks = blkn + nblocks;
 
-  if (nblocks >= 32)
-    {
-      for (i = 0; i < 32; i += 8)
-        {
-          Ls[(i + 0 + n) % 32] = (void *)c->u_mode.ocb.L[0];
-          Ls[(i + 1 + n) % 32] = (void *)c->u_mode.ocb.L[1];
-          Ls[(i + 2 + n) % 32] = (void *)c->u_mode.ocb.L[0];
-          Ls[(i + 3 + n) % 32] = (void *)c->u_mode.ocb.L[2];
-          Ls[(i + 4 + n) % 32] = (void *)c->u_mode.ocb.L[0];
-          Ls[(i + 5 + n) % 32] = (void *)c->u_mode.ocb.L[1];
-          Ls[(i + 6 + n) % 32] = (void *)c->u_mode.ocb.L[0];
-        }
-
-      Ls[(7 + n) % 32] = (void *)c->u_mode.ocb.L[3];
-      Ls[(15 + n) % 32] = (void *)c->u_mode.ocb.L[4];
-      Ls[(23 + n) % 32] = (void *)c->u_mode.ocb.L[3];
-      l = &Ls[(31 + n) % 32];
-
-      /* Process data in 32 block chunks. */
-      while (nblocks >= 32)
-        {
-          blkn_offs += 32;
-          *l = (void *)ocb_get_l(c, blkn_offs);
-
-          _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
-                                      c->u_mode.ocb.aad_sum, Ls, 32, nrounds);
-
-          nblocks -= 32;
-          abuf += 32 * 16;
-        }
-
-      if (nblocks && l < &Ls[nblocks])
-        {
-          *l = (void *)ocb_get_l(c, 32 + blkn_offs);
-        }
-    }
-  else
-    {
-      for (i = 0; i < nblocks; i++)
-        Ls[i] = (void *)ocb_get_l(c, ++blkn);
-    }
-
-  if (nblocks)
-    {
-      _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
-                                  c->u_mode.ocb.aad_sum, Ls, nblocks, nrounds);
-    }
+  _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
+			      c->u_mode.ocb.aad_sum, c->u_mode.ocb.L[0],
+			      nblocks, nrounds, (unsigned int)blkn);
 }
 
 #endif /* USE_ARM_CE */

commit 2d2e5286d53e1f62fe040dff4c6e01961f00afe2
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Dec 10 12:29:12 2016 +0200

    OCB: Move large L handling from bottom to upper level
    
    * cipher/cipher-ocb.c (_gcry_cipher_ocb_get_l): Remove.
    (ocb_get_L_big): New.
    (_gcry_cipher_ocb_authenticate): L-big handling done in upper
    processing loop, so that lower level never sees the case where
    'aad_nblocks % 65536 == 0'; Add missing stack burn.
    (ocb_aad_finalize): Add missing stack burn.
    (ocb_crypt): L-big handling done in upper processing loop, so that
    lower level never sees the case where 'data_nblocks % 65536 == 0'.
    * cipher/cipher-internal.h (_gcry_cipher_ocb_get_l): Remove.
    (ocb_get_l): Remove 'l_tmp' usage and simplify, since the input is
    now more limited: 'N is not a multiple of 65536'.
    * cipher/rijndael-aesni.c (get_l): Remove.
    (aesni_ocb_enc, aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Remove
    l_tmp; Use 'ocb_get_l'.
    * cipher/rijndael-ssse3-amd64.c (get_l): Remove.
    (ssse3_ocb_enc, ssse3_ocb_dec, _gcry_aes_ssse3_ocb_auth): Remove
    l_tmp; Use 'ocb_get_l'.
    * cipher/camellia-glue.c: Remove OCB l_tmp usage.
    * cipher/rijndael-armv8-ce.c: Ditto.
    * cipher/rijndael.c: Ditto.
    * cipher/serpent.c: Ditto.
    * cipher/twofish.c: Ditto.
    --
    
    Move large L value generation to the top-most level to simplify the
    lower-level 'ocb_get_l' for greater performance and a simpler
    implementation.  This also helps with implementing OCB in assembly,
    as 'ocb_get_l' no longer has a function call on its slow path.
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
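
A hedged C sketch of the new upper-level chunking (the helper name is
hypothetical, constants as in the patch): with OCB_L_TABLE_SIZE == 16
the L-table covers ntz values for every block number that is not a
multiple of 65536, so the processing loop only has to stop and generate
an L-big value for exactly those block numbers:

    #include <stddef.h>
    #include <stdint.h>

    /* How many of NBLKS blocks may be processed before the block
       number hits the next multiple of 65536 (1 << OCB_L_TABLE_SIZE).
       A result of 0 means the very next block needs ocb_get_L_big().  */
    static size_t
    blocks_before_table_overflow (uint64_t nblocks_done, size_t nblks)
    {
      const size_t table_maxblks = 1 << 16;
      size_t nmaxblks;

      nmaxblks = (size_t)((nblocks_done + 1) % table_maxblks);
      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;

      return nblks < nmaxblks ? nblks : nmaxblks;
    }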

diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 1be35c9..7687094 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -619,7 +619,6 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   CAMELLIA_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char l_tmp[CAMELLIA_BLOCK_SIZE];
   int burn_stack_depth;
   u64 blkn = c->u_mode.ocb.data_nblocks;
 
@@ -664,9 +663,8 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	  /* Process data in 32 block chunks. */
 	  while (nblocks >= 32)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 32;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 32);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
 
 	      if (encrypt)
 		_gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -725,9 +723,8 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      if (encrypt)
 		_gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -759,8 +756,6 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
   c->u_mode.ocb.data_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
@@ -776,7 +771,6 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
   CAMELLIA_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
-  unsigned char l_tmp[CAMELLIA_BLOCK_SIZE];
   int burn_stack_depth;
   u64 blkn = c->u_mode.ocb.aad_nblocks;
 
@@ -818,9 +812,8 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	  /* Process data in 32 block chunks. */
 	  while (nblocks >= 32)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 32;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 32);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
 
 	      _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
 						 c->u_mode.ocb.aad_offset,
@@ -875,9 +868,8 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
 						c->u_mode.ocb.aad_offset,
@@ -905,8 +897,6 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
   c->u_mode.ocb.aad_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 01352f3..7204d48 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -459,28 +459,28 @@ gcry_err_code_t _gcry_cipher_ocb_get_tag
 gcry_err_code_t _gcry_cipher_ocb_check_tag
 /*           */ (gcry_cipher_hd_t c,
                  const unsigned char *intag, size_t taglen);
-const unsigned char *_gcry_cipher_ocb_get_l
-/*           */ (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n);
 
 
-/* Inline version of _gcry_cipher_ocb_get_l, with hard-coded fast paths for
-   most common cases.  */
+/* Return the L-value for block N.  Note: 'cipher_ocb.c' ensures that N
+ * will never be multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can
+ * be directly passed to _gcry_ctz() function and resulting index will
+ * never overflow the table.  */
 static inline const unsigned char *
-ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
+ocb_get_l (gcry_cipher_hd_t c, u64 n)
 {
-  if (n & 1)
-    return c->u_mode.ocb.L[0];
-  else if (n & 2)
-    return c->u_mode.ocb.L[1];
-  else
-    {
-      unsigned int ntz = _gcry_ctz64 (n);
-
-      if (ntz < OCB_L_TABLE_SIZE)
-	return c->u_mode.ocb.L[ntz];
-      else
-	return _gcry_cipher_ocb_get_l (c, l_tmp, n);
-    }
+  unsigned long ntz;
+
+#if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4)
+  /* Assumes that N != 0. */
+  asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+        : [ntz] "=r" (ntz)
+        : [low] "r" ((unsigned long)n)
+        : "cc");
+#else
+  ntz = _gcry_ctz (n);
+#endif
+
+  return c->u_mode.ocb.L[ntz];
 }
 
 #endif /*G10_CIPHER_INTERNAL_H*/
diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index d1f01d5..db42aaf 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -109,25 +109,17 @@ bit_copy (unsigned char *d, const unsigned char *s,
 }
 
 
-/* Return the L-value for block N.  In most cases we use the table;
-   only if the lower OCB_L_TABLE_SIZE bits of N are zero we need to
-   compute it.  With a table size of 16 we need to this this only
-   every 65536-th block.  L_TMP is a helper buffer of size
-   OCB_BLOCK_LEN which is used to hold the computation if not taken
-   from the table.  */
-const unsigned char *
-_gcry_cipher_ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
+/* Get L_big value for block N, where N is multiple of 65536. */
+static void
+ocb_get_L_big (gcry_cipher_hd_t c, u64 n, unsigned char *l_buf)
 {
   int ntz = _gcry_ctz64 (n);
 
-  if (ntz < OCB_L_TABLE_SIZE)
-    return c->u_mode.ocb.L[ntz];
+  gcry_assert(ntz >= OCB_L_TABLE_SIZE);
 
-  double_block_cpy (l_tmp, c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
+  double_block_cpy (l_buf, c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
   for (ntz -= OCB_L_TABLE_SIZE; ntz; ntz--)
-    double_block (l_tmp);
-
-  return l_tmp;
+    double_block (l_buf);
 }
 
 
@@ -241,7 +233,11 @@ gcry_err_code_t
 _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
                                size_t abuflen)
 {
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
   unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
 
   /* Check that a nonce and thus a key has been set and that we have
      not yet computed the tag.  We also return an error if the aad has
@@ -264,14 +260,24 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
         {
           c->u_mode.ocb.aad_nblocks++;
 
+          if ((c->u_mode.ocb.aad_nblocks % table_maxblks) == 0)
+            {
+              /* Table overflow, L needs to be generated. */
+              ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks + 1, l_tmp);
+            }
+          else
+            {
+              buf_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                       OCB_BLOCK_LEN);
+            }
+
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_mode.ocb.aad_offset,
-                     ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks),
-                     OCB_BLOCK_LEN);
+          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
           /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
           buf_xor (l_tmp, c->u_mode.ocb.aad_offset,
                    c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
-          c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
           buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
           c->u_mode.ocb.aad_nleftover = 0;
@@ -279,40 +285,83 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
     }
 
   if (!abuflen)
-    return 0;
-
-  /* Use a bulk method if available.  */
-  if (abuflen >= OCB_BLOCK_LEN && c->bulk.ocb_auth)
     {
-      size_t nblks;
-      size_t nleft;
-      size_t ndone;
+      if (burn > 0)
+        _gcry_burn_stack (burn + 4*sizeof(void*));
 
-      nblks = abuflen / OCB_BLOCK_LEN;
-      nleft = c->bulk.ocb_auth (c, abuf, nblks);
-      ndone = nblks - nleft;
-
-      abuf += ndone * OCB_BLOCK_LEN;
-      abuflen -= ndone * OCB_BLOCK_LEN;
-      nblks = nleft;
+      return 0;
     }
 
-  /* Hash all full blocks.  */
+  /* Full blocks handling. */
   while (abuflen >= OCB_BLOCK_LEN)
     {
-      c->u_mode.ocb.aad_nblocks++;
+      size_t nblks = abuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
 
-      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-      buf_xor_1 (c->u_mode.ocb.aad_offset,
-                 ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks),
-                 OCB_BLOCK_LEN);
-      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
-      buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
-      c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
-      buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+      /* Check how many blocks to process till table overflow. */
+      nmaxblks = (c->u_mode.ocb.aad_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+      if (nmaxblks == 0)
+        {
+          /* Table overflow, generate L and process one block. */
+          c->u_mode.ocb.aad_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry loop again. Next overflow will
+           * happen after 65535 blocks. */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_auth)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_auth (c, abuf, nblks);
+          ndone = nblks - nleft;
+
+          abuf += ndone * OCB_BLOCK_LEN;
+          abuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
+        }
+
+      /* Hash all full blocks.  */
+      while (nblks)
+        {
+          c->u_mode.ocb.aad_nblocks++;
+
+          gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_mode.ocb.aad_offset,
+                     ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                     OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
-      abuf += OCB_BLOCK_LEN;
-      abuflen -= OCB_BLOCK_LEN;
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+        }
     }
 
   /* Store away the remaining data.  */
@@ -321,6 +370,9 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
     c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover++] = *abuf;
   gcry_assert (!abuflen);
 
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
   return 0;
 }
 
@@ -330,6 +382,8 @@ static void
 ocb_aad_finalize (gcry_cipher_hd_t c)
 {
   unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
 
   /* Check that a nonce and thus a key has been set and that we have
      not yet computed the tag.  We also skip this if the aad has been
@@ -352,7 +406,8 @@ ocb_aad_finalize (gcry_cipher_hd_t c)
       l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
       buf_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
       /* Sum = Sum_m xor ENCIPHER(K, CipherInput)  */
-      c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+      nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+      burn = nburn > burn ? nburn : burn;
       buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
 
       c->u_mode.ocb.aad_nleftover = 0;
@@ -361,6 +416,9 @@ ocb_aad_finalize (gcry_cipher_hd_t c)
   /* Mark AAD as finalized so that gcry_cipher_ocb_authenticate can
    * return an erro when called again.  */
   c->u_mode.ocb.aad_finalized = 1;
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
 }
 
 
@@ -387,10 +445,13 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
            unsigned char *outbuf, size_t outbuflen,
            const unsigned char *inbuf, size_t inbuflen)
 {
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
   unsigned char l_tmp[OCB_BLOCK_LEN];
   unsigned int burn = 0;
   unsigned int nburn;
-  size_t nblks = inbuflen / OCB_BLOCK_LEN;
+  gcry_cipher_encrypt_t crypt_fn =
+      encrypt ? c->spec->encrypt : c->spec->decrypt;
 
   /* Check that a nonce and thus a key has been set and that we are
      not yet in end of data state. */
@@ -407,58 +468,112 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
   else if ((inbuflen % OCB_BLOCK_LEN))
     return GPG_ERR_INV_LENGTH;  /* We support only full blocks for now.  */
 
-  /* Use a bulk method if available.  */
-  if (nblks && c->bulk.ocb_crypt)
-    {
-      size_t nleft;
-      size_t ndone;
-
-      nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
-      ndone = nblks - nleft;
-
-      inbuf += ndone * OCB_BLOCK_LEN;
-      outbuf += ndone * OCB_BLOCK_LEN;
-      inbuflen -= ndone * OCB_BLOCK_LEN;
-      outbuflen -= ndone * OCB_BLOCK_LEN;
-      nblks = nleft;
-    }
-
-  if (nblks)
+  /* Full blocks handling. */
+  while (inbuflen >= OCB_BLOCK_LEN)
     {
-      gcry_cipher_encrypt_t crypt_fn =
-          encrypt ? c->spec->encrypt : c->spec->decrypt;
+      size_t nblks = inbuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
 
-      if (encrypt)
-        {
-          /* Checksum_i = Checksum_{i-1} xor P_i  */
-          ocb_checksum (c->u_ctr.ctr, inbuf, nblks);
-        }
+      /* Check how many blocks to process till table overflow. */
+      nmaxblks = (c->u_mode.ocb.data_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
 
-      /* Encrypt all full blocks.  */
-      while (inbuflen >= OCB_BLOCK_LEN)
+      if (nmaxblks == 0)
         {
+          /* Table overflow, generate L and process one block. */
           c->u_mode.ocb.data_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.data_nblocks, l_tmp);
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, 1);
+            }
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
-          buf_xor_1 (c->u_iv.iv,
-                     ocb_get_l (c, l_tmp, c->u_mode.ocb.data_nblocks),
-                     OCB_BLOCK_LEN);
+          buf_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
           /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
           buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
           nburn = crypt_fn (&c->context.c, outbuf, outbuf);
           burn = nburn > burn ? nburn : burn;
           buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
 
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, outbuf, 1);
+            }
+
           inbuf += OCB_BLOCK_LEN;
           inbuflen -= OCB_BLOCK_LEN;
           outbuf += OCB_BLOCK_LEN;
           outbuflen =- OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry loop again. Next overflow will
+           * happen after 65535 blocks. */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_crypt)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+          ndone = nblks - nleft;
+
+          inbuf += ndone * OCB_BLOCK_LEN;
+          outbuf += ndone * OCB_BLOCK_LEN;
+          inbuflen -= ndone * OCB_BLOCK_LEN;
+          outbuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
         }
 
-      if (!encrypt)
+      if (nblks)
         {
-          /* Checksum_i = Checksum_{i-1} xor P_i  */
-          ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks);
+          size_t nblks_chksum = nblks;
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, nblks_chksum);
+            }
+
+          /* Encrypt all full blocks.  */
+          while (nblks)
+            {
+              c->u_mode.ocb.data_nblocks++;
+
+              gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
+
+              /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+              buf_xor_1 (c->u_iv.iv,
+                         ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+                         OCB_BLOCK_LEN);
+              /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+              buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+              nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+              burn = nburn > burn ? nburn : burn;
+              buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+              inbuf += OCB_BLOCK_LEN;
+              inbuflen -= OCB_BLOCK_LEN;
+              outbuf += OCB_BLOCK_LEN;
+              outbuflen =- OCB_BLOCK_LEN;
+              nblks--;
+            }
+
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr,
+                            outbuf - nblks_chksum * OCB_BLOCK_LEN,
+                            nblks_chksum);
+            }
         }
     }
 
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 8b28b3a..7852e19 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -1331,74 +1331,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 
-static inline const unsigned char *
-get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
-       unsigned char *ctr)
-{
-  const unsigned char *l;
-  unsigned int ntz;
-
-  if (i & 0xffffffffU)
-    {
-      asm ("rep;bsf %k[low], %k[ntz]\n\t"
-           : [ntz] "=r" (ntz)
-           : [low] "r" (i & 0xffffffffU)
-           : "cc");
-    }
-  else
-    {
-      if (OCB_L_TABLE_SIZE < 32)
-        {
-          ntz = 32;
-        }
-      else if (i)
-        {
-          asm ("rep;bsf %k[high], %k[ntz]\n\t"
-               : [ntz] "=r" (ntz)
-               : [high] "r" (i >> 32)
-               : "cc");
-          ntz += 32;
-        }
-      else
-        {
-          ntz = 64;
-        }
-    }
-
-  if (ntz < OCB_L_TABLE_SIZE)
-    {
-      l = c->u_mode.ocb.L[ntz];
-    }
-  else
-    {
-      /* Store Offset & Checksum before calling external function */
-      asm volatile ("movdqu %%xmm5, %[iv]\n\t"
-                    "movdqu %%xmm6, %[ctr]\n\t"
-                    : [iv] "=m" (*iv),
-                      [ctr] "=m" (*ctr)
-                    :
-                    : "memory" );
-
-      l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
-
-      /* Restore Offset & Checksum */
-      asm volatile ("movdqu %[iv], %%xmm5\n\t"
-                    "movdqu %[ctr], %%xmm6\n\t"
-                    : /* No output */
-                    : [iv] "m" (*iv),
-                      [ctr] "m" (*ctr)
-                    : "memory" );
-    }
-
-  return l;
-}
-
-
 static void
 aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
@@ -1420,7 +1356,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks && n % 4; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -1449,9 +1385,8 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      /* l_tmp will be used only every 65536-th block. */
       n += 4;
-      l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -1522,7 +1457,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -1559,8 +1494,6 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 
   aesni_cleanup ();
   aesni_cleanup_2_6 ();
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 
@@ -1568,7 +1501,6 @@ static void
 aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
@@ -1589,7 +1521,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks && n % 4; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -1618,9 +1550,8 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      /* l_tmp will be used only every 65536-th block. */
       n += 4;
-      l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -1691,7 +1622,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 
   for ( ;nblocks; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -1728,8 +1659,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 
   aesni_cleanup ();
   aesni_cleanup_2_6 ();
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 
@@ -1748,7 +1677,6 @@ void
 _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                           size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   u64 n = c->u_mode.ocb.aad_nblocks;
@@ -1768,8 +1696,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   for ( ;nblocks && n % 4; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
-                c->u_mode.ocb.aad_sum);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -1794,10 +1721,8 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   for ( ;nblocks > 3 ; nblocks -= 4 )
     {
-      /* l_tmp will be used only every 65536-th block. */
       n += 4;
-      l = get_l(c, l_tmp.x1, n, c->u_mode.ocb.aad_offset,
-		c->u_mode.ocb.aad_sum);
+      l = ocb_get_l(c, n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -1849,8 +1774,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   for ( ;nblocks; nblocks-- )
     {
-      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
-                c->u_mode.ocb.aad_sum);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -1883,8 +1807,6 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   aesni_cleanup ();
   aesni_cleanup_2_6 ();
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 
diff --git a/cipher/rijndael-armv8-ce.c b/cipher/rijndael-armv8-ce.c
index bed4066..1bf74da 100644
--- a/cipher/rijndael-armv8-ce.c
+++ b/cipher/rijndael-armv8-ce.c
@@ -336,7 +336,6 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   u64 blkn = c->u_mode.ocb.data_nblocks;
   u64 blkn_offs = blkn - blkn % 32;
   unsigned int n = 32 - blkn % 32;
-  unsigned char l_tmp[16];
   void *Ls[32];
   void **l;
   size_t i;
@@ -364,9 +363,8 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       /* Process data in 32 block chunks. */
       while (nblocks >= 32)
         {
-          /* l_tmp will be used only every 65536-th block. */
           blkn_offs += 32;
-          *l = (void *)ocb_get_l(c, l_tmp, blkn_offs);
+          *l = (void *)ocb_get_l(c, blkn_offs);
 
           crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, 32,
                     nrounds);
@@ -378,13 +376,13 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 
       if (nblocks && l < &Ls[nblocks])
         {
-          *l = (void *)ocb_get_l(c, l_tmp, 32 + blkn_offs);
+          *l = (void *)ocb_get_l(c, 32 + blkn_offs);
         }
     }
   else
     {
       for (i = 0; i < nblocks; i++)
-        Ls[i] = (void *)ocb_get_l(c, l_tmp, ++blkn);
+        Ls[i] = (void *)ocb_get_l(c, ++blkn);
     }
 
   if (nblocks)
@@ -392,8 +390,6 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, nblocks,
                nrounds);
     }
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 void
@@ -407,7 +403,6 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
   u64 blkn = c->u_mode.ocb.aad_nblocks;
   u64 blkn_offs = blkn - blkn % 32;
   unsigned int n = 32 - blkn % 32;
-  unsigned char l_tmp[16];
   void *Ls[32];
   void **l;
   size_t i;
@@ -435,9 +430,8 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
       /* Process data in 32 block chunks. */
       while (nblocks >= 32)
         {
-          /* l_tmp will be used only every 65536-th block. */
           blkn_offs += 32;
-          *l = (void *)ocb_get_l(c, l_tmp, blkn_offs);
+          *l = (void *)ocb_get_l(c, blkn_offs);
 
           _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
                                       c->u_mode.ocb.aad_sum, Ls, 32, nrounds);
@@ -448,13 +442,13 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
 
       if (nblocks && l < &Ls[nblocks])
         {
-          *l = (void *)ocb_get_l(c, l_tmp, 32 + blkn_offs);
+          *l = (void *)ocb_get_l(c, 32 + blkn_offs);
         }
     }
   else
     {
       for (i = 0; i < nblocks; i++)
-        Ls[i] = (void *)ocb_get_l(c, l_tmp, ++blkn);
+        Ls[i] = (void *)ocb_get_l(c, ++blkn);
     }
 
   if (nblocks)
@@ -462,8 +456,6 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
       _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
                                   c->u_mode.ocb.aad_sum, Ls, nblocks, nrounds);
     }
-
-  wipememory(&l_tmp, sizeof(l_tmp));
 }
 
 #endif /* USE_ARM_CE */
diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c
index 937d868..a8e89d4 100644
--- a/cipher/rijndael-ssse3-amd64.c
+++ b/cipher/rijndael-ssse3-amd64.c
@@ -527,92 +527,10 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 
-static inline const unsigned char *
-get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
-       unsigned char *ctr, const void **aes_const_ptr,
-       byte ssse3_state[SSSE3_STATE_SIZE], int encrypt)
-{
-  const unsigned char *l;
-  unsigned int ntz;
-
-  if (i & 1)
-    return c->u_mode.ocb.L[0];
-  else if (i & 2)
-    return c->u_mode.ocb.L[1];
-  else if (i & 0xffffffffU)
-    {
-      asm ("rep;bsf %k[low], %k[ntz]\n\t"
-           : [ntz] "=r" (ntz)
-           : [low] "r" (i & 0xffffffffU)
-           : "cc");
-    }
-  else
-    {
-      if (OCB_L_TABLE_SIZE < 32)
-        {
-          ntz = 32;
-        }
-      else if (i)
-        {
-          asm ("rep;bsf %k[high], %k[ntz]\n\t"
-               : [ntz] "=r" (ntz)
-               : [high] "r" (i >> 32)
-               : "cc");
-          ntz += 32;
-        }
-      else
-        {
-          ntz = 64;
-        }
-    }
-
-  if (ntz < OCB_L_TABLE_SIZE)
-    {
-      l = c->u_mode.ocb.L[ntz];
-    }
-  else
-    {
-      /* Store Offset & Checksum before calling external function */
-      asm volatile ("movdqu %%xmm7, %[iv]\n\t"
-                    "movdqu %%xmm6, %[ctr]\n\t"
-                    : [iv] "=m" (*iv),
-                      [ctr] "=m" (*ctr)
-                    :
-                    : "memory" );
-
-      /* Restore SSSE3 state. */
-      vpaes_ssse3_cleanup();
-
-      l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
-
-      /* Save SSSE3 state. */
-      if (encrypt)
-	{
-	  vpaes_ssse3_prepare_enc (*aes_const_ptr);
-	}
-      else
-	{
-	  vpaes_ssse3_prepare_dec (*aes_const_ptr);
-	}
-
-      /* Restore Offset & Checksum */
-      asm volatile ("movdqu %[iv], %%xmm7\n\t"
-                    "movdqu %[ctr], %%xmm6\n\t"
-                    : /* No output */
-                    : [iv] "m" (*iv),
-                      [ctr] "m" (*ctr)
-                    : "memory" );
-    }
-
-  return l;
-}
-
-
 static void
 ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
@@ -635,8 +553,7 @@ ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       const unsigned char *l;
 
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr, &aes_const_ptr,
-		ssse3_state, 1);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Checksum_i = Checksum_{i-1} xor P_i  */
@@ -671,7 +588,6 @@ ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
                 :
                 : "memory" );
 
-  wipememory(&l_tmp, sizeof(l_tmp));
   vpaes_ssse3_cleanup ();
 }
 
@@ -679,7 +595,6 @@ static void
 ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
@@ -702,8 +617,7 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       const unsigned char *l;
 
-      l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr, &aes_const_ptr,
-		ssse3_state, 0);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
@@ -738,7 +652,6 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
                 :
                 : "memory" );
 
-  wipememory(&l_tmp, sizeof(l_tmp));
   vpaes_ssse3_cleanup ();
 }
 
@@ -758,7 +671,6 @@ void
 _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                           size_t nblocks)
 {
-  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
   RIJNDAEL_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
   u64 n = c->u_mode.ocb.aad_nblocks;
@@ -780,8 +692,7 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       const unsigned char *l;
 
-      l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
-                c->u_mode.ocb.aad_sum, &aes_const_ptr, ssse3_state, 1);
+      l = ocb_get_l(c, ++n);
 
       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
@@ -812,7 +723,6 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                 :
                 : "memory" );
 
-  wipememory(&l_tmp, sizeof(l_tmp));
   vpaes_ssse3_cleanup ();
 }
 
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index cc6a722..66ea0f3 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1353,7 +1353,7 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
-          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
@@ -1378,7 +1378,7 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
-          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
@@ -1445,7 +1445,7 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.aad_nblocks;
-          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
diff --git a/cipher/serpent.c b/cipher/serpent.c
index ef19d3b..ea4b8ed 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -1235,7 +1235,6 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   serpent_context_t *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char l_tmp[sizeof(serpent_block_t)];
   int burn_stack_depth = 2 * sizeof (serpent_block_t);
   u64 blkn = c->u_mode.ocb.data_nblocks;
 #else
@@ -1275,9 +1274,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      if (encrypt)
 		_gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1327,9 +1325,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	/* Process data in 8 block chunks. */
 	while (nblocks >= 8)
 	  {
-	    /* l_tmp will be used only every 65536-th block. */
 	    blkn += 8;
-	    *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 8);
+	    *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
 
 	    if (encrypt)
 	      _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1378,9 +1375,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 8;
-	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+	      *l = ocb_get_l(c, blkn - blkn % 8);
 
 	      if (encrypt)
 		_gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1410,8 +1406,6 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   c->u_mode.ocb.data_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
@@ -1427,7 +1421,6 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   serpent_context_t *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
-  unsigned char l_tmp[sizeof(serpent_block_t)];
   int burn_stack_depth = 2 * sizeof(serpent_block_t);
   u64 blkn = c->u_mode.ocb.aad_nblocks;
 #else
@@ -1465,9 +1458,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 16;
-	      *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
 
 	      _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					  c->u_mode.ocb.aad_sum, Ls);
@@ -1512,9 +1504,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	/* Process data in 8 block chunks. */
 	while (nblocks >= 8)
 	  {
-	    /* l_tmp will be used only every 65536-th block. */
 	    blkn += 8;
-	    *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 8);
+	    *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
 
 	    _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					c->u_mode.ocb.aad_sum, Ls);
@@ -1558,9 +1549,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
 	    {
-	      /* l_tmp will be used only every 65536-th block. */
 	      blkn += 8;
-	      *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+	      *l = ocb_get_l(c, blkn - blkn % 8);
 
 	      _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
 					  c->u_mode.ocb.aad_sum, Ls);
@@ -1585,8 +1575,6 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
   c->u_mode.ocb.aad_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #endif
diff --git a/cipher/twofish.c b/cipher/twofish.c
index 7a4d26a..55f6fb9 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -1261,7 +1261,6 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   TWOFISH_context *ctx = (void *)&c->context.c;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char l_tmp[TWOFISH_BLOCKSIZE];
   unsigned int burn, burn_stack_depth = 0;
   u64 blkn = c->u_mode.ocb.data_nblocks;
 
@@ -1273,10 +1272,9 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-	/* l_tmp will be used only every 65536-th block. */
-	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 1);
-	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 2);
-	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 3);
+	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
 	blkn += 3;
 
 	if (encrypt)
@@ -1300,8 +1298,6 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 
   c->u_mode.ocb.data_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #else
@@ -1322,7 +1318,6 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 #ifdef USE_AMD64_ASM
   TWOFISH_context *ctx = (void *)&c->context.c;
   const unsigned char *abuf = abuf_arg;
-  unsigned char l_tmp[TWOFISH_BLOCKSIZE];
   unsigned int burn, burn_stack_depth = 0;
   u64 blkn = c->u_mode.ocb.aad_nblocks;
 
@@ -1334,10 +1329,9 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     /* Process data in 3 block chunks. */
     while (nblocks >= 3)
       {
-	/* l_tmp will be used only every 65536-th block. */
-	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 1);
-	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 2);
-	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 3);
+	Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+	Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+	Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
 	blkn += 3;
 
 	twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
@@ -1356,8 +1350,6 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   c->u_mode.ocb.aad_nblocks = blkn;
 
-  wipememory(&l_tmp, sizeof(l_tmp));
-
   if (burn_stack_depth)
     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
 #else
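
The refactoring above replaces the per-implementation 'get_l' helpers (and
their 'l_tmp' scratch buffers) with a single 'ocb_get_l(c, n)' lookup.  For
reference, a minimal sketch of the underlying idea, assuming a precomputed
table L[i] = double^i(L_*) that covers every trailing-zero count the caller
can reach; the helper name and table layout here are illustrative, not
libgcrypt's actual internals:

    #include <stdint.h>

    /* Sketch only: OCB's per-block offset update XORs in L_{ntz(n)},
       where ntz(n) is the number of trailing zero bits of the 1-based
       block number n.  With the table guaranteed to cover all reachable
       ntz values, the lookup needs no temporary buffer.  */
    static const unsigned char *
    sketch_ocb_get_l (const unsigned char L[][16], uint64_t n)
    {
      unsigned int ntz = (unsigned int)__builtin_ctzll (n);  /* n > 0 */
      return L[ntz];
    }

(__builtin_ctzll is the GCC/Clang intrinsic corresponding to the 'rep;bsf'
bit-scan sequences removed above.)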

commit 161d339f48c03be7fd0f4249d730f7f1767ef8e4
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Dec 10 12:29:12 2016 +0200

    OCB: remove 'int64_t' usage
    
    * cipher/cipher-ocb.c (double_block): Use alternative way to generate
    sign-bit mask, without 'int64_t'.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index 92260d2..d1f01d5 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -66,7 +66,7 @@ double_block (unsigned char *b)
   l = buf_get_be64 (b);
   r = buf_get_be64 (b + 8);
 
-  l_0 = (int64_t)l >> 63;
+  l_0 = -(l >> 63);
   l = (l + l) ^ (r >> 63);
   r = (r + r) ^ (l_0 & 135);
 

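The new expression relies only on unsigned arithmetic: 'l >> 63' isolates
the top (sign) bit as 0 or 1, and unsigned negation turns that into an
all-zero or all-one mask, which then selects the GF(2^128) reduction
constant 135 (0x87).  The old '(int64_t)l >> 63' built the same mask via an
arithmetic shift, but right-shifting a negative signed value is
implementation-defined in C.  A standalone check of the equivalence on the
usual two's-complement targets (illustrative, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
      const uint64_t vals[] = { 0, 1, 0x8000000000000000ULL,
                                0xdeadbeefcafebabeULL };
      size_t i;

      for (i = 0; i < sizeof vals / sizeof vals[0]; i++)
        {
          uint64_t l = vals[i];
          uint64_t old_mask = (uint64_t)((int64_t)l >> 63); /* removed */
          uint64_t new_mask = -(l >> 63);                   /* added   */
          printf ("%016llx -> old %016llx, new %016llx\n",
                  (unsigned long long)l,
                  (unsigned long long)old_mask,
                  (unsigned long long)new_mask);
        }
      return 0;
    }

Both columns print 0000000000000000 for inputs with the top bit clear and
ffffffffffffffff for inputs with it set.
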
commit 0b03b658bebc69a84d87ef13f9b60a27b0c42305
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Sat Dec 10 12:29:12 2016 +0200

    random-drbg: use bufhelp function for big-endian store
    
    * random/random-drbg.c (drbg_cpu_to_be32): Remove.
    (drbg_ctr_df, drbg_hash_df): Use 'buf_put_be32' instead of
    'drbg_cpu_to_be32'.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/random/random-drbg.c b/random/random-drbg.c
index e2fe861..baaa65a 100644
--- a/random/random-drbg.c
+++ b/random/random-drbg.c
@@ -155,7 +155,7 @@
 #include "g10lib.h"
 #include "random.h"
 #include "rand-internal.h"
-#include "../cipher/bithelp.h"
+#include "../cipher/bufhelp.h"
 
 
 
@@ -546,27 +546,6 @@ drbg_sec_strength (u32 flags)
     return 32;
 }
 
-/*
- * Convert an integer into a byte representation of this integer.
- * The byte representation is big-endian
- *
- * @val value to be converted
- * @buf buffer holding the converted integer -- caller must ensure that
- *      buffer size is at least 32 bit
- */
-static inline void
-drbg_cpu_to_be32 (u32 val, unsigned char *buf)
-{
-  /* FIXME: This may raise a bus error.  */
-  struct s
-  {
-    u32 conv;
-  };
-  struct s *conversion = (struct s *) buf;
-
-  conversion->conv = be_bswap32 (val);
-}
-
 static void
 drbg_add_buf (unsigned char *dst, size_t dstlen,
               unsigned char *add, size_t addlen)
@@ -802,10 +781,10 @@ drbg_ctr_df (drbg_state_t drbg, unsigned char *df_data,
   /* 10.4.2 step 2 -- calculate the entire length of all input data */
   for (; NULL != tempstr; tempstr = tempstr->next)
     inputlen += tempstr->len;
-  drbg_cpu_to_be32 (inputlen, &L_N[0]);
+  buf_put_be32 (&L_N[0], inputlen);
 
   /* 10.4.2 step 3 */
-  drbg_cpu_to_be32 (bytes_to_return, &L_N[4]);
+  buf_put_be32 (&L_N[4], bytes_to_return);
 
   /* 10.4.2 step 5: length is size of L_N, input_string, one byte, padding */
   padlen = (inputlen + sizeof (L_N) + 1) % (drbg_blocklen (drbg));
@@ -838,7 +817,7 @@ drbg_ctr_df (drbg_state_t drbg, unsigned char *df_data,
       /* 10.4.2 step 9.1 - the padding is implicit as the buffer
        * holds zeros after allocation -- even the increment of i
        * is irrelevant as the increment remains within length of i */
-      drbg_cpu_to_be32 (i, iv);
+      buf_put_be32 (iv, i);
       /* 10.4.2 step 9.2 -- BCC and concatenation with temp */
       ret = drbg_ctr_bcc (drbg, temp + templen, K, &S1);
       if (ret)
@@ -1137,7 +1116,7 @@ drbg_hash_df (drbg_state_t drbg,
 
   /* 10.4.1 step 3 */
   input[0] = 1;
-  drbg_cpu_to_be32 ((outlen * 8), &input[1]);
+  buf_put_be32 (&input[1], (outlen * 8));
 
   /* 10.4.1 step 4.1 -- concatenation of data for input into hash */
   drbg_string_fill (&data1, input, 5);

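Byte-wise stores like 'buf_put_be32' also sidestep the alignment hazard
flagged in the removed helper's FIXME: casting an arbitrary
'unsigned char *' to a struct of 'u32' and storing through it can fault on
strict-alignment CPUs.  A minimal sketch of such a store, in the spirit of
bufhelp (the actual libgcrypt implementation may differ, e.g. by using
unaligned native stores where the platform allows):

    #include <stdint.h>

    /* Write VAL to BUF in big-endian byte order.  Operating on
       individual bytes imposes no alignment requirement on BUF.  */
    static void
    sketch_put_be32 (unsigned char *buf, uint32_t val)
    {
      buf[0] = (unsigned char)(val >> 24);
      buf[1] = (unsigned char)(val >> 16);
      buf[2] = (unsigned char)(val >> 8);
      buf[3] = (unsigned char)(val);
    }
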
-----------------------------------------------------------------------

Summary of changes:
 .gitignore                         |   1 +
 cipher/camellia-glue.c             |  18 +--
 cipher/cipher-internal.h           |  36 ++---
 cipher/cipher-ocb.c                | 273 ++++++++++++++++++++++++++-----------
 cipher/rijndael-aesni.c            |  96 ++-----------
 cipher/rijndael-armv8-aarch32-ce.S |  98 +++++++++++--
 cipher/rijndael-armv8-aarch64-ce.S | 125 +++++++++++------
 cipher/rijndael-armv8-ce.c         | 137 +++----------------
 cipher/rijndael-ssse3-amd64.c      |  96 +------------
 cipher/rijndael.c                  |   6 +-
 cipher/serpent.c                   |  24 +---
 cipher/twofish.c                   |  20 +--
 configure.ac                       |   1 +
 random/random-drbg.c               |  31 +----
 src/hwfeatures.c                   |  16 +--
 tests/Makefile.am                  |   5 +-
 tests/basic-disable-all-hwf.in     |   4 +
 tests/hashtest-256g.in             |   2 +-
 18 files changed, 453 insertions(+), 536 deletions(-)
 create mode 100644 tests/basic-disable-all-hwf.in


hooks/post-receive
-- 
The GNU crypto library
http://git.gnupg.org


_______________________________________________
Gnupg-commits mailing list
Gnupg-commits at gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits