[PATCH 4/7] Move bulk OCB L pointer array setup code to common header

Jussi Kivilinna jussi.kivilinna at iki.fi
Sun Apr 24 20:40:22 CEST 2022


* cipher/bulkhelp.h: New.
* cipher/camellia-glue.c (_gcry_camellia_ocb_crypt)
(_gcry_camellia_ocb_auth): Use new
`bulk_ocb_prepare_L_pointers_array_blkXX` function for OCB L pointer
array setup.
* cipher/serpent.c (_gcry_serpent_ocb_crypt)
(_gcry_serpent_ocb_auth): Likewise.
* cipher/sm4.c (_gcry_sm4_ocb_crypt, _gcry_sm4_ocb_auth): Likewise.
* cipher/twofish.c (_gcry_twofish_ocb_crypt)
(_gcry_twofish_ocb_auth): Likewise.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/bulkhelp.h      | 103 +++++++++++++++++++++++++++++++++++++++++
 cipher/camellia-glue.c |  78 ++-----------------------------
 cipher/serpent.c       |  99 +++++++--------------------------------
 cipher/sm4.c           |  63 ++-----------------------
 cipher/twofish.c       |  37 ++-------------
 5 files changed, 132 insertions(+), 248 deletions(-)
 create mode 100644 cipher/bulkhelp.h

diff --git a/cipher/bulkhelp.h b/cipher/bulkhelp.h
new file mode 100644
index 00000000..72668d42
--- /dev/null
+++ b/cipher/bulkhelp.h
@@ -0,0 +1,103 @@
+/* bulkhelp.h  -  Some bulk processing helpers
+ * Copyright (C) 2022 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GCRYPT_BULKHELP_H
+#define GCRYPT_BULKHELP_H
+
+
+#include "g10lib.h"
+#include "cipher-internal.h"
+
+
+#ifdef __x86_64__
+/* Use u64 to store pointers for x32 support (assembly function assumes
+ * 64-bit pointers). */
+typedef u64 ocb_L_uintptr_t;
+#else
+typedef uintptr_t ocb_L_uintptr_t;
+#endif
+
+
+static inline ocb_L_uintptr_t *
+bulk_ocb_prepare_L_pointers_array_blk32 (gcry_cipher_hd_t c,
+                                         ocb_L_uintptr_t Ls[32], u64 blkn)
+{
+  unsigned int n = 32 - (blkn % 32);
+  unsigned int i;
+
+  for (i = 0; i < 32; i += 8)
+    {
+      Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+      Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+      Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+      Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+      Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+      Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+      Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+    }
+
+  Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+  Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
+  Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+  return &Ls[(31 + n) % 32];
+}
+
+
+static inline ocb_L_uintptr_t *
+bulk_ocb_prepare_L_pointers_array_blk16 (gcry_cipher_hd_t c,
+                                         ocb_L_uintptr_t Ls[16], u64 blkn)
+{
+  unsigned int n = 16 - (blkn % 16);
+  unsigned int i;
+
+  for (i = 0; i < 16; i += 8)
+    {
+      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+    }
+
+  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+  return &Ls[(15 + n) % 16];
+}
+
+
+static inline ocb_L_uintptr_t *
+bulk_ocb_prepare_L_pointers_array_blk8 (gcry_cipher_hd_t c,
+                                        ocb_L_uintptr_t Ls[8], u64 blkn)
+{
+  unsigned int n = 8 - (blkn % 8);
+
+  Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+  Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+  Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+  Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+  Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+  Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+  Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+  Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+
+  return &Ls[(7 + n) % 8];
+}
+
+
+#endif /*GCRYPT_BULKHELP_H*/
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 7f009db4..7f6e92d2 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -65,6 +65,7 @@
 #include "bufhelp.h"
 #include "cipher-internal.h"
 #include "cipher-selftest.h"
+#include "bulkhelp.h"
 
 /* Helper macro to force alignment to 16 bytes.  */
 #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
@@ -788,9 +789,7 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_aesni_avx2 = 0;
       u64 Ls[32];
-      unsigned int n = 32 - (blkn % 32);
       u64 *l;
-      int i;
 
       if (nblocks >= 32)
 	{
@@ -808,24 +807,7 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 	    bulk_ocb_fn = encrypt ? _gcry_camellia_gfni_avx2_ocb_enc
 				  : _gcry_camellia_gfni_avx2_ocb_dec;
 #endif
-
-	  for (i = 0; i < 32; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
-	  Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(31 + n) % 32];
+          l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);
 
 	  /* Process data in 32 block chunks. */
 	  while (nblocks >= 32)
@@ -860,27 +842,11 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_aesni_avx = 0;
       u64 Ls[16];
-      unsigned int n = 16 - (blkn % 16);
       u64 *l;
-      int i;
 
       if (nblocks >= 16)
 	{
-	  for (i = 0; i < 16; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(15 + n) % 16];
+          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
@@ -947,9 +913,7 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_aesni_avx2 = 0;
       u64 Ls[32];
-      unsigned int n = 32 - (blkn % 32);
       u64 *l;
-      int i;
 
       if (nblocks >= 32)
 	{
@@ -965,23 +929,7 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 	    bulk_auth_fn = _gcry_camellia_gfni_avx2_ocb_auth;
 #endif
 
-	  for (i = 0; i < 32; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
-	  Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(31 + n) % 32];
+          l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);
 
 	  /* Process data in 32 block chunks. */
 	  while (nblocks >= 32)
@@ -1016,27 +964,11 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_aesni_avx = 0;
       u64 Ls[16];
-      unsigned int n = 16 - (blkn % 16);
       u64 *l;
-      int i;
 
       if (nblocks >= 16)
 	{
-	  for (i = 0; i < 16; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(15 + n) % 16];
+          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
diff --git a/cipher/serpent.c b/cipher/serpent.c
index 159d889f..dfe5cc28 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -31,6 +31,7 @@
 #include "bufhelp.h"
 #include "cipher-internal.h"
 #include "cipher-selftest.h"
+#include "bulkhelp.h"
 
 
 /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
@@ -1272,27 +1273,11 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_avx2 = 0;
       u64 Ls[16];
-      unsigned int n = 16 - (blkn % 16);
       u64 *l;
-      int i;
 
       if (nblocks >= 16)
 	{
-	  for (i = 0; i < 16; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(15 + n) % 16];
+          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
@@ -1329,21 +1314,11 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   {
     int did_use_sse2 = 0;
     u64 Ls[8];
-    unsigned int n = 8 - (blkn % 8);
     u64 *l;
 
     if (nblocks >= 8)
       {
-	/* Use u64 to store pointers for x32 support (assembly function
-	  * assumes 64-bit pointers). */
-	Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	l = &Ls[(7 + n) % 8];
+        l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
 	/* Process data in 8 block chunks. */
 	while (nblocks >= 8)
@@ -1380,33 +1355,25 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   if (ctx->use_neon)
     {
       int did_use_neon = 0;
-      const void *Ls[8];
-      unsigned int n = 8 - (blkn % 8);
-      const void **l;
+      uintptr_t Ls[8];
+      uintptr_t *l;
 
       if (nblocks >= 8)
 	{
-	  Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
-	  Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
-	  Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
-	  Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
-	  Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
-	  Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
-	  Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
-	  l = &Ls[(7 + n) % 8];
+          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
 	    {
 	      blkn += 8;
-	      *l = ocb_get_l(c,  blkn - blkn % 8);
+	      *l = (uintptr_t)(void *)ocb_get_l(c,  blkn - blkn % 8);
 
 	      if (encrypt)
 		_gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
-					  c->u_ctr.ctr, Ls);
+					   c->u_ctr.ctr, (void **)Ls);
 	      else
 		_gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
-					  c->u_ctr.ctr, Ls);
+					   c->u_ctr.ctr, (void **)Ls);
 
 	      nblocks -= 8;
 	      outbuf += 8 * sizeof(serpent_block_t);
@@ -1456,27 +1423,11 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_avx2 = 0;
       u64 Ls[16];
-      unsigned int n = 16 - (blkn % 16);
       u64 *l;
-      int i;
 
       if (nblocks >= 16)
 	{
-	  for (i = 0; i < 16; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(15 + n) % 16];
+          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
@@ -1508,21 +1459,11 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
   {
     int did_use_sse2 = 0;
     u64 Ls[8];
-    unsigned int n = 8 - (blkn % 8);
     u64 *l;
 
     if (nblocks >= 8)
       {
-	/* Use u64 to store pointers for x32 support (assembly function
-	* assumes 64-bit pointers). */
-	Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	l = &Ls[(7 + n) % 8];
+        l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
 	/* Process data in 8 block chunks. */
 	while (nblocks >= 8)
@@ -1554,29 +1495,21 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
   if (ctx->use_neon)
     {
       int did_use_neon = 0;
-      const void *Ls[8];
-      unsigned int n = 8 - (blkn % 8);
-      const void **l;
+      uintptr_t Ls[8];
+      uintptr_t *l;
 
       if (nblocks >= 8)
 	{
-	  Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
-	  Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
-	  Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
-	  Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
-	  Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
-	  Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
-	  Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
-	  l = &Ls[(7 + n) % 8];
+          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
 	    {
 	      blkn += 8;
-	      *l = ocb_get_l(c, blkn - blkn % 8);
+	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
 
 	      _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
-					  c->u_mode.ocb.aad_sum, Ls);
+					  c->u_mode.ocb.aad_sum, (void **)Ls);
 
 	      nblocks -= 8;
 	      abuf += 8 * sizeof(serpent_block_t);
diff --git a/cipher/sm4.c b/cipher/sm4.c
index d36d9ceb..0148365c 100644
--- a/cipher/sm4.c
+++ b/cipher/sm4.c
@@ -30,6 +30,7 @@
 #include "bufhelp.h"
 #include "cipher-internal.h"
 #include "cipher-selftest.h"
+#include "bulkhelp.h"
 
 /* Helper macro to force alignment to 64 bytes.  */
 #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
@@ -1030,27 +1031,11 @@ _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   if (ctx->use_aesni_avx2)
     {
       u64 Ls[16];
-      unsigned int n = 16 - (blkn % 16);
       u64 *l;
-      int i;
 
       if (nblocks >= 16)
 	{
-	  for (i = 0; i < 16; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(15 + n) % 16];
+          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
@@ -1077,22 +1062,11 @@ _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   if (ctx->use_aesni_avx)
     {
       u64 Ls[8];
-      unsigned int n = 8 - (blkn % 8);
       u64 *l;
 
       if (nblocks >= 8)
 	{
-	  /* Use u64 to store pointers for x32 support (assembly function
-	   * assumes 64-bit pointers). */
-	  Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	  Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	  Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	  Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	  Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	  Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	  Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	  Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(7 + n) % 8];
+          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
@@ -1184,27 +1158,11 @@ _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
   if (ctx->use_aesni_avx2)
     {
       u64 Ls[16];
-      unsigned int n = 16 - (blkn % 16);
       u64 *l;
-      int i;
 
       if (nblocks >= 16)
 	{
-	  for (i = 0; i < 16; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(15 + n) % 16];
+          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
@@ -1227,22 +1185,11 @@ _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
   if (ctx->use_aesni_avx)
     {
       u64 Ls[8];
-      unsigned int n = 8 - (blkn % 8);
       u64 *l;
 
       if (nblocks >= 8)
 	{
-	  /* Use u64 to store pointers for x32 support (assembly function
-	    * assumes 64-bit pointers). */
-	  Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	  Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	  Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	  Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	  Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	  Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	  Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	  Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(7 + n) % 8];
+          l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);
 
 	  /* Process data in 8 block chunks. */
 	  while (nblocks >= 8)
diff --git a/cipher/twofish.c b/cipher/twofish.c
index d19e0790..4ae5d5a6 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -47,6 +47,7 @@
 #include "bufhelp.h"
 #include "cipher-internal.h"
 #include "cipher-selftest.h"
+#include "bulkhelp.h"
 
 
 #define TWOFISH_BLOCKSIZE 16
@@ -1358,27 +1359,11 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
     {
       int did_use_avx2 = 0;
       u64 Ls[16];
-      unsigned int n = 16 - (blkn % 16);
       u64 *l;
-      int i;
 
       if (nblocks >= 16)
 	{
-	  for (i = 0; i < 16; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(15 + n) % 16];
+          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
@@ -1471,27 +1456,11 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
     {
       int did_use_avx2 = 0;
       u64 Ls[16];
-      unsigned int n = 16 - (blkn % 16);
       u64 *l;
-      int i;
 
       if (nblocks >= 16)
 	{
-	  for (i = 0; i < 16; i += 8)
-	    {
-	      /* Use u64 to store pointers for x32 support (assembly function
-	       * assumes 64-bit pointers). */
-	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
-	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
-	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
-	    }
-
-	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
-	  l = &Ls[(15 + n) % 16];
+          l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);
 
 	  /* Process data in 16 block chunks. */
 	  while (nblocks >= 16)
-- 
2.34.1




More information about the Gcrypt-devel mailing list