[PATCH] cipher-gcm-ppc: tweak loop structure a bit

Jussi Kivilinna jussi.kivilinna at iki.fi
Sun Apr 2 19:22:48 CEST 2023


* cipher/cipher-gcm-ppc.c (_gcry_ghash_ppc_vpmsum): Increment
'buf' pointer right after use; use 'for' loop for inner 4-block
loop to allow the compiler to better optimize the loop.
--

Benchmark on POWER9:

Before:
                    |  nanosecs/byte   mebibytes/sec   cycles/byte
 GMAC_AES           |     0.226 ns/B      4211 MiB/s     0.521 c/B

After:
                    |  nanosecs/byte   mebibytes/sec   cycles/byte
 GMAC_AES           |     0.224 ns/B      4248 MiB/s     0.516 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/cipher-gcm-ppc.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/cipher/cipher-gcm-ppc.c b/cipher/cipher-gcm-ppc.c
index 4f75e95c..06bf5eb1 100644
--- a/cipher/cipher-gcm-ppc.c
+++ b/cipher/cipher-gcm-ppc.c
@@ -437,6 +437,7 @@ _gcry_ghash_ppc_vpmsum (byte *result, void *gcm_table,
       in1 = vec_load_he (16, buf);
       in2 = vec_load_he (32, buf);
       in3 = vec_load_he (48, buf);
+      buf += 64;
       in0 = vec_be_swap(in0, bswap_const);
       in1 = vec_be_swap(in1, bswap_const);
       in2 = vec_be_swap(in2, bswap_const);
@@ -464,17 +465,13 @@ _gcry_ghash_ppc_vpmsum (byte *result, void *gcm_table,
       Xh3 = asm_xor (Xh3, Xh1);
 
       /* Gerald Estrin's scheme for parallel multiplication of polynomials */
-      while (1)
+      for (; blocks_remaining >= 4; blocks_remaining -= 4)
         {
-	  buf += 64;
-	  blocks_remaining -= 4;
-	  if (!blocks_remaining)
-	    break;
-
 	  in0 = vec_load_he (0, buf);
 	  in1 = vec_load_he (16, buf);
 	  in2 = vec_load_he (32, buf);
 	  in3 = vec_load_he (48, buf);
+	  buf += 64;
 	  in1 = vec_be_swap(in1, bswap_const);
 	  in2 = vec_be_swap(in2, bswap_const);
 	  in3 = vec_be_swap(in3, bswap_const);
-- 
2.37.2




More information about the Gcrypt-devel mailing list