[PATCH 4/5] aes-ppc: use target and optimize attributes for P8 and P9

Jussi Kivilinna jussi.kivilinna at iki.fi
Sun Feb 26 14:00:36 CET 2023


* cipher/rijndael-ppc-functions.h: Add PPC_OPT_ATTR attribute
macro for all functions.
* cipher/rijndael-ppc.c (FUNC_ATTR_OPT, PPC_OPT_ATTR): New.
(_gcry_aes_ppc8_setkey, _gcry_aes_ppc8_prepare_decryption): Add
PPC_OPT_ATTR attribute macro.
* cipher/rijndael-ppc9le.c (FUNC_ATTR_OPT, PPC_OPT_ATTR): New.
--

This change makes sure that the PPC-accelerated AES implementation gets
compiled with the proper optimization level and the right target setting.

Benchmark on POWER9:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        ECB enc |     0.305 ns/B      3129 MiB/s     0.701 c/B
        ECB dec |     0.305 ns/B      3127 MiB/s     0.701 c/B
        CBC enc |      1.66 ns/B     575.3 MiB/s      3.81 c/B
        CBC dec |     0.318 ns/B      2997 MiB/s     0.732 c/B
        CFB enc |      1.66 ns/B     574.7 MiB/s      3.82 c/B
        CFB dec |     0.319 ns/B      2987 MiB/s     0.734 c/B
        OFB enc |      2.15 ns/B     443.4 MiB/s      4.95 c/B
        OFB dec |      2.15 ns/B     443.3 MiB/s      4.95 c/B
        CTR enc |     0.328 ns/B      2907 MiB/s     0.754 c/B
        CTR dec |     0.328 ns/B      2906 MiB/s     0.755 c/B
        XTS enc |     0.516 ns/B      1849 MiB/s      1.19 c/B
        XTS dec |     0.515 ns/B      1850 MiB/s      1.19 c/B
        CCM enc |      1.98 ns/B     480.6 MiB/s      4.56 c/B
        CCM dec |      1.98 ns/B     480.5 MiB/s      4.56 c/B
       CCM auth |      1.66 ns/B     574.9 MiB/s      3.82 c/B
        EAX enc |      1.99 ns/B     480.2 MiB/s      4.57 c/B
        EAX dec |      1.99 ns/B     480.2 MiB/s      4.57 c/B
       EAX auth |      1.66 ns/B     575.2 MiB/s      3.81 c/B
        GCM enc |     0.552 ns/B      1727 MiB/s      1.27 c/B
        GCM dec |     0.552 ns/B      1728 MiB/s      1.27 c/B
       GCM auth |     0.225 ns/B      4240 MiB/s     0.517 c/B
        OCB enc |     0.381 ns/B      2504 MiB/s     0.876 c/B
        OCB dec |     0.385 ns/B      2477 MiB/s     0.886 c/B
       OCB auth |     0.356 ns/B      2682 MiB/s     0.818 c/B
        SIV enc |      1.98 ns/B     480.9 MiB/s      4.56 c/B
        SIV dec |      2.11 ns/B     452.9 MiB/s      4.84 c/B
       SIV auth |      1.66 ns/B     575.4 MiB/s      3.81 c/B
    GCM-SIV enc |     0.726 ns/B      1314 MiB/s      1.67 c/B
    GCM-SIV dec |     0.843 ns/B      1131 MiB/s      1.94 c/B
   GCM-SIV auth |     0.377 ns/B      2527 MiB/s     0.868 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/rijndael-ppc-functions.h | 71 ++++++++++++++++++---------------
 cipher/rijndael-ppc.c           | 17 +++++++-
 cipher/rijndael-ppc9le.c        | 13 ++++++
 3 files changed, 66 insertions(+), 35 deletions(-)

diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h
index 79eca7a2..ec5cda73 100644
--- a/cipher/rijndael-ppc-functions.h
+++ b/cipher/rijndael-ppc-functions.h
@@ -1,6 +1,6 @@
 /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
  * Copyright (C) 2019 Shawn Landden <shawn at git.icu>
- * Copyright (C) 2019-2020, 2022 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ * Copyright (C) 2019-2020, 2022-2023 Jussi Kivilinna <jussi.kivilinna at iki.fi>
  *
  * This file is part of Libgcrypt.
  *
@@ -23,9 +23,9 @@
  * is released under.
  */
 
-unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
-				 unsigned char *out,
-				 const unsigned char *in)
+unsigned int PPC_OPT_ATTR
+ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out,
+		    const unsigned char *in)
 {
   const block bige_const = asm_load_be_const();
   const u128_t *rk = (u128_t *)&ctx->keyschenc;
@@ -44,9 +44,9 @@ unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
 }
 
 
-unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
-				 unsigned char *out,
-				 const unsigned char *in)
+unsigned int PPC_OPT_ATTR
+DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out,
+		    const unsigned char *in)
 {
   const block bige_const = asm_load_be_const();
   const u128_t *rk = (u128_t *)&ctx->keyschdec;
@@ -65,9 +65,9 @@ unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
 }
 
 
-void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
-		   void *outbuf_arg, const void *inbuf_arg,
-		   size_t nblocks)
+void PPC_OPT_ATTR
+CFB_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
+	      const void *inbuf_arg, size_t nblocks)
 {
   const block bige_const = asm_load_be_const();
   RIJNDAEL_context *ctx = context;
@@ -119,8 +119,9 @@ void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
 }
 
 
-void ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg,
-		     size_t nblocks, int encrypt)
+void PPC_OPT_ATTR
+ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg,
+		size_t nblocks, int encrypt)
 {
   const block bige_const = asm_load_be_const();
   RIJNDAEL_context *ctx = context;
@@ -375,9 +376,9 @@ void ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg,
 }
 
 
-void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
-		   void *outbuf_arg, const void *inbuf_arg,
-		   size_t nblocks)
+void PPC_OPT_ATTR
+CFB_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
+	      const void *inbuf_arg, size_t nblocks)
 {
   const block bige_const = asm_load_be_const();
   RIJNDAEL_context *ctx = context;
@@ -573,9 +574,9 @@ void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
 }
 
 
-void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
-		   void *outbuf_arg, const void *inbuf_arg,
-		   size_t nblocks, int cbc_mac)
+void PPC_OPT_ATTR
+CBC_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
+	      const void *inbuf_arg, size_t nblocks, int cbc_mac)
 {
   const block bige_const = asm_load_be_const();
   RIJNDAEL_context *ctx = context;
@@ -641,9 +642,10 @@ void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
   VEC_STORE_BE (iv_arg, 0, outiv, bige_const);
 }
 
-void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,
-		   void *outbuf_arg, const void *inbuf_arg,
-		   size_t nblocks)
+
+void PPC_OPT_ATTR
+CBC_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
+	      const void *inbuf_arg, size_t nblocks)
 {
   const block bige_const = asm_load_be_const();
   RIJNDAEL_context *ctx = context;
@@ -846,9 +848,9 @@ void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,
 }
 
 
-void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg,
-		   void *outbuf_arg, const void *inbuf_arg,
-		   size_t nblocks)
+void PPC_OPT_ATTR
+CTR_ENC_FUNC (void *context, unsigned char *ctr_arg, void *outbuf_arg,
+	      const void *inbuf_arg, size_t nblocks)
 {
   static const unsigned char vec_one_const[16] =
     { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
@@ -1079,9 +1081,9 @@ void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg,
 }
 
 
-size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg,
-		       const void *inbuf_arg, size_t nblocks,
-		       int encrypt)
+size_t PPC_OPT_ATTR
+OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg,
+		size_t nblocks, int encrypt)
 {
   const block bige_const = asm_load_be_const();
   RIJNDAEL_context *ctx = (void *)&c->context.c;
@@ -1585,7 +1587,9 @@ size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg,
   return 0;
 }
 
-size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
+
+size_t PPC_OPT_ATTR
+OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
 {
   const block bige_const = asm_load_be_const();
   RIJNDAEL_context *ctx = (void *)&c->context.c;
@@ -1794,9 +1798,9 @@ size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
 }
 
 
-void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg,
-		     void *outbuf_arg, const void *inbuf_arg,
-		     size_t nblocks, int encrypt)
+void PPC_OPT_ATTR
+XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg, void *outbuf_arg,
+		const void *inbuf_arg, size_t nblocks, int encrypt)
 {
 #ifdef WORDS_BIGENDIAN
   static const block vec_bswap128_const =
@@ -2294,8 +2298,9 @@ void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg,
 }
 
 
-void CTR32LE_ENC_FUNC(void *context, unsigned char *ctr_arg, void *outbuf_arg,
-		      const void *inbuf_arg, size_t nblocks)
+void PPC_OPT_ATTR
+CTR32LE_ENC_FUNC(void *context, unsigned char *ctr_arg, void *outbuf_arg,
+		 const void *inbuf_arg, size_t nblocks)
 {
 #ifndef WORDS_BIGENDIAN
   static const vec_u32 vec_u32_one = { 1, 0, 0, 0 };
diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index d16fbb40..f376e0f1 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -34,6 +34,19 @@
 #include "rijndael-ppc-common.h"
 
 
+#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE
+# define FUNC_ATTR_OPT __attribute__((optimize("-O2")))
+#else
+# define FUNC_ATTR_OPT
+#endif
+
+#ifdef HAVE_GCC_ATTRIBUTE_PPC_TARGET
+# define PPC_OPT_ATTR __attribute__((target("cpu=power8"))) FUNC_ATTR_OPT
+#else
+# define PPC_OPT_ATTR FUNC_ATTR_OPT
+#endif
+
+
 #ifndef WORDS_BIGENDIAN
 static const block vec_bswap32_const_neg =
   { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 };
@@ -124,7 +137,7 @@ keysched_idx(unsigned int in)
 }
 
 
-void
+void PPC_OPT_ATTR
 _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
 {
   u32 tk_u32[MAXKC];
@@ -179,7 +192,7 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
 }
 
 
-void
+void PPC_OPT_ATTR
 _gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
 {
   internal_aes_ppc_prepare_decryption (ctx);
diff --git a/cipher/rijndael-ppc9le.c b/cipher/rijndael-ppc9le.c
index f7055290..e462befc 100644
--- a/cipher/rijndael-ppc9le.c
+++ b/cipher/rijndael-ppc9le.c
@@ -34,6 +34,19 @@
 #include "rijndael-ppc-common.h"
 
 
+#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE
+# define FUNC_ATTR_OPT __attribute__((optimize("-O2")))
+#else
+# define FUNC_ATTR_OPT
+#endif
+
+#ifdef HAVE_GCC_ATTRIBUTE_PPC_TARGET
+# define PPC_OPT_ATTR __attribute__((target("cpu=power9"))) FUNC_ATTR_OPT
+#else
+# define PPC_OPT_ATTR FUNC_ATTR_OPT
+#endif
+
+
 static ASM_FUNC_ATTR_INLINE block
 asm_load_be_const(void)
 {
-- 
2.37.2




More information about the Gcrypt-devel mailing list