[PATCH 11/11] Add vector register clearing for PowerPC implementations

Jussi Kivilinna jussi.kivilinna at iki.fi
Sun Nov 3 20:56:55 CET 2024


* cipher/Makefile.am: Add 'simd-common-ppc.h'.
* cipher/camellia-simd128.h
[HAVE_GCC_INLINE_ASM_PPC_ALTIVEC]: Include "simd-common-ppc.h".
[HAVE_GCC_INLINE_ASM_PPC_ALTIVEC] (memory_barrier_with_vec)
(clear_vec_regs): Remove.
* cipher/chacha20-p10le-8x.s (clear_vec_regs): New.
(_gcry_chacha20_p10le_8x): Add clear_vec_regs.
* cipher/chacha20-ppc.c: Include "simd-common-ppc.h".
(chacha20_ppc_blocks1, chacha20_ppc_blocks4)
(chacha20_poly1305_ppc_blocks4): Add clear_vec_regs.
* cipher/cipher-gcm-ppc.c: Include "simd-common-ppc.h".
(_gcry_ghash_setup_ppc_vpmsum, _gcry_ghash_ppc_vpmsum): Add
clear_vec_regs.
* cipher/poly1305-p10le.s (clear_vec_regs): New.
(gcry_poly1305_p10le_4blocks): Add clear_vec_regs.
* cipher/rijndael-p10le.c: Include "simd-common-ppc.h".
(_gcry_aes_p10le_gcm_crypt): Add clear_vec_regs.
* cipher/rijndael-ppc-common.h: Include "simd-common-ppc.h".
* cipher/rijndael-ppc-functions.h (ENCRYPT_BLOCK_FUNC)
(DECRYPT_BLOCK_FUNC, CFB_ENC_FUNC, ECB_CRYPT_FUNC, CFB_DEC_FUNC)
(CBC_ENC_FUNC, CBC_DEC_FUNC, CTR_ENC_FUNC, OCB_CRYPT_FUNC)
(OCB_AUTH_FUNC, XTS_CRYPT_FUNC, CTR32LE_ENC_FUNC): Add
clear_vec_regs.
* cipher/rijndael-ppc.c (_gcry_aes_ppc8_setkey)
(_gcry_aes_ppc8_prepare_decryption): Add clear_vec_regs.
* cipher/sha256-ppc.c: Include "simd-common-ppc.h".
(sha256_transform_ppc): Add clear_vec_regs.
* cipher/sha512-ppc.c: Include "simd-common-ppc.h".
(sha512_transform_ppc): Add clear_vec_regs.
* cipher/simd-common-ppc.h: New.
* cipher/sm4-ppc.c: Include "simd-common-ppc.h".
(sm4_ppc_crypt_blk1_16): Add clear_vec_regs.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/Makefile.am              |  2 +-
 cipher/camellia-simd128.h       |  4 +-
 cipher/chacha20-p10le-8x.s      | 41 ++++++++++++++++++
 cipher/chacha20-ppc.c           |  7 +++
 cipher/cipher-gcm-ppc.c         |  5 +++
 cipher/poly1305-p10le.s         | 41 ++++++++++++++++++
 cipher/rijndael-p10le.c         |  5 +++
 cipher/rijndael-ppc-common.h    |  1 +
 cipher/rijndael-ppc-functions.h | 24 ++++++++++
 cipher/rijndael-ppc.c           |  4 ++
 cipher/sha256-ppc.c             |  3 ++
 cipher/sha512-ppc.c             |  3 ++
 cipher/simd-common-ppc.h        | 77 +++++++++++++++++++++++++++++++++
 cipher/sm4-ppc.c                | 34 ++++++++-------
 14 files changed, 232 insertions(+), 19 deletions(-)
 create mode 100644 cipher/simd-common-ppc.h

diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 633c53ed..90415d83 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -127,7 +127,7 @@ EXTRA_libcipher_la_SOURCES = \
 	seed.c \
 	serpent.c serpent-sse2-amd64.S serpent-avx2-amd64.S \
 	serpent-avx512-x86.c serpent-armv7-neon.S \
-	simd-common-aarch64.h \
+	simd-common-aarch64.h simd-common-ppc.h \
 	sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S \
 	sm4-gfni-avx2-amd64.S sm4-gfni-avx512-amd64.S \
 	sm4-aarch64.S sm4-armv8-aarch64-ce.S sm4-armv9-aarch64-sve-ce.S \
diff --git a/cipher/camellia-simd128.h b/cipher/camellia-simd128.h
index 120fbe5a..df36a1a2 100644
--- a/cipher/camellia-simd128.h
+++ b/cipher/camellia-simd128.h
@@ -47,6 +47,7 @@
 /**********************************************************************
   AT&T x86 asm to intrinsics conversion macros (PowerPC VSX+crypto)
  **********************************************************************/
+#include "simd-common-ppc.h"
 #include <altivec.h>
 
 typedef vector signed char int8x16_t;
@@ -151,9 +152,6 @@ static const uint8x16_t shift_row =
 #define if_aes_subbytes(...) __VA_ARGS__
 #define if_not_aes_subbytes(...) /*_*/
 
-#define memory_barrier_with_vec(a) __asm__("" : "+wa"(a) :: "memory")
-#define clear_vec_regs() ((void)0)
-
 #endif /* __powerpc__ */
 
 #ifdef __ARM_NEON
diff --git a/cipher/chacha20-p10le-8x.s b/cipher/chacha20-p10le-8x.s
index ff68c9ef..f75ffb12 100644
--- a/cipher/chacha20-p10le-8x.s
+++ b/cipher/chacha20-p10le-8x.s
@@ -61,6 +61,45 @@
 #
 .text
 
+.macro clear_vec_regs
+	xxlxor 0, 0, 0
+	xxlxor 1, 1, 1
+	xxlxor 2, 2, 2
+	xxlxor 3, 3, 3
+	xxlxor 4, 4, 4
+	xxlxor 5, 5, 5
+	xxlxor 6, 6, 6
+	xxlxor 7, 7, 7
+	xxlxor 8, 8, 8
+	xxlxor 9, 9, 9
+	xxlxor 10, 10, 10
+	xxlxor 11, 11, 11
+	xxlxor 12, 12, 12
+	xxlxor 13, 13, 13
+	# vs14-vs31 (f14-f31) are ABI callee saved.
+	xxlxor 32, 32, 32
+	xxlxor 33, 33, 33
+	xxlxor 34, 34, 34
+	xxlxor 35, 35, 35
+	xxlxor 36, 36, 36
+	xxlxor 37, 37, 37
+	xxlxor 38, 38, 38
+	xxlxor 39, 39, 39
+	xxlxor 40, 40, 40
+	xxlxor 41, 41, 41
+	xxlxor 42, 42, 42
+	xxlxor 43, 43, 43
+	xxlxor 44, 44, 44
+	xxlxor 45, 45, 45
+	xxlxor 46, 46, 46
+	xxlxor 47, 47, 47
+	xxlxor 48, 48, 48
+	xxlxor 49, 49, 49
+	xxlxor 50, 50, 50
+	xxlxor 51, 51, 51
+	# vs52-vs63 (v20-v31) are ABI callee saved.
+.endm
+
 .macro QT_loop_8x
 	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
 	xxlor	0, 32+25, 32+25
@@ -782,6 +821,8 @@ Out_loop:
 	lvx	30, 26, 9
 	lvx	31, 27, 9
 
+	clear_vec_regs
+
 	add	9, 9, 27
 	addi	14, 17, 16
 	lxvx	14, 14, 9
diff --git a/cipher/chacha20-ppc.c b/cipher/chacha20-ppc.c
index e640010a..376d0642 100644
--- a/cipher/chacha20-ppc.c
+++ b/cipher/chacha20-ppc.c
@@ -25,6 +25,7 @@
     defined(USE_CHACHA20) && \
     __GNUC__ >= 4
 
+#include "simd-common-ppc.h"
 #include <altivec.h>
 #include "bufhelp.h"
 #include "poly1305-internal.h"
@@ -252,6 +253,8 @@ chacha20_ppc_blocks1(u32 *state, byte *dst, const byte *src, size_t nblks)
 
   vec_vsx_st(state3, 3 * 16, state); /* store counter */
 
+  clear_vec_regs();
+
   return 0;
 }
 
@@ -414,6 +417,8 @@ chacha20_ppc_blocks4(u32 *state, byte *dst, const byte *src, size_t nblks)
 
   vec_vsx_st(state3, 3 * 16, state); /* store counter */
 
+  clear_vec_regs();
+
   return 0;
 }
 
@@ -636,6 +641,8 @@ chacha20_poly1305_ppc_blocks4(u32 *state, byte *dst, const byte *src,
   st->h[3] = h1 >> 32;
   st->h[4] = h2;
 
+  clear_vec_regs();
+
   return 0;
 }
 
diff --git a/cipher/cipher-gcm-ppc.c b/cipher/cipher-gcm-ppc.c
index 648d1598..486295af 100644
--- a/cipher/cipher-gcm-ppc.c
+++ b/cipher/cipher-gcm-ppc.c
@@ -80,6 +80,7 @@
 
 #ifdef GCM_USE_PPC_VPMSUM
 
+#include "simd-common-ppc.h"
 #include <altivec.h>
 
 #define ALWAYS_INLINE inline __attribute__((always_inline))
@@ -370,6 +371,8 @@ _gcry_ghash_setup_ppc_vpmsum (void *gcm_table_arg, void *gcm_key)
   STORE_TABLE (gcm_table, 10, H4l);
   STORE_TABLE (gcm_table, 11, H4);
   STORE_TABLE (gcm_table, 12, H4h);
+
+  clear_vec_regs();
 }
 
 unsigned int ASM_FUNC_ATTR
@@ -542,6 +545,8 @@ _gcry_ghash_ppc_vpmsum (byte *result, void *gcm_table,
 
   vec_store_he (vec_be_swap (cur, bswap_const), 0, result);
 
+  clear_vec_regs();
+
   return 0;
 }
 
diff --git a/cipher/poly1305-p10le.s b/cipher/poly1305-p10le.s
index 4202b41e..d21f8245 100644
--- a/cipher/poly1305-p10le.s
+++ b/cipher/poly1305-p10le.s
@@ -57,6 +57,45 @@
 #
 .text
 
+.macro clear_vec_regs
+	xxlxor 0, 0, 0
+	xxlxor 1, 1, 1
+	xxlxor 2, 2, 2
+	xxlxor 3, 3, 3
+	xxlxor 4, 4, 4
+	xxlxor 5, 5, 5
+	xxlxor 6, 6, 6
+	xxlxor 7, 7, 7
+	xxlxor 8, 8, 8
+	xxlxor 9, 9, 9
+	xxlxor 10, 10, 10
+	xxlxor 11, 11, 11
+	xxlxor 12, 12, 12
+	xxlxor 13, 13, 13
+	# vs14-vs31 (f14-f31) are ABI callee saved.
+	xxlxor 32, 32, 32
+	xxlxor 33, 33, 33
+	xxlxor 34, 34, 34
+	xxlxor 35, 35, 35
+	xxlxor 36, 36, 36
+	xxlxor 37, 37, 37
+	xxlxor 38, 38, 38
+	xxlxor 39, 39, 39
+	xxlxor 40, 40, 40
+	xxlxor 41, 41, 41
+	xxlxor 42, 42, 42
+	xxlxor 43, 43, 43
+	xxlxor 44, 44, 44
+	xxlxor 45, 45, 45
+	xxlxor 46, 46, 46
+	xxlxor 47, 47, 47
+	xxlxor 48, 48, 48
+	xxlxor 49, 49, 49
+	xxlxor 50, 50, 50
+	xxlxor 51, 51, 51
+	# vs52-vs63 (v20-v31) are ABI callee saved.
+.endm
+
 # Block size 16 bytes
 # key = (r, s)
 # clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF
@@ -745,6 +784,8 @@ do_final_update:
 Out_loop:
 	li	3, 0
 
+	clear_vec_regs
+
 	li	14, 256
 	lvx	20, 14, 1
 	addi	14, 14, 16
diff --git a/cipher/rijndael-p10le.c b/cipher/rijndael-p10le.c
index 65d804f9..448b45ed 100644
--- a/cipher/rijndael-p10le.c
+++ b/cipher/rijndael-p10le.c
@@ -30,6 +30,8 @@
 
 #ifdef USE_PPC_CRYPTO_WITH_PPC9LE
 
+#include "simd-common-ppc.h"
+
 
 extern size_t _gcry_ppc10_aes_gcm_encrypt (const void *inp, void *out,
                                            size_t len,
@@ -113,6 +115,9 @@ _gcry_aes_p10le_gcm_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
    */
   s = ndone / GCRY_GCM_BLOCK_LEN;
   s = nblocks - s;
+
+  clear_vec_regs();
+
   return ( s );
 }
 
diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h
index bd2ad8b1..611b5871 100644
--- a/cipher/rijndael-ppc-common.h
+++ b/cipher/rijndael-ppc-common.h
@@ -26,6 +26,7 @@
 #ifndef G10_RIJNDAEL_PPC_COMMON_H
 #define G10_RIJNDAEL_PPC_COMMON_H
 
+#include "simd-common-ppc.h"
 #include <altivec.h>
 
 
diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h
index ec5cda73..eb39717d 100644
--- a/cipher/rijndael-ppc-functions.h
+++ b/cipher/rijndael-ppc-functions.h
@@ -40,6 +40,8 @@ ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out,
   AES_ENCRYPT (b, rounds);
   VEC_STORE_BE (out, 0, b, bige_const);
 
+  clear_vec_regs();
+
   return 0; /* does not use stack */
 }
 
@@ -61,6 +63,8 @@ DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out,
   AES_DECRYPT (b, rounds);
   VEC_STORE_BE (out, 0, b, bige_const);
 
+  clear_vec_regs();
+
   return 0; /* does not use stack */
 }
 
@@ -116,6 +120,8 @@ CFB_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
     }
 
   VEC_STORE_BE (iv_arg, 0, outiv, bige_const);
+
+  clear_vec_regs();
 }
 
 
@@ -373,6 +379,8 @@ ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg,
       out++;
       in++;
     }
+
+  clear_vec_regs();
 }
 
 
@@ -571,6 +579,8 @@ CFB_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
     }
 
   VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+
+  clear_vec_regs();
 }
 
 
@@ -640,6 +650,8 @@ CBC_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
   while (nblocks);
 
   VEC_STORE_BE (iv_arg, 0, outiv, bige_const);
+
+  clear_vec_regs();
 }
 
 
@@ -845,6 +857,8 @@ CBC_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
     }
 
   VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+
+  clear_vec_regs();
 }
 
 
@@ -1078,6 +1092,8 @@ CTR_ENC_FUNC (void *context, unsigned char *ctr_arg, void *outbuf_arg,
     }
 
   VEC_STORE_BE (ctr_arg, 0, ctr, bige_const);
+
+  clear_vec_regs();
 }
 
 
@@ -1584,6 +1600,8 @@ OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg,
   VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const);
   c->u_mode.ocb.data_nblocks = data_nblocks;
 
+  clear_vec_regs();
+
   return 0;
 }
 
@@ -1794,6 +1812,8 @@ OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
   VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const);
   c->u_mode.ocb.aad_nblocks = data_nblocks;
 
+  clear_vec_regs();
+
   return 0;
 }
 
@@ -2295,6 +2315,8 @@ XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg, void *outbuf_arg,
   VEC_STORE_BE (tweak_arg, 0, tweak, bige_const);
 
 #undef GEN_TWEAK
+
+  clear_vec_regs();
 }
 
 
@@ -2541,4 +2563,6 @@ CTR32LE_ENC_FUNC(void *context, unsigned char *ctr_arg, void *outbuf_arg,
 #undef VEC_ADD_CTRLE32
 
   VEC_STORE_BE (ctr_arg, 0, vec_reve((block)ctr), bige_const);
+
+  clear_vec_regs();
 }
diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index 055b00c0..18fadd6e 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -201,6 +201,8 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
     }
 
   wipememory(tk_vu32, sizeof(tk_vu32));
+
+  clear_vec_regs();
 }
 
 
@@ -208,6 +210,8 @@ void PPC_OPT_ATTR
 _gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
 {
   internal_aes_ppc_prepare_decryption (ctx);
+
+  clear_vec_regs();
 }
 
 
diff --git a/cipher/sha256-ppc.c b/cipher/sha256-ppc.c
index e5839a84..bcc08dad 100644
--- a/cipher/sha256-ppc.c
+++ b/cipher/sha256-ppc.c
@@ -25,6 +25,7 @@
     defined(USE_SHA256) && \
     __GNUC__ >= 4
 
+#include "simd-common-ppc.h"
 #include <altivec.h>
 #include "bufhelp.h"
 
@@ -590,6 +591,8 @@ sha256_transform_ppc(u32 state[8], const unsigned char *data, size_t nblks)
   vec_vsx_st (h0_h3, 4 * 0, state);
   vec_vsx_st (h4_h7, 4 * 4, state);
 
+  clear_vec_regs();
+
   return sizeof(w2) + sizeof(w);
 }
 
diff --git a/cipher/sha512-ppc.c b/cipher/sha512-ppc.c
index d213c241..ed9486ee 100644
--- a/cipher/sha512-ppc.c
+++ b/cipher/sha512-ppc.c
@@ -25,6 +25,7 @@
     defined(USE_SHA512) && \
     __GNUC__ >= 4
 
+#include "simd-common-ppc.h"
 #include <altivec.h>
 #include "bufhelp.h"
 
@@ -705,6 +706,8 @@ sha512_transform_ppc(u64 state[8], const unsigned char *data, size_t nblks)
   vec_u64_store (h4, 8 * 4, (unsigned long long *)state);
   vec_u64_store (h6, 8 * 6, (unsigned long long *)state);
 
+  clear_vec_regs();
+
   return sizeof(w) + sizeof(w2);
 }
 
diff --git a/cipher/simd-common-ppc.h b/cipher/simd-common-ppc.h
new file mode 100644
index 00000000..620a3b51
--- /dev/null
+++ b/cipher/simd-common-ppc.h
@@ -0,0 +1,77 @@
+/* simd-common-ppc.h  -  Common macros for PowerPC SIMD code
+ *
+ * Copyright (C) 2024 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_SIMD_COMMON_PPC_H
+#define GCRY_SIMD_COMMON_PPC_H
+
+#include <config.h>
+
+#define memory_barrier_with_vec(a) __asm__("" : "+wa"(a) :: "memory")
+
+#define clear_vec_regs() __asm__ volatile("xxlxor 0, 0, 0\n" \
+				          "xxlxor 1, 1, 1\n" \
+				          "xxlxor 2, 2, 2\n" \
+				          "xxlxor 3, 3, 3\n" \
+				          "xxlxor 4, 4, 4\n" \
+				          "xxlxor 5, 5, 5\n" \
+				          "xxlxor 6, 6, 6\n" \
+				          "xxlxor 7, 7, 7\n" \
+				          "xxlxor 8, 8, 8\n" \
+				          "xxlxor 9, 9, 9\n" \
+				          "xxlxor 10, 10, 10\n" \
+				          "xxlxor 11, 11, 11\n" \
+				          "xxlxor 12, 12, 12\n" \
+				          "xxlxor 13, 13, 13\n" \
+				          "xxlxor 32, 32, 32\n" \
+				          "xxlxor 33, 33, 33\n" \
+				          "xxlxor 34, 34, 34\n" \
+				          "xxlxor 35, 35, 35\n" \
+				          "xxlxor 36, 36, 36\n" \
+				          "xxlxor 37, 37, 37\n" \
+				          "xxlxor 38, 38, 38\n" \
+				          "xxlxor 39, 39, 39\n" \
+				          "xxlxor 40, 40, 40\n" \
+				          "xxlxor 41, 41, 41\n" \
+				          "xxlxor 42, 42, 42\n" \
+				          "xxlxor 43, 43, 43\n" \
+				          "xxlxor 44, 44, 44\n" \
+				          "xxlxor 45, 45, 45\n" \
+				          "xxlxor 46, 46, 46\n" \
+				          "xxlxor 47, 47, 47\n" \
+				          "xxlxor 48, 48, 48\n" \
+				          "xxlxor 49, 49, 49\n" \
+				          "xxlxor 50, 50, 50\n" \
+				          "xxlxor 51, 51, 51\n" \
+					  ::: "vs0", "vs1", "vs2", "vs3", \
+					      "vs4", "vs5", "vs6", "vs7", \
+					      "vs8", "vs9", "vs10", "vs11", \
+					      "vs12", "vs13", \
+					      /* vs14-vs31 (f14-f31) are */ \
+					      /* ABI callee saved. */ \
+					      "vs32", "vs33", "vs34", "vs35", \
+					      "vs36", "vs37", "vs38", "vs39", \
+					      "vs40", "vs41", "vs42", "vs43", \
+					      "vs44", "vs45", "vs46", "vs47", \
+					      "vs48", "vs49", "vs50", "vs51", \
+					      /* vs52-vs63 (v20-v31) are */ \
+					      /* ABI callee saved. */ \
+					      "memory")
+
+#endif /* GCRY_SIMD_COMMON_PPC_H */
diff --git a/cipher/sm4-ppc.c b/cipher/sm4-ppc.c
index bb2c55e0..2b26c39d 100644
--- a/cipher/sm4-ppc.c
+++ b/cipher/sm4-ppc.c
@@ -25,6 +25,7 @@
     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
     !defined(WORDS_BIGENDIAN) && (__GNUC__ >= 4)
 
+#include "simd-common-ppc.h"
 #include <altivec.h>
 #include "bufhelp.h"
 
@@ -298,25 +299,28 @@ sm4_ppc_crypt_blk1_16(u32 *rk, byte *out, const byte *in, size_t nblks)
   if (nblks >= 16)
     {
       sm4_ppc_crypt_blk16(rk, out, in);
-      return;
     }
-
-  while (nblks >= 8)
+  else
     {
-      sm4_ppc_crypt_blk8(rk, out, in);
-      in += 8 * 16;
-      out += 8 * 16;
-      nblks -= 8;
+      while (nblks >= 8)
+	{
+	  sm4_ppc_crypt_blk8(rk, out, in);
+	  in += 8 * 16;
+	  out += 8 * 16;
+	  nblks -= 8;
+	}
+
+      while (nblks)
+	{
+	  size_t currblks = nblks > 4 ? 4 : nblks;
+	  sm4_ppc_crypt_blk1_4(rk, out, in, currblks);
+	  in += currblks * 16;
+	  out += currblks * 16;
+	  nblks -= currblks;
+	}
     }
 
-  while (nblks)
-    {
-      size_t currblks = nblks > 4 ? 4 : nblks;
-      sm4_ppc_crypt_blk1_4(rk, out, in, currblks);
-      in += currblks * 16;
-      out += currblks * 16;
-      nblks -= currblks;
-    }
+  clear_vec_regs();
 }
 
 ASM_FUNC_ATTR_NOINLINE FUNC_ATTR_TARGET_P8 void
-- 
2.45.2




More information about the Gcrypt-devel mailing list