[PATCH 3/8] camellia-aesni-avx: add acceleration for ECB/XTS/CTR32LE modes

Jussi Kivilinna <jussi.kivilinna@iki.fi>
Wed Feb 22 20:29:19 CET 2023


* cipher/camellia-aesni-avx-amd64.S (_gcry_camellia_aesni_avx_ecb_enc)
(_gcry_camellia_aesni_avx_ecb_dec): New.
* cipher/camellia-glue.c (_gcry_camellia_aesni_avx_ecb_enc)
(_gcry_camellia_aesni_avx_ecb_dec): New.
(camellia_setkey): Always enable XTS/ECB/CTR32LE bulk functions.
(camellia_encrypt_blk1_32, camellia_decrypt_blk1_32)
[USE_AESNI_AVX]: Add AESNI/AVX code-path.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 cipher/camellia-aesni-avx-amd64.S | 92 +++++++++++++++++++++++++++++++
 cipher/camellia-glue.c            | 59 ++++++++++++++------
 2 files changed, 133 insertions(+), 18 deletions(-)

diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 1f241e03..93c96791 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -1029,6 +1029,98 @@ _gcry_camellia_aesni_avx_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
 
+.align 16
+.globl _gcry_camellia_aesni_avx_ecb_enc
+ELF(.type   _gcry_camellia_aesni_avx_ecb_enc,@function;)
+
+_gcry_camellia_aesni_avx_ecb_enc:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (16 blocks)
+	 *	%rdx: src (16 blocks)
+	 */
+	CFI_STARTPROC();
+
+	pushq %rbp;
+	CFI_PUSH(%rbp);
+	movq %rsp, %rbp;
+	CFI_DEF_CFA_REGISTER(%rbp);
+
+	vzeroupper;
+
+	cmpl $128, key_bitlength(CTX);
+	movl $32, %r8d;
+	movl $24, %eax;
+	cmovel %eax, %r8d; /* max */
+
+	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rdx, (key_table)(CTX));
+
+	subq $(16 * 16), %rsp;
+	andq $~31, %rsp;
+	movq %rsp, %rax;
+
+	call __camellia_enc_blk16;
+
+	write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+		     %xmm8, %rsi);
+
+	vzeroall;
+
+	leave;
+	CFI_LEAVE();
+	ret_spec_stop;
+	CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ecb_enc,.-_gcry_camellia_aesni_avx_ecb_enc;)
+
+.align 16
+.globl _gcry_camellia_aesni_avx_ecb_dec
+ELF(.type   _gcry_camellia_aesni_avx_ecb_dec,@function;)
+
+_gcry_camellia_aesni_avx_ecb_dec:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst (16 blocks)
+	 *	%rdx: src (16 blocks)
+	 */
+	CFI_STARTPROC();
+
+	pushq %rbp;
+	CFI_PUSH(%rbp);
+	movq %rsp, %rbp;
+	CFI_DEF_CFA_REGISTER(%rbp);
+
+	vzeroupper;
+
+	cmpl $128, key_bitlength(CTX);
+	movl $32, %r8d;
+	movl $24, %eax;
+	cmovel %eax, %r8d; /* max */
+
+	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rdx, (key_table)(CTX, %r8, 8));
+
+	subq $(16 * 16), %rsp;
+	andq $~31, %rsp;
+	movq %rsp, %rax;
+
+	call __camellia_dec_blk16;
+
+	write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+		     %xmm8, %rsi);
+
+	vzeroall;
+
+	leave;
+	CFI_LEAVE();
+	ret_spec_stop;
+	CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ecb_dec,.-_gcry_camellia_aesni_avx_ecb_dec;)
+
 .align 16
 .globl _gcry_camellia_aesni_avx_cbc_dec
 ELF(.type   _gcry_camellia_aesni_avx_cbc_dec,@function;)
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 2e00f563..8b4b4b3c 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -172,15 +172,25 @@ extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx,
 					     const u64 Ls[16]) ASM_FUNC_ABI;
 
 extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
-					     const unsigned char *abuf,
-					     unsigned char *offset,
-					     unsigned char *checksum,
-					     const u64 Ls[16]) ASM_FUNC_ABI;
+					      const unsigned char *abuf,
+					      unsigned char *offset,
+					      unsigned char *checksum,
+					      const u64 Ls[16]) ASM_FUNC_ABI;
 
 extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
 					    const unsigned char *key,
 					    unsigned int keylen) ASM_FUNC_ABI;
 
+extern void _gcry_camellia_aesni_avx_ecb_enc(const CAMELLIA_context *ctx,
+					     unsigned char *out,
+					     const unsigned char *in)
+					     ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ecb_dec(const CAMELLIA_context *ctx,
+					     unsigned char *out,
+					     const unsigned char *in)
+					     ASM_FUNC_ABI;
+
 static const int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 16 +
                                         2 * sizeof(void *) + ASM_EXTRA_STACK;
 
@@ -473,18 +483,9 @@ camellia_setkey(void *c, const byte *key, unsigned keylen,
   bulk_ops->ctr_enc = _gcry_camellia_ctr_enc;
   bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt;
   bulk_ops->ocb_auth  = _gcry_camellia_ocb_auth;
-#ifdef USE_AESNI_AVX2
-  if (ctx->use_aesni_avx2 || ctx->use_vaes_avx2 || ctx->use_gfni_avx2)
-    {
-      bulk_ops->xts_crypt = _gcry_camellia_xts_crypt;
-      bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt;
-      bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc;
-    }
-#else
-  (void)_gcry_camellia_xts_crypt;
-  (void)_gcry_camellia_ecb_crypt;
-  (void)_gcry_camellia_ctr32le_enc;
-#endif
+  bulk_ops->xts_crypt = _gcry_camellia_xts_crypt;
+  bulk_ops->ecb_crypt = _gcry_camellia_ecb_crypt;
+  bulk_ops->ctr32le_enc = _gcry_camellia_ctr32le_enc;
 
   if (0)
     { }
@@ -651,10 +652,21 @@ camellia_encrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf,
       return avx2_burn_stack_depth;
     }
 #endif
+#ifdef USE_AESNI_AVX
+  while (ctx->use_aesni_avx && num_blks >= 16)
+    {
+      _gcry_camellia_aesni_avx_ecb_enc (ctx, outbuf, inbuf);
+      stack_burn_size = avx_burn_stack_depth;
+      outbuf += CAMELLIA_BLOCK_SIZE * 16;
+      inbuf += CAMELLIA_BLOCK_SIZE * 16;
+      num_blks -= 16;
+    }
+#endif
 
   while (num_blks)
     {
-      stack_burn_size = camellia_encrypt((void *)ctx, outbuf, inbuf);
+      unsigned int nburn = camellia_encrypt((void *)ctx, outbuf, inbuf);
+      stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
       outbuf += CAMELLIA_BLOCK_SIZE;
       inbuf += CAMELLIA_BLOCK_SIZE;
       num_blks--;
@@ -731,10 +743,21 @@ camellia_decrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf,
       return avx2_burn_stack_depth;
     }
 #endif
+#ifdef USE_AESNI_AVX
+  while (ctx->use_aesni_avx && num_blks >= 16)
+    {
+      _gcry_camellia_aesni_avx_ecb_dec (ctx, outbuf, inbuf);
+      stack_burn_size = avx_burn_stack_depth;
+      outbuf += CAMELLIA_BLOCK_SIZE * 16;
+      inbuf += CAMELLIA_BLOCK_SIZE * 16;
+      num_blks -= 16;
+    }
+#endif
 
   while (num_blks)
     {
-      stack_burn_size = camellia_decrypt((void *)ctx, outbuf, inbuf);
+      unsigned int nburn = camellia_decrypt((void *)ctx, outbuf, inbuf);
+      stack_burn_size = nburn > stack_burn_size ? nburn : stack_burn_size;
       outbuf += CAMELLIA_BLOCK_SIZE;
       inbuf += CAMELLIA_BLOCK_SIZE;
       num_blks--;
-- 
2.37.2




More information about the Gcrypt-devel mailing list