[PATCH 06/10] Add parallelized AES-NI ECB decryption
Jussi Kivilinna
jussi.kivilinna at mbnet.fi
Fri Nov 23 18:22:20 CET 2012
* cipher/cipher-internal.h (struct gcry_cipher_handle): Add
bulk.ecb_dec.
* cipher/cipher.c (gcry_cipher_open) [USE_AES]: Set bulk.ecb_dec
to _gcry_aes_ecb_dec.
(do_ecb_decrypt): Redirect call into bulk.ecb_dec if non-null.
* src/cipher.h (_gcry_aes_ecb_dec): Add new function prototype.
* cipher/rijndael.c (_gcry_aes_ecb_dec): Add new function.
--
Parallelized ECB decryption is ~2.0x faster on Intel Sandy-Bridge (x86-64).
Before:
$ tests/benchmark --cipher-repetitions 1000 cipher aes aes192 aes256
Running each test 1000 times.
          ECB/Stream         CBC             CFB             OFB             CTR
        --------------- --------------- --------------- --------------- ---------------
AES       670ms   770ms  2130ms   450ms  1880ms   670ms  2250ms  2280ms   490ms   490ms
AES192    880ms   920ms  2460ms   540ms  2210ms   830ms  2580ms  2570ms   580ms   570ms
AES256   1020ms  1070ms  2800ms   620ms  2560ms   970ms  2880ms  2880ms   660ms   650ms
After:
Running each test 1000 times.
          ECB/Stream         CBC             CFB             OFB             CTR
        --------------- --------------- --------------- --------------- ---------------
AES       690ms   350ms  2130ms   470ms  1890ms   670ms  2220ms  2240ms   490ms   490ms
AES192    900ms   440ms  2460ms   560ms  2210ms   840ms  2550ms  2560ms   570ms   570ms
AES256   1040ms   520ms  2800ms   640ms  2550ms   970ms  2840ms  2850ms   660ms   650ms
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at mbnet.fi>
---
cipher/cipher-internal.h | 3 ++
cipher/cipher.c | 8 +++++
cipher/rijndael.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++
src/cipher.h | 2 +
4 files changed, 83 insertions(+)
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 025bf2e..dcce708 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -89,6 +89,9 @@ struct gcry_cipher_handle
void (*ctr_enc)(void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks);
+ void (*ecb_dec)(void *context, void *outbuf_arg,
+ const void *inbuf_arg,
+ unsigned int nblocks);
} bulk;
diff --git a/cipher/cipher.c b/cipher/cipher.c
index 389bf7a..b0f9773 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -716,6 +716,7 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
h->bulk.cbc_enc = _gcry_aes_cbc_enc;
h->bulk.cbc_dec = _gcry_aes_cbc_dec;
h->bulk.ctr_enc = _gcry_aes_ctr_enc;
+ h->bulk.ecb_dec = _gcry_aes_ecb_dec;
break;
#endif /*USE_AES*/
@@ -881,6 +882,13 @@ do_ecb_decrypt (gcry_cipher_hd_t c,
return GPG_ERR_INV_LENGTH;
nblocks = inbuflen / c->cipher->blocksize;
+ if (nblocks && c->bulk.ecb_dec)
+ {
+ c->bulk.ecb_dec (&c->context.c, outbuf, inbuf, nblocks);
+
+ return 0;
+ }
+
for (n=0; n < nblocks; n++ )
{
c->cipher->decrypt (&c->context.c, outbuf, (byte*)/*arggg*/inbuf );
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 34a0f8c..421b159 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1838,6 +1838,76 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
}
+/* Bulk ECB-mode decryption of NBLOCKS complete blocks from INBUF_ARG into
+ * OUTBUF_ARG. Intended only as the bulk.ecb_dec hook installed by cipher.c. */
+void
+_gcry_aes_ecb_dec (void *context, void *outbuf_arg,
+ const void *inbuf_arg, unsigned int nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+
+ if (0) /* Dummy head of the chain; lets the conditional branch below begin with else. */
+ ;
+#ifdef USE_AESNI
+ else if (ctx->use_aesni) /* AES-NI path */
+ {
+ aesni_prepare ();
+
+ if (!ctx->decryption_prepared ) /* skip the setup if the flag is already set */
+ {
+ prepare_decryption ( ctx );
+ ctx->decryption_prepared = 1;
+ }
+
+ for ( ;nblocks > 3 ; nblocks -= 4 ) /* four blocks per iteration */
+ {
+ asm volatile
+ ("movdqu 0*16(%[inbuf]), %%xmm1\n\t" /* load input blocks */
+ "movdqu 1*16(%[inbuf]), %%xmm2\n\t"
+ "movdqu 2*16(%[inbuf]), %%xmm3\n\t"
+ "movdqu 3*16(%[inbuf]), %%xmm4\n\t"
+ : /* No output */
+ : [inbuf] "r" (inbuf)
+ : "memory");
+
+ do_aesni_dec_vec4 (ctx); /* presumably decrypts xmm1..xmm4 in place -- confirm against the helper */
+
+ asm volatile
+ ("movdqu %%xmm1, 0*16(%[outbuf])\n\t" /* store output blocks */
+ "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+ "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+ "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+ : /* No output */
+ : [outbuf] "r" (outbuf)
+ : "memory");
+
+ outbuf += 4*BLOCKSIZE; /* advance both pointers past the four blocks just handled */
+ inbuf += 4*BLOCKSIZE;
+ }
+
+ for ( ;nblocks; nblocks-- ) /* remaining 0..3 blocks, one at a time */
+ {
+ do_aesni_dec_aligned (ctx, outbuf, inbuf); /* NOTE(review): name hints at an alignment requirement -- confirm unaligned buffers are accepted */
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ aesni_cleanup ();
+ aesni_cleanup_2_5 ();
+ }
+#endif
+ else /* non-AES-NI path: one rijndael_decrypt call per block */
+ for ( ;nblocks; nblocks-- )
+ {
+ rijndael_decrypt(context, outbuf, inbuf);
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+}
+
/* Run the self-tests for AES 128. Returns NULL on success. */
diff --git a/src/cipher.h b/src/cipher.h
index 48eeeda..6b34e90 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -94,6 +94,8 @@ void _gcry_aes_cbc_dec (void *context, unsigned char *iv,
void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
void *outbuf_arg, const void *inbuf_arg,
unsigned int nblocks);
+void _gcry_aes_ecb_dec (void *context, void *outbuf_arg,
+ const void *inbuf_arg, unsigned int nblocks);
/*-- dsa.c --*/
More information about the Gcrypt-devel
mailing list