[PATCH 03/10] Optimize AES-NI CBC encryption

Jussi Kivilinna jussi.kivilinna at mbnet.fi
Fri Nov 23 18:22:04 CET 2012


* cipher/rijndael.c (_gcry_aes_cbc_enc) [USE_AESNI]: Add an AES-NI
specific loop and use SSE2 assembly for XORing and copying of
blocks.
--

This gives a ~35% improvement in 'tests/benchmark cipher aes' on a Sandy
Bridge CPU (x86-64).

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at mbnet.fi>
---
 cipher/rijndael.c |   47 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 104f869..982c54e 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1249,23 +1249,50 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
   aesni_prepare ();
   for ( ;nblocks; nblocks-- )
     {
-      for (ivp=iv, i=0; i < BLOCKSIZE; i++ )
-        outbuf[i] = inbuf[i] ^ *ivp++;
-
       if (0)
         ;
-#ifdef USE_PADLOCK
-      else if (ctx->use_padlock)
-        do_padlock (ctx, 0, outbuf, outbuf);
-#endif /*USE_PADLOCK*/
 #ifdef USE_AESNI
       else if (ctx->use_aesni)
-        do_aesni (ctx, 0, outbuf, outbuf);
+        {
+          /* ~35% speed up on Sandy-Bridge when doing xoring and copying with
+             SSE registers.  */
+          asm volatile ("movdqu %[iv], %%xmm0\n\t"
+                        "movdqu %[inbuf], %%xmm1\n\t"
+                        "pxor %%xmm0, %%xmm1\n\t"
+                        "movdqu %%xmm1, %[outbuf]\n\t"
+                        : /* No output */
+                        : [iv] "m" (*iv),
+                          [inbuf] "m" (*inbuf),
+                          [outbuf] "m" (*outbuf)
+                        : "memory" );
+
+          do_aesni (ctx, 0, outbuf, outbuf);
+
+          asm volatile ("movdqu %[outbuf], %%xmm0\n\t"
+                        "movdqu %%xmm0, %[iv]\n\t"
+                        : /* No output */
+                        : [outbuf] "m" (*outbuf),
+                          [iv] "m" (*iv)
+                        : "memory" );
+        }
 #endif /*USE_AESNI*/
       else
-        do_encrypt (ctx, outbuf, outbuf );
+        {
+          for (ivp=iv, i=0; i < BLOCKSIZE; i++ )
+            outbuf[i] = inbuf[i] ^ *ivp++;
+
+          if (0)
+            ;
+#ifdef USE_PADLOCK
+          else if (ctx->use_padlock)
+            do_padlock (ctx, 0, outbuf, outbuf);
+#endif /*USE_PADLOCK*/
+          else
+            do_encrypt (ctx, outbuf, outbuf );
+
+          memcpy (iv, outbuf, BLOCKSIZE);
+        }
 
-      memcpy (iv, outbuf, BLOCKSIZE);
       inbuf += BLOCKSIZE;
       if (!cbc_mac)
         outbuf += BLOCKSIZE;

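For readers less comfortable with inline assembly, the new fast path can be
sketched with SSE2 intrinsics instead.  This is only an illustration of the
same idea, not the patch itself: block_encrypt() below is a hypothetical
stand-in for the AES-NI block encryption (do_aesni in the patch), and the
cbc_mac handling of the real function is omitted.

  /* Illustrative sketch of CBC encryption with a 128-bit SSE2 XOR,
     assuming a hypothetical block_encrypt() primitive.  */
  #include <string.h>
  #include <emmintrin.h>   /* SSE2 intrinsics */

  #define BLOCKSIZE 16

  extern void block_encrypt (void *ctx, unsigned char *out,
                             const unsigned char *in);

  static void
  cbc_enc_sketch (void *ctx, unsigned char *iv,
                  unsigned char *outbuf, const unsigned char *inbuf,
                  size_t nblocks)
  {
    for ( ; nblocks; nblocks--)
      {
        /* outbuf = inbuf XOR iv, done with one 128-bit XOR instead of a
           byte-by-byte loop.  */
        __m128i viv = _mm_loadu_si128 ((const __m128i *) iv);
        __m128i vin = _mm_loadu_si128 ((const __m128i *) inbuf);
        _mm_storeu_si128 ((__m128i *) outbuf, _mm_xor_si128 (viv, vin));

        /* Encrypt in place; the ciphertext becomes the next IV.  */
        block_encrypt (ctx, outbuf, outbuf);
        memcpy (iv, outbuf, BLOCKSIZE);

        inbuf += BLOCKSIZE;
        outbuf += BLOCKSIZE;
      }
  }

The patch uses raw movdqu/pxor assembly rather than intrinsics so that the
XMM register use stays inside the aesni_prepare()/aesni_cleanup() region
that the existing AES-NI code already manages.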