[git] GCRYPT - branch, master, updated. libgcrypt-1.5.0-68-g9ee9e25

by Jussi Kivilinna cvs at cvs.gnupg.org
Thu Nov 29 20:36:52 CET 2012


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  9ee9e25f519696d509b1a5c1cc04ab0121e98a51 (commit)
      from  6765e0a8618000d3dc7bda035163e0708c43791b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 9ee9e25f519696d509b1a5c1cc04ab0121e98a51
Author: Jussi Kivilinna <jussi.kivilinna at mbnet.fi>
Date:   Thu Nov 29 17:31:03 2012 +0200

    Optimize AES-NI CTR mode.
    
    * cipher/rijndael.c [USE_AESNI] (do_aesni_ctr, do_aesni_ctr_4): Make
    handling of 64-bit overflow and carry conditional. Avoid passing the
    value '1' from a general-purpose register to a vector register;
    generate '-1' in the vector register and subtract it instead.
    --
    
    We only need to handle the 64-bit carry in a few special cases that happen
    very rarely. So move carry handling to the slow path and only detect the
    need for carry handling on the fast path. Also avoid moving '1' from a
    general-purpose register to a vector register, as that might be slow on
    some CPUs. Instead, generate '-1' with SSE2 instructions and use
    subtraction instead of addition to increment the IV.
    
    Overall this gives an ~8% speed improvement for AES CTR mode on Intel
    Sandy Bridge.
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at mbnet.fi>

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index cc7f8d6..6313ab2 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1015,24 +1015,20 @@ do_aesni_ctr (const RIJNDAEL_context *ctx,
 
   asm volatile ("movdqa (%[ctr]), %%xmm0\n\t"   /* xmm0, xmm2 := CTR   */
                 "movaps %%xmm0, %%xmm2\n\t"
-                "mov    $1, %%esi\n\t"          /* xmm2++ (big-endian) */
-                "movd   %%esi, %%xmm1\n\t"
-
-                "movl   12(%[ctr]), %%esi\n\t"  /* load lower parts of CTR */
-                "bswapl %%esi\n\t"
-                "movl   8(%[ctr]), %%edi\n\t"
-                "bswapl %%edi\n\t"
+                "pcmpeqd %%xmm1, %%xmm1\n\t"
+                "psrldq $8, %%xmm1\n\t"         /* xmm1 = -1 */
 
                 "pshufb %[mask], %%xmm2\n\t"
-                "paddq  %%xmm1, %%xmm2\n\t"
+                "psubq  %%xmm1, %%xmm2\n\t"     /* xmm2++ (big endian) */
 
-                "addl   $1, %%esi\n\t"
-                "adcl   $0, %%edi\n\t"          /* detect 64bit overflow */
-                "jnc    .Lno_carry%=\n\t"
+                /* detect if 64-bit carry handling is needed */
+                "cmpl   $0xffffffff, 8(%[ctr])\n\t"
+                "jne    .Lno_carry%=\n\t"
+                "cmpl   $0xffffffff, 12(%[ctr])\n\t"
+                "jne    .Lno_carry%=\n\t"
 
-                /* swap upper and lower halfs */
-                "pshufd $0x4e, %%xmm1, %%xmm1\n\t"
-                "paddq   %%xmm1, %%xmm2\n\t"	/* add carry to upper 64bits */
+                "pslldq $8, %%xmm1\n\t"         /* move lower 64-bit to high */
+                "psubq   %%xmm1, %%xmm2\n\t"    /* add carry to upper 64bits */
 
                 ".Lno_carry%=:\n\t"
 
@@ -1085,7 +1081,7 @@ do_aesni_ctr (const RIJNDAEL_context *ctx,
                   [key] "r" (ctx->keyschenc),
                   [rounds] "g" (ctx->rounds),
                   [mask] "m" (*be_mask)
-                : "%esi", "%edi", "cc", "memory");
+                : "cc", "memory");
 #undef aesenc_xmm1_xmm0
 #undef aesenclast_xmm1_xmm0
 }
@@ -1120,48 +1116,40 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
 
   asm volatile ("movdqa (%[ctr]), %%xmm0\n\t"   /* xmm0, xmm2 := CTR   */
                 "movaps %%xmm0, %%xmm2\n\t"
-                "mov    $1, %%esi\n\t"          /* xmm1 := 1 */
-                "movd   %%esi, %%xmm1\n\t"
-
-                "movl   12(%[ctr]), %%esi\n\t"  /* load lower parts of CTR */
-                "bswapl %%esi\n\t"
-                "movl   8(%[ctr]), %%edi\n\t"
-                "bswapl %%edi\n\t"
+                "pcmpeqd %%xmm1, %%xmm1\n\t"
+                "psrldq $8, %%xmm1\n\t"         /* xmm1 = -1 */
 
                 "pshufb %[mask], %%xmm2\n\t"    /* xmm2 := le(xmm2) */
-                "paddq  %%xmm1, %%xmm2\n\t"     /* xmm2++           */
+                "psubq  %%xmm1, %%xmm2\n\t"     /* xmm2++           */
                 "movaps %%xmm2, %%xmm3\n\t"     /* xmm3 := xmm2     */
-                "paddq  %%xmm1, %%xmm3\n\t"     /* xmm3++           */
+                "psubq  %%xmm1, %%xmm3\n\t"     /* xmm3++           */
                 "movaps %%xmm3, %%xmm4\n\t"     /* xmm4 := xmm3     */
-                "paddq  %%xmm1, %%xmm4\n\t"     /* xmm4++           */
+                "psubq  %%xmm1, %%xmm4\n\t"     /* xmm4++           */
                 "movaps %%xmm4, %%xmm5\n\t"     /* xmm5 := xmm4     */
-                "paddq  %%xmm1, %%xmm5\n\t"     /* xmm5++           */
-
-                /* swap upper and lower halfs */
-                "pshufd $0x4e, %%xmm1, %%xmm1\n\t"
-
-                "addl   $1, %%esi\n\t"
-                "adcl   $0, %%edi\n\t"          /* detect 64bit overflow */
-                "jc     .Lcarry_xmm2%=\n\t"
-                "addl   $1, %%esi\n\t"
-                "adcl   $0, %%edi\n\t"          /* detect 64bit overflow */
-                "jc     .Lcarry_xmm3%=\n\t"
-                "addl   $1, %%esi\n\t"
-                "adcl   $0, %%edi\n\t"          /* detect 64bit overflow */
-                "jc     .Lcarry_xmm4%=\n\t"
-                "addl   $1, %%esi\n\t"
-                "adcl   $0, %%edi\n\t"          /* detect 64bit overflow */
-                "jc     .Lcarry_xmm5%=\n\t"
-                "jmp    .Lno_carry%=\n\t"
-
-                ".Lcarry_xmm2%=:\n\t"
-                "paddq   %%xmm1, %%xmm2\n\t"
+                "psubq  %%xmm1, %%xmm5\n\t"     /* xmm5++           */
+
+                /* detect if 64-bit carry handling is needed */
+                "cmpl   $0xffffffff, 8(%[ctr])\n\t"
+                "jne    .Lno_carry%=\n\t"
+                "movl   12(%[ctr]), %%esi\n\t"
+                "bswapl %%esi\n\t"
+                "cmpl   $0xfffffffc, %%esi\n\t"
+                "jb     .Lno_carry%=\n\t"       /* no carry */
+
+                "pslldq $8, %%xmm1\n\t"         /* move lower 64-bit to high */
+                "je     .Lcarry_xmm5%=\n\t"     /* esi == 0xfffffffc */
+                "cmpl   $0xfffffffe, %%esi\n\t"
+                "jb     .Lcarry_xmm4%=\n\t"     /* esi == 0xfffffffd */
+                "je     .Lcarry_xmm3%=\n\t"     /* esi == 0xfffffffe */
+                /* esi == 0xffffffff */
+
+                "psubq   %%xmm1, %%xmm2\n\t"
                 ".Lcarry_xmm3%=:\n\t"
-                "paddq   %%xmm1, %%xmm3\n\t"
+                "psubq   %%xmm1, %%xmm3\n\t"
                 ".Lcarry_xmm4%=:\n\t"
-                "paddq   %%xmm1, %%xmm4\n\t"
+                "psubq   %%xmm1, %%xmm4\n\t"
                 ".Lcarry_xmm5%=:\n\t"
-                "paddq   %%xmm1, %%xmm5\n\t"
+                "psubq   %%xmm1, %%xmm5\n\t"
 
                 ".Lno_carry%=:\n\t"
                 "pshufb %[mask], %%xmm2\n\t"    /* xmm2 := be(xmm2) */
@@ -1170,7 +1158,7 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                 "pshufb %[mask], %%xmm5\n\t"    /* xmm5 := be(xmm5) */
                 "movdqa %%xmm5, (%[ctr])\n"     /* Update CTR.      */
 
-                "movdqa (%[key]), %%xmm1\n\t"    /* xmm1 := key[0]    */
+                "movdqa (%[key]), %%xmm1\n\t"   /* xmm1 := key[0]    */
                 "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
                 "pxor   %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]    */
                 "pxor   %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]    */
@@ -1275,7 +1263,7 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                   [key] "r" (ctx->keyschenc),
                   [rounds] "g" (ctx->rounds),
                   [mask] "m" (*be_mask)
-                : "%esi", "%edi", "cc", "memory");
+                : "%esi", "cc", "memory");
 #undef aesenc_xmm1_xmm0
 #undef aesenc_xmm1_xmm2
 #undef aesenc_xmm1_xmm3

-----------------------------------------------------------------------

Summary of changes:
 cipher/rijndael.c |   90 +++++++++++++++++++++++------------------------------
 1 files changed, 39 insertions(+), 51 deletions(-)


hooks/post-receive
-- 
The GNU crypto library
http://git.gnupg.org




More information about the Gnupg-commits mailing list