[PATCH 1/2] rijndael-aesni: split assembly block to ease register pressure

Jussi Kivilinna jussi.kivilinna at iki.fi
Tue Jul 19 12:51:34 CEST 2016


* cipher/rijndael-aesni.c (do_aesni_ctr_4): Use single register
constraint for passing 'bige_addb' to assembly block; split
first inline assembly block into two parts.
--

Fixes compilation on i386 with GCC 4.8 and older, where the long operand
list of the single assembly block exhausts the available general registers.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 1 file changed, 16 insertions(+), 17 deletions(-)
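
Background on the register-pressure problem: i386 has only a handful of
allocatable general registers (%esp is reserved, %ebp may hold the frame
pointer, %ebx the PIC register), and GCC must find registers for all
operands of an asm statement at once. The old code passed the four 16-byte
add constants as four separate "m" operands on top of [ctr], [key] and
[rounds], and under PIC each "m" operand may need an addressing register of
its own. The sketch below (standalone, hypothetical names, not libgcrypt
code; assumes an x86 target with SSE2) contrasts the two constraint styles:

#include <stdint.h>

static const uint8_t consts[4][16] __attribute__((aligned(16))) =
  {
    { [15] = 1 },
    { [15] = 2 },
    { [15] = 3 },
    { [15] = 4 }
  };

/* Old style: four separate "m" operands.  Each may need its own
 * addressing register, on top of the registers used by the other
 * operands of the same asm statement.  */
void
load_separate_operands (void)
{
  asm volatile ("movdqa %[c1], %%xmm2\n\t"
                "movdqa %[c2], %%xmm3\n\t"
                "movdqa %[c3], %%xmm4\n\t"
                "movdqa %[c4], %%xmm5\n\t"
                :
                : [c1] "m" (consts[0][0]),
                  [c2] "m" (consts[1][0]),
                  [c3] "m" (consts[2][0]),
                  [c4] "m" (consts[3][0])
                : "xmm2", "xmm3", "xmm4", "xmm5");
}

/* New style: one "r" operand holding the base address; the constants
 * are reached through constant displacements, so a single general
 * register suffices.  The loads are no longer visible to the compiler
 * as operands, hence the explicit "memory" clobber.  */
void
load_single_base_register (void)
{
  const void *base = consts;

  asm volatile ("movdqa 0*16(%[base]), %%xmm2\n\t"
                "movdqa 1*16(%[base]), %%xmm3\n\t"
                "movdqa 2*16(%[base]), %%xmm4\n\t"
                "movdqa 3*16(%[base]), %%xmm5\n\t"
                :
                : [base] "r" (base)
                : "memory", "xmm2", "xmm3", "xmm4", "xmm5");
}

Unlike this standalone sketch, the patched function declares no XMM
clobbers, because it deliberately passes XMM state between adjacent asm
blocks.
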

diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 97e0ad0..8b28b3a 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -794,6 +794,7 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 },
       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 }
     };
+  const void *bige_addb = bige_addb_const;
 #define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
 #define aesenc_xmm1_xmm2      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
 #define aesenc_xmm1_xmm3      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
@@ -819,16 +820,15 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                 "ja     .Ladd32bit%=\n\t"
 
                 "movdqa %%xmm5, %%xmm0\n\t"     /* xmm0 := CTR (xmm5) */
-                "movdqa %[addb_1], %%xmm2\n\t"  /* xmm2 := be(1) */
-                "movdqa %[addb_2], %%xmm3\n\t"  /* xmm3 := be(2) */
-                "movdqa %[addb_3], %%xmm4\n\t"  /* xmm4 := be(3) */
-                "movdqa %[addb_4], %%xmm5\n\t"  /* xmm5 := be(4) */
+                "movdqa 0*16(%[addb]), %%xmm2\n\t"  /* xmm2 := be(1) */
+                "movdqa 1*16(%[addb]), %%xmm3\n\t"  /* xmm3 := be(2) */
+                "movdqa 2*16(%[addb]), %%xmm4\n\t"  /* xmm4 := be(3) */
+                "movdqa 3*16(%[addb]), %%xmm5\n\t"  /* xmm5 := be(4) */
                 "paddb  %%xmm0, %%xmm2\n\t"     /* xmm2 := be(1) + CTR (xmm0) */
                 "paddb  %%xmm0, %%xmm3\n\t"     /* xmm3 := be(2) + CTR (xmm0) */
                 "paddb  %%xmm0, %%xmm4\n\t"     /* xmm4 := be(3) + CTR (xmm0) */
                 "paddb  %%xmm0, %%xmm5\n\t"     /* xmm5 := be(4) + CTR (xmm0) */
                 "movdqa (%[key]), %%xmm1\n\t"   /* xmm1 := key[0] */
-                "movl   %[rounds], %%esi\n\t"
                 "jmp    .Lstore_ctr%=\n\t"
 
                 ".Ladd32bit%=:\n\t"
@@ -871,7 +871,6 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
 
                 ".Lno_carry%=:\n\t"
                 "movdqa (%[key]), %%xmm1\n\t"   /* xmm1 := key[0]    */
-                "movl %[rounds], %%esi\n\t"
 
                 "pshufb %%xmm6, %%xmm2\n\t"     /* xmm2 := be(xmm2) */
                 "pshufb %%xmm6, %%xmm3\n\t"     /* xmm3 := be(xmm3) */
@@ -880,8 +879,13 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
 
                 ".Lstore_ctr%=:\n\t"
                 "movdqa %%xmm5, (%[ctr])\n\t"   /* Update CTR (mem).  */
+                :
+                : [ctr] "r" (ctr),
+                  [key] "r" (ctx->keyschenc),
+                  [addb] "r" (bige_addb)
+                : "%esi", "cc", "memory");
 
-                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
+  asm volatile ("pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
                 "pxor   %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]    */
                 "pxor   %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]    */
                 "pxor   %%xmm1, %%xmm4\n\t"     /* xmm4 ^= key[0]    */
@@ -931,7 +935,7 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
                 "movdqa 0xa0(%[key]), %%xmm1\n\t"
-                "cmpl $10, %%esi\n\t"
+                "cmpl $10, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
@@ -943,7 +947,7 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
                 "movdqa 0xc0(%[key]), %%xmm1\n\t"
-                "cmpl $12, %%esi\n\t"
+                "cmpl $12, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
@@ -962,14 +966,9 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                 aesenclast_xmm1_xmm3
                 aesenclast_xmm1_xmm4
                 :
-                : [ctr] "r" (ctr),
-                  [key] "r" (ctx->keyschenc),
-                  [rounds] "g" (ctx->rounds),
-                  [addb_1] "m" (bige_addb_const[0][0]),
-                  [addb_2] "m" (bige_addb_const[1][0]),
-                  [addb_3] "m" (bige_addb_const[2][0]),
-                  [addb_4] "m" (bige_addb_const[3][0])
-                : "%esi", "cc", "memory");
+                : [key] "r" (ctx->keyschenc),
+                  [rounds] "r" (ctx->rounds)
+                : "cc", "memory");
 
   asm volatile ("movdqu (%[src]), %%xmm1\n\t"    /* Get block 1.      */
                 "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR-1 ^= input */



