[PATCH] Split inline assembly blocks with many memory operands

Jussi Kivilinna jussi.kivilinna at iki.fi
Wed Jan 20 16:17:02 CET 2021


* cipher/rijndael-aesni.c (aesni_ocb_checksum, aesni_ocb_enc)
(aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Split assembly blocks
with more than 4 memory operands into smaller blocks.
* cipher/sha512-ssse3-i386.c (W2): Split big assembly block into
three smaller blocks.
--

On i386 with -O0, assembly blocks with many memory operands cause a
compiler error such as:
 rijndael-aesni.c:2815:7: error: 'asm' operand has impossible constraints

The fix is to split the assembly blocks so that the number of memory
operands per block is reduced.
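
For illustration only (not part of the patch): on i386 each distinct
"m" operand can require its own base register, and with -O0 the
register allocator keeps almost none free, so a single block with
eight memory operands may become unsatisfiable. Below is a minimal
sketch of the split pattern, assuming AVX and using a hypothetical
helper (the real code is in the hunks below). GCC does not in general
guarantee register contents across separate asm statements; the SIMD
state survives here because the compiler-generated code in between
does not touch the ymm registers, which is the same assumption the
patch relies on:

  /* Illustrative sketch, not from the patch.  Assumes AVX; names are
     hypothetical.  ymm0..ymm3 are presumed loaded by an earlier asm
     block, as in the real code.  */
  static void
  xor_eight_256bit_blocks (const unsigned char *p)
  {
    /* First four memory operands in one block...  */
    asm volatile ("vpxor %[p0], %%ymm0, %%ymm0\n\t"
                  "vpxor %[p1], %%ymm1, %%ymm1\n\t"
                  "vpxor %[p2], %%ymm2, %%ymm2\n\t"
                  "vpxor %[p3], %%ymm3, %%ymm3\n\t"
                  :
                  : [p0] "m" (*(p + 0 * 32)),
                    [p1] "m" (*(p + 1 * 32)),
                    [p2] "m" (*(p + 2 * 32)),
                    [p3] "m" (*(p + 3 * 32))
                  : "memory");
    /* ...and the remaining four in a second block, so no single
       block exceeds four memory operands.  */
    asm volatile ("vpxor %[p4], %%ymm0, %%ymm0\n\t"
                  "vpxor %[p5], %%ymm1, %%ymm1\n\t"
                  "vpxor %[p6], %%ymm2, %%ymm2\n\t"
                  "vpxor %[p7], %%ymm3, %%ymm3\n\t"
                  :
                  : [p4] "m" (*(p + 4 * 32)),
                    [p5] "m" (*(p + 5 * 32)),
                    [p6] "m" (*(p + 6 * 32)),
                    [p7] "m" (*(p + 7 * 32))
                  : "memory");
  }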

GnuPG-bug-id: 5257
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/rijndael-aesni.c    | 137 +++++++++++++++++++++----------------
 cipher/sha512-ssse3-i386.c |  18 +++--
 2 files changed, 90 insertions(+), 65 deletions(-)

diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 747ef662..95ec4c2b 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -2271,16 +2271,18 @@ aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext,
 			"vpxor %[ptr1], %%ymm1, %%ymm1\n\t"
 			"vpxor %[ptr2], %%ymm2, %%ymm2\n\t"
 			"vpxor %[ptr3], %%ymm3, %%ymm3\n\t"
-			"vpxor %[ptr4], %%ymm0, %%ymm0\n\t"
-			"vpxor %[ptr5], %%ymm4, %%ymm4\n\t"
-			"vpxor %[ptr6], %%ymm5, %%ymm5\n\t"
-			"vpxor %[ptr7], %%ymm7, %%ymm7\n\t"
 			:
 			: [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
 			  [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
 			  [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
-			  [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)),
-			  [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+			  [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2))
+			: "memory" );
+	  asm volatile ("vpxor %[ptr4], %%ymm0, %%ymm0\n\t"
+			"vpxor %[ptr5], %%ymm4, %%ymm4\n\t"
+			"vpxor %[ptr6], %%ymm5, %%ymm5\n\t"
+			"vpxor %[ptr7], %%ymm7, %%ymm7\n\t"
+			:
+			: [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
 			  [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
 			  [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
 			  [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
@@ -2325,16 +2327,18 @@ aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext,
 			"vxorpd %[ptr1], %%ymm1, %%ymm1\n\t"
 			"vxorpd %[ptr2], %%ymm2, %%ymm2\n\t"
 			"vxorpd %[ptr3], %%ymm3, %%ymm3\n\t"
-			"vxorpd %[ptr4], %%ymm0, %%ymm0\n\t"
-			"vxorpd %[ptr5], %%ymm4, %%ymm4\n\t"
-			"vxorpd %[ptr6], %%ymm5, %%ymm5\n\t"
-			"vxorpd %[ptr7], %%ymm7, %%ymm7\n\t"
 			:
 			: [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
 			  [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
 			  [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
-			  [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2)),
-			  [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+			  [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2))
+			: "memory" );
+	  asm volatile ("vxorpd %[ptr4], %%ymm0, %%ymm0\n\t"
+			"vxorpd %[ptr5], %%ymm4, %%ymm4\n\t"
+			"vxorpd %[ptr6], %%ymm5, %%ymm5\n\t"
+			"vxorpd %[ptr7], %%ymm7, %%ymm7\n\t"
+			:
+			: [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
 			  [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
 			  [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
 			  [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
@@ -2718,28 +2722,35 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 			"aesenclast %[tmpbuf0],%%xmm8\n\t"
 			"aesenclast %[tmpbuf1],%%xmm9\n\t"
 			"aesenclast %[tmpbuf2],%%xmm10\n\t"
-			"aesenclast %%xmm5,    %%xmm11\n\t"
+			:
+			: [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+			  [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
+			  [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)),
+			  [lxfkey] "m" (*lxf_key)
+			: "memory" );
+	  asm volatile ("aesenclast %%xmm5,    %%xmm11\n\t"
 			"pxor   %[lxfkey], %%xmm11\n\t"
 			"movdqu %%xmm1,    %[outbuf0]\n\t"
 			"movdqu %%xmm2,    %[outbuf1]\n\t"
-			"movdqu %%xmm3,    %[outbuf2]\n\t"
+			: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+			  [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+			: [lxfkey] "m" (*lxf_key)
+			: "memory" );
+	  asm volatile ("movdqu %%xmm3,    %[outbuf2]\n\t"
 			"movdqu %%xmm4,    %[outbuf3]\n\t"
 			"movdqu %%xmm8,    %[outbuf4]\n\t"
-			"movdqu %%xmm9,    %[outbuf5]\n\t"
+			: [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+			  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
+			  [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
+			:
+			: "memory" );
+	  asm volatile ("movdqu %%xmm9,    %[outbuf5]\n\t"
 			"movdqu %%xmm10,   %[outbuf6]\n\t"
 			"movdqu %%xmm11,   %[outbuf7]\n\t"
-			: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
-			  [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
-			  [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
-			  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
-			  [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)),
-			  [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
+			: [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
 			  [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)),
 			  [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE))
-			: [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
-			  [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
-			  [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)),
-			  [lxfkey] "m" (*lxf_key)
+			:
 			: "memory" );
 
 	  outbuf += 8*BLOCKSIZE;
@@ -2816,17 +2827,18 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
 		    "movdqu %%xmm1,    %[outbuf0]\n\t"
 		    "pxor   %[tmpbuf1],%%xmm2\n\t"
 		    "movdqu %%xmm2,    %[outbuf1]\n\t"
-		    "pxor   %[tmpbuf2],%%xmm3\n\t"
+		    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+		      [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+		    : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+		      [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("pxor   %[tmpbuf2],%%xmm3\n\t"
 		    "movdqu %%xmm3,    %[outbuf2]\n\t"
 		    "pxor   %%xmm5,    %%xmm4\n\t"
 		    "movdqu %%xmm4,    %[outbuf3]\n\t"
-		    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
-		      [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
-		      [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+		    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
 		      [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
-		    : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
-		      [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
-		      [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+		    : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
 		    : "memory" );
 
       outbuf += 4*BLOCKSIZE;
@@ -3199,28 +3211,34 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 			"aesdeclast %[tmpbuf0],%%xmm8\n\t"
 			"aesdeclast %[tmpbuf1],%%xmm9\n\t"
 			"aesdeclast %[tmpbuf2],%%xmm10\n\t"
-			"aesdeclast %%xmm5,    %%xmm11\n\t"
+			:
+			: [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+			  [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
+			  [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+			: "memory" );
+	  asm volatile ("aesdeclast %%xmm5,    %%xmm11\n\t"
 			"pxor   %[lxfkey], %%xmm11\n\t"
 			"movdqu %%xmm1,    %[outbuf0]\n\t"
 			"movdqu %%xmm2,    %[outbuf1]\n\t"
-			"movdqu %%xmm3,    %[outbuf2]\n\t"
+			: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+			  [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+			: [lxfkey] "m" (*lxf_key)
+			: "memory" );
+	  asm volatile ("movdqu %%xmm3,    %[outbuf2]\n\t"
 			"movdqu %%xmm4,    %[outbuf3]\n\t"
 			"movdqu %%xmm8,    %[outbuf4]\n\t"
-			"movdqu %%xmm9,    %[outbuf5]\n\t"
+			: [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+			  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
+			  [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
+			:
+			: "memory" );
+	  asm volatile ("movdqu %%xmm9,    %[outbuf5]\n\t"
 			"movdqu %%xmm10,   %[outbuf6]\n\t"
 			"movdqu %%xmm11,   %[outbuf7]\n\t"
-			: [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
-			  [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
-			  [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
-			  [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
-			  [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE)),
-			  [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
+			: [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
 			  [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)),
 			  [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE))
-			: [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
-			  [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
-			  [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)),
-			  [lxfkey] "m" (*lxf_key)
+			:
 			: "memory" );
 
 	  outbuf += 8*BLOCKSIZE;
@@ -3292,17 +3310,18 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 		    "movdqu %%xmm1,    %[outbuf0]\n\t"
 		    "pxor   %[tmpbuf1],%%xmm2\n\t"
 		    "movdqu %%xmm2,    %[outbuf1]\n\t"
-		    "pxor   %[tmpbuf2],%%xmm3\n\t"
+		    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+		      [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+		    : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+		      [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE))
+		    : "memory" );
+      asm volatile ("pxor   %[tmpbuf2],%%xmm3\n\t"
 		    "movdqu %%xmm3,    %[outbuf2]\n\t"
 		    "pxor   %%xmm5,    %%xmm4\n\t"
 		    "movdqu %%xmm4,    %[outbuf3]\n\t"
-		    : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
-		      [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)),
-		      [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+		    : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
 		      [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
-		    : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
-		      [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
-		      [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+		    : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
 		    : "memory" );
 
       outbuf += 4*BLOCKSIZE;
@@ -3461,16 +3480,18 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 			"movdqu %[abuf1],  %%xmm2\n\t"
 			"movdqu %[abuf2],  %%xmm3\n\t"
 			"movdqu %[abuf3],  %%xmm4\n\t"
-			"movdqu %[abuf4],  %%xmm8\n\t"
-			"movdqu %[abuf5],  %%xmm9\n\t"
-			"movdqu %[abuf6],  %%xmm10\n\t"
-			"movdqu %[abuf7],  %%xmm11\n\t"
 			:
 			: [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)),
 			  [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)),
 			  [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)),
-			  [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)),
-			  [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)),
+			  [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+			: "memory" );
+	  asm volatile ("movdqu %[abuf4],  %%xmm8\n\t"
+			"movdqu %[abuf5],  %%xmm9\n\t"
+			"movdqu %[abuf6],  %%xmm10\n\t"
+			"movdqu %[abuf7],  %%xmm11\n\t"
+			:
+			: [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)),
 			  [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)),
 			  [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)),
 			  [abuf7] "m" (*(abuf + 7 * BLOCKSIZE))
diff --git a/cipher/sha512-ssse3-i386.c b/cipher/sha512-ssse3-i386.c
index 4b12cee4..0fc98d8e 100644
--- a/cipher/sha512-ssse3-i386.c
+++ b/cipher/sha512-ssse3-i386.c
@@ -228,7 +228,11 @@ static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) =
 	asm volatile ("movdqu %[w_t_m_2], %%xmm2;\n\t"			\
 		      "movdqa %%xmm2, %%xmm0;\n\t"			\
 		      "movdqu %[w_t_m_15], %%xmm5;\n\t"			\
-		      "movdqa %%xmm5, %%xmm3;\n\t"			\
+		      :							\
+		      : [w_t_m_2] "m" (w[(i)-2]),			\
+		        [w_t_m_15] "m" (w[(i)-15])			\
+		      : "memory" );					\
+	asm volatile ("movdqa %%xmm5, %%xmm3;\n\t"			\
 		      "psrlq $(61-19), %%xmm0;\n\t"			\
 		      "psrlq $(8-7), %%xmm3;\n\t"			\
 		      "pxor %%xmm2, %%xmm0;\n\t"			\
@@ -251,17 +255,17 @@ static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) =
 		      "movdqu %[w_t_m_16], %%xmm2;\n\t"			\
 		      "pxor %%xmm4, %%xmm3;\n\t"			\
 		      "movdqu %[w_t_m_7], %%xmm1;\n\t"			\
-		      "paddq %%xmm3, %%xmm0;\n\t"			\
+		      :							\
+		      : [w_t_m_7] "m" (w[(i)-7]),			\
+		        [w_t_m_16] "m" (w[(i)-16])			\
+		      : "memory" );					\
+	asm volatile ("paddq %%xmm3, %%xmm0;\n\t"			\
 		      "paddq %%xmm2, %%xmm0;\n\t"			\
 		      "paddq %%xmm1, %%xmm0;\n\t"			\
 		      "movdqu %%xmm0, %[w_t_m_0];\n\t"			\
 		      "paddq %[k], %%xmm0;\n\t"				\
 		      :	[w_t_m_0] "=m" (w[(i)-0])			\
-		      : [k] "m" (K[i]),					\
-		        [w_t_m_2] "m" (w[(i)-2]),			\
-		        [w_t_m_7] "m" (w[(i)-7]),			\
-		        [w_t_m_15] "m" (w[(i)-15]),			\
-		        [w_t_m_16] "m" (w[(i)-16])			\
+		      : [k] "m" (K[i])					\
 		      : "memory" )
 
 unsigned int ASM_FUNC_ATTR
-- 
2.27.0
