[PATCH 1/3] serpent-avx2-amd64: Move register clearing to assembly

Jussi Kivilinna jussi.kivilinna at iki.fi
Sun Sep 1 15:54:09 CEST 2013


* cipher/serpent-avx2-amd64.S (_gcry_serpent_avx2_ctr_enc)
(_gcry_serpent_avx2_cbc_dec, _gcry_serpent_avx2_cfb_dec): Change last
'vzeroupper' to 'vzeroall'.
* cipher/serpent.c (_gcry_serpent_ctr_enc, _gcry_serpent_cbc_dec)
(_gcry_serpent_cfb_dec) [USE_AVX2]: Remove register clearing with
'vzeroall'.
--

The AVX2 implementation was already clearing the upper halves of the YMM
registers at the end of the assembly functions to prevent the long SSE<->AVX
transition stalls present on Intel CPUs. This patch changes these 'vzeroupper'
instructions to 'vzeroall' to fully clear the YMM registers. After this change,
the register clearing in serpent.c is no longer needed.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/serpent-avx2-amd64.S |    6 +++---
 cipher/serpent.c            |    9 ---------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 7586c0c..c726e7b 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -730,7 +730,7 @@ _gcry_serpent_avx2_ctr_enc:
 	vmovdqu RB2, (6 * 32)(%rsi);
 	vmovdqu RB3, (7 * 32)(%rsi);
 
-	vzeroupper;
+	vzeroall;
 
 	ret
 .size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;
@@ -799,7 +799,7 @@ _gcry_serpent_avx2_cbc_dec:
 	vmovdqu RB2, (6 * 32)(%rsi);
 	vmovdqu RB3, (7 * 32)(%rsi);
 
-	vzeroupper;
+	vzeroall;
 
 	ret
 .size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;
@@ -870,7 +870,7 @@ _gcry_serpent_avx2_cfb_dec:
 	vmovdqu RB2, (6 * 32)(%rsi);
 	vmovdqu RB3, (7 * 32)(%rsi);
 
-	vzeroupper;
+	vzeroall;
 
 	ret
 .size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;
diff --git a/cipher/serpent.c b/cipher/serpent.c
index bf03fe7..430a7e9 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -845,9 +845,6 @@ _gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
 
       if (did_use_avx2)
         {
-          /* clear avx2 registers used by serpent-sse2 */
-          asm volatile ("vzeroall;\n":::);
-
           /* serpent-avx2 assembly code does not use stack */
           if (nblocks == 0)
             burn_stack_depth = 0;
@@ -937,9 +934,6 @@ _gcry_serpent_cbc_dec(void *context, unsigned char *iv,
 
       if (did_use_avx2)
         {
-          /* clear avx2 registers used by serpent-sse2 */
-          asm volatile ("vzeroall;\n":::);
-
           /* serpent-avx2 assembly code does not use stack */
           if (nblocks == 0)
             burn_stack_depth = 0;
@@ -1023,9 +1017,6 @@ _gcry_serpent_cfb_dec(void *context, unsigned char *iv,
 
       if (did_use_avx2)
         {
-          /* clear avx2 registers used by serpent-sse2 */
-          asm volatile ("vzeroall;\n":::);
-
           /* serpent-avx2 assembly code does not use stack */
           if (nblocks == 0)
             burn_stack_depth = 0;




More information about the Gcrypt-devel mailing list