[PATCH] twofish-amd64: do not use xchg instruction
Jussi Kivilinna
jussi.kivilinna at iki.fi
Tue Apr 16 22:03:33 CEST 2019
* cipher/twofish-amd64.S (g1g2_3): Swap ab and cd registers using
'movq' instructions instead of 'xchgq'.
--
Avoiding the 'xchg' instruction improves three-block parallel performance
by ~3% on Intel Haswell.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index 7a836463c..134d6401e 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -368,15 +368,21 @@ ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;
/* G1,2 && G2,2 */ \
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
- xchgq cd ## 0, ab ## 0; \
+ movq ab ## 0, RT0; \
+ movq cd ## 0, ab ## 0; \
+ movq RT0, cd ## 0; \
\
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
- xchgq cd ## 1, ab ## 1; \
+ movq ab ## 1, RT0; \
+ movq cd ## 1, ab ## 1; \
+ movq RT0, cd ## 1; \
\
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
- xchgq cd ## 2, ab ## 2;
+ movq ab ## 2, RT0; \
+ movq cd ## 2, ab ## 2; \
+ movq RT0, cd ## 2;
#define enc_round_end(ab, x, y, n) \
addl y ## d, x ## d; \
More information about the Gcrypt-devel mailing list