[PATCH 1/4] Add straight-line speculation hardening for amd64 and i386 assembly

Jussi Kivilinna <jussi.kivilinna@iki.fi>
Sat Jan 8 21:13:36 CET 2022


* cipher/asm-common-amd64.h (ret_spec_stop): New.
* cipher/arcfour-amd64.S: Use 'ret_spec_stop' for 'ret' instruction.
* cipher/blake2b-amd64-avx2.S: Likewise.
* cipher/blake2s-amd64-avx.S: Likewise.
* cipher/blowfish-amd64.S: Likewise.
* cipher/camellia-aesni-avx-amd64.S: Likewise.
* cipher/camellia-aesni-avx2-amd64.h: Likewise.
* cipher/cast5-amd64.S: Likewise.
* cipher/chacha20-amd64-avx2.S: Likewise.
* cipher/chacha20-amd64-ssse3.S: Likewise.
* cipher/des-amd64.S: Likewise.
* cipher/rijndael-amd64.S: Likewise.
* cipher/rijndael-ssse3-amd64-asm.S: Likewise.
* cipher/rijndael-vaes-avx2-amd64.S: Likewise.
* cipher/salsa20-amd64.S: Likewise.
* cipher/serpent-avx2-amd64.S: Likewise.
* cipher/serpent-sse2-amd64.S: Likewise.
* cipher/sha1-avx-amd64.S: Likewise.
* cipher/sha1-avx-bmi2-amd64.S: Likewise.
* cipher/sha1-avx2-bmi2-amd64.S: Likewise.
* cipher/sha1-ssse3-amd64.S: Likewise.
* cipher/sha256-avx-amd64.S: Likewise.
* cipher/sha256-avx2-bmi2-amd64.S: Likewise.
* cipher/sha256-ssse3-amd64.S: Likewise.
* cipher/sha512-avx-amd64.S: Likewise.
* cipher/sha512-avx2-bmi2-amd64.S: Likewise.
* cipher/sha512-ssse3-amd64.S: Likewise.
* cipher/sm3-avx-bmi2-amd64.S: Likewise.
* cipher/sm4-aesni-avx-amd64.S: Likewise.
* cipher/sm4-aesni-avx2-amd64.S: Likewise.
* cipher/twofish-amd64.S: Likewise.
* cipher/twofish-avx2-amd64.S: Likewise.
* cipher/whirlpool-sse2-amd64.S: Likewise.
* mpi/amd64/func_abi.h (CFI_*): Remove, include from "asm-common-amd64.h"
instead.
(FUNC_EXIT): Use 'ret_spec_stop' for 'ret' instruction.
* mpi/asm-common-amd64.h: New.
* mpi/i386/mpih-add1.S: Use 'ret_spec_stop' for 'ret' instruction.
* mpi/i386/mpih-lshift.S: Likewise.
* mpi/i386/mpih-mul1.S: Likewise.
* mpi/i386/mpih-mul2.S: Likewise.
* mpi/i386/mpih-mul3.S: Likewise.
* mpi/i386/mpih-rshift.S: Likewise.
* mpi/i386/mpih-sub1.S: Likewise.
* mpi/i386/syntax.h (ret_spec_stop): New.
--
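Some processors may speculatively execute the instructions that follow
an unconditional change in control flow, such as 'ret', before the
return target is resolved. To stop such straight-line speculation,
plain 'ret' instructions are replaced with the 'ret_spec_stop' macro,
which places speculation-stopping instructions directly after the
return. A sketch of the macro as added to cipher/asm-common-amd64.h
below:

	#define ret_spec_stop \
		ret; jmp .; int3;

The 'jmp .' (branch-to-self) and 'int3' instructions are never
architecturally executed; they only serve as a barrier that keeps the
processor from speculating past the 'ret'.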

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
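Notes: besides replacing 'ret', the CFI_* macros that were duplicated
in mpi/amd64/func_abi.h are removed there and are instead pulled in
from the shared cipher/asm-common-amd64.h through the new thin wrapper
header mpi/asm-common-amd64.h.
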
 cipher/arcfour-amd64.S             |  2 +-
 cipher/asm-common-amd64.h          |  4 ++++
 cipher/blake2b-amd64-avx2.S        |  2 +-
 cipher/blake2s-amd64-avx.S         |  2 +-
 cipher/blowfish-amd64.S            | 18 +++++++++---------
 cipher/camellia-aesni-avx-amd64.S  | 20 ++++++++++----------
 cipher/camellia-aesni-avx2-amd64.h | 16 ++++++++--------
 cipher/cast5-amd64.S               | 14 +++++++-------
 cipher/chacha20-amd64-avx2.S       |  4 ++--
 cipher/chacha20-amd64-ssse3.S      |  8 ++++----
 cipher/des-amd64.S                 | 10 +++++-----
 cipher/rijndael-amd64.S            |  4 ++--
 cipher/rijndael-ssse3-amd64-asm.S  | 18 +++++++++---------
 cipher/rijndael-vaes-avx2-amd64.S  | 14 +++++++-------
 cipher/salsa20-amd64.S             |  6 +++---
 cipher/serpent-avx2-amd64.S        | 16 ++++++++--------
 cipher/serpent-sse2-amd64.S        | 16 ++++++++--------
 cipher/sha1-avx-amd64.S            |  2 +-
 cipher/sha1-avx-bmi2-amd64.S       |  2 +-
 cipher/sha1-avx2-bmi2-amd64.S      |  2 +-
 cipher/sha1-ssse3-amd64.S          |  2 +-
 cipher/sha256-avx-amd64.S          |  2 +-
 cipher/sha256-avx2-bmi2-amd64.S    |  2 +-
 cipher/sha256-ssse3-amd64.S        |  2 +-
 cipher/sha512-avx-amd64.S          |  2 +-
 cipher/sha512-avx2-bmi2-amd64.S    |  2 +-
 cipher/sha512-ssse3-amd64.S        |  2 +-
 cipher/sm3-avx-bmi2-amd64.S        |  2 +-
 cipher/sm4-aesni-avx-amd64.S       | 20 ++++++++++----------
 cipher/sm4-aesni-avx2-amd64.S      | 14 +++++++-------
 cipher/twofish-amd64.S             | 20 ++++++++++----------
 cipher/twofish-avx2-amd64.S        | 16 ++++++++--------
 cipher/whirlpool-sse2-amd64.S      |  2 +-
 mpi/amd64/func_abi.h               | 28 +++-------------------------
 mpi/asm-common-amd64.h             | 26 ++++++++++++++++++++++++++
 mpi/i386/mpih-add1.S               |  2 +-
 mpi/i386/mpih-lshift.S             |  4 ++--
 mpi/i386/mpih-mul1.S               |  2 +-
 mpi/i386/mpih-mul2.S               |  2 +-
 mpi/i386/mpih-mul3.S               |  2 +-
 mpi/i386/mpih-rshift.S             |  4 ++--
 mpi/i386/mpih-sub1.S               |  2 +-
 mpi/i386/syntax.h                  |  6 ++++++
 43 files changed, 180 insertions(+), 166 deletions(-)
 create mode 100644 mpi/asm-common-amd64.h
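
For review, with this change a typical function exit such as

	ret
	CFI_ENDPROC()

becomes

	ret_spec_stop
	CFI_ENDPROC()

which assembles to 'ret; jmp .; int3'. This is comparable in spirit to
the straight-line speculation hardening option ('-mharden-sls') of
recent GCC and Clang.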

diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
index 221dfeff..2abd90a7 100644
--- a/cipher/arcfour-amd64.S
+++ b/cipher/arcfour-amd64.S
@@ -99,7 +99,7 @@ _gcry_arcfour_amd64:
 	pop	%rbp
 	CFI_POP(%rbp)
 	EXIT_SYSV_FUNC
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 .L__gcry_arcfour_amd64_end:
 ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)
diff --git a/cipher/asm-common-amd64.h b/cipher/asm-common-amd64.h
index 9d4a028a..8ee9d9e7 100644
--- a/cipher/asm-common-amd64.h
+++ b/cipher/asm-common-amd64.h
@@ -186,4 +186,8 @@
 # define EXIT_SYSV_FUNC
 #endif
 
+/* 'ret' instruction replacement for straight-line speculation mitigation */
+#define ret_spec_stop \
+	ret; jmp .; int3;
+
 #endif /* GCRY_ASM_COMMON_AMD64_H */
diff --git a/cipher/blake2b-amd64-avx2.S b/cipher/blake2b-amd64-avx2.S
index 357e8a51..3601b65f 100644
--- a/cipher/blake2b-amd64-avx2.S
+++ b/cipher/blake2b-amd64-avx2.S
@@ -291,7 +291,7 @@ _gcry_blake2b_transform_amd64_avx2:
 
         xor %eax, %eax;
         vzeroall;
-        ret;
+        ret_spec_stop;
         CFI_ENDPROC();
 ELF(.size _gcry_blake2b_transform_amd64_avx2,
     .-_gcry_blake2b_transform_amd64_avx2;)
diff --git a/cipher/blake2s-amd64-avx.S b/cipher/blake2s-amd64-avx.S
index 5b936758..5094b4c1 100644
--- a/cipher/blake2s-amd64-avx.S
+++ b/cipher/blake2s-amd64-avx.S
@@ -269,7 +269,7 @@ _gcry_blake2s_transform_amd64_avx:
 
         xor %eax, %eax;
         vzeroall;
-        ret;
+        ret_spec_stop;
         CFI_ENDPROC();
 ELF(.size _gcry_blake2s_transform_amd64_avx,
     .-_gcry_blake2s_transform_amd64_avx;)
diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index bdb361d7..2b4ffa1a 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
@@ -151,7 +151,7 @@ __blowfish_enc_blk1:
 	movq %r11, %rbp;
 	CFI_RESTORE(%rbp)
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
 
@@ -182,7 +182,7 @@ _gcry_blowfish_amd64_do_encrypt:
 	movl RX0d, (RX2);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
 
@@ -210,7 +210,7 @@ _gcry_blowfish_amd64_encrypt_block:
 	write_block();
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
 
@@ -253,7 +253,7 @@ _gcry_blowfish_amd64_decrypt_block:
 	CFI_RESTORE(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
 
@@ -367,7 +367,7 @@ __blowfish_enc_blk4:
 
 	outbswap_block4();
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
 
@@ -398,7 +398,7 @@ __blowfish_dec_blk4:
 
 	outbswap_block4();
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
 
@@ -468,7 +468,7 @@ _gcry_blowfish_amd64_ctr_enc:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
 
@@ -529,7 +529,7 @@ _gcry_blowfish_amd64_cbc_dec:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
 
@@ -593,7 +593,7 @@ _gcry_blowfish_amd64_cfb_dec:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
 
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 64cabaa5..5c304e57 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -822,7 +822,7 @@ __camellia_enc_blk16:
 		    %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
 		    %xmm15, ((key_table) + 8 * 8)(%r8), (%rax), 1 * 16(%rax));
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
 
@@ -887,7 +887,7 @@ __camellia_dec_blk16:
 		    %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
 		    %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
 
@@ -1021,7 +1021,7 @@ _gcry_camellia_aesni_avx_ctr_enc:
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
 
@@ -1094,7 +1094,7 @@ _gcry_camellia_aesni_avx_cbc_dec:
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
 
@@ -1176,7 +1176,7 @@ _gcry_camellia_aesni_avx_cfb_dec:
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
 
@@ -1328,7 +1328,7 @@ _gcry_camellia_aesni_avx_ocb_enc:
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;)
 
@@ -1499,7 +1499,7 @@ _gcry_camellia_aesni_avx_ocb_dec:
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;)
 
@@ -1647,7 +1647,7 @@ _gcry_camellia_aesni_avx_ocb_auth:
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;)
 
@@ -2096,7 +2096,7 @@ __camellia_avx_setup128:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
 
@@ -2576,7 +2576,7 @@ __camellia_avx_setup256:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
 
diff --git a/cipher/camellia-aesni-avx2-amd64.h b/cipher/camellia-aesni-avx2-amd64.h
index be7bb0aa..e93c40b8 100644
--- a/cipher/camellia-aesni-avx2-amd64.h
+++ b/cipher/camellia-aesni-avx2-amd64.h
@@ -815,7 +815,7 @@ __camellia_enc_blk32:
 		    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
 		    %ymm15, ((key_table) + 8 * 8)(%r8), (%rax), 1 * 32(%rax));
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;)
 
@@ -880,7 +880,7 @@ __camellia_dec_blk32:
 		    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
 		    %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;)
 
@@ -1084,7 +1084,7 @@ FUNC_NAME(ctr_enc):
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(ctr_enc),.-FUNC_NAME(ctr_enc);)
 
@@ -1161,7 +1161,7 @@ FUNC_NAME(cbc_dec):
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(cbc_dec),.-FUNC_NAME(cbc_dec);)
 
@@ -1245,7 +1245,7 @@ FUNC_NAME(cfb_dec):
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(cfb_dec),.-FUNC_NAME(cfb_dec);)
 
@@ -1419,7 +1419,7 @@ FUNC_NAME(ocb_enc):
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(ocb_enc),.-FUNC_NAME(ocb_enc);)
 
@@ -1616,7 +1616,7 @@ FUNC_NAME(ocb_dec):
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(ocb_dec),.-FUNC_NAME(ocb_dec);)
 
@@ -1787,7 +1787,7 @@ FUNC_NAME(ocb_auth):
 
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(ocb_auth),.-FUNC_NAME(ocb_auth);)
 
diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index 82f67890..a804654c 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
@@ -219,7 +219,7 @@ _gcry_cast5_amd64_encrypt_block:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
 
@@ -269,7 +269,7 @@ _gcry_cast5_amd64_decrypt_block:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
 
@@ -399,7 +399,7 @@ __cast5_enc_blk4:
 	round_enc_last4(14, F4_3, F4_1);
 
 	outbswap_block4(RLR0, RLR1, RLR2, RLR3);
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
 
@@ -432,7 +432,7 @@ __cast5_dec_blk4:
 
 	outbswap_block4(RLR0, RLR1, RLR2, RLR3);
 	CFI_ENDPROC();
-	ret;
+	ret_spec_stop;
 ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
 
 .align 8
@@ -508,7 +508,7 @@ _gcry_cast5_amd64_ctr_enc:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
 
@@ -582,7 +582,7 @@ _gcry_cast5_amd64_cbc_dec:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
 
@@ -655,7 +655,7 @@ _gcry_cast5_amd64_cfb_dec:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;)
 
diff --git a/cipher/chacha20-amd64-avx2.S b/cipher/chacha20-amd64-avx2.S
index 51e107be..9f2a036a 100644
--- a/cipher/chacha20-amd64-avx2.S
+++ b/cipher/chacha20-amd64-avx2.S
@@ -322,7 +322,7 @@ _gcry_chacha20_amd64_avx2_blocks8:
 	/* eax zeroed by round loop. */
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_chacha20_amd64_avx2_blocks8,
 	  .-_gcry_chacha20_amd64_avx2_blocks8;)
@@ -592,7 +592,7 @@ _gcry_chacha20_poly1305_amd64_avx2_blocks8:
 	xorl %eax, %eax;
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_chacha20_poly1305_amd64_avx2_blocks8,
 	  .-_gcry_chacha20_poly1305_amd64_avx2_blocks8;)
diff --git a/cipher/chacha20-amd64-ssse3.S b/cipher/chacha20-amd64-ssse3.S
index 9cdb69ae..6c737978 100644
--- a/cipher/chacha20-amd64-ssse3.S
+++ b/cipher/chacha20-amd64-ssse3.S
@@ -333,7 +333,7 @@ _gcry_chacha20_amd64_ssse3_blocks4:
 	/* eax zeroed by round loop. */
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_chacha20_amd64_ssse3_blocks4,
 	  .-_gcry_chacha20_amd64_ssse3_blocks4;)
@@ -502,7 +502,7 @@ _gcry_chacha20_amd64_ssse3_blocks1:
 	clear(X13);
 
 	/* eax zeroed by round loop. */
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_chacha20_amd64_ssse3_blocks1,
 	  .-_gcry_chacha20_amd64_ssse3_blocks1;)
@@ -772,7 +772,7 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks4:
 	xorl %eax, %eax;
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks4,
 	  .-_gcry_chacha20_poly1305_amd64_ssse3_blocks4;)
@@ -1003,7 +1003,7 @@ _gcry_chacha20_poly1305_amd64_ssse3_blocks1:
 	xorl %eax, %eax;
 	leave;
 	CFI_LEAVE();
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks1,
 	  .-_gcry_chacha20_poly1305_amd64_ssse3_blocks1;)
diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
index a211dac3..c1bf9f29 100644
--- a/cipher/des-amd64.S
+++ b/cipher/des-amd64.S
@@ -285,7 +285,7 @@ _gcry_3des_amd64_crypt_block:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
 
@@ -544,7 +544,7 @@ _gcry_3des_amd64_crypt_blk3:
 
 	final_permutation3(RR, RL);
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
 
@@ -642,7 +642,7 @@ _gcry_3des_amd64_cbc_dec:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
 
@@ -740,7 +740,7 @@ _gcry_3des_amd64_ctr_enc:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
 
@@ -837,7 +837,7 @@ _gcry_3des_amd64_cfb_dec:
 	CFI_POP(%rbp);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
 
diff --git a/cipher/rijndael-amd64.S b/cipher/rijndael-amd64.S
index 3dcaa856..6e3cc819 100644
--- a/cipher/rijndael-amd64.S
+++ b/cipher/rijndael-amd64.S
@@ -270,7 +270,7 @@ _gcry_aes_amd64_encrypt_block:
 	movl $(6 * 8), %eax;
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 
 	CFI_RESTORE_STATE();
 .align 4
@@ -448,7 +448,7 @@ _gcry_aes_amd64_decrypt_block:
 	movl $(6 * 8), %eax;
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 
 	CFI_RESTORE_STATE();
 .align 4
diff --git a/cipher/rijndael-ssse3-amd64-asm.S b/cipher/rijndael-ssse3-amd64-asm.S
index 8124eb21..b98dca26 100644
--- a/cipher/rijndael-ssse3-amd64-asm.S
+++ b/cipher/rijndael-ssse3-amd64-asm.S
@@ -61,7 +61,7 @@ _gcry_aes_ssse3_enc_preload:
 	movdqa	.Lk_sb2   (%rax), %xmm15 # sb2u
 	movdqa	.Lk_sb2+16(%rax), %xmm14 # sb2t
 	EXIT_SYSV_FUNC
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
 
@@ -83,7 +83,7 @@ _gcry_aes_ssse3_dec_preload:
 	movdqa	.Lk_dsbb   (%rax), %xmm14 # sbbu
 	movdqa	.Lk_dsbe   (%rax), %xmm8  # sbeu
 	EXIT_SYSV_FUNC
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_aes_ssse3_dec_preload,.-_gcry_aes_ssse3_dec_preload)
 
@@ -194,7 +194,7 @@ _aes_encrypt_core:
 	pxor	%xmm4,	%xmm0	# 0 = A
 	pshufb	.Lk_sr(%rsi,%rcx), %xmm0
 	EXIT_SYSV_FUNC
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _aes_encrypt_core,.-_aes_encrypt_core)
 
@@ -303,7 +303,7 @@ _aes_decrypt_core:
 	pxor	%xmm4,	%xmm0	# 0 = A
 	pshufb	.Lk_sr(%rsi,%rcx), %xmm0
 	EXIT_SYSV_FUNC
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _aes_decrypt_core,.-_aes_decrypt_core)
 
@@ -439,7 +439,7 @@ _aes_schedule_core:
 	pxor	%xmm6,	%xmm0		# -> b+c+d b+c b a
 	pshufd	$0x0E,	%xmm0,	%xmm6
 	pslldq	$8,	%xmm6		# clobber low side with zeros
-	ret
+	ret_spec_stop
 
 ##
 ##  .Laes_schedule_256
@@ -546,7 +546,7 @@ _aes_schedule_core:
 	# add in smeared stuff
 	pxor	%xmm7,	%xmm0
 	movdqa	%xmm0,	%xmm7
-	ret
+	ret_spec_stop
 
 ##
 ##  .Laes_schedule_transform
@@ -567,7 +567,7 @@ _aes_schedule_core:
 	movdqa	16(%r11), %xmm0 # hi
 	pshufb	%xmm1,	%xmm0
 	pxor	%xmm2,	%xmm0
-	ret
+	ret_spec_stop
 
 ##
 ##  .Laes_schedule_mangle
@@ -639,7 +639,7 @@ _aes_schedule_core:
 	add	$-16,	%r8
 	and	$48,	%r8
 	movdqa	%xmm3,	(%rdx)
-	ret
+	ret_spec_stop
 
 ##
 ##  .Laes_schedule_mangle_last
@@ -679,7 +679,7 @@ _aes_schedule_core:
 	pxor	%xmm7,  %xmm7
 	pxor	%xmm8,  %xmm8
 	EXIT_SYSV_FUNC
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_aes_ssse3_schedule_core,.-_gcry_aes_ssse3_schedule_core)
 
diff --git a/cipher/rijndael-vaes-avx2-amd64.S b/cipher/rijndael-vaes-avx2-amd64.S
index d4ecf59f..f94b58db 100644
--- a/cipher/rijndael-vaes-avx2-amd64.S
+++ b/cipher/rijndael-vaes-avx2-amd64.S
@@ -383,7 +383,7 @@ _gcry_vaes_avx2_cbc_dec_amd64:
 	vmovdqu %xmm15, (%rsi);
 
 	vzeroall;
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_vaes_avx2_cbc_dec_amd64,.-_gcry_vaes_avx2_cbc_dec_amd64)
 
@@ -691,7 +691,7 @@ _gcry_vaes_avx2_cfb_dec_amd64:
 	vmovdqu %xmm15, (%rsi);
 
 	vzeroall;
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_vaes_avx2_cfb_dec_amd64,.-_gcry_vaes_avx2_cfb_dec_amd64)
 
@@ -1103,7 +1103,7 @@ _gcry_vaes_avx2_ctr_enc_amd64:
 	vzeroall;
 	xorl %r10d, %r10d;
 	xorl %r11d, %r11d;
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_vaes_avx2_ctr_enc_amd64,.-_gcry_vaes_avx2_ctr_enc_amd64)
 
@@ -1387,7 +1387,7 @@ _gcry_vaes_avx2_ctr32le_enc_amd64:
 .Ldone_ctr32le_enc:
 	vmovdqu %xmm15, (%rsi);
 	vzeroall;
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_vaes_avx2_ctr32le_enc_amd64,.-_gcry_vaes_avx2_ctr32le_enc_amd64)
 
@@ -1535,7 +1535,7 @@ _gcry_vaes_avx2_ocb_checksum:
 .Locb_checksum_done:
 	vpxor (%rax), %xmm0, %xmm0;
 	vmovdqu %xmm0, (%rax);
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_vaes_avx2_ocb_checksum,.-_gcry_vaes_avx2_ocb_checksum)
 
@@ -2398,7 +2398,7 @@ _gcry_vaes_avx2_ocb_crypt_amd64:
 
 	leave;
 	CFI_LEAVE();
-	ret
+	ret_spec_stop
 
 #undef STACK_REGS_POS
 #undef STACK_ALLOC
@@ -2919,7 +2919,7 @@ _gcry_vaes_avx2_xts_crypt_amd64:
 	vzeroall;
 
 	xorl %eax, %eax
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 ELF(.size _gcry_vaes_avx2_xts_crypt_amd64,.-_gcry_vaes_avx2_xts_crypt_amd64)
 
diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S
index ae8f2715..64626063 100644
--- a/cipher/salsa20-amd64.S
+++ b/cipher/salsa20-amd64.S
@@ -83,7 +83,7 @@ _gcry_salsa20_amd64_keysetup:
 	movl   %ecx,8(%rdi)
 	movl   %r8d,12(%rdi)
 .L_keysetupdone:
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 
 .align 8
@@ -99,7 +99,7 @@ _gcry_salsa20_amd64_ivsetup:
 	movl   %esi,44(%rdi)
 	movl   %r9d,32(%rdi)
 	movl   %eax,52(%rdi)
-	ret
+	ret_spec_stop
 	CFI_ENDPROC();
 
 .align 8
@@ -926,7 +926,7 @@ _gcry_salsa20_amd64_encrypt_blocks:
 	CFI_DEF_CFA_REGISTER(%rsp)
 	pop %rbx
 	CFI_POP(%rbx)
-	ret
+	ret_spec_stop
 	CFI_RESTORE_STATE();
 .L_bytes_are_128_or_192:
 	sub  $64,%rdx
diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index dcee9b62..d3515a21 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -487,7 +487,7 @@ __serpent_enc_blk16:
 	transpose_4x4(RA4, RA1, RA2, RA0, RA3, RTMP0, RTMP1);
 	transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
 
@@ -579,7 +579,7 @@ __serpent_dec_blk16:
 	transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
 	transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
 
@@ -697,7 +697,7 @@ _gcry_serpent_avx2_ctr_enc:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
 
@@ -750,7 +750,7 @@ _gcry_serpent_avx2_cbc_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
 
@@ -805,7 +805,7 @@ _gcry_serpent_avx2_cfb_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
 
@@ -919,7 +919,7 @@ _gcry_serpent_avx2_ocb_enc:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;)
 
@@ -1043,7 +1043,7 @@ _gcry_serpent_avx2_ocb_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;)
 
@@ -1146,7 +1146,7 @@ _gcry_serpent_avx2_ocb_auth:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;)
 
diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S
index 39cba002..b5935095 100644
--- a/cipher/serpent-sse2-amd64.S
+++ b/cipher/serpent-sse2-amd64.S
@@ -509,7 +509,7 @@ __serpent_enc_blk8:
 	transpose_4x4(RA4, RA1, RA2, RA0, RA3, RTMP0, RTMP1);
 	transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
 
@@ -601,7 +601,7 @@ __serpent_dec_blk8:
 	transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
 	transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
 
@@ -733,7 +733,7 @@ _gcry_serpent_sse2_ctr_enc:
 	pxor RTMP2, RTMP2;
 	pxor RNOT, RNOT;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
 
@@ -796,7 +796,7 @@ _gcry_serpent_sse2_cbc_dec:
 	pxor RTMP2, RTMP2;
 	pxor RNOT, RNOT;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
 
@@ -862,7 +862,7 @@ _gcry_serpent_sse2_cfb_dec:
 	pxor RTMP2, RTMP2;
 	pxor RNOT, RNOT;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
 
@@ -976,7 +976,7 @@ _gcry_serpent_sse2_ocb_enc:
 	pxor RTMP2, RTMP2;
 	pxor RNOT, RNOT;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;)
 
@@ -1100,7 +1100,7 @@ _gcry_serpent_sse2_ocb_dec:
 	pxor RTMP2, RTMP2;
 	pxor RNOT, RNOT;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;)
 
@@ -1203,7 +1203,7 @@ _gcry_serpent_sse2_ocb_auth:
 	pxor RTMP2, RTMP2;
 	pxor RNOT, RNOT;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_ocb_auth,.-_gcry_serpent_sse2_ocb_auth;)
 
diff --git a/cipher/sha1-avx-amd64.S b/cipher/sha1-avx-amd64.S
index 85876ad4..acada960 100644
--- a/cipher/sha1-avx-amd64.S
+++ b/cipher/sha1-avx-amd64.S
@@ -420,7 +420,7 @@ _gcry_sha1_transform_amd64_avx:
   xorl %eax, %eax;
 
 .Lret:
-  ret;
+  ret_spec_stop;
   CFI_ENDPROC();
 ELF(.size _gcry_sha1_transform_amd64_avx,
     .-_gcry_sha1_transform_amd64_avx;)
diff --git a/cipher/sha1-avx-bmi2-amd64.S b/cipher/sha1-avx-bmi2-amd64.S
index 5dfcdca9..5f4b9e69 100644
--- a/cipher/sha1-avx-bmi2-amd64.S
+++ b/cipher/sha1-avx-bmi2-amd64.S
@@ -432,7 +432,7 @@ _gcry_sha1_transform_amd64_avx_bmi2:
   xorl %eax, %eax;
 
 .Lret:
-  ret;
+  ret_spec_stop;
   CFI_ENDPROC();
 ELF(.size _gcry_sha1_transform_amd64_avx_bmi2,
     .-_gcry_sha1_transform_amd64_avx_bmi2;)
diff --git a/cipher/sha1-avx2-bmi2-amd64.S b/cipher/sha1-avx2-bmi2-amd64.S
index 93863230..ed52761b 100644
--- a/cipher/sha1-avx2-bmi2-amd64.S
+++ b/cipher/sha1-avx2-bmi2-amd64.S
@@ -564,7 +564,7 @@ _gcry_sha1_transform_amd64_avx2_bmi2:
   /* stack already burned */
   xorl %eax, %eax;
 
-  ret;
+  ret_spec_stop;
   CFI_ENDPROC();
 ELF(.size _gcry_sha1_transform_amd64_avx2_bmi2,
     .-_gcry_sha1_transform_amd64_avx2_bmi2;)
diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S
index db62928a..f09b1de1 100644
--- a/cipher/sha1-ssse3-amd64.S
+++ b/cipher/sha1-ssse3-amd64.S
@@ -428,7 +428,7 @@ _gcry_sha1_transform_amd64_ssse3:
   xorl %eax, %eax;
 
 .Lret:
-  ret;
+  ret_spec_stop;
   CFI_ENDPROC();
 ELF(.size _gcry_sha1_transform_amd64_ssse3,
     .-_gcry_sha1_transform_amd64_ssse3;)
diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S
index ec945f84..be8a799d 100644
--- a/cipher/sha256-avx-amd64.S
+++ b/cipher/sha256-avx-amd64.S
@@ -471,7 +471,7 @@ _gcry_sha256_transform_amd64_avx:
 	pop	rbx
 	CFI_POP(rbx)
 
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
 
diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S
index d130dd4a..60ad442c 100644
--- a/cipher/sha256-avx2-bmi2-amd64.S
+++ b/cipher/sha256-avx2-bmi2-amd64.S
@@ -474,7 +474,7 @@ _gcry_sha256_transform_amd64_avx2:
 	CFI_POP(rbx)
 
 .Lnowork:
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
 .align 64
diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S
index 098b0eb6..401ff6f4 100644
--- a/cipher/sha256-ssse3-amd64.S
+++ b/cipher/sha256-ssse3-amd64.S
@@ -493,7 +493,7 @@ _gcry_sha256_transform_amd64_ssse3:
 	pop	rbx
 	CFI_POP(rbx)
 
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
 
diff --git a/cipher/sha512-avx-amd64.S b/cipher/sha512-avx-amd64.S
index 75f7b070..bfc4435d 100644
--- a/cipher/sha512-avx-amd64.S
+++ b/cipher/sha512-avx-amd64.S
@@ -400,7 +400,7 @@ _gcry_sha512_transform_amd64_avx:
 	CFI_ADJUST_CFA_OFFSET(-frame_size);
 
 .Lnowork:
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
 /*
diff --git a/cipher/sha512-avx2-bmi2-amd64.S b/cipher/sha512-avx2-bmi2-amd64.S
index 7f119e6c..a431e196 100644
--- a/cipher/sha512-avx2-bmi2-amd64.S
+++ b/cipher/sha512-avx2-bmi2-amd64.S
@@ -439,7 +439,7 @@ _gcry_sha512_transform_amd64_avx2:
 	CFI_DEF_CFA_REGISTER(rsp)
 
 .Lnowork:
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
 /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
diff --git a/cipher/sha512-ssse3-amd64.S b/cipher/sha512-ssse3-amd64.S
index 6a1328a6..9cc30892 100644
--- a/cipher/sha512-ssse3-amd64.S
+++ b/cipher/sha512-ssse3-amd64.S
@@ -406,7 +406,7 @@ _gcry_sha512_transform_amd64_ssse3:
 	CFI_ADJUST_CFA_OFFSET(-frame_size);
 
 .Lnowork:
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
 /*
diff --git a/cipher/sm3-avx-bmi2-amd64.S b/cipher/sm3-avx-bmi2-amd64.S
index 46226ae6..d9b6206a 100644
--- a/cipher/sm3-avx-bmi2-amd64.S
+++ b/cipher/sm3-avx-bmi2-amd64.S
@@ -544,7 +544,7 @@ _gcry_sm3_transform_amd64_avx_bmi2:
 
   leave;
   CFI_LEAVE();
-  ret;
+  ret_spec_stop;
   CFI_ENDPROC();
 ELF(.size _gcry_sm3_transform_amd64_avx_bmi2,
           .-_gcry_sm3_transform_amd64_avx_bmi2;)
diff --git a/cipher/sm4-aesni-avx-amd64.S b/cipher/sm4-aesni-avx-amd64.S
index 3610b98c..7a99e070 100644
--- a/cipher/sm4-aesni-avx-amd64.S
+++ b/cipher/sm4-aesni-avx-amd64.S
@@ -240,7 +240,7 @@ _gcry_sm4_aesni_avx_expand_key:
 #undef ROUND
 
 	vzeroall;
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_expand_key,.-_gcry_sm4_aesni_avx_expand_key;)
 
@@ -345,7 +345,7 @@ sm4_aesni_avx_crypt_blk1_4:
 .Lblk4_store_output_done:
 	vzeroall;
 	xorl %eax, %eax;
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size sm4_aesni_avx_crypt_blk1_4,.-sm4_aesni_avx_crypt_blk1_4;)
 
@@ -454,7 +454,7 @@ __sm4_crypt_blk8:
 	vpshufb RTMP2, RB2, RB2;
 	vpshufb RTMP2, RB3, RB3;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __sm4_crypt_blk8,.-__sm4_crypt_blk8;)
 
@@ -508,7 +508,7 @@ _gcry_sm4_aesni_avx_crypt_blk1_8:
 .Lblk8_store_output_done:
 	vzeroall;
 	xorl %eax, %eax;
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_crypt_blk1_8,.-_gcry_sm4_aesni_avx_crypt_blk1_8;)
 
@@ -582,7 +582,7 @@ _gcry_sm4_aesni_avx_ctr_enc:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_ctr_enc,.-_gcry_sm4_aesni_avx_ctr_enc;)
 
@@ -631,7 +631,7 @@ _gcry_sm4_aesni_avx_cbc_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_cbc_dec,.-_gcry_sm4_aesni_avx_cbc_dec;)
 
@@ -683,7 +683,7 @@ _gcry_sm4_aesni_avx_cfb_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_cfb_dec,.-_gcry_sm4_aesni_avx_cfb_dec;)
 
@@ -782,7 +782,7 @@ _gcry_sm4_aesni_avx_ocb_enc:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_ocb_enc,.-_gcry_sm4_aesni_avx_ocb_enc;)
 
@@ -891,7 +891,7 @@ _gcry_sm4_aesni_avx_ocb_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_ocb_dec,.-_gcry_sm4_aesni_avx_ocb_dec;)
 
@@ -979,7 +979,7 @@ _gcry_sm4_aesni_avx_ocb_auth:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_ocb_auth,.-_gcry_sm4_aesni_avx_ocb_auth;)
 
diff --git a/cipher/sm4-aesni-avx2-amd64.S b/cipher/sm4-aesni-avx2-amd64.S
index 6e46c0dc..7a8b9558 100644
--- a/cipher/sm4-aesni-avx2-amd64.S
+++ b/cipher/sm4-aesni-avx2-amd64.S
@@ -276,7 +276,7 @@ __sm4_crypt_blk16:
 	vpshufb RTMP2, RB2, RB2;
 	vpshufb RTMP2, RB3, RB3;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __sm4_crypt_blk16,.-__sm4_crypt_blk16;)
 
@@ -394,7 +394,7 @@ _gcry_sm4_aesni_avx2_ctr_enc:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_ctr_enc,.-_gcry_sm4_aesni_avx2_ctr_enc;)
 
@@ -447,7 +447,7 @@ _gcry_sm4_aesni_avx2_cbc_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_cbc_dec,.-_gcry_sm4_aesni_avx2_cbc_dec;)
 
@@ -502,7 +502,7 @@ _gcry_sm4_aesni_avx2_cfb_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_cfb_dec,.-_gcry_sm4_aesni_avx2_cfb_dec;)
 
@@ -616,7 +616,7 @@ _gcry_sm4_aesni_avx2_ocb_enc:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_ocb_enc,.-_gcry_sm4_aesni_avx2_ocb_enc;)
 
@@ -740,7 +740,7 @@ _gcry_sm4_aesni_avx2_ocb_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_ocb_dec,.-_gcry_sm4_aesni_avx2_ocb_dec;)
 
@@ -843,7 +843,7 @@ _gcry_sm4_aesni_avx2_ocb_auth:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_ocb_auth,.-_gcry_sm4_aesni_avx2_ocb_auth;)
 
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index 3cb73431..a7a60553 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -211,7 +211,7 @@ _gcry_twofish_amd64_encrypt_block:
 	CFI_ADJUST_CFA_OFFSET(-3 * 8);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
 
@@ -265,7 +265,7 @@ _gcry_twofish_amd64_decrypt_block:
 	CFI_ADJUST_CFA_OFFSET(-3 * 8);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
 
@@ -511,7 +511,7 @@ __twofish_enc_blk3:
 
 	outunpack_enc3();
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)
 
@@ -540,7 +540,7 @@ __twofish_dec_blk3:
 
 	outunpack_dec3();
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)
 
@@ -641,7 +641,7 @@ _gcry_twofish_amd64_ctr_enc:
 	CFI_ADJUST_CFA_OFFSET(-8 * 8);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
 
@@ -726,7 +726,7 @@ _gcry_twofish_amd64_cbc_dec:
 	CFI_ADJUST_CFA_OFFSET(-9 * 8);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
 
@@ -811,7 +811,7 @@ _gcry_twofish_amd64_cfb_dec:
 	CFI_ADJUST_CFA_OFFSET(-8 * 8);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
 
@@ -937,7 +937,7 @@ _gcry_twofish_amd64_ocb_enc:
 	CFI_ADJUST_CFA_OFFSET(-8 * 8);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)
 
@@ -1071,7 +1071,7 @@ _gcry_twofish_amd64_ocb_dec:
 	CFI_ADJUST_CFA_OFFSET(-8 * 8);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)
 
@@ -1176,7 +1176,7 @@ _gcry_twofish_amd64_ocb_auth:
 	CFI_ADJUST_CFA_OFFSET(-8 * 8);
 
 	EXIT_SYSV_FUNC
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;)
 
diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S
index 74cad355..930ac792 100644
--- a/cipher/twofish-avx2-amd64.S
+++ b/cipher/twofish-avx2-amd64.S
@@ -431,7 +431,7 @@ __twofish_enc_blk16:
 	outunpack_enc16(RA, RB, RC, RD);
 	transpose4x4_16(RA, RB, RC, RD);
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __twofish_enc_blk16,.-__twofish_enc_blk16;)
 
@@ -464,7 +464,7 @@ __twofish_dec_blk16:
 	outunpack_dec16(RA, RB, RC, RD);
 	transpose4x4_16(RA, RB, RC, RD);
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;)
 
@@ -582,7 +582,7 @@ _gcry_twofish_avx2_ctr_enc:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_ctr_enc,.-_gcry_twofish_avx2_ctr_enc;)
 
@@ -635,7 +635,7 @@ _gcry_twofish_avx2_cbc_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_cbc_dec,.-_gcry_twofish_avx2_cbc_dec;)
 
@@ -690,7 +690,7 @@ _gcry_twofish_avx2_cfb_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_cfb_dec,.-_gcry_twofish_avx2_cfb_dec;)
 
@@ -804,7 +804,7 @@ _gcry_twofish_avx2_ocb_enc:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_ocb_enc,.-_gcry_twofish_avx2_ocb_enc;)
 
@@ -929,7 +929,7 @@ _gcry_twofish_avx2_ocb_dec:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_ocb_dec,.-_gcry_twofish_avx2_ocb_dec;)
 
@@ -1032,7 +1032,7 @@ _gcry_twofish_avx2_ocb_auth:
 
 	vzeroall;
 
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_ocb_auth,.-_gcry_twofish_avx2_ocb_auth;)
 
diff --git a/cipher/whirlpool-sse2-amd64.S b/cipher/whirlpool-sse2-amd64.S
index 5631dc56..37648faa 100644
--- a/cipher/whirlpool-sse2-amd64.S
+++ b/cipher/whirlpool-sse2-amd64.S
@@ -340,7 +340,7 @@ _gcry_whirlpool_transform_amd64:
 	CFI_ADJUST_CFA_OFFSET(-STACK_MAX);
 .Lskip:
 	movl $(STACK_MAX + 8), %eax;
-	ret;
+	ret_spec_stop;
 	CFI_ENDPROC();
 ELF(.size _gcry_whirlpool_transform_amd64,.-_gcry_whirlpool_transform_amd64;)
 
diff --git a/mpi/amd64/func_abi.h b/mpi/amd64/func_abi.h
index a60363e4..c3f2d026 100644
--- a/mpi/amd64/func_abi.h
+++ b/mpi/amd64/func_abi.h
@@ -1,28 +1,6 @@
 #include <config.h>
 
-#ifdef __x86_64__
-#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
-# define CFI_STARTPROC()            .cfi_startproc
-# define CFI_ENDPROC()              .cfi_endproc
-# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
-# define CFI_REL_OFFSET(reg,off)    .cfi_rel_offset reg, off
-# define CFI_RESTORE(reg)           .cfi_restore reg
-
-# define CFI_PUSH(reg) \
-	CFI_ADJUST_CFA_OFFSET(8); CFI_REL_OFFSET(reg, 0)
-# define CFI_POP(reg) \
-	CFI_ADJUST_CFA_OFFSET(-8); CFI_RESTORE(reg)
-#else
-# define CFI_STARTPROC()
-# define CFI_ENDPROC()
-# define CFI_ADJUST_CFA_OFFSET(off)
-# define CFI_REL_OFFSET(reg,off)
-# define CFI_RESTORE(reg)
-
-# define CFI_PUSH(reg)
-# define CFI_POP(reg)
-#endif
-#endif
+#include "asm-common-amd64.h"
 
 #ifdef USE_MS_ABI
  /* Store registers and move four first input arguments from MS ABI to
@@ -44,13 +22,13 @@
 	CFI_POP(%rdi); \
 	popq %rsi; \
 	CFI_POP(%rsi); \
-	ret; \
+	ret_spec_stop; \
 	CFI_ENDPROC();
 #else
  #define FUNC_ENTRY() \
 	CFI_STARTPROC();
 
  #define FUNC_EXIT() \
-	ret; \
+	ret_spec_stop; \
 	CFI_ENDPROC();
 #endif
diff --git a/mpi/asm-common-amd64.h b/mpi/asm-common-amd64.h
new file mode 100644
index 00000000..ad0e8e62
--- /dev/null
+++ b/mpi/asm-common-amd64.h
@@ -0,0 +1,26 @@
+/* asm-common-amd64.h  -  Common macros for AMD64 assembly
+ *
+ * Copyright (C) 2022 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPI_ASM_COMMON_AMD64_H
+#define MPI_ASM_COMMON_AMD64_H
+
+#include "../cipher/asm-common-amd64.h"
+
+#endif /* MPI_ASM_COMMON_AMD64_H */
diff --git a/mpi/i386/mpih-add1.S b/mpi/i386/mpih-add1.S
index de78a0cb..95a75890 100644
--- a/mpi/i386/mpih-add1.S
+++ b/mpi/i386/mpih-add1.S
@@ -156,6 +156,6 @@ Loop:	movl	(%esi),%eax
 	CFI_POP(%esi)
 	popl %edi
 	CFI_POP(%edi)
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-lshift.S b/mpi/i386/mpih-lshift.S
index 55da0678..3404cf55 100644
--- a/mpi/i386/mpih-lshift.S
+++ b/mpi/i386/mpih-lshift.S
@@ -86,7 +86,7 @@ L1:	movl	(%esi,%edx,4),%eax
 	popl	%ebx
 	popl	%esi
 	popl	%edi
-	ret
+	ret_spec_stop
 
 Lend:	shll	%cl,%ebx		/* compute least significant limb */
 	movl	%ebx,(%edi)		/* store it */
@@ -97,6 +97,6 @@ Lend:	shll	%cl,%ebx		/* compute least significant limb */
 	CFI_POP(%esi)
 	popl	%edi
 	CFI_POP(%edi)
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-mul1.S b/mpi/i386/mpih-mul1.S
index 9679ea62..a672d052 100644
--- a/mpi/i386/mpih-mul1.S
+++ b/mpi/i386/mpih-mul1.S
@@ -89,6 +89,6 @@ Loop:
 	CFI_POP(%esi)
 	INSN1(pop,l	,R(edi))
 	CFI_POP(%edi)
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-mul2.S b/mpi/i386/mpih-mul2.S
index fe4129c4..e09c3f7c 100644
--- a/mpi/i386/mpih-mul2.S
+++ b/mpi/i386/mpih-mul2.S
@@ -91,6 +91,6 @@ Loop:
 	CFI_POP(%esi)
 	INSN1(pop,l	,R(edi))
 	CFI_POP(%edi)
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-mul3.S b/mpi/i386/mpih-mul3.S
index 87577d54..4112c699 100644
--- a/mpi/i386/mpih-mul3.S
+++ b/mpi/i386/mpih-mul3.S
@@ -91,6 +91,6 @@ Loop:
 	CFI_POP(%esi)
 	INSN1(pop,l	,R(edi))
 	CFI_POP(%edi)
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-rshift.S b/mpi/i386/mpih-rshift.S
index 35a8201f..5d34696c 100644
--- a/mpi/i386/mpih-rshift.S
+++ b/mpi/i386/mpih-rshift.S
@@ -89,7 +89,7 @@ L2:	movl	(%esi,%edx,4),%eax
 	popl	%ebx
 	popl	%esi
 	popl	%edi
-	ret
+	ret_spec_stop
 
 Lend2:	shrl	%cl,%ebx		/* compute most significant limb */
 	movl	%ebx,(%edi)		/* store it */
@@ -100,6 +100,6 @@ Lend2:	shrl	%cl,%ebx		/* compute most significant limb */
 	CFI_POP(%esi)
 	popl	%edi
 	CFI_POP(%edi)
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/mpih-sub1.S b/mpi/i386/mpih-sub1.S
index 2bdc1438..49477ae3 100644
--- a/mpi/i386/mpih-sub1.S
+++ b/mpi/i386/mpih-sub1.S
@@ -157,6 +157,6 @@ Loop:	movl	(%esi),%eax
 	CFI_POP(%esi)
 	popl %edi
 	CFI_POP(%edi)
-	ret
+	ret_spec_stop
 	CFI_ENDPROC()
 
diff --git a/mpi/i386/syntax.h b/mpi/i386/syntax.h
index dd300319..bab2d4a6 100644
--- a/mpi/i386/syntax.h
+++ b/mpi/i386/syntax.h
@@ -92,3 +92,9 @@
 #undef ALIGN
 #define ALIGN(log) .align log,0x90
 #endif
+
+/* 'ret' instruction replacement for straight-line speculation mitigation */
+#define ret_spec_stop \
+	ret; \
+	jmp .; \
+	int3;
-- 
2.32.0



