[PATCH 3/7] amd64-asm: move constant data to read-only section for cipher algos

Jussi Kivilinna <jussi.kivilinna@iki.fi>
Tue Jan 17 20:17:37 CET 2023


* cipher/arcfour-amd64.S: Move constant data to read-only
section; Align text section to 64 bytes and functions to 16 bytes.
* cipher/blowfish-amd64.S: Likewise.
* cipher/camellia-aesni-avx-amd64.S: Likewise.
* cipher/camellia-aesni-avx2-amd64.h: Likewise.
* cipher/camellia-gfni-avx512-amd64.S: Likewise.
* cipher/cast5-amd64.S: Likewise.
* cipher/chacha20-amd64-avx2.S: Likewise.
* cipher/chacha20-amd64-avx512.S: Likewise.
* cipher/chacha20-amd64-ssse3.S: Likewise.
* cipher/des-amd64.S: Likewise.
* cipher/rijndael-amd64.S: Likewise.
* cipher/rijndael-ssse3-amd64-asm.S: Likewise.
* cipher/rijndael-vaes-avx2-amd64.S: Likewise.
* cipher/salsa20-amd64.S: Likewise.
* cipher/serpent-avx2-amd64.S: Likewise.
* cipher/serpent-sse2-amd64.S: Likewise.
* cipher/sm4-aesni-avx-amd64.S: Likewise.
* cipher/sm4-aesni-avx2-amd64.S: Likewise.
* cipher/sm4-gfni-avx2-amd64.S: Likewise.
* cipher/sm4-gfni-avx512-amd64.S: Likewise.
* cipher/twofish-amd64.S: Likewise.
* cipher/twofish-avx2-amd64.S: Likewise.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
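
The change is the same mechanical transformation in every file listed
below. A minimal sketch of the resulting layout, with symbol names
invented purely for illustration (SECTION_RODATA is the helper macro
from the asm-common headers that selects the platform's read-only data
section, ".section .rodata" on ELF targets):

  SECTION_RODATA                    /* was: .text */
  ELF(.type _example_consts,@object)
  _example_consts:
  .align 16                         /* was: .align 8 */
  .Lconst:
          .long 0x0f0f0f0f

  .text
  .align 64                         /* section start: one cache line */

  .align 16                         /* function entry, was: .align 8 */
  .globl _example_func
  ELF(.type _example_func,@function)
  _example_func:
          ret

Keeping the tables out of .text means constant data is no longer mapped
executable, and the wider alignments keep code placement consistent now
that data no longer sits between functions.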
 cipher/arcfour-amd64.S              |  2 +-
 cipher/blowfish-amd64.S             | 19 ++++++++--------
 cipher/camellia-aesni-avx-amd64.S   | 34 +++++++++++++++++++----------
 cipher/camellia-aesni-avx2-amd64.h  | 26 ++++++++++++----------
 cipher/camellia-gfni-avx512-amd64.S | 23 ++++++++++---------
 cipher/cast5-amd64.S                | 15 +++++++------
 cipher/chacha20-amd64-avx2.S        | 14 +++++++-----
 cipher/chacha20-amd64-avx512.S      |  8 ++++---
 cipher/chacha20-amd64-ssse3.S       | 18 +++++++++------
 cipher/des-amd64.S                  | 17 ++++++++++-----
 cipher/rijndael-amd64.S             |  5 +++--
 cipher/rijndael-ssse3-amd64-asm.S   |  8 ++++++-
 cipher/rijndael-vaes-avx2-amd64.S   |  3 +++
 cipher/salsa20-amd64.S              |  7 +++---
 cipher/serpent-avx2-amd64.S         | 25 ++++++++++++---------
 cipher/serpent-sse2-amd64.S         | 19 ++++++++--------
 cipher/sm4-aesni-avx-amd64.S        | 28 ++++++++++++++----------
 cipher/sm4-aesni-avx2-amd64.S       | 24 ++++++++++++--------
 cipher/sm4-gfni-avx2-amd64.S        | 32 ++++++++++++++++-----------
 cipher/sm4-gfni-avx512-amd64.S      |  5 ++++-
 cipher/twofish-amd64.S              | 23 +++++++++----------
 cipher/twofish-avx2-amd64.S         | 24 +++++++++++---------
 22 files changed, 229 insertions(+), 150 deletions(-)

diff --git a/cipher/arcfour-amd64.S b/cipher/arcfour-amd64.S
index 2abd90a7..d4cd6083 100644
--- a/cipher/arcfour-amd64.S
+++ b/cipher/arcfour-amd64.S
@@ -21,7 +21,7 @@
 #include "asm-common-amd64.h"
 
 .text
-.align 16
+.align 64
 .globl _gcry_arcfour_amd64
 ELF(.type _gcry_arcfour_amd64,@function)
 _gcry_arcfour_amd64:
diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S
index 2b4ffa1a..9db3dc1b 100644
--- a/cipher/blowfish-amd64.S
+++ b/cipher/blowfish-amd64.S
@@ -27,6 +27,7 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
 /* structure of BLOWFISH_context: */
 #define s0	0
@@ -123,7 +124,7 @@
 	bswapq 			RX0; \
 	movq RX0, 		(RIO);
 
-.align 8
+.align 16
 ELF(.type   __blowfish_enc_blk1,@function;)
 
 __blowfish_enc_blk1:
@@ -155,7 +156,7 @@ __blowfish_enc_blk1:
 	CFI_ENDPROC();
 ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
 
-.align 8
+.align 16
 .globl  _gcry_blowfish_amd64_do_encrypt
 ELF(.type   _gcry_blowfish_amd64_do_encrypt,@function;)
 
@@ -186,7 +187,7 @@ _gcry_blowfish_amd64_do_encrypt:
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
 
-.align 8
+.align 16
 .globl  _gcry_blowfish_amd64_encrypt_block
 ELF(.type   _gcry_blowfish_amd64_encrypt_block,@function;)
 
@@ -214,7 +215,7 @@ _gcry_blowfish_amd64_encrypt_block:
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
 
-.align 8
+.align 16
 .globl  _gcry_blowfish_amd64_decrypt_block
 ELF(.type   _gcry_blowfish_amd64_decrypt_block,@function;)
 
@@ -342,7 +343,7 @@ ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_bloc
 	bswapq 			RX2; \
 	bswapq 			RX3;
 
-.align 8
+.align 16
 ELF(.type   __blowfish_enc_blk4,@function;)
 
 __blowfish_enc_blk4:
@@ -371,7 +372,7 @@ __blowfish_enc_blk4:
 	CFI_ENDPROC();
 ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
 
-.align 8
+.align 16
 ELF(.type   __blowfish_dec_blk4,@function;)
 
 __blowfish_dec_blk4:
@@ -402,7 +403,7 @@ __blowfish_dec_blk4:
 	CFI_ENDPROC();
 ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
 
-.align 8
+.align 16
 .globl  _gcry_blowfish_amd64_ctr_enc
 ELF(.type   _gcry_blowfish_amd64_ctr_enc,@function;)
 _gcry_blowfish_amd64_ctr_enc:
@@ -472,7 +473,7 @@ _gcry_blowfish_amd64_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
 
-.align 8
+.align 16
 .globl  _gcry_blowfish_amd64_cbc_dec
 ELF(.type   _gcry_blowfish_amd64_cbc_dec,@function;)
 _gcry_blowfish_amd64_cbc_dec:
@@ -533,7 +534,7 @@ _gcry_blowfish_amd64_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
 
-.align 8
+.align 16
 .globl  _gcry_blowfish_amd64_cfb_dec
 ELF(.type   _gcry_blowfish_amd64_cfb_dec,@function;)
 _gcry_blowfish_amd64_cfb_dec:
diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 5c304e57..9240d70b 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -619,7 +619,10 @@
 	vmovdqu y6, 14 * 16(rio); \
 	vmovdqu y7, 15 * 16(rio);
 
-.text
+SECTION_RODATA
+
+ELF(.type _camellia_aesni_avx_data,@object;)
+_camellia_aesni_avx_data:
 .align 16
 
 #define SHUFB_BYTES(idx) \
@@ -763,9 +766,11 @@
 .L0f0f0f0f:
 	.long 0x0f0f0f0f
 
+.text
+.align 64
 
-.align 8
 ELF(.type   __camellia_enc_blk16,@function;)
+.align 16
 
 __camellia_enc_blk16:
 	/* input:
@@ -826,7 +831,7 @@ __camellia_enc_blk16:
 	CFI_ENDPROC();
 ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
 
-.align 8
+.align 16
 ELF(.type   __camellia_dec_blk16,@function;)
 
 __camellia_dec_blk16:
@@ -897,7 +902,7 @@ ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
 	vpslldq $8, tmp, tmp; \
 	vpsubq tmp, x, x;
 
-.align 8
+.align 16
 .globl _gcry_camellia_aesni_avx_ctr_enc
 ELF(.type   _gcry_camellia_aesni_avx_ctr_enc,@function;)
 
@@ -1025,7 +1030,7 @@ _gcry_camellia_aesni_avx_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_aesni_avx_cbc_dec
 ELF(.type   _gcry_camellia_aesni_avx_cbc_dec,@function;)
 
@@ -1098,7 +1103,7 @@ _gcry_camellia_aesni_avx_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_aesni_avx_cfb_dec
 ELF(.type   _gcry_camellia_aesni_avx_cfb_dec,@function;)
 
@@ -1180,7 +1185,7 @@ _gcry_camellia_aesni_avx_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_aesni_avx_ocb_enc
 ELF(.type   _gcry_camellia_aesni_avx_ocb_enc,@function;)
 
@@ -1332,7 +1337,7 @@ _gcry_camellia_aesni_avx_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_aesni_avx_ocb_dec
 ELF(.type   _gcry_camellia_aesni_avx_ocb_dec,@function;)
 
@@ -1503,7 +1508,7 @@ _gcry_camellia_aesni_avx_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_aesni_avx_ocb_auth
 ELF(.type   _gcry_camellia_aesni_avx_ocb_auth,@function;)
 
@@ -1720,6 +1725,10 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
 	vpsllq $(64-(nror)), out, out; \
 	vpaddd t0, out, out;
 
+SECTION_RODATA
+
+ELF(.type _camellia_aesni_avx_keysetup_data,@object;)
+_camellia_aesni_avx_keysetup_data:
 
 .align 16
 .Linv_shift_row_and_unpcklbw:
@@ -1752,8 +1761,9 @@ ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;
 .Lsigma6:
 	.long 0xB3E6C1FD, 0xB05688C2;
 
+.text
 
-.align 8
+.align 16
 ELF(.type  __camellia_avx_setup128,@function;)
 __camellia_avx_setup128:
 	/* input:
@@ -2100,7 +2110,7 @@ __camellia_avx_setup128:
 	CFI_ENDPROC();
 ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
 
-.align 8
+.align 16
 ELF(.type  __camellia_avx_setup256,@function;)
 
 __camellia_avx_setup256:
@@ -2580,7 +2590,7 @@ __camellia_avx_setup256:
 	CFI_ENDPROC();
 ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_aesni_avx_keygen
 ELF(.type  _gcry_camellia_aesni_avx_keygen,@function;)
 
diff --git a/cipher/camellia-aesni-avx2-amd64.h b/cipher/camellia-aesni-avx2-amd64.h
index 411e790f..46c2be81 100644
--- a/cipher/camellia-aesni-avx2-amd64.h
+++ b/cipher/camellia-aesni-avx2-amd64.h
@@ -784,7 +784,8 @@
 	vmovdqu y6, 14 * 32(rio); \
 	vmovdqu y7, 15 * 32(rio);
 
-.text
+SECTION_RODATA
+
 .align 32
 
 #define SHUFB_BYTES(idx) \
 ELF(.type   FUNC_NAME(_constants),@object;)
 
 ELF(.size FUNC_NAME(_constants),.-FUNC_NAME(_constants);)
 
-.align 8
+.text
+.align 64
+
+.align 16
 ELF(.type   FUNC_NAME(enc_blk32),@function;)
 
 FUNC_NAME(enc_blk32):
@@ -1059,7 +1063,7 @@ FUNC_NAME(enc_blk32):
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(enc_blk32),.-FUNC_NAME(enc_blk32);)
 
-.align 8
+.align 16
 ELF(.type   FUNC_NAME(dec_blk32),@function;)
 
 FUNC_NAME(dec_blk32):
@@ -1130,7 +1134,7 @@ ELF(.size FUNC_NAME(dec_blk32),.-FUNC_NAME(dec_blk32);)
 	vpslldq $8, tmp, tmp; \
 	vpsubq tmp, x, x;
 
-.align 8
+.align 16
 .globl FUNC_NAME(ctr_enc)
 ELF(.type   FUNC_NAME(ctr_enc),@function;)
 
@@ -1325,7 +1329,7 @@ FUNC_NAME(ctr_enc):
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(ctr_enc),.-FUNC_NAME(ctr_enc);)
 
-.align 8
+.align 16
 .globl FUNC_NAME(cbc_dec)
 ELF(.type   FUNC_NAME(cbc_dec),@function;)
 
@@ -1400,7 +1404,7 @@ FUNC_NAME(cbc_dec):
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(cbc_dec),.-FUNC_NAME(cbc_dec);)
 
-.align 8
+.align 16
 .globl FUNC_NAME(cfb_dec)
 ELF(.type   FUNC_NAME(cfb_dec),@function;)
 
@@ -1482,7 +1486,7 @@ FUNC_NAME(cfb_dec):
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(cfb_dec),.-FUNC_NAME(cfb_dec);)
 
-.align 8
+.align 16
 .globl FUNC_NAME(ocb_enc)
 ELF(.type   FUNC_NAME(ocb_enc),@function;)
 
@@ -1654,7 +1658,7 @@ FUNC_NAME(ocb_enc):
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(ocb_enc),.-FUNC_NAME(ocb_enc);)
 
-.align 8
+.align 16
 .globl FUNC_NAME(ocb_dec)
 ELF(.type   FUNC_NAME(ocb_dec),@function;)
 
@@ -1849,7 +1853,7 @@ FUNC_NAME(ocb_dec):
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(ocb_dec),.-FUNC_NAME(ocb_dec);)
 
-.align 8
+.align 16
 .globl FUNC_NAME(ocb_auth)
 ELF(.type   FUNC_NAME(ocb_auth),@function;)
 
@@ -2018,7 +2022,7 @@ FUNC_NAME(ocb_auth):
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(ocb_auth),.-FUNC_NAME(ocb_auth);)
 
-.align 8
+.align 16
 .globl FUNC_NAME(enc_blk1_32)
 ELF(.type   FUNC_NAME(enc_blk1_32),@function;)
 
@@ -2126,7 +2130,7 @@ FUNC_NAME(enc_blk1_32):
 	CFI_ENDPROC();
 ELF(.size FUNC_NAME(enc_blk1_32),.-FUNC_NAME(enc_blk1_32);)
 
-.align 8
+.align 16
 .globl FUNC_NAME(dec_blk1_32)
 ELF(.type   FUNC_NAME(dec_blk1_32),@function;)
 
diff --git a/cipher/camellia-gfni-avx512-amd64.S b/cipher/camellia-gfni-avx512-amd64.S
index 14725b4a..7a98a3ce 100644
--- a/cipher/camellia-gfni-avx512-amd64.S
+++ b/cipher/camellia-gfni-avx512-amd64.S
@@ -584,7 +584,7 @@
 	vmovdqu64 y6, 14 * 64(rio); \
 	vmovdqu64 y7, 15 * 64(rio);
 
-.text
+SECTION_RODATA
 
 #define SHUFB_BYTES(idx) \
 	0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
 ELF(.type   _gcry_camellia_gfni_avx512__constants,@object;)
 
 ELF(.size _gcry_camellia_gfni_avx512__constants,.-_gcry_camellia_gfni_avx512__constants;)
 
-.align 8
+.text
+.align 64
+
+.align 16
 ELF(.type   __camellia_gfni_avx512_enc_blk64,@function;)
 
 __camellia_gfni_avx512_enc_blk64:
@@ -751,7 +754,7 @@ __camellia_gfni_avx512_enc_blk64:
 	CFI_ENDPROC();
 ELF(.size __camellia_gfni_avx512_enc_blk64,.-__camellia_gfni_avx512_enc_blk64;)
 
-.align 8
+.align 16
 ELF(.type   __camellia_gfni_avx512_dec_blk64,@function;)
 
 __camellia_gfni_avx512_dec_blk64:
@@ -820,7 +823,7 @@ ELF(.size __camellia_gfni_avx512_dec_blk64,.-__camellia_gfni_avx512_dec_blk64;)
 	kaddb %k1, %k1, %k1; \
 	vpaddq hi_counter1, out, out{%k1};
 
-.align 8
+.align 16
 .globl _gcry_camellia_gfni_avx512_ctr_enc
 ELF(.type   _gcry_camellia_gfni_avx512_ctr_enc,@function;)
 
@@ -973,7 +976,7 @@ _gcry_camellia_gfni_avx512_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_gfni_avx512_ctr_enc,.-_gcry_camellia_gfni_avx512_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_gfni_avx512_cbc_dec
 ELF(.type   _gcry_camellia_gfni_avx512_cbc_dec,@function;)
 
@@ -1035,7 +1038,7 @@ _gcry_camellia_gfni_avx512_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_gfni_avx512_cbc_dec,.-_gcry_camellia_gfni_avx512_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_gfni_avx512_cfb_dec
 ELF(.type   _gcry_camellia_gfni_avx512_cfb_dec,@function;)
 
@@ -1108,7 +1111,7 @@ _gcry_camellia_gfni_avx512_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_gfni_avx512_cfb_dec,.-_gcry_camellia_gfni_avx512_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_gfni_avx512_ocb_enc
 ELF(.type   _gcry_camellia_gfni_avx512_ocb_enc,@function;)
 
@@ -1271,7 +1274,7 @@ _gcry_camellia_gfni_avx512_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_gfni_avx512_ocb_enc,.-_gcry_camellia_gfni_avx512_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_gfni_avx512_ocb_dec
 ELF(.type   _gcry_camellia_gfni_avx512_ocb_dec,@function;)
 
@@ -1440,7 +1443,7 @@ _gcry_camellia_gfni_avx512_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_gfni_avx512_ocb_dec,.-_gcry_camellia_gfni_avx512_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_gfni_avx512_enc_blk64
 ELF(.type   _gcry_camellia_gfni_avx512_enc_blk64,@function;)
 
@@ -1504,7 +1507,7 @@ _gcry_camellia_gfni_avx512_enc_blk64:
 	CFI_ENDPROC();
 ELF(.size _gcry_camellia_gfni_avx512_enc_blk64,.-_gcry_camellia_gfni_avx512_enc_blk64;)
 
-.align 8
+.align 16
 .globl _gcry_camellia_gfni_avx512_dec_blk64
 ELF(.type   _gcry_camellia_gfni_avx512_dec_blk64,@function;)
 
diff --git a/cipher/cast5-amd64.S b/cipher/cast5-amd64.S
index a804654c..39171587 100644
--- a/cipher/cast5-amd64.S
+++ b/cipher/cast5-amd64.S
@@ -26,6 +26,7 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
 .extern _gcry_cast5_s1to4;
 
@@ -173,7 +174,7 @@
 	rorq $32,		RLR0; \
 	movq RLR0, 		(RIO);
 
-.align 8
+.align 16
 .globl _gcry_cast5_amd64_encrypt_block
 ELF(.type   _gcry_cast5_amd64_encrypt_block,@function;)
 
@@ -223,7 +224,7 @@ _gcry_cast5_amd64_encrypt_block:
 	CFI_ENDPROC();
 ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
 
-.align 8
+.align 16
 .globl _gcry_cast5_amd64_decrypt_block
 ELF(.type   _gcry_cast5_amd64_decrypt_block,@function;)
 
@@ -373,7 +374,7 @@ ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
 	rorq $32,		c; \
 	rorq $32,		d;
 
-.align 8
+.align 16
 ELF(.type   __cast5_enc_blk4,@function;)
 
 __cast5_enc_blk4:
@@ -403,7 +404,7 @@ __cast5_enc_blk4:
 	CFI_ENDPROC();
 ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
 
-.align 8
+.align 16
 ELF(.type   __cast5_dec_blk4,@function;)
 
 __cast5_dec_blk4:
@@ -435,7 +436,7 @@ __cast5_dec_blk4:
 	ret_spec_stop;
 ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
 
-.align 8
+.align 16
 .globl _gcry_cast5_amd64_ctr_enc
 ELF(.type   _gcry_cast5_amd64_ctr_enc,@function;)
 _gcry_cast5_amd64_ctr_enc:
@@ -512,7 +513,7 @@ _gcry_cast5_amd64_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_cast5_amd64_cbc_dec
 ELF(.type   _gcry_cast5_amd64_cbc_dec,@function;)
 _gcry_cast5_amd64_cbc_dec:
@@ -586,7 +587,7 @@ _gcry_cast5_amd64_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_cast5_amd64_cfb_dec
 ELF(.type   _gcry_cast5_amd64_cfb_dec,@function;)
 _gcry_cast5_amd64_cfb_dec:
diff --git a/cipher/chacha20-amd64-avx2.S b/cipher/chacha20-amd64-avx2.S
index 9f2a036a..99ff7469 100644
--- a/cipher/chacha20-amd64-avx2.S
+++ b/cipher/chacha20-amd64-avx2.S
@@ -33,8 +33,6 @@
    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 
-.text
-
 #include "asm-common-amd64.h"
 #include "asm-poly1305-amd64.h"
 
@@ -157,8 +155,11 @@
 	PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2);	\
 	    ROTATE2(b1, b2,  7, tmp1);
 
+SECTION_RODATA
+
+ELF(.type _chacha20_avx2_data,@object;)
 .align 32
-chacha20_data:
+_chacha20_avx2_data:
 .Lshuf_rol16:
 	.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
 .Lshuf_rol8:
@@ -168,7 +169,10 @@ chacha20_data:
 .Lunsigned_cmp:
 	.long 0x80000000
 
-.align 8
+.text
+.align 64
+
+.align 16
 .globl _gcry_chacha20_amd64_avx2_blocks8
 ELF(.type _gcry_chacha20_amd64_avx2_blocks8,@function;)
 
@@ -333,7 +337,7 @@ ELF(.size _gcry_chacha20_amd64_avx2_blocks8,
 
 #define _ /*_*/
 
-.align 8
+.align 16
 .globl _gcry_chacha20_poly1305_amd64_avx2_blocks8
 ELF(.type _gcry_chacha20_poly1305_amd64_avx2_blocks8,@function;)
 
diff --git a/cipher/chacha20-amd64-avx512.S b/cipher/chacha20-amd64-avx512.S
index 4b183528..e39a505a 100644
--- a/cipher/chacha20-amd64-avx512.S
+++ b/cipher/chacha20-amd64-avx512.S
@@ -33,8 +33,6 @@
    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 
-.text
-
 #include "asm-common-amd64.h"
 
 /* register macros */
@@ -269,6 +267,8 @@
 				   ROTATE(x1, 7); ROTATE(y1, 7); \
 	  WORD_SHUF(x1, shuf_x1); WORD_SHUF(y1, shuf_x1);
 
+SECTION_RODATA
+
 .align 64
 ELF(.type _gcry_chacha20_amd64_avx512_data,@object;)
 _gcry_chacha20_amd64_avx512_data:
@@ -286,7 +286,9 @@ _gcry_chacha20_amd64_avx512_data:
 	.byte 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 ELF(.size _gcry_chacha20_amd64_avx512_data,.-_gcry_chacha20_amd64_avx512_data)
 
-.align 16
+.text
+
+.align 64
 .globl _gcry_chacha20_amd64_avx512_blocks
 ELF(.type _gcry_chacha20_amd64_avx512_blocks,@function;)
 _gcry_chacha20_amd64_avx512_blocks:
diff --git a/cipher/chacha20-amd64-ssse3.S b/cipher/chacha20-amd64-ssse3.S
index 6c737978..50c4755e 100644
--- a/cipher/chacha20-amd64-ssse3.S
+++ b/cipher/chacha20-amd64-ssse3.S
@@ -33,8 +33,6 @@
    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 
-.text
-
 #include "asm-common-amd64.h"
 #include "asm-poly1305-amd64.h"
 
@@ -151,7 +149,10 @@
 	PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2);	\
 	    ROTATE2(b1, b2,  7, tmp1, tmp2);
 
-chacha20_data:
+SECTION_RODATA
+
+ELF(.type _chacha20_ssse3_data,@object;)
+_chacha20_ssse3_data:
 .align 16
 .Lshuf_rol16:
 	.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
@@ -164,7 +165,10 @@ chacha20_data:
 .Lunsigned_cmp:
 	.long 0x80000000,0x80000000,0x80000000,0x80000000
 
-.align 8
+.text
+.align 64
+
+.align 16
 .globl _gcry_chacha20_amd64_ssse3_blocks4
 ELF(.type _gcry_chacha20_amd64_ssse3_blocks4,@function;)
 
@@ -366,7 +370,7 @@ ELF(.size _gcry_chacha20_amd64_ssse3_blocks4,
 				   ROTATE(x1, 7, tmp1); \
 	  WORD_SHUF(x1, shuf_x1);
 
-.align 8
+.align 16
 .globl _gcry_chacha20_amd64_ssse3_blocks1
 ELF(.type _gcry_chacha20_amd64_ssse3_blocks1, at function;)
 
@@ -513,7 +517,7 @@ ELF(.size _gcry_chacha20_amd64_ssse3_blocks1,
 
 #define _ /*_*/
 
-.align 8
+.align 16
 .globl _gcry_chacha20_poly1305_amd64_ssse3_blocks4
 ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks4, at function;)
 
@@ -781,7 +785,7 @@ ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks4,
   2-way && 1-way stitched chacha20-poly1305
  **********************************************************************/
 
-.align 8
+.align 16
 .globl _gcry_chacha20_poly1305_amd64_ssse3_blocks1
 ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks1, at function;)
 
diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
index c1bf9f29..44a8a90c 100644
--- a/cipher/des-amd64.S
+++ b/cipher/des-amd64.S
@@ -26,6 +26,7 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
 #define s1 0
 #define s2 ((s1) + (64*8))
@@ -180,7 +181,7 @@
 	movl   left##d,   (io); \
 	movl   right##d, 4(io);
 
-.align 8
+.align 16
 .globl _gcry_3des_amd64_crypt_block
 ELF(.type  _gcry_3des_amd64_crypt_block,@function;)
 
@@ -473,7 +474,7 @@ ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
 	movl   left##d,   (io); \
 	movl   right##d, 4(io);
 
-.align 8
+.align 16
 ELF(.type  _gcry_3des_amd64_crypt_blk3,@function;)
 _gcry_3des_amd64_crypt_blk3:
 	/* input:
@@ -548,7 +549,7 @@ _gcry_3des_amd64_crypt_blk3:
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
 
-.align 8
+.align 16
 .globl  _gcry_3des_amd64_cbc_dec
 ELF(.type   _gcry_3des_amd64_cbc_dec,@function;)
 _gcry_3des_amd64_cbc_dec:
@@ -603,6 +604,7 @@ _gcry_3des_amd64_cbc_dec:
 	popq %rdx; /*src*/
 	CFI_POP_TMP_REG();
 	popq %rsi; /*dst*/
+.align 8
 	CFI_POP_TMP_REG();
 
 	bswapl RR0d;
@@ -646,7 +648,7 @@ _gcry_3des_amd64_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
 
-.align 8
+.align 16
 .globl  _gcry_3des_amd64_ctr_enc
 ELF(.type   _gcry_3des_amd64_ctr_enc,@function;)
 _gcry_3des_amd64_ctr_enc:
@@ -744,7 +746,7 @@ _gcry_3des_amd64_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
 
-.align 8
+.align 16
 .globl  _gcry_3des_amd64_cfb_dec
 ELF(.type   _gcry_3des_amd64_cfb_dec,@function;)
 _gcry_3des_amd64_cfb_dec:
@@ -841,7 +843,12 @@ _gcry_3des_amd64_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
 
+
+SECTION_RODATA
+ELF(.type _des_amd64_data,@object;)
+
 .align 16
+_des_amd64_data:
 .L_s1:
 	.quad 0x0010100001010400, 0x0000000000000000
 	.quad 0x0000100000010000, 0x0010100001010404
diff --git a/cipher/rijndael-amd64.S b/cipher/rijndael-amd64.S
index 6e3cc819..3d5a0bd2 100644
--- a/cipher/rijndael-amd64.S
+++ b/cipher/rijndael-amd64.S
@@ -26,6 +26,7 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
 /* table macros */
 #define E0	(0)
@@ -200,7 +201,7 @@
 #define lastencround(round) \
 	do_lastencround((round) + 1);
 
-.align 8
+.align 16
 .globl _gcry_aes_amd64_encrypt_block
 ELF(.type   _gcry_aes_amd64_encrypt_block,@function;)
 
@@ -377,7 +378,7 @@ ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;)
 #define lastdecround(round) \
 	do_lastdecround(round);
 
-.align 8
+.align 16
 .globl _gcry_aes_amd64_decrypt_block
 ELF(.type   _gcry_aes_amd64_decrypt_block,@function;)
 
diff --git a/cipher/rijndael-ssse3-amd64-asm.S b/cipher/rijndael-ssse3-amd64-asm.S
index b98dca26..52cd0db2 100644
--- a/cipher/rijndael-ssse3-amd64-asm.S
+++ b/cipher/rijndael-ssse3-amd64-asm.S
@@ -43,10 +43,12 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
 ##
 ##  _gcry_aes_ssse3_enc_preload
 ##
+.align 16
 ELF(.type _gcry_aes_ssse3_enc_preload,@function)
 .globl _gcry_aes_ssse3_enc_preload
 _gcry_aes_ssse3_enc_preload:
@@ -68,6 +70,7 @@ ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
 ##
 ##  _gcry_aes_ssse3_dec_preload
 ##
+.align 16
 ELF(.type _gcry_aes_ssse3_dec_preload,@function)
 .globl _gcry_aes_ssse3_dec_preload
 _gcry_aes_ssse3_dec_preload:
@@ -689,8 +692,11 @@ ELF(.size _gcry_aes_ssse3_schedule_core,.-_gcry_aes_ssse3_schedule_core)
 ##                                                    ##
 ########################################################
 
+SECTION_RODATA
+
 .align 16
-ELF(.type _aes_consts,@object)
+ELF(.type _aes_ssse3_consts,@object)
+_aes_ssse3_consts:
 .Laes_consts:
 _aes_consts:
 	# s0F
diff --git a/cipher/rijndael-vaes-avx2-amd64.S b/cipher/rijndael-vaes-avx2-amd64.S
index 13fe7ab0..a801ad90 100644
--- a/cipher/rijndael-vaes-avx2-amd64.S
+++ b/cipher/rijndael-vaes-avx2-amd64.S
@@ -27,6 +27,7 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
 /**********************************************************************
   helper macros
@@ -3313,6 +3314,8 @@ ELF(.size _gcry_vaes_avx2_ecb_crypt_amd64,.-_gcry_vaes_avx2_ecb_crypt_amd64)
 /**********************************************************************
   constants
  **********************************************************************/
+SECTION_RODATA
+
 ELF(.type _gcry_vaes_consts,@object)
 _gcry_vaes_consts:
 .align 32
diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S
index 64626063..b681a060 100644
--- a/cipher/salsa20-amd64.S
+++ b/cipher/salsa20-amd64.S
@@ -31,8 +31,9 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
-.align 8
+.align 16
 .globl _gcry_salsa20_amd64_keysetup
 ELF(.type  _gcry_salsa20_amd64_keysetup,@function;)
 _gcry_salsa20_amd64_keysetup:
@@ -86,7 +87,7 @@ _gcry_salsa20_amd64_keysetup:
 	ret_spec_stop
 	CFI_ENDPROC();
 
-.align 8
+.align 16
 .globl _gcry_salsa20_amd64_ivsetup
 ELF(.type  _gcry_salsa20_amd64_ivsetup,@function;)
 _gcry_salsa20_amd64_ivsetup:
@@ -102,7 +103,7 @@ _gcry_salsa20_amd64_ivsetup:
 	ret_spec_stop
 	CFI_ENDPROC();
 
-.align 8
+.align 16
 .globl _gcry_salsa20_amd64_encrypt_blocks
 ELF(.type  _gcry_salsa20_amd64_encrypt_blocks,@function;)
 _gcry_salsa20_amd64_encrypt_blocks:
diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 54ff61e4..4da0a228 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -400,8 +400,9 @@
 		BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
 
 .text
+.align 64
 
-.align 8
+.align 16
 ELF(.type   __serpent_enc_blk16,@function;)
 __serpent_enc_blk16:
 	/* input:
@@ -491,7 +492,7 @@ __serpent_enc_blk16:
 	CFI_ENDPROC();
 ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
 
-.align 8
+.align 16
 ELF(.type   __serpent_dec_blk16,@function;)
 __serpent_dec_blk16:
 	/* input:
@@ -583,7 +584,7 @@ __serpent_dec_blk16:
 	CFI_ENDPROC();
 ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_avx2_blk16
 ELF(.type   _gcry_serpent_avx2_blk16,@function;)
 _gcry_serpent_avx2_blk16:
@@ -639,7 +640,7 @@ ELF(.size _gcry_serpent_avx2_blk16,.-_gcry_serpent_avx2_blk16;)
 	vpslldq $8, tmp, tmp; \
 	vpsubq tmp, x, x;
 
-.align 8
+.align 16
 .globl _gcry_serpent_avx2_ctr_enc
 ELF(.type   _gcry_serpent_avx2_ctr_enc,@function;)
 _gcry_serpent_avx2_ctr_enc:
@@ -751,7 +752,7 @@ _gcry_serpent_avx2_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_avx2_cbc_dec
 ELF(.type   _gcry_serpent_avx2_cbc_dec,@function;)
 _gcry_serpent_avx2_cbc_dec:
@@ -804,7 +805,7 @@ _gcry_serpent_avx2_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_avx2_cfb_dec
 ELF(.type   _gcry_serpent_avx2_cfb_dec,@function;)
 _gcry_serpent_avx2_cfb_dec:
@@ -859,7 +860,7 @@ _gcry_serpent_avx2_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_avx2_ocb_enc
 ELF(.type _gcry_serpent_avx2_ocb_enc,@function;)
 
@@ -973,7 +974,7 @@ _gcry_serpent_avx2_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_avx2_ocb_dec
 ELF(.type _gcry_serpent_avx2_ocb_dec,@function;)
 
@@ -1097,7 +1098,7 @@ _gcry_serpent_avx2_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_avx2_ocb_auth
 ELF(.type _gcry_serpent_avx2_ocb_auth,@function;)
 
@@ -1200,9 +1201,13 @@ _gcry_serpent_avx2_ocb_auth:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;)
 
-.align 16
+
+SECTION_RODATA
+ELF(.type _serpent_avx2_consts,@object)
+_serpent_avx2_consts:
 
 /* For CTR-mode IV byteswap */
+.align 16
 .Lbswap128_mask:
 	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
 
diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S
index 01723a2a..e7a250d9 100644
--- a/cipher/serpent-sse2-amd64.S
+++ b/cipher/serpent-sse2-amd64.S
@@ -422,8 +422,9 @@
 		BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
 
 .text
+.align 64
 
-.align 8
+.align 16
 ELF(.type   __serpent_enc_blk8,@function;)
 __serpent_enc_blk8:
 	/* input:
@@ -513,7 +514,7 @@ __serpent_enc_blk8:
 	CFI_ENDPROC();
 ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
 
-.align 8
+.align 16
 ELF(.type   __serpent_dec_blk8,@function;)
 __serpent_dec_blk8:
 	/* input:
@@ -605,7 +606,7 @@ __serpent_dec_blk8:
 	CFI_ENDPROC();
 ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_sse2_blk8
 ELF(.type   _gcry_serpent_sse2_blk8,@function;)
 _gcry_serpent_sse2_blk8:
@@ -670,7 +671,7 @@ _gcry_serpent_sse2_blk8:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_blk8,.-_gcry_serpent_sse2_blk8;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_sse2_ctr_enc
 ELF(.type   _gcry_serpent_sse2_ctr_enc,@function;)
 _gcry_serpent_sse2_ctr_enc:
@@ -802,7 +803,7 @@ _gcry_serpent_sse2_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_sse2_cbc_dec
 ELF(.type   _gcry_serpent_sse2_cbc_dec,@function;)
 _gcry_serpent_sse2_cbc_dec:
@@ -865,7 +866,7 @@ _gcry_serpent_sse2_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_sse2_cfb_dec
 ELF(.type   _gcry_serpent_sse2_cfb_dec,@function;)
 _gcry_serpent_sse2_cfb_dec:
@@ -931,7 +932,7 @@ _gcry_serpent_sse2_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_sse2_ocb_enc
 ELF(.type _gcry_serpent_sse2_ocb_enc,@function;)
 
@@ -1045,7 +1046,7 @@ _gcry_serpent_sse2_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_sse2_ocb_dec
 ELF(.type _gcry_serpent_sse2_ocb_dec,@function;)
 
@@ -1169,7 +1170,7 @@ _gcry_serpent_sse2_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_serpent_sse2_ocb_auth
 ELF(.type _gcry_serpent_sse2_ocb_auth,@function;)
 
diff --git a/cipher/sm4-aesni-avx-amd64.S b/cipher/sm4-aesni-avx-amd64.S
index 7a99e070..bb0d20c6 100644
--- a/cipher/sm4-aesni-avx-amd64.S
+++ b/cipher/sm4-aesni-avx-amd64.S
@@ -97,9 +97,12 @@
   4-way && 8-way SM4 with AES-NI and AVX
  **********************************************************************/
 
-.text
+SECTION_RODATA
 .align 16
 
+ELF(.type _sm4_aesni_avx_consts,@object)
+_sm4_aesni_avx_consts:
+
 /*
  * Following four affine transform look-up tables are from work by
  * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
@@ -152,7 +155,10 @@
 .L0f0f0f0f:
 	.long 0x0f0f0f0f
 
-.align 8
+.text
+.align 64
+
+.align 16
 .globl _gcry_sm4_aesni_avx_expand_key
 ELF(.type   _gcry_sm4_aesni_avx_expand_key,@function;)
 _gcry_sm4_aesni_avx_expand_key:
@@ -244,7 +250,7 @@ _gcry_sm4_aesni_avx_expand_key:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_expand_key,.-_gcry_sm4_aesni_avx_expand_key;)
 
-.align 8
+.align 16
 ELF(.type   sm4_aesni_avx_crypt_blk1_4,@function;)
 sm4_aesni_avx_crypt_blk1_4:
 	/* input:
@@ -349,7 +355,7 @@ sm4_aesni_avx_crypt_blk1_4:
 	CFI_ENDPROC();
 ELF(.size sm4_aesni_avx_crypt_blk1_4,.-sm4_aesni_avx_crypt_blk1_4;)
 
-.align 8
+.align 16
 ELF(.type __sm4_crypt_blk8,@function;)
 __sm4_crypt_blk8:
 	/* input:
@@ -458,7 +464,7 @@ __sm4_crypt_blk8:
 	CFI_ENDPROC();
 ELF(.size __sm4_crypt_blk8,.-__sm4_crypt_blk8;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx_crypt_blk1_8
 ELF(.type   _gcry_sm4_aesni_avx_crypt_blk1_8,@function;)
 _gcry_sm4_aesni_avx_crypt_blk1_8:
@@ -512,7 +518,7 @@ _gcry_sm4_aesni_avx_crypt_blk1_8:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_crypt_blk1_8,.-_gcry_sm4_aesni_avx_crypt_blk1_8;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx_ctr_enc
 ELF(.type   _gcry_sm4_aesni_avx_ctr_enc,@function;)
 _gcry_sm4_aesni_avx_ctr_enc:
@@ -586,7 +592,7 @@ _gcry_sm4_aesni_avx_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_ctr_enc,.-_gcry_sm4_aesni_avx_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx_cbc_dec
 ELF(.type   _gcry_sm4_aesni_avx_cbc_dec,@function;)
 _gcry_sm4_aesni_avx_cbc_dec:
@@ -635,7 +641,7 @@ _gcry_sm4_aesni_avx_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_cbc_dec,.-_gcry_sm4_aesni_avx_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx_cfb_dec
 ELF(.type   _gcry_sm4_aesni_avx_cfb_dec,@function;)
 _gcry_sm4_aesni_avx_cfb_dec:
@@ -687,7 +693,7 @@ _gcry_sm4_aesni_avx_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_cfb_dec,.-_gcry_sm4_aesni_avx_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx_ocb_enc
 ELF(.type _gcry_sm4_aesni_avx_ocb_enc,@function;)
 
@@ -786,7 +792,7 @@ _gcry_sm4_aesni_avx_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_ocb_enc,.-_gcry_sm4_aesni_avx_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx_ocb_dec
 ELF(.type _gcry_sm4_aesni_avx_ocb_dec,@function;)
 
@@ -895,7 +901,7 @@ _gcry_sm4_aesni_avx_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx_ocb_dec,.-_gcry_sm4_aesni_avx_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx_ocb_auth
 ELF(.type _gcry_sm4_aesni_avx_ocb_auth,@function;)
 
diff --git a/cipher/sm4-aesni-avx2-amd64.S b/cipher/sm4-aesni-avx2-amd64.S
index e09fed8f..db94be90 100644
--- a/cipher/sm4-aesni-avx2-amd64.S
+++ b/cipher/sm4-aesni-avx2-amd64.S
@@ -118,9 +118,12 @@
   16-way SM4 with AES-NI and AVX
  **********************************************************************/
 
-.text
+SECTION_RODATA
 .align 16
 
+ELF(.type _sm4_aesni_avx2_consts,@object)
+_sm4_aesni_avx2_consts:
+
 /*
  * Following four affine transform look-up tables are from work by
  * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
@@ -173,7 +176,10 @@
 .L0f0f0f0f:
 	.long 0x0f0f0f0f
 
-.align 8
+.text
+.align 64
+
+.align 16
 ELF(.type   __sm4_crypt_blk16,@function;)
 __sm4_crypt_blk16:
 	/* input:
@@ -288,7 +294,7 @@ __sm4_crypt_blk16:
 	CFI_ENDPROC();
 ELF(.size __sm4_crypt_blk16,.-__sm4_crypt_blk16;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx2_crypt_blk1_16
 ELF(.type   _gcry_sm4_aesni_avx2_crypt_blk1_16,@function;)
 _gcry_sm4_aesni_avx2_crypt_blk1_16:
@@ -354,7 +360,7 @@ ELF(.size _gcry_sm4_aesni_avx2_crypt_blk1_16,.-_gcry_sm4_aesni_avx2_crypt_blk1_1
 	vpslldq $8, tmp, tmp; \
 	vpsubq tmp, x, x;
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx2_ctr_enc
 ELF(.type   _gcry_sm4_aesni_avx2_ctr_enc,@function;)
 _gcry_sm4_aesni_avx2_ctr_enc:
@@ -464,7 +470,7 @@ _gcry_sm4_aesni_avx2_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_ctr_enc,.-_gcry_sm4_aesni_avx2_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx2_cbc_dec
 ELF(.type   _gcry_sm4_aesni_avx2_cbc_dec,@function;)
 _gcry_sm4_aesni_avx2_cbc_dec:
@@ -515,7 +521,7 @@ _gcry_sm4_aesni_avx2_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_cbc_dec,.-_gcry_sm4_aesni_avx2_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx2_cfb_dec
 ELF(.type   _gcry_sm4_aesni_avx2_cfb_dec,@function;)
 _gcry_sm4_aesni_avx2_cfb_dec:
@@ -568,7 +574,7 @@ _gcry_sm4_aesni_avx2_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_cfb_dec,.-_gcry_sm4_aesni_avx2_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx2_ocb_enc
 ELF(.type _gcry_sm4_aesni_avx2_ocb_enc,@function;)
 
@@ -680,7 +686,7 @@ _gcry_sm4_aesni_avx2_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_ocb_enc,.-_gcry_sm4_aesni_avx2_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx2_ocb_dec
 ELF(.type _gcry_sm4_aesni_avx2_ocb_dec,@function;)
 
@@ -802,7 +808,7 @@ _gcry_sm4_aesni_avx2_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_aesni_avx2_ocb_dec,.-_gcry_sm4_aesni_avx2_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_aesni_avx2_ocb_auth
 ELF(.type _gcry_sm4_aesni_avx2_ocb_auth,@function;)
 
diff --git a/cipher/sm4-gfni-avx2-amd64.S b/cipher/sm4-gfni-avx2-amd64.S
index 4ec0ea39..7c87400e 100644
--- a/cipher/sm4-gfni-avx2-amd64.S
+++ b/cipher/sm4-gfni-avx2-amd64.S
@@ -87,9 +87,12 @@
 #define RB2x         %xmm14
 #define RB3x         %xmm15
 
-.text
+SECTION_RODATA
 .align 32
 
+ELF(.type _sm4_gfni_avx2_consts,@object)
+_sm4_gfni_avx2_consts:
+
 /* Affine transform, SM4 field to AES field */
 .Lpre_affine_s:
 	.byte 0x52, 0xbc, 0x2d, 0x02, 0x9e, 0x25, 0xac, 0x34
@@ -133,7 +136,10 @@
 .Lbswap32_mask:
 	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 
-.align 8
+.text
+.align 64
+
+.align 16
 .globl _gcry_sm4_gfni_avx2_expand_key
 ELF(.type   _gcry_sm4_gfni_avx2_expand_key,@function;)
 _gcry_sm4_gfni_avx2_expand_key:
@@ -216,7 +222,7 @@ _gcry_sm4_gfni_avx2_expand_key:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_gfni_avx2_expand_key,.-_gcry_sm4_gfni_avx2_expand_key;)
 
-.align 8
+.align 16
 ELF(.type   sm4_gfni_avx2_crypt_blk1_4,@function;)
 sm4_gfni_avx2_crypt_blk1_4:
 	/* input:
@@ -314,7 +320,7 @@ sm4_gfni_avx2_crypt_blk1_4:
 	CFI_ENDPROC();
 ELF(.size sm4_gfni_avx2_crypt_blk1_4,.-sm4_gfni_avx2_crypt_blk1_4;)
 
-.align 8
+.align 16
 ELF(.type __sm4_gfni_crypt_blk8,@function;)
 __sm4_gfni_crypt_blk8:
 	/* input:
@@ -415,7 +421,7 @@ __sm4_gfni_crypt_blk8:
 	CFI_ENDPROC();
 ELF(.size __sm4_gfni_crypt_blk8,.-__sm4_gfni_crypt_blk8;)
 
-.align 8
+.align 16
 ELF(.type   _gcry_sm4_gfni_avx2_crypt_blk1_8,@function;)
 _gcry_sm4_gfni_avx2_crypt_blk1_8:
 	/* input:
@@ -472,7 +478,7 @@ ELF(.size _gcry_sm4_gfni_avx2_crypt_blk1_8,.-_gcry_sm4_gfni_avx2_crypt_blk1_8;)
   16-way SM4 with GFNI and AVX2
  **********************************************************************/
 
-.align 8
+.align 16
 ELF(.type   __sm4_gfni_crypt_blk16,@function;)
 __sm4_gfni_crypt_blk16:
 	/* input:
@@ -573,7 +579,7 @@ __sm4_gfni_crypt_blk16:
 	CFI_ENDPROC();
 ELF(.size __sm4_gfni_crypt_blk16,.-__sm4_gfni_crypt_blk16;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_gfni_avx2_crypt_blk1_16
 ELF(.type   _gcry_sm4_gfni_avx2_crypt_blk1_16,@function;)
 _gcry_sm4_gfni_avx2_crypt_blk1_16:
@@ -641,7 +647,7 @@ ELF(.size _gcry_sm4_gfni_avx2_crypt_blk1_16,.-_gcry_sm4_gfni_avx2_crypt_blk1_16;
 	vpslldq $8, tmp, tmp; \
 	vpsubq tmp, x, x;
 
-.align 8
+.align 16
 .globl _gcry_sm4_gfni_avx2_ctr_enc
 ELF(.type   _gcry_sm4_gfni_avx2_ctr_enc,@function;)
 _gcry_sm4_gfni_avx2_ctr_enc:
@@ -751,7 +757,7 @@ _gcry_sm4_gfni_avx2_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_gfni_avx2_ctr_enc,.-_gcry_sm4_gfni_avx2_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_gfni_avx2_cbc_dec
 ELF(.type   _gcry_sm4_gfni_avx2_cbc_dec,@function;)
 _gcry_sm4_gfni_avx2_cbc_dec:
@@ -802,7 +808,7 @@ _gcry_sm4_gfni_avx2_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_gfni_avx2_cbc_dec,.-_gcry_sm4_gfni_avx2_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_gfni_avx2_cfb_dec
 ELF(.type   _gcry_sm4_gfni_avx2_cfb_dec,@function;)
 _gcry_sm4_gfni_avx2_cfb_dec:
@@ -855,7 +861,7 @@ _gcry_sm4_gfni_avx2_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_gfni_avx2_cfb_dec,.-_gcry_sm4_gfni_avx2_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_gfni_avx2_ocb_enc
 ELF(.type _gcry_sm4_gfni_avx2_ocb_enc,@function;)
 
@@ -967,7 +973,7 @@ _gcry_sm4_gfni_avx2_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_gfni_avx2_ocb_enc,.-_gcry_sm4_gfni_avx2_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_gfni_avx2_ocb_dec
 ELF(.type _gcry_sm4_gfni_avx2_ocb_dec,@function;)
 
@@ -1089,7 +1095,7 @@ _gcry_sm4_gfni_avx2_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_sm4_gfni_avx2_ocb_dec,.-_gcry_sm4_gfni_avx2_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_sm4_gfni_avx2_ocb_auth
 ELF(.type _gcry_sm4_gfni_avx2_ocb_auth,@function;)
 
diff --git a/cipher/sm4-gfni-avx512-amd64.S b/cipher/sm4-gfni-avx512-amd64.S
index 0f9899d4..00a1c921 100644
--- a/cipher/sm4-gfni-avx512-amd64.S
+++ b/cipher/sm4-gfni-avx512-amd64.S
@@ -103,7 +103,7 @@
 #define RB2z         %zmm14
 #define RB3z         %zmm15
 
-.text
+SECTION_RODATA
 .align 32
 
 /* Affine transform, SM4 field to AES field */
@@ -146,6 +146,9 @@
 	.quad 2, 0
 	.quad 3, 0
 
+.text
+.align 64
+
 .align 16
 .globl _gcry_sm4_gfni_avx512_expand_key
 ELF(.type   _gcry_sm4_gfni_avx512_expand_key,@function;)
diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S
index 8998d296..b19a5b1b 100644
--- a/cipher/twofish-amd64.S
+++ b/cipher/twofish-amd64.S
@@ -26,6 +26,7 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
 /* structure of TWOFISH_context: */
 #define s0 0
@@ -161,7 +162,7 @@
 	xorl (w + 4 * (m))(CTX), x; \
 	movl x, (4 * (n))(out);
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_encrypt_block
 ELF(.type   _gcry_twofish_amd64_encrypt_block,@function;)
 
@@ -215,7 +216,7 @@ _gcry_twofish_amd64_encrypt_block:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_decrypt_block
 ELF(.type   _gcry_twofish_amd64_decrypt_block,@function;)
 
@@ -486,7 +487,7 @@ ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;
 	rorq $32,			RAB2; \
 	outunpack3(RAB, 2);
 
-.align 8
+.align 16
 ELF(.type __twofish_enc_blk3,@function;)
 
 __twofish_enc_blk3:
@@ -515,7 +516,7 @@ __twofish_enc_blk3:
 	CFI_ENDPROC();
 ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)
 
-.align 8
+.align 16
 ELF(.type  __twofish_dec_blk3,@function;)
 
 __twofish_dec_blk3:
@@ -544,7 +545,7 @@ __twofish_dec_blk3:
 	CFI_ENDPROC();
 ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_blk3
 ELF(.type   _gcry_twofish_amd64_blk3,@function;)
 _gcry_twofish_amd64_blk3:
@@ -618,7 +619,7 @@ _gcry_twofish_amd64_blk3:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_blk3,.-_gcry_twofish_amd64_blk3;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_ctr_enc
 ELF(.type   _gcry_twofish_amd64_ctr_enc,@function;)
 _gcry_twofish_amd64_ctr_enc:
@@ -719,7 +720,7 @@ _gcry_twofish_amd64_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_cbc_dec
 ELF(.type   _gcry_twofish_amd64_cbc_dec,@function;)
 _gcry_twofish_amd64_cbc_dec:
@@ -804,7 +805,7 @@ _gcry_twofish_amd64_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_cfb_dec
 ELF(.type   _gcry_twofish_amd64_cfb_dec,@function;)
 _gcry_twofish_amd64_cfb_dec:
@@ -889,7 +890,7 @@ _gcry_twofish_amd64_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_ocb_enc
 ELF(.type   _gcry_twofish_amd64_ocb_enc,@function;)
 _gcry_twofish_amd64_ocb_enc:
@@ -1015,7 +1016,7 @@ _gcry_twofish_amd64_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_ocb_dec
 ELF(.type   _gcry_twofish_amd64_ocb_dec,@function;)
 _gcry_twofish_amd64_ocb_dec:
@@ -1149,7 +1150,7 @@ _gcry_twofish_amd64_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_amd64_ocb_auth
 ELF(.type   _gcry_twofish_amd64_ocb_auth,@function;)
 _gcry_twofish_amd64_ocb_auth:
diff --git a/cipher/twofish-avx2-amd64.S b/cipher/twofish-avx2-amd64.S
index 0cb9a64c..19fe0d9c 100644
--- a/cipher/twofish-avx2-amd64.S
+++ b/cipher/twofish-avx2-amd64.S
@@ -27,6 +27,7 @@
 #include "asm-common-amd64.h"
 
 .text
+.align 64
 
 /* structure of TWOFISH_context: */
 #define s0	0
@@ -402,7 +403,7 @@
 	outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
 	outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
 
-.align 8
+.align 16
 ELF(.type __twofish_enc_blk16,@function;)
 __twofish_enc_blk16:
 	/* input:
@@ -435,7 +436,7 @@ __twofish_enc_blk16:
 	CFI_ENDPROC();
 ELF(.size __twofish_enc_blk16,.-__twofish_enc_blk16;)
 
-.align 8
+.align 16
 ELF(.type __twofish_dec_blk16,@function;)
 __twofish_dec_blk16:
 	/* input:
@@ -468,7 +469,7 @@ __twofish_dec_blk16:
 	CFI_ENDPROC();
 ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_avx2_blk16
 ELF(.type   _gcry_twofish_avx2_blk16,@function;)
 _gcry_twofish_avx2_blk16:
@@ -520,7 +521,7 @@ ELF(.size _gcry_twofish_avx2_blk16,.-_gcry_twofish_avx2_blk16;)
 	vpslldq $8, tmp, tmp; \
 	vpsubq tmp, x, x;
 
-.align 8
+.align 16
 .globl _gcry_twofish_avx2_ctr_enc
 ELF(.type   _gcry_twofish_avx2_ctr_enc,@function;)
 _gcry_twofish_avx2_ctr_enc:
@@ -632,7 +633,7 @@ _gcry_twofish_avx2_ctr_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_ctr_enc,.-_gcry_twofish_avx2_ctr_enc;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_avx2_cbc_dec
 ELF(.type   _gcry_twofish_avx2_cbc_dec,@function;)
 _gcry_twofish_avx2_cbc_dec:
@@ -685,7 +686,7 @@ _gcry_twofish_avx2_cbc_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_cbc_dec,.-_gcry_twofish_avx2_cbc_dec;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_avx2_cfb_dec
 ELF(.type   _gcry_twofish_avx2_cfb_dec,@function;)
 _gcry_twofish_avx2_cfb_dec:
@@ -740,7 +741,7 @@ _gcry_twofish_avx2_cfb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_cfb_dec,.-_gcry_twofish_avx2_cfb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_avx2_ocb_enc
 ELF(.type _gcry_twofish_avx2_ocb_enc,@function;)
 
@@ -854,7 +855,7 @@ _gcry_twofish_avx2_ocb_enc:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_ocb_enc,.-_gcry_twofish_avx2_ocb_enc;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_avx2_ocb_dec
 ELF(.type _gcry_twofish_avx2_ocb_dec,@function;)
 
@@ -979,7 +980,7 @@ _gcry_twofish_avx2_ocb_dec:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_ocb_dec,.-_gcry_twofish_avx2_ocb_dec;)
 
-.align 8
+.align 16
 .globl _gcry_twofish_avx2_ocb_auth
 ELF(.type _gcry_twofish_avx2_ocb_auth,@function;)
 
@@ -1082,10 +1083,13 @@ _gcry_twofish_avx2_ocb_auth:
 	CFI_ENDPROC();
 ELF(.size _gcry_twofish_avx2_ocb_auth,.-_gcry_twofish_avx2_ocb_auth;)
 
+SECTION_RODATA
+
 .align 16
 
 /* For CTR-mode IV byteswap */
- _gcry_twofish_bswap128_mask:
+ELF(.type _gcry_twofish_bswap128_mask,@object)
+_gcry_twofish_bswap128_mask:
 .Lbswap128_mask:
 	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
 ELF(.size _gcry_twofish_bswap128_mask,.-_gcry_twofish_bswap128_mask;)
-- 
2.37.2
