[PATCH 2/7] amd64-asm: move constant data to read-only section for hash/mac algos

Jussi Kivilinna jussi.kivilinna@iki.fi
Tue Jan 17 20:17:36 CET 2023


* cipher/asm-common-amd64.h (SECTION_RODATA): New.
* cipher/blake2b-amd64-avx2.S: Use read-only section for constant
data; Align text section to 64 bytes.
* cipher/blake2b-amd64-avx512.S: Likewise.
* cipher/blake2s-amd64-avx.S: Likewise.
* cipher/blake2s-amd64-avx512.S: Likewise.
* cipher/poly1305-amd64-avx512.S: Likewise.
* cipher/sha1-avx-amd64.S: Likewise.
* cipher/sha1-avx-bmi2-amd64.S: Likewise.
* cipher/sha1-avx2-bmi2-amd64.S: Likewise.
* cipher/sha1-ssse3-amd64.S: Likewise.
* cipher/sha256-avx-amd64.S: Likewise.
* cipher/sha256-avx2-bmi2-amd64.S: Likewise.
* cipher/sha256-ssse3-amd64.S: Likewise.
* cipher/sha512-avx-amd64.S: Likewise.
* cipher/sha512-avx2-bmi2-amd64.S: Likewise.
* cipher/sha512-avx512-amd64.S: Likewise.
* cipher/sha512-ssse3-amd64.S: Likewise.
* cipher/sm3-avx-bmi2-amd64.S: Likewise.
* cipher/whirlpool-sse2-amd64.S: Likewise.
--
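
The change is mechanical and identical in every file touched: constant
tables that were previously emitted into the executable .text section
are moved into a read-only data section selected through the new
SECTION_RODATA macro, and code emission is restarted with ".text"
afterwards. A minimal sketch of the pattern (the labels here are
hypothetical, not taken from any one file):

    /* Before: constants end up in the executable .text section. */
    .text
    .align 16
    .Lconst:
            .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f

    /* After: constants land in .rodata (.rdata on Windows) and the
     * code section is reopened for the function that follows. */
    SECTION_RODATA
    .align 16
    ELF(.type _example_consts,@object)
    _example_consts:
    .Lconst:
            .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f

    .text
    .align 64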

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 cipher/asm-common-amd64.h       |  6 ++++++
 cipher/blake2b-amd64-avx2.S     |  7 ++++---
 cipher/blake2b-amd64-avx512.S   | 10 ++++++----
 cipher/blake2s-amd64-avx.S      |  9 ++++++---
 cipher/blake2s-amd64-avx512.S   | 10 ++++++----
 cipher/poly1305-amd64-avx512.S  |  7 +++++--
 cipher/sha1-avx-amd64.S         |  8 ++++++--
 cipher/sha1-avx-bmi2-amd64.S    |  9 +++++++--
 cipher/sha1-avx2-bmi2-amd64.S   |  9 +++++++--
 cipher/sha1-ssse3-amd64.S       |  9 +++++++--
 cipher/sha256-avx-amd64.S       |  7 ++++++-
 cipher/sha256-avx2-bmi2-amd64.S |  8 +++++++-
 cipher/sha256-ssse3-amd64.S     |  7 ++++++-
 cipher/sha512-avx-amd64.S       |  7 ++++++-
 cipher/sha512-avx2-bmi2-amd64.S |  7 ++++++-
 cipher/sha512-avx512-amd64.S    |  4 +++-
 cipher/sha512-ssse3-amd64.S     |  7 ++++++-
 cipher/sm3-avx-bmi2-amd64.S     |  6 ++++--
 cipher/whirlpool-sse2-amd64.S   |  2 +-
 19 files changed, 105 insertions(+), 34 deletions(-)
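
A quick way to sanity-check the result after a build (the object file
path below is illustrative; the exact libtool object name depends on
the build tree): section sizes reported by "objdump -h" should now show
the constant data accounted to .rodata (.rdata on Windows) instead of
.text:

    $ objdump -h cipher/.libs/libcipher_la-blake2b-amd64-avx2.o \
          | grep -E 'text|rodata'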

diff --git a/cipher/asm-common-amd64.h b/cipher/asm-common-amd64.h
index d9bbc01b..870fef9a 100644
--- a/cipher/asm-common-amd64.h
+++ b/cipher/asm-common-amd64.h
@@ -29,6 +29,12 @@
 # define ELF(...) /*_*/
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define SECTION_RODATA .section .rdata
+#else
+# define SECTION_RODATA .section .rodata
+#endif
+
 #ifdef __PIC__
 #  define rRIP (%rip)
 #else
diff --git a/cipher/blake2b-amd64-avx2.S b/cipher/blake2b-amd64-avx2.S
index 3601b65f..43c2cce1 100644
--- a/cipher/blake2b-amd64-avx2.S
+++ b/cipher/blake2b-amd64-avx2.S
@@ -31,8 +31,6 @@
 
 #include "asm-common-amd64.h"
 
-.text
-
 /* register macros */
 #define RSTATE  %rdi
 #define RINBLKS %rsi
@@ -185,8 +183,10 @@
         G2(ROW1, ROW2, ROW3, ROW4, m4); \
         UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4);
 
-blake2b_data:
+SECTION_RODATA
 .align 32
+ELF(.type _blake2b_avx2_data,@object;)
+_blake2b_avx2_data:
 .Liv:
         .quad 0x6a09e667f3bcc908, 0xbb67ae8584caa73b
         .quad 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1
@@ -197,6 +197,7 @@ blake2b_data:
 .Lshuf_ror24:
         .byte 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10
 
+.text
 .align 64
 .globl _gcry_blake2b_transform_amd64_avx2
 ELF(.type _gcry_blake2b_transform_amd64_avx2,@function;)
diff --git a/cipher/blake2b-amd64-avx512.S b/cipher/blake2b-amd64-avx512.S
index 18b0c3ad..fe938730 100644
--- a/cipher/blake2b-amd64-avx512.S
+++ b/cipher/blake2b-amd64-avx512.S
@@ -31,8 +31,6 @@
 
 #include "asm-common-amd64.h"
 
-.text
-
 /* register macros */
 #define RSTATE  %rdi
 #define RINBLKS %rsi
@@ -180,9 +178,11 @@
         G2(ROW1, ROW2, ROW3, ROW4, m4); \
         UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4)
 
-ELF(.type blake2b_data,@object;)
-blake2b_data:
+SECTION_RODATA
+
 .align 32
+ELF(.type _blake2b_avx512_data,@object;)
+_blake2b_avx512_data:
 .Liv:
         .quad 0x6a09e667f3bcc908, 0xbb67ae8584caa73b
         .quad 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1
@@ -209,6 +209,8 @@ blake2b_data:
 .Lgmask9:
         GEN_GMASK(10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0)
 
+.text
+
 .align 64
 .globl _gcry_blake2b_transform_amd64_avx512
 ELF(.type _gcry_blake2b_transform_amd64_avx512,@function;)
diff --git a/cipher/blake2s-amd64-avx.S b/cipher/blake2s-amd64-avx.S
index 5094b4c1..44b82ab2 100644
--- a/cipher/blake2s-amd64-avx.S
+++ b/cipher/blake2s-amd64-avx.S
@@ -31,8 +31,6 @@
 
 #include "asm-common-amd64.h"
 
-.text
-
 /* register macros */
 #define RSTATE  %rdi
 #define RINBLKS %rsi
@@ -171,8 +169,11 @@
         G2(ROW1, ROW2, ROW3, ROW4, m4); \
         UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4);
 
-blake2s_data:
+SECTION_RODATA
+
 .align 16
+ELF(.type _blake2s_avx_data,@object;)
+_blake2s_avx_data:
 .Liv:
         .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
         .long 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
@@ -181,6 +182,8 @@ blake2s_data:
 .Lshuf_ror8:
         .byte 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12
 
+.text
+
 .align 64
 .globl _gcry_blake2s_transform_amd64_avx
 ELF(.type _gcry_blake2s_transform_amd64_avx,@function;)
diff --git a/cipher/blake2s-amd64-avx512.S b/cipher/blake2s-amd64-avx512.S
index ddcdfd67..e2da2a18 100644
--- a/cipher/blake2s-amd64-avx512.S
+++ b/cipher/blake2s-amd64-avx512.S
@@ -31,8 +31,6 @@
 
 #include "asm-common-amd64.h"
 
-.text
-
 /* register macros */
 #define RSTATE  %rdi
 #define RINBLKS %rsi
@@ -164,13 +162,17 @@
         G2(ROW1, ROW2, ROW3, ROW4, m4); \
         UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4);
 
-ELF(.type blake2s_data,@object;)
-blake2s_data:
+SECTION_RODATA
+
+ELF(.type _blake2s_avx512_data,@object;)
 .align 16
+_blake2s_avx512_data:
 .Liv:
         .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
         .long 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
 
+.text
+
 .align 64
 .globl _gcry_blake2s_transform_amd64_avx512
 ELF(.type _gcry_blake2s_transform_amd64_avx512,@function;)
diff --git a/cipher/poly1305-amd64-avx512.S b/cipher/poly1305-amd64-avx512.S
index 9beed8ad..cf176129 100644
--- a/cipher/poly1305-amd64-avx512.S
+++ b/cipher/poly1305-amd64-avx512.S
@@ -44,7 +44,7 @@
 
 .intel_syntax noprefix
 
-.text
+SECTION_RODATA
 
 ELF(.type _gcry_poly1305_avx512_consts,@object)
 _gcry_poly1305_avx512_consts:
@@ -1575,7 +1575,10 @@ ELF(.size _gcry_poly1305_avx512_consts,.-_gcry_poly1305_avx512_consts)
 ;; arg3 - Input/output hash
 ;; arg4 - Poly1305 key
 */
-.align 32
+
+.text
+
+.align 64
 .globl _gcry_poly1305_amd64_avx512_blocks
 ELF(.type _gcry_poly1305_amd64_avx512_blocks,@function;)
 _gcry_poly1305_amd64_avx512_blocks:
diff --git a/cipher/sha1-avx-amd64.S b/cipher/sha1-avx-amd64.S
index acada960..5b9e0500 100644
--- a/cipher/sha1-avx-amd64.S
+++ b/cipher/sha1-avx-amd64.S
@@ -47,7 +47,10 @@
 
 /* Constants */
 
-.text
+SECTION_RODATA
+
+ELF(.type _sha1_avx_consts,@object)
+_sha1_avx_consts:
 #define K1  0x5A827999
 #define K2  0x6ED9EBA1
 #define K3  0x8F1BBCDC
@@ -195,6 +198,7 @@
 	vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \
 	vmovdqa tmp0, WK((i)&~3);
 
+.text
 
 /*
  * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
@@ -205,7 +209,7 @@
  */
 .globl _gcry_sha1_transform_amd64_avx
 ELF(.type _gcry_sha1_transform_amd64_avx,@function)
-.align 16
+.align 64
 _gcry_sha1_transform_amd64_avx:
   /* input:
    *	%rdi: ctx, CTX
diff --git a/cipher/sha1-avx-bmi2-amd64.S b/cipher/sha1-avx-bmi2-amd64.S
index 5f4b9e69..9df147c2 100644
--- a/cipher/sha1-avx-bmi2-amd64.S
+++ b/cipher/sha1-avx-bmi2-amd64.S
@@ -48,7 +48,11 @@
 
 /* Constants */
 
-.text
+SECTION_RODATA
+
+ELF(.type _sha1_avx_bmi2_consts,@object)
+_sha1_avx_bmi2_consts:
+
 .align 16
 .Lbswap_shufb_ctl:
 	.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
@@ -194,6 +198,7 @@
 	vpaddd K, W, tmp0; \
 	vmovdqa tmp0, WK((i)&~3);
 
+.text
 
 /*
  * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
@@ -204,7 +209,7 @@
  */
 .globl _gcry_sha1_transform_amd64_avx_bmi2
 ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function)
-.align 16
+.align 64
 _gcry_sha1_transform_amd64_avx_bmi2:
   /* input:
    *	%rdi: ctx, CTX
diff --git a/cipher/sha1-avx2-bmi2-amd64.S b/cipher/sha1-avx2-bmi2-amd64.S
index ed52761b..0db1d9b9 100644
--- a/cipher/sha1-avx2-bmi2-amd64.S
+++ b/cipher/sha1-avx2-bmi2-amd64.S
@@ -48,9 +48,13 @@
 
 /* Constants */
 
+SECTION_RODATA
+
 #define WK_STACK_WORDS (80 * 2)
 
-.text
+ELF(.type _sha1_avx2_bmi2_consts,@object)
+_sha1_avx2_bmi2_consts:
+
 .align 16
 .Lbswap_shufb_ctl:
 	.long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
@@ -200,6 +204,7 @@
 	vpaddd K, W, tmp0; \
 	vmovdqa tmp0, PRE_WK((i)&~3);
 
+.text
 
 /*
  * Transform 2*nblks*64 bytes (2*nblks*16 32-bit words) at DATA.
@@ -210,7 +215,7 @@
  */
 .globl _gcry_sha1_transform_amd64_avx2_bmi2
 ELF(.type _gcry_sha1_transform_amd64_avx2_bmi2,@function)
-.align 16
+.align 64
 _gcry_sha1_transform_amd64_avx2_bmi2:
   /* input:
    *	%rdi: ctx, CTX
diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S
index f09b1de1..afea6501 100644
--- a/cipher/sha1-ssse3-amd64.S
+++ b/cipher/sha1-ssse3-amd64.S
@@ -47,7 +47,11 @@
 
 /* Constants */
 
-.text
+SECTION_RODATA
+
+ELF(.type _sha1_ssse3_consts,@object)
+_sha1_ssse3_consts:
+
 #define K1  0x5A827999
 #define K2  0x6ED9EBA1
 #define K3  0x8F1BBCDC
@@ -207,6 +211,7 @@
 
 #define CLEAR_REG(reg) pxor reg, reg;
 
+.text
 
 /*
  * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
@@ -217,7 +222,7 @@
  */
 .globl _gcry_sha1_transform_amd64_ssse3
 ELF(.type _gcry_sha1_transform_amd64_ssse3,@function)
-.align 16
+.align 64
 _gcry_sha1_transform_amd64_ssse3:
   /* input:
    *	%rdi: ctx, CTX
diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S
index be8a799d..8b2cbfe8 100644
--- a/cipher/sha256-avx-amd64.S
+++ b/cipher/sha256-avx-amd64.S
@@ -342,7 +342,7 @@
 .text
 .globl _gcry_sha256_transform_amd64_avx
 ELF(.type  _gcry_sha256_transform_amd64_avx,@function;)
-.align 16
+.align 64
 _gcry_sha256_transform_amd64_avx:
 	CFI_STARTPROC()
 	vzeroupper
@@ -475,6 +475,11 @@ _gcry_sha256_transform_amd64_avx:
 	CFI_ENDPROC()
 
 
+SECTION_RODATA
+
+ELF(.type _sha256_avx_consts,@object)
+_sha256_avx_consts:
+
 .align 16
 .LK256:
 	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S
index 60ad442c..93919ead 100644
--- a/cipher/sha256-avx2-bmi2-amd64.S
+++ b/cipher/sha256-avx2-bmi2-amd64.S
@@ -247,7 +247,7 @@
 .text
 .globl _gcry_sha256_transform_amd64_avx2
 ELF(.type _gcry_sha256_transform_amd64_avx2,@function)
-.align 32
+.align 64
 _gcry_sha256_transform_amd64_avx2:
 	CFI_STARTPROC()
 	xor eax, eax
@@ -477,6 +477,12 @@ _gcry_sha256_transform_amd64_avx2:
 	ret_spec_stop
 	CFI_ENDPROC()
 
+
+SECTION_RODATA
+
+ELF(.type _sha256_avx2_consts,@object)
+_sha256_avx2_consts:
+
 .align 64
 .LK256:
 	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S
index 401ff6f4..41c15420 100644
--- a/cipher/sha256-ssse3-amd64.S
+++ b/cipher/sha256-ssse3-amd64.S
@@ -349,7 +349,7 @@
 .text
 .globl _gcry_sha256_transform_amd64_ssse3
 ELF(.type  _gcry_sha256_transform_amd64_ssse3,@function;)
-.align 16
+.align 64
 _gcry_sha256_transform_amd64_ssse3:
 	CFI_STARTPROC()
 	push	rbx
@@ -497,6 +497,11 @@ _gcry_sha256_transform_amd64_ssse3:
 	CFI_ENDPROC()
 
 
+SECTION_RODATA
+
+ELF(.type _sha256_ssse3_consts,@object)
+_sha256_ssse3_consts:
+
 .align 16
 .LK256:
 	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
diff --git a/cipher/sha512-avx-amd64.S b/cipher/sha512-avx-amd64.S
index bfc4435d..e8663756 100644
--- a/cipher/sha512-avx-amd64.S
+++ b/cipher/sha512-avx-amd64.S
@@ -246,7 +246,7 @@
 */
 .globl _gcry_sha512_transform_amd64_avx
 ELF(.type _gcry_sha512_transform_amd64_avx,@function;)
-.align 16
+.align 64
 _gcry_sha512_transform_amd64_avx:
 	CFI_STARTPROC()
 	xor eax, eax
@@ -408,6 +408,11 @@ _gcry_sha512_transform_amd64_avx:
 ;;; Binary Data
 */
 
+SECTION_RODATA
+
+ELF(.type _sha512_avx_consts,@object)
+_sha512_avx_consts:
+
 .align 16
 
 /* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */
diff --git a/cipher/sha512-avx2-bmi2-amd64.S b/cipher/sha512-avx2-bmi2-amd64.S
index a431e196..6e6e1e43 100644
--- a/cipher/sha512-avx2-bmi2-amd64.S
+++ b/cipher/sha512-avx2-bmi2-amd64.S
@@ -274,7 +274,7 @@
 */
 .globl _gcry_sha512_transform_amd64_avx2
 ELF(.type _gcry_sha512_transform_amd64_avx2,@function;)
-.align 16
+.align 64
 _gcry_sha512_transform_amd64_avx2:
 	CFI_STARTPROC()
 	xor eax, eax
@@ -445,6 +445,11 @@ _gcry_sha512_transform_amd64_avx2:
 /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
 /*;; Binary Data */
 
+SECTION_RODATA
+
+ELF(.type _sha512_avx2_consts,@object)
+_sha512_avx2_consts:
+
 .align 64
 /* K[t] used in SHA512 hashing */
 .LK512:
diff --git a/cipher/sha512-avx512-amd64.S b/cipher/sha512-avx512-amd64.S
index 431fb3e9..f113824c 100644
--- a/cipher/sha512-avx512-amd64.S
+++ b/cipher/sha512-avx512-amd64.S
@@ -256,7 +256,7 @@
 */
 .globl _gcry_sha512_transform_amd64_avx512
 ELF(.type _gcry_sha512_transform_amd64_avx512,@function;)
-.align 16
+.align 64
 _gcry_sha512_transform_amd64_avx512:
 	CFI_STARTPROC()
 	xor	eax, eax
@@ -404,6 +404,8 @@ ELF(.size _gcry_sha512_transform_amd64_avx512,.-_gcry_sha512_transform_amd64_avx
 /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
 /*;; Binary Data */
 
+SECTION_RODATA
+
 ELF(.type _gcry_sha512_avx512_consts,@object)
 _gcry_sha512_avx512_consts:
 .align 64
diff --git a/cipher/sha512-ssse3-amd64.S b/cipher/sha512-ssse3-amd64.S
index 9cc30892..0a26f215 100644
--- a/cipher/sha512-ssse3-amd64.S
+++ b/cipher/sha512-ssse3-amd64.S
@@ -249,7 +249,7 @@
 */
 .globl _gcry_sha512_transform_amd64_ssse3
 ELF(.type _gcry_sha512_transform_amd64_ssse3,@function;)
-.align 16
+.align 64
 _gcry_sha512_transform_amd64_ssse3:
 	CFI_STARTPROC()
 	xor eax, eax
@@ -414,6 +414,11 @@ _gcry_sha512_transform_amd64_ssse3:
 ;;; Binary Data
 */
 
+SECTION_RODATA
+
+ELF(.type _sha512_ssse3_consts,@object)
+_sha512_ssse3_consts:
+
 .align 16
 
 /* Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. */
diff --git a/cipher/sm3-avx-bmi2-amd64.S b/cipher/sm3-avx-bmi2-amd64.S
index d9b6206a..9066be33 100644
--- a/cipher/sm3-avx-bmi2-amd64.S
+++ b/cipher/sm3-avx-bmi2-amd64.S
@@ -41,7 +41,7 @@
 
 /* Constants */
 
-.text
+SECTION_RODATA
 .align 16
 ELF(.type _gcry_sm3_avx2_consts,@object)
 _gcry_sm3_avx2_consts:
@@ -334,6 +334,8 @@ ELF(.size _gcry_sm3_avx2_consts,.-_gcry_sm3_avx2_consts)
         vpxor w0, XTMP4, XTMP1; \
         vmovdqa XTMP1, XW_W1W2_ADDR((round), 0);
 
+.text
+
 /*
  * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
  *
@@ -343,7 +345,7 @@ ELF(.size _gcry_sm3_avx2_consts,.-_gcry_sm3_avx2_consts)
  */
 .globl _gcry_sm3_transform_amd64_avx_bmi2
 ELF(.type _gcry_sm3_transform_amd64_avx_bmi2,@function)
-.align 16
+.align 64
 _gcry_sm3_transform_amd64_avx_bmi2:
   /* input:
    *	%rdi: ctx, CTX
diff --git a/cipher/whirlpool-sse2-amd64.S b/cipher/whirlpool-sse2-amd64.S
index 37648faa..b26dfed2 100644
--- a/cipher/whirlpool-sse2-amd64.S
+++ b/cipher/whirlpool-sse2-amd64.S
@@ -152,7 +152,7 @@
 #define RB_ADD6 RB6, RB7, RB0, RB1, RB2, RB3, RB4, RB5
 #define RB_ADD7 RB7, RB0, RB1, RB2, RB3, RB4, RB5, RB6
 
-.align 8
+.align 64
 .globl _gcry_whirlpool_transform_amd64
 ELF(.type  _gcry_whirlpool_transform_amd64,@function;)
 
-- 
2.37.2