[PATCH 7/7] aarch64-asm: move constant data to read-only section
Jussi Kivilinna
jussi.kivilinna at iki.fi
Tue Jan 17 20:17:41 CET 2023
* cipher/asm-common-aarch64.h (SECTION_RODATA)
(GET_DATA_POINTER): New.
(GET_LOCAL_POINTER): Remove.
* cipher/camellia-aarch64.S: Move constant data to read-only section;
use GET_DATA_POINTER instead of GET_LOCAL_POINTER; remove unneeded
'.ltorg'.
* cipher/chacha20-aarch64.S: Likewise.
* cipher/cipher-gcm-armv8-aarch64-ce.S: Likewise.
* cipher/crc-armv8-aarch64-ce.S: Likewise.
* cipher/rijndael-aarch64.S: Likewise.
* cipher/sha1-armv8-aarch64-ce.S: Likewise.
* cipher/sha256-armv8-aarch64-ce.S: Likewise.
* cipher/sm3-aarch64.S: Likewise.
* cipher/sm3-armv8-aarch64-ce.S: Likewise.
* cipher/sm4-aarch64.S: Likewise.
* cipher/sm4-armv9-aarch64-sve-ce.S: Likewise.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
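Notes:

GET_LOCAL_POINTER expanded to a single 'adr', which is PC-relative
with only a +-1 MiB reach and therefore needs its target to stay
close by in the same section. With the constant tables moved from
.text to .rodata that distance is no longer guaranteed, so references
go through the 'adrp'+'add' pair of GET_DATA_POINTER, which reaches
+-4 GiB via 4 KiB pages. A minimal sketch of the two patterns (x0 and
.Lconst are placeholder names, not taken from this patch):

  /* old GET_LOCAL_POINTER: same-section only, +-1 MiB */
  adr   x0, .Lconst

  /* new GET_DATA_POINTER: cross-section, +-4 GiB */
  adrp  x0, .Lconst               /* base of the 4 KiB page */
  add   x0, x0, #:lo12:.Lconst    /* low 12 bits within the page */

On Apple targets the macro instead uses the name@GOTPAGE /
name@GOTPAGEOFF spelling, since Mach-O assemblers use their own
relocation syntax rather than #:lo12:.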
cipher/asm-common-aarch64.h | 13 +++++++++++--
cipher/camellia-aarch64.S | 11 ++++-------
cipher/chacha20-aarch64.S | 16 ++++++++++------
cipher/cipher-gcm-armv8-aarch64-ce.S | 13 ++++++++-----
cipher/crc-armv8-aarch64-ce.S | 11 +++++++----
cipher/rijndael-aarch64.S | 4 ----
cipher/sha1-armv8-aarch64-ce.S | 9 ++++++---
cipher/sha256-armv8-aarch64-ce.S | 9 ++++++---
cipher/sm3-aarch64.S | 7 +++++--
cipher/sm3-armv8-aarch64-ce.S | 7 +++++--
cipher/sm4-aarch64.S | 6 ++++--
cipher/sm4-armv9-aarch64-sve-ce.S | 6 +++---
12 files changed, 69 insertions(+), 43 deletions(-)
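Each converted file follows the same skeleton: constants are emitted
under SECTION_RODATA ('.section .rodata') and marked as data objects,
then '.text' is restored before the first function. An illustrative
example of the resulting layout (all names made up, CFI and size
directives omitted for brevity):

  SECTION_RODATA
  ELF(.type _example_consts,%object;)
  .align 4
  _example_consts:
  .Lexample_table:
  	.long 0, 1, 2, 3

  .text
  ELF(.type _example_func,%function;)
  _example_func:
  	GET_DATA_POINTER(x0, .Lexample_table)
  	ld1 {v0.4s}, [x0]
  	ret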
diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h
index b38b17a6..8e8bf8e7 100644
--- a/cipher/asm-common-aarch64.h
+++ b/cipher/asm-common-aarch64.h
@@ -29,8 +29,17 @@
# define ELF(...) /*_*/
#endif
-#define GET_LOCAL_POINTER(reg, label) \
- adr reg, label;
+#define SECTION_RODATA .section .rodata
+
+#ifdef __APPLE__
+#define GET_DATA_POINTER(reg, name) \
+ adrp reg, name@GOTPAGE ; \
+ add reg, reg, name@GOTPAGEOFF ;
+#else
+#define GET_DATA_POINTER(reg, name) \
+ adrp reg, name ; \
+ add reg, reg, #:lo12:name ;
+#endif
#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
/* CFI directives to emit DWARF stack unwinding information. */
diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S
index c019c168..d53c595c 100644
--- a/cipher/camellia-aarch64.S
+++ b/cipher/camellia-aarch64.S
@@ -214,7 +214,7 @@ _gcry_camellia_arm_encrypt_block:
* w3: keybitlen
*/
- GET_LOCAL_POINTER(RTAB1, _gcry_camellia_arm_tables);
+ GET_DATA_POINTER(RTAB1, _gcry_camellia_arm_tables);
mov RMASK, #(0xff<<4); /* byte mask */
add RTAB2, RTAB1, #(1 * 4);
add RTAB3, RTAB1, #(2 * 4);
@@ -240,7 +240,6 @@ _gcry_camellia_arm_encrypt_block:
CFI_RESTORE(x30)
ret_spec_stop;
CFI_RESTORE_STATE()
-.ltorg
.Lenc_256:
enc_fls(24);
@@ -254,7 +253,6 @@ _gcry_camellia_arm_encrypt_block:
CFI_RESTORE(x30)
ret_spec_stop;
CFI_ENDPROC()
-.ltorg
ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;)
.globl _gcry_camellia_arm_decrypt_block
@@ -274,7 +272,7 @@ _gcry_camellia_arm_decrypt_block:
* w3: keybitlen
*/
- GET_LOCAL_POINTER(RTAB1, _gcry_camellia_arm_tables);
+ GET_DATA_POINTER(RTAB1, _gcry_camellia_arm_tables);
mov RMASK, #(0xff<<4); /* byte mask */
add RTAB2, RTAB1, #(1 * 4);
add RTAB3, RTAB1, #(2 * 4);
@@ -301,7 +299,6 @@ _gcry_camellia_arm_decrypt_block:
CFI_RESTORE(x30)
ret_spec_stop;
CFI_RESTORE_STATE()
-.ltorg
.Ldec_256:
inpack(32);
@@ -310,11 +307,11 @@ _gcry_camellia_arm_decrypt_block:
b .Ldec_128;
CFI_ENDPROC()
-.ltorg
ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;)
/* Encryption/Decryption tables */
-ELF(.type _gcry_camellia_arm_tables,@object;)
+SECTION_RODATA
+ELF(.type _gcry_camellia_arm_tables,%object;)
.balign 32
_gcry_camellia_arm_tables:
.Lcamellia_sp1110:
diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S
index 540f892b..c07fbede 100644
--- a/cipher/chacha20-aarch64.S
+++ b/cipher/chacha20-aarch64.S
@@ -36,8 +36,6 @@
.cpu generic+simd
-.text
-
#include "asm-poly1305-aarch64.h"
/* register macros */
@@ -180,12 +178,16 @@
ROTATE4(b1, b2, b3, b4, 7, tmp1, tmp2, tmp3, tmp4, \
_(iop27), _(iop28), _(iop29));
+SECTION_RODATA
+
.align 4
+ELF(.type _gcry_chacha20_aarch64_blocks4_data_inc_counter,%object;)
.globl _gcry_chacha20_aarch64_blocks4_data_inc_counter
_gcry_chacha20_aarch64_blocks4_data_inc_counter:
.long 0,1,2,3
.align 4
+ELF(.type _gcry_chacha20_aarch64_blocks4_data_rot8,%object;)
.globl _gcry_chacha20_aarch64_blocks4_data_rot8
_gcry_chacha20_aarch64_blocks4_data_rot8:
.byte 3,0,1,2
@@ -193,6 +195,8 @@ _gcry_chacha20_aarch64_blocks4_data_rot8:
.byte 11,8,9,10
.byte 15,12,13,14
+.text
+
.align 3
.globl _gcry_chacha20_aarch64_blocks4
ELF(.type _gcry_chacha20_aarch64_blocks4,%function;)
@@ -206,10 +210,10 @@ _gcry_chacha20_aarch64_blocks4:
*/
CFI_STARTPROC()
- GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
add INPUT_CTR, INPUT, #(12*4);
ld1 {ROT8.16b}, [CTR];
- GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
mov INPUT_POS, INPUT;
ld1 {VCTR.16b}, [CTR];
@@ -383,10 +387,10 @@ _gcry_chacha20_poly1305_aarch64_blocks4:
mov POLY_RSTATE, x4;
mov POLY_RSRC, x5;
- GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
add INPUT_CTR, INPUT, #(12*4);
ld1 {ROT8.16b}, [CTR];
- GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
mov INPUT_POS, INPUT;
ld1 {VCTR.16b}, [CTR];
diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S
index 78f3ad2d..8fd5d24a 100644
--- a/cipher/cipher-gcm-armv8-aarch64-ce.S
+++ b/cipher/cipher-gcm-armv8-aarch64-ce.S
@@ -25,12 +25,13 @@
.cpu generic+simd+crypto
-.text
-
/* Constants */
+SECTION_RODATA
+
.align 4
+ELF(.type gcry_gcm_reduction_constant,%object;)
gcry_gcm_reduction_constant:
.Lrconst:
.quad 0x87
@@ -149,6 +150,8 @@ gcry_gcm_reduction_constant:
#define _(...) __VA_ARGS__
#define __ _()
+.text
+
/*
* unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
* const byte *buf, size_t nblocks,
@@ -169,7 +172,7 @@ _gcry_ghash_armv8_ce_pmull:
cbz x3, .Ldo_nothing;
- GET_LOCAL_POINTER(x5, .Lrconst)
+ GET_DATA_POINTER(x5, .Lrconst)
eor vZZ.16b, vZZ.16b, vZZ.16b
ld1 {rhash.16b}, [x1]
@@ -368,7 +371,7 @@ _gcry_polyval_armv8_ce_pmull:
cbz x3, .Lpolyval_do_nothing;
- GET_LOCAL_POINTER(x5, .Lrconst)
+ GET_DATA_POINTER(x5, .Lrconst)
eor vZZ.16b, vZZ.16b, vZZ.16b
ld1 {rhash.16b}, [x1]
@@ -589,7 +592,7 @@ _gcry_ghash_setup_armv8_ce_pmull:
*/
CFI_STARTPROC()
- GET_LOCAL_POINTER(x2, .Lrconst)
+ GET_DATA_POINTER(x2, .Lrconst)
eor vZZ.16b, vZZ.16b, vZZ.16b
diff --git a/cipher/crc-armv8-aarch64-ce.S b/cipher/crc-armv8-aarch64-ce.S
index b6cdbb3d..5609e368 100644
--- a/cipher/crc-armv8-aarch64-ce.S
+++ b/cipher/crc-armv8-aarch64-ce.S
@@ -25,8 +25,6 @@
.cpu generic+simd+crypto
-.text
-
/* Structure of crc32_consts_s */
@@ -35,7 +33,11 @@
/* Constants */
+SECTION_RODATA
+
.align 6
+ELF(.type _crc32_aarch64_ce_constants,%object;)
+_crc32_aarch64_ce_constants:
.Lcrc32_constants:
.Lcrc32_partial_fold_input_mask:
.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
@@ -54,6 +56,7 @@
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+.text
/*
* void _gcry_crc32r_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
@@ -71,7 +74,7 @@ _gcry_crc32r_armv8_ce_bulk:
*/
CFI_STARTPROC()
- GET_LOCAL_POINTER(x7, .Lcrc32_constants)
+ GET_DATA_POINTER(x7, .Lcrc32_constants)
add x9, x3, #consts_k(5 - 1)
cmp x2, #128
@@ -280,7 +283,7 @@ _gcry_crc32_armv8_ce_bulk:
*/
CFI_STARTPROC()
- GET_LOCAL_POINTER(x7, .Lcrc32_constants)
+ GET_DATA_POINTER(x7, .Lcrc32_constants)
add x4, x7, #.Lcrc32_bswap_shuf - .Lcrc32_constants
cmp x2, #128
ld1 {v7.16b}, [x4]
diff --git a/cipher/rijndael-aarch64.S b/cipher/rijndael-aarch64.S
index 184fcd20..dcb82382 100644
--- a/cipher/rijndael-aarch64.S
+++ b/cipher/rijndael-aarch64.S
@@ -265,7 +265,6 @@ _gcry_aes_arm_encrypt_block:
mov x0, #(0);
ret_spec_stop;
-.ltorg
.Lenc_not_128:
beq .Lenc_192
@@ -278,7 +277,6 @@ _gcry_aes_arm_encrypt_block:
b .Lenc_done;
-.ltorg
.Lenc_192:
encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
@@ -488,7 +486,6 @@ _gcry_aes_arm_decrypt_block:
mov x0, #(0);
ret_spec_stop;
-.ltorg
.Ldec_256:
beq .Ldec_192;
@@ -500,7 +497,6 @@ _gcry_aes_arm_decrypt_block:
b .Ldec_tail;
-.ltorg
.Ldec_192:
firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND);
decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S
index f95717ee..28fb1c14 100644
--- a/cipher/sha1-armv8-aarch64-ce.S
+++ b/cipher/sha1-armv8-aarch64-ce.S
@@ -25,16 +25,17 @@
.cpu generic+simd+crypto
-.text
-
/* Constants */
+SECTION_RODATA
+
#define K1 0x5A827999
#define K2 0x6ED9EBA1
#define K3 0x8F1BBCDC
#define K4 0xCA62C1D6
.align 4
+ELF(.type gcry_sha1_aarch64_ce_K_VEC,%object;)
gcry_sha1_aarch64_ce_K_VEC:
.LK_VEC:
.LK1: .long K1, K1, K1, K1
@@ -91,6 +92,8 @@ gcry_sha1_aarch64_ce_K_VEC:
#define CLEAR_REG(reg) movi reg.16b, #0;
+.text
+
/*
* unsigned int
* _gcry_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
@@ -109,7 +112,7 @@ _gcry_sha1_transform_armv8_ce:
cbz x2, .Ldo_nothing;
- GET_LOCAL_POINTER(x4, .LK_VEC);
+ GET_DATA_POINTER(x4, .LK_VEC);
ld1 {vH0123.4s}, [x0] /* load h0,h1,h2,h3 */
ld1 {vK1.4s-vK4.4s}, [x4] /* load K1,K2,K3,K4 */
diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S
index 5616eada..43b941b6 100644
--- a/cipher/sha256-armv8-aarch64-ce.S
+++ b/cipher/sha256-armv8-aarch64-ce.S
@@ -25,12 +25,13 @@
.cpu generic+simd+crypto
-.text
-
/* Constants */
+SECTION_RODATA
+
.align 4
+ELF(.type gcry_sha256_aarch64_ce_K,%object;)
gcry_sha256_aarch64_ce_K:
.LK:
.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
@@ -101,6 +102,8 @@ gcry_sha256_aarch64_ce_K:
#define CLEAR_REG(reg) movi reg.16b, #0;
+.text
+
/*
* unsigned int
* _gcry_sha256_transform_armv8_ce (u32 state[8], const void *input_data,
@@ -119,7 +122,7 @@ _gcry_sha256_transform_armv8_ce:
cbz x2, .Ldo_nothing;
- GET_LOCAL_POINTER(x3, .LK);
+ GET_DATA_POINTER(x3, .LK);
mov x4, x3
ld1 {vH0123.4s-vH4567.4s}, [x0] /* load state */
diff --git a/cipher/sm3-aarch64.S b/cipher/sm3-aarch64.S
index 0e58254b..a4c132d3 100644
--- a/cipher/sm3-aarch64.S
+++ b/cipher/sm3-aarch64.S
@@ -29,7 +29,7 @@
/* Constants */
-.text
+SECTION_RODATA
.align 4
ELF(.type _gcry_sm3_aarch64_consts,@object)
_gcry_sm3_aarch64_consts:
@@ -383,6 +383,9 @@ ELF(.size _gcry_sm3_aarch64_consts,.-_gcry_sm3_aarch64_consts)
#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \
SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4)
+
+.text
+
/*
* Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
*
@@ -425,7 +428,7 @@ _gcry_sm3_transform_aarch64:
CFI_DEF_CFA_REGISTER(RFRAME);
sub addr0, sp, #STACK_SIZE;
- GET_LOCAL_POINTER(RKPTR, .LKtable);
+ GET_DATA_POINTER(RKPTR, .LKtable);
and sp, addr0, #(~63);
/* Preload first block. */
diff --git a/cipher/sm3-armv8-aarch64-ce.S b/cipher/sm3-armv8-aarch64-ce.S
index d592d08a..fdee3ccb 100644
--- a/cipher/sm3-armv8-aarch64-ce.S
+++ b/cipher/sm3-armv8-aarch64-ce.S
@@ -73,7 +73,7 @@
/* Constants */
-.text
+SECTION_RODATA
.align 4
ELF(.type _gcry_sm3_armv8_ce_consts,@object)
_gcry_sm3_armv8_ce_consts:
@@ -152,6 +152,9 @@ ELF(.size _gcry_sm3_armv8_ce_consts,.-_gcry_sm3_armv8_ce_consts)
#define R1(s0, s1, s2, s3, s4, IOP) R(a, s0, s1, s2, s3, s4, IOP)
#define R2(s0, s1, s2, s3, s4, IOP) R(b, s0, s1, s2, s3, s4, IOP)
+
+.text
+
.align 3
.global _gcry_sm3_transform_armv8_ce
ELF(.type _gcry_sm3_transform_armv8_ce,%function;)
@@ -170,7 +173,7 @@ _gcry_sm3_transform_armv8_ce:
ext CTX2.16b, CTX2.16b, CTX2.16b, #8;
.Lloop:
- GET_LOCAL_POINTER(x3, .Lsm3_Ktable);
+ GET_DATA_POINTER(x3, .Lsm3_Ktable);
ld1 {v0.16b-v3.16b}, [x1], #64;
sub x2, x2, #1;
diff --git a/cipher/sm4-aarch64.S b/cipher/sm4-aarch64.S
index 8d06991b..30a19358 100644
--- a/cipher/sm4-aarch64.S
+++ b/cipher/sm4-aarch64.S
@@ -30,7 +30,7 @@
/* Constants */
-.text
+SECTION_RODATA
.align 4
ELF(.type _gcry_sm4_aarch64_consts,@object)
_gcry_sm4_aarch64_consts:
@@ -84,7 +84,7 @@ ELF(.size _gcry_sm4_aarch64_consts,.-_gcry_sm4_aarch64_consts)
/* Helper macros. */
#define preload_sbox(ptr) \
- GET_LOCAL_POINTER(ptr, .Lsm4_sbox); \
+ GET_DATA_POINTER(ptr, .Lsm4_sbox); \
ld1 {v16.16b-v19.16b}, [ptr], #64; \
ld1 {v20.16b-v23.16b}, [ptr], #64; \
ld1 {v24.16b-v27.16b}, [ptr], #64; \
@@ -111,6 +111,8 @@ ELF(.size _gcry_sm4_aarch64_consts,.-_gcry_sm4_aarch64_consts)
zip2 s3.2d, RTMP3.2d, RTMP1.2d;
+.text
+
.align 3
ELF(.type sm4_aarch64_crypt_blk1_4,%function;)
sm4_aarch64_crypt_blk1_4:
diff --git a/cipher/sm4-armv9-aarch64-sve-ce.S b/cipher/sm4-armv9-aarch64-sve-ce.S
index 21e34e6f..f180cfdb 100644
--- a/cipher/sm4-armv9-aarch64-sve-ce.S
+++ b/cipher/sm4-armv9-aarch64-sve-ce.S
@@ -32,7 +32,7 @@
/* Constants */
-.text
+SECTION_RODATA
.align 4
ELF(.type _gcry_sm4_armv9_svesm4_consts,@object)
_gcry_sm4_armv9_svesm4_consts:
@@ -167,7 +167,7 @@ ELF(.size _gcry_sm4_armv9_svesm4_consts,.-_gcry_sm4_armv9_svesm4_consts)
/* Helper macros. */
#define PREPARE() \
- GET_LOCAL_POINTER(x7, .Lbswap128_mask); \
+ GET_DATA_POINTER(x7, .Lbswap128_mask); \
ptrue p0.b, ALL; \
rdvl x5, #1; \
ld1b {RSWAP128.b}, p0/z, [x7]; \
@@ -811,7 +811,7 @@ _gcry_sm4_armv9_sve_ce_ctr_enc:
PREPARE();
dup RZERO.d, #0;
- GET_LOCAL_POINTER(x6, .Lle128_inc);
+ GET_DATA_POINTER(x6, .Lle128_inc);
ld1b {RLE128_INC.b}, p0/z, [x6];
ldp x7, x8, [x3];
--
2.37.2