[PATCH] Revert "aarch64-asm: use ADR for getting pointers for local labels"
Jussi Kivilinna
jussi.kivilinna at iki.fi
Sun Jan 15 19:57:57 CET 2023
* cipher/asm-common-aarch64.h (GET_LOCAL_POINTER): Remove.
(GET_DATA_POINTER): New.
* cipher/camellia-aarch64.S: Use GET_DATA_POINTER instead of
GET_LOCAL_POINTER.
* cipher/chacha20-aarch64.S: Likewise.
* cipher/cipher-gcm-armv8-aarch64-ce.S: Likewise.
* cipher/crc-armv8-aarch64-ce.S: Likewise.
* cipher/sha1-armv8-aarch64-ce.S: Likewise.
* cipher/sha256-armv8-aarch64-ce.S: Likewise.
* cipher/sm3-aarch64.S: Likewise.
* cipher/sm3-armv8-aarch64-ce.S: Likewise.
* cipher/sm4-aarch64.S: Likewise.
* cipher/sm4-armv9-aarch64-sve-ce.S: Likewise.
--
This reverts commit fd02e8e78470deb661269c429f3348f811c054c6 with
following modifications:
- Only use adrp/add type address generation for GET_DATA_POINTER as
adrp/ldr can cause problems with only locally visible data labels.
- Change 'sm4-armv9-aarch64-sve-ce.S' to use GET_DATA_POINTER also.
- Don't revert 'camellia-aarch64.S' to use ADR instruction directly
but instead use GET_DATA_POINTER.
Apparently taking local addresses with single instruction will not work
when OS targets start to move to execute-only memory mappings. Therefore
revert "aarch64-asm: use ADR for getting pointers for local labels" to
switch back to using GET_DATA_POINTER.
Reported-by: Theo de Raadt <deraadt at openbsd.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/asm-common-aarch64.h | 11 +++++++++--
cipher/camellia-aarch64.S | 4 ++--
cipher/chacha20-aarch64.S | 8 ++++----
cipher/cipher-gcm-armv8-aarch64-ce.S | 6 +++---
cipher/crc-armv8-aarch64-ce.S | 4 ++--
cipher/sha1-armv8-aarch64-ce.S | 2 +-
cipher/sha256-armv8-aarch64-ce.S | 2 +-
cipher/sm3-aarch64.S | 2 +-
cipher/sm3-armv8-aarch64-ce.S | 2 +-
cipher/sm4-aarch64.S | 2 +-
cipher/sm4-armv9-aarch64-sve-ce.S | 4 ++--
11 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h
index b38b17a6..8f7951a3 100644
--- a/cipher/asm-common-aarch64.h
+++ b/cipher/asm-common-aarch64.h
@@ -29,8 +29,15 @@
# define ELF(...) /*_*/
#endif
-#define GET_LOCAL_POINTER(reg, label) \
- adr reg, label;
+#ifdef __APPLE__
+#define GET_DATA_POINTER(reg, name) \
+ adrp reg, name@GOTPAGE ; \
+ add reg, reg, name@GOTPAGEOFF ;
+#else
+#define GET_DATA_POINTER(reg, name) \
+ adrp reg, name ; \
+ add reg, reg, #:lo12:name ;
+#endif
#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
/* CFI directives to emit DWARF stack unwinding information. */
diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S
index c019c168..d7c0cf31 100644
--- a/cipher/camellia-aarch64.S
+++ b/cipher/camellia-aarch64.S
@@ -214,7 +214,7 @@ _gcry_camellia_arm_encrypt_block:
* w3: keybitlen
*/
- GET_LOCAL_POINTER(RTAB1, _gcry_camellia_arm_tables);
+ GET_DATA_POINTER(RTAB1, _gcry_camellia_arm_tables);
mov RMASK, #(0xff<<4); /* byte mask */
add RTAB2, RTAB1, #(1 * 4);
add RTAB3, RTAB1, #(2 * 4);
@@ -274,7 +274,7 @@ _gcry_camellia_arm_decrypt_block:
* w3: keybitlen
*/
- GET_LOCAL_POINTER(RTAB1, _gcry_camellia_arm_tables);
+ GET_DATA_POINTER(RTAB1, _gcry_camellia_arm_tables);
mov RMASK, #(0xff<<4); /* byte mask */
add RTAB2, RTAB1, #(1 * 4);
add RTAB3, RTAB1, #(2 * 4);
diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S
index 540f892b..2a980b95 100644
--- a/cipher/chacha20-aarch64.S
+++ b/cipher/chacha20-aarch64.S
@@ -206,10 +206,10 @@ _gcry_chacha20_aarch64_blocks4:
*/
CFI_STARTPROC()
- GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
add INPUT_CTR, INPUT, #(12*4);
ld1 {ROT8.16b}, [CTR];
- GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
mov INPUT_POS, INPUT;
ld1 {VCTR.16b}, [CTR];
@@ -383,10 +383,10 @@ _gcry_chacha20_poly1305_aarch64_blocks4:
mov POLY_RSTATE, x4;
mov POLY_RSRC, x5;
- GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
add INPUT_CTR, INPUT, #(12*4);
ld1 {ROT8.16b}, [CTR];
- GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+ GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
mov INPUT_POS, INPUT;
ld1 {VCTR.16b}, [CTR];
diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S
index 78f3ad2d..687fabe3 100644
--- a/cipher/cipher-gcm-armv8-aarch64-ce.S
+++ b/cipher/cipher-gcm-armv8-aarch64-ce.S
@@ -169,7 +169,7 @@ _gcry_ghash_armv8_ce_pmull:
cbz x3, .Ldo_nothing;
- GET_LOCAL_POINTER(x5, .Lrconst)
+ GET_DATA_POINTER(x5, .Lrconst)
eor vZZ.16b, vZZ.16b, vZZ.16b
ld1 {rhash.16b}, [x1]
@@ -368,7 +368,7 @@ _gcry_polyval_armv8_ce_pmull:
cbz x3, .Lpolyval_do_nothing;
- GET_LOCAL_POINTER(x5, .Lrconst)
+ GET_DATA_POINTER(x5, .Lrconst)
eor vZZ.16b, vZZ.16b, vZZ.16b
ld1 {rhash.16b}, [x1]
@@ -589,7 +589,7 @@ _gcry_ghash_setup_armv8_ce_pmull:
*/
CFI_STARTPROC()
- GET_LOCAL_POINTER(x2, .Lrconst)
+ GET_DATA_POINTER(x2, .Lrconst)
eor vZZ.16b, vZZ.16b, vZZ.16b
diff --git a/cipher/crc-armv8-aarch64-ce.S b/cipher/crc-armv8-aarch64-ce.S
index b6cdbb3d..7ac884af 100644
--- a/cipher/crc-armv8-aarch64-ce.S
+++ b/cipher/crc-armv8-aarch64-ce.S
@@ -71,7 +71,7 @@ _gcry_crc32r_armv8_ce_bulk:
*/
CFI_STARTPROC()
- GET_LOCAL_POINTER(x7, .Lcrc32_constants)
+ GET_DATA_POINTER(x7, .Lcrc32_constants)
add x9, x3, #consts_k(5 - 1)
cmp x2, #128
@@ -280,7 +280,7 @@ _gcry_crc32_armv8_ce_bulk:
*/
CFI_STARTPROC()
- GET_LOCAL_POINTER(x7, .Lcrc32_constants)
+ GET_DATA_POINTER(x7, .Lcrc32_constants)
add x4, x7, #.Lcrc32_bswap_shuf - .Lcrc32_constants
cmp x2, #128
ld1 {v7.16b}, [x4]
diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S
index f95717ee..ea26564b 100644
--- a/cipher/sha1-armv8-aarch64-ce.S
+++ b/cipher/sha1-armv8-aarch64-ce.S
@@ -109,7 +109,7 @@ _gcry_sha1_transform_armv8_ce:
cbz x2, .Ldo_nothing;
- GET_LOCAL_POINTER(x4, .LK_VEC);
+ GET_DATA_POINTER(x4, .LK_VEC);
ld1 {vH0123.4s}, [x0] /* load h0,h1,h2,h3 */
ld1 {vK1.4s-vK4.4s}, [x4] /* load K1,K2,K3,K4 */
diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S
index 5616eada..d0fa6285 100644
--- a/cipher/sha256-armv8-aarch64-ce.S
+++ b/cipher/sha256-armv8-aarch64-ce.S
@@ -119,7 +119,7 @@ _gcry_sha256_transform_armv8_ce:
cbz x2, .Ldo_nothing;
- GET_LOCAL_POINTER(x3, .LK);
+ GET_DATA_POINTER(x3, .LK);
mov x4, x3
ld1 {vH0123.4s-vH4567.4s}, [x0] /* load state */
diff --git a/cipher/sm3-aarch64.S b/cipher/sm3-aarch64.S
index 0e58254b..3fb89006 100644
--- a/cipher/sm3-aarch64.S
+++ b/cipher/sm3-aarch64.S
@@ -425,7 +425,7 @@ _gcry_sm3_transform_aarch64:
CFI_DEF_CFA_REGISTER(RFRAME);
sub addr0, sp, #STACK_SIZE;
- GET_LOCAL_POINTER(RKPTR, .LKtable);
+ GET_DATA_POINTER(RKPTR, .LKtable);
and sp, addr0, #(~63);
/* Preload first block. */
diff --git a/cipher/sm3-armv8-aarch64-ce.S b/cipher/sm3-armv8-aarch64-ce.S
index d592d08a..0900b84f 100644
--- a/cipher/sm3-armv8-aarch64-ce.S
+++ b/cipher/sm3-armv8-aarch64-ce.S
@@ -170,7 +170,7 @@ _gcry_sm3_transform_armv8_ce:
ext CTX2.16b, CTX2.16b, CTX2.16b, #8;
.Lloop:
- GET_LOCAL_POINTER(x3, .Lsm3_Ktable);
+ GET_DATA_POINTER(x3, .Lsm3_Ktable);
ld1 {v0.16b-v3.16b}, [x1], #64;
sub x2, x2, #1;
diff --git a/cipher/sm4-aarch64.S b/cipher/sm4-aarch64.S
index 8d06991b..306b425e 100644
--- a/cipher/sm4-aarch64.S
+++ b/cipher/sm4-aarch64.S
@@ -84,7 +84,7 @@ ELF(.size _gcry_sm4_aarch64_consts,.-_gcry_sm4_aarch64_consts)
/* Helper macros. */
#define preload_sbox(ptr) \
- GET_LOCAL_POINTER(ptr, .Lsm4_sbox); \
+ GET_DATA_POINTER(ptr, .Lsm4_sbox); \
ld1 {v16.16b-v19.16b}, [ptr], #64; \
ld1 {v20.16b-v23.16b}, [ptr], #64; \
ld1 {v24.16b-v27.16b}, [ptr], #64; \
diff --git a/cipher/sm4-armv9-aarch64-sve-ce.S b/cipher/sm4-armv9-aarch64-sve-ce.S
index 21e34e6f..a1b61416 100644
--- a/cipher/sm4-armv9-aarch64-sve-ce.S
+++ b/cipher/sm4-armv9-aarch64-sve-ce.S
@@ -167,7 +167,7 @@ ELF(.size _gcry_sm4_armv9_svesm4_consts,.-_gcry_sm4_armv9_svesm4_consts)
/* Helper macros. */
#define PREPARE() \
- GET_LOCAL_POINTER(x7, .Lbswap128_mask); \
+ GET_DATA_POINTER(x7, .Lbswap128_mask); \
ptrue p0.b, ALL; \
rdvl x5, #1; \
ld1b {RSWAP128.b}, p0/z, [x7]; \
@@ -811,7 +811,7 @@ _gcry_sm4_armv9_sve_ce_ctr_enc:
PREPARE();
dup RZERO.d, #0;
- GET_LOCAL_POINTER(x6, .Lle128_inc);
+ GET_DATA_POINTER(x6, .Lle128_inc);
ld1b {RLE128_INC.b}, p0/z, [x6];
ldp x7, x8, [x3];
--
2.37.2
More information about the Gcrypt-devel
mailing list