[PATCH 2/2] aarch64/assembly: only use the lower 32 bits of int parameters
Jussi Kivilinna
jussi.kivilinna at iki.fi
Wed Mar 28 19:53:22 CEST 2018
* cipher/camellia-aarch64.S (_gcry_camellia_arm_encrypt_block)
(__gcry_camellia_arm_decrypt_block): Make comment section about input
registers match usage.
* cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_ocb_auth_armv8_ce): Use
'w12' and 'w7' instead of 'x12' and 'x7'.
(_gcry_aes_xts_enc_armv8_ce, _gcry_aes_xts_dec_armv8_ce): Fix function
prototype in comments.
* mpi/aarch64/mpih-add1.S: Use 32-bit registers for 32-bit mpi_size_t
parameters.
* mpi/aarch64/mpih-mul1.S: Ditto.
* mpi/aarch64/mpih-mul2.S: Ditto.
* mpi/aarch64/mpih-mul3.S: Ditto.
* mpi/aarch64/mpih-sub1.S: Ditto.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
0 files changed
diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S
index c3cc463d5..b0e9a0335 100644
--- a/cipher/camellia-aarch64.S
+++ b/cipher/camellia-aarch64.S
@@ -206,7 +206,7 @@ _gcry_camellia_arm_encrypt_block:
* x0: keytable
* x1: dst
* x2: src
- * x3: keybitlen
+ * w3: keybitlen
*/
adr RTAB1, _gcry_camellia_arm_tables;
@@ -252,7 +252,7 @@ _gcry_camellia_arm_decrypt_block:
* x0: keytable
* x1: dst
* x2: src
- * x3: keybitlen
+ * w3: keybitlen
*/
adr RTAB1, _gcry_camellia_arm_tables;
diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S
index 5859557ab..f0012c20a 100644
--- a/cipher/rijndael-armv8-aarch64-ce.S
+++ b/cipher/rijndael-armv8-aarch64-ce.S
@@ -1157,8 +1157,8 @@ _gcry_aes_ocb_auth_armv8_ce:
* w6: nrounds => w7
* w7: blkn => w12
*/
- mov x12, x7
- mov x7, x6
+ mov w12, w7
+ mov w7, w6
mov x6, x5
mov x5, x4
mov x4, x3
@@ -1280,7 +1280,9 @@ ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;)
* void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
* unsigned char *outbuf,
* const unsigned char *inbuf,
- * unsigned char *tweak, unsigned int nrounds);
+ * unsigned char *tweak,
+ * size_t nblocks,
+ * unsigned int nrounds);
*/
.align 3
@@ -1417,7 +1419,9 @@ ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;)
* void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
* unsigned char *outbuf,
* const unsigned char *inbuf,
- * unsigned char *tweak, unsigned int nrounds);
+ * unsigned char *tweak,
+ * size_t nblocks,
+ * unsigned int nrounds);
*/
.align 3
diff --git a/mpi/aarch64/mpih-add1.S b/mpi/aarch64/mpih-add1.S
index 4ead1c23b..3370320e0 100644
--- a/mpi/aarch64/mpih-add1.S
+++ b/mpi/aarch64/mpih-add1.S
@@ -29,7 +29,7 @@
* _gcry_mpih_add_n( mpi_ptr_t res_ptr, x0
* mpi_ptr_t s1_ptr, x1
* mpi_ptr_t s2_ptr, x2
- * mpi_size_t size) x3
+ * mpi_size_t size) w3
*/
.text
@@ -37,34 +37,34 @@
.globl _gcry_mpih_add_n
ELF(.type _gcry_mpih_add_n,%function)
_gcry_mpih_add_n:
- and x5, x3, #3;
+ and w5, w3, #3;
adds xzr, xzr, xzr; /* clear carry flag */
- cbz x5, .Large_loop;
+ cbz w5, .Large_loop;
.Loop:
ldr x4, [x1], #8;
- sub x3, x3, #1;
+ sub w3, w3, #1;
ldr x11, [x2], #8;
- and x5, x3, #3;
+ and w5, w3, #3;
adcs x4, x4, x11;
str x4, [x0], #8;
- cbz x3, .Lend;
- cbnz x5, .Loop;
+ cbz w3, .Lend;
+ cbnz w5, .Loop;
.Large_loop:
ldp x4, x6, [x1], #16;
ldp x5, x7, [x2], #16;
ldp x8, x10, [x1], #16;
ldp x9, x11, [x2], #16;
- sub x3, x3, #4;
+ sub w3, w3, #4;
adcs x4, x4, x5;
adcs x6, x6, x7;
adcs x8, x8, x9;
adcs x10, x10, x11;
stp x4, x6, [x0], #16;
stp x8, x10, [x0], #16;
- cbnz x3, .Large_loop;
+ cbnz w3, .Large_loop;
.Lend:
adc x0, xzr, xzr;
diff --git a/mpi/aarch64/mpih-mul1.S b/mpi/aarch64/mpih-mul1.S
index 8a8626936..8830845a7 100644
--- a/mpi/aarch64/mpih-mul1.S
+++ b/mpi/aarch64/mpih-mul1.S
@@ -28,7 +28,7 @@
* mpi_limb_t
* _gcry_mpih_mul_1( mpi_ptr_t res_ptr, x0
* mpi_ptr_t s1_ptr, x1
- * mpi_size_t s1_size, x2
+ * mpi_size_t s1_size, w2
* mpi_limb_t s2_limb) x3
*/
@@ -37,27 +37,27 @@
.globl _gcry_mpih_mul_1
ELF(.type _gcry_mpih_mul_1,%function)
_gcry_mpih_mul_1:
- and x5, x2, #3;
+ and w5, w2, #3;
mov x4, xzr;
- cbz x5, .Large_loop;
+ cbz w5, .Large_loop;
.Loop:
ldr x5, [x1], #8;
- sub x2, x2, #1;
+ sub w2, w2, #1;
mul x9, x5, x3;
umulh x10, x5, x3;
- and x5, x2, #3;
+ and w5, w2, #3;
adds x4, x4, x9;
str x4, [x0], #8;
adc x4, x10, xzr;
- cbz x2, .Lend;
- cbnz x5, .Loop;
+ cbz w2, .Lend;
+ cbnz w5, .Loop;
.Large_loop:
ldp x5, x6, [x1];
- sub x2, x2, #4;
+ sub w2, w2, #4;
mul x9, x5, x3;
ldp x7, x8, [x1, #16];
@@ -89,7 +89,7 @@ _gcry_mpih_mul_1:
str x4, [x0], #8;
adc x4, x16, xzr;
- cbnz x2, .Large_loop;
+ cbnz w2, .Large_loop;
.Lend:
mov x0, x4;
diff --git a/mpi/aarch64/mpih-mul2.S b/mpi/aarch64/mpih-mul2.S
index c7c08e5ab..5d736990e 100644
--- a/mpi/aarch64/mpih-mul2.S
+++ b/mpi/aarch64/mpih-mul2.S
@@ -28,7 +28,7 @@
* mpi_limb_t
* _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, x0
* mpi_ptr_t s1_ptr, x1
- * mpi_size_t s1_size, x2
+ * mpi_size_t s1_size, w2
* mpi_limb_t s2_limb) x3
*/
@@ -37,11 +37,11 @@
.globl _gcry_mpih_addmul_1
ELF(.type _gcry_mpih_addmul_1,%function)
_gcry_mpih_addmul_1:
- and x5, x2, #3;
+ and w5, w2, #3;
mov x6, xzr;
mov x7, xzr;
- cbz x5, .Large_loop;
+ cbz w5, .Large_loop;
.Loop:
ldr x5, [x1], #8;
@@ -49,21 +49,21 @@ _gcry_mpih_addmul_1:
mul x12, x5, x3;
ldr x4, [x0];
umulh x13, x5, x3;
- sub x2, x2, #1;
+ sub w2, w2, #1;
adds x12, x12, x4;
- and x5, x2, #3;
+ and w5, w2, #3;
adc x13, x13, x7;
adds x12, x12, x6;
str x12, [x0], #8;
adc x6, x7, x13;
- cbz x2, .Lend;
- cbnz x5, .Loop;
+ cbz w2, .Lend;
+ cbnz w5, .Loop;
.Large_loop:
ldp x5, x9, [x1], #16;
- sub x2, x2, #4;
+ sub w2, w2, #4;
ldp x4, x8, [x0];
mul x12, x5, x3;
@@ -101,7 +101,7 @@ _gcry_mpih_addmul_1:
str x14, [x0], #8;
adc x6, x7, x15;
- cbnz x2, .Large_loop;
+ cbnz w2, .Large_loop;
.Lend:
mov x0, x6;
diff --git a/mpi/aarch64/mpih-mul3.S b/mpi/aarch64/mpih-mul3.S
index ccc961e62..f785e5e42 100644
--- a/mpi/aarch64/mpih-mul3.S
+++ b/mpi/aarch64/mpih-mul3.S
@@ -28,7 +28,7 @@
* mpi_limb_t
* _gcry_mpih_submul_1( mpi_ptr_t res_ptr, x0
* mpi_ptr_t s1_ptr, x1
- * mpi_size_t s1_size, x2
+ * mpi_size_t s1_size, w2
* mpi_limb_t s2_limb) x3
*/
@@ -37,9 +37,9 @@
.globl _gcry_mpih_submul_1
ELF(.type _gcry_mpih_submul_1,%function)
_gcry_mpih_submul_1:
- and x5, x2, #3;
+ and w5, w2, #3;
mov x7, xzr;
- cbz x5, .Large_loop;
+ cbz w5, .Large_loop;
subs xzr, xzr, xzr;
@@ -47,26 +47,26 @@ _gcry_mpih_submul_1:
ldr x4, [x1], #8;
cinc x7, x7, cc;
ldr x5, [x0];
- sub x2, x2, #1;
+ sub w2, w2, #1;
mul x6, x4, x3;
subs x5, x5, x7;
umulh x4, x4, x3;
- and x10, x2, #3;
+ and w10, w2, #3;
cset x7, cc;
subs x5, x5, x6;
add x7, x7, x4;
str x5, [x0], #8;
- cbz x2, .Loop_end;
- cbnz x10, .Loop;
+ cbz w2, .Loop_end;
+ cbnz w10, .Loop;
cinc x7, x7, cc;
.Large_loop:
ldp x4, x8, [x1], #16;
- sub x2, x2, #4;
+ sub w2, w2, #4;
ldp x5, x9, [x0];
mul x6, x4, x3;
@@ -111,7 +111,7 @@ _gcry_mpih_submul_1:
str x9, [x0], #8;
cinc x7, x7, cc;
- cbnz x2, .Large_loop;
+ cbnz w2, .Large_loop;
mov x0, x7;
ret;
diff --git a/mpi/aarch64/mpih-sub1.S b/mpi/aarch64/mpih-sub1.S
index 4a663732d..45a7b0417 100644
--- a/mpi/aarch64/mpih-sub1.S
+++ b/mpi/aarch64/mpih-sub1.S
@@ -29,7 +29,7 @@
* _gcry_mpih_sub_n( mpi_ptr_t res_ptr, x0
* mpi_ptr_t s1_ptr, x1
* mpi_ptr_t s2_ptr, x2
- * mpi_size_t size) x3
+ * mpi_size_t size) w3
*/
.text
@@ -37,34 +37,34 @@
.globl _gcry_mpih_sub_n
ELF(.type _gcry_mpih_sub_n,%function)
_gcry_mpih_sub_n:
- and x5, x3, #3;
+ and w5, w3, #3;
subs xzr, xzr, xzr; /* prepare carry flag for sub */
- cbz x5, .Large_loop;
+ cbz w5, .Large_loop;
.Loop:
ldr x4, [x1], #8;
- sub x3, x3, #1;
+ sub w3, w3, #1;
ldr x11, [x2], #8;
- and x5, x3, #3;
+ and w5, w3, #3;
sbcs x4, x4, x11;
str x4, [x0], #8;
- cbz x3, .Lend;
- cbnz x5, .Loop;
+ cbz w3, .Lend;
+ cbnz w5, .Loop;
.Large_loop:
ldp x4, x6, [x1], #16;
ldp x5, x7, [x2], #16;
ldp x8, x10, [x1], #16;
ldp x9, x11, [x2], #16;
- sub x3, x3, #4;
+ sub w3, w3, #4;
sbcs x4, x4, x5;
sbcs x6, x6, x7;
sbcs x8, x8, x9;
sbcs x10, x10, x11;
stp x4, x6, [x0], #16;
stp x8, x10, [x0], #16;
- cbnz x3, .Large_loop;
+ cbnz w3, .Large_loop;
.Lend:
cset x0, cc;
More information about the Gcrypt-devel
mailing list