[PATCH] AES/ARMv8-CE: Use inline assembly for key setup
Jussi Kivilinna
jussi.kivilinna at iki.fi
Thu Mar 28 23:04:33 CET 2019
* cipher/rijndael-armv8-aarch32-ce.S (_gcry_aes_sbox4_armv8_ce)
(_gcry_aes_invmixcol_armv8_ce): Remove.
* cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_sbox4_armv8_ce)
(_gcry_aes_invmixcol_armv8_ce): Remove.
* cipher/rijndael-armv8-ce.c (_gcry_aes_sbox4_armv8_ce)
(_gcry_aes_invmixcol_armv8_ce): Replace prototypes with ...
(_gcry_aes_sbox4_armv8_ce, _gcry_aes_invmixcol_armv8_ce): ... these
inline functions.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/rijndael-armv8-aarch32-ce.S | 36 -------------------
cipher/rijndael-armv8-aarch64-ce.S | 35 ------------------
cipher/rijndael-armv8-ce.c | 70 +++++++++++++++++++++++++++++++++++-
3 files changed, 68 insertions(+), 73 deletions(-)
diff --git a/cipher/rijndael-armv8-aarch32-ce.S b/cipher/rijndael-armv8-aarch32-ce.S
index 66440bd4e..bbd33d353 100644
--- a/cipher/rijndael-armv8-aarch32-ce.S
+++ b/cipher/rijndael-armv8-aarch32-ce.S
@@ -1828,40 +1828,4 @@ _gcry_aes_xts_dec_armv8_ce:
.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;
-/*
- * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
- */
-.align 3
-.globl _gcry_aes_sbox4_armv8_ce
-.type _gcry_aes_sbox4_armv8_ce,%function;
-_gcry_aes_sbox4_armv8_ce:
- /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
- * Cryptology — CT-RSA 2015" for details.
- */
- vmov.i8 q0, #0x52
- vmov.i8 q1, #0
- vmov s0, r0
- aese.8 q0, q1
- veor d0, d1
- vpadd.i32 d0, d0, d1
- vmov r0, s0
- CLEAR_REG(q0)
- bx lr
-.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;
-
-
-/*
- * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src);
- */
-.align 3
-.globl _gcry_aes_invmixcol_armv8_ce
-.type _gcry_aes_invmixcol_armv8_ce,%function;
-_gcry_aes_invmixcol_armv8_ce:
- vld1.8 {q0}, [r1]
- aesimc.8 q0, q0
- vst1.8 {q0}, [r0]
- CLEAR_REG(q0)
- bx lr
-.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;
-
#endif
diff --git a/cipher/rijndael-armv8-aarch64-ce.S b/cipher/rijndael-armv8-aarch64-ce.S
index f0012c20a..f3ec97f82 100644
--- a/cipher/rijndael-armv8-aarch64-ce.S
+++ b/cipher/rijndael-armv8-aarch64-ce.S
@@ -1554,39 +1554,4 @@ _gcry_aes_xts_dec_armv8_ce:
ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;)
-/*
- * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
- */
-.align 3
-.globl _gcry_aes_sbox4_armv8_ce
-ELF(.type _gcry_aes_sbox4_armv8_ce,%function;)
-_gcry_aes_sbox4_armv8_ce:
- /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
- * Cryptology — CT-RSA 2015" for details.
- */
- movi v0.16b, #0x52
- movi v1.16b, #0
- mov v0.S[0], w0
- aese v0.16b, v1.16b
- addv s0, v0.4s
- mov w0, v0.S[0]
- CLEAR_REG(v0)
- ret
-ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;)
-
-
-/*
- * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src);
- */
-.align 3
-.globl _gcry_aes_invmixcol_armv8_ce
-ELF(.type _gcry_aes_invmixcol_armv8_ce,%function;)
-_gcry_aes_invmixcol_armv8_ce:
- ld1 {v0.16b}, [x1]
- aesimc v0.16b, v0.16b
- st1 {v0.16b}, [x0]
- CLEAR_REG(v0)
- ret
-ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;)
-
#endif
diff --git a/cipher/rijndael-armv8-ce.c b/cipher/rijndael-armv8-ce.c
index 6e46830ee..1d27157be 100644
--- a/cipher/rijndael-armv8-ce.c
+++ b/cipher/rijndael-armv8-ce.c
@@ -37,8 +37,6 @@
typedef struct u128_s { u32 a, b, c, d; } u128_t;
-extern u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
-extern void _gcry_aes_invmixcol_armv8_ce(u128_t *dst, const u128_t *src);
extern unsigned int _gcry_aes_enc_armv8_ce(const void *keysched, byte *dst,
const byte *src,
@@ -123,6 +121,74 @@ typedef void (*xts_crypt_fn_t) (const void *keysched, unsigned char *outbuf,
unsigned char *tweak, size_t nblocks,
unsigned int nrounds);
+
+/*
+ * Feed the four bytes of VAL through one AESE step with an all-zero round
+ * key and return the result folded back into a single 32-bit word (used
+ * as a 4-byte S-box lookup).
+ *
+ * See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ *
+ * The vector register is filled with 0x52 before VAL is inserted into
+ * lane 0; the AES S-box maps 0x52 to 0x00, so the filler bytes come out
+ * of AESE as zero and only the four substituted bytes of VAL survive the
+ * final fold across lanes.
+ *
+ * q0/v0 holds key-derived data at that point, so it is zeroed before the
+ * asm statement ends, matching the CLEAR_REG() that the former assembly
+ * implementation performed.  q1/v1 only ever holds zero.
+ *
+ * If neither the AArch32 nor the AArch64 crypto inline-asm configuration
+ * macros are defined, no asm is emitted and VAL is returned unchanged.
+ */
+static inline u32
+_gcry_aes_sbox4_armv8_ce(u32 val)
+{
+#if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+  asm (".syntax unified\n"
+       ".arch armv8-a\n"
+       ".fpu crypto-neon-fp-armv8\n"
+
+       "vmov.i8 q0, #0x52\n"
+       "vmov.i8 q1, #0\n"
+       "vmov s0, %[in]\n"
+       "aese.8 q0, q1\n"
+       /* Fold the four 32-bit lanes of q0 into s0. */
+       "veor d0, d1\n"
+       "vpadd.i32 d0, d0, d1\n"
+       "vmov %[out], s0\n"
+       /* Wipe key-derived data from the vector register. */
+       "veor q0, q0, q0\n"
+       : [out] "=r" (val)
+       : [in] "r" (val)
+       : "q0", "q1", "d0", "s0");
+#elif defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+      defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+  asm (".cpu generic+simd+crypto\n"
+
+       "movi v0.16b, #0x52\n"
+       "movi v1.16b, #0\n"
+       "mov v0.S[0], %w[in]\n"
+       "aese v0.16b, v1.16b\n"
+       /* Fold the four 32-bit lanes of v0 into s0. */
+       "addv s0, v0.4s\n"
+       "mov %w[out], v0.S[0]\n"
+       /* Wipe key-derived data from the vector register. */
+       "movi v0.16b, #0\n"
+       : [out] "=r" (val)
+       : [in] "r" (val)
+       : "v0", "v1", "s0");
+#endif
+
+  return val;
+}
+
+
+/*
+ * Load the 16-byte block at SRC, apply the AESIMC instruction to it and
+ * store the 16-byte result at DST.  DST and SRC may point to the same
+ * block, since the whole block is loaded before anything is stored.
+ *
+ * The pointers are passed as plain register operands, so the "memory"
+ * clobber is what tells the compiler that *SRC is read and *DST is
+ * written by the asm statement.
+ *
+ * q0/v0 holds key-derived data after the store, so it is zeroed before
+ * the asm statement ends, matching the CLEAR_REG() that the former
+ * assembly implementation performed.
+ *
+ * If neither the AArch32 nor the AArch64 crypto inline-asm configuration
+ * macros are defined, the function body is empty and *DST is left
+ * untouched.
+ */
+static inline void
+_gcry_aes_invmixcol_armv8_ce(u128_t *dst, const u128_t *src)
+{
+#if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+  asm (".syntax unified\n"
+       ".arch armv8-a\n"
+       ".fpu crypto-neon-fp-armv8\n"
+
+       "vld1.8 {q0}, [%[src]]\n"
+       "aesimc.8 q0, q0\n"
+       "vst1.8 {q0}, [%[dst]]\n"
+       /* Wipe key-derived data from the vector register. */
+       "veor q0, q0, q0\n"
+       :
+       : [dst] "r" (dst), [src] "r" (src)
+       : "q0", "memory");
+#elif defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+      defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+  asm (".cpu generic+simd+crypto\n"
+
+       "ld1 {v0.16b}, [%[src]]\n"
+       "aesimc v0.16b, v0.16b\n"
+       "st1 {v0.16b}, [%[dst]]\n"
+       /* Wipe key-derived data from the vector register. */
+       "movi v0.16b, #0\n"
+       :
+       : [dst] "r" (dst), [src] "r" (src)
+       : "v0", "memory");
+#endif
+}
+
+
void
_gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key)
{
More information about the Gcrypt-devel
mailing list