[PATCH] rijndael-ppc: use vector registers for key schedule calculations
Jussi Kivilinna
jussi.kivilinna at iki.fi
Mon Mar 6 20:36:29 CET 2023
* cipher/rijndael-ppc.c (_gcry_aes_sbox4_ppc8): Remove.
(bcast_u32_to_vec, u32_from_vec): New.
(_gcry_aes_ppc8_setkey): Use vectors for round key calculation
variables.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
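
Notes below the fold (so they stay out of the commit message): the removed
_gcry_aes_sbox4_ppc8() pushed each key word from a general-purpose register
into a vector register, ran the S-box, and pulled the result back out again;
keeping the whole schedule in vector registers avoids those round trips.
For reference, here is the scalar round-constant recurrence that the second
hunk lifts into vector form, as a minimal standalone sketch; the hypothetical
next_rcon() and the stdint types stand in for libgcrypt's u32/byte:

  #include <stdint.h>

  /* Branchless doubling of rcon in GF(2^8), as in the scalar code this
     patch replaces: when the top bit of rcon is set, -(rcon >> 7) is
     all-ones and the XOR reduces by the AES polynomial constant 0x1b.  */
  static inline uint8_t
  next_rcon (uint8_t rcon)
  {
    return (uint8_t)((rcon << 1) ^ (-(rcon >> 7) & 0x1b));
  }

Starting from 1 this produces the usual AES round constants 01, 02, 04, 08,
10, 20, 40, 80, 1b, 36; the vector version in the patch computes the same
update on every byte lane at once.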
cipher/rijndael-ppc.c | 68 +++++++++++++++++++++++++------------------
1 file changed, 39 insertions(+), 29 deletions(-)
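
One non-obvious bit of the first hunk below: keysched_idx() itself is
unchanged; the hunk only moves it ahead of the two new helpers. On
big-endian it is the identity, and on little-endian it mirrors each 32-bit
word's position within its 16-byte round-key vector. A quick illustration,
where the hypothetical keysched_idx_le() is just a standalone copy of the
little-endian branch:

  /* (in & ~3U) keeps the group-of-four base; (3U - (in & 3U)) reverses
     the offset inside the group.  */
  static inline unsigned int
  keysched_idx_le (unsigned int in)
  {
    return (in & ~3U) | (3U - (in & 3U));
  }

  /* keysched_idx_le(0..7) => 3, 2, 1, 0, 7, 6, 5, 4  */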
diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index 7530209d..055b00c0 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -116,25 +116,32 @@ asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
 }
 
 
-static ASM_FUNC_ATTR_INLINE u32
-_gcry_aes_sbox4_ppc8(u32 fourbytes)
+static ASM_FUNC_ATTR_INLINE unsigned int
+keysched_idx(unsigned int in)
 {
-  vec_u32 vec_fourbyte = { fourbytes, fourbytes, fourbytes, fourbytes };
 #ifdef WORDS_BIGENDIAN
-  return ((vec_u32)asm_sbox_be((block)vec_fourbyte))[1];
+  return in;
 #else
-  return ((vec_u32)asm_sbox_be((block)vec_fourbyte))[2];
+  return (in & ~3U) | (3U - (in & 3U));
 #endif
 }
 
 
-static ASM_FUNC_ATTR_INLINE unsigned int
-keysched_idx(unsigned int in)
+static ASM_FUNC_ATTR_INLINE vec_u32
+bcast_u32_to_vec(u32 x)
+{
+  vec_u32 v = { x, x, x, x };
+  return v;
+}
+
+
+static ASM_FUNC_ATTR_INLINE u32
+u32_from_vec(vec_u32 x)
 {
 #ifdef WORDS_BIGENDIAN
-  return in;
+  return x[1];
 #else
-  return (in & ~3U) | (3U - (in & 3U));
+  return x[2];
 #endif
 }
 
@@ -142,55 +149,58 @@ keysched_idx(unsigned int in)
 void PPC_OPT_ATTR
 _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
 {
-  u32 tk_u32[MAXKC];
+  static const vec_u32 rotate24 = { 24, 24, 24, 24 };
+  static const vec_u32 rcon_const = { 0x1b, 0x1b, 0x1b, 0x1b };
+  vec_u32 tk_vu32[MAXKC];
   unsigned int rounds = ctx->rounds;
   unsigned int KC = rounds - 6;
   u32 *W_u32 = ctx->keyschenc32b;
   unsigned int i, j;
-  u32 tk_prev;
-  byte rcon = 1;
+  vec_u32 tk_prev;
+  vec_u32 rcon = { 1, 1, 1, 1 };
 
   for (i = 0; i < KC; i += 2)
     {
       unsigned int idx0 = keysched_idx(i + 0);
       unsigned int idx1 = keysched_idx(i + 1);
-      tk_u32[i + 0] = buf_get_le32(key + i * 4 + 0);
-      tk_u32[i + 1] = buf_get_le32(key + i * 4 + 4);
-      W_u32[idx0] = _gcry_bswap32(tk_u32[i + 0]);
-      W_u32[idx1] = _gcry_bswap32(tk_u32[i + 1]);
+      tk_vu32[i + 0] = bcast_u32_to_vec(buf_get_le32(key + i * 4 + 0));
+      tk_vu32[i + 1] = bcast_u32_to_vec(buf_get_le32(key + i * 4 + 4));
+      W_u32[idx0] = u32_from_vec(vec_revb(tk_vu32[i + 0]));
+      W_u32[idx1] = u32_from_vec(vec_revb(tk_vu32[i + 1]));
     }
 
-  for (i = KC, j = KC, tk_prev = tk_u32[KC - 1];
+  for (i = KC, j = KC, tk_prev = tk_vu32[KC - 1];
        i < 4 * (rounds + 1);
        i += 2, j += 2)
     {
       unsigned int idx0 = keysched_idx(i + 0);
       unsigned int idx1 = keysched_idx(i + 1);
-      u32 temp0 = tk_prev;
-      u32 temp1;
+      vec_u32 temp0 = tk_prev;
+      vec_u32 temp1;
 
       if (j == KC)
         {
           j = 0;
-          temp0 = _gcry_aes_sbox4_ppc8(rol(temp0, 24)) ^ rcon;
-          rcon = ((rcon << 1) ^ (-(rcon >> 7) & 0x1b)) & 0xff;
+          temp0 = (vec_u32)(asm_sbox_be((block)vec_rl(temp0, rotate24))) ^ rcon;
+          rcon = (vec_u32)(((block)rcon << 1)
+                           ^ (-((block)rcon >> 7) & (block)rcon_const));
         }
       else if (KC == 8 && j == 4)
        {
-          temp0 = _gcry_aes_sbox4_ppc8(temp0);
+          temp0 = (vec_u32)asm_sbox_be((block)temp0);
        }
 
-      temp1 = tk_u32[j + 0];
+      temp1 = tk_vu32[j + 0];
 
-      tk_u32[j + 0] = temp0 ^ temp1;
-      tk_u32[j + 1] ^= temp0 ^ temp1;
-      tk_prev = tk_u32[j + 1];
+      tk_vu32[j + 0] = temp0 ^ temp1;
+      tk_vu32[j + 1] ^= temp0 ^ temp1;
+      tk_prev = tk_vu32[j + 1];
 
-      W_u32[idx0] = _gcry_bswap32(tk_u32[j + 0]);
-      W_u32[idx1] = _gcry_bswap32(tk_u32[j + 1]);
+      W_u32[idx0] = u32_from_vec(vec_revb(tk_vu32[j + 0]));
+      W_u32[idx1] = u32_from_vec(vec_revb(tk_vu32[j + 1]));
     }
 
-  wipememory(tk_u32, sizeof(tk_u32));
+  wipememory(tk_vu32, sizeof(tk_vu32));
 }
 
 
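
The shape of the new helpers, sketched with GNU C vector extensions rather
than the PowerPC-specific block/vec_u32 types and the vsbox-backed
asm_sbox_be() used above; the names here (v4u32, v16u8, bcast_u32,
next_rcon_vec) are hypothetical, illustration only:

  #include <stdint.h>

  typedef uint32_t v4u32 __attribute__ ((vector_size (16)));
  typedef uint8_t  v16u8 __attribute__ ((vector_size (16)));

  /* Broadcast one key-schedule word into all four 32-bit lanes.  */
  static inline v4u32
  bcast_u32 (uint32_t x)
  {
    v4u32 v = { x, x, x, x };
    return v;
  }

  /* Read one lane back out.  Every value in the key-schedule loop is
     lane-uniform (a broadcast, or XORs/rotates/S-box of broadcasts),
     so any lane works; the patch picks element 1 (big-endian) or 2
     (little-endian), which name the same physical word, presumably so
     the extraction compiles to a single move.  */
  static inline uint32_t
  u32_from_vec (v4u32 x)
  {
  #ifdef WORDS_BIGENDIAN
    return x[1];
  #else
    return x[2];
  #endif
  }

  /* The vectorised round-constant update from the second hunk: the same
     branchless GF(2^8) doubling as the old scalar code, applied to all
     sixteen byte lanes at once.  */
  static inline v16u8
  next_rcon_vec (v16u8 rcon)
  {
    const v16u8 poly = {
      0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b,
      0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
    };
    return (rcon << 1) ^ (-(rcon >> 7) & poly);
  }

With tk_vu32[], rcon and the temporaries all held as vectors, the per-round
S-box and rotate stay in the vector unit, and the only scalar extraction
left is the final store into W_u32[].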
--
2.37.2