[PATCH 6/6] rijndael-ppc: add block modes for CBC, CFB, CTR and XTS
Jussi Kivilinna
jussi.kivilinna at iki.fi
Fri Aug 23 18:52:25 CEST 2019
* cipher/rijndael-ppc.c (vec_add_uint128, _gcry_aes_ppc8_cfb_enc)
(_gcry_aes_ppc8_cfb_dec, _gcry_aes_ppc8_cbc_enc)
(_gcry_aes_ppc8_cbc_dec, _gcry_aes_ppc8_ctr_enc)
(_gcry_aes_ppc8_xts_crypt): New.
* cipher/rijndael.c [USE_PPC_CRYPTO] (_gcry_aes_ppc8_cfb_enc)
(_gcry_aes_ppc8_cfb_dec, _gcry_aes_ppc8_cbc_enc)
(_gcry_aes_ppc8_cbc_dec, _gcry_aes_ppc8_ctr_enc)
(_gcry_aes_ppc8_xts_crypt): New.
(do_setkey, _gcry_aes_cfb_enc, _gcry_aes_cfb_dec, _gcry_aes_cbc_enc)
(_gcry_aes_cbc_dec, _gcry_aes_ctr_enc)
(_gcry_aes_xts_crypt) [USE_PPC_CRYPTO]: Enable PowerPC AES
CFB/CBC/CTR/XTS bulk implementations.
* configure.ac (gcry_cv_gcc_inline_asm_ppc_altivec): Add 'vadduwm'
instruction.
--
Benchmark on POWER8 ~3.8 GHz:
Before:
AES | nanosecs/byte mebibytes/sec cycles/byte
CBC enc | 2.13 ns/B 447.2 MiB/s 8.10 c/B
CBC dec | 1.13 ns/B 843.4 MiB/s 4.30 c/B
CFB enc | 2.20 ns/B 433.9 MiB/s 8.35 c/B
CFB dec | 2.22 ns/B 429.7 MiB/s 8.43 c/B
CTR enc | 2.18 ns/B 438.2 MiB/s 8.27 c/B
CTR dec | 2.18 ns/B 437.4 MiB/s 8.28 c/B
XTS enc | 2.31 ns/B 412.8 MiB/s 8.78 c/B
XTS dec | 2.30 ns/B 414.3 MiB/s 8.75 c/B
CCM enc | 4.33 ns/B 220.1 MiB/s 16.47 c/B
CCM dec | 4.34 ns/B 219.9 MiB/s 16.48 c/B
CCM auth | 2.16 ns/B 440.6 MiB/s 8.22 c/B
EAX enc | 4.34 ns/B 219.8 MiB/s 16.49 c/B
EAX dec | 4.34 ns/B 219.8 MiB/s 16.49 c/B
EAX auth | 2.16 ns/B 440.5 MiB/s 8.23 c/B
After:
AES | nanosecs/byte mebibytes/sec cycles/byte
CBC enc | 1.06 ns/B 903.1 MiB/s 4.01 c/B
CBC dec | 0.211 ns/B 4511 MiB/s 0.803 c/B
CFB enc | 1.06 ns/B 896.7 MiB/s 4.04 c/B
CFB dec | 0.209 ns/B 4563 MiB/s 0.794 c/B
CTR enc | 0.237 ns/B 4026 MiB/s 0.900 c/B
CTR dec | 0.237 ns/B 4029 MiB/s 0.900 c/B
XTS enc | 0.496 ns/B 1922 MiB/s 1.89 c/B
XTS dec | 0.496 ns/B 1924 MiB/s 1.88 c/B
CCM enc | 1.29 ns/B 737.7 MiB/s 4.91 c/B
CCM dec | 1.29 ns/B 737.8 MiB/s 4.91 c/B
CCM auth | 1.06 ns/B 903.3 MiB/s 4.01 c/B
EAX enc | 1.29 ns/B 737.7 MiB/s 4.91 c/B
EAX dec | 1.29 ns/B 737.2 MiB/s 4.92 c/B
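
Note on the XTS code: the vector GEN_TWEAK macro below derives each
tweak with byte-swap permutes plus 1-bit/63-bit shifts.  For anyone
checking it against the specification, it performs the standard XTS
tweak update, i.e. multiplication by x in GF(2^128) with reduction
polynomial 0x87.  A plain byte-wise sketch of that update (for
illustration only, not part of the patch) looks like this:

  /* Illustration only: scalar equivalent of the XTS tweak update that
     GEN_TWEAK performs with vector shifts/permutes.  The tweak is the
     usual 16-byte little-endian XTS value; multiply it by x in
     GF(2^128) and reduce with x^128 + x^7 + x^2 + x + 1 (0x87).  */
  static void
  xts_mul2_scalar (unsigned char tweak[16])
  {
    unsigned int carry = 0;
    int i;

    for (i = 0; i < 16; i++)
      {
        unsigned int t = tweak[i];
        tweak[i] = (unsigned char)((t << 1) | carry);
        carry = t >> 7;
      }

    if (carry)
      tweak[0] ^= 0x87;
  }
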
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index 018527321..5f3c7ee30 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -230,6 +230,22 @@ vec_store_be(block vec, unsigned long offset, unsigned char *ptr,
}
+static ASM_FUNC_ATTR_INLINE block
+vec_add_uint128(block a, block b)
+{
+#if 1
+ block res;
+ /* Use assembly as GCC (v8.3) generates slow code for vec_vadduqm. */
+ __asm__ ("vadduqm %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+#else
+ return (block)vec_vadduqm((vector __uint128_t)a, (vector __uint128_t)b);
+#endif
+}
+
+
static ASM_FUNC_ATTR_INLINE u32
_gcry_aes_sbox4_ppc8(u32 fourbytes)
{
@@ -419,14 +435,612 @@ unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx,
ROUND_KEY_VARIABLES;
block b;
- b = VEC_LOAD_BE (in, bige_const);
+ b = VEC_LOAD_BE (in, bige_const);
+
+ PRELOAD_ROUND_KEYS (rk, rounds);
+
+ AES_DECRYPT (b, rounds);
+ VEC_STORE_BE (out, b, bige_const);
+
+ return 0; /* does not use stack */
+}
+
+
+void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = vec_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block iv;
+
+ iv = VEC_LOAD_BE (iv_arg, bige_const);
+
+ PRELOAD_ROUND_KEYS (rk, rounds);
+ rkeylast_orig = rkeylast;
+
+ for (; nblocks; nblocks--)
+ {
+ rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const);
+
+ AES_ENCRYPT (iv, rounds);
+
+ VEC_STORE_BE (out, iv, bige_const);
+
+ out++;
+ in++;
+ }
+
+ VEC_STORE_BE (iv_arg, iv, bige_const);
+}
+
+void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = vec_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block iv, b, bin;
+ block in0, in1, in2, in3, in4, in5, in6, in7;
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block rkey;
+
+ iv = VEC_LOAD_BE (iv_arg, bige_const);
+
+ PRELOAD_ROUND_KEYS (rk, rounds);
+ rkeylast_orig = rkeylast;
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ in0 = iv;
+ in1 = VEC_LOAD_BE (in + 0, bige_const);
+ in2 = VEC_LOAD_BE (in + 1, bige_const);
+ in3 = VEC_LOAD_BE (in + 2, bige_const);
+ in4 = VEC_LOAD_BE (in + 3, bige_const);
+ in5 = VEC_LOAD_BE (in + 4, bige_const);
+ in6 = VEC_LOAD_BE (in + 5, bige_const);
+ in7 = VEC_LOAD_BE (in + 6, bige_const);
+ iv = VEC_LOAD_BE (in + 7, bige_const);
+
+ b0 = rkey0 ^ in0;
+ b1 = rkey0 ^ in1;
+ b2 = rkey0 ^ in2;
+ b3 = rkey0 ^ in3;
+ b4 = rkey0 ^ in4;
+ b5 = rkey0 ^ in5;
+ b6 = rkey0 ^ in6;
+ b7 = rkey0 ^ in7;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD(&rk[r]); \
+ b0 = vec_cipher_be (b0, rkey); \
+ b1 = vec_cipher_be (b1, rkey); \
+ b2 = vec_cipher_be (b2, rkey); \
+ b3 = vec_cipher_be (b3, rkey); \
+ b4 = vec_cipher_be (b4, rkey); \
+ b5 = vec_cipher_be (b5, rkey); \
+ b6 = vec_cipher_be (b6, rkey); \
+ b7 = vec_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_cipherlast_be (b0, rkey ^ in1);
+ b1 = vec_cipherlast_be (b1, rkey ^ in2);
+ b2 = vec_cipherlast_be (b2, rkey ^ in3);
+ b3 = vec_cipherlast_be (b3, rkey ^ in4);
+ b4 = vec_cipherlast_be (b4, rkey ^ in5);
+ b5 = vec_cipherlast_be (b5, rkey ^ in6);
+ b6 = vec_cipherlast_be (b6, rkey ^ in7);
+ b7 = vec_cipherlast_be (b7, rkey ^ iv);
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+ VEC_STORE_BE (out + 4, b4, bige_const);
+ VEC_STORE_BE (out + 5, b5, bige_const);
+ VEC_STORE_BE (out + 6, b6, bige_const);
+ VEC_STORE_BE (out + 7, b7, bige_const);
+
+ in += 8;
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ in0 = iv;
+ in1 = VEC_LOAD_BE (in + 0, bige_const);
+ in2 = VEC_LOAD_BE (in + 1, bige_const);
+ in3 = VEC_LOAD_BE (in + 2, bige_const);
+ iv = VEC_LOAD_BE (in + 3, bige_const);
+
+ b0 = rkey0 ^ in0;
+ b1 = rkey0 ^ in1;
+ b2 = rkey0 ^ in2;
+ b3 = rkey0 ^ in3;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD(&rk[r]); \
+ b0 = vec_cipher_be (b0, rkey); \
+ b1 = vec_cipher_be (b1, rkey); \
+ b2 = vec_cipher_be (b2, rkey); \
+ b3 = vec_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_cipherlast_be (b0, rkey ^ in1);
+ b1 = vec_cipherlast_be (b1, rkey ^ in2);
+ b2 = vec_cipherlast_be (b2, rkey ^ in3);
+ b3 = vec_cipherlast_be (b3, rkey ^ iv);
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ bin = VEC_LOAD_BE (in, bige_const);
+ rkeylast = rkeylast_orig ^ bin;
+ b = iv;
+ iv = bin;
+
+ AES_ENCRYPT (b, rounds);
+
+ VEC_STORE_BE (out, b, bige_const);
+
+ out++;
+ in++;
+ }
+
+ VEC_STORE_BE (iv_arg, iv, bige_const);
+}
+
+
+void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac)
+{
+ const block bige_const = vec_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block lastiv, b;
+
+ lastiv = VEC_LOAD_BE (iv_arg, bige_const);
+
+ PRELOAD_ROUND_KEYS (rk, rounds);
+
+ for (; nblocks; nblocks--)
+ {
+ b = lastiv ^ VEC_LOAD_BE (in, bige_const);
+
+ AES_ENCRYPT (b, rounds);
+
+ lastiv = b;
+ VEC_STORE_BE (out, b, bige_const);
+
+ in++;
+ if (!cbc_mac)
+ out++;
+ }
+
+ VEC_STORE_BE (iv_arg, lastiv, bige_const);
+}
+
+void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = vec_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block in0, in1, in2, in3, in4, in5, in6, in7;
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block rkey;
+ block iv, b;
+
+ if (!ctx->decryption_prepared)
+ {
+ aes_ppc8_prepare_decryption (ctx);
+ ctx->decryption_prepared = 1;
+ }
+
+ iv = VEC_LOAD_BE (iv_arg, bige_const);
+
+ PRELOAD_ROUND_KEYS (rk, rounds);
+ rkeylast_orig = rkeylast;
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ in0 = VEC_LOAD_BE (in + 0, bige_const);
+ in1 = VEC_LOAD_BE (in + 1, bige_const);
+ in2 = VEC_LOAD_BE (in + 2, bige_const);
+ in3 = VEC_LOAD_BE (in + 3, bige_const);
+ in4 = VEC_LOAD_BE (in + 4, bige_const);
+ in5 = VEC_LOAD_BE (in + 5, bige_const);
+ in6 = VEC_LOAD_BE (in + 6, bige_const);
+ in7 = VEC_LOAD_BE (in + 7, bige_const);
+
+ b0 = rkey0 ^ in0;
+ b1 = rkey0 ^ in1;
+ b2 = rkey0 ^ in2;
+ b3 = rkey0 ^ in3;
+ b4 = rkey0 ^ in4;
+ b5 = rkey0 ^ in5;
+ b6 = rkey0 ^ in6;
+ b7 = rkey0 ^ in7;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD(&rk[r]); \
+ b0 = vec_ncipher_be (b0, rkey); \
+ b1 = vec_ncipher_be (b1, rkey); \
+ b2 = vec_ncipher_be (b2, rkey); \
+ b3 = vec_ncipher_be (b3, rkey); \
+ b4 = vec_ncipher_be (b4, rkey); \
+ b5 = vec_ncipher_be (b5, rkey); \
+ b6 = vec_ncipher_be (b6, rkey); \
+ b7 = vec_ncipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_ncipherlast_be (b0, rkey ^ iv);
+ b1 = vec_ncipherlast_be (b1, rkey ^ in0);
+ b2 = vec_ncipherlast_be (b2, rkey ^ in1);
+ b3 = vec_ncipherlast_be (b3, rkey ^ in2);
+ b4 = vec_ncipherlast_be (b4, rkey ^ in3);
+ b5 = vec_ncipherlast_be (b5, rkey ^ in4);
+ b6 = vec_ncipherlast_be (b6, rkey ^ in5);
+ b7 = vec_ncipherlast_be (b7, rkey ^ in6);
+ iv = in7;
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+ VEC_STORE_BE (out + 4, b4, bige_const);
+ VEC_STORE_BE (out + 5, b5, bige_const);
+ VEC_STORE_BE (out + 6, b6, bige_const);
+ VEC_STORE_BE (out + 7, b7, bige_const);
+
+ in += 8;
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ in0 = VEC_LOAD_BE (in + 0, bige_const);
+ in1 = VEC_LOAD_BE (in + 1, bige_const);
+ in2 = VEC_LOAD_BE (in + 2, bige_const);
+ in3 = VEC_LOAD_BE (in + 3, bige_const);
+
+ b0 = rkey0 ^ in0;
+ b1 = rkey0 ^ in1;
+ b2 = rkey0 ^ in2;
+ b3 = rkey0 ^ in3;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD(&rk[r]); \
+ b0 = vec_ncipher_be (b0, rkey); \
+ b1 = vec_ncipher_be (b1, rkey); \
+ b2 = vec_ncipher_be (b2, rkey); \
+ b3 = vec_ncipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_ncipherlast_be (b0, rkey ^ iv);
+ b1 = vec_ncipherlast_be (b1, rkey ^ in0);
+ b2 = vec_ncipherlast_be (b2, rkey ^ in1);
+ b3 = vec_ncipherlast_be (b3, rkey ^ in2);
+ iv = in3;
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ rkeylast = rkeylast_orig ^ iv;
+
+ iv = VEC_LOAD_BE (in, bige_const);
+ b = iv;
+ AES_DECRYPT (b, rounds);
+
+ VEC_STORE_BE (out, b, bige_const);
+
+ in++;
+ out++;
+ }
+
+ VEC_STORE_BE (iv_arg, iv, bige_const);
+}
+
+
+void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ static const unsigned char vec_one_const[16] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+ const block bige_const = vec_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block ctr, b, one;
+
+ ctr = VEC_LOAD_BE (ctr_arg, bige_const);
+ one = VEC_LOAD_BE (&vec_one_const, bige_const);
+
+ PRELOAD_ROUND_KEYS (rk, rounds);
+ rkeylast_orig = rkeylast;
+
+ if (nblocks >= 4)
+ {
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block two, three, four;
+ block ctr4;
+ block rkey;
+
+ two = vec_add_uint128 (one, one);
+ three = vec_add_uint128 (two, one);
+ four = vec_add_uint128 (two, two);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ ctr4 = vec_add_uint128 (ctr, four);
+ b0 = rkey0 ^ ctr;
+ b1 = rkey0 ^ vec_add_uint128 (ctr, one);
+ b2 = rkey0 ^ vec_add_uint128 (ctr, two);
+ b3 = rkey0 ^ vec_add_uint128 (ctr, three);
+ b4 = rkey0 ^ ctr4;
+ b5 = rkey0 ^ vec_add_uint128 (ctr4, one);
+ b6 = rkey0 ^ vec_add_uint128 (ctr4, two);
+ b7 = rkey0 ^ vec_add_uint128 (ctr4, three);
+ ctr = vec_add_uint128 (ctr4, four);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD(&rk[r]); \
+ b0 = vec_cipher_be (b0, rkey); \
+ b1 = vec_cipher_be (b1, rkey); \
+ b2 = vec_cipher_be (b2, rkey); \
+ b3 = vec_cipher_be (b3, rkey); \
+ b4 = vec_cipher_be (b4, rkey); \
+ b5 = vec_cipher_be (b5, rkey); \
+ b6 = vec_cipher_be (b6, rkey); \
+ b7 = vec_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const));
+ b1 = vec_cipherlast_be (b1, rkey ^ VEC_LOAD_BE (in + 1, bige_const));
+ b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const));
+ b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const));
+ b4 = vec_cipherlast_be (b4, rkey ^ VEC_LOAD_BE (in + 4, bige_const));
+ b5 = vec_cipherlast_be (b5, rkey ^ VEC_LOAD_BE (in + 5, bige_const));
+ b6 = vec_cipherlast_be (b6, rkey ^ VEC_LOAD_BE (in + 6, bige_const));
+ b7 = vec_cipherlast_be (b7, rkey ^ VEC_LOAD_BE (in + 7, bige_const));
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+ VEC_STORE_BE (out + 4, b4, bige_const);
+ VEC_STORE_BE (out + 5, b5, bige_const);
+ VEC_STORE_BE (out + 6, b6, bige_const);
+ VEC_STORE_BE (out + 7, b7, bige_const);
+
+ in += 8;
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ b0 = rkey0 ^ ctr;
+ b1 = rkey0 ^ vec_add_uint128 (ctr, one);
+ b2 = rkey0 ^ vec_add_uint128 (ctr, two);
+ b3 = rkey0 ^ vec_add_uint128 (ctr, three);
+ ctr = vec_add_uint128 (ctr, four);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD(&rk[r]); \
+ b0 = vec_cipher_be (b0, rkey); \
+ b1 = vec_cipher_be (b1, rkey); \
+ b2 = vec_cipher_be (b2, rkey); \
+ b3 = vec_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const));
+ b1 = vec_cipherlast_be (b1, rkey ^ VEC_LOAD_BE (in + 1, bige_const));
+ b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const));
+ b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const));
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ b = ctr;
+ ctr = vec_add_uint128 (ctr, one);
+ rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const);
- PRELOAD_ROUND_KEYS (rk, rounds);
+ AES_ENCRYPT (b, rounds);
- AES_DECRYPT (b, rounds);
- VEC_STORE_BE (out, b, bige_const);
+ VEC_STORE_BE (out, b, bige_const);
- return 0; /* does not use stack */
+ out++;
+ in++;
+ }
+
+ VEC_STORE_BE (ctr_arg, ctr, bige_const);
}
@@ -1091,4 +1705,400 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
return 0;
}
+
+void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
+{
+ static const block vec_bswap64_const =
+ { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
+ static const block vec_bswap128_const =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+ static const unsigned char vec_tweak_const[16] =
+ { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 };
+ static const vector unsigned long long vec_shift63_const =
+ { 63, 63 };
+ static const vector unsigned long long vec_shift1_const =
+ { 1, 1 };
+ const block bige_const = vec_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ block tweak_tmp, tweak_next, tweak;
+ block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey;
+ block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7;
+ block tweak_const, bswap64_const, bswap128_const;
+ vector unsigned long long shift63_const, shift1_const;
+ ROUND_KEY_VARIABLES;
+
+ tweak_const = VEC_LOAD_BE (&vec_tweak_const, bige_const);
+ bswap64_const = ALIGNED_LOAD (&vec_bswap64_const);
+ bswap128_const = ALIGNED_LOAD (&vec_bswap128_const);
+ shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const);
+ shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const);
+
+ tweak_next = VEC_LOAD_BE (tweak_arg, bige_const);
+
+#define GEN_TWEAK(tweak, tmp) /* Generate next tweak. */ \
+ tmp = vec_vperm(tweak, tweak, bswap64_const); \
+ tweak = vec_vperm(tweak, tweak, bswap128_const); \
+ tmp = (block)(vec_sra((vector unsigned long long)tmp, shift63_const)) & \
+ tweak_const; \
+ tweak = (block)vec_sl((vector unsigned long long)tweak, shift1_const); \
+ tweak = tweak ^ tmp; \
+ tweak = vec_vperm(tweak, tweak, bswap128_const);
+
+ if (encrypt)
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+ PRELOAD_ROUND_KEYS (rk, rounds);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ tweak0 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak1 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak2 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak3 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak4 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak5 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak6 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak7 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+
+ b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0;
+ b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0;
+ b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0;
+ b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0;
+ b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0;
+ b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0;
+ b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0;
+ b7 = VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (&rk[r]); \
+ b0 = vec_cipher_be (b0, rkey); \
+ b1 = vec_cipher_be (b1, rkey); \
+ b2 = vec_cipher_be (b2, rkey); \
+ b3 = vec_cipher_be (b3, rkey); \
+ b4 = vec_cipher_be (b4, rkey); \
+ b5 = vec_cipher_be (b5, rkey); \
+ b6 = vec_cipher_be (b6, rkey); \
+ b7 = vec_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_cipherlast_be (b0, rkey ^ tweak0);
+ b1 = vec_cipherlast_be (b1, rkey ^ tweak1);
+ b2 = vec_cipherlast_be (b2, rkey ^ tweak2);
+ b3 = vec_cipherlast_be (b3, rkey ^ tweak3);
+ b4 = vec_cipherlast_be (b4, rkey ^ tweak4);
+ b5 = vec_cipherlast_be (b5, rkey ^ tweak5);
+ b6 = vec_cipherlast_be (b6, rkey ^ tweak6);
+ b7 = vec_cipherlast_be (b7, rkey ^ tweak7);
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+ VEC_STORE_BE (out + 4, b4, bige_const);
+ VEC_STORE_BE (out + 5, b5, bige_const);
+ VEC_STORE_BE (out + 6, b6, bige_const);
+ VEC_STORE_BE (out + 7, b7, bige_const);
+
+ in += 8;
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ tweak0 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak1 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak2 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak3 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+
+ b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0;
+ b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0;
+ b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0;
+ b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (&rk[r]); \
+ b0 = vec_cipher_be (b0, rkey); \
+ b1 = vec_cipher_be (b1, rkey); \
+ b2 = vec_cipher_be (b2, rkey); \
+ b3 = vec_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_cipherlast_be (b0, rkey ^ tweak0);
+ b1 = vec_cipherlast_be (b1, rkey ^ tweak1);
+ b2 = vec_cipherlast_be (b2, rkey ^ tweak2);
+ b3 = vec_cipherlast_be (b3, rkey ^ tweak3);
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ tweak = tweak_next;
+
+ /* Xor-Encrypt/Decrypt-Xor block. */
+ b = VEC_LOAD_BE (in, bige_const) ^ tweak;
+
+ /* Generate next tweak. */
+ GEN_TWEAK (tweak_next, tweak_tmp);
+
+ AES_ENCRYPT (b, rounds);
+
+ b ^= tweak;
+ VEC_STORE_BE (out, b, bige_const);
+
+ in++;
+ out++;
+ }
+ }
+ else
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+ if (!ctx->decryption_prepared)
+ {
+ aes_ppc8_prepare_decryption (ctx);
+ ctx->decryption_prepared = 1;
+ }
+
+ PRELOAD_ROUND_KEYS (rk, rounds);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ tweak0 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak1 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak2 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak3 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak4 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak5 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak6 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak7 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+
+ b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0;
+ b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0;
+ b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0;
+ b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0;
+ b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0;
+ b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0;
+ b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0;
+ b7 = VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (&rk[r]); \
+ b0 = vec_ncipher_be (b0, rkey); \
+ b1 = vec_ncipher_be (b1, rkey); \
+ b2 = vec_ncipher_be (b2, rkey); \
+ b3 = vec_ncipher_be (b3, rkey); \
+ b4 = vec_ncipher_be (b4, rkey); \
+ b5 = vec_ncipher_be (b5, rkey); \
+ b6 = vec_ncipher_be (b6, rkey); \
+ b7 = vec_ncipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_ncipherlast_be (b0, rkey ^ tweak0);
+ b1 = vec_ncipherlast_be (b1, rkey ^ tweak1);
+ b2 = vec_ncipherlast_be (b2, rkey ^ tweak2);
+ b3 = vec_ncipherlast_be (b3, rkey ^ tweak3);
+ b4 = vec_ncipherlast_be (b4, rkey ^ tweak4);
+ b5 = vec_ncipherlast_be (b5, rkey ^ tweak5);
+ b6 = vec_ncipherlast_be (b6, rkey ^ tweak6);
+ b7 = vec_ncipherlast_be (b7, rkey ^ tweak7);
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+ VEC_STORE_BE (out + 4, b4, bige_const);
+ VEC_STORE_BE (out + 5, b5, bige_const);
+ VEC_STORE_BE (out + 6, b6, bige_const);
+ VEC_STORE_BE (out + 7, b7, bige_const);
+
+ in += 8;
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ tweak0 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak1 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak2 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+ tweak3 = tweak_next;
+ GEN_TWEAK (tweak_next, tweak_tmp);
+
+ b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0;
+ b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0;
+ b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0;
+ b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (&rk[r]); \
+ b0 = vec_ncipher_be (b0, rkey); \
+ b1 = vec_ncipher_be (b1, rkey); \
+ b2 = vec_ncipher_be (b2, rkey); \
+ b3 = vec_ncipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = vec_ncipherlast_be (b0, rkey ^ tweak0);
+ b1 = vec_ncipherlast_be (b1, rkey ^ tweak1);
+ b2 = vec_ncipherlast_be (b2, rkey ^ tweak2);
+ b3 = vec_ncipherlast_be (b3, rkey ^ tweak3);
+
+ VEC_STORE_BE (out + 0, b0, bige_const);
+ VEC_STORE_BE (out + 1, b1, bige_const);
+ VEC_STORE_BE (out + 2, b2, bige_const);
+ VEC_STORE_BE (out + 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ tweak = tweak_next;
+
+ /* Xor-Encrypt/Decrypt-Xor block. */
+ b = VEC_LOAD_BE (in, bige_const) ^ tweak;
+
+ /* Generate next tweak. */
+ GEN_TWEAK (tweak_next, tweak_tmp);
+
+ AES_DECRYPT (b, rounds);
+
+ b ^= tweak;
+ VEC_STORE_BE (out, b, bige_const);
+
+ in++;
+ out++;
+ }
+ }
+
+ VEC_STORE_BE (tweak_arg, tweak_next, bige_const);
+
+#undef GEN_TWEAK
+}
+
#endif /* USE_PPC_CRYPTO */
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index f15ac18b1..ebd1a11a5 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -210,11 +210,33 @@ extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx,
extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx,
unsigned char *dst,
const unsigned char *src);
+
+extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+
extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
const void *inbuf_arg, size_t nblocks,
int encrypt);
extern size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c,
const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg,
+ const void *inbuf_arg,
+ size_t nblocks, int encrypt);
#endif /*USE_PPC_CRYPTO*/
static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
@@ -452,8 +474,14 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
ctx->use_ppc_crypto = 1;
if (hd)
{
+ hd->bulk.cfb_enc = _gcry_aes_ppc8_cfb_enc;
+ hd->bulk.cfb_dec = _gcry_aes_ppc8_cfb_dec;
+ hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc;
+ hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec;
+ hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc;
hd->bulk.ocb_crypt = _gcry_aes_ppc8_ocb_crypt;
hd->bulk.ocb_auth = _gcry_aes_ppc8_ocb_auth;
+ hd->bulk.xts_crypt = _gcry_aes_ppc8_xts_crypt;
}
}
#endif
@@ -896,6 +924,13 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+ else if (ctx->use_ppc_crypto)
+ {
+ _gcry_aes_ppc8_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO*/
else
{
rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
@@ -957,6 +992,13 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+ else if (ctx->use_ppc_crypto)
+ {
+ _gcry_aes_ppc8_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO*/
else
{
rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
@@ -1025,6 +1067,13 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+ else if (ctx->use_ppc_crypto)
+ {
+ _gcry_aes_ppc8_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO*/
else
{
union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp;
@@ -1268,6 +1317,13 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+ else if (ctx->use_ppc_crypto)
+ {
+ _gcry_aes_ppc8_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO*/
else
{
rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
@@ -1326,6 +1382,13 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+ else if (ctx->use_ppc_crypto)
+ {
+ _gcry_aes_ppc8_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO*/
else
{
unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16;
@@ -1556,6 +1619,13 @@ _gcry_aes_xts_crypt (void *context, unsigned char *tweak,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+ else if (ctx->use_ppc_crypto)
+ {
+ _gcry_aes_ppc8_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO*/
else
{
if (encrypt)
diff --git a/configure.ac b/configure.ac
index 586145aa4..d7725b553 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1905,6 +1905,7 @@ AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto
"lvx %v20,%r12,%r0;\n"
"vcipher %v0, %v1, %v22;\n"
"lxvw4x %vs32, %r0, %r1;\n"
+ "vadduwm %v0, %v1, %v22;\n"
);
]])],
[gcry_cv_gcc_inline_asm_ppc_altivec=yes])