[PATCH 6/6] rijndael-ppc: add block modes for CBC, CFB, CTR and XTS

Jussi Kivilinna jussi.kivilinna at iki.fi
Fri Aug 23 18:52:25 CEST 2019


* cipher/rijndael-ppc.c (vec_add_uint128, _gcry_aes_ppc8_cfb_enc)
(_gcry_aes_ppc8_cfb_dec, _gcry_aes_ppc8_cbc_enc)
(_gcry_aes_ppc8_cbc_dec, _gcry_aes_ppc8_ctr_enc)
(_gcry_aes_ppc8_xts_crypt): New.
* cipher/rijndael.c [USE_PPC_CRYPTO] (_gcry_aes_ppc8_cfb_enc)
(_gcry_aes_ppc8_cfb_dec, _gcry_aes_ppc8_cbc_enc)
(_gcry_aes_ppc8_cbc_dec, _gcry_aes_ppc8_ctr_enc)
(_gcry_aes_ppc8_xts_crypt): New.
(do_setkey, _gcry_aes_cfb_enc, _gcry_aes_cfb_dec, _gcry_aes_cbc_enc)
(_gcry_aes_cbc_dec, _gcry_aes_ctr_enc)
(_gcry_aes_xts_crypt) [USE_PPC_CRYPTO]: Enable PowerPC AES
CFB/CBC/CTR/XTS bulk implementations.
* configure.ac (gcry_cv_gcc_inline_asm_ppc_altivec): Add 'vadduwm'
instruction.
--

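The CTR path treats the counter block as a single 128-bit big-endian
integer and advances it with 'vadduqm' (wrapped in the new
vec_add_uint128 helper). As a rough scalar illustration of that
arithmetic only - not the vector code in this patch, and with a
made-up helper name - the increment boils down to:

  /* Illustration only: add 'inc' to a 16-byte big-endian counter,
     propagating the carry across all 128 bits; vadduqm does this
     in a single vector instruction. */
  static void
  ctr128_add (unsigned char ctr[16], unsigned int inc)
  {
    unsigned int carry = inc;
    int i;

    for (i = 15; i >= 0 && carry; i--)
      {
        carry += ctr[i];
        ctr[i] = carry & 0xff;
        carry >>= 8;
      }
  }
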
Benchmark on POWER8 ~3.8GHz:

Before:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        CBC enc |      2.13 ns/B     447.2 MiB/s      8.10 c/B
        CBC dec |      1.13 ns/B     843.4 MiB/s      4.30 c/B
        CFB enc |      2.20 ns/B     433.9 MiB/s      8.35 c/B
        CFB dec |      2.22 ns/B     429.7 MiB/s      8.43 c/B
        CTR enc |      2.18 ns/B     438.2 MiB/s      8.27 c/B
        CTR dec |      2.18 ns/B     437.4 MiB/s      8.28 c/B
        XTS enc |      2.31 ns/B     412.8 MiB/s      8.78 c/B
        XTS dec |      2.30 ns/B     414.3 MiB/s      8.75 c/B
        CCM enc |      4.33 ns/B     220.1 MiB/s     16.47 c/B
        CCM dec |      4.34 ns/B     219.9 MiB/s     16.48 c/B
       CCM auth |      2.16 ns/B     440.6 MiB/s      8.22 c/B
        EAX enc |      4.34 ns/B     219.8 MiB/s     16.49 c/B
        EAX dec |      4.34 ns/B     219.8 MiB/s     16.49 c/B
       EAX auth |      2.16 ns/B     440.5 MiB/s      8.23 c/B

After:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        CBC enc |      1.06 ns/B     903.1 MiB/s      4.01 c/B
        CBC dec |     0.211 ns/B      4511 MiB/s     0.803 c/B
        CFB enc |      1.06 ns/B     896.7 MiB/s      4.04 c/B
        CFB dec |     0.209 ns/B      4563 MiB/s     0.794 c/B
        CTR enc |     0.237 ns/B      4026 MiB/s     0.900 c/B
        CTR dec |     0.237 ns/B      4029 MiB/s     0.900 c/B
        XTS enc |     0.496 ns/B      1922 MiB/s      1.89 c/B
        XTS dec |     0.496 ns/B      1924 MiB/s      1.88 c/B
        CCM enc |      1.29 ns/B     737.7 MiB/s      4.91 c/B
        CCM dec |      1.29 ns/B     737.8 MiB/s      4.91 c/B
       CCM auth |      1.06 ns/B     903.3 MiB/s      4.01 c/B
        EAX enc |      1.29 ns/B     737.7 MiB/s      4.91 c/B
        EAX dec |      1.29 ns/B     737.2 MiB/s      4.92 c/B

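For reference, the GEN_TWEAK macro in the XTS code implements the
usual XTS tweak update, i.e. multiplication of the 128-bit tweak by x
in GF(2^128) with the 0x87 reduction constant; the surrounding byte
permutes and 64-bit vector shifts are just the vectorized way of doing
that doubling and carry propagation. A plain scalar sketch of the same
update (illustration only, with a made-up helper name, assuming the
tweak is kept least-significant byte first as in the generic XTS code)
looks like this:

  /* Illustration only: multiply the XTS tweak by x in GF(2^128),
     tweak stored least-significant byte first. */
  static void
  xts_gfmul_x (unsigned char tweak[16])
  {
    unsigned int carry = 0;
    int i;

    for (i = 0; i < 16; i++)
      {
        unsigned int t = ((unsigned int)tweak[i] << 1) | carry;

        carry = (t >> 8) & 1;
        tweak[i] = t & 0xff;
      }

    if (carry)
      tweak[0] ^= 0x87;
  }
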
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 0 files changed

diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index 018527321..5f3c7ee30 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -230,6 +230,22 @@ vec_store_be(block vec, unsigned long offset, unsigned char *ptr,
 }
 
 
+static ASM_FUNC_ATTR_INLINE block
+vec_add_uint128(block a, block b)
+{
+#if 1
+  block res;
+  /* Use assembly as GCC (v8.3) generates slow code for vec_vadduqm. */
+  __asm__ ("vadduqm %0,%1,%2\n\t"
+	   : "=v" (res)
+	   : "v" (a), "v" (b));
+  return res;
+#else
+  return (block)vec_vadduqm((vector __uint128_t)a, (vector __uint128_t)b);
+#endif
+}
+
+
 static ASM_FUNC_ATTR_INLINE u32
 _gcry_aes_sbox4_ppc8(u32 fourbytes)
 {
@@ -419,14 +435,612 @@ unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx,
   ROUND_KEY_VARIABLES;
   block b;
 
-  b = VEC_LOAD_BE (in, bige_const);
+  b = VEC_LOAD_BE (in, bige_const);
+
+  PRELOAD_ROUND_KEYS (rk, rounds);
+
+  AES_DECRYPT (b, rounds);
+  VEC_STORE_BE (out, b, bige_const);
+
+  return 0; /* does not use stack */
+}
+
+
+void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv_arg,
+			     void *outbuf_arg, const void *inbuf_arg,
+			     size_t nblocks)
+{
+  const block bige_const = vec_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block iv;
+
+  iv = VEC_LOAD_BE (iv_arg, bige_const);
+
+  PRELOAD_ROUND_KEYS (rk, rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks; nblocks--)
+    {
+      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const);
+
+      AES_ENCRYPT (iv, rounds);
+
+      VEC_STORE_BE (out, iv, bige_const);
+
+      out++;
+      in++;
+    }
+
+  VEC_STORE_BE (iv_arg, iv, bige_const);
+}
+
+void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg,
+			     void *outbuf_arg, const void *inbuf_arg,
+			     size_t nblocks)
+{
+  const block bige_const = vec_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block iv, b, bin;
+  block in0, in1, in2, in3, in4, in5, in6, in7;
+  block b0, b1, b2, b3, b4, b5, b6, b7;
+  block rkey;
+
+  iv = VEC_LOAD_BE (iv_arg, bige_const);
+
+  PRELOAD_ROUND_KEYS (rk, rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks >= 8; nblocks -= 8)
+    {
+      in0 = iv;
+      in1 = VEC_LOAD_BE (in + 0, bige_const);
+      in2 = VEC_LOAD_BE (in + 1, bige_const);
+      in3 = VEC_LOAD_BE (in + 2, bige_const);
+      in4 = VEC_LOAD_BE (in + 3, bige_const);
+      in5 = VEC_LOAD_BE (in + 4, bige_const);
+      in6 = VEC_LOAD_BE (in + 5, bige_const);
+      in7 = VEC_LOAD_BE (in + 6, bige_const);
+      iv = VEC_LOAD_BE (in + 7, bige_const);
+
+      b0 = rkey0 ^ in0;
+      b1 = rkey0 ^ in1;
+      b2 = rkey0 ^ in2;
+      b3 = rkey0 ^ in3;
+      b4 = rkey0 ^ in4;
+      b5 = rkey0 ^ in5;
+      b6 = rkey0 ^ in6;
+      b7 = rkey0 ^ in7;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD(&rk[r]); \
+	      b0 = vec_cipher_be (b0, rkey); \
+	      b1 = vec_cipher_be (b1, rkey); \
+	      b2 = vec_cipher_be (b2, rkey); \
+	      b3 = vec_cipher_be (b3, rkey); \
+	      b4 = vec_cipher_be (b4, rkey); \
+	      b5 = vec_cipher_be (b5, rkey); \
+	      b6 = vec_cipher_be (b6, rkey); \
+	      b7 = vec_cipher_be (b7, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      rkey = rkeylast;
+      b0 = vec_cipherlast_be (b0, rkey ^ in1);
+      b1 = vec_cipherlast_be (b1, rkey ^ in2);
+      b2 = vec_cipherlast_be (b2, rkey ^ in3);
+      b3 = vec_cipherlast_be (b3, rkey ^ in4);
+      b4 = vec_cipherlast_be (b4, rkey ^ in5);
+      b5 = vec_cipherlast_be (b5, rkey ^ in6);
+      b6 = vec_cipherlast_be (b6, rkey ^ in7);
+      b7 = vec_cipherlast_be (b7, rkey ^ iv);
+
+      VEC_STORE_BE (out + 0, b0, bige_const);
+      VEC_STORE_BE (out + 1, b1, bige_const);
+      VEC_STORE_BE (out + 2, b2, bige_const);
+      VEC_STORE_BE (out + 3, b3, bige_const);
+      VEC_STORE_BE (out + 4, b4, bige_const);
+      VEC_STORE_BE (out + 5, b5, bige_const);
+      VEC_STORE_BE (out + 6, b6, bige_const);
+      VEC_STORE_BE (out + 7, b7, bige_const);
+
+      in += 8;
+      out += 8;
+    }
+
+  if (nblocks >= 4)
+    {
+      in0 = iv;
+      in1 = VEC_LOAD_BE (in + 0, bige_const);
+      in2 = VEC_LOAD_BE (in + 1, bige_const);
+      in3 = VEC_LOAD_BE (in + 2, bige_const);
+      iv = VEC_LOAD_BE (in + 3, bige_const);
+
+      b0 = rkey0 ^ in0;
+      b1 = rkey0 ^ in1;
+      b2 = rkey0 ^ in2;
+      b3 = rkey0 ^ in3;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD(&rk[r]); \
+	      b0 = vec_cipher_be (b0, rkey); \
+	      b1 = vec_cipher_be (b1, rkey); \
+	      b2 = vec_cipher_be (b2, rkey); \
+	      b3 = vec_cipher_be (b3, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      rkey = rkeylast;
+      b0 = vec_cipherlast_be (b0, rkey ^ in1);
+      b1 = vec_cipherlast_be (b1, rkey ^ in2);
+      b2 = vec_cipherlast_be (b2, rkey ^ in3);
+      b3 = vec_cipherlast_be (b3, rkey ^ iv);
+
+      VEC_STORE_BE (out + 0, b0, bige_const);
+      VEC_STORE_BE (out + 1, b1, bige_const);
+      VEC_STORE_BE (out + 2, b2, bige_const);
+      VEC_STORE_BE (out + 3, b3, bige_const);
+
+      in += 4;
+      out += 4;
+      nblocks -= 4;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      bin = VEC_LOAD_BE (in, bige_const);
+      rkeylast = rkeylast_orig ^ bin;
+      b = iv;
+      iv = bin;
+
+      AES_ENCRYPT (b, rounds);
+
+      VEC_STORE_BE (out, b, bige_const);
+
+      out++;
+      in++;
+    }
+
+  VEC_STORE_BE (iv_arg, iv, bige_const);
+}
+
+
+void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv_arg,
+			     void *outbuf_arg, const void *inbuf_arg,
+			     size_t nblocks, int cbc_mac)
+{
+  const block bige_const = vec_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block lastiv, b;
+
+  lastiv = VEC_LOAD_BE (iv_arg, bige_const);
+
+  PRELOAD_ROUND_KEYS (rk, rounds);
+
+  for (; nblocks; nblocks--)
+    {
+      b = lastiv ^ VEC_LOAD_BE (in, bige_const);
+
+      AES_ENCRYPT (b, rounds);
+
+      lastiv = b;
+      VEC_STORE_BE (out, b, bige_const);
+
+      in++;
+      if (!cbc_mac)
+	out++;
+    }
+
+  VEC_STORE_BE (iv_arg, lastiv, bige_const);
+}
+
+void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg,
+			     void *outbuf_arg, const void *inbuf_arg,
+			     size_t nblocks)
+{
+  const block bige_const = vec_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschdec;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block in0, in1, in2, in3, in4, in5, in6, in7;
+  block b0, b1, b2, b3, b4, b5, b6, b7;
+  block rkey;
+  block iv, b;
+
+  if (!ctx->decryption_prepared)
+    {
+      aes_ppc8_prepare_decryption (ctx);
+      ctx->decryption_prepared = 1;
+    }
+
+  iv = VEC_LOAD_BE (iv_arg, bige_const);
+
+  PRELOAD_ROUND_KEYS (rk, rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks >= 8; nblocks -= 8)
+    {
+      in0 = VEC_LOAD_BE (in + 0, bige_const);
+      in1 = VEC_LOAD_BE (in + 1, bige_const);
+      in2 = VEC_LOAD_BE (in + 2, bige_const);
+      in3 = VEC_LOAD_BE (in + 3, bige_const);
+      in4 = VEC_LOAD_BE (in + 4, bige_const);
+      in5 = VEC_LOAD_BE (in + 5, bige_const);
+      in6 = VEC_LOAD_BE (in + 6, bige_const);
+      in7 = VEC_LOAD_BE (in + 7, bige_const);
+
+      b0 = rkey0 ^ in0;
+      b1 = rkey0 ^ in1;
+      b2 = rkey0 ^ in2;
+      b3 = rkey0 ^ in3;
+      b4 = rkey0 ^ in4;
+      b5 = rkey0 ^ in5;
+      b6 = rkey0 ^ in6;
+      b7 = rkey0 ^ in7;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD(&rk[r]); \
+	      b0 = vec_ncipher_be (b0, rkey); \
+	      b1 = vec_ncipher_be (b1, rkey); \
+	      b2 = vec_ncipher_be (b2, rkey); \
+	      b3 = vec_ncipher_be (b3, rkey); \
+	      b4 = vec_ncipher_be (b4, rkey); \
+	      b5 = vec_ncipher_be (b5, rkey); \
+	      b6 = vec_ncipher_be (b6, rkey); \
+	      b7 = vec_ncipher_be (b7, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      rkey = rkeylast;
+      b0 = vec_ncipherlast_be (b0, rkey ^ iv);
+      b1 = vec_ncipherlast_be (b1, rkey ^ in0);
+      b2 = vec_ncipherlast_be (b2, rkey ^ in1);
+      b3 = vec_ncipherlast_be (b3, rkey ^ in2);
+      b4 = vec_ncipherlast_be (b4, rkey ^ in3);
+      b5 = vec_ncipherlast_be (b5, rkey ^ in4);
+      b6 = vec_ncipherlast_be (b6, rkey ^ in5);
+      b7 = vec_ncipherlast_be (b7, rkey ^ in6);
+      iv = in7;
+
+      VEC_STORE_BE (out + 0, b0, bige_const);
+      VEC_STORE_BE (out + 1, b1, bige_const);
+      VEC_STORE_BE (out + 2, b2, bige_const);
+      VEC_STORE_BE (out + 3, b3, bige_const);
+      VEC_STORE_BE (out + 4, b4, bige_const);
+      VEC_STORE_BE (out + 5, b5, bige_const);
+      VEC_STORE_BE (out + 6, b6, bige_const);
+      VEC_STORE_BE (out + 7, b7, bige_const);
+
+      in += 8;
+      out += 8;
+    }
+
+  if (nblocks >= 4)
+    {
+      in0 = VEC_LOAD_BE (in + 0, bige_const);
+      in1 = VEC_LOAD_BE (in + 1, bige_const);
+      in2 = VEC_LOAD_BE (in + 2, bige_const);
+      in3 = VEC_LOAD_BE (in + 3, bige_const);
+
+      b0 = rkey0 ^ in0;
+      b1 = rkey0 ^ in1;
+      b2 = rkey0 ^ in2;
+      b3 = rkey0 ^ in3;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD(&rk[r]); \
+	      b0 = vec_ncipher_be (b0, rkey); \
+	      b1 = vec_ncipher_be (b1, rkey); \
+	      b2 = vec_ncipher_be (b2, rkey); \
+	      b3 = vec_ncipher_be (b3, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+	{
+	  DO_ROUND(10);
+	  DO_ROUND(11);
+	  if (rounds > 12)
+	    {
+	      DO_ROUND(12);
+	      DO_ROUND(13);
+	    }
+	}
+
+#undef DO_ROUND
+
+      rkey = rkeylast;
+      b0 = vec_ncipherlast_be (b0, rkey ^ iv);
+      b1 = vec_ncipherlast_be (b1, rkey ^ in0);
+      b2 = vec_ncipherlast_be (b2, rkey ^ in1);
+      b3 = vec_ncipherlast_be (b3, rkey ^ in2);
+      iv = in3;
+
+      VEC_STORE_BE (out + 0, b0, bige_const);
+      VEC_STORE_BE (out + 1, b1, bige_const);
+      VEC_STORE_BE (out + 2, b2, bige_const);
+      VEC_STORE_BE (out + 3, b3, bige_const);
+
+      in += 4;
+      out += 4;
+      nblocks -= 4;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      rkeylast = rkeylast_orig ^ iv;
+
+      iv = VEC_LOAD_BE (in, bige_const);
+      b = iv;
+      AES_DECRYPT (b, rounds);
+
+      VEC_STORE_BE (out, b, bige_const);
+
+      in++;
+      out++;
+    }
+
+  VEC_STORE_BE (iv_arg, iv, bige_const);
+}
+
+
+void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg,
+			     void *outbuf_arg, const void *inbuf_arg,
+			     size_t nblocks)
+{
+  static const unsigned char vec_one_const[16] =
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+  const block bige_const = vec_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block ctr, b, one;
+
+  ctr = VEC_LOAD_BE (ctr_arg, bige_const);
+  one = VEC_LOAD_BE (&vec_one_const, bige_const);
+
+  PRELOAD_ROUND_KEYS (rk, rounds);
+  rkeylast_orig = rkeylast;
+
+  if (nblocks >= 4)
+    {
+      block b0, b1, b2, b3, b4, b5, b6, b7;
+      block two, three, four;
+      block ctr4;
+      block rkey;
+
+      two   = vec_add_uint128 (one, one);
+      three = vec_add_uint128 (two, one);
+      four  = vec_add_uint128 (two, two);
+
+      for (; nblocks >= 8; nblocks -= 8)
+	{
+	  ctr4 = vec_add_uint128 (ctr, four);
+	  b0 = rkey0 ^ ctr;
+	  b1 = rkey0 ^ vec_add_uint128 (ctr, one);
+	  b2 = rkey0 ^ vec_add_uint128 (ctr, two);
+	  b3 = rkey0 ^ vec_add_uint128 (ctr, three);
+	  b4 = rkey0 ^ ctr4;
+	  b5 = rkey0 ^ vec_add_uint128 (ctr4, one);
+	  b6 = rkey0 ^ vec_add_uint128 (ctr4, two);
+	  b7 = rkey0 ^ vec_add_uint128 (ctr4, three);
+	  ctr = vec_add_uint128 (ctr4, four);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD(&rk[r]); \
+	      b0 = vec_cipher_be (b0, rkey); \
+	      b1 = vec_cipher_be (b1, rkey); \
+	      b2 = vec_cipher_be (b2, rkey); \
+	      b3 = vec_cipher_be (b3, rkey); \
+	      b4 = vec_cipher_be (b4, rkey); \
+	      b5 = vec_cipher_be (b5, rkey); \
+	      b6 = vec_cipher_be (b6, rkey); \
+	      b7 = vec_cipher_be (b7, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast;
+	  b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const));
+	  b1 = vec_cipherlast_be (b1, rkey ^ VEC_LOAD_BE (in + 1, bige_const));
+	  b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const));
+	  b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const));
+	  b4 = vec_cipherlast_be (b4, rkey ^ VEC_LOAD_BE (in + 4, bige_const));
+	  b5 = vec_cipherlast_be (b5, rkey ^ VEC_LOAD_BE (in + 5, bige_const));
+	  b6 = vec_cipherlast_be (b6, rkey ^ VEC_LOAD_BE (in + 6, bige_const));
+	  b7 = vec_cipherlast_be (b7, rkey ^ VEC_LOAD_BE (in + 7, bige_const));
+
+	  VEC_STORE_BE (out + 0, b0, bige_const);
+	  VEC_STORE_BE (out + 1, b1, bige_const);
+	  VEC_STORE_BE (out + 2, b2, bige_const);
+	  VEC_STORE_BE (out + 3, b3, bige_const);
+	  VEC_STORE_BE (out + 4, b4, bige_const);
+	  VEC_STORE_BE (out + 5, b5, bige_const);
+	  VEC_STORE_BE (out + 6, b6, bige_const);
+	  VEC_STORE_BE (out + 7, b7, bige_const);
+
+	  in += 8;
+	  out += 8;
+	}
+
+      if (nblocks >= 4)
+	{
+	  b0 = rkey0 ^ ctr;
+	  b1 = rkey0 ^ vec_add_uint128 (ctr, one);
+	  b2 = rkey0 ^ vec_add_uint128 (ctr, two);
+	  b3 = rkey0 ^ vec_add_uint128 (ctr, three);
+	  ctr = vec_add_uint128 (ctr, four);
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD(&rk[r]); \
+	      b0 = vec_cipher_be (b0, rkey); \
+	      b1 = vec_cipher_be (b1, rkey); \
+	      b2 = vec_cipher_be (b2, rkey); \
+	      b3 = vec_cipher_be (b3, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast;
+	  b0 = vec_cipherlast_be (b0, rkey ^ VEC_LOAD_BE (in + 0, bige_const));
+	  b1 = vec_cipherlast_be (b1, rkey ^ VEC_LOAD_BE (in + 1, bige_const));
+	  b2 = vec_cipherlast_be (b2, rkey ^ VEC_LOAD_BE (in + 2, bige_const));
+	  b3 = vec_cipherlast_be (b3, rkey ^ VEC_LOAD_BE (in + 3, bige_const));
+
+	  VEC_STORE_BE (out + 0, b0, bige_const);
+	  VEC_STORE_BE (out + 1, b1, bige_const);
+	  VEC_STORE_BE (out + 2, b2, bige_const);
+	  VEC_STORE_BE (out + 3, b3, bige_const);
+	  in += 4;
+	  out += 4;
+	  nblocks -= 4;
+	}
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      b = ctr;
+      ctr = vec_add_uint128 (ctr, one);
+      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, bige_const);
 
-  PRELOAD_ROUND_KEYS (rk, rounds);
+      AES_ENCRYPT (b, rounds);
 
-  AES_DECRYPT (b, rounds);
-  VEC_STORE_BE (out, b, bige_const);
+      VEC_STORE_BE (out, b, bige_const);
 
-  return 0; /* does not use stack */
+      out++;
+      in++;
+    }
+
+  VEC_STORE_BE (ctr_arg, ctr, bige_const);
 }
 
 
@@ -1091,4 +1705,400 @@ size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
   return 0;
 }
 
+
+void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg,
+			       void *outbuf_arg, const void *inbuf_arg,
+			       size_t nblocks, int encrypt)
+{
+  static const block vec_bswap64_const =
+    { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
+  static const block vec_bswap128_const =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  static const unsigned char vec_tweak_const[16] =
+    { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 };
+  static const vector unsigned long long vec_shift63_const =
+    { 63, 63 };
+  static const vector unsigned long long vec_shift1_const =
+    { 1, 1 };
+  const block bige_const = vec_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  block tweak_tmp, tweak_next, tweak;
+  block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey;
+  block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7;
+  block tweak_const, bswap64_const, bswap128_const;
+  vector unsigned long long shift63_const, shift1_const;
+  ROUND_KEY_VARIABLES;
+
+  tweak_const = VEC_LOAD_BE (&vec_tweak_const, bige_const);
+  bswap64_const = ALIGNED_LOAD (&vec_bswap64_const);
+  bswap128_const = ALIGNED_LOAD (&vec_bswap128_const);
+  shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const);
+  shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const);
+
+  tweak_next = VEC_LOAD_BE (tweak_arg, bige_const);
+
+#define GEN_TWEAK(tweak, tmp) /* Generate next tweak. */ \
+    tmp = vec_vperm(tweak, tweak, bswap64_const); \
+    tweak = vec_vperm(tweak, tweak, bswap128_const); \
+    tmp = (block)(vec_sra((vector unsigned long long)tmp, shift63_const)) & \
+	  tweak_const; \
+    tweak = (block)vec_sl((vector unsigned long long)tweak, shift1_const); \
+    tweak = tweak ^ tmp; \
+    tweak = vec_vperm(tweak, tweak, bswap128_const);
+
+  if (encrypt)
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+      PRELOAD_ROUND_KEYS (rk, rounds);
+
+      for (; nblocks >= 8; nblocks -= 8)
+	{
+	  tweak0 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak1 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak2 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak3 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak4 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak5 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak6 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak7 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+
+	  b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0;
+	  b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0;
+	  b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0;
+	  b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0;
+	  b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0;
+	  b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0;
+	  b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0;
+	  b7 = VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (&rk[r]); \
+	      b0 = vec_cipher_be (b0, rkey); \
+	      b1 = vec_cipher_be (b1, rkey); \
+	      b2 = vec_cipher_be (b2, rkey); \
+	      b3 = vec_cipher_be (b3, rkey); \
+	      b4 = vec_cipher_be (b4, rkey); \
+	      b5 = vec_cipher_be (b5, rkey); \
+	      b6 = vec_cipher_be (b6, rkey); \
+	      b7 = vec_cipher_be (b7, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast;
+	  b0 = vec_cipherlast_be (b0, rkey ^ tweak0);
+	  b1 = vec_cipherlast_be (b1, rkey ^ tweak1);
+	  b2 = vec_cipherlast_be (b2, rkey ^ tweak2);
+	  b3 = vec_cipherlast_be (b3, rkey ^ tweak3);
+	  b4 = vec_cipherlast_be (b4, rkey ^ tweak4);
+	  b5 = vec_cipherlast_be (b5, rkey ^ tweak5);
+	  b6 = vec_cipherlast_be (b6, rkey ^ tweak6);
+	  b7 = vec_cipherlast_be (b7, rkey ^ tweak7);
+
+	  VEC_STORE_BE (out + 0, b0, bige_const);
+	  VEC_STORE_BE (out + 1, b1, bige_const);
+	  VEC_STORE_BE (out + 2, b2, bige_const);
+	  VEC_STORE_BE (out + 3, b3, bige_const);
+	  VEC_STORE_BE (out + 4, b4, bige_const);
+	  VEC_STORE_BE (out + 5, b5, bige_const);
+	  VEC_STORE_BE (out + 6, b6, bige_const);
+	  VEC_STORE_BE (out + 7, b7, bige_const);
+
+	  in += 8;
+	  out += 8;
+	}
+
+      if (nblocks >= 4)
+	{
+	  tweak0 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak1 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak2 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak3 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+
+	  b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0;
+	  b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0;
+	  b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0;
+	  b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (&rk[r]); \
+	      b0 = vec_cipher_be (b0, rkey); \
+	      b1 = vec_cipher_be (b1, rkey); \
+	      b2 = vec_cipher_be (b2, rkey); \
+	      b3 = vec_cipher_be (b3, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast;
+	  b0 = vec_cipherlast_be (b0, rkey ^ tweak0);
+	  b1 = vec_cipherlast_be (b1, rkey ^ tweak1);
+	  b2 = vec_cipherlast_be (b2, rkey ^ tweak2);
+	  b3 = vec_cipherlast_be (b3, rkey ^ tweak3);
+
+	  VEC_STORE_BE (out + 0, b0, bige_const);
+	  VEC_STORE_BE (out + 1, b1, bige_const);
+	  VEC_STORE_BE (out + 2, b2, bige_const);
+	  VEC_STORE_BE (out + 3, b3, bige_const);
+
+	  in += 4;
+	  out += 4;
+	  nblocks -= 4;
+	}
+
+      for (; nblocks; nblocks--)
+	{
+	  tweak = tweak_next;
+
+	  /* Xor-Encrypt/Decrypt-Xor block. */
+	  b = VEC_LOAD_BE (in, bige_const) ^ tweak;
+
+	  /* Generate next tweak. */
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+
+	  AES_ENCRYPT (b, rounds);
+
+	  b ^= tweak;
+	  VEC_STORE_BE (out, b, bige_const);
+
+	  in++;
+	  out++;
+	}
+    }
+  else
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+      if (!ctx->decryption_prepared)
+	{
+	  aes_ppc8_prepare_decryption (ctx);
+	  ctx->decryption_prepared = 1;
+	}
+
+      PRELOAD_ROUND_KEYS (rk, rounds);
+
+      for (; nblocks >= 8; nblocks -= 8)
+	{
+	  tweak0 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak1 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak2 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak3 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak4 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak5 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak6 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak7 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+
+	  b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0;
+	  b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0;
+	  b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0;
+	  b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0;
+	  b4 = VEC_LOAD_BE (in + 4, bige_const) ^ tweak4 ^ rkey0;
+	  b5 = VEC_LOAD_BE (in + 5, bige_const) ^ tweak5 ^ rkey0;
+	  b6 = VEC_LOAD_BE (in + 6, bige_const) ^ tweak6 ^ rkey0;
+	  b7 = VEC_LOAD_BE (in + 7, bige_const) ^ tweak7 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (&rk[r]); \
+	      b0 = vec_ncipher_be (b0, rkey); \
+	      b1 = vec_ncipher_be (b1, rkey); \
+	      b2 = vec_ncipher_be (b2, rkey); \
+	      b3 = vec_ncipher_be (b3, rkey); \
+	      b4 = vec_ncipher_be (b4, rkey); \
+	      b5 = vec_ncipher_be (b5, rkey); \
+	      b6 = vec_ncipher_be (b6, rkey); \
+	      b7 = vec_ncipher_be (b7, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast;
+	  b0 = vec_ncipherlast_be (b0, rkey ^ tweak0);
+	  b1 = vec_ncipherlast_be (b1, rkey ^ tweak1);
+	  b2 = vec_ncipherlast_be (b2, rkey ^ tweak2);
+	  b3 = vec_ncipherlast_be (b3, rkey ^ tweak3);
+	  b4 = vec_ncipherlast_be (b4, rkey ^ tweak4);
+	  b5 = vec_ncipherlast_be (b5, rkey ^ tweak5);
+	  b6 = vec_ncipherlast_be (b6, rkey ^ tweak6);
+	  b7 = vec_ncipherlast_be (b7, rkey ^ tweak7);
+
+	  VEC_STORE_BE (out + 0, b0, bige_const);
+	  VEC_STORE_BE (out + 1, b1, bige_const);
+	  VEC_STORE_BE (out + 2, b2, bige_const);
+	  VEC_STORE_BE (out + 3, b3, bige_const);
+	  VEC_STORE_BE (out + 4, b4, bige_const);
+	  VEC_STORE_BE (out + 5, b5, bige_const);
+	  VEC_STORE_BE (out + 6, b6, bige_const);
+	  VEC_STORE_BE (out + 7, b7, bige_const);
+
+	  in += 8;
+	  out += 8;
+	}
+
+      if (nblocks >= 4)
+	{
+	  tweak0 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak1 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak2 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+	  tweak3 = tweak_next;
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+
+	  b0 = VEC_LOAD_BE (in + 0, bige_const) ^ tweak0 ^ rkey0;
+	  b1 = VEC_LOAD_BE (in + 1, bige_const) ^ tweak1 ^ rkey0;
+	  b2 = VEC_LOAD_BE (in + 2, bige_const) ^ tweak2 ^ rkey0;
+	  b3 = VEC_LOAD_BE (in + 3, bige_const) ^ tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+	      rkey = ALIGNED_LOAD (&rk[r]); \
+	      b0 = vec_ncipher_be (b0, rkey); \
+	      b1 = vec_ncipher_be (b1, rkey); \
+	      b2 = vec_ncipher_be (b2, rkey); \
+	      b3 = vec_ncipher_be (b3, rkey);
+
+	  DO_ROUND(1);
+	  DO_ROUND(2);
+	  DO_ROUND(3);
+	  DO_ROUND(4);
+	  DO_ROUND(5);
+	  DO_ROUND(6);
+	  DO_ROUND(7);
+	  DO_ROUND(8);
+	  DO_ROUND(9);
+	  if (rounds >= 12)
+	    {
+	      DO_ROUND(10);
+	      DO_ROUND(11);
+	      if (rounds > 12)
+		{
+		  DO_ROUND(12);
+		  DO_ROUND(13);
+		}
+	    }
+
+#undef DO_ROUND
+
+	  rkey = rkeylast;
+	  b0 = vec_ncipherlast_be (b0, rkey ^ tweak0);
+	  b1 = vec_ncipherlast_be (b1, rkey ^ tweak1);
+	  b2 = vec_ncipherlast_be (b2, rkey ^ tweak2);
+	  b3 = vec_ncipherlast_be (b3, rkey ^ tweak3);
+
+	  VEC_STORE_BE (out + 0, b0, bige_const);
+	  VEC_STORE_BE (out + 1, b1, bige_const);
+	  VEC_STORE_BE (out + 2, b2, bige_const);
+	  VEC_STORE_BE (out + 3, b3, bige_const);
+
+	  in += 4;
+	  out += 4;
+	  nblocks -= 4;
+	}
+
+      for (; nblocks; nblocks--)
+	{
+	  tweak = tweak_next;
+
+	  /* Xor-Encrypt/Decrypt-Xor block. */
+	  b = VEC_LOAD_BE (in, bige_const) ^ tweak;
+
+	  /* Generate next tweak. */
+	  GEN_TWEAK (tweak_next, tweak_tmp);
+
+	  AES_DECRYPT (b, rounds);
+
+	  b ^= tweak;
+	  VEC_STORE_BE (out, b, bige_const);
+
+	  in++;
+	  out++;
+	}
+    }
+
+  VEC_STORE_BE (tweak_arg, tweak_next, bige_const);
+
+#undef GEN_TWEAK
+}
+
 #endif /* USE_PPC_CRYPTO */
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index f15ac18b1..ebd1a11a5 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -210,11 +210,33 @@ extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx,
 extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx,
 					   unsigned char *dst,
 					   const unsigned char *src);
+
+extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks);
+extern void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv,
+				    void *outbuf_arg, const void *inbuf_arg,
+				    size_t nblocks);
+
 extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
 					const void *inbuf_arg, size_t nblocks,
 					int encrypt);
 extern size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c,
 				       const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak,
+				      void *outbuf_arg,
+				      const void *inbuf_arg,
+				      size_t nblocks, int encrypt);
 #endif /*USE_PPC_CRYPTO*/
 
 static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
@@ -452,8 +474,14 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
       ctx->use_ppc_crypto = 1;
       if (hd)
         {
+          hd->bulk.cfb_enc = _gcry_aes_ppc8_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_ppc8_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_ppc8_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_ppc8_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_ppc8_ctr_enc;
           hd->bulk.ocb_crypt = _gcry_aes_ppc8_ocb_crypt;
           hd->bulk.ocb_auth = _gcry_aes_ppc8_ocb_auth;
+          hd->bulk.xts_crypt = _gcry_aes_ppc8_xts_crypt;
         }
     }
 #endif
@@ -896,6 +924,13 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv,
       return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
@@ -957,6 +992,13 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
       return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
@@ -1025,6 +1067,13 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
       return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp;
@@ -1268,6 +1317,13 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv,
       return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
@@ -1326,6 +1382,13 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
       return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16;
@@ -1556,6 +1619,13 @@ _gcry_aes_xts_crypt (void *context, unsigned char *tweak,
       return;
     }
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+  else if (ctx->use_ppc_crypto)
+    {
+      _gcry_aes_ppc8_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
+      return;
+    }
+#endif /*USE_PPC_CRYPTO*/
   else
     {
       if (encrypt)
diff --git a/configure.ac b/configure.ac
index 586145aa4..d7725b553 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1905,6 +1905,7 @@ AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto
 		    "lvx  %v20,%r12,%r0;\n"
 		    "vcipher %v0, %v1, %v22;\n"
 		    "lxvw4x %vs32, %r0, %r1;\n"
+		    "vadduwm %v0, %v1, %v22;\n"
 		  );
             ]])],
           [gcry_cv_gcc_inline_asm_ppc_altivec=yes])
