[PATCH 2/5] Support for PowerPC's AES acceleration.

Shawn Landden shawn at git.icu
Tue Jul 9 14:28:59 CEST 2019


The CPU does not require the 4KB table to be provided to it,
and thus is more resistant to side-channel attacks.

I get an approximately 2-3X speed-up with vcrypto support.

These numbers are inaccurate because of turbo mode:

Before:
Cipher:
AES | nanosecs/byte mebibytes/sec cycles/byte auto Mhz

 ECB enc |      6.27 ns/B     152.2 MiB/s      7.92 c/B      1263
 ECB dec |      7.10 ns/B     134.4 MiB/s      8.97 c/B      1264
 CBC enc |      4.52 ns/B     211.2 MiB/s      5.71 c/B      1264
 CBC dec |      4.59 ns/B     207.9 MiB/s      8.69 c/B      1895
 CFB enc |      4.34 ns/B     219.6 MiB/s      8.23 c/B      1895
 CFB dec |      4.14 ns/B     230.6 MiB/s      7.84 c/B      1895
 OFB enc |      6.09 ns/B     156.7 MiB/s     11.54 c/B      1895
 OFB dec |      6.03 ns/B     158.2 MiB/s     11.43 c/B      1895
 CTR enc |      4.17 ns/B     228.9 MiB/s      7.90 c/B      1895
 CTR dec |      4.17 ns/B     228.6 MiB/s      7.91 c/B      1895
 XTS enc |      4.53 ns/B     210.4 MiB/s      8.59 c/B      1895
 XTS dec |      5.00 ns/B     190.8 MiB/s      9.47 c/B      1895
 CCM enc |      8.51 ns/B     112.0 MiB/s     16.13 c/B      1895
 CCM dec |      8.51 ns/B     112.0 MiB/s     16.13 c/B      1895
CCM auth |      4.35 ns/B     219.1 MiB/s      8.25 c/B      1895
 EAX enc |      8.51 ns/B     112.1 MiB/s     16.13 c/B      1895
 EAX dec |      8.55 ns/B     111.5 MiB/s     16.21 c/B      1895
EAX auth |      4.34 ns/B     219.5 MiB/s      8.23 c/B      1895
 GCM enc |      7.49 ns/B     127.3 MiB/s     14.20 c/B      1895
 GCM dec |      7.49 ns/B     127.3 MiB/s     14.20 c/B      1895
GCM auth |      3.33 ns/B     286.2 MiB/s      6.31 c/B      1895
 OCB enc |      4.33 ns/B     220.1 MiB/s      8.21 c/B      1895
 OCB dec |      5.69 ns/B     167.5 MiB/s      9.32 c/B      1638
OCB auth |      5.05 ns/B     189.0 MiB/s      8.26 c/B      1638

After:
Cipher:
AES | nanosecs/byte mebibytes/sec cycles/byte auto Mhz

 ECB enc |      2.14 ns/B     445.7 MiB/s      4.06 c/B      1895
 ECB dec |      2.41 ns/B     396.0 MiB/s      4.54 c/B      1887
 CBC enc |      2.11 ns/B     451.9 MiB/s      4.00 c/B      1895
 CBC dec |      2.06 ns/B     462.7 MiB/s      3.91 c/B      1895
 CFB enc |      2.09 ns/B     455.9 MiB/s      3.96 c/B      1895
 CFB dec |      2.09 ns/B     456.2 MiB/s      3.96 c/B      1895
 OFB enc |      2.17 ns/B     439.9 MiB/s      4.11 c/B      1895
 OFB dec |      2.12 ns/B     449.6 MiB/s      4.02 c/B      1895
 CTR enc |      2.10 ns/B     454.6 MiB/s      3.98 c/B      1895
 CTR dec |      2.09 ns/B     456.7 MiB/s      3.96 c/B      1895
 XTS enc |      2.30 ns/B     415.3 MiB/s      4.35 c/B      1895
 XTS dec |      2.29 ns/B     415.8 MiB/s      4.35 c/B      1895
 CCM enc |      4.67 ns/B     204.2 MiB/s      7.65 c/B      1638
 CCM dec |      4.83 ns/B     197.3 MiB/s      7.92 c/B      1638
CCM auth |      2.43 ns/B     391.9 MiB/s      3.99 c/B      1638
 EAX enc |      4.84 ns/B     197.2 MiB/s      7.92 c/B      1638
 EAX dec |      4.83 ns/B     197.3 MiB/s      7.92 c/B      1638
EAX auth |      2.42 ns/B     394.2 MiB/s      3.96 c/B      1638
 GCM enc |      5.42 ns/B     176.0 MiB/s     10.27 c/B      1895
 GCM dec |      5.42 ns/B     176.1 MiB/s     10.27 c/B      1895
GCM auth |      3.33 ns/B     286.2 MiB/s      6.32 c/B      1895
 OCB enc |      2.10 ns/B     454.7 MiB/s      3.98 c/B      1895
 OCB dec |      2.11 ns/B     452.8 MiB/s      3.99 c/B      1895
OCB auth |      2.10 ns/B     453.6 MiB/s      3.98 c/B      1895

Fixes T4529

GCM is slow because of lack of CPU support.

v2: changelog, GNU coding standards

2019-07-09  Shawn Landden  <shawn at git.icu>

	* cipher/Makefile.am: Build cryptogams AES code.
	* cipher/ppc-xlate.pl: Renamed to...
	* cipher/asm-common-ppc.pl: ...this.
	* cipher/aesp8-ppc.pl: Renamed to...
	* cipher/rijndael-ppc8.pl: ...and modified to libgcrypt's ctx structure.
	* cipher/rijndael-ppc8.S: Regenerated.
	* cipher/rijndael-ppc832.S: Regenerated.
	* cipher/rijndael-ppc8be.S: Regenerated.
	* cipher/rijndael.c: Wrapped new functionality.
	* cipher/rijndael-internal.h: Hardware detection glue.
	* configure.ac: Link cryptogams code based on target.
	* src/Makefile.am: Hardware detection.
	* src/g10lib.h: Likewise.
	* src/hwf-common.h: Likewise.
	* src/hwf-ppc.c: Likewise.
	* src/hwfeatures.c: Likewise.
---
 cipher/Makefile.am                         |   4 +
 cipher/{ppc-xlate.pl => asm-common-ppc.pl} |  29 +--
 cipher/rijndael-internal.h                 |  10 +
 cipher/rijndael-ppc8.S                     |  11 +-
 cipher/{aesp8-ppc.pl => rijndael-ppc8.pl}  |  16 +-
 cipher/rijndael-ppc832.S                   |  11 +-
 cipher/rijndael-ppc8be.S                   |  11 +-
 cipher/rijndael.c                          |  80 ++++++-
 configure.ac                               |  40 +++-
 src/Makefile.am                            |   2 +-
 src/g10lib.h                               |   3 +-
 src/hwf-common.h                           |   2 +-
 src/hwf-ppc.c                              | 231 +++++++++++++++++++++
 src/hwfeatures.c                           |  17 +-
 14 files changed, 410 insertions(+), 57 deletions(-)
 rename cipher/{ppc-xlate.pl => asm-common-ppc.pl} (93%)
 mode change 100755 => 100644
 rename cipher/{aesp8-ppc.pl => rijndael-ppc8.pl} (99%)
 create mode 100644 src/hwf-ppc.c

diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 0a031f4b..495b2f6d 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -96,6 +96,10 @@ EXTRA_libcipher_la_SOURCES = \
 	rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S  \
 	rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S    \
 	rijndael-armv8-aarch64-ce.S rijndael-aarch64.S     \
+	rijndael-ppc8.pl \
+	rijndael-ppc8.S	\
+	rijndael-ppc8be.S \
+	rijndael-ppc832.S \
 	rmd160.c \
 	rsa.c \
 	salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
diff --git a/cipher/ppc-xlate.pl b/cipher/asm-common-ppc.pl
old mode 100755
new mode 100644
similarity index 93%
rename from cipher/ppc-xlate.pl
rename to cipher/asm-common-ppc.pl
index 4c6e90c2..00b955cc
--- a/cipher/ppc-xlate.pl
+++ b/cipher/asm-common-ppc.pl
@@ -1,12 +1,16 @@
-#! /usr/bin/env perl
-# SPDX-License-Identifier: BSD-3-Clause
+#!/usr/bin/env perl
 
-# ====================================================================
-# Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
+# PowerPC assembler distiller by \@dot-asm.
+
+################################################################
+# Recognized "flavour"-s are:
+#
+# linux{32|64}[le]  GNU assembler and ELF symbol decorations,
+#                   with little-endian option
+# linux64v2         GNU assembler and big-endian instantiation
+#                   of latest ELF specification
+# aix{32|64}        AIX assembler and symbol decorations
+# osx{32|64}        Mac OS X assembler and symbol decorations
 
 my $flavour = shift;
 my $output = shift;
@@ -51,7 +55,7 @@ my $globl = sub {
 	/osx/		&& do { $name = "_$name";
 				last;
 			      };
-	/linux.*(32|64le)/
+	/linux.*(32|64(le|v2))/
 			&& do {	$ret .= ".globl	$name";
 				if (!$$type) {
 				    $ret .= "\n.type	$name,\@function";
@@ -82,7 +86,7 @@ my $globl = sub {
 };
 my $text = sub {
     my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
-    $ret = ".abiversion	2\n".$ret	if ($flavour =~ /linux.*64le/);
+    $ret = ".abiversion	2\n".$ret	if ($flavour =~ /linux.*64(le|v2)/);
     $ret;
 };
 my $machine = sub {
@@ -188,7 +192,7 @@ my $vmr = sub {
 
 # Some ABIs specify vrsave, special-purpose register #256, as reserved
 # for system use.
-my $no_vrsave = ($flavour =~ /aix|linux64le/);
+my $no_vrsave = ($flavour =~ /aix|linux64(le|v2)/);
 my $mtspr = sub {
     my ($f,$idx,$ra) = @_;
     if ($idx == 256 && $no_vrsave) {
@@ -322,7 +326,7 @@ while($line=<>) {
 	if ($label) {
 	    my $xlated = ($GLOBALS{$label} or $label);
 	    print "$xlated:";
-	    if ($flavour =~ /linux.*64le/) {
+	    if ($flavour =~ /linux.*64(le|v2)/) {
 		if ($TYPES{$label} =~ /function/) {
 		    printf "\n.localentry	%s,0\n",$xlated;
 		}
@@ -346,3 +350,4 @@ while($line=<>) {
 }
 
 close STDOUT;
+
diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index 78b08e8f..c573ed36 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -104,6 +104,13 @@
 # endif
 #endif /* ENABLE_ARM_CRYPTO_SUPPORT */
 
+#undef USE_PPC_ASM
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(__powerpc64__) || defined(__powerpc__)
+#   define USE_PPC_ASM 1
+# endif
+#endif
+
 struct RIJNDAEL_context_s;
 
 typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx,
@@ -154,6 +161,9 @@ typedef struct RIJNDAEL_context_s
 #ifdef USE_ARM_CE
   unsigned int use_arm_ce:1;          /* ARMv8 CE shall be used.  */
 #endif /*USE_ARM_CE*/
+#ifdef USE_PPC_ASM
+  unsigned int use_ppc_asm:1;          /* PowerISA 2.07 crypto shall be used.  */
+#endif
   rijndael_cryptfn_t encrypt_fn;
   rijndael_cryptfn_t decrypt_fn;
   rijndael_prefetchfn_t prefetch_enc_fn;
diff --git a/cipher/rijndael-ppc8.S b/cipher/rijndael-ppc8.S
index f03f7360..49db6f95 100644
--- a/cipher/rijndael-ppc8.S
+++ b/cipher/rijndael-ppc8.S
@@ -278,7 +278,6 @@ aes_p8_set_encrypt_key:
 	stvx	2,0,3
 	li	6,0
 	or	12,12,12
-	stw	8,0(5)
 
 .Lenc_key_abort:
 	mr	3,6
@@ -344,7 +343,7 @@ aes_p8_set_decrypt_key:
 aes_p8_encrypt:
 .localentry	aes_p8_encrypt,0
 
-	lwz	6,240(5)
+	lwz	6,480(5)
 	lis	0,0xfc00
 	li	12,-1
 	li	7,15
@@ -504,7 +503,7 @@ aes_p8_cbc_encrypt:
 
 	neg	11,3
 	lvsr	10,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 
 	lvsr	6,0,11
 	lvx	5,0,3
@@ -1243,7 +1242,7 @@ aes_p8_ctr32_encrypt_blocks:
 
 	neg	11,3
 	lvsr	10,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 
 	lvsr	6,0,11
 	lvx	5,0,3
@@ -1884,7 +1883,7 @@ aes_p8_xts_encrypt:
 	beq	.Lxts_enc_no_key2
 
 	lvsr	7,0,7
-	lwz	9,240(7)
+	lwz	9,480(7)
 	srwi	9,9,1
 	subi	9,9,1
 	li	3,16
@@ -1928,7 +1927,7 @@ aes_p8_xts_encrypt:
 	addi	10,10,16
 
 	lvsr	7,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 	srwi	9,9,1
 	subi	9,9,1
 	li	3,16
diff --git a/cipher/aesp8-ppc.pl b/cipher/rijndael-ppc8.pl
similarity index 99%
rename from cipher/aesp8-ppc.pl
rename to cipher/rijndael-ppc8.pl
index 8397a8c7..00bf30d9 100755
--- a/cipher/aesp8-ppc.pl
+++ b/cipher/rijndael-ppc8.pl
@@ -1,6 +1,9 @@
 #! /usr/bin/env perl
 # SPDX-License-Identifier: BSD-3-Clause
 #
+# Changes: adjust struct offsets to work with libgcrypt ctx
+#          rename ppc-xlate.pl
+#
 # ====================================================================
 # Written by Andy Polyakov <appro at openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
@@ -60,6 +63,7 @@ if ($flavour =~ /64/) {
 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}asm-common-ppc.pl" and -f $xlate ) or
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
 die "can't locate ppc-xlate.pl";
@@ -353,7 +357,6 @@ Ldone:
 	stvx		$in1,0,$inp
 	li		$ptr,0
 	mtspr		256,$vrsave
-	stw		$rounds,0($out)
 
 Lenc_key_abort:
 	mr		r3,$ptr
@@ -417,13 +420,14 @@ ___
 sub gen_block () {
 my $dir = shift;
 my $n   = $dir eq "de" ? "n" : "";
+my $rounds_off = $dir eq "de" ? "240" : "480";
 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
 
 $code.=<<___;
 .globl	.${prefix}_${dir}crypt
 .align	5
 .${prefix}_${dir}crypt:
-	lwz		$rounds,240($key)
+	lwz		$rounds,$rounds_off($key)
 	lis		r0,0xfc00
 	mfspr		$vrsave,256
 	li		$idx,15			# 15 is not typo
@@ -522,7 +526,7 @@ $code.=<<___;
 
 	neg		r11,$inp
 	?lvsl		$keyperm,0,$key		# prepare for unaligned key
-	lwz		$rounds,240($key)
+	lwz		$rounds,480($key)
 
 	lvsr		$inpperm,0,r11		# prepare for unaligned load
 	lvx		$inptail,0,$inp
@@ -1283,7 +1287,7 @@ $code.=<<___;
 
 	neg		r11,$inp
 	?lvsl		$keyperm,0,$key		# prepare for unaligned key
-	lwz		$rounds,240($key)
+	lwz		$rounds,480($key)
 
 	lvsr		$inpperm,0,r11		# prepare for unaligned load
 	lvx		$inptail,0,$inp
@@ -1958,7 +1962,7 @@ $code.=<<___;
 	beq		Lxts_enc_no_key2
 
 	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
-	lwz		$rounds,240($key2)
+	lwz		$rounds,480($key2)
 	srwi		$rounds,$rounds,1
 	subi		$rounds,$rounds,1
 	li		$idx,16
@@ -2002,7 +2006,7 @@ Lxts_enc:
 	addi		$inp,$inp,16
 
 	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
-	lwz		$rounds,240($key1)
+	lwz		$rounds,480($key1)
 	srwi		$rounds,$rounds,1
 	subi		$rounds,$rounds,1
 	li		$idx,16
diff --git a/cipher/rijndael-ppc832.S b/cipher/rijndael-ppc832.S
index c297e398..838083e7 100644
--- a/cipher/rijndael-ppc832.S
+++ b/cipher/rijndael-ppc832.S
@@ -275,7 +275,6 @@ aes_p8_set_encrypt_key:
 	stvx	2,0,3
 	li	6,0
 	mtspr	256,12
-	stw	8,0(5)
 
 .Lenc_key_abort:
 	mr	3,6
@@ -337,7 +336,7 @@ aes_p8_set_decrypt_key:
 .type	aes_p8_encrypt, at function
 .align	5
 aes_p8_encrypt:
-	lwz	6,240(5)
+	lwz	6,480(5)
 	lis	0,0xfc00
 	mfspr	12,256
 	li	7,15
@@ -493,7 +492,7 @@ aes_p8_cbc_encrypt:
 
 	neg	11,3
 	lvsl	10,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 
 	lvsr	6,0,11
 	lvx	5,0,3
@@ -1230,7 +1229,7 @@ aes_p8_ctr32_encrypt_blocks:
 
 	neg	11,3
 	lvsl	10,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 
 	lvsr	6,0,11
 	lvx	5,0,3
@@ -1869,7 +1868,7 @@ aes_p8_xts_encrypt:
 	beq	.Lxts_enc_no_key2
 
 	lvsl	7,0,7
-	lwz	9,240(7)
+	lwz	9,480(7)
 	srwi	9,9,1
 	subi	9,9,1
 	li	3,16
@@ -1913,7 +1912,7 @@ aes_p8_xts_encrypt:
 	addi	10,10,16
 
 	lvsl	7,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 	srwi	9,9,1
 	subi	9,9,1
 	li	3,16
diff --git a/cipher/rijndael-ppc8be.S b/cipher/rijndael-ppc8be.S
index 11001a46..4fd21131 100644
--- a/cipher/rijndael-ppc8be.S
+++ b/cipher/rijndael-ppc8be.S
@@ -280,7 +280,6 @@ aes_p8_set_encrypt_key:
 	stvx	2,0,3
 	li	6,0
 	mtspr	256,12
-	stw	8,0(5)
 
 .Lenc_key_abort:
 	mr	3,6
@@ -354,7 +353,7 @@ aes_p8_encrypt:
 .previous
 .align	5
 .aes_p8_encrypt:
-	lwz	6,240(5)
+	lwz	6,480(5)
 	lis	0,0xfc00
 	mfspr	12,256
 	li	7,15
@@ -522,7 +521,7 @@ aes_p8_cbc_encrypt:
 
 	neg	11,3
 	lvsl	10,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 
 	lvsr	6,0,11
 	lvx	5,0,3
@@ -1265,7 +1264,7 @@ aes_p8_ctr32_encrypt_blocks:
 
 	neg	11,3
 	lvsl	10,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 
 	lvsr	6,0,11
 	lvx	5,0,3
@@ -1910,7 +1909,7 @@ aes_p8_xts_encrypt:
 	beq	.Lxts_enc_no_key2
 
 	lvsl	7,0,7
-	lwz	9,240(7)
+	lwz	9,480(7)
 	srwi	9,9,1
 	subi	9,9,1
 	li	3,16
@@ -1954,7 +1953,7 @@ aes_p8_xts_encrypt:
 	addi	10,10,16
 
 	lvsl	7,0,6
-	lwz	9,240(6)
+	lwz	9,480(6)
 	srwi	9,9,1
 	subi	9,9,1
 	li	3,16
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2c9aa673..e6e53b4f 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -199,6 +199,37 @@ extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak,
                                           size_t nblocks, int encrypt);
 #endif /*USE_ARM_ASM*/
 
+#ifdef USE_PPC_ASM
+/* POWER 8 AES extensions */
+extern void aes_p8_encrypt (const unsigned char *in,
+                            unsigned char *out,
+                            const RIJNDAEL_context *ctx);
+static unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in)
+{
+  /* The asm takes (in, out, ctx); reordering its registers instead broke it. */
+  aes_p8_encrypt (in, out, ctx);
+  return 0; /* does not use stack */
+}
+                                  /* this is the decryption key part of context */
+extern void aes_p8_decrypt (const unsigned char *in,
+                            unsigned char *out,
+                            const void *sboxes);
+static unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in)
+{
+  aes_p8_decrypt (in, out, &ctx->u2); /* u2: decryption key part of ctx */
+  return 0; /* does not use stack */
+}
+extern int aes_p8_set_encrypt_key (const unsigned char *userKey, const int bits,
+                                   RIJNDAEL_context *key);
+extern int aes_p8_set_decrypt_key (const unsigned char *userKey, const int bits,
+                                   /* this is the decryption key part of context */
+                                   const unsigned (*)[15][4]);
+#endif /*USE_PPC_ASM*/
+
 static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
                                 const unsigned char *ax);
 static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
@@ -280,7 +311,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
   int i,j, r, t, rconpointer = 0;
   int KC;
 #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \
-    || defined(USE_ARM_CE)
+    || defined(USE_ARM_CE) || defined(USE_PPC_ASM)
   unsigned int hwfeatures;
 #endif
 
@@ -324,7 +355,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
   ctx->rounds = rounds;
 
 #if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \
-    || defined(USE_ARM_CE)
+    || defined(USE_ARM_CE) || defined(USE_PPC_ASM)
   hwfeatures = _gcry_get_hw_features ();
 #endif
 
@@ -341,6 +372,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
 #ifdef USE_ARM_CE
   ctx->use_arm_ce = 0;
 #endif
+#ifdef USE_PPC_ASM
+  ctx->use_ppc_asm = 0;
+#endif
 
   if (0)
     {
@@ -420,6 +454,16 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
           hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt;
         }
     }
+#endif
+#ifdef USE_PPC_ASM
+  else if (hwfeatures & HWF_PPC_VCRYPTO)
+    {
+      ctx->encrypt_fn = _gcry_aes_ppc8_encrypt;
+      ctx->decrypt_fn = _gcry_aes_ppc8_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->use_ppc_asm = 1;
+    }
 #endif
   else
     {
@@ -446,6 +490,15 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     _gcry_aes_armv8_ce_setkey (ctx, key);
+#endif
+#ifdef USE_PPC_ASM
+  else if (ctx->use_ppc_asm)
+    {
+      /* These are both done here to avoid having to store the key.
+       * Does not require in-memory S-boxes. */
+      aes_p8_set_encrypt_key (key, keylen * 8, ctx);
+      aes_p8_set_decrypt_key (key, keylen * 8, &ctx->keyschdec32);
+    }
 #endif
   else
     {
@@ -591,6 +644,13 @@ prepare_decryption( RIJNDAEL_context *ctx )
       /* Padlock does not need decryption subkeys. */
     }
 #endif /*USE_PADLOCK*/
+#ifdef USE_PPC_ASM
+  else if (ctx->use_ppc_asm)
+    {
+      /* done during encryption key setup, as then we have the actual
+       * key available */
+    }
+#endif /*USE_PPC_ASM*/
   else
     {
       const byte *sbox = ((const byte *)encT) + 1;
@@ -866,7 +926,6 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv,
     _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
 }
 
-
 /* Bulk encryption of complete blocks in CBC mode.  Caller needs to
    make sure that IV is aligned on an unsigned long boundary.  This
    function is only intended for the bulk encryption feature of
@@ -1150,7 +1209,7 @@ do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
 				     dec_tables.T);
 #else
   return do_decrypt_fn (ctx, bx, ax);
-#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/
+#endif
 }
 
 
@@ -1588,14 +1647,21 @@ selftest_basic_128 (void)
     {
       0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
-      /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */
-      /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */
     };
   static const unsigned char ciphertext_128[16] =
     {
       0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30,
       0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a
     };
+
+  static const unsigned char key_test_expansion_128[16] =
+    {
+      0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
+      0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
+    };
+
+  RIJNDAEL_context exp_ctx;
+  rijndael_setkey (&exp_ctx, key_test_expansion_128, sizeof (key_128), NULL);
 #endif
 
   /* Because gcc/ld can only align the CTX struct on 8 bytes on the
@@ -1611,7 +1677,7 @@ selftest_basic_128 (void)
       xfree (ctxmem);
       return "AES-128 test encryption failed.";
     }
-  rijndael_decrypt (ctx, scratch, scratch);
+  rijndael_decrypt (ctx, scratch, ciphertext_128);
   xfree (ctxmem);
   if (memcmp (scratch, plaintext_128, sizeof (plaintext_128)))
     return "AES-128 test decryption failed.";
diff --git a/configure.ac b/configure.ac
index 3ac5897b..e65ce280 100644
--- a/configure.ac
+++ b/configure.ac
@@ -681,6 +681,14 @@ AC_ARG_ENABLE(arm-crypto-support,
 	      armcryptosupport=$enableval,armcryptosupport=yes)
 AC_MSG_RESULT($armcryptosupport)
 
+# Implementation of the --disable-ppc-crypto-support switch.
+AC_MSG_CHECKING([whether PPC crypto support is requested])
+AC_ARG_ENABLE(ppc-crypto-support,
+              AC_HELP_STRING([--disable-ppc-crypto-support],
+                 [Disable support for the PPC crypto instructions introduced in POWER 8 (PowerISA 2.07)]),
+              ppccryptosupport=$enableval,ppccryptosupport=yes)
+AC_MSG_RESULT($ppccryptosupport)
+
 # Implementation of the --disable-O-flag-munging switch.
 AC_MSG_CHECKING([whether a -O flag munging is requested])
 AC_ARG_ENABLE([O-flag-munging],
@@ -1272,14 +1280,14 @@ if test "$mpi_cpu_arch" != "x86" ; then
    drngsupport="n/a"
 fi
 
-if test "$mpi_cpu_arch" != "arm" ; then
-   if test "$mpi_cpu_arch" != "aarch64" ; then
-     neonsupport="n/a"
-     armcryptosupport="n/a"
-   fi
+if test "$mpi_cpu_arch" != "arm" && test "$mpi_cpu_arch" != "aarch64"; then
+   neonsupport="n/a"
+   armcryptosupport="n/a"
 fi
 
-
+if test "$mpi_cpu_arch" != "ppc"; then
+   ppccryptosupport="n/a"
+fi
 #############################################
 ####                                     ####
 #### Platform specific compiler checks.  ####
@@ -2119,6 +2127,10 @@ if test x"$armcryptosupport" = xyes ; then
   AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1,
             [Enable support for ARMv8 Crypto Extension instructions.])
 fi
+if test x"$ppccryptosupport" = xyes ; then
+  AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1,
+            [Enable support for POWER 8 (PowerISA 2.07) crypto extension.])
+fi
 if test x"$jentsupport" = xyes ; then
   AC_DEFINE(ENABLE_JENT_SUPPORT, 1,
             [Enable support for the jitter entropy collector.])
@@ -2226,6 +2238,20 @@ if test "$found" = "1" ; then
          GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo"
          GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo"
       ;;
+      powerpc64le-*-*)
+         # Build with the crypto extension implementation
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8.lo"
+      ;;
+      powerpc64-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc8be.lo"
+      ;;
+      powerpc-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc832.lo"
+      ;;
    esac
 
    case "$mpi_cpu_arch" in
@@ -2699,6 +2725,7 @@ case "$mpi_cpu_arch" in
         ;;
      ppc)
         AC_DEFINE(HAVE_CPU_ARCH_PPC, 1,   [Defined for PPC platforms])
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo"
         ;;
      arm)
         AC_DEFINE(HAVE_CPU_ARCH_ARM, 1,   [Defined for ARM platforms])
@@ -2800,6 +2827,7 @@ GCRY_MSG_SHOW([Try using Intel AVX:      ],[$avxsupport])
 GCRY_MSG_SHOW([Try using Intel AVX2:     ],[$avx2support])
 GCRY_MSG_SHOW([Try using ARM NEON:       ],[$neonsupport])
 GCRY_MSG_SHOW([Try using ARMv8 crypto:   ],[$armcryptosupport])
+GCRY_MSG_SHOW([Try using PPC crypto:     ],[$ppccryptosupport])
 GCRY_MSG_SHOW([],[])
 
 if test "x${gpg_config_script_warn}" != x; then
diff --git a/src/Makefile.am b/src/Makefile.am
index 82d6e8a0..5d347a2a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -66,7 +66,7 @@ libgcrypt_la_SOURCES = \
 	hmac256.c hmac256.h context.c context.h \
 	ec-context.h
 
-EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c
+EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c hwf-ppc.c
 gcrypt_hwf_modules = @GCRYPT_HWF_MODULES@
 
 
diff --git a/src/g10lib.h b/src/g10lib.h
index 694c2d83..03b3967d 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -236,7 +236,8 @@ char **_gcry_strtokenize (const char *string, const char *delim);
 #define HWF_ARM_SHA2            (1 << 20)
 #define HWF_ARM_PMULL           (1 << 21)
 
-
+/* No problem re-using a slot from a different architecture.  */
+#define HWF_PPC_VCRYPTO       (1 << 0)
 
 gpg_err_code_t _gcry_disable_hw_feature (const char *name);
 void _gcry_detect_hw_features (void);
diff --git a/src/hwf-common.h b/src/hwf-common.h
index 8f156b56..76f346e9 100644
--- a/src/hwf-common.h
+++ b/src/hwf-common.h
@@ -22,6 +22,6 @@
 
 unsigned int _gcry_hwf_detect_x86 (void);
 unsigned int _gcry_hwf_detect_arm (void);
-
+unsigned int _gcry_hwf_detect_ppc (void);
 
 #endif /*HWF_COMMON_H*/
diff --git a/src/hwf-ppc.c b/src/hwf-ppc.c
new file mode 100644
index 00000000..0f754e15
--- /dev/null
+++ b/src/hwf-ppc.c
@@ -0,0 +1,231 @@
+/* hwf-ppc.c - Detect hardware features - PPC part
+ * Copyright (C) 2013,2019 Jussi Kivilinna <jussi.kivilinna at iki.fi>
+ * Copyright (C) 2019 Shawn Landden <shawn at git.icu>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#if defined(HAVE_SYS_AUXV_H) && (defined(HAVE_GETAUXVAL) || \
+    defined(HAVE_ELF_AUX_INFO))
+#include <sys/auxv.h>
+#endif
+
+#include "g10lib.h"
+#include "hwf-common.h"
+
+#if !defined (__powerpc__) && !defined (__powerpc64__)
+# error Module build for wrong CPU.
+#endif
+
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ELF_AUX_INFO) && \
+    !defined(HAVE_GETAUXVAL) && defined(AT_HWCAP)
+#define HAVE_GETAUXVAL
+static unsigned long getauxval(unsigned long type)
+{
+  unsigned long auxval = 0;
+  int err;
+
+  /* Stand-in for Linux's 'getauxval' on systems that only offer
+   * 'elf_aux_info' (e.g. FreeBSD).  If 'elf_aux_info' reports an error,
+   * that code is stored in errno and 0 is returned. */
+  err = elf_aux_info (type, &auxval, sizeof(auxval));
+  if (err)
+    {
+      errno = err;
+      auxval = 0;
+    }
+
+  return auxval;
+}
+#endif
+
+
+#undef HAS_SYS_AT_HWCAP
+#if defined(__linux__) || \
+    (defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL))
+#define HAS_SYS_AT_HWCAP 1
+
+struct feature_map_s
+  {
+    unsigned int hwcap_flag;   /* Bit mask tested against the AT_HWCAP value.  */
+    unsigned int hwcap2_flag;  /* Bit mask tested against the AT_HWCAP2 value.  */
+    const char *feature_match; /* Unused in this file; presumably a cpuinfo token -- TODO confirm.  */
+    unsigned int hwf_flag;     /* HWF_* flag reported when a mask matches.  */
+  };
+
+#if defined(__powerpc__) || defined(__powerpc64__)
+
+/* Note: These macros have same values on Linux and FreeBSD. */
+#ifndef AT_HWCAP
+# define AT_HWCAP      16
+#endif
+#ifndef AT_HWCAP2
+# define AT_HWCAP2     26
+#endif
+
+#ifndef PPC_FEATURE2_VEC_CRYPTO
+# define PPC_FEATURE2_VEC_CRYPTO    0x02000000
+#endif
+
+static const struct feature_map_s ppc_features[] =
+  { /* Maps AT_HWCAP/AT_HWCAP2 bits to libgcrypt HWF_* flags.  */
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+    { 0, PPC_FEATURE2_VEC_CRYPTO, " crypto", HWF_PPC_VCRYPTO },
+#endif
+  }; /* NOTE(review): empty when ENABLE_PPC_CRYPTO_SUPPORT is undefined.  */
+#endif
+
+static int
+get_hwcap(unsigned int *hwcap, unsigned int *hwcap2)
+{
+  struct { unsigned long a_type; unsigned long a_val; } auxv;
+  FILE *f;
+  int err = -1;
+  static int hwcap_initialized = 0;
+  static unsigned int stored_hwcap = 0;
+  static unsigned int stored_hwcap2 = 0;
+  /* Store AT_HWCAP/AT_HWCAP2 in the out parameters; 0 on success, else -1. */
+  if (hwcap_initialized)
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+
+#if defined(__GLIBC__) && defined(__GNUC__)
+#if __GNUC__ >= 6
+  /* __builtin_cpu_supports returns 0 if glibc support doesn't exist, so only
+   * positive results can be trusted.  This shortcut is valid only while
+   * "vcrypto" is the sole ppc_features entry; update it if more are added. */
+  if (DIM(ppc_features) == 1) {
+    if (__builtin_cpu_supports("vcrypto")) {
+      stored_hwcap2 = PPC_FEATURE2_VEC_CRYPTO;
+      hwcap_initialized = 1;
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+  }
+#endif
+#endif
+  /* Preferred source: query the auxiliary vector through getauxval. */
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL)
+  errno = 0;
+  auxv.a_val = getauxval (AT_HWCAP);
+  if (errno == 0)
+    {
+      stored_hwcap |= auxv.a_val;
+      hwcap_initialized = 1;
+    }
+
+  if (AT_HWCAP2 >= 0)
+    {
+      errno = 0;
+      auxv.a_val = getauxval (AT_HWCAP2);
+      if (errno == 0)
+	{
+	  stored_hwcap2 |= auxv.a_val;
+	  hwcap_initialized = 1;
+	}
+    }
+
+  if (hwcap_initialized && (stored_hwcap || stored_hwcap2))
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+#endif
+  /* Fallback: read the raw auxiliary vector entries from /proc/self/auxv. */
+  f = fopen("/proc/self/auxv", "r");
+  if (!f)
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return -1;
+    }
+
+  while (fread(&auxv, sizeof(auxv), 1, f) > 0)
+    {
+      if (auxv.a_type == AT_HWCAP)
+        {
+          stored_hwcap |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+
+      if (auxv.a_type == AT_HWCAP2)
+        {
+          stored_hwcap2 |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+    }
+
+  if (hwcap_initialized)
+      err = 0;
+
+  fclose(f);
+  *hwcap = stored_hwcap;
+  *hwcap2 = stored_hwcap2;
+  return err;
+}
+
+static unsigned int
+detect_ppc_at_hwcap(void)
+{
+  unsigned int hwcap;
+  unsigned int hwcap2;
+  unsigned int features = 0;
+  unsigned int i;
+  /* Translate AT_HWCAP/AT_HWCAP2 bits into HWF_* flags via ppc_features. */
+  if (get_hwcap(&hwcap, &hwcap2) < 0)
+      return features;  /* No hwcap data available: report no features. */
+  /* An entry matches if either its hwcap or its hwcap2 mask has a bit set. */
+  for (i = 0; i < DIM(ppc_features); i++)
+    {
+      if (hwcap & ppc_features[i].hwcap_flag)
+        features |= ppc_features[i].hwf_flag;
+
+      if (hwcap2 & ppc_features[i].hwcap2_flag)
+        features |= ppc_features[i].hwf_flag;
+    }
+
+  return features;
+}
+
+#endif
+
+unsigned int
+_gcry_hwf_detect_ppc (void)
+{
+  unsigned int ret = 0;
+  unsigned int broken_hwfs = 0;  /* Always 0 here, so the mask below is a no-op. */
+  /* Returns the HWF_* bits detected for this PPC CPU (0 if none). */
+#if defined (HAS_SYS_AT_HWCAP)
+  ret |= detect_ppc_at_hwcap ();
+#endif
+
+  ret &= ~broken_hwfs;
+
+  return ret;
+}
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index e0816694..179521ab 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -42,6 +42,7 @@ static struct
   const char *desc;
 } hwflist[] =
   {
+#if defined(HAVE_CPU_ARCH_X86)
     { HWF_PADLOCK_RNG,         "padlock-rng" },
     { HWF_PADLOCK_AES,         "padlock-aes" },
     { HWF_PADLOCK_SHA,         "padlock-sha" },
@@ -59,11 +60,15 @@ static struct
     { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" },
     { HWF_INTEL_RDTSC,         "intel-rdtsc" },
     { HWF_INTEL_SHAEXT,        "intel-shaext" },
+#elif defined(HAVE_CPU_ARCH_ARM)
     { HWF_ARM_NEON,            "arm-neon" },
     { HWF_ARM_AES,             "arm-aes" },
     { HWF_ARM_SHA1,            "arm-sha1" },
     { HWF_ARM_SHA2,            "arm-sha2" },
-    { HWF_ARM_PMULL,           "arm-pmull" }
+    { HWF_ARM_PMULL,           "arm-pmull" },
+#elif defined(HAVE_CPU_ARCH_PPC)
+    { HWF_PPC_VCRYPTO,       "ppc-crypto" },
+#endif
   };
 
 /* A bit vector with the hardware features which shall not be used.
@@ -208,12 +213,14 @@ _gcry_detect_hw_features (void)
   {
     hw_features = _gcry_hwf_detect_x86 ();
   }
-#endif /* HAVE_CPU_ARCH_X86 */
-#if defined (HAVE_CPU_ARCH_ARM)
+#elif defined (HAVE_CPU_ARCH_ARM)
   {
     hw_features = _gcry_hwf_detect_arm ();
   }
-#endif /* HAVE_CPU_ARCH_ARM */
-
+#elif defined (HAVE_CPU_ARCH_PPC)
+  {
+    hw_features = _gcry_hwf_detect_ppc ();
+  }
+#endif
   hw_features &= ~disabled_hw_features;
 }
-- 
2.20.1




More information about the Gcrypt-devel mailing list