From jussi.kivilinna at iki.fi Sat Feb 1 23:58:11 2020 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 2 Feb 2020 00:58:11 +0200 Subject: [PATCH 3/3] crc-ppc: fix bad register used for vector load/store assembly In-Reply-To: <158059788087.29219.3671906635900050322.stgit@localhost6.localdomain6> References: <158059788087.29219.3671906635900050322.stgit@localhost6.localdomain6> Message-ID: <158059789122.29219.10926381793952186890.stgit@localhost6.localdomain6> * cipher/crc-ppc.c (CRC_VEC_U64_LOAD_BE): Move implementation to... (asm_vec_u64_load_be): ...here; Add "r0" to clobber list for load instruction when offset is not zero; Add zero offset path. -- Register r0 must not be used for RA input for vector load/store instructions as r0 is not read as register but as value '0'. Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c index 7dda90c5f..4d7f0add3 100644 --- a/cipher/crc-ppc.c +++ b/cipher/crc-ppc.c @@ -1,5 +1,5 @@ /* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation - * Copyright (C) 2019 Jussi Kivilinna + * Copyright (C) 2019-2020 Jussi Kivilinna * * This file is part of Libgcrypt. * @@ -168,22 +168,37 @@ static const vector16x_u8 bswap_const ALIGNED_64 = # define CRC_VEC_U64_LOAD(offs, ptr) \ vec_vsx_ld((offs), (const unsigned long long *)(ptr)) # define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr)) -# define CRC_VEC_U64_LOAD_BE(offs, ptr) \ - ({ \ - vector2x_u64 __vecu64; \ - __asm__ ("lxvd2x %%vs32,%1,%2\n\t" \ - "vperm %0,%%v0,%%v0,%3\n\t" \ - : "=v" (__vecu64) \ - : "r" (offs), "r" ((uintptr_t)(ptr)), \ - "v" (vec_load_le_const) \ - : "memory", "v0"); \ - __vecu64; }) +# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr) # define CRC_VEC_SWAP_TO_LE(v) (v) # define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v) # define VEC_U64_LO 0 # define VEC_U64_HI 1 -static const vector16x_u8 vec_load_le_const = - { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 }; + +static ASM_FUNC_ATTR_INLINE vector2x_u64 +asm_vec_u64_load_be(unsigned int offset, const void *ptr) +{ + static const vector16x_u8 vec_load_le_const = + { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 }; + vector2x_u64 vecu64; + +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ ("lxvd2x %%vs32,0,%1\n\t" + "vperm %0,%%v0,%%v0,%2\n\t" + : "=v" (vecu64) + : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const) + : "memory", "v0"); +#endif + else + __asm__ ("lxvd2x %%vs32,%1,%2\n\t" + "vperm %0,%%v0,%%v0,%3\n\t" + : "=v" (vecu64) + : "r" (offset), "r" ((uintptr_t)(ptr)), + "v" (vec_load_le_const) + : "memory", "r0", "v0"); + + return vecu64; +} #endif From jussi.kivilinna at iki.fi Sat Feb 1 23:58:06 2020 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 2 Feb 2020 00:58:06 +0200 Subject: [PATCH 2/3] rinjdael-aes: use zero offset vector load/store when possible In-Reply-To: <158059788087.29219.3671906635900050322.stgit@localhost6.localdomain6> References: <158059788087.29219.3671906635900050322.stgit@localhost6.localdomain6> Message-ID: <158059788606.29219.16909445919426688169.stgit@localhost6.localdomain6> * cipher/rijndael-ppc-common.h (asm_aligned_ld, asm_aligned_st): Use zero offset instruction variant when input offset is constant zero. * cipher/rijndael-ppc.c (asm_load_be_noswap) (asm_store_be_noswap): Likewise. 
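In short, the idea in this patch (and in the crc-ppc fix above) is: when the offset is known at compile time to be zero, emit the vector load/store with a literal 0 in the RA field so no offset register is needed at all; otherwise use the indexed form and add "r0" to the clobber list so the register allocator never places the offset in r0, which the hardware reads as the value 0 rather than as the register contents. A minimal sketch of the pattern, assuming a GCC-compatible compiler on a POWER/AltiVec target (the helper name example_vec_load is illustrative only, not part of the patch):

#include <altivec.h>
#include <stdint.h>

typedef vector unsigned char block;

static inline block
example_vec_load (unsigned long offset, const void *ptr)
{
  block vec;

#if __GNUC__ >= 4
  if (__builtin_constant_p (offset) && offset == 0)
    /* Zero offset known at compile time: put a literal 0 in the RA
       field; no offset register is used, so nothing to clobber.  */
    __asm__ volatile ("lvx %0,0,%1\n\t"
                      : "=v" (vec)
                      : "r" ((uintptr_t)ptr)
                      : "memory");
  else
#endif
    /* Indexed form: clobber "r0" so the compiler never allocates r0
       for the offset operand; r0 in the RA field is read as the
       value 0, not as the register contents.  */
    __asm__ volatile ("lvx %0,%1,%2\n\t"
                      : "=v" (vec)
                      : "r" (offset), "r" ((uintptr_t)ptr)
                      : "memory", "r0");

  return vec;
}

The asm_aligned_ld/asm_aligned_st and lxvw4x/stxvw4x changes in the diff below follow this same structure.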
-- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h index 165dd9f71..bbbeaac03 100644 --- a/cipher/rijndael-ppc-common.h +++ b/cipher/rijndael-ppc-common.h @@ -188,20 +188,36 @@ static ASM_FUNC_ATTR_INLINE block asm_aligned_ld(unsigned long offset, const void *ptr) { block vec; - __asm__ volatile ("lvx %0,%1,%2\n\t" - : "=v" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("lvx %0,0,%1\n\t" + : "=v" (vec) + : "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("lvx %0,%1,%2\n\t" + : "=v" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); return vec; } static ASM_FUNC_ATTR_INLINE void asm_aligned_st(block vec, unsigned long offset, void *ptr) { - __asm__ volatile ("stvx %0,%1,%2\n\t" - : - : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("stvx %0,0,%1\n\t" + : + : "v" (vec), "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("stvx %0,%1,%2\n\t" + : + : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); } static ASM_FUNC_ATTR_INLINE block diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 3e727628b..f5c323611 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -69,10 +69,18 @@ static ASM_FUNC_ATTR_INLINE block asm_load_be_noswap(unsigned long offset, const void *ptr) { block vec; - __asm__ volatile ("lxvw4x %x0,%1,%2\n\t" - : "=wa" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("lxvw4x %x0,0,%1\n\t" + : "=wa" (vec) + : "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("lxvw4x %x0,%1,%2\n\t" + : "=wa" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */ return vec; } @@ -81,10 +89,18 @@ static ASM_FUNC_ATTR_INLINE void asm_store_be_noswap(block vec, unsigned long offset, void *ptr) { /* NOTE: vec be-swapped using 'asm_be_swap' by caller */ - __asm__ volatile ("stxvw4x %x0,%1,%2\n\t" - : - : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("stxvw4x %x0,0,%1\n\t" + : + : "wa" (vec), "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("stxvw4x %x0,%1,%2\n\t" + : + : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); } From jussi.kivilinna at iki.fi Sat Feb 1 23:58:01 2020 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 2 Feb 2020 00:58:01 +0200 Subject: [PATCH 1/3] Add POWER9 little-endian variant of PPC AES implementation Message-ID: <158059788087.29219.3671906635900050322.stgit@localhost6.localdomain6> * configure.ac: Add 'rijndael-ppc9le.lo'. * cipher/Makefile.am: Add 'rijndael-ppc9le.c', 'rijndael-ppc-common.h' and 'rijndael-ppc-functions.h'. * cipher/rijndael-internal.h (USE_PPC_CRYPTO_WITH_PPC9LE): New. (RIJNDAEL_context_s): Add 'use_ppc9le_crypto'. * cipher/rijndael.c (_gcry_aes_ppc9le_encrypt) (_gcry_aes_ppc9le_decrypt, _gcry_aes_ppc9le_cfb_enc) (_gcry_aes_ppc9le_cfb_dec, _gcry_aes_ppc9le_ctr_enc) (_gcry_aes_ppc9le_cbc_enc, _gcry_aes_ppc9le_cbc_dec) (_gcry_aes_ppc9le_ocb_crypt, _gcry_aes_ppc9le_ocb_auth) (_gcry_aes_ppc9le_xts_crypt): New. 
(do_setkey, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc) (_gcry_aes_ctr_enc, _gcry_aes_cfb_dec, _gcry_aes_cbc_dec) (_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth, _gcry_aes_xts_crypt) [USE_PPC_CRYPTO_WITH_PPC9LE]: New. * cipher/rijndael-ppc.c: Split common code to headers 'rijndael-ppc-common.h' and 'rijndael-ppc-functions.h'. * cipher/rijndael-ppc-common.h: Split from 'rijndael-ppc.c'. (asm_add_uint64, asm_sra_int64, asm_swap_uint64_halfs): New. * cipher/rijndael-ppc-functions.h: Split from 'rijndael-ppc.c'. (CFB_ENC_FUNC, CBC_ENC_FUNC): Unroll loop by 2. (XTS_CRYPT_FUNC, GEN_TWEAK): Tweak generation without vperm instruction. * cipher/rijndael-ppc9le.c: New. -- Provide POWER9 little-endian optimized variant of PPC vcrypto AES implementation. This implementation uses 'lxvb16x' and 'stxvb16x' instructions to load/store vectors directly in big-endian order. Benchmark on POWER9 (~3.8Ghz): Before: AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.04 ns/B 918.7 MiB/s 3.94 c/B CBC dec | 0.222 ns/B 4292 MiB/s 0.844 c/B CFB enc | 1.04 ns/B 916.9 MiB/s 3.95 c/B CFB dec | 0.224 ns/B 4252 MiB/s 0.852 c/B CTR enc | 0.226 ns/B 4218 MiB/s 0.859 c/B CTR dec | 0.225 ns/B 4233 MiB/s 0.856 c/B XTS enc | 0.500 ns/B 1907 MiB/s 1.90 c/B XTS dec | 0.494 ns/B 1932 MiB/s 1.88 c/B OCB enc | 0.288 ns/B 3312 MiB/s 1.09 c/B OCB dec | 0.292 ns/B 3266 MiB/s 1.11 c/B OCB auth | 0.267 ns/B 3567 MiB/s 1.02 c/B After (ctr & ocb & cbc-dec & cfb-dec ~15% and xts ~8% faster): AES | nanosecs/byte mebibytes/sec cycles/byte CBC enc | 1.04 ns/B 914.2 MiB/s 3.96 c/B CBC dec | 0.191 ns/B 4984 MiB/s 0.727 c/B CFB enc | 1.03 ns/B 930.0 MiB/s 3.90 c/B CFB dec | 0.194 ns/B 4906 MiB/s 0.739 c/B CTR enc | 0.196 ns/B 4868 MiB/s 0.744 c/B CTR dec | 0.197 ns/B 4834 MiB/s 0.750 c/B XTS enc | 0.460 ns/B 2075 MiB/s 1.75 c/B XTS dec | 0.455 ns/B 2097 MiB/s 1.73 c/B OCB enc | 0.250 ns/B 3812 MiB/s 0.951 c/B OCB dec | 0.253 ns/B 3764 MiB/s 0.963 c/B OCB auth | 0.232 ns/B 4106 MiB/s 0.883 c/B Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 10a5ab62f..ef83cc741 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -99,7 +99,8 @@ EXTRA_libcipher_la_SOURCES = \ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ - rijndael-ppc.c \ + rijndael-ppc.c rijndael-ppc9le.c \ + rijndael-ppc-common.h rijndael-ppc-functions.h \ rmd160.c \ rsa.c \ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ @@ -221,6 +222,12 @@ rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` +rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h index 5150a69d7..bdd3bee14 100644 --- a/cipher/rijndael-internal.h +++ b/cipher/rijndael-internal.h @@ -105,13 +105,18 @@ #endif /* ENABLE_ARM_CRYPTO_SUPPORT */ /* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto - * accelerated code. */ + * accelerated code. 
USE_PPC_CRYPTO_WITH_PPC9LE indicates whether to + * enable POWER9 optimized variant. */ #undef USE_PPC_CRYPTO +#undef USE_PPC_CRYPTO_WITH_PPC9LE #ifdef ENABLE_PPC_CRYPTO_SUPPORT # if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) # if __GNUC__ >= 4 # define USE_PPC_CRYPTO 1 +# if !defined(WORDS_BIGENDIAN) && defined(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00) +# define USE_PPC_CRYPTO_WITH_PPC9LE 1 +# endif # endif # endif #endif /* ENABLE_PPC_CRYPTO_SUPPORT */ @@ -169,6 +174,9 @@ typedef struct RIJNDAEL_context_s #ifdef USE_PPC_CRYPTO unsigned int use_ppc_crypto:1; /* PowerPC crypto shall be used. */ #endif /*USE_PPC_CRYPTO*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + unsigned int use_ppc9le_crypto:1; /* POWER9 LE crypto shall be used. */ +#endif rijndael_cryptfn_t encrypt_fn; rijndael_cryptfn_t decrypt_fn; rijndael_prefetchfn_t prefetch_enc_fn; diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h new file mode 100644 index 000000000..165dd9f71 --- /dev/null +++ b/cipher/rijndael-ppc-common.h @@ -0,0 +1,326 @@ +/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation + * Copyright (C) 2019 Shawn Landden + * Copyright (C) 2019-2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + * + * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, + * and Cryptogams by Andy Polyakov, and if made part of a release of either + * or both projects, is thereafter dual-licensed under the license said project + * is released under. 
+ */ + +#ifndef G10_RIJNDAEL_PPC_COMMON_H +#define G10_RIJNDAEL_PPC_COMMON_H + +#include + + +typedef vector unsigned char block; + +typedef union +{ + u32 data32[4]; +} __attribute__((packed, aligned(1), may_alias)) u128_t; + + +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INLINE __attribute__((noinline)) +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION +#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE +#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE + + +#define ALIGNED_LOAD(in_ptr, offs) \ + (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr))) + +#define ALIGNED_STORE(out_ptr, offs, vec) \ + (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr))) + +#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const))) + +#define VEC_LOAD_BE(in_ptr, offs, bige_const) \ + (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \ + bige_const)) + +#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \ + (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr))) + +#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \ + (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \ + (void *)(out_ptr))) + +#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \ + (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr))) + + +#define ROUND_KEY_VARIABLES \ + block rkey0, rkeylast + +#define PRELOAD_ROUND_KEYS(nrounds) \ + do { \ + rkey0 = ALIGNED_LOAD (rk, 0); \ + rkeylast = ALIGNED_LOAD (rk, nrounds); \ + } while (0) + +#define AES_ENCRYPT(blk, nrounds) \ + do { \ + blk ^= rkey0; \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \ + if (nrounds >= 12) \ + { \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \ + if (rounds > 12) \ + { \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \ + blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \ + } \ + } \ + blk = asm_cipherlast_be (blk, rkeylast); \ + } while (0) + +#define AES_DECRYPT(blk, nrounds) \ + do { \ + blk ^= rkey0; \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \ + if (nrounds >= 12) \ + { \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \ + if (rounds > 12) \ + { \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \ + blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \ + } \ + } \ + blk = asm_ncipherlast_be (blk, rkeylast); \ + } while (0) + + +#define ROUND_KEY_VARIABLES_ALL \ + block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \ + rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast + +#define 
PRELOAD_ROUND_KEYS_ALL(nrounds) \ + do { \ + rkey0 = ALIGNED_LOAD (rk, 0); \ + rkey1 = ALIGNED_LOAD (rk, 1); \ + rkey2 = ALIGNED_LOAD (rk, 2); \ + rkey3 = ALIGNED_LOAD (rk, 3); \ + rkey4 = ALIGNED_LOAD (rk, 4); \ + rkey5 = ALIGNED_LOAD (rk, 5); \ + rkey6 = ALIGNED_LOAD (rk, 6); \ + rkey7 = ALIGNED_LOAD (rk, 7); \ + rkey8 = ALIGNED_LOAD (rk, 8); \ + rkey9 = ALIGNED_LOAD (rk, 9); \ + if (nrounds >= 12) \ + { \ + rkey10 = ALIGNED_LOAD (rk, 10); \ + rkey11 = ALIGNED_LOAD (rk, 11); \ + if (rounds > 12) \ + { \ + rkey12 = ALIGNED_LOAD (rk, 12); \ + rkey13 = ALIGNED_LOAD (rk, 13); \ + } \ + } \ + rkeylast = ALIGNED_LOAD (rk, nrounds); \ + } while (0) + +#define AES_ENCRYPT_ALL(blk, nrounds) \ + do { \ + blk ^= rkey0; \ + blk = asm_cipher_be (blk, rkey1); \ + blk = asm_cipher_be (blk, rkey2); \ + blk = asm_cipher_be (blk, rkey3); \ + blk = asm_cipher_be (blk, rkey4); \ + blk = asm_cipher_be (blk, rkey5); \ + blk = asm_cipher_be (blk, rkey6); \ + blk = asm_cipher_be (blk, rkey7); \ + blk = asm_cipher_be (blk, rkey8); \ + blk = asm_cipher_be (blk, rkey9); \ + if (nrounds >= 12) \ + { \ + blk = asm_cipher_be (blk, rkey10); \ + blk = asm_cipher_be (blk, rkey11); \ + if (rounds > 12) \ + { \ + blk = asm_cipher_be (blk, rkey12); \ + blk = asm_cipher_be (blk, rkey13); \ + } \ + } \ + blk = asm_cipherlast_be (blk, rkeylast); \ + } while (0) + + +static ASM_FUNC_ATTR_INLINE block +asm_aligned_ld(unsigned long offset, const void *ptr) +{ + block vec; + __asm__ volatile ("lvx %0,%1,%2\n\t" + : "=v" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + return vec; +} + +static ASM_FUNC_ATTR_INLINE void +asm_aligned_st(block vec, unsigned long offset, void *ptr) +{ + __asm__ volatile ("stvx %0,%1,%2\n\t" + : + : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +} + +static ASM_FUNC_ATTR_INLINE block +asm_vperm1(block vec, block mask) +{ + block o; + __asm__ volatile ("vperm %0,%1,%1,%2\n\t" + : "=v" (o) + : "v" (vec), "v" (mask)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_add_uint128(block a, block b) +{ + block res; + __asm__ volatile ("vadduqm %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); + return res; +} + +static ASM_FUNC_ATTR_INLINE block +asm_add_uint64(block a, block b) +{ + block res; + __asm__ volatile ("vaddudm %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); + return res; +} + +static ASM_FUNC_ATTR_INLINE block +asm_sra_int64(block a, block b) +{ + block res; + __asm__ volatile ("vsrad %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); + return res; +} + +static block +asm_swap_uint64_halfs(block a) +{ + block res; + __asm__ volatile ("xxswapd %x0, %x1" + : "=wa" (res) + : "wa" (a)); + return res; +} + +static ASM_FUNC_ATTR_INLINE block +asm_xor(block a, block b) +{ + block res; + __asm__ volatile ("vxor %0,%1,%2\n\t" + : "=v" (res) + : "v" (a), "v" (b)); + return res; +} + +static ASM_FUNC_ATTR_INLINE block +asm_cipher_be(block b, block rk) +{ + block o; + __asm__ volatile ("vcipher %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_cipherlast_be(block b, block rk) +{ + block o; + __asm__ volatile ("vcipherlast %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_ncipher_be(block b, block rk) +{ + block o; + __asm__ volatile ("vncipher %0, %1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + +static ASM_FUNC_ATTR_INLINE block +asm_ncipherlast_be(block b, block rk) +{ + block o; + __asm__ volatile ("vncipherlast %0, 
%1, %2\n\t" + : "=v" (o) + : "v" (b), "v" (rk)); + return o; +} + + +/* Make a decryption key from an encryption key. */ +static ASM_FUNC_ATTR_INLINE void +internal_aes_ppc_prepare_decryption (RIJNDAEL_context *ctx) +{ + u128_t *ekey = (u128_t *)(void *)ctx->keyschenc; + u128_t *dkey = (u128_t *)(void *)ctx->keyschdec; + int rounds = ctx->rounds; + int rr; + int r; + + r = 0; + rr = rounds; + for (r = 0, rr = rounds; r <= rounds; r++, rr--) + { + ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr)); + } +} + +#endif /* G10_RIJNDAEL_PPC_COMMON_H */ diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h new file mode 100644 index 000000000..72f31852b --- /dev/null +++ b/cipher/rijndael-ppc-functions.h @@ -0,0 +1,2020 @@ +/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation + * Copyright (C) 2019 Shawn Landden + * Copyright (C) 2019-2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + * + * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, + * and Cryptogams by Andy Polyakov, and if made part of a release of either + * or both projects, is thereafter dual-licensed under the license said project + * is released under. 
+ */ + +unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + const block bige_const = asm_load_be_const(); + const u128_t *rk = (u128_t *)&ctx->keyschenc; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block b; + + b = VEC_LOAD_BE (in, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + + AES_ENCRYPT (b, rounds); + VEC_STORE_BE (out, 0, b, bige_const); + + return 0; /* does not use stack */ +} + + +unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, + unsigned char *out, + const unsigned char *in) +{ + const block bige_const = asm_load_be_const(); + const u128_t *rk = (u128_t *)&ctx->keyschdec; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block b; + + b = VEC_LOAD_BE (in, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + + AES_DECRYPT (b, rounds); + VEC_STORE_BE (out, 0, b, bige_const); + + return 0; /* does not use stack */ +} + + +void CFB_ENC_FUNC (void *context, unsigned char *iv_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES_ALL; + block rkeylast_orig; + block iv; + + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); + + PRELOAD_ROUND_KEYS_ALL (rounds); + rkeylast_orig = rkeylast; + + for (; nblocks >= 2; nblocks -= 2) + { + block in2, iv1; + + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in + 1, 0, bige_const); + in += 2; + + AES_ENCRYPT_ALL (iv, rounds); + + iv1 = iv; + rkeylast = rkeylast_orig ^ in2; + + AES_ENCRYPT_ALL (iv, rounds); + + VEC_STORE_BE (out++, 0, iv1, bige_const); + VEC_STORE_BE (out++, 0, iv, bige_const); + } + + for (; nblocks; nblocks--) + { + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in++, 0, bige_const); + + AES_ENCRYPT_ALL (iv, rounds); + + VEC_STORE_BE (out++, 0, iv, bige_const); + } + + VEC_STORE_BE (iv_arg, 0, iv, bige_const); +} + +void CFB_DEC_FUNC (void *context, unsigned char *iv_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block rkeylast_orig; + block iv, b, bin; + block in0, in1, in2, in3, in4, in5, in6, in7; + block b0, b1, b2, b3, b4, b5, b6, b7; + block rkey; + + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + rkeylast_orig = rkeylast; + + for (; nblocks >= 8; nblocks -= 8) + { + in0 = iv; + in1 = VEC_LOAD_BE_NOSWAP (in, 0); + in2 = VEC_LOAD_BE_NOSWAP (in, 1); + in3 = VEC_LOAD_BE_NOSWAP (in, 2); + in4 = VEC_LOAD_BE_NOSWAP (in, 3); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in5 = VEC_LOAD_BE_NOSWAP (in, 4); + in6 = VEC_LOAD_BE_NOSWAP (in, 5); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in7 = VEC_LOAD_BE_NOSWAP (in, 6); + iv = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in7 = VEC_BE_SWAP (in7, bige_const); + iv = VEC_BE_SWAP (iv, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + b4 = asm_xor (rkey0, in4); + b5 = 
asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in4); + in7 = asm_xor (rkeylast, in7); + in0 = asm_xor (rkeylast, iv); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, in5); + b5 = asm_cipherlast_be (b5, in6); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, in7); + b7 = asm_cipherlast_be (b7, in0); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + in0 = iv; + in1 = VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in, 1, bige_const); + in3 = VEC_LOAD_BE (in, 2, bige_const); + iv = VEC_LOAD_BE (in, 3, bige_const); + + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + in0 = asm_xor (rkeylast, iv); + b0 = asm_cipherlast_be (b0, in1); + b1 = asm_cipherlast_be (b1, in2); + b2 = asm_cipherlast_be (b2, in3); + b3 = asm_cipherlast_be (b3, in0); + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + bin = VEC_LOAD_BE (in, 0, bige_const); + rkeylast = rkeylast_orig ^ bin; + b = iv; + iv = bin; + + AES_ENCRYPT (b, rounds); + + VEC_STORE_BE (out, 0, b, bige_const); + + out++; + in++; + } + + VEC_STORE_BE (iv_arg, 0, iv, bige_const); +} + + +void CBC_ENC_FUNC (void *context, unsigned char *iv_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac) +{ + const block 
bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *in = (const u128_t *)inbuf_arg; + byte *out = (byte *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES_ALL; + block lastiv, b; + unsigned int outadd = -(!cbc_mac) & 16; + + lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const); + + PRELOAD_ROUND_KEYS_ALL (rounds); + + for (; nblocks >= 2; nblocks -= 2) + { + block in2, lastiv1; + + b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const); + in2 = VEC_LOAD_BE (in + 1, 0, bige_const); + in += 2; + + AES_ENCRYPT_ALL (b, rounds); + + lastiv1 = b; + b = lastiv1 ^ in2; + + AES_ENCRYPT_ALL (b, rounds); + + lastiv = b; + VEC_STORE_BE ((u128_t *)out, 0, lastiv1, bige_const); + out += outadd; + VEC_STORE_BE ((u128_t *)out, 0, lastiv, bige_const); + out += outadd; + } + + for (; nblocks; nblocks--) + { + b = lastiv ^ VEC_LOAD_BE (in++, 0, bige_const); + + AES_ENCRYPT_ALL (b, rounds); + + lastiv = b; + VEC_STORE_BE ((u128_t *)out, 0, b, bige_const); + out += outadd; + } + + VEC_STORE_BE (iv_arg, 0, lastiv, bige_const); +} + +void CBC_DEC_FUNC (void *context, unsigned char *iv_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschdec; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block rkeylast_orig; + block in0, in1, in2, in3, in4, in5, in6, in7; + block b0, b1, b2, b3, b4, b5, b6, b7; + block rkey; + block iv, b; + + if (!ctx->decryption_prepared) + { + internal_aes_ppc_prepare_decryption (ctx); + ctx->decryption_prepared = 1; + } + + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + rkeylast_orig = rkeylast; + + for (; nblocks >= 8; nblocks -= 8) + { + in0 = VEC_LOAD_BE_NOSWAP (in, 0); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + b4 = asm_xor (rkey0, in4); + b5 = asm_xor (rkey0, in5); + b6 = asm_xor (rkey0, in6); + b7 = asm_xor (rkey0, in7); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + b0 = asm_ncipherlast_be (b0, iv); + iv = in7; + b1 = 
asm_ncipherlast_be (b1, in0); + in3 = asm_xor (rkeylast, in3); + in4 = asm_xor (rkeylast, in4); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); + in5 = asm_xor (rkeylast, in5); + in6 = asm_xor (rkeylast, in6); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, in3); + b5 = asm_ncipherlast_be (b5, in4); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, in5); + b7 = asm_ncipherlast_be (b7, in6); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); + + b0 = asm_xor (rkey0, in0); + b1 = asm_xor (rkey0, in1); + b2 = asm_xor (rkey0, in2); + b3 = asm_xor (rkey0, in3); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + iv = asm_xor (rkeylast, iv); + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + + b0 = asm_ncipherlast_be (b0, iv); + iv = in3; + b1 = asm_ncipherlast_be (b1, in0); + b2 = asm_ncipherlast_be (b2, in1); + b3 = asm_ncipherlast_be (b3, in2); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + rkeylast = rkeylast_orig ^ iv; + + iv = VEC_LOAD_BE (in, 0, bige_const); + b = iv; + AES_DECRYPT (b, rounds); + + VEC_STORE_BE (out, 0, b, bige_const); + + in++; + out++; + } + + VEC_STORE_BE (iv_arg, 0, iv, bige_const); +} + + +void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + static const unsigned char vec_one_const[16] = + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block rkeylast_orig; + block ctr, b, one; + + ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const); + one = VEC_LOAD_BE (&vec_one_const, 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + rkeylast_orig = rkeylast; + + if (nblocks >= 4) + { + block in0, in1, in2, in3, in4, in5, in6, in7; + block b0, b1, b2, b3, b4, b5, b6, b7; + block two, three, four; + block rkey; + + two = asm_add_uint128 (one, one); + three = asm_add_uint128 (two, one); + four = asm_add_uint128 (two, two); + + for (; nblocks >= 8; nblocks -= 8) + { + b1 = 
asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b4 = asm_add_uint128 (ctr, four); + b5 = asm_add_uint128 (b1, four); + b6 = asm_add_uint128 (b2, four); + b7 = asm_add_uint128 (b3, four); + b0 = asm_xor (rkey0, ctr); + rkey = ALIGNED_LOAD (rk, 1); + ctr = asm_add_uint128 (b4, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + b0 = asm_cipher_be (b0, rkey); + b1 = asm_cipher_be (b1, rkey); + b2 = asm_cipher_be (b2, rkey); + b3 = asm_cipher_be (b3, rkey); + b4 = asm_xor (rkey0, b4); + b5 = asm_xor (rkey0, b5); + b6 = asm_xor (rkey0, b6); + b7 = asm_xor (rkey0, b7); + b4 = asm_cipher_be (b4, rkey); + b5 = asm_cipher_be (b5, rkey); + b6 = asm_cipher_be (b6, rkey); + b7 = asm_cipher_be (b7, rkey); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + in0 = VEC_LOAD_BE_NOSWAP (in, 0); + DO_ROUND(2); + in1 = VEC_LOAD_BE_NOSWAP (in, 1); + DO_ROUND(3); + in2 = VEC_LOAD_BE_NOSWAP (in, 2); + DO_ROUND(4); + in3 = VEC_LOAD_BE_NOSWAP (in, 3); + DO_ROUND(5); + in4 = VEC_LOAD_BE_NOSWAP (in, 4); + DO_ROUND(6); + in5 = VEC_LOAD_BE_NOSWAP (in, 5); + DO_ROUND(7); + in6 = VEC_LOAD_BE_NOSWAP (in, 6); + DO_ROUND(8); + in7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + DO_ROUND(9); + + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + in0 = VEC_BE_SWAP (in0, bige_const); + in1 = VEC_BE_SWAP (in1, bige_const); + in2 = VEC_BE_SWAP (in2, bige_const); + in3 = VEC_BE_SWAP (in3, bige_const); + in4 = VEC_BE_SWAP (in4, bige_const); + in5 = VEC_BE_SWAP (in5, bige_const); + in6 = VEC_BE_SWAP (in6, bige_const); + in7 = VEC_BE_SWAP (in7, bige_const); + + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + in4 = asm_xor (rkeylast, in4); + in5 = asm_xor (rkeylast, in5); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + in6 = asm_xor (rkeylast, in6); + in7 = asm_xor (rkeylast, in7); + b4 = asm_cipherlast_be (b4, in4); + b5 = asm_cipherlast_be (b5, in5); + b6 = asm_cipherlast_be (b6, in6); + b7 = asm_cipherlast_be (b7, in7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + b1 = asm_add_uint128 (ctr, one); + b2 = asm_add_uint128 (ctr, two); + b3 = asm_add_uint128 (ctr, three); + b0 = asm_xor (rkey0, ctr); + ctr = asm_add_uint128 (ctr, four); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = 
asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + + in0 = VEC_LOAD_BE (in, 0, bige_const); + in1 = VEC_LOAD_BE (in, 1, bige_const); + in2 = VEC_LOAD_BE (in, 2, bige_const); + in3 = VEC_LOAD_BE (in, 3, bige_const); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + in0 = asm_xor (rkeylast, in0); + in1 = asm_xor (rkeylast, in1); + in2 = asm_xor (rkeylast, in2); + in3 = asm_xor (rkeylast, in3); + + b0 = asm_cipherlast_be (b0, in0); + b1 = asm_cipherlast_be (b1, in1); + b2 = asm_cipherlast_be (b2, in2); + b3 = asm_cipherlast_be (b3, in3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + } + + for (; nblocks; nblocks--) + { + b = ctr; + ctr = asm_add_uint128 (ctr, one); + rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); + + AES_ENCRYPT (b, rounds); + + VEC_STORE_BE (out, 0, b, bige_const); + + out++; + in++; + } + + VEC_STORE_BE (ctr_arg, 0, ctr, bige_const); +} + + +size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = (void *)&c->context.c; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + u64 data_nblocks = c->u_mode.ocb.data_nblocks; + block l0, l1, l2, l; + block b0, b1, b2, b3, b4, b5, b6, b7, b; + block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; + block rkey, rkeylf; + block ctr, iv; + ROUND_KEY_VARIABLES; + + iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const); + ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const); + + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); + + if (encrypt) + { + const u128_t *rk = (u128_t *)&ctx->keyschenc; + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8 && data_nblocks % 8; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + b ^= iv; + + VEC_STORE_BE (out, 0, b, bige_const); + + in += 1; + out += 1; + } + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); + + ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv 
^ l1; + iv3 = iv ^ l1 ^ l2; + iv4 = iv ^ l1 ^ l2 ^ l0; + iv5 = iv ^ l2 ^ l0; + iv6 = iv ^ l2; + iv7 = iv ^ l2 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + b4 ^= iv4; + b5 ^= iv5; + b6 ^= iv6; + b7 ^= iv7; + iv = iv7 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_cipherlast_be (b0, iv0); + b1 = asm_cipherlast_be (b1, iv1); + b2 = asm_cipherlast_be (b2, iv2); + b3 = asm_cipherlast_be (b3, iv3); + b4 = asm_cipherlast_be (b4, iv4); + b5 = asm_cipherlast_be (b5, iv5); + b6 = asm_cipherlast_be (b6, iv6); + b7 = asm_cipherlast_be (b7, iv7); + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4 && (data_nblocks % 4) == 0) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); + + ctr ^= b0 ^ b1 ^ b2 ^ b3; + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + iv = iv3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast ^ rkey0; + b0 = asm_cipherlast_be (b0, rkey ^ iv0); + b1 = asm_cipherlast_be (b1, rkey ^ iv1); + b2 = asm_cipherlast_be (b2, rkey ^ iv2); + b3 = asm_cipherlast_be (b3, rkey ^ iv3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); + + /* 
Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + b ^= iv; + + VEC_STORE_BE (out, 0, b, bige_const); + + in += 1; + out += 1; + } + } + else + { + const u128_t *rk = (u128_t *)&ctx->keyschdec; + + if (!ctx->decryption_prepared) + { + internal_aes_ppc_prepare_decryption (ctx); + ctx->decryption_prepared = 1; + } + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8 && data_nblocks % 8; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + b ^= iv; + AES_DECRYPT (b, rounds); + b ^= iv; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + + VEC_STORE_BE (out, 0, b, bige_const); + + in += 1; + out += 1; + } + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + l = VEC_BE_SWAP(l, bige_const); + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l2; + iv4 = iv ^ l1 ^ l2 ^ l0; + iv5 = iv ^ l2 ^ l0; + iv6 = iv ^ l2; + iv7 = iv ^ l2 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + b4 ^= iv4; + b5 ^= iv5; + b6 ^= iv6; + b7 ^= iv7; + iv = iv7 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + + DO_ROUND(8); + + iv0 = asm_xor (rkeylf, iv0); + iv1 = asm_xor (rkeylf, iv1); + iv2 = asm_xor (rkeylf, iv2); + iv3 = asm_xor (rkeylf, iv3); + iv4 = asm_xor (rkeylf, iv4); + iv5 = asm_xor (rkeylf, iv5); + iv6 = asm_xor (rkeylf, iv6); + iv7 = asm_xor (rkeylf, iv7); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_ncipherlast_be (b0, iv0); + b1 = asm_ncipherlast_be (b1, iv1); + b2 = asm_ncipherlast_be (b2, iv2); + b3 = asm_ncipherlast_be (b3, iv3); + b4 = asm_ncipherlast_be (b4, iv4); + b5 = asm_ncipherlast_be (b5, iv5); + b6 = asm_ncipherlast_be (b6, iv6); + b7 = asm_ncipherlast_be (b7, iv7); + + ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; + + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + 
VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4 && (data_nblocks % 4) == 0) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); + + iv ^= rkey0; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + iv = iv3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast ^ rkey0; + b0 = asm_ncipherlast_be (b0, rkey ^ iv0); + b1 = asm_ncipherlast_be (b1, rkey ^ iv1); + b2 = asm_ncipherlast_be (b2, rkey ^ iv2); + b3 = asm_ncipherlast_be (b3, rkey ^ iv3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + ctr ^= b0 ^ b1 ^ b2 ^ b3; + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (in, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + b ^= iv; + AES_DECRYPT (b, rounds); + b ^= iv; + /* Checksum_i = Checksum_{i-1} xor P_i */ + ctr ^= b; + + VEC_STORE_BE (out, 0, b, bige_const); + + in += 1; + out += 1; + } + } + + VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const); + VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const); + c->u_mode.ocb.data_nblocks = data_nblocks; + + return 0; +} + +size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = (void *)&c->context.c; + const u128_t *rk = (u128_t *)&ctx->keyschenc; + const u128_t *abuf = (const u128_t *)abuf_arg; + int rounds = ctx->rounds; + u64 data_nblocks = c->u_mode.ocb.aad_nblocks; + block l0, l1, l2, l; + block b0, b1, b2, b3, b4, b5, b6, b7, b; + block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; + block rkey, frkey; + block ctr, iv; + ROUND_KEY_VARIABLES; + + iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const); + ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const); + + l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); + l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); + l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8 && data_nblocks % 8; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + ctr ^= b; + + abuf += 1; + } + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE (abuf, 
0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); + b4 = VEC_LOAD_BE (abuf, 4, bige_const); + b5 = VEC_LOAD_BE (abuf, 5, bige_const); + b6 = VEC_LOAD_BE (abuf, 6, bige_const); + b7 = VEC_LOAD_BE (abuf, 7, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const); + + frkey = rkey0; + iv ^= frkey; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l2; + iv4 = iv ^ l1 ^ l2 ^ l0; + iv5 = iv ^ l2 ^ l0; + iv6 = iv ^ l2; + iv7 = iv ^ l2 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + b4 ^= iv4; + b5 ^= iv5; + b6 ^= iv6; + b7 ^= iv7; + iv = iv7 ^ frkey; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast; + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); + b4 = asm_cipherlast_be (b4, rkey); + b5 = asm_cipherlast_be (b5, rkey); + b6 = asm_cipherlast_be (b6, rkey); + b7 = asm_cipherlast_be (b7, rkey); + + ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; + + abuf += 8; + } + + if (nblocks >= 4 && (data_nblocks % 4) == 0) + { + b0 = VEC_LOAD_BE (abuf, 0, bige_const); + b1 = VEC_LOAD_BE (abuf, 1, bige_const); + b2 = VEC_LOAD_BE (abuf, 2, bige_const); + b3 = VEC_LOAD_BE (abuf, 3, bige_const); + + l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); + + frkey = rkey0; + iv ^= frkey; + + iv0 = iv ^ l0; + iv1 = iv ^ l0 ^ l1; + iv2 = iv ^ l1; + iv3 = iv ^ l1 ^ l; + + b0 ^= iv0; + b1 ^= iv1; + b2 ^= iv2; + b3 ^= iv3; + iv = iv3 ^ frkey; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast; + b0 = asm_cipherlast_be (b0, rkey); + b1 = asm_cipherlast_be (b1, rkey); + b2 = asm_cipherlast_be (b2, rkey); + b3 = asm_cipherlast_be (b3, rkey); + + ctr ^= b0 ^ b1 ^ b2 ^ b3; + + abuf += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); + b = VEC_LOAD_BE (abuf, 0, bige_const); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + iv ^= l; + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + b ^= iv; + AES_ENCRYPT (b, rounds); + ctr ^= b; + + abuf += 1; + } + + VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const); + VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const); + c->u_mode.ocb.aad_nblocks = data_nblocks; + + return 0; +} + + +void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ 
+#ifdef WORDS_BIGENDIAN + static const block vec_bswap128_const = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; +#else + static const block vec_bswap128_const = + { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 }; +#endif + static const unsigned char vec_tweak_const[16] = + { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 }; + static const vector unsigned long long vec_shift63_const = + { 63, 63 }; + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + block tweak; + block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf; + block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7; + block tweak_const, bswap128_const, shift63_const; + ROUND_KEY_VARIABLES; + + tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const); + bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0); + shift63_const = ALIGNED_LOAD (&vec_shift63_const, 0); + + tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const); + tweak = asm_vperm1 (tweak, bswap128_const); + +#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \ + do { \ + block tmp1, tmp2; \ + tmp1 = asm_swap_uint64_halfs(tin); \ + tmp2 = asm_add_uint64(tin, tin); \ + tmp1 = asm_sra_int64(tmp1, shift63_const) & tweak_const; \ + tout = asm_xor(tmp1, tmp2); \ + } while (0) + + if (encrypt) + { + const u128_t *rk = (u128_t *)&ctx->keyschenc; + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be 
(b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_cipherlast_be (b0, tweak0); + b1 = asm_cipherlast_be (b1, tweak1); + b2 = asm_cipherlast_be (b2, tweak2); + b3 = asm_cipherlast_be (b3, tweak3); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_cipherlast_be (b4, tweak4); + b5 = asm_cipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_cipherlast_be (b6, tweak6); + b7 = asm_cipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast; + b0 = asm_cipherlast_be (b0, rkey ^ tweak0); + b1 = asm_cipherlast_be (b1, rkey ^ tweak1); + b2 = asm_cipherlast_be (b2, rkey ^ tweak2); + b3 = asm_cipherlast_be (b3, rkey ^ tweak3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + tweak0 = asm_vperm1 (tweak, bswap128_const); + + /* Xor-Encrypt/Decrypt-Xor block. */ + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; + + /* Generate next tweak. 
*/ + GEN_TWEAK (tweak, tweak); + + AES_ENCRYPT (b, rounds); + + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); + + in++; + out++; + } + } + else + { + const u128_t *rk = (u128_t *)&ctx->keyschdec; + + if (!ctx->decryption_prepared) + { + internal_aes_ppc_prepare_decryption (ctx); + ctx->decryption_prepared = 1; + } + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE_NOSWAP (in, 0); + b1 = VEC_LOAD_BE_NOSWAP (in, 1); + b2 = VEC_LOAD_BE_NOSWAP (in, 2); + b3 = VEC_LOAD_BE_NOSWAP (in, 3); + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + tweak0 = asm_vperm1 (tweak0, bswap128_const); + b4 = VEC_LOAD_BE_NOSWAP (in, 4); + b5 = VEC_LOAD_BE_NOSWAP (in, 5); + GEN_TWEAK (tweak2, tweak1); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + b6 = VEC_LOAD_BE_NOSWAP (in, 6); + b7 = VEC_LOAD_BE_NOSWAP (in, 7); + in += 8; + + b0 = VEC_BE_SWAP(b0, bige_const); + b1 = VEC_BE_SWAP(b1, bige_const); + GEN_TWEAK (tweak3, tweak2); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + GEN_TWEAK (tweak4, tweak3); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + b2 = VEC_BE_SWAP(b2, bige_const); + b3 = VEC_BE_SWAP(b3, bige_const); + GEN_TWEAK (tweak5, tweak4); + tweak4 = asm_vperm1 (tweak4, bswap128_const); + GEN_TWEAK (tweak6, tweak5); + tweak5 = asm_vperm1 (tweak5, bswap128_const); + b4 = VEC_BE_SWAP(b4, bige_const); + b5 = VEC_BE_SWAP(b5, bige_const); + GEN_TWEAK (tweak7, tweak6); + tweak6 = asm_vperm1 (tweak6, bswap128_const); + GEN_TWEAK (tweak, tweak7); + tweak7 = asm_vperm1 (tweak7, bswap128_const); + b6 = VEC_BE_SWAP(b6, bige_const); + b7 = VEC_BE_SWAP(b7, bige_const); + + tweak0 = asm_xor (tweak0, rkey0); + tweak1 = asm_xor (tweak1, rkey0); + tweak2 = asm_xor (tweak2, rkey0); + tweak3 = asm_xor (tweak3, rkey0); + tweak4 = asm_xor (tweak4, rkey0); + tweak5 = asm_xor (tweak5, rkey0); + tweak6 = asm_xor (tweak6, rkey0); + tweak7 = asm_xor (tweak7, rkey0); + + b0 = asm_xor (b0, tweak0); + b1 = asm_xor (b1, tweak1); + b2 = asm_xor (b2, tweak2); + b3 = asm_xor (b3, tweak3); + b4 = asm_xor (b4, tweak4); + b5 = asm_xor (b5, tweak5); + b6 = asm_xor (b6, tweak6); + b7 = asm_xor (b7, tweak7); + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + + rkeylf = asm_xor (rkeylast, rkey0); + + DO_ROUND(8); + + tweak0 = asm_xor (tweak0, rkeylf); + tweak1 = asm_xor (tweak1, rkeylf); + tweak2 = asm_xor (tweak2, rkeylf); + tweak3 = asm_xor (tweak3, rkeylf); + tweak4 = asm_xor (tweak4, rkeylf); + tweak5 = asm_xor (tweak5, rkeylf); + tweak6 = asm_xor (tweak6, rkeylf); + tweak7 = asm_xor (tweak7, rkeylf); + + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_ncipherlast_be (b0, tweak0); + b1 = asm_ncipherlast_be (b1, tweak1); + b2 = asm_ncipherlast_be (b2, tweak2); + b3 = asm_ncipherlast_be (b3, tweak3); + b0 = VEC_BE_SWAP (b0, bige_const); + b1 = VEC_BE_SWAP (b1, bige_const); + b4 = asm_ncipherlast_be (b4, tweak4); + b5 = asm_ncipherlast_be (b5, tweak5); + b2 = VEC_BE_SWAP (b2, bige_const); + b3 = VEC_BE_SWAP (b3, bige_const); + b6 = asm_ncipherlast_be (b6, tweak6); + b7 = 
asm_ncipherlast_be (b7, tweak7); + VEC_STORE_BE_NOSWAP (out, 0, b0); + VEC_STORE_BE_NOSWAP (out, 1, b1); + b4 = VEC_BE_SWAP (b4, bige_const); + b5 = VEC_BE_SWAP (b5, bige_const); + VEC_STORE_BE_NOSWAP (out, 2, b2); + VEC_STORE_BE_NOSWAP (out, 3, b3); + b6 = VEC_BE_SWAP (b6, bige_const); + b7 = VEC_BE_SWAP (b7, bige_const); + VEC_STORE_BE_NOSWAP (out, 4, b4); + VEC_STORE_BE_NOSWAP (out, 5, b5); + VEC_STORE_BE_NOSWAP (out, 6, b6); + VEC_STORE_BE_NOSWAP (out, 7, b7); + out += 8; + } + + if (nblocks >= 4) + { + tweak0 = tweak; + GEN_TWEAK (tweak1, tweak0); + GEN_TWEAK (tweak2, tweak1); + GEN_TWEAK (tweak3, tweak2); + GEN_TWEAK (tweak, tweak3); + + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + tweak0 = asm_vperm1 (tweak0, bswap128_const); + tweak1 = asm_vperm1 (tweak1, bswap128_const); + tweak2 = asm_vperm1 (tweak2, bswap128_const); + tweak3 = asm_vperm1 (tweak3, bswap128_const); + + b0 ^= tweak0 ^ rkey0; + b1 ^= tweak1 ^ rkey0; + b2 ^= tweak2 ^ rkey0; + b3 ^= tweak3 ^ rkey0; + +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + rkey = rkeylast; + b0 = asm_ncipherlast_be (b0, rkey ^ tweak0); + b1 = asm_ncipherlast_be (b1, rkey ^ tweak1); + b2 = asm_ncipherlast_be (b2, rkey ^ tweak2); + b3 = asm_ncipherlast_be (b3, rkey ^ tweak3); + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + tweak0 = asm_vperm1 (tweak, bswap128_const); + + /* Xor-Encrypt/Decrypt-Xor block. */ + b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; + + /* Generate next tweak. */ + GEN_TWEAK (tweak, tweak); + + AES_DECRYPT (b, rounds); + + b ^= tweak0; + VEC_STORE_BE (out, 0, b, bige_const); + + in++; + out++; + } + } + + tweak = asm_vperm1 (tweak, bswap128_const); + VEC_STORE_BE (tweak_arg, 0, tweak, bige_const); + +#undef GEN_TWEAK +} diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index a8bcae468..3e727628b 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -1,6 +1,6 @@ /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation * Copyright (C) 2019 Shawn Landden - * Copyright (C) 2019 Jussi Kivilinna + * Copyright (C) 2019-2020 Jussi Kivilinna * * This file is part of Libgcrypt. 
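The GEN_TWEAK macro above is a vectorized doubling of the 128-bit tweak in GF(2^128) with the XTS reduction polynomial x^128 + x^7 + x^2 + x + 1 (the 0x87 byte in vec_tweak_const); the arithmetic shift right by 63 turns the bit carried out of each half into an all-ones mask, so the reduction needs no data-dependent branch. A scalar sketch of the same step, assuming the tweak is held as two little-endian 64-bit halves (illustrative only, not part of the patch):

#include <stdint.h>

static void xts_double_tweak (uint64_t t[2])   /* t[0] = low half, t[1] = high half */
{
  uint64_t carry = t[1] >> 63;                 /* bit shifted out of the top */
  t[1] = (t[1] << 1) | (t[0] >> 63);           /* 128-bit shift left by one */
  t[0] = (t[0] << 1) ^ ((0 - carry) & 0x87);   /* reduce with x^7 + x^2 + x + 1 */
}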
* @@ -31,162 +31,7 @@ #ifdef USE_PPC_CRYPTO -#include - - -typedef vector unsigned char block; - -typedef union -{ - u32 data32[4]; -} __attribute__((packed, aligned(1), may_alias)) u128_t; - - -#define ALWAYS_INLINE inline __attribute__((always_inline)) -#define NO_INLINE __attribute__((noinline)) -#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) - -#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION -#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE -#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE - - -#define ALIGNED_LOAD(in_ptr, offs) \ - (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr))) - -#define ALIGNED_STORE(out_ptr, offs, vec) \ - (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr))) - -#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const))) - -#define VEC_LOAD_BE(in_ptr, offs, bige_const) \ - (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \ - bige_const)) - -#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \ - (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr))) - -#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \ - (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \ - (void *)(out_ptr))) - -#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \ - (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr))) - - -#define ROUND_KEY_VARIABLES \ - block rkey0, rkeylast - -#define PRELOAD_ROUND_KEYS(nrounds) \ - do { \ - rkey0 = ALIGNED_LOAD (rk, 0); \ - rkeylast = ALIGNED_LOAD (rk, nrounds); \ - } while (0) - -#define AES_ENCRYPT(blk, nrounds) \ - do { \ - blk ^= rkey0; \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \ - if (nrounds >= 12) \ - { \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \ - if (rounds > 12) \ - { \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \ - blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \ - } \ - } \ - blk = asm_cipherlast_be (blk, rkeylast); \ - } while (0) - -#define AES_DECRYPT(blk, nrounds) \ - do { \ - blk ^= rkey0; \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \ - if (nrounds >= 12) \ - { \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \ - if (rounds > 12) \ - { \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \ - blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \ - } \ - } \ - blk = asm_ncipherlast_be (blk, rkeylast); \ - } while (0) - - -#define ROUND_KEY_VARIABLES_ALL \ - block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \ - rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast - -#define PRELOAD_ROUND_KEYS_ALL(nrounds) \ - do { \ - rkey0 
= ALIGNED_LOAD (rk, 0); \ - rkey1 = ALIGNED_LOAD (rk, 1); \ - rkey2 = ALIGNED_LOAD (rk, 2); \ - rkey3 = ALIGNED_LOAD (rk, 3); \ - rkey4 = ALIGNED_LOAD (rk, 4); \ - rkey5 = ALIGNED_LOAD (rk, 5); \ - rkey6 = ALIGNED_LOAD (rk, 6); \ - rkey7 = ALIGNED_LOAD (rk, 7); \ - rkey8 = ALIGNED_LOAD (rk, 8); \ - rkey9 = ALIGNED_LOAD (rk, 9); \ - if (nrounds >= 12) \ - { \ - rkey10 = ALIGNED_LOAD (rk, 10); \ - rkey11 = ALIGNED_LOAD (rk, 11); \ - if (rounds > 12) \ - { \ - rkey12 = ALIGNED_LOAD (rk, 12); \ - rkey13 = ALIGNED_LOAD (rk, 13); \ - } \ - } \ - rkeylast = ALIGNED_LOAD (rk, nrounds); \ - } while (0) - -#define AES_ENCRYPT_ALL(blk, nrounds) \ - do { \ - blk ^= rkey0; \ - blk = asm_cipher_be (blk, rkey1); \ - blk = asm_cipher_be (blk, rkey2); \ - blk = asm_cipher_be (blk, rkey3); \ - blk = asm_cipher_be (blk, rkey4); \ - blk = asm_cipher_be (blk, rkey5); \ - blk = asm_cipher_be (blk, rkey6); \ - blk = asm_cipher_be (blk, rkey7); \ - blk = asm_cipher_be (blk, rkey8); \ - blk = asm_cipher_be (blk, rkey9); \ - if (nrounds >= 12) \ - { \ - blk = asm_cipher_be (blk, rkey10); \ - blk = asm_cipher_be (blk, rkey11); \ - if (rounds > 12) \ - { \ - blk = asm_cipher_be (blk, rkey12); \ - blk = asm_cipher_be (blk, rkey13); \ - } \ - } \ - blk = asm_cipherlast_be (blk, rkeylast); \ - } while (0) +#include "rijndael-ppc-common.h" #ifdef WORDS_BIGENDIAN @@ -198,26 +43,6 @@ static const block vec_bswap32_const_neg = #endif -static ASM_FUNC_ATTR_INLINE block -asm_aligned_ld(unsigned long offset, const void *ptr) -{ - block vec; - __asm__ volatile ("lvx %0,%1,%2\n\t" - : "=v" (vec) - : "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); - return vec; -} - -static ASM_FUNC_ATTR_INLINE void -asm_aligned_st(block vec, unsigned long offset, void *ptr) -{ - __asm__ volatile ("stvx %0,%1,%2\n\t" - : - : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr) - : "memory", "r0"); -} - static ASM_FUNC_ATTR_INLINE block asm_load_be_const(void) { @@ -229,16 +54,6 @@ asm_load_be_const(void) #endif } -static ASM_FUNC_ATTR_INLINE block -asm_vperm1(block vec, block mask) -{ - block o; - __asm__ volatile ("vperm %0,%1,%1,%2\n\t" - : "=v" (o) - : "v" (vec), "v" (mask)); - return o; -} - static ASM_FUNC_ATTR_INLINE block asm_be_swap(block vec, block be_bswap_const) { @@ -272,66 +87,6 @@ asm_store_be_noswap(block vec, unsigned long offset, void *ptr) : "memory", "r0"); } -static ASM_FUNC_ATTR_INLINE block -asm_add_uint128(block a, block b) -{ - block res; - __asm__ volatile ("vadduqm %0,%1,%2\n\t" - : "=v" (res) - : "v" (a), "v" (b)); - return res; -} - -static ASM_FUNC_ATTR_INLINE block -asm_xor(block a, block b) -{ - block res; - __asm__ volatile ("vxor %0,%1,%2\n\t" - : "=v" (res) - : "v" (a), "v" (b)); - return res; -} - -static ASM_FUNC_ATTR_INLINE block -asm_cipher_be(block b, block rk) -{ - block o; - __asm__ volatile ("vcipher %0, %1, %2\n\t" - : "=v" (o) - : "v" (b), "v" (rk)); - return o; -} - -static ASM_FUNC_ATTR_INLINE block -asm_cipherlast_be(block b, block rk) -{ - block o; - __asm__ volatile ("vcipherlast %0, %1, %2\n\t" - : "=v" (o) - : "v" (b), "v" (rk)); - return o; -} - -static ASM_FUNC_ATTR_INLINE block -asm_ncipher_be(block b, block rk) -{ - block o; - __asm__ volatile ("vncipher %0, %1, %2\n\t" - : "=v" (o) - : "v" (b), "v" (rk)); - return o; -} - -static ASM_FUNC_ATTR_INLINE block -asm_ncipherlast_be(block b, block rk) -{ - block o; - __asm__ volatile ("vncipherlast %0, %1, %2\n\t" - : "=v" (o) - : "v" (b), "v" (rk)); - return o; -} - static ASM_FUNC_ATTR_INLINE u32 _gcry_aes_sbox4_ppc8(u32 fourbytes) @@ -439,7 
+194,7 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) } } - rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b); + rcon = (rcon << 1) ^ (-(rcon >> 7) & 0x1b); } /* Store in big-endian order. */ @@ -450,7 +205,7 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) #else block rvec = ALIGNED_LOAD (ekey, r); ALIGNED_STORE (ekey, r, - vec_perm(rvec, rvec, vec_bswap32_const)); + vec_perm(rvec, rvec, vec_bswap32_const)); (void)bige_const; #endif } @@ -464,2012 +219,25 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) wipememory(&tkk, sizeof(tkk)); } - -/* Make a decryption key from an encryption key. */ -static ASM_FUNC_ATTR_INLINE void -aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) -{ - u128_t *ekey = (u128_t *)(void *)ctx->keyschenc; - u128_t *dkey = (u128_t *)(void *)ctx->keyschdec; - int rounds = ctx->rounds; - int rr; - int r; - - r = 0; - rr = rounds; - for (r = 0, rr = rounds; r <= rounds; r++, rr--) - { - ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr)); - } -} - - void _gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) { - aes_ppc8_prepare_decryption (ctx); -} - - -unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx, - unsigned char *out, - const unsigned char *in) -{ - const block bige_const = asm_load_be_const(); - const u128_t *rk = (u128_t *)&ctx->keyschenc; - int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; - block b; - - b = VEC_LOAD_BE (in, 0, bige_const); - - PRELOAD_ROUND_KEYS (rounds); - - AES_ENCRYPT (b, rounds); - VEC_STORE_BE (out, 0, b, bige_const); - - return 0; /* does not use stack */ -} - - -unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx, - unsigned char *out, - const unsigned char *in) -{ - const block bige_const = asm_load_be_const(); - const u128_t *rk = (u128_t *)&ctx->keyschdec; - int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; - block b; - - b = VEC_LOAD_BE (in, 0, bige_const); - - PRELOAD_ROUND_KEYS (rounds); - - AES_DECRYPT (b, rounds); - VEC_STORE_BE (out, 0, b, bige_const); - - return 0; /* does not use stack */ -} - - -void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv_arg, - void *outbuf_arg, const void *inbuf_arg, - size_t nblocks) -{ - const block bige_const = asm_load_be_const(); - RIJNDAEL_context *ctx = context; - const u128_t *rk = (u128_t *)&ctx->keyschenc; - const u128_t *in = (const u128_t *)inbuf_arg; - u128_t *out = (u128_t *)outbuf_arg; - int rounds = ctx->rounds; - ROUND_KEY_VARIABLES_ALL; - block rkeylast_orig; - block iv; - - iv = VEC_LOAD_BE (iv_arg, 0, bige_const); - - PRELOAD_ROUND_KEYS_ALL (rounds); - rkeylast_orig = rkeylast; - - for (; nblocks; nblocks--) - { - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); - - AES_ENCRYPT_ALL (iv, rounds); - - VEC_STORE_BE (out, 0, iv, bige_const); - - out++; - in++; - } - - VEC_STORE_BE (iv_arg, 0, iv, bige_const); -} - -void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg, - void *outbuf_arg, const void *inbuf_arg, - size_t nblocks) -{ - const block bige_const = asm_load_be_const(); - RIJNDAEL_context *ctx = context; - const u128_t *rk = (u128_t *)&ctx->keyschenc; - const u128_t *in = (const u128_t *)inbuf_arg; - u128_t *out = (u128_t *)outbuf_arg; - int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; - block rkeylast_orig; - block iv, b, bin; - block in0, in1, in2, in3, in4, in5, in6, in7; - block b0, b1, b2, b3, b4, b5, b6, b7; - block rkey; - - iv = VEC_LOAD_BE (iv_arg, 0, bige_const); - - PRELOAD_ROUND_KEYS (rounds); - rkeylast_orig = rkeylast; - - for (; nblocks >= 8; nblocks -= 
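The updated rcon step above, (rcon << 1) ^ (-(rcon >> 7) & 0x1b), is the usual branchless xtime, i.e. doubling in GF(2^8) with the AES polynomial: negating the carried-out bit yields a 0x00 or all-ones mask, so no data-dependent branch or multiply is needed. A standalone sketch (illustrative only):

#include <stdint.h>

static uint8_t xtime (uint8_t x)
{
  /* Multiply by x in GF(2^8); 0x1b is xored in only when bit 7 was set. */
  return (uint8_t)((x << 1) ^ (-(x >> 7) & 0x1b));
}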
8) - { - in0 = iv; - in1 = VEC_LOAD_BE_NOSWAP (in, 0); - in2 = VEC_LOAD_BE_NOSWAP (in, 1); - in3 = VEC_LOAD_BE_NOSWAP (in, 2); - in4 = VEC_LOAD_BE_NOSWAP (in, 3); - in1 = VEC_BE_SWAP (in1, bige_const); - in2 = VEC_BE_SWAP (in2, bige_const); - in5 = VEC_LOAD_BE_NOSWAP (in, 4); - in6 = VEC_LOAD_BE_NOSWAP (in, 5); - in3 = VEC_BE_SWAP (in3, bige_const); - in4 = VEC_BE_SWAP (in4, bige_const); - in7 = VEC_LOAD_BE_NOSWAP (in, 6); - iv = VEC_LOAD_BE_NOSWAP (in, 7); - in += 8; - in5 = VEC_BE_SWAP (in5, bige_const); - in6 = VEC_BE_SWAP (in6, bige_const); - b0 = asm_xor (rkey0, in0); - b1 = asm_xor (rkey0, in1); - in7 = VEC_BE_SWAP (in7, bige_const); - iv = VEC_BE_SWAP (iv, bige_const); - b2 = asm_xor (rkey0, in2); - b3 = asm_xor (rkey0, in3); - b4 = asm_xor (rkey0, in4); - b5 = asm_xor (rkey0, in5); - b6 = asm_xor (rkey0, in6); - b7 = asm_xor (rkey0, in7); - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); \ - b4 = asm_cipher_be (b4, rkey); \ - b5 = asm_cipher_be (b5, rkey); \ - b6 = asm_cipher_be (b6, rkey); \ - b7 = asm_cipher_be (b7, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - in1 = asm_xor (rkeylast, in1); - in2 = asm_xor (rkeylast, in2); - in3 = asm_xor (rkeylast, in3); - in4 = asm_xor (rkeylast, in4); - b0 = asm_cipherlast_be (b0, in1); - b1 = asm_cipherlast_be (b1, in2); - in5 = asm_xor (rkeylast, in5); - in6 = asm_xor (rkeylast, in6); - b2 = asm_cipherlast_be (b2, in3); - b3 = asm_cipherlast_be (b3, in4); - in7 = asm_xor (rkeylast, in7); - in0 = asm_xor (rkeylast, iv); - b0 = VEC_BE_SWAP (b0, bige_const); - b1 = VEC_BE_SWAP (b1, bige_const); - b4 = asm_cipherlast_be (b4, in5); - b5 = asm_cipherlast_be (b5, in6); - b2 = VEC_BE_SWAP (b2, bige_const); - b3 = VEC_BE_SWAP (b3, bige_const); - b6 = asm_cipherlast_be (b6, in7); - b7 = asm_cipherlast_be (b7, in0); - b4 = VEC_BE_SWAP (b4, bige_const); - b5 = VEC_BE_SWAP (b5, bige_const); - b6 = VEC_BE_SWAP (b6, bige_const); - b7 = VEC_BE_SWAP (b7, bige_const); - VEC_STORE_BE_NOSWAP (out, 0, b0); - VEC_STORE_BE_NOSWAP (out, 1, b1); - VEC_STORE_BE_NOSWAP (out, 2, b2); - VEC_STORE_BE_NOSWAP (out, 3, b3); - VEC_STORE_BE_NOSWAP (out, 4, b4); - VEC_STORE_BE_NOSWAP (out, 5, b5); - VEC_STORE_BE_NOSWAP (out, 6, b6); - VEC_STORE_BE_NOSWAP (out, 7, b7); - out += 8; - } - - if (nblocks >= 4) - { - in0 = iv; - in1 = VEC_LOAD_BE (in, 0, bige_const); - in2 = VEC_LOAD_BE (in, 1, bige_const); - in3 = VEC_LOAD_BE (in, 2, bige_const); - iv = VEC_LOAD_BE (in, 3, bige_const); - - b0 = asm_xor (rkey0, in0); - b1 = asm_xor (rkey0, in1); - b2 = asm_xor (rkey0, in2); - b3 = asm_xor (rkey0, in3); - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - in1 = asm_xor (rkeylast, in1); - in2 = asm_xor (rkeylast, in2); - in3 = asm_xor (rkeylast, in3); - in0 = asm_xor (rkeylast, iv); - b0 = 
asm_cipherlast_be (b0, in1); - b1 = asm_cipherlast_be (b1, in2); - b2 = asm_cipherlast_be (b2, in3); - b3 = asm_cipherlast_be (b3, in0); - VEC_STORE_BE (out, 0, b0, bige_const); - VEC_STORE_BE (out, 1, b1, bige_const); - VEC_STORE_BE (out, 2, b2, bige_const); - VEC_STORE_BE (out, 3, b3, bige_const); - - in += 4; - out += 4; - nblocks -= 4; - } - - for (; nblocks; nblocks--) - { - bin = VEC_LOAD_BE (in, 0, bige_const); - rkeylast = rkeylast_orig ^ bin; - b = iv; - iv = bin; - - AES_ENCRYPT (b, rounds); - - VEC_STORE_BE (out, 0, b, bige_const); - - out++; - in++; - } - - VEC_STORE_BE (iv_arg, 0, iv, bige_const); -} - - -void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv_arg, - void *outbuf_arg, const void *inbuf_arg, - size_t nblocks, int cbc_mac) -{ - const block bige_const = asm_load_be_const(); - RIJNDAEL_context *ctx = context; - const u128_t *rk = (u128_t *)&ctx->keyschenc; - const u128_t *in = (const u128_t *)inbuf_arg; - u128_t *out = (u128_t *)outbuf_arg; - int rounds = ctx->rounds; - ROUND_KEY_VARIABLES_ALL; - block lastiv, b; - unsigned int outadd = !cbc_mac; - - lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const); - - PRELOAD_ROUND_KEYS_ALL (rounds); - - for (; nblocks; nblocks--) - { - b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const); - - AES_ENCRYPT_ALL (b, rounds); - - lastiv = b; - VEC_STORE_BE (out, 0, b, bige_const); - - in++; - out += outadd; - } - - VEC_STORE_BE (iv_arg, 0, lastiv, bige_const); -} - -void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg, - void *outbuf_arg, const void *inbuf_arg, - size_t nblocks) -{ - const block bige_const = asm_load_be_const(); - RIJNDAEL_context *ctx = context; - const u128_t *rk = (u128_t *)&ctx->keyschdec; - const u128_t *in = (const u128_t *)inbuf_arg; - u128_t *out = (u128_t *)outbuf_arg; - int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; - block rkeylast_orig; - block in0, in1, in2, in3, in4, in5, in6, in7; - block b0, b1, b2, b3, b4, b5, b6, b7; - block rkey; - block iv, b; - - if (!ctx->decryption_prepared) - { - aes_ppc8_prepare_decryption (ctx); - ctx->decryption_prepared = 1; - } - - iv = VEC_LOAD_BE (iv_arg, 0, bige_const); - - PRELOAD_ROUND_KEYS (rounds); - rkeylast_orig = rkeylast; - - for (; nblocks >= 8; nblocks -= 8) - { - in0 = VEC_LOAD_BE_NOSWAP (in, 0); - in1 = VEC_LOAD_BE_NOSWAP (in, 1); - in2 = VEC_LOAD_BE_NOSWAP (in, 2); - in3 = VEC_LOAD_BE_NOSWAP (in, 3); - in0 = VEC_BE_SWAP (in0, bige_const); - in1 = VEC_BE_SWAP (in1, bige_const); - in4 = VEC_LOAD_BE_NOSWAP (in, 4); - in5 = VEC_LOAD_BE_NOSWAP (in, 5); - in2 = VEC_BE_SWAP (in2, bige_const); - in3 = VEC_BE_SWAP (in3, bige_const); - in6 = VEC_LOAD_BE_NOSWAP (in, 6); - in7 = VEC_LOAD_BE_NOSWAP (in, 7); - in += 8; - b0 = asm_xor (rkey0, in0); - b1 = asm_xor (rkey0, in1); - in4 = VEC_BE_SWAP (in4, bige_const); - in5 = VEC_BE_SWAP (in5, bige_const); - b2 = asm_xor (rkey0, in2); - b3 = asm_xor (rkey0, in3); - in6 = VEC_BE_SWAP (in6, bige_const); - in7 = VEC_BE_SWAP (in7, bige_const); - b4 = asm_xor (rkey0, in4); - b5 = asm_xor (rkey0, in5); - b6 = asm_xor (rkey0, in6); - b7 = asm_xor (rkey0, in7); - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_ncipher_be (b0, rkey); \ - b1 = asm_ncipher_be (b1, rkey); \ - b2 = asm_ncipher_be (b2, rkey); \ - b3 = asm_ncipher_be (b3, rkey); \ - b4 = asm_ncipher_be (b4, rkey); \ - b5 = asm_ncipher_be (b5, rkey); \ - b6 = asm_ncipher_be (b6, rkey); \ - b7 = asm_ncipher_be (b7, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - 
DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - iv = asm_xor (rkeylast, iv); - in0 = asm_xor (rkeylast, in0); - in1 = asm_xor (rkeylast, in1); - in2 = asm_xor (rkeylast, in2); - b0 = asm_ncipherlast_be (b0, iv); - iv = in7; - b1 = asm_ncipherlast_be (b1, in0); - in3 = asm_xor (rkeylast, in3); - in4 = asm_xor (rkeylast, in4); - b2 = asm_ncipherlast_be (b2, in1); - b3 = asm_ncipherlast_be (b3, in2); - in5 = asm_xor (rkeylast, in5); - in6 = asm_xor (rkeylast, in6); - b0 = VEC_BE_SWAP (b0, bige_const); - b1 = VEC_BE_SWAP (b1, bige_const); - b4 = asm_ncipherlast_be (b4, in3); - b5 = asm_ncipherlast_be (b5, in4); - b2 = VEC_BE_SWAP (b2, bige_const); - b3 = VEC_BE_SWAP (b3, bige_const); - b6 = asm_ncipherlast_be (b6, in5); - b7 = asm_ncipherlast_be (b7, in6); - b4 = VEC_BE_SWAP (b4, bige_const); - b5 = VEC_BE_SWAP (b5, bige_const); - b6 = VEC_BE_SWAP (b6, bige_const); - b7 = VEC_BE_SWAP (b7, bige_const); - VEC_STORE_BE_NOSWAP (out, 0, b0); - VEC_STORE_BE_NOSWAP (out, 1, b1); - VEC_STORE_BE_NOSWAP (out, 2, b2); - VEC_STORE_BE_NOSWAP (out, 3, b3); - VEC_STORE_BE_NOSWAP (out, 4, b4); - VEC_STORE_BE_NOSWAP (out, 5, b5); - VEC_STORE_BE_NOSWAP (out, 6, b6); - VEC_STORE_BE_NOSWAP (out, 7, b7); - out += 8; - } - - if (nblocks >= 4) - { - in0 = VEC_LOAD_BE (in, 0, bige_const); - in1 = VEC_LOAD_BE (in, 1, bige_const); - in2 = VEC_LOAD_BE (in, 2, bige_const); - in3 = VEC_LOAD_BE (in, 3, bige_const); - - b0 = asm_xor (rkey0, in0); - b1 = asm_xor (rkey0, in1); - b2 = asm_xor (rkey0, in2); - b3 = asm_xor (rkey0, in3); - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_ncipher_be (b0, rkey); \ - b1 = asm_ncipher_be (b1, rkey); \ - b2 = asm_ncipher_be (b2, rkey); \ - b3 = asm_ncipher_be (b3, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - iv = asm_xor (rkeylast, iv); - in0 = asm_xor (rkeylast, in0); - in1 = asm_xor (rkeylast, in1); - in2 = asm_xor (rkeylast, in2); - - b0 = asm_ncipherlast_be (b0, iv); - iv = in3; - b1 = asm_ncipherlast_be (b1, in0); - b2 = asm_ncipherlast_be (b2, in1); - b3 = asm_ncipherlast_be (b3, in2); - - VEC_STORE_BE (out, 0, b0, bige_const); - VEC_STORE_BE (out, 1, b1, bige_const); - VEC_STORE_BE (out, 2, b2, bige_const); - VEC_STORE_BE (out, 3, b3, bige_const); - - in += 4; - out += 4; - nblocks -= 4; - } - - for (; nblocks; nblocks--) - { - rkeylast = rkeylast_orig ^ iv; - - iv = VEC_LOAD_BE (in, 0, bige_const); - b = iv; - AES_DECRYPT (b, rounds); - - VEC_STORE_BE (out, 0, b, bige_const); - - in++; - out++; - } - - VEC_STORE_BE (iv_arg, 0, iv, bige_const); -} - - -void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg, - void *outbuf_arg, const void *inbuf_arg, - size_t nblocks) -{ - static const unsigned char vec_one_const[16] = - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - const block bige_const = asm_load_be_const(); - RIJNDAEL_context *ctx = context; - const u128_t *rk = (u128_t *)&ctx->keyschenc; - const u128_t *in = (const u128_t *)inbuf_arg; - u128_t *out = (u128_t *)outbuf_arg; - int rounds = ctx->rounds; - ROUND_KEY_VARIABLES; - block rkeylast_orig; - block ctr, b, one; - - ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const); - one = VEC_LOAD_BE (&vec_one_const, 0, bige_const); - - PRELOAD_ROUND_KEYS 
(rounds); - rkeylast_orig = rkeylast; - - if (nblocks >= 4) - { - block in0, in1, in2, in3, in4, in5, in6, in7; - block b0, b1, b2, b3, b4, b5, b6, b7; - block two, three, four; - block rkey; - - two = asm_add_uint128 (one, one); - three = asm_add_uint128 (two, one); - four = asm_add_uint128 (two, two); - - for (; nblocks >= 8; nblocks -= 8) - { - b1 = asm_add_uint128 (ctr, one); - b2 = asm_add_uint128 (ctr, two); - b3 = asm_add_uint128 (ctr, three); - b4 = asm_add_uint128 (ctr, four); - b5 = asm_add_uint128 (b1, four); - b6 = asm_add_uint128 (b2, four); - b7 = asm_add_uint128 (b3, four); - b0 = asm_xor (rkey0, ctr); - rkey = ALIGNED_LOAD (rk, 1); - ctr = asm_add_uint128 (b4, four); - b1 = asm_xor (rkey0, b1); - b2 = asm_xor (rkey0, b2); - b3 = asm_xor (rkey0, b3); - b0 = asm_cipher_be (b0, rkey); - b1 = asm_cipher_be (b1, rkey); - b2 = asm_cipher_be (b2, rkey); - b3 = asm_cipher_be (b3, rkey); - b4 = asm_xor (rkey0, b4); - b5 = asm_xor (rkey0, b5); - b6 = asm_xor (rkey0, b6); - b7 = asm_xor (rkey0, b7); - b4 = asm_cipher_be (b4, rkey); - b5 = asm_cipher_be (b5, rkey); - b6 = asm_cipher_be (b6, rkey); - b7 = asm_cipher_be (b7, rkey); - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); \ - b4 = asm_cipher_be (b4, rkey); \ - b5 = asm_cipher_be (b5, rkey); \ - b6 = asm_cipher_be (b6, rkey); \ - b7 = asm_cipher_be (b7, rkey); - - in0 = VEC_LOAD_BE_NOSWAP (in, 0); - DO_ROUND(2); - in1 = VEC_LOAD_BE_NOSWAP (in, 1); - DO_ROUND(3); - in2 = VEC_LOAD_BE_NOSWAP (in, 2); - DO_ROUND(4); - in3 = VEC_LOAD_BE_NOSWAP (in, 3); - DO_ROUND(5); - in4 = VEC_LOAD_BE_NOSWAP (in, 4); - DO_ROUND(6); - in5 = VEC_LOAD_BE_NOSWAP (in, 5); - DO_ROUND(7); - in6 = VEC_LOAD_BE_NOSWAP (in, 6); - DO_ROUND(8); - in7 = VEC_LOAD_BE_NOSWAP (in, 7); - in += 8; - DO_ROUND(9); - - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - in0 = VEC_BE_SWAP (in0, bige_const); - in1 = VEC_BE_SWAP (in1, bige_const); - in2 = VEC_BE_SWAP (in2, bige_const); - in3 = VEC_BE_SWAP (in3, bige_const); - in4 = VEC_BE_SWAP (in4, bige_const); - in5 = VEC_BE_SWAP (in5, bige_const); - in6 = VEC_BE_SWAP (in6, bige_const); - in7 = VEC_BE_SWAP (in7, bige_const); - - in0 = asm_xor (rkeylast, in0); - in1 = asm_xor (rkeylast, in1); - in2 = asm_xor (rkeylast, in2); - in3 = asm_xor (rkeylast, in3); - b0 = asm_cipherlast_be (b0, in0); - b1 = asm_cipherlast_be (b1, in1); - in4 = asm_xor (rkeylast, in4); - in5 = asm_xor (rkeylast, in5); - b2 = asm_cipherlast_be (b2, in2); - b3 = asm_cipherlast_be (b3, in3); - in6 = asm_xor (rkeylast, in6); - in7 = asm_xor (rkeylast, in7); - b4 = asm_cipherlast_be (b4, in4); - b5 = asm_cipherlast_be (b5, in5); - b6 = asm_cipherlast_be (b6, in6); - b7 = asm_cipherlast_be (b7, in7); - - b0 = VEC_BE_SWAP (b0, bige_const); - b1 = VEC_BE_SWAP (b1, bige_const); - b2 = VEC_BE_SWAP (b2, bige_const); - b3 = VEC_BE_SWAP (b3, bige_const); - b4 = VEC_BE_SWAP (b4, bige_const); - b5 = VEC_BE_SWAP (b5, bige_const); - b6 = VEC_BE_SWAP (b6, bige_const); - b7 = VEC_BE_SWAP (b7, bige_const); - VEC_STORE_BE_NOSWAP (out, 0, b0); - VEC_STORE_BE_NOSWAP (out, 1, b1); - VEC_STORE_BE_NOSWAP (out, 2, b2); - VEC_STORE_BE_NOSWAP (out, 3, b3); - VEC_STORE_BE_NOSWAP (out, 4, b4); - VEC_STORE_BE_NOSWAP (out, 5, b5); - VEC_STORE_BE_NOSWAP (out, 6, b6); - VEC_STORE_BE_NOSWAP (out, 7, b7); - out += 8; - } - - if (nblocks >= 4) - { - b1 = 
asm_add_uint128 (ctr, one); - b2 = asm_add_uint128 (ctr, two); - b3 = asm_add_uint128 (ctr, three); - b0 = asm_xor (rkey0, ctr); - ctr = asm_add_uint128 (ctr, four); - b1 = asm_xor (rkey0, b1); - b2 = asm_xor (rkey0, b2); - b3 = asm_xor (rkey0, b3); - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - - in0 = VEC_LOAD_BE (in, 0, bige_const); - in1 = VEC_LOAD_BE (in, 1, bige_const); - in2 = VEC_LOAD_BE (in, 2, bige_const); - in3 = VEC_LOAD_BE (in, 3, bige_const); - - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - in0 = asm_xor (rkeylast, in0); - in1 = asm_xor (rkeylast, in1); - in2 = asm_xor (rkeylast, in2); - in3 = asm_xor (rkeylast, in3); - - b0 = asm_cipherlast_be (b0, in0); - b1 = asm_cipherlast_be (b1, in1); - b2 = asm_cipherlast_be (b2, in2); - b3 = asm_cipherlast_be (b3, in3); - - VEC_STORE_BE (out, 0, b0, bige_const); - VEC_STORE_BE (out, 1, b1, bige_const); - VEC_STORE_BE (out, 2, b2, bige_const); - VEC_STORE_BE (out, 3, b3, bige_const); - - in += 4; - out += 4; - nblocks -= 4; - } - } - - for (; nblocks; nblocks--) - { - b = ctr; - ctr = asm_add_uint128 (ctr, one); - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); - - AES_ENCRYPT (b, rounds); - - VEC_STORE_BE (out, 0, b, bige_const); - - out++; - in++; - } - - VEC_STORE_BE (ctr_arg, 0, ctr, bige_const); -} - - -size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, - int encrypt) -{ - const block bige_const = asm_load_be_const(); - RIJNDAEL_context *ctx = (void *)&c->context.c; - const u128_t *in = (const u128_t *)inbuf_arg; - u128_t *out = (u128_t *)outbuf_arg; - int rounds = ctx->rounds; - u64 data_nblocks = c->u_mode.ocb.data_nblocks; - block l0, l1, l2, l; - block b0, b1, b2, b3, b4, b5, b6, b7, b; - block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; - block rkey, rkeylf; - block ctr, iv; - ROUND_KEY_VARIABLES; - - iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const); - ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const); - - l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); - l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); - l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); - - if (encrypt) - { - const u128_t *rk = (u128_t *)&ctx->keyschenc; - - PRELOAD_ROUND_KEYS (rounds); - - for (; nblocks >= 8 && data_nblocks % 8; nblocks--) - { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); - b = VEC_LOAD_BE (in, 0, bige_const); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - iv ^= l; - /* Checksum_i = Checksum_{i-1} xor P_i */ - ctr ^= b; - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - b ^= iv; - AES_ENCRYPT (b, rounds); - b ^= iv; - - VEC_STORE_BE (out, 0, b, bige_const); - - in += 1; - out += 1; - } - - for (; nblocks >= 8; nblocks -= 8) - { - b0 = VEC_LOAD_BE_NOSWAP (in, 0); - b1 = VEC_LOAD_BE_NOSWAP (in, 1); - b2 = VEC_LOAD_BE_NOSWAP (in, 2); - b3 = VEC_LOAD_BE_NOSWAP (in, 3); - b4 = VEC_LOAD_BE_NOSWAP (in, 4); - b5 = VEC_LOAD_BE_NOSWAP (in, 5); - b6 = VEC_LOAD_BE_NOSWAP (in, 6); - b7 = VEC_LOAD_BE_NOSWAP (in, 7); - in += 8; - l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); - b0 = VEC_BE_SWAP(b0, bige_const); - b1 = VEC_BE_SWAP(b1, bige_const); - b2 = 
VEC_BE_SWAP(b2, bige_const); - b3 = VEC_BE_SWAP(b3, bige_const); - b4 = VEC_BE_SWAP(b4, bige_const); - b5 = VEC_BE_SWAP(b5, bige_const); - b6 = VEC_BE_SWAP(b6, bige_const); - b7 = VEC_BE_SWAP(b7, bige_const); - l = VEC_BE_SWAP(l, bige_const); - - ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; - - iv ^= rkey0; - - iv0 = iv ^ l0; - iv1 = iv ^ l0 ^ l1; - iv2 = iv ^ l1; - iv3 = iv ^ l1 ^ l2; - iv4 = iv ^ l1 ^ l2 ^ l0; - iv5 = iv ^ l2 ^ l0; - iv6 = iv ^ l2; - iv7 = iv ^ l2 ^ l; - - b0 ^= iv0; - b1 ^= iv1; - b2 ^= iv2; - b3 ^= iv3; - b4 ^= iv4; - b5 ^= iv5; - b6 ^= iv6; - b7 ^= iv7; - iv = iv7 ^ rkey0; - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); \ - b4 = asm_cipher_be (b4, rkey); \ - b5 = asm_cipher_be (b5, rkey); \ - b6 = asm_cipher_be (b6, rkey); \ - b7 = asm_cipher_be (b7, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - - rkeylf = asm_xor (rkeylast, rkey0); - - DO_ROUND(8); - - iv0 = asm_xor (rkeylf, iv0); - iv1 = asm_xor (rkeylf, iv1); - iv2 = asm_xor (rkeylf, iv2); - iv3 = asm_xor (rkeylf, iv3); - iv4 = asm_xor (rkeylf, iv4); - iv5 = asm_xor (rkeylf, iv5); - iv6 = asm_xor (rkeylf, iv6); - iv7 = asm_xor (rkeylf, iv7); - - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - b0 = asm_cipherlast_be (b0, iv0); - b1 = asm_cipherlast_be (b1, iv1); - b2 = asm_cipherlast_be (b2, iv2); - b3 = asm_cipherlast_be (b3, iv3); - b4 = asm_cipherlast_be (b4, iv4); - b5 = asm_cipherlast_be (b5, iv5); - b6 = asm_cipherlast_be (b6, iv6); - b7 = asm_cipherlast_be (b7, iv7); - - b0 = VEC_BE_SWAP (b0, bige_const); - b1 = VEC_BE_SWAP (b1, bige_const); - b2 = VEC_BE_SWAP (b2, bige_const); - b3 = VEC_BE_SWAP (b3, bige_const); - b4 = VEC_BE_SWAP (b4, bige_const); - b5 = VEC_BE_SWAP (b5, bige_const); - b6 = VEC_BE_SWAP (b6, bige_const); - b7 = VEC_BE_SWAP (b7, bige_const); - VEC_STORE_BE_NOSWAP (out, 0, b0); - VEC_STORE_BE_NOSWAP (out, 1, b1); - VEC_STORE_BE_NOSWAP (out, 2, b2); - VEC_STORE_BE_NOSWAP (out, 3, b3); - VEC_STORE_BE_NOSWAP (out, 4, b4); - VEC_STORE_BE_NOSWAP (out, 5, b5); - VEC_STORE_BE_NOSWAP (out, 6, b6); - VEC_STORE_BE_NOSWAP (out, 7, b7); - out += 8; - } - - if (nblocks >= 4 && (data_nblocks % 4) == 0) - { - b0 = VEC_LOAD_BE (in, 0, bige_const); - b1 = VEC_LOAD_BE (in, 1, bige_const); - b2 = VEC_LOAD_BE (in, 2, bige_const); - b3 = VEC_LOAD_BE (in, 3, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); - - ctr ^= b0 ^ b1 ^ b2 ^ b3; - - iv ^= rkey0; - - iv0 = iv ^ l0; - iv1 = iv ^ l0 ^ l1; - iv2 = iv ^ l1; - iv3 = iv ^ l1 ^ l; - - b0 ^= iv0; - b1 ^= iv1; - b2 ^= iv2; - b3 ^= iv3; - iv = iv3 ^ rkey0; - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - rkey = rkeylast ^ rkey0; - b0 = asm_cipherlast_be (b0, rkey ^ iv0); - b1 = asm_cipherlast_be (b1, rkey ^ iv1); - b2 = asm_cipherlast_be (b2, rkey ^ iv2); - b3 = asm_cipherlast_be (b3, rkey ^ 
iv3); - - VEC_STORE_BE (out, 0, b0, bige_const); - VEC_STORE_BE (out, 1, b1, bige_const); - VEC_STORE_BE (out, 2, b2, bige_const); - VEC_STORE_BE (out, 3, b3, bige_const); - - in += 4; - out += 4; - nblocks -= 4; - } - - for (; nblocks; nblocks--) - { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); - b = VEC_LOAD_BE (in, 0, bige_const); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - iv ^= l; - /* Checksum_i = Checksum_{i-1} xor P_i */ - ctr ^= b; - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - b ^= iv; - AES_ENCRYPT (b, rounds); - b ^= iv; - - VEC_STORE_BE (out, 0, b, bige_const); - - in += 1; - out += 1; - } - } - else - { - const u128_t *rk = (u128_t *)&ctx->keyschdec; - - if (!ctx->decryption_prepared) - { - aes_ppc8_prepare_decryption (ctx); - ctx->decryption_prepared = 1; - } - - PRELOAD_ROUND_KEYS (rounds); - - for (; nblocks >= 8 && data_nblocks % 8; nblocks--) - { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); - b = VEC_LOAD_BE (in, 0, bige_const); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - iv ^= l; - /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ - b ^= iv; - AES_DECRYPT (b, rounds); - b ^= iv; - /* Checksum_i = Checksum_{i-1} xor P_i */ - ctr ^= b; - - VEC_STORE_BE (out, 0, b, bige_const); - - in += 1; - out += 1; - } - - for (; nblocks >= 8; nblocks -= 8) - { - b0 = VEC_LOAD_BE_NOSWAP (in, 0); - b1 = VEC_LOAD_BE_NOSWAP (in, 1); - b2 = VEC_LOAD_BE_NOSWAP (in, 2); - b3 = VEC_LOAD_BE_NOSWAP (in, 3); - b4 = VEC_LOAD_BE_NOSWAP (in, 4); - b5 = VEC_LOAD_BE_NOSWAP (in, 5); - b6 = VEC_LOAD_BE_NOSWAP (in, 6); - b7 = VEC_LOAD_BE_NOSWAP (in, 7); - in += 8; - l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0); - b0 = VEC_BE_SWAP(b0, bige_const); - b1 = VEC_BE_SWAP(b1, bige_const); - b2 = VEC_BE_SWAP(b2, bige_const); - b3 = VEC_BE_SWAP(b3, bige_const); - b4 = VEC_BE_SWAP(b4, bige_const); - b5 = VEC_BE_SWAP(b5, bige_const); - b6 = VEC_BE_SWAP(b6, bige_const); - b7 = VEC_BE_SWAP(b7, bige_const); - l = VEC_BE_SWAP(l, bige_const); - - iv ^= rkey0; - - iv0 = iv ^ l0; - iv1 = iv ^ l0 ^ l1; - iv2 = iv ^ l1; - iv3 = iv ^ l1 ^ l2; - iv4 = iv ^ l1 ^ l2 ^ l0; - iv5 = iv ^ l2 ^ l0; - iv6 = iv ^ l2; - iv7 = iv ^ l2 ^ l; - - b0 ^= iv0; - b1 ^= iv1; - b2 ^= iv2; - b3 ^= iv3; - b4 ^= iv4; - b5 ^= iv5; - b6 ^= iv6; - b7 ^= iv7; - iv = iv7 ^ rkey0; - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_ncipher_be (b0, rkey); \ - b1 = asm_ncipher_be (b1, rkey); \ - b2 = asm_ncipher_be (b2, rkey); \ - b3 = asm_ncipher_be (b3, rkey); \ - b4 = asm_ncipher_be (b4, rkey); \ - b5 = asm_ncipher_be (b5, rkey); \ - b6 = asm_ncipher_be (b6, rkey); \ - b7 = asm_ncipher_be (b7, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - - rkeylf = asm_xor (rkeylast, rkey0); - - DO_ROUND(8); - - iv0 = asm_xor (rkeylf, iv0); - iv1 = asm_xor (rkeylf, iv1); - iv2 = asm_xor (rkeylf, iv2); - iv3 = asm_xor (rkeylf, iv3); - iv4 = asm_xor (rkeylf, iv4); - iv5 = asm_xor (rkeylf, iv5); - iv6 = asm_xor (rkeylf, iv6); - iv7 = asm_xor (rkeylf, iv7); - - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - b0 = asm_ncipherlast_be (b0, iv0); - b1 = asm_ncipherlast_be (b1, iv1); - b2 = asm_ncipherlast_be (b2, iv2); - b3 = asm_ncipherlast_be (b3, iv3); - b4 = asm_ncipherlast_be (b4, iv4); - b5 = asm_ncipherlast_be (b5, iv5); - b6 = asm_ncipherlast_be (b6, iv6); - b7 = asm_ncipherlast_be (b7, 
iv7); - - ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; - - b0 = VEC_BE_SWAP (b0, bige_const); - b1 = VEC_BE_SWAP (b1, bige_const); - b2 = VEC_BE_SWAP (b2, bige_const); - b3 = VEC_BE_SWAP (b3, bige_const); - b4 = VEC_BE_SWAP (b4, bige_const); - b5 = VEC_BE_SWAP (b5, bige_const); - b6 = VEC_BE_SWAP (b6, bige_const); - b7 = VEC_BE_SWAP (b7, bige_const); - VEC_STORE_BE_NOSWAP (out, 0, b0); - VEC_STORE_BE_NOSWAP (out, 1, b1); - VEC_STORE_BE_NOSWAP (out, 2, b2); - VEC_STORE_BE_NOSWAP (out, 3, b3); - VEC_STORE_BE_NOSWAP (out, 4, b4); - VEC_STORE_BE_NOSWAP (out, 5, b5); - VEC_STORE_BE_NOSWAP (out, 6, b6); - VEC_STORE_BE_NOSWAP (out, 7, b7); - out += 8; - } - - if (nblocks >= 4 && (data_nblocks % 4) == 0) - { - b0 = VEC_LOAD_BE (in, 0, bige_const); - b1 = VEC_LOAD_BE (in, 1, bige_const); - b2 = VEC_LOAD_BE (in, 2, bige_const); - b3 = VEC_LOAD_BE (in, 3, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); - - iv ^= rkey0; - - iv0 = iv ^ l0; - iv1 = iv ^ l0 ^ l1; - iv2 = iv ^ l1; - iv3 = iv ^ l1 ^ l; - - b0 ^= iv0; - b1 ^= iv1; - b2 ^= iv2; - b3 ^= iv3; - iv = iv3 ^ rkey0; - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_ncipher_be (b0, rkey); \ - b1 = asm_ncipher_be (b1, rkey); \ - b2 = asm_ncipher_be (b2, rkey); \ - b3 = asm_ncipher_be (b3, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - rkey = rkeylast ^ rkey0; - b0 = asm_ncipherlast_be (b0, rkey ^ iv0); - b1 = asm_ncipherlast_be (b1, rkey ^ iv1); - b2 = asm_ncipherlast_be (b2, rkey ^ iv2); - b3 = asm_ncipherlast_be (b3, rkey ^ iv3); - - VEC_STORE_BE (out, 0, b0, bige_const); - VEC_STORE_BE (out, 1, b1, bige_const); - VEC_STORE_BE (out, 2, b2, bige_const); - VEC_STORE_BE (out, 3, b3, bige_const); - - ctr ^= b0 ^ b1 ^ b2 ^ b3; - - in += 4; - out += 4; - nblocks -= 4; - } - - for (; nblocks; nblocks--) - { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); - b = VEC_LOAD_BE (in, 0, bige_const); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - iv ^= l; - /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ - b ^= iv; - AES_DECRYPT (b, rounds); - b ^= iv; - /* Checksum_i = Checksum_{i-1} xor P_i */ - ctr ^= b; - - VEC_STORE_BE (out, 0, b, bige_const); - - in += 1; - out += 1; - } - } - - VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const); - VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const); - c->u_mode.ocb.data_nblocks = data_nblocks; - - return 0; -} - -size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, - size_t nblocks) -{ - const block bige_const = asm_load_be_const(); - RIJNDAEL_context *ctx = (void *)&c->context.c; - const u128_t *rk = (u128_t *)&ctx->keyschenc; - const u128_t *abuf = (const u128_t *)abuf_arg; - int rounds = ctx->rounds; - u64 data_nblocks = c->u_mode.ocb.aad_nblocks; - block l0, l1, l2, l; - block b0, b1, b2, b3, b4, b5, b6, b7, b; - block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7; - block rkey, frkey; - block ctr, iv; - ROUND_KEY_VARIABLES; - - iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const); - ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const); - - l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const); - l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const); - l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const); - - PRELOAD_ROUND_KEYS (rounds); - - for (; nblocks >= 8 && data_nblocks % 8; nblocks--) - 
{ - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); - b = VEC_LOAD_BE (abuf, 0, bige_const); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - iv ^= l; - /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - b ^= iv; - AES_ENCRYPT (b, rounds); - ctr ^= b; - - abuf += 1; - } - - for (; nblocks >= 8; nblocks -= 8) - { - b0 = VEC_LOAD_BE (abuf, 0, bige_const); - b1 = VEC_LOAD_BE (abuf, 1, bige_const); - b2 = VEC_LOAD_BE (abuf, 2, bige_const); - b3 = VEC_LOAD_BE (abuf, 3, bige_const); - b4 = VEC_LOAD_BE (abuf, 4, bige_const); - b5 = VEC_LOAD_BE (abuf, 5, bige_const); - b6 = VEC_LOAD_BE (abuf, 6, bige_const); - b7 = VEC_LOAD_BE (abuf, 7, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const); - - frkey = rkey0; - iv ^= frkey; - - iv0 = iv ^ l0; - iv1 = iv ^ l0 ^ l1; - iv2 = iv ^ l1; - iv3 = iv ^ l1 ^ l2; - iv4 = iv ^ l1 ^ l2 ^ l0; - iv5 = iv ^ l2 ^ l0; - iv6 = iv ^ l2; - iv7 = iv ^ l2 ^ l; - - b0 ^= iv0; - b1 ^= iv1; - b2 ^= iv2; - b3 ^= iv3; - b4 ^= iv4; - b5 ^= iv5; - b6 ^= iv6; - b7 ^= iv7; - iv = iv7 ^ frkey; - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); \ - b4 = asm_cipher_be (b4, rkey); \ - b5 = asm_cipher_be (b5, rkey); \ - b6 = asm_cipher_be (b6, rkey); \ - b7 = asm_cipher_be (b7, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - rkey = rkeylast; - b0 = asm_cipherlast_be (b0, rkey); - b1 = asm_cipherlast_be (b1, rkey); - b2 = asm_cipherlast_be (b2, rkey); - b3 = asm_cipherlast_be (b3, rkey); - b4 = asm_cipherlast_be (b4, rkey); - b5 = asm_cipherlast_be (b5, rkey); - b6 = asm_cipherlast_be (b6, rkey); - b7 = asm_cipherlast_be (b7, rkey); - - ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7; - - abuf += 8; - } - - if (nblocks >= 4 && (data_nblocks % 4) == 0) - { - b0 = VEC_LOAD_BE (abuf, 0, bige_const); - b1 = VEC_LOAD_BE (abuf, 1, bige_const); - b2 = VEC_LOAD_BE (abuf, 2, bige_const); - b3 = VEC_LOAD_BE (abuf, 3, bige_const); - - l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const); - - frkey = rkey0; - iv ^= frkey; - - iv0 = iv ^ l0; - iv1 = iv ^ l0 ^ l1; - iv2 = iv ^ l1; - iv3 = iv ^ l1 ^ l; - - b0 ^= iv0; - b1 ^= iv1; - b2 ^= iv2; - b3 ^= iv3; - iv = iv3 ^ frkey; - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - rkey = rkeylast; - b0 = asm_cipherlast_be (b0, rkey); - b1 = asm_cipherlast_be (b1, rkey); - b2 = asm_cipherlast_be (b2, rkey); - b3 = asm_cipherlast_be (b3, rkey); - - ctr ^= b0 ^ b1 ^ b2 ^ b3; - - abuf += 4; - nblocks -= 4; - } - - for (; nblocks; nblocks--) - { - l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const); - b = VEC_LOAD_BE (abuf, 0, bige_const); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - iv ^= l; - /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - b ^= iv; - AES_ENCRYPT (b, 
rounds); - ctr ^= b; - - abuf += 1; - } - - VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const); - VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const); - c->u_mode.ocb.aad_nblocks = data_nblocks; - - return 0; + internal_aes_ppc_prepare_decryption (ctx); } -void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg, - void *outbuf_arg, const void *inbuf_arg, - size_t nblocks, int encrypt) -{ -#ifdef WORDS_BIGENDIAN - static const block vec_bswap64_const = - { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 }; - static const block vec_bswap128_const = - { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; -#else - static const block vec_bswap64_const = - { ~8, ~9, ~10, ~11, ~12, ~13, ~14, ~15, ~0, ~1, ~2, ~3, ~4, ~5, ~6, ~7 }; - static const block vec_bswap128_const = - { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 }; - static const block vec_tweakin_swap_const = - { ~12, ~13, ~14, ~15, ~8, ~9, ~10, ~11, ~4, ~5, ~6, ~7, ~0, ~1, ~2, ~3 }; -#endif - static const unsigned char vec_tweak_const[16] = - { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 }; - static const vector unsigned long long vec_shift63_const = - { 63, 63 }; - static const vector unsigned long long vec_shift1_const = - { 1, 1 }; - const block bige_const = asm_load_be_const(); - RIJNDAEL_context *ctx = context; - const u128_t *in = (const u128_t *)inbuf_arg; - u128_t *out = (u128_t *)outbuf_arg; - int rounds = ctx->rounds; - block tweak; - block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf; - block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7; - block tweak_const, bswap64_const, bswap128_const; - vector unsigned long long shift63_const, shift1_const; - ROUND_KEY_VARIABLES; - - tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const); - bswap64_const = ALIGNED_LOAD (&vec_bswap64_const, 0); - bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0); - shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const, 0); - shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const, 0); - -#ifdef WORDS_BIGENDIAN - tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const); - tweak = asm_vperm1 (tweak, bswap128_const); -#else - tweak = VEC_LOAD_BE (tweak_arg, 0, vec_tweakin_swap_const); -#endif - -#define GEN_TWEAK(tout, tin) /* Generate next tweak. 
*/ \ - do { \ - block tmp1, tmp2; \ - tmp1 = asm_vperm1((tin), bswap64_const); \ - tmp2 = (block)vec_sl((vector unsigned long long)(tin), shift1_const); \ - tmp1 = (block)(vec_sra((vector unsigned long long)tmp1, shift63_const)) & \ - tweak_const; \ - tout = asm_xor(tmp1, tmp2); \ - } while (0) - - if (encrypt) - { - const u128_t *rk = (u128_t *)&ctx->keyschenc; - - PRELOAD_ROUND_KEYS (rounds); - - for (; nblocks >= 8; nblocks -= 8) - { - b0 = VEC_LOAD_BE_NOSWAP (in, 0); - b1 = VEC_LOAD_BE_NOSWAP (in, 1); - b2 = VEC_LOAD_BE_NOSWAP (in, 2); - b3 = VEC_LOAD_BE_NOSWAP (in, 3); - tweak0 = tweak; - GEN_TWEAK (tweak1, tweak0); - tweak0 = asm_vperm1 (tweak0, bswap128_const); - b4 = VEC_LOAD_BE_NOSWAP (in, 4); - b5 = VEC_LOAD_BE_NOSWAP (in, 5); - GEN_TWEAK (tweak2, tweak1); - tweak1 = asm_vperm1 (tweak1, bswap128_const); - b6 = VEC_LOAD_BE_NOSWAP (in, 6); - b7 = VEC_LOAD_BE_NOSWAP (in, 7); - in += 8; - - b0 = VEC_BE_SWAP(b0, bige_const); - b1 = VEC_BE_SWAP(b1, bige_const); - GEN_TWEAK (tweak3, tweak2); - tweak2 = asm_vperm1 (tweak2, bswap128_const); - GEN_TWEAK (tweak4, tweak3); - tweak3 = asm_vperm1 (tweak3, bswap128_const); - b2 = VEC_BE_SWAP(b2, bige_const); - b3 = VEC_BE_SWAP(b3, bige_const); - GEN_TWEAK (tweak5, tweak4); - tweak4 = asm_vperm1 (tweak4, bswap128_const); - GEN_TWEAK (tweak6, tweak5); - tweak5 = asm_vperm1 (tweak5, bswap128_const); - b4 = VEC_BE_SWAP(b4, bige_const); - b5 = VEC_BE_SWAP(b5, bige_const); - GEN_TWEAK (tweak7, tweak6); - tweak6 = asm_vperm1 (tweak6, bswap128_const); - GEN_TWEAK (tweak, tweak7); - tweak7 = asm_vperm1 (tweak7, bswap128_const); - b6 = VEC_BE_SWAP(b6, bige_const); - b7 = VEC_BE_SWAP(b7, bige_const); - - tweak0 = asm_xor (tweak0, rkey0); - tweak1 = asm_xor (tweak1, rkey0); - tweak2 = asm_xor (tweak2, rkey0); - tweak3 = asm_xor (tweak3, rkey0); - tweak4 = asm_xor (tweak4, rkey0); - tweak5 = asm_xor (tweak5, rkey0); - tweak6 = asm_xor (tweak6, rkey0); - tweak7 = asm_xor (tweak7, rkey0); - - b0 = asm_xor (b0, tweak0); - b1 = asm_xor (b1, tweak1); - b2 = asm_xor (b2, tweak2); - b3 = asm_xor (b3, tweak3); - b4 = asm_xor (b4, tweak4); - b5 = asm_xor (b5, tweak5); - b6 = asm_xor (b6, tweak6); - b7 = asm_xor (b7, tweak7); - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); \ - b4 = asm_cipher_be (b4, rkey); \ - b5 = asm_cipher_be (b5, rkey); \ - b6 = asm_cipher_be (b6, rkey); \ - b7 = asm_cipher_be (b7, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - - rkeylf = asm_xor (rkeylast, rkey0); - - DO_ROUND(8); - - tweak0 = asm_xor (tweak0, rkeylf); - tweak1 = asm_xor (tweak1, rkeylf); - tweak2 = asm_xor (tweak2, rkeylf); - tweak3 = asm_xor (tweak3, rkeylf); - tweak4 = asm_xor (tweak4, rkeylf); - tweak5 = asm_xor (tweak5, rkeylf); - tweak6 = asm_xor (tweak6, rkeylf); - tweak7 = asm_xor (tweak7, rkeylf); - - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - b0 = asm_cipherlast_be (b0, tweak0); - b1 = asm_cipherlast_be (b1, tweak1); - b2 = asm_cipherlast_be (b2, tweak2); - b3 = asm_cipherlast_be (b3, tweak3); - b0 = VEC_BE_SWAP (b0, bige_const); - b1 = VEC_BE_SWAP (b1, bige_const); - b4 = asm_cipherlast_be (b4, tweak4); - b5 = asm_cipherlast_be (b5, tweak5); - b2 = VEC_BE_SWAP (b2, bige_const); - b3 = VEC_BE_SWAP (b3, bige_const); - b6 = asm_cipherlast_be (b6, 
tweak6); - b7 = asm_cipherlast_be (b7, tweak7); - VEC_STORE_BE_NOSWAP (out, 0, b0); - VEC_STORE_BE_NOSWAP (out, 1, b1); - b4 = VEC_BE_SWAP (b4, bige_const); - b5 = VEC_BE_SWAP (b5, bige_const); - VEC_STORE_BE_NOSWAP (out, 2, b2); - VEC_STORE_BE_NOSWAP (out, 3, b3); - b6 = VEC_BE_SWAP (b6, bige_const); - b7 = VEC_BE_SWAP (b7, bige_const); - VEC_STORE_BE_NOSWAP (out, 4, b4); - VEC_STORE_BE_NOSWAP (out, 5, b5); - VEC_STORE_BE_NOSWAP (out, 6, b6); - VEC_STORE_BE_NOSWAP (out, 7, b7); - out += 8; - } +#define GCRY_AES_PPC8 1 +#define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc8_encrypt +#define DECRYPT_BLOCK_FUNC _gcry_aes_ppc8_decrypt +#define CFB_ENC_FUNC _gcry_aes_ppc8_cfb_enc +#define CFB_DEC_FUNC _gcry_aes_ppc8_cfb_dec +#define CBC_ENC_FUNC _gcry_aes_ppc8_cbc_enc +#define CBC_DEC_FUNC _gcry_aes_ppc8_cbc_dec +#define CTR_ENC_FUNC _gcry_aes_ppc8_ctr_enc +#define OCB_CRYPT_FUNC _gcry_aes_ppc8_ocb_crypt +#define OCB_AUTH_FUNC _gcry_aes_ppc8_ocb_auth +#define XTS_CRYPT_FUNC _gcry_aes_ppc8_xts_crypt - if (nblocks >= 4) - { - tweak0 = tweak; - GEN_TWEAK (tweak1, tweak0); - GEN_TWEAK (tweak2, tweak1); - GEN_TWEAK (tweak3, tweak2); - GEN_TWEAK (tweak, tweak3); - - b0 = VEC_LOAD_BE (in, 0, bige_const); - b1 = VEC_LOAD_BE (in, 1, bige_const); - b2 = VEC_LOAD_BE (in, 2, bige_const); - b3 = VEC_LOAD_BE (in, 3, bige_const); - - tweak0 = asm_vperm1 (tweak0, bswap128_const); - tweak1 = asm_vperm1 (tweak1, bswap128_const); - tweak2 = asm_vperm1 (tweak2, bswap128_const); - tweak3 = asm_vperm1 (tweak3, bswap128_const); - - b0 ^= tweak0 ^ rkey0; - b1 ^= tweak1 ^ rkey0; - b2 ^= tweak2 ^ rkey0; - b3 ^= tweak3 ^ rkey0; - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_cipher_be (b0, rkey); \ - b1 = asm_cipher_be (b1, rkey); \ - b2 = asm_cipher_be (b2, rkey); \ - b3 = asm_cipher_be (b3, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - rkey = rkeylast; - b0 = asm_cipherlast_be (b0, rkey ^ tweak0); - b1 = asm_cipherlast_be (b1, rkey ^ tweak1); - b2 = asm_cipherlast_be (b2, rkey ^ tweak2); - b3 = asm_cipherlast_be (b3, rkey ^ tweak3); - - VEC_STORE_BE (out, 0, b0, bige_const); - VEC_STORE_BE (out, 1, b1, bige_const); - VEC_STORE_BE (out, 2, b2, bige_const); - VEC_STORE_BE (out, 3, b3, bige_const); - - in += 4; - out += 4; - nblocks -= 4; - } - - for (; nblocks; nblocks--) - { - tweak0 = asm_vperm1 (tweak, bswap128_const); - - /* Xor-Encrypt/Decrypt-Xor block. */ - b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; - - /* Generate next tweak. 
*/ - GEN_TWEAK (tweak, tweak); - - AES_ENCRYPT (b, rounds); - - b ^= tweak0; - VEC_STORE_BE (out, 0, b, bige_const); - - in++; - out++; - } - } - else - { - const u128_t *rk = (u128_t *)&ctx->keyschdec; - - if (!ctx->decryption_prepared) - { - aes_ppc8_prepare_decryption (ctx); - ctx->decryption_prepared = 1; - } - - PRELOAD_ROUND_KEYS (rounds); - - for (; nblocks >= 8; nblocks -= 8) - { - b0 = VEC_LOAD_BE_NOSWAP (in, 0); - b1 = VEC_LOAD_BE_NOSWAP (in, 1); - b2 = VEC_LOAD_BE_NOSWAP (in, 2); - b3 = VEC_LOAD_BE_NOSWAP (in, 3); - tweak0 = tweak; - GEN_TWEAK (tweak1, tweak0); - tweak0 = asm_vperm1 (tweak0, bswap128_const); - b4 = VEC_LOAD_BE_NOSWAP (in, 4); - b5 = VEC_LOAD_BE_NOSWAP (in, 5); - GEN_TWEAK (tweak2, tweak1); - tweak1 = asm_vperm1 (tweak1, bswap128_const); - b6 = VEC_LOAD_BE_NOSWAP (in, 6); - b7 = VEC_LOAD_BE_NOSWAP (in, 7); - in += 8; - - b0 = VEC_BE_SWAP(b0, bige_const); - b1 = VEC_BE_SWAP(b1, bige_const); - GEN_TWEAK (tweak3, tweak2); - tweak2 = asm_vperm1 (tweak2, bswap128_const); - GEN_TWEAK (tweak4, tweak3); - tweak3 = asm_vperm1 (tweak3, bswap128_const); - b2 = VEC_BE_SWAP(b2, bige_const); - b3 = VEC_BE_SWAP(b3, bige_const); - GEN_TWEAK (tweak5, tweak4); - tweak4 = asm_vperm1 (tweak4, bswap128_const); - GEN_TWEAK (tweak6, tweak5); - tweak5 = asm_vperm1 (tweak5, bswap128_const); - b4 = VEC_BE_SWAP(b4, bige_const); - b5 = VEC_BE_SWAP(b5, bige_const); - GEN_TWEAK (tweak7, tweak6); - tweak6 = asm_vperm1 (tweak6, bswap128_const); - GEN_TWEAK (tweak, tweak7); - tweak7 = asm_vperm1 (tweak7, bswap128_const); - b6 = VEC_BE_SWAP(b6, bige_const); - b7 = VEC_BE_SWAP(b7, bige_const); - - tweak0 = asm_xor (tweak0, rkey0); - tweak1 = asm_xor (tweak1, rkey0); - tweak2 = asm_xor (tweak2, rkey0); - tweak3 = asm_xor (tweak3, rkey0); - tweak4 = asm_xor (tweak4, rkey0); - tweak5 = asm_xor (tweak5, rkey0); - tweak6 = asm_xor (tweak6, rkey0); - tweak7 = asm_xor (tweak7, rkey0); - - b0 = asm_xor (b0, tweak0); - b1 = asm_xor (b1, tweak1); - b2 = asm_xor (b2, tweak2); - b3 = asm_xor (b3, tweak3); - b4 = asm_xor (b4, tweak4); - b5 = asm_xor (b5, tweak5); - b6 = asm_xor (b6, tweak6); - b7 = asm_xor (b7, tweak7); - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_ncipher_be (b0, rkey); \ - b1 = asm_ncipher_be (b1, rkey); \ - b2 = asm_ncipher_be (b2, rkey); \ - b3 = asm_ncipher_be (b3, rkey); \ - b4 = asm_ncipher_be (b4, rkey); \ - b5 = asm_ncipher_be (b5, rkey); \ - b6 = asm_ncipher_be (b6, rkey); \ - b7 = asm_ncipher_be (b7, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - - rkeylf = asm_xor (rkeylast, rkey0); - - DO_ROUND(8); - - tweak0 = asm_xor (tweak0, rkeylf); - tweak1 = asm_xor (tweak1, rkeylf); - tweak2 = asm_xor (tweak2, rkeylf); - tweak3 = asm_xor (tweak3, rkeylf); - tweak4 = asm_xor (tweak4, rkeylf); - tweak5 = asm_xor (tweak5, rkeylf); - tweak6 = asm_xor (tweak6, rkeylf); - tweak7 = asm_xor (tweak7, rkeylf); - - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - b0 = asm_ncipherlast_be (b0, tweak0); - b1 = asm_ncipherlast_be (b1, tweak1); - b2 = asm_ncipherlast_be (b2, tweak2); - b3 = asm_ncipherlast_be (b3, tweak3); - b0 = VEC_BE_SWAP (b0, bige_const); - b1 = VEC_BE_SWAP (b1, bige_const); - b4 = asm_ncipherlast_be (b4, tweak4); - b5 = asm_ncipherlast_be (b5, tweak5); - b2 = VEC_BE_SWAP (b2, bige_const); - b3 = VEC_BE_SWAP (b3, bige_const); - b6 = asm_ncipherlast_be (b6, tweak6); - b7 = asm_ncipherlast_be 
(b7, tweak7); - VEC_STORE_BE_NOSWAP (out, 0, b0); - VEC_STORE_BE_NOSWAP (out, 1, b1); - b4 = VEC_BE_SWAP (b4, bige_const); - b5 = VEC_BE_SWAP (b5, bige_const); - VEC_STORE_BE_NOSWAP (out, 2, b2); - VEC_STORE_BE_NOSWAP (out, 3, b3); - b6 = VEC_BE_SWAP (b6, bige_const); - b7 = VEC_BE_SWAP (b7, bige_const); - VEC_STORE_BE_NOSWAP (out, 4, b4); - VEC_STORE_BE_NOSWAP (out, 5, b5); - VEC_STORE_BE_NOSWAP (out, 6, b6); - VEC_STORE_BE_NOSWAP (out, 7, b7); - out += 8; - } - - if (nblocks >= 4) - { - tweak0 = tweak; - GEN_TWEAK (tweak1, tweak0); - GEN_TWEAK (tweak2, tweak1); - GEN_TWEAK (tweak3, tweak2); - GEN_TWEAK (tweak, tweak3); - - b0 = VEC_LOAD_BE (in, 0, bige_const); - b1 = VEC_LOAD_BE (in, 1, bige_const); - b2 = VEC_LOAD_BE (in, 2, bige_const); - b3 = VEC_LOAD_BE (in, 3, bige_const); - - tweak0 = asm_vperm1 (tweak0, bswap128_const); - tweak1 = asm_vperm1 (tweak1, bswap128_const); - tweak2 = asm_vperm1 (tweak2, bswap128_const); - tweak3 = asm_vperm1 (tweak3, bswap128_const); - - b0 ^= tweak0 ^ rkey0; - b1 ^= tweak1 ^ rkey0; - b2 ^= tweak2 ^ rkey0; - b3 ^= tweak3 ^ rkey0; - -#define DO_ROUND(r) \ - rkey = ALIGNED_LOAD (rk, r); \ - b0 = asm_ncipher_be (b0, rkey); \ - b1 = asm_ncipher_be (b1, rkey); \ - b2 = asm_ncipher_be (b2, rkey); \ - b3 = asm_ncipher_be (b3, rkey); - - DO_ROUND(1); - DO_ROUND(2); - DO_ROUND(3); - DO_ROUND(4); - DO_ROUND(5); - DO_ROUND(6); - DO_ROUND(7); - DO_ROUND(8); - DO_ROUND(9); - if (rounds >= 12) - { - DO_ROUND(10); - DO_ROUND(11); - if (rounds > 12) - { - DO_ROUND(12); - DO_ROUND(13); - } - } - -#undef DO_ROUND - - rkey = rkeylast; - b0 = asm_ncipherlast_be (b0, rkey ^ tweak0); - b1 = asm_ncipherlast_be (b1, rkey ^ tweak1); - b2 = asm_ncipherlast_be (b2, rkey ^ tweak2); - b3 = asm_ncipherlast_be (b3, rkey ^ tweak3); - - VEC_STORE_BE (out, 0, b0, bige_const); - VEC_STORE_BE (out, 1, b1, bige_const); - VEC_STORE_BE (out, 2, b2, bige_const); - VEC_STORE_BE (out, 3, b3, bige_const); - - in += 4; - out += 4; - nblocks -= 4; - } - - for (; nblocks; nblocks--) - { - tweak0 = asm_vperm1 (tweak, bswap128_const); - - /* Xor-Encrypt/Decrypt-Xor block. */ - b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0; - - /* Generate next tweak. */ - GEN_TWEAK (tweak, tweak); - - AES_DECRYPT (b, rounds); - - b ^= tweak0; - VEC_STORE_BE (out, 0, b, bige_const); - - in++; - out++; - } - } - -#ifdef WORDS_BIGENDIAN - tweak = asm_vperm1 (tweak, bswap128_const); - VEC_STORE_BE (tweak_arg, 0, tweak, bige_const); -#else - VEC_STORE_BE (tweak_arg, 0, tweak, vec_tweakin_swap_const); -#endif - -#undef GEN_TWEAK -} +#include #endif /* USE_PPC_CRYPTO */ diff --git a/cipher/rijndael-ppc9le.c b/cipher/rijndael-ppc9le.c new file mode 100644 index 000000000..facdedd4f --- /dev/null +++ b/cipher/rijndael-ppc9le.c @@ -0,0 +1,102 @@ +/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation + * Copyright (C) 2019 Shawn Landden + * Copyright (C) 2019-2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + * + * Alternatively, this code may be used in OpenSSL from The OpenSSL Project, + * and Cryptogams by Andy Polyakov, and if made part of a release of either + * or both projects, is thereafter dual-licensed under the license said project + * is released under. + */ + +#include + +#include "rijndael-internal.h" +#include "cipher-internal.h" +#include "bufhelp.h" + +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + +#include "rijndael-ppc-common.h" + + +static ASM_FUNC_ATTR_INLINE block +asm_load_be_const(void) +{ + static const block vec_dummy = { 0 }; + return vec_dummy; +} + +static ASM_FUNC_ATTR_INLINE block +asm_be_swap(block vec, block be_bswap_const) +{ + (void)be_bswap_const; + return vec; +} + +static ASM_FUNC_ATTR_INLINE block +asm_load_be_noswap(unsigned long offset, const void *ptr) +{ + block vec; +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("lxvb16x %x0,0,%1\n\t" + : "=wa" (vec) + : "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("lxvb16x %x0,%1,%2\n\t" + : "=wa" (vec) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + return vec; +} + +static ASM_FUNC_ATTR_INLINE void +asm_store_be_noswap(block vec, unsigned long offset, void *ptr) +{ +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("stxvb16x %x0,0,%1\n\t" + : + : "wa" (vec), "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("stxvb16x %x0,%1,%2\n\t" + : + : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +} + + +#define GCRY_AES_PPC9LE 1 +#define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc9le_encrypt +#define DECRYPT_BLOCK_FUNC _gcry_aes_ppc9le_decrypt +#define CFB_ENC_FUNC _gcry_aes_ppc9le_cfb_enc +#define CFB_DEC_FUNC _gcry_aes_ppc9le_cfb_dec +#define CBC_ENC_FUNC _gcry_aes_ppc9le_cbc_enc +#define CBC_DEC_FUNC _gcry_aes_ppc9le_cbc_dec +#define CTR_ENC_FUNC _gcry_aes_ppc9le_ctr_enc +#define OCB_CRYPT_FUNC _gcry_aes_ppc9le_ocb_crypt +#define OCB_AUTH_FUNC _gcry_aes_ppc9le_ocb_auth +#define XTS_CRYPT_FUNC _gcry_aes_ppc9le_xts_crypt + +#include + +#endif /* USE_PPC_CRYPTO */ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index ebd1a11a5..a1c4cfc1a 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -239,6 +239,43 @@ extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak, size_t nblocks, int encrypt); #endif /*USE_PPC_CRYPTO*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE +/* Power9 little-endian crypto implementations of AES */ +extern unsigned int _gcry_aes_ppc9le_encrypt(const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); + +extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc9le_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int cbc_mac); +extern void _gcry_aes_ppc9le_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc9le_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); +extern void _gcry_aes_ppc9le_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks); 
+ +extern size_t _gcry_aes_ppc9le_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern size_t _gcry_aes_ppc9le_ocb_auth (gcry_cipher_hd_t c, + const void *abuf_arg, size_t nblocks); + +extern void _gcry_aes_ppc9le_xts_crypt (void *context, unsigned char *tweak, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks, int encrypt); +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ + static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax); static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx, @@ -384,6 +421,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, #ifdef USE_PPC_CRYPTO ctx->use_ppc_crypto = 0; #endif +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + ctx->use_ppc9le_crypto = 0; +#endif if (0) { @@ -464,6 +504,28 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, } } #endif +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00)) + { + ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt; + ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_ppc_crypto = 1; /* same key-setup as USE_PPC_CRYPTO */ + ctx->use_ppc9le_crypto = 1; + if (hd) + { + hd->bulk.cfb_enc = _gcry_aes_ppc9le_cfb_enc; + hd->bulk.cfb_dec = _gcry_aes_ppc9le_cfb_dec; + hd->bulk.cbc_enc = _gcry_aes_ppc9le_cbc_enc; + hd->bulk.cbc_dec = _gcry_aes_ppc9le_cbc_dec; + hd->bulk.ctr_enc = _gcry_aes_ppc9le_ctr_enc; + hd->bulk.ocb_crypt = _gcry_aes_ppc9le_ocb_crypt; + hd->bulk.ocb_auth = _gcry_aes_ppc9le_ocb_auth; + hd->bulk.xts_crypt = _gcry_aes_ppc9le_xts_crypt; + } + } +#endif #ifdef USE_PPC_CRYPTO else if (hwfeatures & HWF_PPC_VCRYPTO) { @@ -924,6 +986,13 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv, return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_cfb_enc (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) { @@ -992,6 +1061,13 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv, return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) { @@ -1067,6 +1143,13 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr, return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) { @@ -1317,6 +1400,13 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv, return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_cfb_dec (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) { @@ -1382,6 +1472,13 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv, return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_cbc_dec (ctx, iv, outbuf, inbuf, nblocks); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_PPC_CRYPTO else if 
(ctx->use_ppc_crypto) { @@ -1450,6 +1547,12 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + return _gcry_aes_ppc9le_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) { @@ -1550,6 +1653,12 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) return _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks); } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + return _gcry_aes_ppc9le_ocb_auth (c, abuf, nblocks); + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) { @@ -1619,6 +1728,13 @@ _gcry_aes_xts_crypt (void *context, unsigned char *tweak, return; } #endif /*USE_ARM_CE*/ +#ifdef USE_PPC_CRYPTO_WITH_PPC9LE + else if (ctx->use_ppc9le_crypto) + { + _gcry_aes_ppc9le_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt); + return; + } +#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/ #ifdef USE_PPC_CRYPTO else if (ctx->use_ppc_crypto) { diff --git a/configure.ac b/configure.ac index f31b75586..f9d3dd718 100644 --- a/configure.ac +++ b/configure.ac @@ -2348,6 +2348,7 @@ if test "$found" = "1" ; then powerpc64le-*-*) # Build with the crypto extension implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo" + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc9le.lo" ;; powerpc64-*-*) # Big-Endian. From jussi.kivilinna at iki.fi Sun Feb 2 18:50:27 2020 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 2 Feb 2020 19:50:27 +0200 Subject: [PATCH] Define HW-feature flags per architecture Message-ID: <158066582722.21628.4368399279241188156.stgit@localhost6.localdomain6> * random/rand-internal.h (_gcry_rndhw_poll_slow): Add requested length parameter. * random/rndhw.c (_gcry_rndhw_poll_slow): Limit accounted bytes to 50% (or 25% for RDRAND) - this code is moved from caller side. * random/rndlinux.c (_gcry_rndlinux_gather_random): Move HWF_INTEL_RDRAND check to _gcry_rndhw_poll_slow. * src/g10lib.h (HWF_PADLOCK_*, HWF_INTEL_*): Define only if HAVE_CPU_ARCH_X86. (HWF_ARM_*): Define only if HAVE_CPU_ARCH_ARM. (HWF_PPC_*): Define only if HAVE_CPU_ARCH_PPC. -- Signed-off-by: Jussi Kivilinna --- 0 files changed diff --git a/random/rand-internal.h b/random/rand-internal.h index d99c66714..342215695 100644 --- a/random/rand-internal.h +++ b/random/rand-internal.h @@ -141,7 +141,7 @@ void _gcry_rndhw_poll_fast (void (*add)(const void*, size_t, enum random_origins origin); size_t _gcry_rndhw_poll_slow (void (*add)(const void*, size_t, enum random_origins), - enum random_origins origin); + enum random_origins origin, size_t req_length); diff --git a/random/rndhw.c b/random/rndhw.c index 2829382c6..3cf9acc3a 100644 --- a/random/rndhw.c +++ b/random/rndhw.c @@ -198,24 +198,33 @@ _gcry_rndhw_poll_fast (void (*add)(const void*, size_t, enum random_origins), /* Read 64 bytes from a hardware RNG and return the number of bytes - actually read. */ + actually read. However hardware source is let account only + for up to 50% (or 25% for RDRAND) of the requested bytes. */ size_t _gcry_rndhw_poll_slow (void (*add)(const void*, size_t, enum random_origins), - enum random_origins origin) + enum random_origins origin, size_t req_length) { size_t nbytes = 0; (void)add; (void)origin; + req_length /= 2; /* Up to 50%. 
*/ + #ifdef USE_DRNG if ((_gcry_get_hw_features () & HWF_INTEL_RDRAND)) - nbytes += poll_drng (add, origin, 0); + { + req_length /= 2; /* Up to 25%. */ + nbytes += poll_drng (add, origin, 0); + } #endif #ifdef USE_PADLOCK if ((_gcry_get_hw_features () & HWF_PADLOCK_RNG)) nbytes += poll_padlock (add, origin, 0); #endif + if (nbytes > req_length) + nbytes = req_length; + return nbytes; } diff --git a/random/rndlinux.c b/random/rndlinux.c index 04e2a464c..7cbf6ac21 100644 --- a/random/rndlinux.c +++ b/random/rndlinux.c @@ -186,19 +186,10 @@ _gcry_rndlinux_gather_random (void (*add)(const void*, size_t, } - /* First read from a hardware source. However let it account only - for up to 50% (or 25% for RDRAND) of the requested bytes. */ - n_hw = _gcry_rndhw_poll_slow (add, origin); - if ((_gcry_get_hw_features () & HWF_INTEL_RDRAND)) - { - if (n_hw > length/4) - n_hw = length/4; - } - else - { - if (n_hw > length/2) - n_hw = length/2; - } + /* First read from a hardware source. Note that _gcry_rndhw_poll_slow lets + it account only for up to 50% (or 25% for RDRAND) of the requested + bytes. */ + n_hw = _gcry_rndhw_poll_slow (add, origin, length); if (length > 1) length -= n_hw; diff --git a/src/g10lib.h b/src/g10lib.h index c85e66492..36cf85c13 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -211,6 +211,8 @@ char **_gcry_strtokenize (const char *string, const char *delim); /*-- src/hwfeatures.c --*/ +#if defined(HAVE_CPU_ARCH_X86) + #define HWF_PADLOCK_RNG (1 << 0) #define HWF_PADLOCK_AES (1 << 1) #define HWF_PADLOCK_SHA (1 << 2) @@ -230,15 +232,21 @@ char **_gcry_strtokenize (const char *string, const char *delim); #define HWF_INTEL_RDTSC (1 << 15) #define HWF_INTEL_SHAEXT (1 << 16) -#define HWF_ARM_NEON (1 << 17) -#define HWF_ARM_AES (1 << 18) -#define HWF_ARM_SHA1 (1 << 19) -#define HWF_ARM_SHA2 (1 << 20) -#define HWF_ARM_PMULL (1 << 21) +#elif defined(HAVE_CPU_ARCH_ARM) + +#define HWF_ARM_NEON (1 << 0) +#define HWF_ARM_AES (1 << 1) +#define HWF_ARM_SHA1 (1 << 2) +#define HWF_ARM_SHA2 (1 << 3) +#define HWF_ARM_PMULL (1 << 4) + +#elif defined(HAVE_CPU_ARCH_PPC) -#define HWF_PPC_VCRYPTO (1 << 22) -#define HWF_PPC_ARCH_3_00 (1 << 23) -#define HWF_PPC_ARCH_2_07 (1 << 24) +#define HWF_PPC_VCRYPTO (1 << 0) +#define HWF_PPC_ARCH_3_00 (1 << 1) +#define HWF_PPC_ARCH_2_07 (1 << 2) + +#endif gpg_err_code_t _gcry_disable_hw_feature (const char *name); void _gcry_detect_hw_features (void); From jussi.kivilinna at iki.fi Sun Feb 2 23:29:27 2020 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 3 Feb 2020 00:29:27 +0200 Subject: [PATCH 1/2] rndw32: use rndhw for extra random input Message-ID: <158068256735.30852.6073975692038598826.stgit@localhost6.localdomain6> * random/rndw32.c (_gcry_rndw32_gather_random): Use rndhw. -- Signed-off-by: Jussi Kivilinna --- random/rndw32.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/random/rndw32.c b/random/rndw32.c index b3f63d207..4a18305aa 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -816,6 +816,13 @@ _gcry_rndw32_gather_random (void (*add)(const void*, size_t, log_debug ("rndw32#gather_random: jent contributed extra %u bytes\n", (unsigned int)n); + /* Read from a hardware source. 
*/ + n = _gcry_rndhw_poll_slow (add, origin, -1); + + if (debug_me) + log_debug ("rndw32#gather_random: rndhw contributed extra %u bytes\n", + (unsigned int)n); + return 0; } From jussi.kivilinna at iki.fi Sun Feb 2 23:29:32 2020 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 3 Feb 2020 00:29:32 +0200 Subject: [PATCH 2/2] rndhw: wipe temporary buffer after use In-Reply-To: <158068256735.30852.6073975692038598826.stgit@localhost6.localdomain6> References: <158068256735.30852.6073975692038598826.stgit@localhost6.localdomain6> Message-ID: <158068257251.30852.17934075591379141517.stgit@localhost6.localdomain6> * random/rndhw.c (poll_drng): Wipe buffer on exit. -- Signed-off-by: Jussi Kivilinna --- random/rndhw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/random/rndhw.c b/random/rndhw.c index 3cf9acc3a..3c0abe7c7 100644 --- a/random/rndhw.c +++ b/random/rndhw.c @@ -165,6 +165,7 @@ poll_drng (add_fn_t add, enum random_origins origin, int fast) if (!rdrand_nlong (buffer, DIM(buffer))) return 0; (*add)((void *)buffer, nbytes, origin); + wipememory (buffer, nbytes); return nbytes; } #endif /*USE_DRNG*/ From wk at gnupg.org Tue Feb 4 08:42:40 2020 From: wk at gnupg.org (Werner Koch) Date: Tue, 04 Feb 2020 08:42:40 +0100 Subject: [PATCH 1/2] rndw32: use rndhw for extra random input In-Reply-To: <158068256735.30852.6073975692038598826.stgit@localhost6.localdomain6> (Jussi Kivilinna's message of "Mon, 3 Feb 2020 00:29:27 +0200") References: <158068256735.30852.6073975692038598826.stgit@localhost6.localdomain6> Message-ID: <87blqeol1r.fsf@wheatstone.g10code.de> Hi! On Mon, 3 Feb 2020 00:29, Jussi Kivilinna said: > * random/rndw32.c (_gcry_rndw32_gather_random): Use rndhw. Can you please explain why you think this makes sense. The random number generator has been evaluated and approved for official secret communication and thus such changes are highly critical should only done with a very good reason. Shalom-Salam, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 227 bytes Desc: not available URL: From dbaryshkov at gmail.com Sun Feb 9 19:29:44 2020 From: dbaryshkov at gmail.com (dbaryshkov at gmail.com) Date: Sun, 9 Feb 2020 21:29:44 +0300 Subject: [PATCH 3/3] tests/basic: add GOST 28147 keymeshing testcase from LibreSSL testsuite In-Reply-To: <20200209182944.491835-1-dbaryshkov@gmail.com> References: <20200209182944.491835-1-dbaryshkov@gmail.com> Message-ID: <20200209182944.491835-4-dbaryshkov@gmail.com> From: Dmitry Baryshkov * tests/basic.c (check_cfb_cipher): add check for GOST 28147 CFB with KeyMeshing enabled. 
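The new vector is 1039 bytes long, just past the 1024-byte meshing interval introduced in patch 2/3 of this series, so CFB processing has to apply CryptoPro key meshing exactly once. For readers who want to reproduce that path outside the test harness, here is a minimal sketch of the calls the new test case exercises; the key, IV and data below are placeholders rather than the test vector from the diff, and the sketch assumes a libgcrypt build that already contains patches 1/3 and 2/3 (which add GCRY_CIPHER_GOST28147_MESH):

/* Sketch only: encrypt more than 1024 bytes of GOST 28147-89 CFB so that
 * CryptoPro key meshing (RFC 4357) is triggered.  Key/IV/data are dummy
 * placeholder values, not the vector from tests/basic.c. */
#include <gcrypt.h>
#include <string.h>

int
main (void)
{
  unsigned char key[32] = { 0 };   /* 256-bit key (placeholder) */
  unsigned char iv[8] = { 0 };     /* 64-bit IV (placeholder) */
  unsigned char buf[1040];         /* > 1024 bytes: crosses the meshing boundary once */
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  gcry_check_version (NULL);
  memset (buf, 0xa5, sizeof buf);

  err = gcry_cipher_open (&hd, GCRY_CIPHER_GOST28147_MESH,
                          GCRY_CIPHER_MODE_CFB, 0);
  if (!err)
    err = gcry_cipher_set_sbox (hd, "1.2.643.2.2.31.2"); /* CryptoPro_B: meshing enabled */
  if (!err)
    err = gcry_cipher_setkey (hd, key, sizeof key);
  if (!err)
    err = gcry_cipher_setiv (hd, iv, sizeof iv);
  if (!err)
    err = gcry_cipher_encrypt (hd, buf, sizeof buf, NULL, 0); /* in-place */
  gcry_cipher_close (hd);
  return err ? 1 : 0;
}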
Signed-off-by: Dmitry Baryshkov --- tests/basic.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) diff --git a/tests/basic.c b/tests/basic.c index 6dc2fe40a6a4..3a7d667602df 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -59,7 +59,7 @@ test_spec_pubkey_t; static int in_fips_mode; -#define MAX_DATA_LEN 128 +#define MAX_DATA_LEN 1040 static void @@ -2264,6 +2264,7 @@ check_cfb_cipher (void) char out[MAX_DATA_LEN]; } data[MAX_DATA_LEN]; + const char *oid; /* For gost 28147 param sets */ } tv[] = { /* http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf */ @@ -2417,6 +2418,147 @@ check_cfb_cipher (void) "\xf2\x0e\x53\x66\x74\xa6\x6f\xa7\x38\x05"}, } }, +#ifdef USE_GOST28147 + { GCRY_CIPHER_GOST28147_MESH, 0, + "\x48\x0c\x74\x1b\x02\x6b\x55\xd5\xb6\x6d\xd7\x1d\x40\x48\x05\x6b" + "\x6d\xeb\x3c\x29\x0f\x84\x80\x23\xee\x0d\x47\x77\xe3\xfe\x61\xc9", + "\x1f\x3f\x82\x1e\x0d\xd8\x1e\x22", + { { "\x8c\x9c\x44\x35\xfb\xe9\xa5\xa3\xa0\xae\x28\x56\x91\x10\x8e\x1e" + "\xd2\xbb\x18\x53\x81\x27\x0d\xa6\x68\x59\x36\xc5\x81\x62\x9a\x8e" + "\x7d\x50\xf1\x6f\x97\x62\x29\xec\x80\x51\xe3\x7d\x6c\xc4\x07\x95" + "\x28\x63\xdc\xb4\xb9\x2d\xb8\x13\xb1\x05\xb5\xf9\xeb\x75\x37\x4e" + "\xf7\xbf\x51\xf1\x98\x86\x43\xc4\xe4\x3d\x3e\xa7\x62\xec\x41\x59" + "\xe0\xbd\xfb\xb6\xfd\xec\xe0\x77\x13\xd2\x59\x90\xa1\xb8\x97\x6b" + "\x3d\x8b\x7d\xfc\x9d\xca\x82\x73\x32\x70\x0a\x74\x03\xc6\x0c\x26" + "\x7f\x56\xf0\x9d\xb2\xeb\x71\x40\xd7\xc3\xb1\xa7\xc5\x1e\x20\x17" + "\xb3\x50\x1d\x8a\x6e\x19\xcb\xbe\x20\x86\x2b\xd6\x1c\xfd\xb4\xb7" + "\x5d\x9a\xb3\xe3\x7d\x15\x7a\x35\x01\x9f\x5d\x65\x89\x4b\x34\xc6" + "\xf4\x81\x3f\x78\x30\xcf\xe9\x15\x90\x9a\xf9\xde\xba\x63\xd0\x19" + "\x14\x66\x3c\xb9\xa4\xb2\x84\x94\x02\xcf\xce\x20\xcf\x76\xe7\xc5" + "\x48\xf7\x69\x3a\x5d\xec\xaf\x41\xa7\x12\x64\x83\xf5\x99\x1e\x9e" + "\xb2\xab\x86\x16\x00\x23\x8e\xe6\xd9\x80\x0b\x6d\xc5\x93\xe2\x5c" + "\x8c\xd8\x5e\x5a\xae\x4a\x85\xfd\x76\x01\xea\x30\xf3\x78\x34\x10" + "\x72\x51\xbc\x9f\x76\xce\x1f\xd4\x8f\x33\x50\x34\xc7\x4d\x7b\xcf" + "\x91\x63\x7d\x82\x9e\xa1\x23\x45\xf5\x45\xac\x98\x7a\x48\xff\x64" + "\xd5\x59\x47\xde\x2b\x3f\xfa\xec\x50\xe0\x81\x60\x8b\xc3\xfc\x80" + "\x98\x17\xc7\xa3\xc2\x57\x3d\xab\x91\x67\xf5\xc4\xab\x92\xc8\xd6" + "\x3b\x6b\x3f\xff\x15\x6b\xcf\x53\x65\x02\xf1\x74\xca\xa9\xbe\x24" + "\xd2\xf0\xb7\x26\xa8\xd7\x6d\xed\x90\x36\x7b\x3e\x41\xa9\x7f\xa3" + "\x1b\xf4\x43\xc5\x51\xbe\x28\x59\xe9\x45\x26\x49\x38\x32\xf8\xf3" + "\x92\x6e\x30\xcc\xb0\xa0\xf9\x01\x14\xc8\xba\xd9\xf0\x2a\x29\xe2" + "\x52\x9a\x76\x95\x3a\x16\x32\xec\xf4\x10\xec\xee\x47\x00\x70\x19" + "\xe4\x72\x35\x66\x44\x53\x2d\xa2\xf3\xaa\x7e\x8a\x33\x13\xcd\xc8" + "\xbf\x0e\x40\x90\x00\xe4\x42\xc3\x09\x84\xe1\x66\x17\xa2\xaf\x03" + "\xab\x6b\xa1\xec\xfb\x17\x72\x81\xfe\x9a\x9f\xf4\xb2\x33\x1f\xae" + "\x0c\xd1\x6a\xae\x19\xb8\xaf\xec\xe3\xea\x00\xf8\xac\x87\x07\x5f" + "\x6d\xb0\xac\x6b\x22\x48\x36\xbf\x22\x18\xb0\x03\x9f\x6c\x70\x45" + "\x36\xf0\x6b\xc6\xc2\xa5\x72\x2c\xd8\xe0\x27\x3d\xec\x56\x07\x05" + "\x7d\x83\xa1\x65\x7d\x41\x5b\xcd\x77\x24\xe5\xaa\x76\x47\xd0\x50" + "\xf6\xe7\xb5\x59\x75\x31\x27\xef\xd8\xa6\x4e\x7f\xb8\x40\xb1\xdf" + "\x53\x14\xed\xf1\x68\x5f\xfc\x3f\x02\xdb\x05\xeb\x31\xe4\x2c\x7f" + "\x32\xb5\x70\x8e\x75\x85\xa4\x5c\x16\x23\x37\xf2\x10\x79\xcb\xdc" + "\xf8\x1c\x25\xc2\xa1\x3d\x9c\x33\x6c\xed\xc3\xe7\xf3\x02\x87\x82" + "\x4e\xfb\xac\xb3\x2d\xfc\xf8\x0d\x1d\x4a\x39\xd4\xb3\x09\xbb\xe9" + "\x25\xc7\xec\x6a\x87\x72\x84\xed\x12\x60\x19\x64\xeb\x16\x2a\x5b" + 
"\x10\x76\x27\xff\x7b\xe4\xae\xe5\xa4\x04\x02\x7f\xbb\x0a\xb5\xf4" + "\x05\xa5\x56\x1c\x53\x31\x7a\x93\xba\x16\x15\xab\x62\x60\xfc\xde" + "\x72\x36\x6e\x28\xaf\x98\x0d\xe6\xf4\xde\x60\xa7\x7e\x06\x07\x86" + "\xf3\x94\xb6\x6d\x0d\x93\xa6\xbc\x60\x70\x33\xac\x3f\xa1\xa8\x4a" + "\x20\x61\xb6\xb5\x43\xa3\x15\x5a\x00\xbe\x76\x98\x57\x72\xab\x7a" + "\x0e\x18\x93\x82\x3a\x18\x78\x6e\x71\x7b\x78\x4f\x7e\x8c\xde\x7a" + "\x62\xb5\x0a\x7c\x45\x1d\x16\xd5\xc3\x8c\x9b\x25\xb4\x50\x90\xcd" + "\x96\x93\xad\x0f\xd4\x43\xcb\x49\x0f\xfc\x5a\x31\xf4\x19\xb7\xd4" + "\xeb\x4d\x40\x58\xd0\x3b\xc8\xe0\x4a\x54\x2f\xdb\x22\xc3\x29\x7b" + "\x40\x90\x61\x43\xd3\x7e\xe2\x30\x2b\x48\x3c\xce\x90\x93\xb1\x8b" + "\x31\x96\x65\x6d\x57\x8b\x9d\x4d\x53\xf0\x83\x1c\xe5\xa1\x9d\x55" + "\xe3\xbf\x7e\xca\x1a\x74\x66\x14\xcc\x47\x43\xd9\xbb\xef\x97\x7d" + "\xb7\x6e\xff\xf1\x22\xf8\x10\x2d\x3f\xcd\x49\x96\xd9\x09\x11\xb8" + "\x33\xd0\x23\x9a\xfa\x16\xcb\x50\x26\x57\x24\x5c\x0e\xba\xf0\x3f" + "\x37\x2f\xa3\xf7\x18\x57\x48\x48\x95\xcf\xef\x87\x67\x2a\xe9\xb6" + "\x8a\x21\x36\x7f\xff\x48\x6c\x46\x35\x57\xf2\xbc\x48\x67\x8f\x63" + "\x23\x78\x11\x2b\xc2\x08\xde\x51\xe8\x8b\x92\x29\xf9\x9a\x9e\xad" + "\xed\x0f\xeb\xa2\xd2\x40\x92\xd4\xde\x62\x95\x76\xfd\x6e\x3c\xbf" + "\xc0\xd7\x0d\xe5\x1b\xa4\xc7\x18\xe1\x58\xa4\x56\xef\x2e\x17\x1b" + "\x75\xcb\xbc\xf9\x2a\x95\x71\xa7\x1d\x7f\xe7\x73\x63\x05\x6b\x19" + "\x4c\xf4\x22\x14\xc4\x59\x88\x66\x92\x86\x61\x5c\x6a\xae\xec\x58" + "\xff\xc9\xf2\x44\xd4\xa2\xf5\x98\xeb\x5f\x09\xbc\x8a\xbf\x3c\xb4" + "\x3e\xb1\x20\x05\x44\x96\x79\x0a\x40\x92\x7f\x9d\xd1\xaf\xbc\x90" + "\x95\x0a\x81\xd4\xa7\xc6\xb8\xe0\xe4\x39\x30\x1d\x79\xc0\xe5\xfa" + "\xb4\xe9\x63\xb4\x09\x72\x3b\x3e\xd9\xf6\xd9\x10\x21\x18\x7e\xe5" + "\xad\x81\xd7\xd5\x82\xd0\x8c\x3b\x38\x95\xf8\x92\x01\xa9\x92\x00" + "\x70\xd1\xa7\x88\x77\x1f\x3a\xeb\xb5\xe4\xf5\x9d\xc7\x37\x86\xb2" + "\x12\x46\x34\x19\x72\x8c\xf5\x8c\xf6\x78\x98\xe0\x7c\xd3\xf4", + 1039, + "\x23\xc6\x7f\x20\xa1\x23\x58\xbc\x7b\x05\xdb\x21\x15\xcf\x96\x41" + "\xc7\x88\xef\x76\x5c\x49\xdb\x42\xbf\xf3\xc0\xf5\xbd\x5d\xd9\x8e" + "\xaf\x3d\xf4\xe4\xda\x88\xbd\xbc\x47\x5d\x76\x07\xc9\x5f\x54\x1d" + "\x1d\x6a\xa1\x2e\x18\xd6\x60\x84\x02\x18\x37\x92\x92\x15\xab\x21" + "\xee\x21\xcc\x71\x6e\x51\xd9\x2b\xcc\x81\x97\x3f\xeb\x45\x99\xb8" + "\x1b\xda\xff\x90\xd3\x41\x06\x9c\x3f\xfb\xe4\xb2\xdc\xc9\x03\x0d" + "\xa7\xae\xd7\x7d\x02\xb8\x32\xab\xf3\x65\xa3\x65\x6c\x4e\xe4\xa2" + "\x5e\x9e\xee\xcd\xde\x79\x36\x6b\x1b\xe1\x3c\xdf\x10\xad\x4f\x02" + "\xe1\x14\xaa\x09\xb4\x0b\x76\xeb\x69\x38\x20\x02\xcb\x8e\xc0\xdf" + "\xca\x48\x74\xc3\x31\xad\x42\x2c\x51\x9b\xd0\x6a\xc1\x36\xd7\x21" + "\xdf\xb0\x45\xba\xca\x7f\x35\x20\x28\xbb\xc1\x76\xfd\x43\x5d\x23" + "\x7d\x31\x84\x1a\x97\x4d\x83\xaa\x7e\xf1\xc4\xe6\x83\xac\x0d\xef" + "\xef\x3c\xa4\x7c\x48\xe4\xc8\xca\x0d\x7d\xea\x7c\x45\xd7\x73\x50" + "\x25\x1d\x01\xc4\x02\x1a\xcd\xe0\x38\x5b\xa8\x5a\x16\x9a\x10\x59" + "\x74\xd7\x19\xc6\xf3\xb5\x17\xf6\x59\x8d\x62\xaf\x44\xe8\xdc\xe9" + "\xc1\x76\xf1\xd0\xbd\x29\xd7\xec\x1d\xac\x57\xdb\x1a\x3f\xd8\xf6" + "\x6e\xb6\xe6\xdf\x36\xe7\x89\xce\x56\x35\x43\x1c\x7d\x57\x79\x0e" + "\xd8\xf4\xd7\xa7\x0d\xc6\x8f\x91\x66\x67\x82\x0f\x49\xc9\xc5\x65" + "\x81\xa1\x39\x5a\x53\x9f\x02\xa5\xd5\x36\x22\xa8\xa8\x1c\x37\x0e" + "\x76\x46\xdf\xbd\x6a\xdb\xfc\x1b\xbd\x10\xb8\xb1\xbc\x72\x4c\x58" + "\x4a\xda\x6d\x66\x00\xda\x7a\x66\xa0\xe7\x3b\x39\xa3\xf7\x05\x07" + "\xfa\x21\x4b\xc7\x94\xc0\xd3\x7b\x19\x02\x5d\x4a\x10\xf1\xc2\x0f" + "\x19\x68\x27\xc7\x7d\xbf\x55\x03\x57\x7d\xaf\x77\xae\x80\x2f\x7a" + 
"\xe6\x1f\x4b\xdc\x15\x18\xc0\x62\xa1\xe8\xd9\x1c\x9e\x8c\x96\x39" + "\xc1\xc4\x88\xf7\x0c\xe1\x04\x84\x68\x51\xce\xf1\x90\xda\x7f\x76" + "\xc8\xc0\x88\xef\x8e\x15\x25\x3e\x7b\xe4\x79\xb5\x66\x2d\x9c\xd1" + "\x13\xda\xd0\xd5\x46\xd5\x8d\x46\x18\x07\xee\xd8\xc9\x64\xe3\xbe" + "\x0e\x68\x27\x09\x96\x26\xf6\xe2\x19\x61\x3f\xf4\x58\x27\x0a\xeb" + "\xce\x7c\xb6\x68\x92\xe7\x12\x3b\x31\xd4\x48\xdf\x35\x8d\xf4\x86" + "\x42\x2a\x15\x4b\xe8\x19\x1f\x26\x65\x9b\xa8\xda\x4b\x79\x1f\x8e" + "\xe6\x13\x7e\x49\x8f\xc1\xce\xdc\x5e\x64\x74\xce\x02\x78\xe0\xcf" + "\xa0\xed\x5e\x31\x74\xd1\xd0\xb4\xee\x70\x19\x14\x3c\x8f\x16\xa6" + "\xcf\x12\x93\x15\x88\xeb\x91\x65\x76\x98\xfd\xa1\x94\x30\xba\x43" + "\x62\x65\x40\x04\x77\x9e\xd6\xab\x8b\x0d\x93\x80\x50\x5f\xa2\x76" + "\x20\xa7\xd6\x9c\x27\x15\x27\xbc\xa5\x5a\xbf\xe9\x92\x82\x05\xa8" + "\x41\xe9\xb5\x60\xd5\xc0\xd7\x4b\xad\x38\xb2\xe9\xd1\xe5\x51\x5f" + "\x24\x78\x24\x9a\x23\xd2\xc2\x48\xbd\x0e\xf1\x37\x72\x91\x87\xb0" + "\x4e\xbd\x99\x6b\x2c\x01\xb6\x79\x69\xec\x0c\xed\xe5\x3f\x50\x64" + "\x7c\xb9\xdd\xe1\x92\x81\xb5\xd0\xcb\x17\x83\x86\x8b\xea\x4f\x93" + "\x08\xbc\x22\x0c\xef\xe8\x0d\xf5\x9e\x23\xe1\xf9\xb7\x6b\x45\x0b" + "\xcb\xa9\xb6\x4d\x28\x25\xba\x3e\x86\xf2\x75\x47\x5d\x9d\x6b\xf6" + "\x8a\x05\x58\x73\x3d\x00\xde\xfd\x69\xb1\x61\x16\xf5\x2e\xb0\x9f" + "\x31\x6a\x00\xb9\xef\x71\x63\x47\xa3\xca\xe0\x40\xa8\x7e\x02\x04" + "\xfe\xe5\xce\x48\x73\xe3\x94\xcf\xe2\xff\x29\x7e\xf6\x32\xbb\xb7" + "\x55\x12\x21\x7a\x9c\x75\x04\x0c\xb4\x7c\xb0\x3d\x40\xb3\x11\x9a" + "\x7a\x9a\x13\xfb\x77\xa7\x51\x68\xf7\x05\x47\x3b\x0f\x52\x5c\xe6" + "\xc2\x99\x3a\x37\x54\x5c\x4f\x2b\xa7\x01\x08\x74\xbc\x91\xe3\xe2" + "\xfe\x65\x94\xfd\x3d\x18\xe0\xf0\x62\xed\xc2\x10\x82\x9c\x58\x7f" + "\xb2\xa3\x87\x8a\x74\xd9\xc1\xfb\x84\x28\x17\xc7\x2b\xcb\x53\x1f" + "\x4e\x8a\x82\xfc\xb4\x3f\xc1\x47\x25\xf3\x21\xdc\x4c\x2d\x08\xfa" + "\xe7\x0f\x03\xa9\x68\xde\x6b\x41\xa0\xf9\x41\x6c\x57\x4d\x3a\x0e" + "\xea\x51\xca\x9f\x97\x11\x7d\xf6\x8e\x88\x63\x67\xc9\x65\x13\xca" + "\x38\xed\x35\xbe\xf4\x27\xa9\xfc\xa9\xe6\xc3\x40\x86\x08\x39\x72" + "\x37\xee\xb2\x87\x09\x96\xb7\x40\x87\x36\x92\xc1\x5d\x6a\x2c\x43" + "\xca\x25\xc8\x35\x37\x2d\xb5\xa9\x27\x44\x50\xf2\x6d\x22\x75\x41" + "\x77\x2a\xdb\xb1\x8c\x6d\x05\xe8\xc9\x99\xc7\x08\xf9\x14\x8f\x78" + "\xa9\x8f\xc2\x5a\x7a\x65\xc5\xd8\x86\xbb\x72\x69\x6b\x6b\x45\x83" + "\x5b\xb1\xf7\xcd\x16\x73\xee\xe9\x80\x85\xfe\x8e\xe1\xae\x53\x8f" + "\xde\xbe\x48\x8b\x59\xef\xf6\x7e\xd8\xb5\xa8\x47\xc0\x4e\x15\x58" + "\xca\xd3\x2f\xf8\x6c\xa6\x3d\x78\x4d\x7a\x54\xd6\x10\xe5\xcc\x05" + "\xe2\x29\xb5\x86\x07\x39\x7d\x78\x8e\x5a\x8f\x83\x4c\xe7\x3d\x68" + "\x3e\xe5\x02\xe6\x64\x4f\x5e\xb4\x49\x77\xf0\xc0\xfa\x6f\xc8\xfb" + "\x9f\x84\x6f\x55\xfb\x30\x5e\x89\x93\xa9\xf3\xa6\xa3\xd7\x26\xbb" + "\xd8\xa8\xd9\x95\x1d\xfe\xfc\xd7\xa8\x93\x66\x2f\x04\x53\x06\x64" + "\x7f\x31\x29\xae\xb7\x9f\xba\xc4\x6d\x68\xd1\x24\x32\xf4\x11", + }, + }, + "1.2.643.2.2.31.2" + }, +#endif }; gcry_cipher_hd_t hde, hdd; unsigned char out[MAX_DATA_LEN]; @@ -2451,6 +2593,20 @@ check_cfb_cipher (void) return; } + if (tv[i].oid) + { + err = gcry_cipher_set_sbox (hde, tv[i].oid); + if (!err) + err = gcry_cipher_set_sbox (hdd, tv[i].oid); + if (err) + { + fail ("cfb, gcry_cipher_set_sbox failed: %s\n", + gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hdd); + return; + } + } keylen = gcry_cipher_get_algo_keylen(tv[i].algo); if (!keylen) { -- 2.24.1 From dbaryshkov at gmail.com Sun Feb 9 19:29:42 2020 From: dbaryshkov at gmail.com (dbaryshkov at gmail.com) Date: Sun, 9 Feb 2020 
21:29:42 +0300 Subject: [PATCH 1/3] gost: add keymeshing support per RFC 4357 In-Reply-To: <20200209182944.491835-1-dbaryshkov@gmail.com> References: <20200209182944.491835-1-dbaryshkov@gmail.com> Message-ID: <20200209182944.491835-2-dbaryshkov@gmail.com> From: Dmitry Baryshkov * cipher/gost-s-box.c (gost_sbox): define if keymeshing should be enabled or not. (main): output whether we should enable or disable keymeshing for a particular parameters set. -- RFC 4357 defines CryptoPro keymeshing that should be applied depending on selected S-BOX (param set). Declare if particular S-BOX should enable or disable CryptoPro KeyMeshing. Signed-off-by: Dmitry Baryshkov --- cipher/gost-s-box.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/cipher/gost-s-box.c b/cipher/gost-s-box.c index 7aa544476fb3..5d5ed7dc449a 100644 --- a/cipher/gost-s-box.c +++ b/cipher/gost-s-box.c @@ -26,9 +26,11 @@ struct gost_sbox { const char *name; const char *oid; + unsigned int keymeshing; unsigned char sbox[16*8]; } gost_sboxes[] = { - { "test_3411", "1.2.643.2.2.30.0", { + { "test_3411", "1.2.643.2.2.30.0", 0, + { 0x4, 0xE, 0x5, 0x7, 0x6, 0x4, 0xD, 0x1, 0xA, 0xB, 0x8, 0xD, 0xC, 0xB, 0xB, 0xF, 0x9, 0x4, 0x1, 0xA, 0x7, 0xA, 0x4, 0xD, @@ -50,7 +52,8 @@ struct gost_sbox 0x3, 0x9, 0xB, 0x3, 0x2, 0xE, 0xC, 0xC, } }, - { "CryptoPro_3411", "1.2.643.2.2.30.1", { + { "CryptoPro_3411", "1.2.643.2.2.30.1", 0, + { 0xA, 0x5, 0x7, 0x4, 0x7, 0x7, 0xD, 0x1, 0x4, 0xF, 0xF, 0xA, 0x6, 0x6, 0xE, 0x3, 0x5, 0x4, 0xC, 0x7, 0x4, 0x2, 0x4, 0xA, @@ -72,7 +75,8 @@ struct gost_sbox 0xF, 0x8, 0xD, 0x3, 0x5, 0x3, 0xB, 0xC, } }, - { "Test_89", "1.2.643.2.2.31.0", { + { "Test_89", "1.2.643.2.2.31.0", 0, + { 0x4, 0xC, 0xD, 0xE, 0x3, 0x8, 0x9, 0xC, 0x2, 0x9, 0x8, 0x9, 0xE, 0xF, 0xB, 0x6, 0xF, 0xF, 0xE, 0xB, 0x5, 0x6, 0xC, 0x5, @@ -94,7 +98,8 @@ struct gost_sbox 0x6, 0x5, 0xB, 0x8, 0x4, 0x4, 0xD, 0x8, } }, - { "CryptoPro_A", "1.2.643.2.2.31.1", { + { "CryptoPro_A", "1.2.643.2.2.31.1", 1, + { 0x9, 0x3, 0xE, 0xE, 0xB, 0x3, 0x1, 0xB, 0x6, 0x7, 0x4, 0x7, 0x5, 0xA, 0xD, 0xA, 0x3, 0xE, 0x6, 0xA, 0x1, 0xD, 0x2, 0xF, @@ -116,7 +121,8 @@ struct gost_sbox 0x5, 0x1, 0x9, 0x6, 0x6, 0x6, 0xE, 0x4, } }, - { "CryptoPro_B", "1.2.643.2.2.31.2", { + { "CryptoPro_B", "1.2.643.2.2.31.2", 1, + { 0x8, 0x0, 0xE, 0x7, 0x2, 0x8, 0x5, 0x0, 0x4, 0x1, 0xC, 0x5, 0x7, 0x3, 0x2, 0x4, 0xB, 0x2, 0x0, 0x0, 0xC, 0x2, 0xA, 0xB, @@ -138,7 +144,8 @@ struct gost_sbox 0xF, 0xE, 0x4, 0x8, 0x3, 0x5, 0xE, 0xC, } }, - { "CryptoPro_C", "1.2.643.2.2.31.3", { + { "CryptoPro_C", "1.2.643.2.2.31.3", 1, + { 0x1, 0x0, 0x8, 0x3, 0x8, 0xC, 0xA, 0x7, 0xB, 0x1, 0x2, 0x6, 0xD, 0x9, 0x9, 0x4, 0xC, 0x7, 0x5, 0x0, 0xB, 0xB, 0x6, 0x0, @@ -160,7 +167,8 @@ struct gost_sbox 0x3, 0x3, 0xB, 0x4, 0x7, 0xD, 0x7, 0x8, } }, - { "CryptoPro_D", "1.2.643.2.2.31.4", { + { "CryptoPro_D", "1.2.643.2.2.31.4", 1, + { 0xF, 0xB, 0x1, 0x1, 0x0, 0x8, 0x3, 0x1, 0xC, 0x6, 0xC, 0x5, 0xC, 0x0, 0x0, 0xA, 0x2, 0x3, 0xB, 0xE, 0x8, 0xF, 0x6, 0x6, @@ -182,7 +190,8 @@ struct gost_sbox 0x3, 0x1, 0x2, 0x8, 0x1, 0x6, 0x7, 0xE, } }, - { "TC26_Z", "1.2.643.7.1.2.5.1.1", { + { "TC26_Z", "1.2.643.7.1.2.5.1.1", 1, + { 0xc, 0x6, 0xb, 0xc, 0x7, 0x5, 0x8, 0x1, 0x4, 0x8, 0x3, 0x8, 0xf, 0xd, 0xe, 0x7, 0x6, 0x2, 0x5, 0x2, 0x5, 0xf, 0x2, 0xe, @@ -242,14 +251,14 @@ int main(int argc, char **argv) fprintf (f, "\n };\n\n"); } - fprintf (f, "static struct\n{\n const char *oid;\n const u32 *sbox;\n} gost_oid_map[] = {\n"); + fprintf (f, "static struct\n{\n const char *oid;\n const u32 *sbox;\n const int 
keymeshing;\n} gost_oid_map[] = {\n"); for (s = 0; s < DIM(gost_sboxes); s++) { - fprintf (f, " { \"%s\", sbox_%s },\n", gost_sboxes[s].oid, gost_sboxes[s].name ); + fprintf (f, " { \"%s\", sbox_%s, %d },\n", gost_sboxes[s].oid, gost_sboxes[s].name, gost_sboxes[s].keymeshing ); } - fprintf(f, " { NULL, NULL }\n};\n"); + fprintf(f, " { NULL, NULL, 0 }\n};\n"); fclose (f); -- 2.24.1 From dbaryshkov at gmail.com Sun Feb 9 19:29:41 2020 From: dbaryshkov at gmail.com (dbaryshkov at gmail.com) Date: Sun, 9 Feb 2020 21:29:41 +0300 Subject: [PATCH 0/3] gost28147: implement KeyMeshing per RFC 4357 Message-ID: <20200209182944.491835-1-dbaryshkov@gmail.com> RFC 4357 defines a special procedure of changing key and IV each 1024 bytes. This procedure should be used for CFB-encrypted data (e.g. CMS files). Implement support for this key meshing. These changes were asked for by AltLinux team implementing support for RFC 4490 in gpgsm. -- With best wishes Dmitry From dbaryshkov at gmail.com Sun Feb 9 19:29:43 2020 From: dbaryshkov at gmail.com (dbaryshkov at gmail.com) Date: Sun, 9 Feb 2020 21:29:43 +0300 Subject: [PATCH 2/3] gost28147: add support for CryptoPro key meshing per RFC 4357 In-Reply-To: <20200209182944.491835-1-dbaryshkov@gmail.com> References: <20200209182944.491835-1-dbaryshkov@gmail.com> Message-ID: <20200209182944.491835-3-dbaryshkov@gmail.com> From: Dmitry Baryshkov * cipher/gost28147.c (gost_do_set_sbox, cryptopro_key_meshing, CryptoProMeshingKey, gost_encrypt_block_mesh): New. (_gcry_cipher_spec_gost28147_mesh): New cipher with keymeshing, (_gcry_cipher_spec_gost28147): Remove OIDs for this cipher should not be selected using these OIDs (they are for CFB with keymeshing). * cipher/cipher.c (cipher_list, cipher_list_algo301): add _gcry_cipher_spec_gost28147_mesh. * src/gcrypt.h.in (GCRY_CIPHER_GOST28147_MESH): New cipher with keymeshing. * doc/gcrypt.texi (GCRY_CIPHER_GOST28147_MESH): Add definition. * tests/basic.c (check_gost28147_cipher, check_gost28147_cipher_basic): Run basic tests on GCRY_CIPHER_GOST28147_MESH. -- Add actual cipher implementing CryptoPro KeyMeshing. This has been requested by AltLinux team to properly support CFB-encrypted CMS files. 
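Because the CFB parameter-set OIDs are moved from the plain GOST28147 spec to the new _MESH spec, OID-driven algorithm selection (as a CMS consumer such as gpgsm would perform it) now resolves to the meshing-capable cipher; the parameter set itself is still selected with gcry_cipher_set_sbox. A minimal sketch of that lookup follows; gcry_cipher_map_name and gcry_cipher_mode_from_oid are existing libgcrypt entry points, and the result shown assumes this patch is applied:

/* Sketch only: resolve a CryptoPro parameter-set OID to cipher and mode.
 * With this patch the OID maps to GOST28147_MESH; once the parameter set is
 * also selected with gcry_cipher_set_sbox, CFB data longer than 1024 bytes
 * is re-keyed per RFC 4357. */
#include <gcrypt.h>
#include <stdio.h>

int
main (void)
{
  const char *oid = "1.2.643.2.2.31.1";   /* CryptoPro_A parameter set */
  int algo, mode;

  gcry_check_version (NULL);
  algo = gcry_cipher_map_name (oid);      /* looks the OID up in the cipher specs */
  mode = gcry_cipher_mode_from_oid (oid); /* GCRY_CIPHER_MODE_CFB for these OIDs */

  printf ("algo=%d (%s), mode=%d\n",
          algo, gcry_cipher_algo_name (algo), mode);
  return 0;
}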
Signed-off-by: Dmitry Baryshkov --- cipher/cipher.c | 8 +++- cipher/gost.h | 2 + cipher/gost28147.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-- doc/gcrypt.texi | 7 ++++ src/cipher.h | 1 + src/gcrypt.h.in | 3 +- tests/basic.c | 15 ++++++-- 7 files changed, 118 insertions(+), 10 deletions(-) diff --git a/cipher/cipher.c b/cipher/cipher.c index 09b8d829932f..edcb421abe88 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -83,6 +83,7 @@ static gcry_cipher_spec_t * const cipher_list[] = #endif #if USE_GOST28147 &_gcry_cipher_spec_gost28147, + &_gcry_cipher_spec_gost28147_mesh, #endif #if USE_CHACHA20 &_gcry_cipher_spec_chacha20, @@ -193,7 +194,12 @@ static gcry_cipher_spec_t * const cipher_list_algo301[] = NULL, #endif #if USE_CHACHA20 - &_gcry_cipher_spec_chacha20 + &_gcry_cipher_spec_chacha20, +#else + NULL, +#endif +#if USE_GOST28147 + &_gcry_cipher_spec_gost28147_mesh, #else NULL, #endif diff --git a/cipher/gost.h b/cipher/gost.h index 04c2f85e57d2..53a40505038b 100644 --- a/cipher/gost.h +++ b/cipher/gost.h @@ -23,6 +23,8 @@ typedef struct { u32 key[8]; const u32 *sbox; + unsigned int mesh_counter; + unsigned int mesh_limit; } GOST28147_context; /* This is a simple interface that will be used by GOST R 34.11-94 */ diff --git a/cipher/gost28147.c b/cipher/gost28147.c index f30ca16a4d02..00d729020799 100644 --- a/cipher/gost28147.c +++ b/cipher/gost28147.c @@ -38,6 +38,13 @@ #include "gost.h" #include "gost-sb.h" +static void +gost_do_set_sbox (GOST28147_context *ctx, unsigned int index) +{ + ctx->sbox = gost_oid_map[index].sbox; + ctx->mesh_limit = gost_oid_map[index].keymeshing ? 1024 : 0; +} + static gcry_err_code_t gost_setkey (void *c, const byte *key, unsigned keylen, gcry_cipher_hd_t hd) @@ -51,12 +58,15 @@ gost_setkey (void *c, const byte *key, unsigned keylen, return GPG_ERR_INV_KEYLEN; if (!ctx->sbox) - ctx->sbox = sbox_test_3411; + gost_do_set_sbox (ctx, 0); for (i = 0; i < 8; i++) { ctx->key[i] = buf_get_le32(&key[4*i]); } + + ctx->mesh_counter = 0; + return GPG_ERR_NO_ERROR; } @@ -178,7 +188,7 @@ gost_set_sbox (GOST28147_context *ctx, const char *oid) { if (!strcmp(gost_oid_map[i].oid, oid)) { - ctx->sbox = gost_oid_map[i].sbox; + gost_do_set_sbox (ctx, i); return 0; } } @@ -207,8 +217,67 @@ gost_set_extra_info (void *c, int what, const void *buffer, size_t buflen) return ec; } -static gcry_cipher_oid_spec_t oids_gost28147[] = +static const byte CryptoProKeyMeshingKey[] = { + 0x69, 0x00, 0x72, 0x22, 0x64, 0xC9, 0x04, 0x23, + 0x8D, 0x3A, 0xDB, 0x96, 0x46, 0xE9, 0x2A, 0xC4, + 0x18, 0xFE, 0xAC, 0x94, 0x00, 0xED, 0x07, 0x12, + 0xC0, 0x86, 0xDC, 0xC2, 0xEF, 0x4C, 0xA9, 0x2B +}; + +/* Implements key meshing algorithm by modifing ctx and returning new IV. + Thanks to Dmitry Belyavskiy. 
*/ +static void +cryptopro_key_meshing (GOST28147_context *ctx) +{ + unsigned char newkey[32]; + unsigned int i; + + /* "Decrypt" the static keymeshing key */ + for (i = 0; i < 4; i++) + { + gost_decrypt_block (ctx, newkey + i*8, CryptoProKeyMeshingKey + i*8); + } + + /* Set new key */ + for (i = 0; i < 8; i++) + { + ctx->key[i] = buf_get_le32(&newkey[4*i]); + } + + ctx->mesh_counter = 0; +} + +static unsigned int +gost_encrypt_block_mesh (void *c, byte *outbuf, const byte *inbuf) +{ + GOST28147_context *ctx = c; + u32 n1, n2; + unsigned int burn; + + n1 = buf_get_le32 (inbuf); + n2 = buf_get_le32 (inbuf+4); + + if (ctx->mesh_limit && (ctx->mesh_counter == ctx->mesh_limit)) + { + cryptopro_key_meshing (ctx); + /* Yes, encrypt twice: once for KeyMeshing procedure per RFC 4357, + * once for block encryption */ + _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2); + } + + burn = _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2); + + ctx->mesh_counter += 8; + + buf_put_le32 (outbuf+0, n1); + buf_put_le32 (outbuf+4, n2); + + return /* burn_stack */ burn + 6*sizeof(void*) /* func call */; +} + +static gcry_cipher_oid_spec_t oids_gost28147_mesh[] = { + { "1.2.643.2.2.21", GCRY_CIPHER_MODE_CFB }, /* { "1.2.643.2.2.31.0", GCRY_CIPHER_MODE_CNTGOST }, */ { "1.2.643.2.2.31.1", GCRY_CIPHER_MODE_CFB }, { "1.2.643.2.2.31.2", GCRY_CIPHER_MODE_CFB }, @@ -220,10 +289,25 @@ static gcry_cipher_oid_spec_t oids_gost28147[] = gcry_cipher_spec_t _gcry_cipher_spec_gost28147 = { GCRY_CIPHER_GOST28147, {0, 0}, - "GOST28147", NULL, oids_gost28147, 8, 256, + "GOST28147", NULL, NULL, 8, 256, sizeof (GOST28147_context), gost_setkey, gost_encrypt_block, gost_decrypt_block, NULL, NULL, NULL, gost_set_extra_info, }; + +/* Meshing is used only for CFB, so no need to have separate + * gost_decrypt_block_mesh. + * Moreover key meshing is specified as encrypting the block (IV). Decrypting + * it afterwards would be meaningless. */ +gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh = + { + GCRY_CIPHER_GOST28147_MESH, {0, 0}, + "GOST28147_MESH", NULL, oids_gost28147_mesh, 8, 256, + sizeof (GOST28147_context), + gost_setkey, + gost_encrypt_block_mesh, + gost_decrypt_block, + NULL, NULL, NULL, gost_set_extra_info, + }; diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index 091704de16c6..8cd6e48013b3 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -1630,6 +1630,13 @@ This is the Salsa20/12 - reduced round version of Salsa20 stream cipher. The GOST 28147-89 cipher, defined in the respective GOST standard. Translation of this GOST into English is provided in the RFC-5830. + at item GCRY_CIPHER_GOST28147_MESH + at cindex GOST 28147-89 CryptoPro keymeshing +The GOST 28147-89 cipher, defined in the respective GOST standard. +Translation of this GOST into English is provided in the RFC-5830. +This cipher will use CryptoPro keymeshing as defined in RFC 4357 +if it has to be used for the selected parameter set. + @item GCRY_CIPHER_CHACHA20 @cindex ChaCha20 This is the ChaCha20 stream cipher. diff --git a/src/cipher.h b/src/cipher.h index 1fe50890f879..20ccb8c51846 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -300,6 +300,7 @@ extern gcry_cipher_spec_t _gcry_cipher_spec_idea; extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20; extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12; extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147; +extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh; extern gcry_cipher_spec_t _gcry_cipher_spec_chacha20; /* Declarations for the digest specifications. 
*/ diff --git a/src/gcrypt.h.in b/src/gcrypt.h.in index 46f92a91f6bb..375a40e2acaa 100644 --- a/src/gcrypt.h.in +++ b/src/gcrypt.h.in @@ -945,7 +945,8 @@ enum gcry_cipher_algos GCRY_CIPHER_SALSA20 = 313, GCRY_CIPHER_SALSA20R12 = 314, GCRY_CIPHER_GOST28147 = 315, - GCRY_CIPHER_CHACHA20 = 316 + GCRY_CIPHER_CHACHA20 = 316, + GCRY_CIPHER_GOST28147_MESH = 317 /* GOST 28247 with optional CryptoPro keymeshing */ }; /* The Rijndael algorithm is basically AES, so provide some macros. */ diff --git a/tests/basic.c b/tests/basic.c index 4e3589eea2f4..6dc2fe40a6a4 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -7126,7 +7126,7 @@ check_xts_cipher (void) static void -check_gost28147_cipher (void) +check_gost28147_cipher_basic (enum gcry_cipher_algos algo) { #if USE_GOST28147 static const struct { @@ -7203,7 +7203,7 @@ check_gost28147_cipher (void) if (verbose) fprintf (stderr, " Starting GOST28147 cipher checks.\n"); - keylen = gcry_cipher_get_algo_keylen(GCRY_CIPHER_GOST28147); + keylen = gcry_cipher_get_algo_keylen(algo); if (!keylen) { fail ("gost28147, gcry_cipher_get_algo_keylen failed\n"); @@ -7212,10 +7212,10 @@ check_gost28147_cipher (void) for (i = 0; i < sizeof (tv) / sizeof (tv[0]); i++) { - err = gcry_cipher_open (&hde, GCRY_CIPHER_GOST28147, + err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_ECB, 0); if (!err) - err = gcry_cipher_open (&hdd, GCRY_CIPHER_GOST28147, + err = gcry_cipher_open (&hdd, algo, GCRY_CIPHER_MODE_ECB, 0); if (err) { @@ -7292,6 +7292,12 @@ check_gost28147_cipher (void) #endif } +static void +check_gost28147_cipher (void) +{ + check_gost28147_cipher_basic (GCRY_CIPHER_GOST28147); + check_gost28147_cipher_basic (GCRY_CIPHER_GOST28147_MESH); +} static void check_stream_cipher (void) @@ -9281,6 +9287,7 @@ check_ciphers (void) #endif #if USE_GOST28147 GCRY_CIPHER_GOST28147, + GCRY_CIPHER_GOST28147_MESH, #endif 0 }; -- 2.24.1 From wangziheng at outlook.com Wed Feb 12 03:31:49 2020 From: wangziheng at outlook.com (microsoft gaofei) Date: Wed, 12 Feb 2020 02:31:49 +0000 Subject: RSA-4096 compared to AES-142. Message-ID: According to the research, https://crypto.stackexchange.com/q/8687 , 256-bit symmetric key is compared to RSA-15360. Will you add support of 15360-bit modulus for RSA? . Moreover, https://github.com/open-quantum-safe , Japan and USA are working on post-quantum algorithms. -------------- next part -------------- An HTML attachment was scrubbed... URL: From galex-713 at galex-713.eu Wed Feb 12 08:01:01 2020 From: galex-713 at galex-713.eu (Alexandre =?ISO-8859-1?Q?Fran=E7ois?= Garreau) Date: Wed, 12 Feb 2020 08:01:01 +0100 Subject: RSA-4096 compared to AES-142. In-Reply-To: References: Message-ID: <1602613.TnEz0FvcBl@galex-713.eu> Le mercredi 12 f?vrier 2020, 03:31:49 CET microsoft gaofei a ?crit : > According to the research, https://crypto.stackexchange.com/q/8687 , > 256-bit symmetric key is compared to RSA-15360. Will you add support of > 15360-bit modulus for RSA? Rather 16380 bits, which stays pretty close, and remains a power of two (round number!) > Moreover, https://github.com/open-quantum-safe , Japan and USA are > working on post-quantum algorithms. From wangziheng at outlook.com Wed Feb 12 12:15:13 2020 From: wangziheng at outlook.com (microsoft gaofei) Date: Wed, 12 Feb 2020 11:15:13 +0000 Subject: RSA-4096 compared to AES-142. Message-ID: https://github.com/open-quantum-safe . You should NOT approve Chinese algorithms. 
China blocked YouTube, Google, Twitter, Reddit, and more sites, I suspect that China may publish a bad cipher so that China can monitor us. AES, RSA, and ECC are trustable for Chinese users, but may contain USA backdoor. -------------- next part -------------- An HTML attachment was scrubbed... URL: From dbaryshkov at gmail.com Tue Feb 18 13:48:57 2020 From: dbaryshkov at gmail.com (Dmitry Baryshkov) Date: Tue, 18 Feb 2020 15:48:57 +0300 Subject: [PATCH 0/3] gost28147: implement KeyMeshing per RFC 4357 In-Reply-To: <20200209182944.491835-1-dbaryshkov@gmail.com> References: <20200209182944.491835-1-dbaryshkov@gmail.com> Message-ID: Any updates on these patches? -- With best wishes Dmitry Sun, 9 Feb 2020, 21:30 : > RFC 4357 defines a special procedure of changing key and IV each 1024 > bytes. This procedure should be used for CFB-encrypted data (e.g. CMS > files). Implement support for this key meshing. These changes were asked > for by AltLinux team implementing support for RFC 4490 in gpgsm. > > -- > With best wishes > Dmitry > > > -------------- next part -------------- An HTML attachment was scrubbed... URL: From stefbon at gmail.com Tue Feb 18 15:48:54 2020 From: stefbon at gmail.com (Stef Bon) Date: Tue, 18 Feb 2020 15:48:54 +0100 Subject: RSA-4096 compared to AES-142. In-Reply-To: References: Message-ID: You should not crosspost, and AES, RSA and ECC do not contain a backdoor. On Wed 12 Feb 2020 at 12:44, microsoft gaofei via Gcrypt-devel wrote: > > https://github.com/open-quantum-safe . You should NOT approve Chinese algorithms. China blocked YouTube, Google, Twitter, Reddit, and more sites, I suspect that China may publish a bad cipher so that China can monitor us. AES, RSA, and ECC are trustable for Chinese users, but may contain USA backdoor. > _______________________________________________ > Gcrypt-devel mailing list > Gcrypt-devel at gnupg.org > http://lists.gnupg.org/mailman/listinfo/gcrypt-devel From flashcode at flashtux.org Fri Feb 28 22:20:01 2020 From: flashcode at flashtux.org (Sébastien Helleu) Date: Fri, 28 Feb 2020 22:20:01 +0100 Subject: [PATCH] gcrypt.texi: fix description of hash algorithms Message-ID: <20200228212000.GH11886@flashtux.org> Hi, You'll find attached a patch to fix two typos in doc (description of hash algorithms). -- Sébastien Helleu web: weechat.org / flashtux.org irc: FlashCode @ irc.freenode.net -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-gcrypt.texi-fix-description-of-hash-algorithms.patch Type: text/x-diff Size: 1159 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 195 bytes Desc: not available URL: