From jussi.kivilinna at iki.fi Sun Nov 1 20:06:09 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 01 Nov 2015 21:06:09 +0200 Subject: [PATCH 1/4] Enable CRC test vectors with zero bytes Message-ID: <20151101190609.1607.2281.stgit@localhost6.localdomain6> --- tests/basic.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/basic.c b/tests/basic.c index 0762a89..7d5de00 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -5851,16 +5851,12 @@ check_digests (void) { GCRY_MD_CRC32_RFC1510, "test0123456789", "\xb8\x3e\x88\xd6" }, { GCRY_MD_CRC32_RFC1510, "MASSACHVSETTS INSTITVTE OF TECHNOLOGY", "\xe3\x41\x80\xf7" }, -#if 0 - { GCRY_MD_CRC32_RFC1510, "\x80\x00", "\x3b\x83\x98\x4b" }, - { GCRY_MD_CRC32_RFC1510, "\x00\x08", "\x0e\xdb\x88\x32" }, - { GCRY_MD_CRC32_RFC1510, "\x00\x80", "\xed\xb8\x83\x20" }, -#endif + { GCRY_MD_CRC32_RFC1510, "\x80\x00", "\x3b\x83\x98\x4b", 2 }, + { GCRY_MD_CRC32_RFC1510, "\x00\x08", "\x0e\xdb\x88\x32", 2 }, + { GCRY_MD_CRC32_RFC1510, "\x00\x80", "\xed\xb8\x83\x20", 2 }, { GCRY_MD_CRC32_RFC1510, "\x80", "\xed\xb8\x83\x20" }, -#if 0 - { GCRY_MD_CRC32_RFC1510, "\x80\x00\x00\x00", "\xed\x59\xb6\x3b" }, - { GCRY_MD_CRC32_RFC1510, "\x00\x00\x00\x01", "\x77\x07\x30\x96" }, -#endif + { GCRY_MD_CRC32_RFC1510, "\x80\x00\x00\x00", "\xed\x59\xb6\x3b", 4 }, + { GCRY_MD_CRC32_RFC1510, "\x00\x00\x00\x01", "\x77\x07\x30\x96", 4 }, { GCRY_MD_CRC32_RFC1510, "123456789", "\x2d\xfd\x2d\x88" }, { GCRY_MD_CRC24_RFC2440, "", "\xb7\x04\xce" }, { GCRY_MD_CRC24_RFC2440, "foo", "\x4f\xc2\x55" }, From jussi.kivilinna at iki.fi Sun Nov 1 20:06:20 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 01 Nov 2015 21:06:20 +0200 Subject: [PATCH 3/4] Add ARMv7/NEON implementation of Keccak In-Reply-To: <20151101190609.1607.2281.stgit@localhost6.localdomain6> References: <20151101190609.1607.2281.stgit@localhost6.localdomain6> Message-ID: <20151101190619.1607.71346.stgit@localhost6.localdomain6> * 
cipher/Makefile.am: Add 'keccak-armv7-neon.S'. * cipher/keccak-armv7-neon.S: New. * cipher/keccak.c (USE_64BIT_ARM_NEON): New. (NEED_COMMON64): Select if USE_64BIT_ARM_NEON. [NEED_COMMON64] (round_consts_64bit): Rename to... [NEED_COMMON64] (_gcry_keccak_round_consts_64bit): ...this; Add terminator at end. [USE_64BIT_ARM_NEON] (_gcry_keccak_permute_armv7_neon) (_gcry_keccak_absorb_lanes64_armv7_neon, keccak_permute64_armv7_neon) (keccak_absorb_lanes64_armv7_neon, keccak_armv7_neon_64_ops): New. (keccak_init) [USE_64BIT_ARM_NEON]: Select ARM/NEON implementation if supported by HW. * cipher/keccak_permute_64.h (KECCAK_F1600_PERMUTE_FUNC_NAME): Update to use new round constant table. * configure.ac: Add 'keccak-armv7-neon.lo'. -- Patch adds ARMv7/NEON implementation of Keccak (SHAKE/SHA3). Patch is based on public-domain implementation by Ronny Van Keer from SUPERCOP package: https://github.com/floodyberry/supercop/blob/master/crypto_hash/\ keccakc1024/inplace-armv7a-neon/keccak2.s Benchmark results on Cortex-A8 @ 1008 Mhz: Before (generic 32-bit bit-interleaved impl.): | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 83.00 ns/B 11.49 MiB/s 83.67 c/B SHAKE256 | 101.7 ns/B 9.38 MiB/s 102.5 c/B SHA3-224 | 96.13 ns/B 9.92 MiB/s 96.90 c/B SHA3-256 | 101.5 ns/B 9.40 MiB/s 102.3 c/B SHA3-384 | 131.4 ns/B 7.26 MiB/s 132.5 c/B SHA3-512 | 189.1 ns/B 5.04 MiB/s 190.6 c/B After (ARM/NEON, ~3.2x faster): | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 25.09 ns/B 38.01 MiB/s 25.29 c/B SHAKE256 | 30.95 ns/B 30.82 MiB/s 31.19 c/B SHA3-224 | 29.24 ns/B 32.61 MiB/s 29.48 c/B SHA3-256 | 30.95 ns/B 30.82 MiB/s 31.19 c/B SHA3-384 | 40.42 ns/B 23.59 MiB/s 40.74 c/B SHA3-512 | 58.37 ns/B 16.34 MiB/s 58.84 c/B Signed-off-by: Jussi Kivilinna --- cipher/Makefile.am | 2 cipher/keccak-armv7-neon.S | 945 ++++++++++++++++++++++++++++++++++++++++++++ cipher/keccak.c | 71 +++ cipher/keccak_permute_64.h | 2 configure.ac | 2 5 files changed, 1016 insertions(+), 6 deletions(-) create mode 
100644 cipher/keccak-armv7-neon.S diff --git a/cipher/Makefile.am b/cipher/Makefile.am index be03d06..88c8fbf 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -90,7 +90,7 @@ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S sha256-avx2-bmi2-amd64.S \ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S sha512-avx2-bmi2-amd64.S \ sha512-armv7-neon.S \ -keccak.c keccak_permute_32.h keccak_permute_64.h \ +keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ stribog.c \ tiger.c \ whirlpool.c whirlpool-sse2-amd64.S \ diff --git a/cipher/keccak-armv7-neon.S b/cipher/keccak-armv7-neon.S new file mode 100644 index 0000000..6118ce4 --- /dev/null +++ b/cipher/keccak-armv7-neon.S @@ -0,0 +1,945 @@ +/* keccak-armv7-neon.S - ARMv7/NEON implementation of Keccak + * + * Copyright (C) 2015 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) + +/* Based on public-domain/CC0 implementation from SUPERCOP package + * (keccakc1024/inplace-armv7a-neon/keccak2.s) + * + * Original copyright header follows: + */ + +@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +@ Michaël Peeters and Gilles Van Assche. For more information, feedback or +@ questions, please refer to our website: http://keccak.noekeon.org/ +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ + +.text + +.syntax unified +.fpu neon +.arm + + +.extern _gcry_keccak_round_consts_64bit; + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +@// --- offsets in state +.equ Aba, 0*8 +.equ Aga, 1*8 +.equ Aka, 2*8 +.equ Ama, 3*8 +.equ Asa, 4*8 + +@// --- macros + +.macro KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5 + + @Prepare Theta + @Ca = Aba^Aga^Aka^Ama^Asa@ + @Ce = Abe^Age^Ake^Ame^Ase@ + @Ci = Abi^Agi^Aki^Ami^Asi@ + @Co = Abo^Ago^Ako^Amo^Aso@ + @Cu = Abu^Agu^Aku^Amu^Asu@ + @De = Ca^ROL64(Ci, 1)@ + @Di = Ce^ROL64(Co, 1)@ + @Do = Ci^ROL64(Cu, 1)@ + @Du = Co^ROL64(Ca, 1)@ + @Da = Cu^ROL64(Ce, 1)@ + + veor.64 q4, q6, q7 + veor.64 q5, q9, q10 + veor.64 d8, d8, d9 + veor.64 d10, d10, d11 + veor.64 d1, d8, d16 + veor.64 d2, d10, d17 + + veor.64 q4, q11, q12 + veor.64 q5, q14, q15 + veor.64 d8, d8, d9 + veor.64 d10, d10, d11 + veor.64 d3, d8, d26 + + vadd.u64 q4, q1, q1 + veor.64 d4, d10, d27 +
vmov.64 d0, d5 + vsri.64 q4, q1, #63 + + vadd.u64 q5, q2, q2 + veor.64 q4, q4, q0 + vsri.64 q5, q2, #63 + vadd.u64 d7, d1, d1 + veor.64 \argA2, \argA2, d8 + veor.64 q5, q5, q1 + + vsri.64 d7, d1, #63 + vshl.u64 d1, \argA2, #44 + veor.64 \argA3, \argA3, d9 + veor.64 d7, d7, d4 + + @Ba = argA1^Da@ + @Be = ROL64((argA2^De), 44)@ + @Bi = ROL64((argA3^Di), 43)@ + @Bo = ROL64((argA4^Do), 21)@ + @Bu = ROL64((argA5^Du), 14)@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + @argA1 = Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@ + vsri.64 d1, \argA2, #64-44 + vshl.u64 d2, \argA3, #43 + vldr.64 d0, [sp, #\argA1] + veor.64 \argA4, \argA4, d10 + vsri.64 d2, \argA3, #64-43 + vshl.u64 d3, \argA4, #21 + veor.64 \argA5, \argA5, d11 + veor.64 d0, d0, d7 + vsri.64 d3, \argA4, #64-21 + vbic.64 d5, d2, d1 + vshl.u64 d4, \argA5, #14 + vbic.64 \argA2, d3, d2 + vld1.64 d6, [ip]! + veor.64 d5, d0 + vsri.64 d4, \argA5, #64-14 + veor.64 d5, d6 + vbic.64 \argA5, d1, d0 + vbic.64 \argA3, d4, d3 + vbic.64 \argA4, d0, d4 + veor.64 \argA2, d1 + vstr.64 d5, [sp, #\argA1] + veor.64 \argA3, d2 + veor.64 \argA4, d3 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5 + + @d2 = ROL64((argA1^Da), 3)@ + @d3 = ROL64((argA2^De), 45)@ + @d4 = ROL64((argA3^Di), 61)@ + @d0 = ROL64((argA4^Do), 28)@ + @d1 = ROL64((argA5^Du), 20)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d3, \argA2, #45 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d4, \argA3, #61 + veor.64 \argA4, \argA4, d10 + vsri.64 d3, \argA2, #64-45 + veor.64 \argA5, \argA5, d11 + vsri.64 d4, \argA3, #64-61 + vshl.u64 d0, \argA4, #28 + veor.64 d6, d6, d7 + vshl.u64 d1, \argA5, #20 + vbic.64 \argA3, d4, d3 + vsri.64 d0, \argA4, #64-28 + vbic.64 \argA4, 
d0, d4 + vshl.u64 d2, d6, #3 + vsri.64 d1, \argA5, #64-20 + veor.64 \argA4, d3 + vsri.64 d2, d6, #64-3 + vbic.64 \argA5, d1, d0 + vbic.64 d6, d2, d1 + vbic.64 \argA2, d3, d2 + veor.64 d6, d0 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 \argA3, d2 + veor.64 d5, d6 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5 + + @d4 = ROL64((argA1^Da), 18)@ + @d0 = ROL64((argA2^De), 1)@ + @d1 = ROL64((argA3^Di), 6)@ + @d2 = ROL64((argA4^Do), 25)@ + @d3 = ROL64((argA5^Du), 8)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA3, \argA3, d9 + veor.64 \argA4, \argA4, d10 + vshl.u64 d1, \argA3, #6 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d2, \argA4, #25 + veor.64 \argA5, \argA5, d11 + vsri.64 d1, \argA3, #64-6 + veor.64 \argA2, \argA2, d8 + vsri.64 d2, \argA4, #64-25 + vext.8 d3, \argA5, \argA5, #7 + veor.64 d6, d6, d7 + vbic.64 \argA3, d2, d1 + vadd.u64 d0, \argA2, \argA2 + vbic.64 \argA4, d3, d2 + vsri.64 d0, \argA2, #64-1 + vshl.u64 d4, d6, #18 + veor.64 \argA2, d1, \argA4 + veor.64 \argA3, d0 + vsri.64 d4, d6, #64-18 + vstr.64 \argA3, [sp, #\argA1] + veor.64 d5, \argA3 + vbic.64 \argA5, d1, d0 + vbic.64 \argA3, d4, d3 + vbic.64 \argA4, d0, d4 + veor.64 \argA3, d2 + veor.64 \argA4, d3 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5 + + @d1 = ROL64((argA1^Da), 36)@ + @d2 = ROL64((argA2^De), 10)@ + @d3 = ROL64((argA3^Di), 15)@ + @d4 = ROL64((argA4^Do), 56)@ + @d0 = ROL64((argA5^Du), 27)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d2, \argA2, #10 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d3, \argA3, #15 + veor.64 \argA4, \argA4, d10 + vsri.64 d2, \argA2, #64-10 + vsri.64 d3, 
\argA3, #64-15 + veor.64 \argA5, \argA5, d11 + vext.8 d4, \argA4, \argA4, #1 + vbic.64 \argA2, d3, d2 + vshl.u64 d0, \argA5, #27 + veor.64 d6, d6, d7 + vbic.64 \argA3, d4, d3 + vsri.64 d0, \argA5, #64-27 + vshl.u64 d1, d6, #36 + veor.64 \argA3, d2 + vbic.64 \argA4, d0, d4 + vsri.64 d1, d6, #64-36 + + veor.64 \argA4, d3 + vbic.64 d6, d2, d1 + vbic.64 \argA5, d1, d0 + veor.64 d6, d0 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 d5, d6 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5 + + @d3 = ROL64((argA1^Da), 41)@ + @d4 = ROL64((argA2^De), 2)@ + @d0 = ROL64((argA3^Di), 62)@ + @d1 = ROL64((argA4^Do), 55)@ + @d2 = ROL64((argA5^Du), 39)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d4, \argA2, #2 + veor.64 \argA5, \argA5, d11 + vshl.u64 d0, \argA3, #62 + vldr.64 d6, [sp, #\argA1] + vsri.64 d4, \argA2, #64-2 + veor.64 \argA4, \argA4, d10 + vsri.64 d0, \argA3, #64-62 + + vshl.u64 d1, \argA4, #55 + veor.64 d6, d6, d7 + vshl.u64 d2, \argA5, #39 + vsri.64 d1, \argA4, #64-55 + vbic.64 \argA4, d0, d4 + vsri.64 d2, \argA5, #64-39 + vbic.64 \argA2, d1, d0 + vshl.u64 d3, d6, #41 + veor.64 \argA5, d4, \argA2 + vbic.64 \argA2, d2, d1 + vsri.64 d3, d6, #64-41 + veor.64 d6, d0, \argA2 + + vbic.64 \argA2, d3, d2 + vbic.64 \argA3, d4, d3 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 d5, d6 + veor.64 \argA3, d2 + veor.64 \argA4, d3 + + .endm + + +@// --- code + +@ not callable from C!
+.p2align 3 +.type KeccakF_armv7a_neon_asm,%function; +KeccakF_armv7a_neon_asm: @ + +.LroundLoop: + + KeccakThetaRhoPiChiIota Aba, d13, d19, d25, d31 + KeccakThetaRhoPiChi1 Aka, d15, d21, d22, d28 + KeccakThetaRhoPiChi2 Asa, d12, d18, d24, d30 + KeccakThetaRhoPiChi3 Aga, d14, d20, d26, d27 + KeccakThetaRhoPiChi4 Ama, d16, d17, d23, d29 + + KeccakThetaRhoPiChiIota Aba, d15, d18, d26, d29 + KeccakThetaRhoPiChi1 Asa, d14, d17, d25, d28 + KeccakThetaRhoPiChi2 Ama, d13, d21, d24, d27 + KeccakThetaRhoPiChi3 Aka, d12, d20, d23, d31 + KeccakThetaRhoPiChi4 Aga, d16, d19, d22, d30 + + KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30 + KeccakThetaRhoPiChi1 Ama, d12, d19, d26, d28 + KeccakThetaRhoPiChi2 Aga, d15, d17, d24, d31 + KeccakThetaRhoPiChi3 Asa, d13, d20, d22, d29 + KeccakThetaRhoPiChi4 Aka, d16, d18, d25, d27 + + KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27 + KeccakThetaRhoPiChi1 Aga, d13, d18, d23, d28 + KeccakThetaRhoPiChi2 Aka, d14, d19, d24, d29 + ldr r0, [ip] + KeccakThetaRhoPiChi3 Ama, d15, d20, d25, d30 + cmp r0, #0xFFFFFFFF + KeccakThetaRhoPiChi4 Asa, d16, d21, d26, d31 + + bne .LroundLoop + sub ip, #(8*24) + bx lr +.p2align 2 +.ltorg +.size KeccakF_armv7a_neon_asm,.-KeccakF_armv7a_neon_asm; + + +@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state) callable from C +.p2align 3 +.global _gcry_keccak_permute_armv7_neon +.type _gcry_keccak_permute_armv7_neon,%function; +_gcry_keccak_permute_armv7_neon: + + push {ip, lr} + vpush {q4-q7} + sub sp,sp, #5*8 + + vldr.64 d0, [r0, #0*8] + vldr.64 d12, [r0, #1*8] + vldr.64 d17, [r0, #2*8] + vldr.64 d22, [r0, #3*8] + vldr.64 d27, [r0, #4*8] + + GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); + + vldr.64 d1, [r0, #5*8] + vldr.64 d13, [r0, #6*8] + vldr.64 d18, [r0, #7*8] + vldr.64 d23, [r0, #8*8] + vldr.64 d28, [r0, #9*8] + + vldr.64 d2, [r0, #10*8] + vldr.64 d14, [r0, #11*8] + vldr.64 d19, [r0, #12*8] + vldr.64 d24, [r0, #13*8] + vldr.64 d29, [r0, #14*8] + + vldr.64 d3, [r0, #15*8] + vldr.64 d15, [r0, 
#16*8] + vldr.64 d20, [r0, #17*8] + vldr.64 d25, [r0, #18*8] + vldr.64 d30, [r0, #19*8] + + vldr.64 d4, [r0, #20*8] + vldr.64 d16, [r0, #21*8] + vldr.64 d21, [r0, #22*8] + vldr.64 d26, [r0, #23*8] + vldr.64 d31, [r0, #24*8] + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + mov r1, r0 + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + vpop.64 { d0- d4 } + + vstr.64 d0, [r1, #0*8] + vstr.64 d12, [r1, #1*8] + vstr.64 d17, [r1, #2*8] + vstr.64 d22, [r1, #3*8] + vstr.64 d27, [r1, #4*8] + + vstr.64 d1, [r1, #5*8] + vstr.64 d13, [r1, #6*8] + vstr.64 d18, [r1, #7*8] + vstr.64 d23, [r1, #8*8] + vstr.64 d28, [r1, #9*8] + + vstr.64 d2, [r1, #10*8] + vstr.64 d14, [r1, #11*8] + vstr.64 d19, [r1, #12*8] + vstr.64 d24, [r1, #13*8] + vstr.64 d29, [r1, #14*8] + + vstr.64 d3, [r1, #15*8] + vstr.64 d15, [r1, #16*8] + vstr.64 d20, [r1, #17*8] + vstr.64 d25, [r1, #18*8] + vstr.64 d30, [r1, #19*8] + + vstr.64 d4, [r1, #20*8] + vstr.64 d16, [r1, #21*8] + vstr.64 d21, [r1, #22*8] + vstr.64 d26, [r1, #23*8] + vstr.64 d31, [r1, #24*8] + + mov r0, #112 + vpop {q4-q7} + pop {ip, pc} +.p2align 2 +.ltorg +.size _gcry_keccak_permute_armv7_neon,.-_gcry_keccak_permute_armv7_neon; + +@//unsigned _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, @r4 +@ int pos, @r1 +@ const byte *lanes, @r2 +@ unsigned int nlanes, @r3 +@ int blocklanes) @ r5 callable from C +.p2align 3 +.global _gcry_keccak_absorb_lanes64_armv7_neon +.type _gcry_keccak_absorb_lanes64_armv7_neon,%function; +_gcry_keccak_absorb_lanes64_armv7_neon: + + cmp r3, #0 @ nlanes == 0 + itt eq + moveq r0, #0 + bxeq lr + + push {r4-r5, ip, lr} + beq .Lout + mov r4, r0 + ldr r5, [sp, #(4*4)] + vpush {q4-q7} + + @ load state + vldr.64 d0, [r4, #0*8] + vldr.64 d12, [r4, #1*8] + vldr.64 d17, [r4, #2*8] + vldr.64 d22, [r4, #3*8] + vldr.64 d27, [r4, #4*8] + + GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); + + vldr.64 d1,
[r4, #5*8] + vldr.64 d13, [r4, #6*8] + vldr.64 d18, [r4, #7*8] + vldr.64 d23, [r4, #8*8] + vldr.64 d28, [r4, #9*8] + + vldr.64 d2, [r4, #10*8] + vldr.64 d14, [r4, #11*8] + vldr.64 d19, [r4, #12*8] + vldr.64 d24, [r4, #13*8] + vldr.64 d29, [r4, #14*8] + + vldr.64 d3, [r4, #15*8] + vldr.64 d15, [r4, #16*8] + vldr.64 d20, [r4, #17*8] + vldr.64 d25, [r4, #18*8] + vldr.64 d30, [r4, #19*8] + + vldr.64 d4, [r4, #20*8] + vldr.64 d16, [r4, #21*8] + vldr.64 d21, [r4, #22*8] + vldr.64 d26, [r4, #23*8] + vldr.64 d31, [r4, #24*8] + +.Lmain_loop: + + @ detect absorb mode (full blocks vs lanes) + + cmp r1, #0 @ pos != 0 + bne .Llanes_loop + +.Lmain_loop_pos0: + + @ full blocks mode + + @ switch (blocksize) + cmp r5, #21 + beq .Lfull_block_21 + cmp r5, #18 + beq .Lfull_block_18 + cmp r5, #17 + beq .Lfull_block_17 + cmp r5, #13 + beq .Lfull_block_13 + cmp r5, #9 + beq .Lfull_block_9 + + @ unknown blocksize + b .Llanes_loop + +.Lfull_block_21: + + @ SHAKE128 + + cmp r3, #21 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d8}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + vld1.64 {d9-d11}, [r2]! + veor d29, d5 + + veor d3, d6 + veor d15, d7 + veor d20, d8 + veor d25, d9 + veor d30, d10 + + veor d4, d11 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #21 @ nlanes -= 21 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_21 + +.Lfull_block_18: + + @ SHA3-224 + + cmp r3, #18 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! 
+ veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d8}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + veor d29, d5 + + veor d3, d6 + veor d15, d7 + veor d20, d8 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #18 @ nlanes -= 18 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_18 + +.Lfull_block_17: + + @ SHA3-256 & SHAKE256 + + cmp r3, #17 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d7}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + veor d29, d5 + + veor d3, d6 + veor d15, d7 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #17 @ nlanes -= 17 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_17 + +.Lfull_block_13: + + @ SHA3-384 + + cmp r3, #13 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d10}, [r2]! 
+ veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + veor d14, d9 + veor d19, d10 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #13 @ nlanes -= 13 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_13 + +.Lfull_block_9: + + @ SHA3-512 + + cmp r3, #9 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d6}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + veor d18, d5 + veor d23, d6 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #9 @ nlanes -= 9 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_9 + +.Llanes_loop: + + @ per-lane mode + + @ switch (pos) + ldrb r0, [pc, r1] + add pc, pc, r0, lsl #2 +.Lswitch_table: + .byte (.Llane0-.Lswitch_table-4)/4 + .byte (.Llane1-.Lswitch_table-4)/4 + .byte (.Llane2-.Lswitch_table-4)/4 + .byte (.Llane3-.Lswitch_table-4)/4 + .byte (.Llane4-.Lswitch_table-4)/4 + .byte (.Llane5-.Lswitch_table-4)/4 + .byte (.Llane6-.Lswitch_table-4)/4 + .byte (.Llane7-.Lswitch_table-4)/4 + .byte (.Llane8-.Lswitch_table-4)/4 + .byte (.Llane9-.Lswitch_table-4)/4 + .byte (.Llane10-.Lswitch_table-4)/4 + .byte (.Llane11-.Lswitch_table-4)/4 + .byte (.Llane12-.Lswitch_table-4)/4 + .byte (.Llane13-.Lswitch_table-4)/4 + .byte (.Llane14-.Lswitch_table-4)/4 + .byte (.Llane15-.Lswitch_table-4)/4 + .byte (.Llane16-.Lswitch_table-4)/4 + .byte (.Llane17-.Lswitch_table-4)/4 + .byte (.Llane18-.Lswitch_table-4)/4 + .byte (.Llane19-.Lswitch_table-4)/4 + .byte (.Llane20-.Lswitch_table-4)/4 + .byte (.Llane21-.Lswitch_table-4)/4 + .byte 
(.Llane22-.Lswitch_table-4)/4 + .byte (.Llane23-.Lswitch_table-4)/4 + .byte (.Llane24-.Lswitch_table-4)/4 +.p2align 2 + +#define ABSORB_LANE(label, vreg) \ + label: \ + add r1, #1; \ + vld1.64 d5, [r2]!; \ + cmp r1, r5; /* pos == blocklanes */ \ + veor vreg, vreg, d5; \ + beq .Llanes_permute; \ + subs r3, #1; \ + beq .Ldone; + + ABSORB_LANE(.Llane0, d0) + ABSORB_LANE(.Llane1, d12) + ABSORB_LANE(.Llane2, d17) + ABSORB_LANE(.Llane3, d22) + ABSORB_LANE(.Llane4, d27) + + ABSORB_LANE(.Llane5, d1) + ABSORB_LANE(.Llane6, d13) + ABSORB_LANE(.Llane7, d18) + ABSORB_LANE(.Llane8, d23) + ABSORB_LANE(.Llane9, d28) + + ABSORB_LANE(.Llane10, d2) + ABSORB_LANE(.Llane11, d14) + ABSORB_LANE(.Llane12, d19) + ABSORB_LANE(.Llane13, d24) + ABSORB_LANE(.Llane14, d29) + + ABSORB_LANE(.Llane15, d3) + ABSORB_LANE(.Llane16, d15) + ABSORB_LANE(.Llane17, d20) + ABSORB_LANE(.Llane18, d25) + ABSORB_LANE(.Llane19, d30) + + ABSORB_LANE(.Llane20, d4) + ABSORB_LANE(.Llane21, d16) + ABSORB_LANE(.Llane22, d21) + ABSORB_LANE(.Llane23, d26) + ABSORB_LANE(.Llane24, d31) + + b .Llanes_loop + +.Llanes_permute: + + sub sp,sp, #5*8 + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + mov r1, #0 @ pos <= 0 + subs r3, #1 + + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lmain_loop_pos0 + +.Ldone: + + @ save state + vstr.64 d0, [r4, #0*8] + vstr.64 d12, [r4, #1*8] + vstr.64 d17, [r4, #2*8] + vstr.64 d22, [r4, #3*8] + vstr.64 d27, [r4, #4*8] + + vstr.64 d1, [r4, #5*8] + vstr.64 d13, [r4, #6*8] + vstr.64 d18, [r4, #7*8] + vstr.64 d23, [r4, #8*8] + vstr.64 d28, [r4, #9*8] + + vstr.64 d2, [r4, #10*8] + vstr.64 d14, [r4, #11*8] + vstr.64 d19, [r4, #12*8] + vstr.64 d24, [r4, #13*8] + vstr.64 d29, [r4, #14*8] + + vstr.64 d3, [r4, #15*8] + vstr.64 d15, [r4, #16*8] + vstr.64 d20, [r4, #17*8] + vstr.64 d25, [r4, #18*8] + vstr.64 d30, [r4, #19*8] + + vstr.64 
d4, [r4, #20*8] + vstr.64 d16, [r4, #21*8] + vstr.64 d21, [r4, #22*8] + vstr.64 d26, [r4, #23*8] + vstr.64 d31, [r4, #24*8] + + mov r0, #120 + vpop {q4-q7} +.Lout: + pop {r4-r5, ip, pc} +.p2align 2 +.ltorg +.size _gcry_keccak_absorb_lanes64_armv7_neon,.-_gcry_keccak_absorb_lanes64_armv7_neon; + +#endif diff --git a/cipher/keccak.c b/cipher/keccak.c index ce57860..0bb3155 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -59,7 +59,19 @@ #endif -#ifdef USE_64BIT +/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly + * code. */ +#undef USE_64BIT_ARM_NEON +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_64BIT_ARM_NEON 1 +# endif +#endif /*ENABLE_NEON_SUPPORT*/ + + +#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON) # define NEED_COMMON64 1 #endif @@ -109,7 +121,7 @@ typedef struct KECCAK_CONTEXT_S #ifdef NEED_COMMON64 -static const u64 round_consts_64bit[24] = +const u64 _gcry_keccak_round_consts_64bit[24 + 1] = { U64_C(0x0000000000000001), U64_C(0x0000000000008082), U64_C(0x800000000000808A), U64_C(0x8000000080008000), @@ -122,7 +134,8 @@ static const u64 round_consts_64bit[24] = U64_C(0x8000000000008002), U64_C(0x8000000000000080), U64_C(0x000000000000800A), U64_C(0x800000008000000A), U64_C(0x8000000080008081), U64_C(0x8000000000008080), - U64_C(0x0000000080000001), U64_C(0x8000000080008008) + U64_C(0x0000000080000001), U64_C(0x8000000080008008), + U64_C(0xFFFFFFFFFFFFFFFF) }; static unsigned int @@ -400,6 +413,54 @@ static const keccak_ops_t keccak_bmi2_64_ops = #endif /* USE_64BIT_BMI2 */ +/* 64-bit ARMv7/NEON implementation. 
*/ +#ifdef USE_64BIT_ARM_NEON + +unsigned int _gcry_keccak_permute_armv7_neon(u64 *state); +unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos, + const byte *lanes, + unsigned int nlanes, + int blocklanes); + +static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd) +{ + return _gcry_keccak_permute_armv7_neon(hd->u.state64); +} + +static unsigned int +keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + if (blocklanes < 0) + { + /* blocklanes == -1, permutationless absorb from keccak_final. */ + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + } + + return 0; + } + else + { + return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes, + nlanes, blocklanes); + } +} + +static const keccak_ops_t keccak_armv7_neon_64_ops = +{ + .permute = keccak_permute64_armv7_neon, + .absorb = keccak_absorb_lanes64_armv7_neon, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_ARM_NEON */ + + /* Construct generic 32-bit implementation. */ #ifdef USE_32BIT @@ -662,6 +723,10 @@ keccak_init (int algo, void *context, unsigned int flags) /* Select optimized implementation based in hw features. 
*/ if (0) {} +#ifdef USE_64BIT_ARM_NEON + else if (features & HWF_ARM_NEON) + ctx->ops = &keccak_armv7_neon_64_ops; +#endif #ifdef USE_64BIT_BMI2 else if (features & HWF_INTEL_BMI2) ctx->ops = &keccak_bmi2_64_ops; diff --git a/cipher/keccak_permute_64.h b/cipher/keccak_permute_64.h index 6f24217..1a80192 100644 --- a/cipher/keccak_permute_64.h +++ b/cipher/keccak_permute_64.h @@ -25,7 +25,7 @@ static unsigned int KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) { - const u64 *round_consts = round_consts_64bit; + const u64 *round_consts = _gcry_keccak_round_consts_64bit; u64 Aba, Abe, Abi, Abo, Abu; u64 Aga, Age, Agi, Ago, Agu; u64 Aka, Ake, Aki, Ako, Aku; diff --git a/configure.ac b/configure.ac index 2acfa36..ed37ab5 100644 --- a/configure.ac +++ b/configure.ac @@ -2108,7 +2108,7 @@ if test "$found" = "1" ; then if test x"$neonsupport" = xyes ; then # Build with the NEON implementation - : + GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak-armv7-neon.lo" fi fi From jussi.kivilinna at iki.fi Sun Nov 1 20:06:14 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 01 Nov 2015 21:06:14 +0200 Subject: [PATCH 2/4] Optimize Keccak 64-bit absorb functions In-Reply-To: <20151101190609.1607.2281.stgit@localhost6.localdomain6> References: <20151101190609.1607.2281.stgit@localhost6.localdomain6> Message-ID: <20151101190614.1607.39351.stgit@localhost6.localdomain6> * cipher/keccak.c [USE_64BIT] [__x86_64__] (absorb_lanes64_8) (absorb_lanes64_4, absorb_lanes64_2, absorb_lanes64_1): New. * cipher/keccak.c [USE_64BIT] [!__x86_64__] (absorb_lanes64_8) (absorb_lanes64_4, absorb_lanes64_2, absorb_lanes64_1): New. [USE_64BIT] (KECCAK_F1600_ABSORB_FUNC_NAME): New. [USE_64BIT] (keccak_absorb_lanes64): Remove. [USE_64BIT_SHLD] (KECCAK_F1600_ABSORB_FUNC_NAME): New. [USE_64BIT_SHLD] (keccak_absorb_lanes64_shld): Remove. [USE_64BIT_BMI2] (KECCAK_F1600_ABSORB_FUNC_NAME): New. [USE_64BIT_BMI2] (keccak_absorb_lanes64_bmi2): Remove. 
* cipher/keccak_permute_64.h (KECCAK_F1600_ABSORB_FUNC_NAME): New. -- Optimize 64-bit absorb functions for small speed-up. After this change, 64-bit BMI2 implementation matches speed of fastest results from SUPERCOP for Intel Haswell CPUs (long messages). Benchmark on Intel Haswell @ 3.2 Ghz: Before: | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 2.32 ns/B 411.7 MiB/s 7.41 c/B SHAKE256 | 2.84 ns/B 336.2 MiB/s 9.08 c/B SHA3-224 | 2.69 ns/B 354.9 MiB/s 8.60 c/B SHA3-256 | 2.84 ns/B 336.0 MiB/s 9.08 c/B SHA3-384 | 3.69 ns/B 258.4 MiB/s 11.81 c/B SHA3-512 | 5.30 ns/B 179.9 MiB/s 16.97 c/B After: | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 2.27 ns/B 420.6 MiB/s 7.26 c/B SHAKE256 | 2.79 ns/B 341.4 MiB/s 8.94 c/B SHA3-224 | 2.64 ns/B 361.7 MiB/s 8.44 c/B SHA3-256 | 2.79 ns/B 341.5 MiB/s 8.94 c/B SHA3-384 | 3.65 ns/B 261.4 MiB/s 11.68 c/B SHA3-512 | 5.27 ns/B 181.0 MiB/s 16.87 c/B Signed-off-by: Jussi Kivilinna --- cipher/keccak.c | 159 ++++++++++++++++++++++++++------------------ cipher/keccak_permute_64.h | 99 +++++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 66 deletions(-) diff --git a/cipher/keccak.c b/cipher/keccak.c index f4f0ef3..ce57860 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -223,38 +223,105 @@ keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1) /* Construct generic 64-bit implementation. 
*/ #ifdef USE_64BIT +#if __GNUC__ >= 4 && defined(__x86_64__) + +static inline void absorb_lanes64_8(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "movdqu 1*16(%[dst]), %%xmm1\n\t" + "movdqu 1*16(%[in]), %%xmm5\n\t" + "movdqu 2*16(%[dst]), %%xmm2\n\t" + "movdqu 3*16(%[dst]), %%xmm3\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu 2*16(%[in]), %%xmm4\n\t" + "movdqu 3*16(%[in]), %%xmm5\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + "pxor %%xmm4, %%xmm2\n\t" + "movdqu %%xmm1, 1*16(%[dst])\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm2, 2*16(%[dst])\n\t" + "movdqu %%xmm3, 3*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); +} + +static inline void absorb_lanes64_4(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "movdqu 1*16(%[dst]), %%xmm1\n\t" + "movdqu 1*16(%[in]), %%xmm5\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + "movdqu %%xmm1, 1*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm1", "xmm4", "xmm5", "memory"); +} + +static inline void absorb_lanes64_2(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm4", "memory"); +} + +#else /* __x86_64__ */ + +static inline void absorb_lanes64_8(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); + dst[2] ^= buf_get_le64(in + 8 * 2); + dst[3] ^= buf_get_le64(in + 8 * 3); + dst[4] ^= buf_get_le64(in + 8 * 4); + dst[5] ^= buf_get_le64(in + 8 * 5); + dst[6] ^= buf_get_le64(in + 8 * 6); + dst[7] ^= buf_get_le64(in + 8 * 7); +} + +static inline void absorb_lanes64_4(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); 
+ dst[1] ^= buf_get_le64(in + 8 * 1); + dst[2] ^= buf_get_le64(in + 8 * 2); + dst[3] ^= buf_get_le64(in + 8 * 3); +} + +static inline void absorb_lanes64_2(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); +} + +#endif /* !__x86_64__ */ + +static inline void absorb_lanes64_1(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); +} + + # define ANDN64(x, y) (~(x) & (y)) # define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \ ((x) >> ((64 - (unsigned int)(n)) & 63))) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64 +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64 # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME - -static unsigned int -keccak_absorb_lanes64(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) -{ - unsigned int burn = 0; - - while (nlanes) - { - hd->u.state64[pos] ^= buf_get_le64(lanes); - lanes += 8; - nlanes--; - - if (++pos == blocklanes) - { - burn = keccak_f1600_state_permute64(hd); - pos = 0; - } - } - - return burn; -} +# undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_generic64_ops = { @@ -279,33 +346,13 @@ static const keccak_ops_t keccak_generic64_ops = tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME - -static unsigned int -keccak_absorb_lanes64_shld(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) -{ - unsigned int burn = 0; - - while (nlanes) - { - hd->u.state64[pos] ^= buf_get_le64(lanes); - lanes += 8; - nlanes--; - - if (++pos == blocklanes) - { - burn = keccak_f1600_state_permute64_shld(hd); - pos = 0; - } - } - - return burn; -} +# undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t 
keccak_shld_64_ops = { @@ -335,33 +382,13 @@ static const keccak_ops_t keccak_shld_64_ops = tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2 +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2 # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME - -static unsigned int -keccak_absorb_lanes64_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) -{ - unsigned int burn = 0; - - while (nlanes) - { - hd->u.state64[pos] ^= buf_get_le64(lanes); - lanes += 8; - nlanes--; - - if (++pos == blocklanes) - { - burn = keccak_f1600_state_permute64_bmi2(hd); - pos = 0; - } - } - - return burn; -} +# undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_bmi2_64_ops = { diff --git a/cipher/keccak_permute_64.h b/cipher/keccak_permute_64.h index 1264f19..6f24217 100644 --- a/cipher/keccak_permute_64.h +++ b/cipher/keccak_permute_64.h @@ -288,3 +288,102 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) return sizeof(void *) * 4 + sizeof(u64) * 12 * 5; } + +static unsigned int +KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + unsigned int burn = 0; + + while (nlanes) + { + switch (blocklanes) + { + case 21: + /* SHAKE128 */ + while (pos == 0 && nlanes >= 21) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8); + absorb_lanes64_8(&hd->u.state64[12], lanes + 8 * 12); + absorb_lanes64_1(&hd->u.state64[20], lanes + 8 * 20); + lanes += 8 * 21; + nlanes -= 21; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 18: + /* SHA3-224 */ + while (pos == 0 && nlanes >= 18) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_2(&hd->u.state64[8], lanes + 8 * 8); + absorb_lanes64_8(&hd->u.state64[10], lanes + 8 * 10); + lanes += 8 * 18; + nlanes -= 18; + + burn = 
KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 17: + /* SHA3-256 & SHAKE256 */ + while (pos == 0 && nlanes >= 17) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_8(&hd->u.state64[8], lanes + 8 * 8); + absorb_lanes64_1(&hd->u.state64[16], lanes + 8 * 16); + lanes += 8 * 17; + nlanes -= 17; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 13: + /* SHA3-384 */ + while (pos == 0 && nlanes >= 13) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8); + absorb_lanes64_1(&hd->u.state64[12], lanes + 8 * 12); + lanes += 8 * 13; + nlanes -= 13; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 9: + /* SHA3-512 */ + while (pos == 0 && nlanes >= 9) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_1(&hd->u.state64[8], lanes + 8 * 8); + lanes += 8 * 9; + nlanes -= 9; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + } + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + + if (++pos == blocklanes) + { + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + pos = 0; + break; + } + } + } + + return burn; +} From jussi.kivilinna at iki.fi Sun Nov 1 20:06:25 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 01 Nov 2015 21:06:25 +0200 Subject: [PATCH 4/4] Improve performance of Tiger hash algorithms In-Reply-To: <20151101190609.1607.2281.stgit@localhost6.localdomain6> References: <20151101190609.1607.2281.stgit@localhost6.localdomain6> Message-ID: <20151101190625.1607.56947.stgit@localhost6.localdomain6> * cipher/tiger.c (tiger_round, pass, key_schedule): Convert functions to macros. (transform_blk): Pass variable names instead of pointers to 'pass'. 
-- Benchmark results on Intel Haswell @ 3.2 Ghz: Before: | nanosecs/byte mebibytes/sec cycles/byte TIGER | 3.25 ns/B 293.5 MiB/s 10.40 c/B After (1.75x faster): | nanosecs/byte mebibytes/sec cycles/byte TIGER | 1.85 ns/B 515.3 MiB/s 5.92 c/B Benchmark results on Cortex-A8 @ 1008 Mhz: Before: | nanosecs/byte mebibytes/sec cycles/byte TIGER | 63.42 ns/B 15.04 MiB/s 63.93 c/B After (1.26x faster): | nanosecs/byte mebibytes/sec cycles/byte TIGER | 49.99 ns/B 19.08 MiB/s 50.39 c/B Signed-off-by: Jussi Kivilinna --- cipher/tiger.c | 104 ++++++++++++++++++++++---------------------------------- 1 file changed, 40 insertions(+), 64 deletions(-) diff --git a/cipher/tiger.c b/cipher/tiger.c index 078133a..516bd44 100644 --- a/cipher/tiger.c +++ b/cipher/tiger.c @@ -633,68 +633,44 @@ tiger2_init (void *context, unsigned int flags) do_init (context, 2); } -static void -tiger_round( u64 *ra, u64 *rb, u64 *rc, u64 x, int mul ) -{ - u64 a = *ra; - u64 b = *rb; - u64 c = *rc; - - c ^= x; - a -= ( sbox1[ c & 0xff ] ^ sbox2[ (c >> 16) & 0xff ] - ^ sbox3[ (c >> 32) & 0xff ] ^ sbox4[ (c >> 48) & 0xff ]); - b += ( sbox4[ (c >> 8) & 0xff ] ^ sbox3[ (c >> 24) & 0xff ] - ^ sbox2[ (c >> 40) & 0xff ] ^ sbox1[ (c >> 56) & 0xff ]); - b *= mul; - - *ra = a; - *rb = b; - *rc = c; -} - - -static void -pass( u64 *ra, u64 *rb, u64 *rc, u64 *x, int mul ) -{ - u64 a = *ra; - u64 b = *rb; - u64 c = *rc; - - tiger_round( &a, &b, &c, x[0], mul ); - tiger_round( &b, &c, &a, x[1], mul ); - tiger_round( &c, &a, &b, x[2], mul ); - tiger_round( &a, &b, &c, x[3], mul ); - tiger_round( &b, &c, &a, x[4], mul ); - tiger_round( &c, &a, &b, x[5], mul ); - tiger_round( &a, &b, &c, x[6], mul ); - tiger_round( &b, &c, &a, x[7], mul ); - - *ra = a; - *rb = b; - *rc = c; -} - -static void -key_schedule( u64 *x ) -{ - x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; - x[1] ^= x[0]; - x[2] += x[1]; - x[3] -= x[2] ^ ((~x[1]) << 19 ); - x[4] ^= x[3]; - x[5] += x[4]; - x[6] -= x[5] ^ ((~x[4]) >> 23 ); - x[7] ^= x[6]; - x[0] += x[7]; - 
x[1] -= x[0] ^ ((~x[7]) << 19 ); - x[2] ^= x[1]; - x[3] += x[2]; - x[4] -= x[3] ^ ((~x[2]) >> 23 ); - x[5] ^= x[4]; - x[6] += x[5]; - x[7] -= x[6] ^ 0x0123456789abcdefLL; -} +#define tiger_round(xa, xb, xc, xx, xmul) { \ + xc ^= xx; \ + xa -= ( sbox1[ (xc) & 0xff ] ^ sbox2[ ((xc) >> 16) & 0xff ] \ + ^ sbox3[ ((xc) >> 32) & 0xff ] ^ sbox4[ ((xc) >> 48) & 0xff ]); \ + xb += ( sbox4[ ((xc) >> 8) & 0xff ] ^ sbox3[ ((xc) >> 24) & 0xff ] \ + ^ sbox2[ ((xc) >> 40) & 0xff ] ^ sbox1[ ((xc) >> 56) & 0xff ]); \ + xb *= xmul; } + + +#define pass(ya, yb, yc, yx, ymul) { \ + tiger_round( ya, yb, yc, yx[0], ymul ); \ + tiger_round( yb, yc, ya, yx[1], ymul ); \ + tiger_round( yc, ya, yb, yx[2], ymul ); \ + tiger_round( ya, yb, yc, yx[3], ymul ); \ + tiger_round( yb, yc, ya, yx[4], ymul ); \ + tiger_round( yc, ya, yb, yx[5], ymul ); \ + tiger_round( ya, yb, yc, yx[6], ymul ); \ + tiger_round( yb, yc, ya, yx[7], ymul ); } + + +#define key_schedule(x) { \ + x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; \ + x[1] ^= x[0]; \ + x[2] += x[1]; \ + x[3] -= x[2] ^ ((~x[1]) << 19 ); \ + x[4] ^= x[3]; \ + x[5] += x[4]; \ + x[6] -= x[5] ^ ((~x[4]) >> 23 ); \ + x[7] ^= x[6]; \ + x[0] += x[7]; \ + x[1] -= x[0] ^ ((~x[7]) << 19 ); \ + x[2] ^= x[1]; \ + x[3] += x[2]; \ + x[4] -= x[3] ^ ((~x[2]) >> 23 ); \ + x[5] ^= x[4]; \ + x[6] += x[5]; \ + x[7] -= x[6] ^ 0x0123456789abcdefLL; } /**************** @@ -716,11 +692,11 @@ transform_blk ( void *ctx, const unsigned char *data ) b = bb = hd->b; c = cc = hd->c; - pass( &a, &b, &c, x, 5); + pass( a, b, c, x, 5); key_schedule( x ); - pass( &c, &a, &b, x, 7); + pass( c, a, b, x, 7); key_schedule( x ); - pass( &b, &c, &a, x, 9); + pass( b, c, a, x, 9); /* feedforward */ a ^= aa; From cvs at cvs.gnupg.org Sun Nov 1 20:01:41 2015 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Sun, 01 Nov 2015 20:01:41 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-276-gc0b9eee Message-ID: This is an automated email from the git hooks/post-receive script. 
It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via c0b9eee2d93a13930244f9ce0c14ed6b4aeb6c29 (commit) via 28de6f9e16e386018e81a9cdaee596be7616ccab (commit) via 92ad19873562cfce7bcc4a0b5aed8195d8284cfc (commit) via 577dc2b63ceca6a8a716256d034ea4e7414f65fa (commit) via cee2e122ec6c1886957a8d47498eb63a6a921725 (commit) from 74184c28fbe7ff58cf57f0094ef957d94045da7d (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit c0b9eee2d93a13930244f9ce0c14ed6b4aeb6c29 Author: Jussi Kivilinna Date: Sun Oct 25 20:34:50 2015 +0200 Keccak: Add SHAKE Extendable-Output Functions * src/hash-common.c (_gcry_hash_selftest_check_one): Add handling for XOFs. * src/keccak.c (keccak_ops_t): Rename 'extract_inplace' to 'extract' and add 'pos' argument. (KECCAK_CONTEXT): Add 'suffix'. (keccak_extract_inplace64): Rename to... (keccak_extract64): ...this; Add handling for 'pos' argument. (keccak_extract_inplace32bi): Rename to... (keccak_extract32bi): ...this; Add handling for 'pos' argument. (keccak_extract_inplace64): Rename to... (keccak_extract64): ...this; Add handling for 'pos' argument. (keccak_extract_inplace32bi_bmi2): Rename to... (keccak_extract32bi_bmi2): ...this; Add handling for 'pos' argument. (keccak_init): Setup 'suffix'; add SHAKE128 & SHAKE256. (shake128_init, shake256_init): New. (keccak_final): Do not initial permute for SHAKE output; use correct suffix for SHAKE. (keccak_extract): New. (keccak_selftests_keccak): Add SHAKE128 & SHAKE256 test-vectors. (run_selftests): Add SHAKE128 & SHAKE256. (shake128_asn, oid_spec_shake128, shake256_asn, oid_spec_shake256) (_gcry_digest_spec_shake128, _gcry_digest_spec_shake256): New. * cipher/md.c (digest_list): Add SHAKE128 & SHAKE256. 
* doc/gcrypt.texi: Ditto. * src/cipher.h (_gcry_digest_spec_shake128) (_gcry_digest_spec_shake256): New. * src/gcrypt.h.in (GCRY_MD_SHAKE128, GCRY_MD_SHAKE256): New. * tests/basic.c (check_one_md): Add XOF check; Add 'elen' argument. (check_one_md_multi): Skip if algo is XOF. (check_digests): Add SHAKE128 & SHAKE256 test vectors. * tests/bench-slope.c (kdf_bench_one): Skip XOFs. -- Signed-off-by: Jussi Kivilinna diff --git a/cipher/hash-common.c b/cipher/hash-common.c index 6743f09..a750d644 100644 --- a/cipher/hash-common.c +++ b/cipher/hash-common.c @@ -49,8 +49,12 @@ _gcry_hash_selftest_check_one (int algo, gcry_error_t err = 0; gcry_md_hd_t hd; unsigned char *digest; + char aaa[1000]; + int xof = 0; - if (_gcry_md_get_algo_dlen (algo) != expectlen) + if (_gcry_md_get_algo_dlen (algo) == 0) + xof = 1; + else if (_gcry_md_get_algo_dlen (algo) != expectlen) return "digest size does not match expected size"; err = _gcry_md_open (&hd, algo, 0); @@ -65,7 +69,6 @@ _gcry_hash_selftest_check_one (int algo, case 1: /* Hash one million times an "a". */ { - char aaa[1000]; int i; /* Write in odd size chunks so that we test the buffering. 
*/ @@ -81,10 +84,23 @@ _gcry_hash_selftest_check_one (int algo, if (!result) { - digest = _gcry_md_read (hd, algo); - - if ( memcmp (digest, expect, expectlen) ) - result = "digest mismatch"; + if (!xof) + { + digest = _gcry_md_read (hd, algo); + + if ( memcmp (digest, expect, expectlen) ) + result = "digest mismatch"; + } + else + { + gcry_assert(expectlen <= sizeof(aaa)); + + err = _gcry_md_extract (hd, algo, aaa, expectlen); + if (err) + result = "error extracting output from XOF"; + else if ( memcmp (aaa, expect, expectlen) ) + result = "digest mismatch"; + } } _gcry_md_close (hd); diff --git a/cipher/keccak.c b/cipher/keccak.c index d46d9cb..f4f0ef3 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -90,7 +90,8 @@ typedef struct unsigned int (*permute)(KECCAK_STATE *hd); unsigned int (*absorb)(KECCAK_STATE *hd, int pos, const byte *lanes, unsigned int nlanes, int blocklanes); - unsigned int (*extract_inplace) (KECCAK_STATE *hd, unsigned int outlen); + unsigned int (*extract) (KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen); } keccak_ops_t; @@ -100,6 +101,7 @@ typedef struct KECCAK_CONTEXT_S unsigned int outlen; unsigned int blocksize; unsigned int count; + unsigned int suffix; const keccak_ops_t *ops; } KECCAK_CONTEXT; @@ -124,13 +126,18 @@ static const u64 round_consts_64bit[24] = }; static unsigned int -keccak_extract_inplace64(KECCAK_STATE *hd, unsigned int outlen) +keccak_extract64(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) { unsigned int i; - for (i = 0; i < outlen / 8 + !!(outlen % 8); i++) + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). 
*/ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { - hd->u.state64[i] = le_bswap64(hd->u.state64[i]); + u64 tmp = hd->u.state64[i]; + buf_put_le64(outbuf, tmp); + outbuf += 8; } return 0; @@ -158,14 +165,17 @@ static const u32 round_consts_32bit[2 * 24] = }; static unsigned int -keccak_extract_inplace32bi(KECCAK_STATE *hd, unsigned int outlen) +keccak_extract32bi(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) { unsigned int i; u32 x0; u32 x1; u32 t; - for (i = 0; i < outlen / 8 + !!(outlen % 8); i++) + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { x0 = hd->u.state32bi[i * 2 + 0]; x1 = hd->u.state32bi[i * 2 + 1]; @@ -182,8 +192,9 @@ keccak_extract_inplace32bi(KECCAK_STATE *hd, unsigned int outlen) t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); - hd->u.state32bi[i * 2 + 0] = le_bswap32(x0); - hd->u.state32bi[i * 2 + 1] = le_bswap32(x1); + buf_put_le32(&outbuf[0], x0); + buf_put_le32(&outbuf[4], x1); + outbuf += 8; } return 0; @@ -249,7 +260,7 @@ static const keccak_ops_t keccak_generic64_ops = { .permute = keccak_f1600_state_permute64, .absorb = keccak_absorb_lanes64, - .extract_inplace = keccak_extract_inplace64, + .extract = keccak_extract64, }; #endif /* USE_64BIT */ @@ -300,7 +311,7 @@ static const keccak_ops_t keccak_shld_64_ops = { .permute = keccak_f1600_state_permute64_shld, .absorb = keccak_absorb_lanes64_shld, - .extract_inplace = keccak_extract_inplace64, + .extract = keccak_extract64, }; #endif /* USE_64BIT_SHLD */ @@ -356,7 +367,7 @@ static const keccak_ops_t keccak_bmi2_64_ops = { .permute = keccak_f1600_state_permute64_bmi2, .absorb = keccak_absorb_lanes64_bmi2, - .extract_inplace = keccak_extract_inplace64, + .extract = keccak_extract64, }; #endif /* USE_64BIT_BMI2 */ @@ -404,7 +415,7 @@ static const keccak_ops_t keccak_generic32bi_ops = { 
.permute = keccak_f1600_state_permute32bi, .absorb = keccak_absorb_lanes32bi, - .extract_inplace = keccak_extract_inplace32bi, + .extract = keccak_extract32bi, }; #endif /* USE_32BIT */ @@ -483,14 +494,17 @@ keccak_absorb_lanes32bi_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes, } static unsigned int -keccak_extract_inplace32bi_bmi2(KECCAK_STATE *hd, unsigned int outlen) +keccak_extract32bi_bmi2(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) { unsigned int i; u32 x0; u32 x1; u32 t; - for (i = 0; i < outlen / 8 + !!(outlen % 8); i++) + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) { x0 = hd->u.state32bi[i * 2 + 0]; x1 = hd->u.state32bi[i * 2 + 1]; @@ -502,8 +516,9 @@ keccak_extract_inplace32bi_bmi2(KECCAK_STATE *hd, unsigned int outlen) x0 = pdep(pext(x0, 0xffff0001), 0xaaaaaaab) | pdep(x0 >> 1, 0x55555554); x1 = pdep(pext(x1, 0xffff0001), 0xaaaaaaab) | pdep(x1 >> 1, 0x55555554); - hd->u.state32bi[i * 2 + 0] = le_bswap32(x0); - hd->u.state32bi[i * 2 + 1] = le_bswap32(x1); + buf_put_le32(&outbuf[0], x0); + buf_put_le32(&outbuf[4], x1); + outbuf += 8; } return 0; @@ -513,7 +528,7 @@ static const keccak_ops_t keccak_bmi2_32bi_ops = { .permute = keccak_f1600_state_permute32bi_bmi2, .absorb = keccak_absorb_lanes32bi_bmi2, - .extract_inplace = keccak_extract_inplace32bi_bmi2, + .extract = keccak_extract32bi_bmi2, }; #endif /* USE_32BIT */ @@ -638,21 +653,35 @@ keccak_init (int algo, void *context, unsigned int flags) switch (algo) { case GCRY_MD_SHA3_224: + ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 1152 / 8; ctx->outlen = 224 / 8; break; case GCRY_MD_SHA3_256: + ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 1088 / 8; ctx->outlen = 256 / 8; break; case GCRY_MD_SHA3_384: + ctx->suffix = SHA3_DELIMITED_SUFFIX; ctx->blocksize = 832 / 8; ctx->outlen = 384 / 8; break; case GCRY_MD_SHA3_512: + ctx->suffix = SHA3_DELIMITED_SUFFIX; 
ctx->blocksize = 576 / 8; ctx->outlen = 512 / 8; break; + case GCRY_MD_SHAKE128: + ctx->suffix = SHAKE_DELIMITED_SUFFIX; + ctx->blocksize = 1344 / 8; + ctx->outlen = 0; + break; + case GCRY_MD_SHAKE256: + ctx->suffix = SHAKE_DELIMITED_SUFFIX; + ctx->blocksize = 1088 / 8; + ctx->outlen = 0; + break; default: BUG(); } @@ -682,6 +711,17 @@ sha3_512_init (void *context, unsigned int flags) keccak_init (GCRY_MD_SHA3_512, context, flags); } +static void +shake128_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHAKE128, context, flags); +} + +static void +shake256_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHAKE256, context, flags); +} /* The routine final terminates the computation and * returns the digest. @@ -696,7 +736,7 @@ keccak_final (void *context) KECCAK_CONTEXT *ctx = context; KECCAK_STATE *hd = &ctx->state; const size_t bsize = ctx->blocksize; - const byte suffix = SHA3_DELIMITED_SUFFIX; + const byte suffix = ctx->suffix; unsigned int nburn, burn = 0; unsigned int lastbytes; byte lane[8]; @@ -716,21 +756,21 @@ keccak_final (void *context) nburn = ctx->ops->absorb(&ctx->state, (bsize - 1) / 8, lane, 1, -1); burn = nburn > burn ? nburn : burn; - /* Switch to the squeezing phase. */ - nburn = ctx->ops->permute(hd); - burn = nburn > burn ? nburn : burn; - - /* Squeeze out all the output blocks */ - if (ctx->outlen < bsize) + if (suffix == SHA3_DELIMITED_SUFFIX) { - /* Output SHA3 digest. */ - nburn = ctx->ops->extract_inplace(hd, ctx->outlen); + /* Switch to the squeezing phase. */ + nburn = ctx->ops->permute(hd); + burn = nburn > burn ? nburn : burn; + + /* Squeeze out the SHA3 digest. */ + nburn = ctx->ops->extract(hd, 0, (void *)hd, ctx->outlen); burn = nburn > burn ? nburn : burn; } else { - /* Output SHAKE digest. */ - BUG(); + /* Output for SHAKE can now be read with md_extract(). 
*/ + + ctx->count = 0; } wipememory(lane, sizeof(lane)); @@ -748,6 +788,124 @@ keccak_read (void *context) } +static void +keccak_extract (void *context, void *out, size_t outlen) +{ + KECCAK_CONTEXT *ctx = context; + KECCAK_STATE *hd = &ctx->state; + const size_t bsize = ctx->blocksize; + unsigned int nburn, burn = 0; + byte *outbuf = out; + unsigned int nlanes; + unsigned int nleft; + unsigned int count; + unsigned int i; + byte lane[8]; + + count = ctx->count; + + while (count && outlen && (outlen < 8 || count % 8)) + { + /* Extract partial lane. */ + nburn = ctx->ops->extract(hd, count / 8, lane, 8); + burn = nburn > burn ? nburn : burn; + + for (i = count % 8; outlen && i < 8; i++) + { + *outbuf++ = lane[i]; + outlen--; + count++; + } + + gcry_assert(count <= bsize); + + if (count == bsize) + count = 0; + } + + if (outlen >= 8 && count) + { + /* Extract tail of partial block. */ + nlanes = outlen / 8; + nleft = (bsize - count) / 8; + nlanes = nlanes < nleft ? nlanes : nleft; + + nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); + burn = nburn > burn ? nburn : burn; + outlen -= nlanes * 8; + outbuf += nlanes * 8; + count += nlanes * 8; + + gcry_assert(count <= bsize); + + if (count == bsize) + count = 0; + } + + while (outlen >= bsize) + { + gcry_assert(count == 0); + + /* Squeeze more. */ + nburn = ctx->ops->permute(hd); + burn = nburn > burn ? nburn : burn; + + /* Extract full block. */ + nburn = ctx->ops->extract(hd, 0, outbuf, bsize); + burn = nburn > burn ? nburn : burn; + + outlen -= bsize; + outbuf += bsize; + } + + if (outlen) + { + gcry_assert(outlen < bsize); + + if (count == 0) + { + /* Squeeze more. */ + nburn = ctx->ops->permute(hd); + burn = nburn > burn ? nburn : burn; + } + + if (outlen >= 8) + { + /* Extract head of partial block. */ + nlanes = outlen / 8; + nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); + burn = nburn > burn ? 
nburn : burn; + outlen -= nlanes * 8; + outbuf += nlanes * 8; + count += nlanes * 8; + + gcry_assert(count < bsize); + } + + if (outlen) + { + /* Extract head of partial lane. */ + nburn = ctx->ops->extract(hd, count / 8, lane, 8); + burn = nburn > burn ? nburn : burn; + + for (i = count % 8; outlen && i < 8; i++) + { + *outbuf++ = lane[i]; + outlen--; + count++; + } + + gcry_assert(count < bsize); + } + } + + ctx->count = count; + + if (burn) + _gcry_burn_stack (burn); +} + + /* Self-test section. @@ -829,6 +987,32 @@ selftests_keccak (int algo, int extended, selftest_report_func_t report) "\xa8\xaa\x18\xac\xe8\x28\x2a\x0e\x0d\xb5\x96\xc9\x0b\x0a\x7b\x87"; hash_len = 64; break; + + case GCRY_MD_SHAKE128: + short_hash = + "\x58\x81\x09\x2d\xd8\x18\xbf\x5c\xf8\xa3\xdd\xb7\x93\xfb\xcb\xa7" + "\x40\x97\xd5\xc5\x26\xa6\xd3\x5f\x97\xb8\x33\x51\x94\x0f\x2c\xc8"; + long_hash = + "\x7b\x6d\xf6\xff\x18\x11\x73\xb6\xd7\x89\x8d\x7f\xf6\x3f\xb0\x7b" + "\x7c\x23\x7d\xaf\x47\x1a\x5a\xe5\x60\x2a\xdb\xcc\xef\x9c\xcf\x4b"; + one_million_a_hash = + "\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11" + "\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58"; + hash_len = 32; + break; + + case GCRY_MD_SHAKE256: + short_hash = + "\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11\x4d" + "\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b\x57\x39"; + long_hash = + "\x98\xbe\x04\x51\x6c\x04\xcc\x73\x59\x3f\xef\x3e\xd0\x35\x2e\xa9" + "\xf6\x44\x39\x42\xd6\x95\x0e\x29\xa3\x72\xa6\x81\xc3\xde\xaf\x45"; + one_million_a_hash = + "\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb" + "\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a"; + hash_len = 32; + break; } what = "short string"; @@ -876,6 +1060,8 @@ run_selftests (int algo, int extended, selftest_report_func_t report) case GCRY_MD_SHA3_256: case GCRY_MD_SHA3_384: case GCRY_MD_SHA3_512: + case GCRY_MD_SHAKE128: + case GCRY_MD_SHAKE256: ec = selftests_keccak (algo, 
extended, report); break; default: @@ -921,7 +1107,22 @@ static gcry_md_oid_spec_t oid_spec_sha3_512[] = { "?" }, { NULL } }; - +static byte shake128_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_shake128[] = + { + { "2.16.840.1.101.3.4.2.11" }, + /* PKCS#1 shake128WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte shake256_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_shake256[] = + { + { "2.16.840.1.101.3.4.2.12" }, + /* PKCS#1 shake256WithRSAEncryption */ + { "?" }, + { NULL } + }; gcry_md_spec_t _gcry_digest_spec_sha3_224 = { @@ -955,3 +1156,19 @@ gcry_md_spec_t _gcry_digest_spec_sha3_512 = sizeof (KECCAK_CONTEXT), run_selftests }; +gcry_md_spec_t _gcry_digest_spec_shake128 = + { + GCRY_MD_SHAKE128, {0, 1}, + "SHAKE128", shake128_asn, DIM (shake128_asn), oid_spec_shake128, 0, + shake128_init, keccak_write, keccak_final, NULL, keccak_extract, + sizeof (KECCAK_CONTEXT), + run_selftests + }; +gcry_md_spec_t _gcry_digest_spec_shake256 = + { + GCRY_MD_SHAKE256, {0, 1}, + "SHAKE256", shake256_asn, DIM (shake256_asn), oid_spec_shake256, 0, + shake256_init, keccak_write, keccak_final, NULL, keccak_extract, + sizeof (KECCAK_CONTEXT), + run_selftests + }; diff --git a/cipher/md.c b/cipher/md.c index 6ef8fee..15d944d 100644 --- a/cipher/md.c +++ b/cipher/md.c @@ -56,6 +56,8 @@ static gcry_md_spec_t *digest_list[] = &_gcry_digest_spec_sha3_256, &_gcry_digest_spec_sha3_384, &_gcry_digest_spec_sha3_512, + &_gcry_digest_spec_shake128, + &_gcry_digest_spec_shake256, #endif #ifdef USE_GOST_R_3411_94 &_gcry_digest_spec_gost3411_94, diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index facdf65..cdb7644 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -3037,7 +3037,7 @@ are also supported. 
@c begin table of hash algorithms @cindex SHA-1 @cindex SHA-224, SHA-256, SHA-384, SHA-512 -@cindex SHA3-224, SHA3-256, SHA3-384, SHA3-512 +@cindex SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128, SHAKE256 @cindex RIPE-MD-160 @cindex MD2, MD4, MD5 @cindex TIGER, TIGER1, TIGER2 @@ -3126,6 +3126,16 @@ See FIPS 202 for the specification. This is the SHA3-384 algorithm which yields a message digest of 64 bytes. See FIPS 202 for the specification. +@item GCRY_MD_SHAKE128 +This is the SHAKE128 extendable-output function (XOF) algorithm with 128 bit +security strength. +See FIPS 202 for the specification. + +@item GCRY_MD_SHAKE256 +This is the SHAKE256 extendable-output function (XOF) algorithm with 256 bit +security strength. +See FIPS 202 for the specification. + @item GCRY_MD_CRC32 This is the ISO 3309 and ITU-T V.42 cyclic redundancy check. It yields an output of 4 bytes. Note that this is not a hash algorithm in the diff --git a/src/cipher.h b/src/cipher.h index d96fdb9..c4b306a 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -295,6 +295,8 @@ extern gcry_md_spec_t _gcry_digest_spec_sha3_224; extern gcry_md_spec_t _gcry_digest_spec_sha3_256; extern gcry_md_spec_t _gcry_digest_spec_sha3_512; extern gcry_md_spec_t _gcry_digest_spec_sha3_384; +extern gcry_md_spec_t _gcry_digest_spec_shake128; +extern gcry_md_spec_t _gcry_digest_spec_shake256; extern gcry_md_spec_t _gcry_digest_spec_tiger; extern gcry_md_spec_t _gcry_digest_spec_tiger1; extern gcry_md_spec_t _gcry_digest_spec_tiger2; diff --git a/src/gcrypt.h.in b/src/gcrypt.h.in index 39be37a..5ddeee3 100644 --- a/src/gcrypt.h.in +++ b/src/gcrypt.h.in @@ -1174,7 +1174,9 @@ enum gcry_md_algos GCRY_MD_SHA3_224 = 312, GCRY_MD_SHA3_256 = 313, GCRY_MD_SHA3_384 = 314, - GCRY_MD_SHA3_512 = 315 + GCRY_MD_SHA3_512 = 315, + GCRY_MD_SHAKE128 = 316, + GCRY_MD_SHAKE256 = 317 }; /* Flags used with the open function. 
*/ diff --git a/tests/basic.c b/tests/basic.c index 75ff349..0762a89 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -5265,13 +5265,15 @@ check_cipher_modes(void) fprintf (stderr, "Completed Cipher Mode checks.\n"); } + static void -check_one_md (int algo, const char *data, int len, const char *expect) +check_one_md (int algo, const char *data, int len, const char *expect, int elen) { gcry_md_hd_t hd, hd2; unsigned char *p; int mdlen; int i; + int xof = 0; gcry_error_t err = 0; err = gcry_md_open (&hd, algo, 0); @@ -5284,8 +5286,15 @@ check_one_md (int algo, const char *data, int len, const char *expect) mdlen = gcry_md_get_algo_dlen (algo); if (mdlen < 1 || mdlen > 500) { - fail ("algo %d, gcry_md_get_algo_dlen failed: %d\n", algo, mdlen); - return; + if (mdlen == 0 && (algo == GCRY_MD_SHAKE128 || algo == GCRY_MD_SHAKE256)) + { + xof = 1; + } + else + { + fail ("algo %d, gcry_md_get_algo_dlen failed: %d\n", algo, mdlen); + return; + } } if (*data == '!' && !data[1]) @@ -5326,19 +5335,168 @@ check_one_md (int algo, const char *data, int len, const char *expect) gcry_md_close (hd); - p = gcry_md_read (hd2, algo); + if (!xof) + { + p = gcry_md_read (hd2, algo); - if (memcmp (p, expect, mdlen)) + if (memcmp (p, expect, mdlen)) + { + printf ("computed: "); + for (i = 0; i < mdlen; i++) + printf ("%02x ", p[i] & 0xFF); + printf ("\nexpected: "); + for (i = 0; i < mdlen; i++) + printf ("%02x ", expect[i] & 0xFF); + printf ("\n"); + + fail ("algo %d, digest mismatch\n", algo); + } + + } + else { - printf ("computed: "); - for (i = 0; i < mdlen; i++) - printf ("%02x ", p[i] & 0xFF); - printf ("\nexpected: "); - for (i = 0; i < mdlen; i++) - printf ("%02x ", expect[i] & 0xFF); - printf ("\n"); + char buf[1000]; + int outmax = sizeof(buf) > elen ? 
elen : sizeof(buf); - fail ("algo %d, digest mismatch\n", algo); + err = gcry_md_copy (&hd, hd2); + if (err) + { + fail ("algo %d, gcry_md_copy failed: %s\n", algo, gpg_strerror (err)); + } + + err = gcry_md_extract(hd2, algo, buf, outmax); + if (err) + { + fail ("algo %d, gcry_md_extract failed: %s\n", algo, gpg_strerror (err)); + } + + if (memcmp (buf, expect, outmax)) + { + printf ("computed: "); + for (i = 0; i < outmax; i++) + printf ("%02x ", buf[i] & 0xFF); + printf ("\nexpected: "); + for (i = 0; i < outmax; i++) + printf ("%02x ", expect[i] & 0xFF); + printf ("\n"); + + fail ("algo %d, digest mismatch\n", algo); + } + + memset(buf, 0, sizeof(buf)); + + /* Extract one byte at time. */ + for (i = 0; i < outmax && !err; i++) + err = gcry_md_extract(hd, algo, &buf[i], 1); + if (err) + { + fail ("algo %d, gcry_md_extract failed: %s\n", algo, gpg_strerror (err)); + } + + if (memcmp (buf, expect, outmax)) + { + printf ("computed: "); + for (i = 0; i < outmax; i++) + printf ("%02x ", buf[i] & 0xFF); + printf ("\nexpected: "); + for (i = 0; i < outmax; i++) + printf ("%02x ", expect[i] & 0xFF); + printf ("\n"); + + fail ("algo %d, digest mismatch\n", algo); + } + + if (*data == '!' && !data[1]) + { + int crcalgo = GCRY_MD_RMD160; + gcry_md_hd_t crc1, crc2; + size_t startlen; + size_t piecelen; + size_t left; + const unsigned char *p1, *p2; + int crclen; + + crclen = gcry_md_get_algo_dlen (crcalgo); + + err = gcry_md_open (&crc1, crcalgo, 0); + if (err) + { + fail ("algo %d, crcalgo: %d, gcry_md_open failed: %s\n", algo, + crcalgo, gpg_strerror (err)); + return; + } + + err = gcry_md_open (&crc2, crcalgo, 0); + if (err) + { + fail ("algo %d, crcalgo: %d, gcry_md_open failed: %s\n", algo, + crcalgo, gpg_strerror (err)); + return; + } + + /* Extract large chucks, total 1000000 additional bytes. 
*/ + for (i = 0; i < 1000; i++) + { + err = gcry_md_extract(hd, algo, buf, 1000); + if (!err) + gcry_md_write(crc1, buf, 1000); + } + if (err) + { + fail ("algo %d, gcry_md_extract failed: %s\n", algo, + gpg_strerror (err)); + } + + /* Extract in odd size chunks, total 1000000 additional bytes. */ + left = 1000 * 1000; + startlen = 1; + piecelen = startlen; + + while (!err && left > 0) + { + if (piecelen > sizeof(buf)) + piecelen = sizeof(buf); + if (piecelen > left) + piecelen = left; + + err = gcry_md_extract (hd2, algo, buf, piecelen); + if (!err) + gcry_md_write(crc2, buf, piecelen); + if (err) + { + fail ("algo %d, gcry_md_extract failed: %s\n", algo, + gpg_strerror (err)); + } + + left -= piecelen; + + if (piecelen == sizeof(buf)) + piecelen = ++startlen; + else + piecelen = piecelen * 2 - ((piecelen != startlen) ? startlen : 0); + } + + p1 = gcry_md_read (crc1, crcalgo); + p2 = gcry_md_read (crc2, crcalgo); + + if (memcmp (p1, p2, crclen)) + { + printf ("computed: "); + for (i = 0; i < crclen; i++) + printf ("%02x ", p2[i] & 0xFF); + printf ("\nexpected: "); + for (i = 0; i < crclen; i++) + printf ("%02x ", p1[i] & 0xFF); + printf ("\n"); + + fail ("algo %d, large xof output mismatch\n", algo); + } + + gcry_md_close (crc1); + gcry_md_close (crc2); + } + + gcry_md_close (hd); } gcry_md_close (hd2); @@ -5358,6 +5516,9 @@ check_one_md_multi (int algo, const char *data, int len, const char *expect) mdlen = gcry_md_get_algo_dlen (algo); if (mdlen < 1 || mdlen > 64) { + if (mdlen == 0 && (algo == GCRY_MD_SHAKE128 || algo == GCRY_MD_SHAKE256)) + return; + fail ("check_one_md_multi: algo %d, gcry_md_get_algo_dlen failed: %d\n", algo, mdlen); return; @@ -5420,6 +5581,7 @@ check_digests (void) const char *data; const char *expect; int datalen; + int expectlen; } algos[] = { { GCRY_MD_MD2, "", @@ -5917,7 +6079,238 @@ check_digests (void) #include "./sha3-256.h" #include "./sha3-384.h" #include "./sha3-512.h" - { 0 } + { GCRY_MD_SHAKE128, + "", + 
"\x7F\x9C\x2B\xA4\xE8\x8F\x82\x7D\x61\x60\x45\x50\x76\x05\x85\x3E" + "\xD7\x3B\x80\x93\xF6\xEF\xBC\x88\xEB\x1A\x6E\xAC\xFA\x66\xEF\x26" + "\x3C\xB1\xEE\xA9\x88\x00\x4B\x93\x10\x3C\xFB\x0A\xEE\xFD\x2A\x68" + "\x6E\x01\xFA\x4A\x58\xE8\xA3\x63\x9C\xA8\xA1\xE3\xF9\xAE\x57\xE2" + "\x35\xB8\xCC\x87\x3C\x23\xDC\x62\xB8\xD2\x60\x16\x9A\xFA\x2F\x75" + "\xAB\x91\x6A\x58\xD9\x74\x91\x88\x35\xD2\x5E\x6A\x43\x50\x85\xB2" + "\xBA\xDF\xD6\xDF\xAA\xC3\x59\xA5\xEF\xBB\x7B\xCC\x4B\x59\xD5\x38" + "\xDF\x9A\x04\x30\x2E\x10\xC8\xBC\x1C\xBF\x1A\x0B\x3A\x51\x20\xEA" + "\x17\xCD\xA7\xCF\xAD\x76\x5F\x56\x23\x47\x4D\x36\x8C\xCC\xA8\xAF" + "\x00\x07\xCD\x9F\x5E\x4C\x84\x9F\x16\x7A\x58\x0B\x14\xAA\xBD\xEF" + "\xAE\xE7\xEE\xF4\x7C\xB0\xFC\xA9\x76\x7B\xE1\xFD\xA6\x94\x19\xDF" + "\xB9\x27\xE9\xDF\x07\x34\x8B\x19\x66\x91\xAB\xAE\xB5\x80\xB3\x2D" + "\xEF\x58\x53\x8B\x8D\x23\xF8\x77\x32\xEA\x63\xB0\x2B\x4F\xA0\xF4" + "\x87\x33\x60\xE2\x84\x19\x28\xCD\x60\xDD\x4C\xEE\x8C\xC0\xD4\xC9" + "\x22\xA9\x61\x88\xD0\x32\x67\x5C\x8A\xC8\x50\x93\x3C\x7A\xFF\x15" + "\x33\xB9\x4C\x83\x4A\xDB\xB6\x9C\x61\x15\xBA\xD4\x69\x2D\x86\x19" + "\xF9\x0B\x0C\xDF\x8A\x7B\x9C\x26\x40\x29\xAC\x18\x5B\x70\xB8\x3F" + "\x28\x01\xF2\xF4\xB3\xF7\x0C\x59\x3E\xA3\xAE\xEB\x61\x3A\x7F\x1B" + "\x1D\xE3\x3F\xD7\x50\x81\xF5\x92\x30\x5F\x2E\x45\x26\xED\xC0\x96" + "\x31\xB1\x09\x58\xF4\x64\xD8\x89\xF3\x1B\xA0\x10\x25\x0F\xDA\x7F" + "\x13\x68\xEC\x29\x67\xFC\x84\xEF\x2A\xE9\xAF\xF2\x68\xE0\xB1\x70" + "\x0A\xFF\xC6\x82\x0B\x52\x3A\x3D\x91\x71\x35\xF2\xDF\xF2\xEE\x06" + "\xBF\xE7\x2B\x31\x24\x72\x1D\x4A\x26\xC0\x4E\x53\xA7\x5E\x30\xE7" + "\x3A\x7A\x9C\x4A\x95\xD9\x1C\x55\xD4\x95\xE9\xF5\x1D\xD0\xB5\xE9" + "\xD8\x3C\x6D\x5E\x8C\xE8\x03\xAA\x62\xB8\xD6\x54\xDB\x53\xD0\x9B" + "\x8D\xCF\xF2\x73\xCD\xFE\xB5\x73\xFA\xD8\xBC\xD4\x55\x78\xBE\xC2" + "\xE7\x70\xD0\x1E\xFD\xE8\x6E\x72\x1A\x3F\x7C\x6C\xCE\x27\x5D\xAB" + "\xE6\xE2\x14\x3F\x1A\xF1\x8D\xA7\xEF\xDD\xC4\xC7\xB7\x0B\x5E\x34" + "\x5D\xB9\x3C\xC9\x36\xBE\xA3\x23\x49\x1C\xCB\x38\xA3\x88\xF5\x46" 
+ "\xA9\xFF\x00\xDD\x4E\x13\x00\xB9\xB2\x15\x3D\x20\x41\xD2\x05\xB4" + "\x43\xE4\x1B\x45\xA6\x53\xF2\xA5\xC4\x49\x2C\x1A\xDD\x54\x45\x12" + "\xDD\xA2\x52\x98\x33\x46\x2B\x71\xA4\x1A\x45\xBE\x97\x29\x0B\x6F", + 0, 512, }, + { GCRY_MD_SHAKE128, + "\x5A\xAB\x62\x75\x6D\x30\x7A\x66\x9D\x14\x6A\xBA\x98\x8D\x90\x74" + "\xC5\xA1\x59\xB3\xDE\x85\x15\x1A\x81\x9B\x11\x7C\xA1\xFF\x65\x97" + "\xF6\x15\x6E\x80\xFD\xD2\x8C\x9C\x31\x76\x83\x51\x64\xD3\x7D\xA7" + "\xDA\x11\xD9\x4E\x09\xAD\xD7\x70\xB6\x8A\x6E\x08\x1C\xD2\x2C\xA0" + "\xC0\x04\xBF\xE7\xCD\x28\x3B\xF4\x3A\x58\x8D\xA9\x1F\x50\x9B\x27" + "\xA6\x58\x4C\x47\x4A\x4A\x2F\x3E\xE0\xF1\xF5\x64\x47\x37\x92\x40" + "\xA5\xAB\x1F\xB7\x7F\xDC\xA4\x9B\x30\x5F\x07\xBA\x86\xB6\x27\x56" + "\xFB\x9E\xFB\x4F\xC2\x25\xC8\x68\x45\xF0\x26\xEA\x54\x20\x76\xB9" + "\x1A\x0B\xC2\xCD\xD1\x36\xE1\x22\xC6\x59\xBE\x25\x9D\x98\xE5\x84" + "\x1D\xF4\xC2\xF6\x03\x30\xD4\xD8\xCD\xEE\x7B\xF1\xA0\xA2\x44\x52" + "\x4E\xEC\xC6\x8F\xF2\xAE\xF5\xBF\x00\x69\xC9\xE8\x7A\x11\xC6\xE5" + "\x19\xDE\x1A\x40\x62\xA1\x0C\x83\x83\x73\x88\xF7\xEF\x58\x59\x8A" + "\x38\x46\xF4\x9D\x49\x96\x82\xB6\x83\xC4\xA0\x62\xB4\x21\x59\x4F" + "\xAF\xBC\x13\x83\xC9\x43\xBA\x83\xBD\xEF\x51\x5E\xFC\xF1\x0D", + "\xF0\x71\x5D\xE3\x56\x92\xFD\x70\x12\x3D\xC6\x83\x68\xD0\xFE\xEC" + "\x06\xA0\xC7\x4C\xF8\xAD\xB0\x5D\xDC\x25\x54\x87\xB1\xA8\xD4\xD1" + "\x21\x3E\x9E\xAB\xAF\x41\xF1\x16\x17\x19\xD0\x65\xD7\x94\xB7\x50" + "\xF8\x4B\xE3\x2A\x32\x34\xB4\xD5\x36\x46\x0D\x55\x20\x68\x8A\x5A" + "\x79\xA1\x7A\x4B\xA8\x98\x7F\xCB\x61\xBF\x7D\xAA\x8B\x54\x7B\xF5" + "\xC1\xCE\x36\xB5\x6A\x73\x25\x7D\xBB\xF1\xBA\xBB\x64\xF2\x49\xBD" + "\xCE\xB6\x7B\xA1\xC8\x88\x37\x0A\x96\x3D\xFD\x6B\x6A\x2A\xDE\x2C" + "\xEF\xD1\x4C\x32\x52\xCB\x37\x58\x52\x0F\x0C\x65\xF4\x52\x46\x82" + "\x77\x24\x99\x46\x3A\xE1\xA3\x41\x80\x01\x83\xAA\x60\xEF\xA0\x51" + "\x18\xA2\x82\x01\x74\x4F\x7B\xA0\xB0\xA3\x92\x8D\xD7\xC0\x26\x3F" + "\xD2\x64\xB7\xCD\x7B\x2E\x2E\x09\xB3\x22\xBF\xCE\xA8\xEE\xD0\x42" + 
"\x75\x79\x5B\xE7\xC0\xF0\x0E\x11\x38\x27\x37\x0D\x05\x1D\x50\x26" + "\x95\x80\x30\x00\x05\xAC\x12\x88\xFE\xA6\xCD\x9A\xE9\xF4\xF3\x7C" + "\xE0\xF8\xAC\xE8\xBF\x3E\xBE\x1D\x70\x56\x25\x59\x54\xC7\x61\x93" + "\x1D\x3C\x42\xED\x62\xF7\xF1\xCE\x1B\x94\x5C\xDE\xCC\x0A\x74\x32" + "\x2D\x7F\x64\xD6\x00\x4F\xF2\x16\x84\x14\x93\x07\x28\x8B\x44\x8E" + "\x45\x43\x34\x75\xB1\xEA\x13\x14\xB0\x0F\x1F\xC4\x50\x08\x9A\x9D" + "\x1F\x77\x10\xC6\xD7\x65\x2E\xCF\x65\x4F\x3B\x48\x7D\x02\x83\xD4" + "\xD8\xA2\x8E\xFB\x50\x66\xC4\x25\x0D\x5A\xD6\x98\xE1\x5D\xBA\x88" + "\xE9\x25\xE4\xDE\x99\xB6\x9B\xC3\x83\xAC\x80\x45\xB7\xF1\x02\x2A" + "\xDD\x39\xD4\x43\x54\x6A\xE0\x92\x4F\x13\xF4\x89\x60\x96\xDF\xDF" + "\x37\xCA\x72\x20\x79\x87\xC4\xA7\x70\x5A\x7A\xBE\x72\x4B\x7F\xA1" + "\x0C\x90\x9F\x39\x25\x44\x9F\x01\x0D\x61\xE2\x07\xAD\xD9\x52\x19" + "\x07\x1A\xCE\xED\xB9\xB9\xDC\xED\x32\xA9\xE1\x23\x56\x1D\x60\x82" + "\xD4\x6A\xEF\xAE\x07\xEE\x1B\xD1\x32\x76\x5E\x3E\x51\x3C\x66\x50" + "\x1B\x38\x7A\xB2\xEE\x09\xA0\x4A\xE6\x3E\x25\x80\x85\x17\xAF\xEA" + "\x3E\x05\x11\x69\xCF\xD2\xFF\xF8\xC5\x85\x8E\x2D\x96\x23\x89\x7C" + "\x9E\x85\x17\x5A\xC5\xA8\x63\x94\xCD\x0A\x32\xA0\xA6\x2A\x8F\x5D" + "\x6C\xCC\xBF\x49\x3D\xAA\x43\xF7\x83\x62\xBB\xCA\x40\xAD\xF7\x33" + "\xF8\x71\xE0\xC0\x09\x98\xD9\xBF\xD6\x88\x06\x56\x66\x6C\xD7\xBE" + "\x4F\xE9\x89\x2C\x61\xDC\xD5\xCD\x23\xA5\xE4\x27\x7E\xEE\x8B\x4A" + "\xFD\x29\xB6\x9B\xBA\x55\x66\x0A\x21\x71\x12\xFF\x6E\x34\x56\xB1", + 223, 512, }, + { GCRY_MD_SHAKE128, + "!", + "\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11" + "\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58" + "\x42\x4c\x4b\x5c\x28\x47\x5f\xfd\xcf\x98\x16\x63\x86\x7f\xec\x63" + "\x21\xc1\x26\x2e\x38\x7b\xcc\xf8\xca\x67\x68\x84\xc4\xa9\xd0\xc1" + "\x3b\xfa\x68\x69\x76\x3d\x5a\xe4\xbb\xc9\xb3\xcc\xd0\x9d\x1c\xa5" + "\xea\x74\x46\x53\x8d\x69\xb3\xfb\x98\xc7\x2b\x59\xa2\xb4\x81\x7d" + "\xb5\xea\xdd\x90\x11\xf9\x0f\xa7\x10\x91\x93\x1f\x81\x34\xf4\xf0" + 
"\x0b\x56\x2e\x2f\xe1\x05\x93\x72\x70\x36\x1c\x19\x09\x86\x2a\xd4" + "\x50\x46\xe3\x93\x2f\x5d\xd3\x11\xec\x72\xfe\xc5\xf8\xfb\x8f\x60" + "\xb4\x5a\x3b\xee\x3f\x85\xbb\xf7\xfc\xed\xc6\xa5\x55\x67\x76\x48" + "\xe0\x65\x4b\x38\x19\x41\xa8\x6b\xd3\xe5\x12\x65\x7b\x0d\x57\xa7" + "\x99\x1f\xc4\x54\x3f\x89\xd8\x29\x04\x92\x22\x2c\xe4\xa3\x3e\x17" + "\x60\x2b\x3b\x99\xc0\x09\xf7\x65\x5f\x87\x53\x5c\xda\xa3\x71\x6f" + "\x58\xc4\x7b\x8a\x15\x7a\xd1\x95\xf0\x28\x09\xf2\x75\x00\xb9\x25" + "\x49\x79\x31\x1c\x6b\xb4\x15\x96\x8c\xd1\x04\x31\x16\x9a\x27\xd5" + "\xa8\xd6\x1e\x13\xa6\xb8\xb7\x7a\xf1\xf8\xb6\xdd\x2e\xef\xde\xa0" + "\x40\x78\x96\x80\x49\x0b\x5e\xdc\xb1\xd3\xe5\x38\xa4\x66\xf7\x57" + "\xad\x71\x8f\xe1\xfd\x9f\xae\xef\xa4\x72\x46\xad\x5e\x36\x7f\x87" + "\xd3\xb4\x85\x0d\x44\x86\xeb\x21\x99\xe9\x4a\x79\x79\xe2\x09\x1a" + "\xbc\xdf\x3b\xc1\x33\x79\xc8\x96\xdc\xeb\x79\xa8\xfd\x08\xf1\x10" + "\x73\xf3\x3e\x3f\x99\x23\x22\xb3\x12\x02\xde\xe2\x34\x33\x0c\xf3" + "\x30\x4a\x58\x8f\x0d\x59\xda\xe4\xe6\x3b\xa2\xac\x3c\xe6\x82\xcc" + "\x19\xd4\xe3\x41\x67\x8c\xc3\xa6\x7a\x47\xc1\x13\xb4\xdb\x89\x0f" + "\x30\xa9\x2a\xa0\x8a\x1f\x6d\xc8\xfb\x64\x63\xf8\x03\x8c\x2b\x40" + "\xb2\x53\x00\x77\xb2\x36\xce\x88\xaf\xcc\xcd\xa0\x8a\xd6\xd7\x5e" + "\xee\x18\x99\xb1\x0c\xd8\x00\xc2\xce\x53\x72\xbf\xf2\x2e\xe3\xa3" + "\x39\xd4\xb9\xc1\xa2\xf5\xf4\xb8\x20\xf6\x87\xe5\x51\x9b\xd0\x5b" + "\x1f\xc5\xda\x0e\xb4\x53\x36\x81\x4f\x48\x13\x2c\x64\x0e\x66\xc3" + "\xa0\x2a\x22\xe6\x35\x98\xf9\x4f\x22\xf3\x51\x84\x11\x04\x46\xb6" + "\x48\xcf\x84\x74\xf3\x0c\x43\xea\xd5\x83\x09\xfb\x25\x90\x16\x09" + "\xe2\x41\x87\xe8\x01\xc8\x09\x56\x1a\x64\x80\x94\x50\xe6\x03\xc4" + "\xa8\x03\x95\x25\xc4\x76\xb5\x8e\x32\xce\x2c\x47\xb3\x7d\xa5\x91", + 0, 512, }, + { GCRY_MD_SHAKE256, + "", + "\x46\xB9\xDD\x2B\x0B\xA8\x8D\x13\x23\x3B\x3F\xEB\x74\x3E\xEB\x24" + "\x3F\xCD\x52\xEA\x62\xB8\x1B\x82\xB5\x0C\x27\x64\x6E\xD5\x76\x2F" + "\xD7\x5D\xC4\xDD\xD8\xC0\xF2\x00\xCB\x05\x01\x9D\x67\xB5\x92\xF6" + 
"\xFC\x82\x1C\x49\x47\x9A\xB4\x86\x40\x29\x2E\xAC\xB3\xB7\xC4\xBE" + "\x14\x1E\x96\x61\x6F\xB1\x39\x57\x69\x2C\xC7\xED\xD0\xB4\x5A\xE3" + "\xDC\x07\x22\x3C\x8E\x92\x93\x7B\xEF\x84\xBC\x0E\xAB\x86\x28\x53" + "\x34\x9E\xC7\x55\x46\xF5\x8F\xB7\xC2\x77\x5C\x38\x46\x2C\x50\x10" + "\xD8\x46\xC1\x85\xC1\x51\x11\xE5\x95\x52\x2A\x6B\xCD\x16\xCF\x86" + "\xF3\xD1\x22\x10\x9E\x3B\x1F\xDD\x94\x3B\x6A\xEC\x46\x8A\x2D\x62" + "\x1A\x7C\x06\xC6\xA9\x57\xC6\x2B\x54\xDA\xFC\x3B\xE8\x75\x67\xD6" + "\x77\x23\x13\x95\xF6\x14\x72\x93\xB6\x8C\xEA\xB7\xA9\xE0\xC5\x8D" + "\x86\x4E\x8E\xFD\xE4\xE1\xB9\xA4\x6C\xBE\x85\x47\x13\x67\x2F\x5C" + "\xAA\xAE\x31\x4E\xD9\x08\x3D\xAB\x4B\x09\x9F\x8E\x30\x0F\x01\xB8" + "\x65\x0F\x1F\x4B\x1D\x8F\xCF\x3F\x3C\xB5\x3F\xB8\xE9\xEB\x2E\xA2" + "\x03\xBD\xC9\x70\xF5\x0A\xE5\x54\x28\xA9\x1F\x7F\x53\xAC\x26\x6B" + "\x28\x41\x9C\x37\x78\xA1\x5F\xD2\x48\xD3\x39\xED\xE7\x85\xFB\x7F" + "\x5A\x1A\xAA\x96\xD3\x13\xEA\xCC\x89\x09\x36\xC1\x73\xCD\xCD\x0F" + "\xAB\x88\x2C\x45\x75\x5F\xEB\x3A\xED\x96\xD4\x77\xFF\x96\x39\x0B" + "\xF9\xA6\x6D\x13\x68\xB2\x08\xE2\x1F\x7C\x10\xD0\x4A\x3D\xBD\x4E" + "\x36\x06\x33\xE5\xDB\x4B\x60\x26\x01\xC1\x4C\xEA\x73\x7D\xB3\xDC" + "\xF7\x22\x63\x2C\xC7\x78\x51\xCB\xDD\xE2\xAA\xF0\xA3\x3A\x07\xB3" + "\x73\x44\x5D\xF4\x90\xCC\x8F\xC1\xE4\x16\x0F\xF1\x18\x37\x8F\x11" + "\xF0\x47\x7D\xE0\x55\xA8\x1A\x9E\xDA\x57\xA4\xA2\xCF\xB0\xC8\x39" + "\x29\xD3\x10\x91\x2F\x72\x9E\xC6\xCF\xA3\x6C\x6A\xC6\xA7\x58\x37" + "\x14\x30\x45\xD7\x91\xCC\x85\xEF\xF5\xB2\x19\x32\xF2\x38\x61\xBC" + "\xF2\x3A\x52\xB5\xDA\x67\xEA\xF7\xBA\xAE\x0F\x5F\xB1\x36\x9D\xB7" + "\x8F\x3A\xC4\x5F\x8C\x4A\xC5\x67\x1D\x85\x73\x5C\xDD\xDB\x09\xD2" + "\xB1\xE3\x4A\x1F\xC0\x66\xFF\x4A\x16\x2C\xB2\x63\xD6\x54\x12\x74" + "\xAE\x2F\xCC\x86\x5F\x61\x8A\xBE\x27\xC1\x24\xCD\x8B\x07\x4C\xCD" + "\x51\x63\x01\xB9\x18\x75\x82\x4D\x09\x95\x8F\x34\x1E\xF2\x74\xBD" + "\xAB\x0B\xAE\x31\x63\x39\x89\x43\x04\xE3\x58\x77\xB0\xC2\x8A\x9B" + "\x1F\xD1\x66\xC7\x96\xB9\xCC\x25\x8A\x06\x4A\x8F\x57\xE2\x7F\x2A", 
+ 0, 512, }, + { GCRY_MD_SHAKE256, + "\xB3\x2D\x95\xB0\xB9\xAA\xD2\xA8\x81\x6D\xE6\xD0\x6D\x1F\x86\x00" + "\x85\x05\xBD\x8C\x14\x12\x4F\x6E\x9A\x16\x3B\x5A\x2A\xDE\x55\xF8" + "\x35\xD0\xEC\x38\x80\xEF\x50\x70\x0D\x3B\x25\xE4\x2C\xC0\xAF\x05" + "\x0C\xCD\x1B\xE5\xE5\x55\xB2\x30\x87\xE0\x4D\x7B\xF9\x81\x36\x22" + "\x78\x0C\x73\x13\xA1\x95\x4F\x87\x40\xB6\xEE\x2D\x3F\x71\xF7\x68" + "\xDD\x41\x7F\x52\x04\x82\xBD\x3A\x08\xD4\xF2\x22\xB4\xEE\x9D\xBD" + "\x01\x54\x47\xB3\x35\x07\xDD\x50\xF3\xAB\x42\x47\xC5\xDE\x9A\x8A" + "\xBD\x62\xA8\xDE\xCE\xA0\x1E\x3B\x87\xC8\xB9\x27\xF5\xB0\x8B\xEB" + "\x37\x67\x4C\x6F\x8E\x38\x0C\x04", + "\xCC\x2E\xAA\x04\xEE\xF8\x47\x9C\xDA\xE8\x56\x6E\xB8\xFF\xA1\x10" + "\x0A\x40\x79\x95\xBF\x99\x9A\xE9\x7E\xDE\x52\x66\x81\xDC\x34\x90" + "\x61\x6F\x28\x44\x2D\x20\xDA\x92\x12\x4C\xE0\x81\x58\x8B\x81\x49" + "\x1A\xED\xF6\x5C\xAA\xF0\xD2\x7E\x82\xA4\xB0\xE1\xD1\xCA\xB2\x38" + "\x33\x32\x8F\x1B\x8D\xA4\x30\xC8\xA0\x87\x66\xA8\x63\x70\xFA\x84" + "\x8A\x79\xB5\x99\x8D\xB3\xCF\xFD\x05\x7B\x96\xE1\xE2\xEE\x0E\xF2" + "\x29\xEC\xA1\x33\xC1\x55\x48\xF9\x83\x99\x02\x04\x37\x30\xE4\x4B" + "\xC5\x2C\x39\xFA\xDC\x1D\xDE\xEA\xD9\x5F\x99\x39\xF2\x20\xCA\x30" + "\x06\x61\x54\x0D\xF7\xED\xD9\xAF\x37\x8A\x5D\x4A\x19\xB2\xB9\x3E" + "\x6C\x78\xF4\x9C\x35\x33\x43\xA0\xB5\xF1\x19\x13\x2B\x53\x12\xD0" + "\x04\x83\x1D\x01\x76\x9A\x31\x6D\x2F\x51\xBF\x64\xCC\xB2\x0A\x21" + "\xC2\xCF\x7A\xC8\xFB\x6F\x6E\x90\x70\x61\x26\xBD\xAE\x06\x11\xDD" + "\x13\x96\x2E\x8B\x53\xD6\xEA\xE2\x6C\x7B\x0D\x25\x51\xDA\xF6\x24" + "\x8E\x9D\x65\x81\x73\x82\xB0\x4D\x23\x39\x2D\x10\x8E\x4D\x34\x43" + "\xDE\x5A\xDC\x72\x73\xC7\x21\xA8\xF8\x32\x0E\xCF\xE8\x17\x7A\xC0" + "\x67\xCA\x8A\x50\x16\x9A\x6E\x73\x00\x0E\xBC\xDC\x1E\x4E\xE6\x33" + "\x9F\xC8\x67\xC3\xD7\xAE\xAB\x84\x14\x63\x98\xD7\xBA\xDE\x12\x1D" + "\x19\x89\xFA\x45\x73\x35\x56\x4E\x97\x57\x70\xA3\xA0\x02\x59\xCA" + "\x08\x70\x61\x08\x26\x1A\xA2\xD3\x4D\xE0\x0F\x8C\xAC\x7D\x45\xD3" + 
"\x5E\x5A\xA6\x3E\xA6\x9E\x1D\x1A\x2F\x7D\xAB\x39\x00\xD5\x1E\x0B" + "\xC6\x53\x48\xA2\x55\x54\x00\x70\x39\xA5\x2C\x3C\x30\x99\x80\xD1" + "\x7C\xAD\x20\xF1\x15\x63\x10\xA3\x9C\xD3\x93\x76\x0C\xFE\x58\xF6" + "\xF8\xAD\xE4\x21\x31\x28\x82\x80\xA3\x5E\x1D\xB8\x70\x81\x83\xB9" + "\x1C\xFA\xF5\x82\x7E\x96\xB0\xF7\x74\xC4\x50\x93\xB4\x17\xAF\xF9" + "\xDD\x64\x17\xE5\x99\x64\xA0\x1B\xD2\xA6\x12\xFF\xCF\xBA\x18\xA0" + "\xF1\x93\xDB\x29\x7B\x9A\x6C\xC1\xD2\x70\xD9\x7A\xAE\x8F\x8A\x3A" + "\x6B\x26\x69\x5A\xB6\x64\x31\xC2\x02\xE1\x39\xD6\x3D\xD3\xA2\x47" + "\x78\x67\x6C\xEF\xE3\xE2\x1B\x02\xEC\x4E\x8F\x5C\xFD\x66\x58\x7A" + "\x12\xB4\x40\x78\xFC\xD3\x9E\xEE\x44\xBB\xEF\x4A\x94\x9A\x63\xC0" + "\xDF\xD5\x8C\xF2\xFB\x2C\xD5\xF0\x02\xE2\xB0\x21\x92\x66\xCF\xC0" + "\x31\x81\x74\x86\xDE\x70\xB4\x28\x5A\x8A\x70\xF3\xD3\x8A\x61\xD3" + "\x15\x5D\x99\xAA\xF4\xC2\x53\x90\xD7\x36\x45\xAB\x3E\x8D\x80\xF0", + 136, 512, }, + { GCRY_MD_SHAKE256, + "!", + "\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb" + "\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a" + "\x3f\xd1\x24\xd4\xdf\x76\xc0\xa5\x39\xee\x7d\xd2\xf6\xe1\xec\x34" + "\x61\x24\xc8\x15\xd9\x41\x0e\x14\x5e\xb5\x61\xbc\xd9\x7b\x18\xab" + "\x6c\xe8\xd5\x55\x3e\x0e\xab\x3d\x1f\x7d\xfb\x8f\x9d\xee\xfe\x16" + "\x84\x7e\x21\x92\xf6\xf6\x1f\xb8\x2f\xb9\x0d\xde\x60\xb1\x90\x63" + "\xc5\x6a\x4c\x55\xcd\xd7\xb6\x72\xb7\x5b\xf5\x15\xad\xbf\xe2\x04" + "\x90\x3c\x8c\x00\x36\xde\x54\xa2\x99\x9a\x92\x0d\xe9\x0f\x66\xd7" + "\xff\x6e\xc8\xe4\xc9\x3d\x24\xae\x34\x6f\xdc\xb3\xa5\xa5\xbd\x57" + "\x39\xec\x15\xa6\xed\xdb\x5c\xe5\xb0\x2d\xa5\x30\x39\xfa\xc6\x3e" + "\x19\x55\x5f\xaa\x2e\xdd\xc6\x93\xb1\xf0\xc2\xa6\xfc\xbe\x7c\x0a" + "\x0a\x09\x1d\x0e\xe7\x00\xd7\x32\x2e\x4b\x0f\xf0\x95\x90\xde\x16" + "\x64\x22\xf9\xea\xd5\xda\x4c\x99\x3d\x60\x5f\xe4\xd9\xc6\x34\x84" + "\x3a\xa1\x78\xb1\x76\x72\xc6\x56\x8c\x8a\x2e\x62\xab\xeb\xea\x2c" + "\x21\xc3\x02\xbd\x36\x6a\xd6\x98\x95\x9e\x1f\x6e\x43\x4a\xf1\x55" + 
"\x56\x8b\x27\x34\xd8\x37\x9f\xcd\x3f\xfe\x64\x89\xba\xff\xa6\xd7" + "\x11\x09\x44\x2e\x1b\x34\x4f\x13\x8a\x09\xca\xe3\xe2\xd3\x94\x2e" + "\xee\x82\x8f\xc4\x7e\x64\xde\xb5\xe0\x0a\x02\x4a\xe1\xf2\xc0\x77" + "\xe6\xb7\xb1\x33\xf6\xc1\xde\x91\x30\x92\xd4\xe8\x29\xec\xd2\xb2" + "\xef\x28\xca\x80\x20\x82\x1e\x2b\x8b\xe5\x17\xd9\x3e\xd0\x88\x36" + "\xf6\xf0\x66\xcc\x3d\x03\xb6\x25\xd8\x49\x7f\x29\xdb\xc1\xc3\x9e" + "\x6f\xe4\x63\x22\x6f\x85\xc1\x28\xa2\xc2\x98\x88\x11\x2e\x06\xa9" + "\x9c\x5d\x17\xb2\x5e\x90\x0d\x20\x4f\x39\x72\x31\xcd\xf7\x9c\x31" + "\x34\x46\x53\x2d\xad\x07\xf4\xc0\xbd\x9f\xba\x1d\xd4\x13\xd8\xa7" + "\xe6\xcb\xc0\xa0\x86\x2c\xc7\x69\x23\x9a\x89\xf9\xdb\x08\x5b\x78" + "\xa0\x54\x59\x6a\xd7\x08\x0d\xdf\x96\x01\x9b\x73\x99\xb5\x03\x48" + "\x0e\x5a\x65\xa2\x20\x8d\x74\x72\x4c\x98\x7d\x32\x5e\x9b\x0e\x82" + "\xfe\xcd\x4f\x27\xf3\x13\x5b\x1d\x9e\x27\xb4\x8e\x69\xdd\x6f\x59" + "\x62\xb8\xa6\x3b\x48\x92\x1e\xc8\xee\x53\x86\x9f\x1a\xc1\xc8\x18" + "\x23\x87\xee\x0d\x6c\xfe\xf6\x53\xff\x8b\xf6\x05\xf1\x47\x04\xb7" + "\x1b\xeb\x65\x53\xf2\x81\xfa\x75\x69\x48\xc4\x38\x49\x4b\x19\xb4" + "\xee\x69\xa5\x43\x6b\x22\x2b\xc9\x88\xed\xa4\xac\x60\x00\x24\xc9", + 0, 512, }, + { 0 } }; gcry_error_t err; int i; @@ -5950,7 +6343,7 @@ check_digests (void) check_one_md (algos[i].md, algos[i].data, algos[i].datalen > 0 ? algos[i].datalen : strlen (algos[i].data), - algos[i].expect); + algos[i].expect, algos[i].expectlen); check_one_md_multi (algos[i].md, algos[i].data, algos[i].datalen > 0 ? 
algos[i].datalen : strlen (algos[i].data), diff --git a/tests/bench-slope.c b/tests/bench-slope.c index 2679556..3a2aa38 100644 --- a/tests/bench-slope.c +++ b/tests/bench-slope.c @@ -1651,6 +1651,12 @@ kdf_bench_one (int algo, int subalgo) return; } + if (gcry_md_get_algo_dlen (subalgo) == 0) + { + /* Skip XOFs */ + return; + } + *algo_name = 0; if (algo == GCRY_KDF_PBKDF2) commit 28de6f9e16e386018e81a9cdaee596be7616ccab Author: Jussi Kivilinna Date: Sun Oct 25 18:57:15 2015 +0200 Few updates to documentation * doc/gcrypt.texi: Add mention of new 'intel-fast-shld' hw feature flag; Add mention of x86 RDRAND support in rndhw. -- Signed-off-by: Jussi Kivilinna diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index 3450bb2..facdf65 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -556,6 +556,7 @@ are @item padlock-sha @item padlock-mmul @item intel-cpu +@item intel-fast-shld @item intel-bmi2 @item intel-ssse3 @item intel-pclmul @@ -5610,9 +5611,9 @@ that system and is the only gathering module available for that OS. @item rndhw Extra module to collect additional entropy by utilizing a hardware -random number generator. As of now the only supported hardware RNG is -the Padlock engine of VIA (Centaur) CPUs. It is not available in FIPS -mode. +random number generator. As of now the supported hardware RNG is +the Padlock engine of VIA (Centaur) CPUs and x86 CPUs with the RDRAND +instruction. It is not available in FIPS mode. @end table commit 92ad19873562cfce7bcc4a0b5aed8195d8284cfc Author: Jussi Kivilinna Date: Sun Oct 25 17:59:33 2015 +0200 Add HMAC-SHA3 test vectors * tests/basic.c (check_mac): Add HMAC_SHA3 test vectors. 
-- Signed-off-by: Jussi Kivilinna diff --git a/tests/basic.c b/tests/basic.c index 4ea91a9..75ff349 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -6784,6 +6784,169 @@ check_mac (void) "\xde\xbd\x71\xf8\x86\x72\x89\x86\x5d\xf5\xa3\x2d\x20\xcd\xc9\x44" "\xb6\x02\x2c\xac\x3c\x49\x82\xb1\x0d\x5e\xeb\x55\xc3\xe4\xde\x15" "\x13\x46\x76\xfb\x6d\xe0\x44\x60\x65\xc9\x74\x40\xfa\x8c\x6a\x58" }, + /* HMAC-SHA3 test vectors from + * http://wolfgang-ehrhardt.de/hmac-sha3-testvectors.html */ + { GCRY_MAC_HMAC_SHA3_224, + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b", + "\x3b\x16\x54\x6b\xbc\x7b\xe2\x70\x6a\x03\x1d\xca\xfd\x56\x37\x3d" + "\x98\x84\x36\x76\x41\xd8\xc5\x9a\xf3\xc8\x60\xf7" }, + { GCRY_MAC_HMAC_SHA3_256, + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b", + "\xba\x85\x19\x23\x10\xdf\xfa\x96\xe2\xa3\xa4\x0e\x69\x77\x43\x51" + "\x14\x0b\xb7\x18\x5e\x12\x02\xcd\xcc\x91\x75\x89\xf9\x5e\x16\xbb" }, + { GCRY_MAC_HMAC_SHA3_512, + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b", + "\xeb\x3f\xbd\x4b\x2e\xaa\xb8\xf5\xc5\x04\xbd\x3a\x41\x46\x5a\xac" + "\xec\x15\x77\x0a\x7c\xab\xac\x53\x1e\x48\x2f\x86\x0b\x5e\xc7\xba" + "\x47\xcc\xb2\xc6\xf2\xaf\xce\x8f\x88\xd2\x2b\x6d\xc6\x13\x80\xf2" + "\x3a\x66\x8f\xd3\x88\x8b\xb8\x05\x37\xc0\xa0\xb8\x64\x07\x68\x9e" }, + { GCRY_MAC_HMAC_SHA3_224, "what do ya want for nothing?", "Jefe", + "\x7f\xdb\x8d\xd8\x8b\xd2\xf6\x0d\x1b\x79\x86\x34\xad\x38\x68\x11" + "\xc2\xcf\xc8\x5b\xfa\xf5\xd5\x2b\xba\xce\x5e\x66" }, + { GCRY_MAC_HMAC_SHA3_256, "what do ya want for nothing?", "Jefe", + "\xc7\xd4\x07\x2e\x78\x88\x77\xae\x35\x96\xbb\xb0\xda\x73\xb8\x87" + "\xc9\x17\x1f\x93\x09\x5b\x29\x4a\xe8\x57\xfb\xe2\x64\x5e\x1b\xa5" }, + { GCRY_MAC_HMAC_SHA3_384, "what do ya want for nothing?", "Jefe", + "\xf1\x10\x1f\x8c\xbf\x97\x66\xfd\x67\x64\xd2\xed\x61\x90\x3f\x21" + 
"\xca\x9b\x18\xf5\x7c\xf3\xe1\xa2\x3c\xa1\x35\x08\xa9\x32\x43\xce" + "\x48\xc0\x45\xdc\x00\x7f\x26\xa2\x1b\x3f\x5e\x0e\x9d\xf4\xc2\x0a" }, + { GCRY_MAC_HMAC_SHA3_512, "what do ya want for nothing?", "Jefe", + "\x5a\x4b\xfe\xab\x61\x66\x42\x7c\x7a\x36\x47\xb7\x47\x29\x2b\x83" + "\x84\x53\x7c\xdb\x89\xaf\xb3\xbf\x56\x65\xe4\xc5\xe7\x09\x35\x0b" + "\x28\x7b\xae\xc9\x21\xfd\x7c\xa0\xee\x7a\x0c\x31\xd0\x22\xa9\x5e" + "\x1f\xc9\x2b\xa9\xd7\x7d\xf8\x83\x96\x02\x75\xbe\xb4\xe6\x20\x24" }, + { GCRY_MAC_HMAC_SHA3_224, + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + "\xb9\x6d\x73\x0c\x14\x8c\x2d\xaa\xd8\x64\x9d\x83\xde\xfa\xa3\x71" + "\x97\x38\xd3\x47\x75\x39\x7b\x75\x71\xc3\x85\x15" }, + { GCRY_MAC_HMAC_SHA3_256, + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + 
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + "\xa6\x07\x2f\x86\xde\x52\xb3\x8b\xb3\x49\xfe\x84\xcd\x6d\x97\xfb" + "\x6a\x37\xc4\xc0\xf6\x2a\xae\x93\x98\x11\x93\xa7\x22\x9d\x34\x67" }, + { GCRY_MAC_HMAC_SHA3_384, + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + "\x71\x3d\xff\x03\x02\xc8\x50\x86\xec\x5a\xd0\x76\x8d\xd6\x5a\x13" + "\xdd\xd7\x90\x68\xd8\xd4\xc6\x21\x2b\x71\x2e\x41\x64\x94\x49\x11" + "\x14\x80\x23\x00\x44\x18\x5a\x99\x10\x3e\xd8\x20\x04\xdd\xbf\xcc" }, + { GCRY_MAC_HMAC_SHA3_512, + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + "\xb1\x48\x35\xc8\x19\xa2\x90\xef\xb0\x10\xac\xe6\xd8\x56\x8d\xc6" + 
"\xb8\x4d\xe6\x0b\xc4\x9b\x00\x4c\x3b\x13\xed\xa7\x63\x58\x94\x51" + "\xe5\xdd\x74\x29\x28\x84\xd1\xbd\xce\x64\xe6\xb9\x19\xdd\x61\xdc" + "\x9c\x56\xa2\x82\xa8\x1c\x0b\xd1\x4f\x1f\x36\x5b\x49\xb8\x3a\x5b" }, + { GCRY_MAC_HMAC_SHA3_224, + "This is a test using a larger than block-size key and a larger " + "than block-size data. The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + "\xc7\x9c\x9b\x09\x34\x24\xe5\x88\xa9\x87\x8b\xbc\xb0\x89\xe0\x18" + "\x27\x00\x96\xe9\xb4\xb1\xa9\xe8\x22\x0c\x86\x6a" }, + { GCRY_MAC_HMAC_SHA3_256, + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + "\xe6\xa3\x6d\x9b\x91\x5f\x86\xa0\x93\xca\xc7\xd1\x10\xe9\xe0\x4c" + "\xf1\xd6\x10\x0d\x30\x47\x55\x09\xc2\x47\x5f\x57\x1b\x75\x8b\x5a" }, + { GCRY_MAC_HMAC_SHA3_384, + "This is a test using a larger than block-size key and a larger " + "than block-size data. The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + "\xca\xd1\x8a\x8f\xf6\xc4\xcc\x3a\xd4\x87\xb9\x5f\x97\x69\xe9\xb6" + "\x1c\x06\x2a\xef\xd6\x95\x25\x69\xe6\xe6\x42\x18\x97\x05\x4c\xfc" + "\x70\xb5\xfd\xc6\x60\x5c\x18\x45\x71\x12\xfc\x6a\xaa\xd4\x55\x85" }, + { GCRY_MAC_HMAC_SHA3_512, + "This is a test using a larger than block-size key and a larger " + 
"than block-size data. The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + "\xdc\x03\x0e\xe7\x88\x70\x34\xf3\x2c\xf4\x02\xdf\x34\x62\x2f\x31" + "\x1f\x3e\x6c\xf0\x48\x60\xc6\xbb\xd7\xfa\x48\x86\x74\x78\x2b\x46" + "\x59\xfd\xbd\xf3\xfd\x87\x78\x52\x88\x5c\xfe\x6e\x22\x18\x5f\xe7" + "\xb2\xee\x95\x20\x43\x62\x9b\xc9\xd5\xf3\x29\x8a\x41\xd0\x2c\x66" }, /* CMAC AES and DES test vectors from http://web.archive.org/web/20130930212819/http://csrc.nist.gov/publica\ tions/nistpubs/800-38B/Updated_CMAC_Examples.pdf */ commit 577dc2b63ceca6a8a716256d034ea4e7414f65fa Author: Jussi Kivilinna Date: Sun Oct 25 14:50:41 2015 +0200 md: add variable length output interface * cipher/crc.c (_gcry_digest_spec_crc32) (_gcry_digest_spec_crc32_rfc1510, _gcry_digest_spec_crc24_rfc2440): Set 'extract' NULL. * cipher/gostr3411-94.c (_gcry_digest_spec_gost3411_94) (_gcry_digest_spec_gost3411_cp): Ditto. * cipher/keccak.c (_gcry_digest_spec_sha3_224) (_gcry_digest_spec_sha3_256, _gcry_digest_spec_sha3_384) (_gcry_digest_spec_sha3_512): Ditto. * cipher/md2.c (_gcry_digest_spec_md2): Ditto. * cipher/md4.c (_gcry_digest_spec_md4): Ditto. * cipher/md5.c (_gcry_digest_spec_md5): Ditto. * cipher/rmd160.c (_gcry_digest_spec_rmd160): Ditto. * cipher/sha1.c (_gcry_digest_spec_sha1): Ditto. 
* cipher/sha256.c (_gcry_digest_spec_sha224) (_gcry_digest_spec_sha256): Ditto. * cipher/sha512.c (_gcry_digest_spec_sha384) (_gcry_digest_spec_sha512): Ditto. * cipher/stribog.c (_gcry_digest_spec_stribog_256) (_gcry_digest_spec_stribog_512): Ditto. * cipher/tiger.c (_gcry_digest_spec_tiger) (_gcry_digest_spec_tiger1, _gcry_digest_spec_tiger2): Ditto. * cipher/whirlpool.c (_gcry_digest_spec_whirlpool): Ditto. * cipher/md.c (md_enable): Do not allow combination of HMAC and 'expandable-output function'. (md_final): Check if spec->read is NULL before calling. (md_read): Ditto. (md_extract, _gcry_md_extract): New. * doc/gcrypt.texi: Add SHA3 algorithms and gcry_md_extract. * src/cipher-proto.h (gcry_md_extract_t): New. (gcry_md_spec_t): Add 'extract'. * src/gcrypt-int.h (_gcry_md_extract): New. * src/gcrypt.h.in (gcry_md_extract): New. * src/libgcrypt.def: Add gcry_md_extract. * src/libgcrypt.vers: Add gcry_md_extract. * src/visibility.c (gcry_md_extract): New. * src/visibility.h (gcry_md_extract): New. -- Patch adds new interface for reading output from 'expandable-output function' MD algorithms that can give variable length output (i.e. SHAKE algorithms from FIPS-202). New function to read output is gpg_error_t gcry_md_extract(gcry_md_hd_t md, int algo, void *buffer, size_t length); Function implicitly finalizes algorithm so that no new input can be given. Subsequent calls of the function return more output bytes from the algorithm. 
Signed-off-by: Jussi Kivilinna diff --git a/cipher/crc.c b/cipher/crc.c index 9105dfe..46a185a 100644 --- a/cipher/crc.c +++ b/cipher/crc.c @@ -785,7 +785,7 @@ gcry_md_spec_t _gcry_digest_spec_crc32 = { GCRY_MD_CRC32, {0, 1}, "CRC32", NULL, 0, NULL, 4, - crc32_init, crc32_write, crc32_final, crc32_read, + crc32_init, crc32_write, crc32_final, crc32_read, NULL, sizeof (CRC_CONTEXT) }; @@ -793,8 +793,7 @@ gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510 = { GCRY_MD_CRC32_RFC1510, {0, 1}, "CRC32RFC1510", NULL, 0, NULL, 4, - crc32rfc1510_init, crc32_write, - crc32rfc1510_final, crc32_read, + crc32rfc1510_init, crc32_write, crc32rfc1510_final, crc32_read, NULL, sizeof (CRC_CONTEXT) }; @@ -802,7 +801,6 @@ gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440 = { GCRY_MD_CRC24_RFC2440, {0, 1}, "CRC24RFC2440", NULL, 0, NULL, 3, - crc24rfc2440_init, crc24rfc2440_write, - crc24rfc2440_final, crc32_read, + crc24rfc2440_init, crc24rfc2440_write, crc24rfc2440_final, crc32_read, NULL, sizeof (CRC_CONTEXT) }; diff --git a/cipher/gostr3411-94.c b/cipher/gostr3411-94.c index 7b16e61..a782427 100644 --- a/cipher/gostr3411-94.c +++ b/cipher/gostr3411-94.c @@ -343,13 +343,13 @@ gcry_md_spec_t _gcry_digest_spec_gost3411_94 = { GCRY_MD_GOSTR3411_94, {0, 0}, "GOSTR3411_94", NULL, 0, NULL, 32, - gost3411_init, _gcry_md_block_write, gost3411_final, gost3411_read, + gost3411_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL, sizeof (GOSTR3411_CONTEXT) }; gcry_md_spec_t _gcry_digest_spec_gost3411_cp = { GCRY_MD_GOSTR3411_CP, {0, 0}, "GOSTR3411_CP", asn, DIM (asn), oid_spec_gostr3411, 32, - gost3411_cp_init, _gcry_md_block_write, gost3411_final, gost3411_read, + gost3411_cp_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL, sizeof (GOSTR3411_CONTEXT) }; diff --git a/cipher/keccak.c b/cipher/keccak.c index 3a72294..d46d9cb 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -927,7 +927,7 @@ gcry_md_spec_t _gcry_digest_spec_sha3_224 = { GCRY_MD_SHA3_224, {0, 1}, "SHA3-224", 
sha3_224_asn, DIM (sha3_224_asn), oid_spec_sha3_224, 28, - sha3_224_init, keccak_write, keccak_final, keccak_read, + sha3_224_init, keccak_write, keccak_final, keccak_read, NULL, sizeof (KECCAK_CONTEXT), run_selftests }; @@ -935,7 +935,7 @@ gcry_md_spec_t _gcry_digest_spec_sha3_256 = { GCRY_MD_SHA3_256, {0, 1}, "SHA3-256", sha3_256_asn, DIM (sha3_256_asn), oid_spec_sha3_256, 32, - sha3_256_init, keccak_write, keccak_final, keccak_read, + sha3_256_init, keccak_write, keccak_final, keccak_read, NULL, sizeof (KECCAK_CONTEXT), run_selftests }; @@ -943,7 +943,7 @@ gcry_md_spec_t _gcry_digest_spec_sha3_384 = { GCRY_MD_SHA3_384, {0, 1}, "SHA3-384", sha3_384_asn, DIM (sha3_384_asn), oid_spec_sha3_384, 48, - sha3_384_init, keccak_write, keccak_final, keccak_read, + sha3_384_init, keccak_write, keccak_final, keccak_read, NULL, sizeof (KECCAK_CONTEXT), run_selftests }; @@ -951,7 +951,7 @@ gcry_md_spec_t _gcry_digest_spec_sha3_512 = { GCRY_MD_SHA3_512, {0, 1}, "SHA3-512", sha3_512_asn, DIM (sha3_512_asn), oid_spec_sha3_512, 64, - sha3_512_init, keccak_write, keccak_final, keccak_read, + sha3_512_init, keccak_write, keccak_final, keccak_read, NULL, sizeof (KECCAK_CONTEXT), run_selftests }; diff --git a/cipher/md.c b/cipher/md.c index 948d269..6ef8fee 100644 --- a/cipher/md.c +++ b/cipher/md.c @@ -408,6 +408,12 @@ md_enable (gcry_md_hd_t hd, int algorithm) } } + if (!err && h->flags.hmac && spec->read == NULL) + { + /* Expandable output function cannot act as part of HMAC. 
*/ + err = GPG_ERR_DIGEST_ALGO; + } + if (!err) { size_t size = (sizeof (*entry) @@ -638,11 +644,16 @@ md_final (gcry_md_hd_t a) for (r = a->ctx->list; r; r = r->next) { - byte *p = r->spec->read (&r->context.c); + byte *p; size_t dlen = r->spec->mdlen; byte *hash; gcry_err_code_t err; + if (r->spec->read == NULL) + continue; + + p = r->spec->read (&r->context.c); + if (a->ctx->flags.secure) hash = xtrymalloc_secure (dlen); else @@ -821,6 +832,8 @@ md_read( gcry_md_hd_t a, int algo ) { if (r->next) log_debug ("more than one algorithm in md_read(0)\n"); + if (r->spec->read == NULL) + return NULL; return r->spec->read (&r->context.c); } } @@ -828,7 +841,11 @@ md_read( gcry_md_hd_t a, int algo ) { for (r = a->ctx->list; r; r = r->next) if (r->spec->algo == algo) - return r->spec->read (&r->context.c); + { + if (r->spec->read == NULL) + return NULL; + return r->spec->read (&r->context.c); + } } BUG(); return NULL; @@ -850,6 +867,52 @@ _gcry_md_read (gcry_md_hd_t hd, int algo) } +/**************** + * If ALGO is null get the digest for the used algo (which should be + * only one) + */ +static gcry_err_code_t +md_extract(gcry_md_hd_t a, int algo, void *out, size_t outlen) +{ + GcryDigestEntry *r = a->ctx->list; + + if (!algo) + { + /* Return the first algorithm */ + if (r && r->spec->extract) + { + if (r->next) + log_debug ("more than one algorithm in md_extract(0)\n"); + r->spec->extract (&r->context.c, out, outlen); + return 0; + } + } + else + { + for (r = a->ctx->list; r; r = r->next) + if (r->spec->algo == algo && r->spec->extract) + { + r->spec->extract (&r->context.c, out, outlen); + return 0; + } + } + + return GPG_ERR_DIGEST_ALGO; +} + + +/* + * Expand the output from XOF class digest, this function implictly finalizes + * the hash. 
+ */ +gcry_err_code_t +_gcry_md_extract (gcry_md_hd_t hd, int algo, void *out, size_t outlen) +{ + _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0); + return md_extract (hd, algo, out, outlen); +} + + /* * Read out an intermediate digest. Not yet functional. */ diff --git a/cipher/md2.c b/cipher/md2.c index 97682e5..e339b28 100644 --- a/cipher/md2.c +++ b/cipher/md2.c @@ -177,6 +177,6 @@ gcry_md_spec_t _gcry_digest_spec_md2 = { GCRY_MD_MD2, {0, 0}, "MD2", asn, DIM (asn), oid_spec_md2, 16, - md2_init, _gcry_md_block_write, md2_final, md2_read, + md2_init, _gcry_md_block_write, md2_final, md2_read, NULL, sizeof (MD2_CONTEXT) }; diff --git a/cipher/md4.c b/cipher/md4.c index c9b4154..afa6382 100644 --- a/cipher/md4.c +++ b/cipher/md4.c @@ -286,6 +286,6 @@ gcry_md_spec_t _gcry_digest_spec_md4 = { GCRY_MD_MD4, {0, 0}, "MD4", asn, DIM (asn), oid_spec_md4,16, - md4_init, _gcry_md_block_write, md4_final, md4_read, + md4_init, _gcry_md_block_write, md4_final, md4_read, NULL, sizeof (MD4_CONTEXT) }; diff --git a/cipher/md5.c b/cipher/md5.c index f17af7a..66cc5f6 100644 --- a/cipher/md5.c +++ b/cipher/md5.c @@ -312,6 +312,6 @@ gcry_md_spec_t _gcry_digest_spec_md5 = { GCRY_MD_MD5, {0, 1}, "MD5", asn, DIM (asn), oid_spec_md5, 16, - md5_init, _gcry_md_block_write, md5_final, md5_read, + md5_init, _gcry_md_block_write, md5_final, md5_read, NULL, sizeof (MD5_CONTEXT) }; diff --git a/cipher/rmd160.c b/cipher/rmd160.c index 2695db2..cf7531e 100644 --- a/cipher/rmd160.c +++ b/cipher/rmd160.c @@ -526,6 +526,6 @@ gcry_md_spec_t _gcry_digest_spec_rmd160 = { GCRY_MD_RMD160, {0, 0}, "RIPEMD160", asn, DIM (asn), oid_spec_rmd160, 20, - rmd160_init, _gcry_md_block_write, rmd160_final, rmd160_read, + rmd160_init, _gcry_md_block_write, rmd160_final, rmd160_read, NULL, sizeof (RMD160_CONTEXT) }; diff --git a/cipher/sha1.c b/cipher/sha1.c index 554d55c..0de8412 100644 --- a/cipher/sha1.c +++ b/cipher/sha1.c @@ -573,7 +573,7 @@ gcry_md_spec_t _gcry_digest_spec_sha1 = { GCRY_MD_SHA1, {0, 1}, "SHA1", 
asn, DIM (asn), oid_spec_sha1, 20, - sha1_init, _gcry_md_block_write, sha1_final, sha1_read, + sha1_init, _gcry_md_block_write, sha1_final, sha1_read, NULL, sizeof (SHA1_CONTEXT), run_selftests }; diff --git a/cipher/sha256.c b/cipher/sha256.c index 63869d5..bc326e0 100644 --- a/cipher/sha256.c +++ b/cipher/sha256.c @@ -633,7 +633,7 @@ gcry_md_spec_t _gcry_digest_spec_sha224 = { GCRY_MD_SHA224, {0, 1}, "SHA224", asn224, DIM (asn224), oid_spec_sha224, 28, - sha224_init, _gcry_md_block_write, sha256_final, sha256_read, + sha224_init, _gcry_md_block_write, sha256_final, sha256_read, NULL, sizeof (SHA256_CONTEXT), run_selftests }; @@ -642,7 +642,7 @@ gcry_md_spec_t _gcry_digest_spec_sha256 = { GCRY_MD_SHA256, {0, 1}, "SHA256", asn256, DIM (asn256), oid_spec_sha256, 32, - sha256_init, _gcry_md_block_write, sha256_final, sha256_read, + sha256_init, _gcry_md_block_write, sha256_final, sha256_read, NULL, sizeof (SHA256_CONTEXT), run_selftests }; diff --git a/cipher/sha512.c b/cipher/sha512.c index 4be1cab..1196db9 100644 --- a/cipher/sha512.c +++ b/cipher/sha512.c @@ -877,7 +877,7 @@ gcry_md_spec_t _gcry_digest_spec_sha512 = { GCRY_MD_SHA512, {0, 1}, "SHA512", sha512_asn, DIM (sha512_asn), oid_spec_sha512, 64, - sha512_init, _gcry_md_block_write, sha512_final, sha512_read, + sha512_init, _gcry_md_block_write, sha512_final, sha512_read, NULL, sizeof (SHA512_CONTEXT), run_selftests }; @@ -903,7 +903,7 @@ gcry_md_spec_t _gcry_digest_spec_sha384 = { GCRY_MD_SHA384, {0, 1}, "SHA384", sha384_asn, DIM (sha384_asn), oid_spec_sha384, 48, - sha384_init, _gcry_md_block_write, sha512_final, sha512_read, + sha384_init, _gcry_md_block_write, sha512_final, sha512_read, NULL, sizeof (SHA512_CONTEXT), run_selftests }; diff --git a/cipher/stribog.c b/cipher/stribog.c index de167a7..7f38e6f 100644 --- a/cipher/stribog.c +++ b/cipher/stribog.c @@ -1326,6 +1326,7 @@ gcry_md_spec_t _gcry_digest_spec_stribog_256 = GCRY_MD_STRIBOG256, {0, 0}, "STRIBOG256", NULL, 0, NULL, 32, stribog_init_256, 
_gcry_md_block_write, stribog_final, stribog_read_256, + NULL, sizeof (STRIBOG_CONTEXT) }; @@ -1334,5 +1335,6 @@ gcry_md_spec_t _gcry_digest_spec_stribog_512 = GCRY_MD_STRIBOG512, {0, 0}, "STRIBOG512", NULL, 0, NULL, 64, stribog_init_512, _gcry_md_block_write, stribog_final, stribog_read_512, + NULL, sizeof (STRIBOG_CONTEXT) }; diff --git a/cipher/tiger.c b/cipher/tiger.c index 8a08953..078133a 100644 --- a/cipher/tiger.c +++ b/cipher/tiger.c @@ -840,7 +840,7 @@ gcry_md_spec_t _gcry_digest_spec_tiger = { GCRY_MD_TIGER, {0, 0}, "TIGER192", NULL, 0, NULL, 24, - tiger_init, _gcry_md_block_write, tiger_final, tiger_read, + tiger_init, _gcry_md_block_write, tiger_final, tiger_read, NULL, sizeof (TIGER_CONTEXT) }; @@ -863,7 +863,7 @@ gcry_md_spec_t _gcry_digest_spec_tiger1 = { GCRY_MD_TIGER1, {0, 0}, "TIGER", asn1, DIM (asn1), oid_spec_tiger1, 24, - tiger1_init, _gcry_md_block_write, tiger_final, tiger_read, + tiger1_init, _gcry_md_block_write, tiger_final, tiger_read, NULL, sizeof (TIGER_CONTEXT) }; @@ -874,7 +874,7 @@ gcry_md_spec_t _gcry_digest_spec_tiger2 = { GCRY_MD_TIGER2, {0, 0}, "TIGER2", NULL, 0, NULL, 24, - tiger2_init, _gcry_md_block_write, tiger_final, tiger_read, + tiger2_init, _gcry_md_block_write, tiger_final, tiger_read, NULL, sizeof (TIGER_CONTEXT) }; diff --git a/cipher/whirlpool.c b/cipher/whirlpool.c index 5f224a1..8a06939 100644 --- a/cipher/whirlpool.c +++ b/cipher/whirlpool.c @@ -1525,6 +1525,6 @@ gcry_md_spec_t _gcry_digest_spec_whirlpool = { GCRY_MD_WHIRLPOOL, {0, 0}, "WHIRLPOOL", NULL, 0, NULL, 64, - whirlpool_init, whirlpool_write, whirlpool_final, whirlpool_read, + whirlpool_init, whirlpool_write, whirlpool_final, whirlpool_read, NULL, sizeof (whirlpool_context_t) }; diff --git a/doc/gcrypt.texi b/doc/gcrypt.texi index f13695a..3450bb2 100644 --- a/doc/gcrypt.texi +++ b/doc/gcrypt.texi @@ -3036,6 +3036,7 @@ are also supported. 
@c begin table of hash algorithms @cindex SHA-1 @cindex SHA-224, SHA-256, SHA-384, SHA-512 + at cindex SHA3-224, SHA3-256, SHA3-384, SHA3-512 @cindex RIPE-MD-160 @cindex MD2, MD4, MD5 @cindex TIGER, TIGER1, TIGER2 @@ -3108,6 +3109,22 @@ See FIPS 180-2 for the specification. This is the SHA-384 algorithm which yields a message digest of 64 bytes. See FIPS 180-2 for the specification. + at item GCRY_MD_SHA3_224 +This is the SHA3-224 algorithm which yields a message digest of 28 bytes. +See FIPS 202 for the specification. + + at item GCRY_MD_SHA3_256 +This is the SHA3-256 algorithm which yields a message digest of 32 bytes. +See FIPS 202 for the specification. + + at item GCRY_MD_SHA3_384 +This is the SHA3-384 algorithm which yields a message digest of 48 bytes. +See FIPS 202 for the specification. + + at item GCRY_MD_SHA3_512 +This is the SHA3-512 algorithm which yields a message digest of 64 bytes. +See FIPS 202 for the specification. + @item GCRY_MD_CRC32 This is the ISO 3309 and ITU-T V.42 cyclic redundancy check. It yields an output of 4 bytes. Note that this is not a hash algorithm in the @@ -3170,11 +3187,12 @@ this is the hashed data is highly confidential. @item GCRY_MD_FLAG_HMAC @cindex HMAC Turn the algorithm into a HMAC message authentication algorithm. This -only works if just one algorithm is enabled for the handle. Note that -the function @code{gcry_md_setkey} must be used to set the MAC key. -The size of the MAC is equal to the message digest of the underlying -hash algorithm. If you want CBC message authentication codes based on -a cipher, see @xref{Working with cipher handles}. +only works if just one algorithm is enabled for the handle and that +algorithm is not an extendable-output function. Note that the function + at code{gcry_md_setkey} must be used to set the MAC key. The size of the +MAC is equal to the message digest of the underlying hash algorithm.
+If you want CBC message authentication codes based on a cipher, +see @xref{Working with cipher handles}. @item GCRY_MD_FLAG_BUGEMU1 @cindex bug emulation @@ -3293,9 +3311,9 @@ message digest or some padding. @deftypefun void gcry_md_final (gcry_md_hd_t @var{h}) Finalize the message digest calculation. This is not really needed -because @code{gcry_md_read} does this implicitly. After this has been -done no further updates (by means of @code{gcry_md_write} or - at code{gcry_md_putc} should be done; However, to mitigate timing +because @code{gcry_md_read} and @code{gcry_md_extract} do this implicitly. +After this has been done no further updates (by means of @code{gcry_md_write} +or @code{gcry_md_putc}) should be done; However, to mitigate timing attacks it is sometimes useful to keep on updating the context after having stored away the actual digest. Only the first call to this function has an effect. It is implemented as a macro. @@ -3318,6 +3336,22 @@ The function does return @code{NULL} if the requested algorithm has not been enabled. @end deftypefun +The way to read output of extendable-output function is by using the +function: + + at deftypefun gpg_err_code_t gcry_md_extract (gcry_md_hd_t @var{h}, @ + int @var{algo}, void *@var{buffer}, size_t @var{length}) + + at code{gcry_md_extract} returns output from extendable-output function. +This function may be used as often as required to generate more output +byte stream from the algorithm. Function extracts the new output bytes +to @var{buffer} of the length @var{length}. Buffer will be fully +populated with new output. @var{algo} may be given as 0 to return the only +enabled message digest or it may specify one of the enabled algorithms. +The function does return non-zero value if the requested algorithm has not +been enabled.
+ at end deftypefun + Because it is often necessary to get the message digest of blocks of memory, two fast convenience function are available for this task: @@ -3493,6 +3527,7 @@ provided by Libgcrypt. @c begin table of MAC algorithms @cindex HMAC-SHA-1 @cindex HMAC-SHA-224, HMAC-SHA-256, HMAC-SHA-384, HMAC-SHA-512 + at cindex HMAC-SHA3-224, HMAC-SHA3-256, HMAC-SHA3-384, HMAC-SHA3-512 @cindex HMAC-RIPE-MD-160 @cindex HMAC-MD2, HMAC-MD4, HMAC-MD5 @cindex HMAC-TIGER1 @@ -3520,6 +3555,22 @@ algorithm. This is HMAC message authentication algorithm based on the SHA-384 hash algorithm. + at item GCRY_MAC_HMAC_SHA3_256 +This is HMAC message authentication algorithm based on the SHA3-256 hash +algorithm. + + at item GCRY_MAC_HMAC_SHA3_224 +This is HMAC message authentication algorithm based on the SHA3-224 hash +algorithm. + + at item GCRY_MAC_HMAC_SHA3_512 +This is HMAC message authentication algorithm based on the SHA3-512 hash +algorithm. + + at item GCRY_MAC_HMAC_SHA3_384 +This is HMAC message authentication algorithm based on the SHA3-384 hash +algorithm. + @item GCRY_MAC_HMAC_SHA1 This is HMAC message authentication algorithm based on the SHA-1 hash algorithm. diff --git a/src/cipher-proto.h b/src/cipher-proto.h index 8267791..3bca9c7 100644 --- a/src/cipher-proto.h +++ b/src/cipher-proto.h @@ -215,6 +215,9 @@ typedef void (*gcry_md_final_t) (void *c); /* Type for the md_read function. */ typedef unsigned char *(*gcry_md_read_t) (void *c); +/* Type for the md_extract function.
*/ +typedef void (*gcry_md_extract_t) (void *c, void *outbuf, size_t nbytes); + typedef struct gcry_md_oid_spec { const char *oidstring; @@ -237,6 +240,7 @@ typedef struct gcry_md_spec gcry_md_write_t write; gcry_md_final_t final; gcry_md_read_t read; + gcry_md_extract_t extract; size_t contextsize; /* allocate this amount of context */ selftest_func_t selftest; } gcry_md_spec_t; diff --git a/src/gcrypt-int.h b/src/gcrypt-int.h index 8014d61..d367307 100644 --- a/src/gcrypt-int.h +++ b/src/gcrypt-int.h @@ -129,6 +129,8 @@ gpg_err_code_t _gcry_md_ctl (gcry_md_hd_t hd, int cmd, void *buffer, size_t buflen); void _gcry_md_write (gcry_md_hd_t hd, const void *buffer, size_t length); unsigned char *_gcry_md_read (gcry_md_hd_t hd, int algo); +gpg_error_t _gcry_md_extract (gcry_md_hd_t hd, int algo, void *buffer, + size_t length); void _gcry_md_hash_buffer (int algo, void *digest, const void *buffer, size_t length); gpg_err_code_t _gcry_md_hash_buffers (int algo, unsigned int flags, diff --git a/src/gcrypt.h.in b/src/gcrypt.h.in index 585da6a..39be37a 100644 --- a/src/gcrypt.h.in +++ b/src/gcrypt.h.in @@ -473,7 +473,7 @@ char *gcry_sexp_nth_string (gcry_sexp_t list, int number); value can't be converted to an MPI, `NULL' is returned. */ gcry_mpi_t gcry_sexp_nth_mpi (gcry_sexp_t list, int number, int mpifmt); -/* Convenience fucntion to extract parameters from an S-expression +/* Convenience function to extract parameters from an S-expression * using a list of single letter parameters. */ gpg_error_t gcry_sexp_extract_param (gcry_sexp_t sexp, const char *path, @@ -1170,7 +1170,7 @@ enum gcry_md_algos GCRY_MD_GOSTR3411_94 = 308, /* GOST R 34.11-94. */ GCRY_MD_STRIBOG256 = 309, /* GOST R 34.11-2012, 256 bit. */ GCRY_MD_STRIBOG512 = 310, /* GOST R 34.11-2012, 512 bit. */ - GCRY_MD_GOSTR3411_CP = 311, /* GOST R 34.11-94 with CryptoPro-A S-Box. */ + GCRY_MD_GOSTR3411_CP = 311, /* GOST R 34.11-94 with CryptoPro-A S-Box. 
*/ GCRY_MD_SHA3_224 = 312, GCRY_MD_SHA3_256 = 313, GCRY_MD_SHA3_384 = 314, @@ -1239,6 +1239,11 @@ void gcry_md_write (gcry_md_hd_t hd, const void *buffer, size_t length); algorithm ALGO. */ unsigned char *gcry_md_read (gcry_md_hd_t hd, int algo); +/* Read more output from algorithm ALGO to BUFFER of size LENGTH from + * digest object HD. Algorithm needs to be 'expendable-output function'. */ +gpg_error_t gcry_md_extract (gcry_md_hd_t hd, int algo, void *buffer, + size_t length); + /* Convenience function to calculate the hash from the data in BUFFER of size LENGTH using the algorithm ALGO avoiding the creating of a hash object. The hash is returned in the caller provided buffer diff --git a/src/libgcrypt.def b/src/libgcrypt.def index 924f17f..f3e074b 100644 --- a/src/libgcrypt.def +++ b/src/libgcrypt.def @@ -278,5 +278,6 @@ EXPORTS gcry_mpi_ec_sub @244 + gcry_md_extract @245 ;; end of file with public symbols for Windows. diff --git a/src/libgcrypt.vers b/src/libgcrypt.vers index 7e8df3f..5b3d419 100644 --- a/src/libgcrypt.vers +++ b/src/libgcrypt.vers @@ -41,7 +41,7 @@ GCRYPT_1.6 { gcry_md_get_algo; gcry_md_get_algo_dlen; gcry_md_hash_buffer; gcry_md_hash_buffers; gcry_md_info; gcry_md_is_enabled; gcry_md_is_secure; - gcry_md_map_name; gcry_md_open; gcry_md_read; + gcry_md_map_name; gcry_md_open; gcry_md_read; gcry_md_extract; gcry_md_reset; gcry_md_setkey; gcry_md_write; gcry_md_debug; diff --git a/src/visibility.c b/src/visibility.c index cbf24e7..23a2705 100644 --- a/src/visibility.c +++ b/src/visibility.c @@ -1165,6 +1165,12 @@ gcry_md_read (gcry_md_hd_t hd, int algo) return _gcry_md_read (hd, algo); } +gcry_err_code_t +gcry_md_extract (gcry_md_hd_t hd, int algo, void *buffer, size_t length) +{ + return _gcry_md_extract(hd, algo, buffer, length); +} + void gcry_md_hash_buffer (int algo, void *digest, const void *buffer, size_t length) diff --git a/src/visibility.h b/src/visibility.h index fa3c763..bb25de0 100644 --- a/src/visibility.h +++ b/src/visibility.h @@ 
-110,6 +110,7 @@ MARK_VISIBLEX (gcry_md_is_secure) MARK_VISIBLEX (gcry_md_map_name) MARK_VISIBLEX (gcry_md_open) MARK_VISIBLEX (gcry_md_read) +MARK_VISIBLEX (gcry_md_extract) MARK_VISIBLEX (gcry_md_reset) MARK_VISIBLEX (gcry_md_setkey) MARK_VISIBLEX (gcry_md_write) @@ -374,6 +375,7 @@ MARK_VISIBLEX (_gcry_mpi_get_const) #define gcry_md_map_name _gcry_USE_THE_UNDERSCORED_FUNCTION #define gcry_md_open _gcry_USE_THE_UNDERSCORED_FUNCTION #define gcry_md_read _gcry_USE_THE_UNDERSCORED_FUNCTION +#define gcry_md_extract _gcry_USE_THE_UNDERSCORED_FUNCTION #define gcry_md_reset _gcry_USE_THE_UNDERSCORED_FUNCTION #define gcry_md_setkey _gcry_USE_THE_UNDERSCORED_FUNCTION #define gcry_md_write _gcry_USE_THE_UNDERSCORED_FUNCTION commit cee2e122ec6c1886957a8d47498eb63a6a921725 Author: Jussi Kivilinna Date: Sun Oct 25 15:11:14 2015 +0200 md: check hmac flag in prepare_macpads * cipher/md.c (prepare_macpads): Check hmac flag. -- Signed-off-by: Jussi Kivilinna diff --git a/cipher/md.c b/cipher/md.c index c6bf90d..948d269 100644 --- a/cipher/md.c +++ b/cipher/md.c @@ -671,6 +671,9 @@ prepare_macpads (gcry_md_hd_t a, const unsigned char *key, size_t keylen) if (!a->ctx->list) return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled. */ + if (!a->ctx->flags.hmac) + return GPG_ERR_DIGEST_ALGO; /* Tried setkey for non-HMAC md. 
*/ + for (r = a->ctx->list; r; r = r->next) { const unsigned char *k; ----------------------------------------------------------------------- Summary of changes: cipher/crc.c | 8 +- cipher/gostr3411-94.c | 4 +- cipher/hash-common.c | 28 ++- cipher/keccak.c | 283 +++++++++++++++++++++--- cipher/md.c | 72 ++++++- cipher/md2.c | 2 +- cipher/md4.c | 2 +- cipher/md5.c | 2 +- cipher/rmd160.c | 2 +- cipher/sha1.c | 2 +- cipher/sha256.c | 4 +- cipher/sha512.c | 4 +- cipher/stribog.c | 2 + cipher/tiger.c | 6 +- cipher/whirlpool.c | 2 +- doc/gcrypt.texi | 84 +++++++- src/cipher-proto.h | 4 + src/cipher.h | 2 + src/gcrypt-int.h | 2 + src/gcrypt.h.in | 13 +- src/libgcrypt.def | 1 + src/libgcrypt.vers | 2 +- src/visibility.c | 6 + src/visibility.h | 2 + tests/basic.c | 586 ++++++++++++++++++++++++++++++++++++++++++++++++-- tests/bench-slope.c | 6 + 26 files changed, 1040 insertions(+), 91 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From gniibe at fsij.org Mon Nov 2 09:31:07 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Mon, 02 Nov 2015 17:31:07 +0900 Subject: random: Use poll instead of select Message-ID: <56371F4B.1080206@fsij.org> Hello, I'm trying to fix a bug in GnuPG 1.4 (caused by (a kind of bug in) duplicity which uses many file descriptors): https://bugs.gnupg.org/gnupg/issue1818 https://bugs.debian.org/771263 And find a patch for libgcrypt in Fedora: http://pkgs.fedoraproject.org/cgit/libgcrypt.git/tree/ libgcrypt-1.6.1-use-poll.patch I think that the patch by Fedora is almost good, but it's not that accurate in the comment and the behavior of setting any_need_entropy to 1 when poll timeouts. So, this is the version which keeps the original behavior; It simply replaces select by poll. Note: poll uses millisecond for its timeout. 
I think that all Linux kernel nowadays support poll(2). diff --git a/random/rndlinux.c b/random/rndlinux.c index 9eeec57..267a07e 100644 --- a/random/rndlinux.c +++ b/random/rndlinux.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "types.h" #include "g10lib.h" #include "rand-internal.h" @@ -179,12 +180,14 @@ _gcry_rndlinux_gather_random (void (*add)(const void*, size_t, return with something we will actually use 100ms. */ while (length) { - fd_set rfds; - struct timeval tv; int rc; + struct pollfd pfd; + + pfd.fd = fd; + pfd.events = POLLIN; /* If we collected some bytes update the progress indicator. We - do this always and not just if the select timed out because + do this always and not just if the poll timed out because often just a few bytes are gathered within the timeout period. */ if (any_need_entropy || last_so_far != (want - length) ) @@ -195,33 +198,19 @@ _gcry_rndlinux_gather_random (void (*add)(const void*, size_t, any_need_entropy = 1; } - /* If the system has no limit on the number of file descriptors - and we encounter an fd which is larger than the fd_set size, - we don't use the select at all. The select code is only used - to emit progress messages. A better solution would be to - fall back to poll() if available. */ -#ifdef FD_SETSIZE - if (fd < FD_SETSIZE) -#endif + if ( !(rc = poll (&pfd, 1, delay)) ) { - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - tv.tv_sec = delay; - tv.tv_usec = delay? 0 : 100000; - if ( !(rc=select(fd+1, &rfds, NULL, NULL, &tv)) ) - { - any_need_entropy = 1; - delay = 3; /* Use 3 seconds henceforth. */ - continue; - } - else if( rc == -1 ) - { - log_error ("select() error: %s\n", strerror(errno)); - if (!delay) - delay = 1; /* Use 1 second if we encounter an error before - we have ever blocked. */ - continue; - } + any_need_entropy = 1; + delay = 3000; /* Use 3 seconds henceforth. 
*/ + continue; + } + else if( rc == -1 ) + { + log_error ("poll() error: %s\n", strerror (errno)); + if (!delay) + delay = 1000; /* Use 1 second if we encounter an error before + we have ever blocked. */ + continue; } do -- From wk at gnupg.org Mon Nov 2 17:22:59 2015 From: wk at gnupg.org (Werner Koch) Date: Mon, 02 Nov 2015 17:22:59 +0100 Subject: random: Use poll instead of select In-Reply-To: <56371F4B.1080206@fsij.org> (NIIBE Yutaka's message of "Mon, 02 Nov 2015 17:31:07 +0900") References: <56371F4B.1080206@fsij.org> Message-ID: <87si4oxtek.fsf@vigenere.g10code.de> On Mon, 2 Nov 2015 09:31, gniibe at fsij.org said: > I'm trying to fix a bug in GnuPG 1.4 (caused by (a kind of bug in) > duplicity which uses many file descriptors): and doesn't close them before execing gpg? That is a real bug in duplicity and we should better not work around it but only detect it. For Libgcrypt however, we can't limit the number of used file descriptors. > So, this is the version which keeps the original behavior; It simply > replaces select by poll. Note: poll uses millisecond for its timeout. > > I think that all Linux kernel nowadays support poll(2). rndlinux is a misnomer these days because it is actually an access module for /dev/random style devices as available on all kind of platforms. I am not sure whether poll is available on all these platforms. You should add a test for a working poll or use poll only for linux (to simplify the configure test). Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From vcizek at suse.com Thu Nov 5 14:14:24 2015 From: vcizek at suse.com (Vitezslav Cizek) Date: Thu, 5 Nov 2015 14:14:24 +0100 Subject: DCO signature Message-ID: <20151105131424.GA32700@kolac.suse.cz> Libgcrypt Developer's Certificate of Origin.
Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Vitezslav Cizek -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 819 bytes Desc: Digital signature URL: From jussi.kivilinna at iki.fi Thu Nov 5 18:38:12 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 05 Nov 2015 19:38:12 +0200 Subject: [PATCH] Update license information for CRC Message-ID: <20151105173812.19735.9576.stgit@localhost6.localdomain6> * LICENSES: Remove 'Simple permissive' and 'IETF permissive' licenses for 'cipher/crc.c' as result of rewrite of CRC implementations. 
-- Signed-off-by: Jussi Kivilinna --- LICENSES | 50 -------------------------------------------------- 1 file changed, 50 deletions(-) diff --git a/LICENSES b/LICENSES index 6c09e1f..ff8b7fa 100644 --- a/LICENSES +++ b/LICENSES @@ -54,56 +54,6 @@ with any binary distributions derived from the GNU C Library. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #+end_quote -* Simple permissive licenses - - For files: - - cipher/crc.c - -#+begin_quote - Copyright (c) 1996 L. Peter Deutsch - - Permission is granted to copy and distribute this document for - any purpose and without charge, including translations into - other languages and incorporation into compilations, provided - that the copyright notice and this notice are preserved, and - that any substantive changes or deletions from the original are - clearly marked. -#+end_quote - -* IETF permissive licenses - - For files: - - cipher/crc.c - -#+begin_quote - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished - to others, and derivative works that comment on or otherwise - explain it or assist in its implementation may be prepared, - copied, published and distributed, in whole or in part, without - restriction of any kind, provided that the above copyright notice - and this paragraph are included on all such copies and derivative - works. However, this document itself may not be modified in any - way, such as by removing the copyright notice or references to - the Internet Society or other Internet organizations, except as - needed for the purpose of developing Internet standards in which - case the procedures for copyrights defined in the Internet - Standards process must be followed, or as required to translate - it into languages other than English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on - an "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE - OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY - IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR - PURPOSE. -#+end_quote - * X License For files: From cvs at cvs.gnupg.org Thu Nov 5 18:11:43 2015 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Thu, 05 Nov 2015 18:11:43 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-280-g89fa74d Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 89fa74d6b3e58cd4fcd6e0939a35e46cbaca2ea0 (commit) via a1cc7bb15473a2419b24ecac765ae0ce5989a13b (commit) via 2857cb89c6dc1c02266600bc1fd2967a3cd5cf88 (commit) via 07e4839e75a7bca3a6c0a94aecfe75efe61d7ff2 (commit) from c0b9eee2d93a13930244f9ce0c14ed6b4aeb6c29 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 89fa74d6b3e58cd4fcd6e0939a35e46cbaca2ea0 Author: Jussi Kivilinna Date: Sun Nov 1 20:44:09 2015 +0200 Improve performance of Tiger hash algorithms * cipher/tiger.c (tiger_round, pass, key_schedule): Convert functions to macros. (transform_blk): Pass variable names instead of pointers to 'pass'. 
-- Benchmark results on Intel Haswell @ 3.2 Ghz: Before: | nanosecs/byte mebibytes/sec cycles/byte TIGER | 3.25 ns/B 293.5 MiB/s 10.40 c/B After (1.75x faster): | nanosecs/byte mebibytes/sec cycles/byte TIGER | 1.85 ns/B 515.3 MiB/s 5.92 c/B Benchmark results on Cortex-A8 @ 1008 Mhz: Before: | nanosecs/byte mebibytes/sec cycles/byte TIGER | 63.42 ns/B 15.04 MiB/s 63.93 c/B After (1.26x faster): | nanosecs/byte mebibytes/sec cycles/byte TIGER | 49.99 ns/B 19.08 MiB/s 50.39 c/B Signed-off-by: Jussi Kivilinna diff --git a/cipher/tiger.c b/cipher/tiger.c index 078133a..516bd44 100644 --- a/cipher/tiger.c +++ b/cipher/tiger.c @@ -633,68 +633,44 @@ tiger2_init (void *context, unsigned int flags) do_init (context, 2); } -static void -tiger_round( u64 *ra, u64 *rb, u64 *rc, u64 x, int mul ) -{ - u64 a = *ra; - u64 b = *rb; - u64 c = *rc; - - c ^= x; - a -= ( sbox1[ c & 0xff ] ^ sbox2[ (c >> 16) & 0xff ] - ^ sbox3[ (c >> 32) & 0xff ] ^ sbox4[ (c >> 48) & 0xff ]); - b += ( sbox4[ (c >> 8) & 0xff ] ^ sbox3[ (c >> 24) & 0xff ] - ^ sbox2[ (c >> 40) & 0xff ] ^ sbox1[ (c >> 56) & 0xff ]); - b *= mul; - - *ra = a; - *rb = b; - *rc = c; -} - - -static void -pass( u64 *ra, u64 *rb, u64 *rc, u64 *x, int mul ) -{ - u64 a = *ra; - u64 b = *rb; - u64 c = *rc; - - tiger_round( &a, &b, &c, x[0], mul ); - tiger_round( &b, &c, &a, x[1], mul ); - tiger_round( &c, &a, &b, x[2], mul ); - tiger_round( &a, &b, &c, x[3], mul ); - tiger_round( &b, &c, &a, x[4], mul ); - tiger_round( &c, &a, &b, x[5], mul ); - tiger_round( &a, &b, &c, x[6], mul ); - tiger_round( &b, &c, &a, x[7], mul ); - - *ra = a; - *rb = b; - *rc = c; -} - -static void -key_schedule( u64 *x ) -{ - x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; - x[1] ^= x[0]; - x[2] += x[1]; - x[3] -= x[2] ^ ((~x[1]) << 19 ); - x[4] ^= x[3]; - x[5] += x[4]; - x[6] -= x[5] ^ ((~x[4]) >> 23 ); - x[7] ^= x[6]; - x[0] += x[7]; - x[1] -= x[0] ^ ((~x[7]) << 19 ); - x[2] ^= x[1]; - x[3] += x[2]; - x[4] -= x[3] ^ ((~x[2]) >> 23 ); - x[5] ^= x[4]; - x[6] += x[5];
- x[7] -= x[6] ^ 0x0123456789abcdefLL; -} +#define tiger_round(xa, xb, xc, xx, xmul) { \ + xc ^= xx; \ + xa -= ( sbox1[ (xc) & 0xff ] ^ sbox2[ ((xc) >> 16) & 0xff ] \ + ^ sbox3[ ((xc) >> 32) & 0xff ] ^ sbox4[ ((xc) >> 48) & 0xff ]); \ + xb += ( sbox4[ ((xc) >> 8) & 0xff ] ^ sbox3[ ((xc) >> 24) & 0xff ] \ + ^ sbox2[ ((xc) >> 40) & 0xff ] ^ sbox1[ ((xc) >> 56) & 0xff ]); \ + xb *= xmul; } + + +#define pass(ya, yb, yc, yx, ymul) { \ + tiger_round( ya, yb, yc, yx[0], ymul ); \ + tiger_round( yb, yc, ya, yx[1], ymul ); \ + tiger_round( yc, ya, yb, yx[2], ymul ); \ + tiger_round( ya, yb, yc, yx[3], ymul ); \ + tiger_round( yb, yc, ya, yx[4], ymul ); \ + tiger_round( yc, ya, yb, yx[5], ymul ); \ + tiger_round( ya, yb, yc, yx[6], ymul ); \ + tiger_round( yb, yc, ya, yx[7], ymul ); } + + +#define key_schedule(x) { \ + x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; \ + x[1] ^= x[0]; \ + x[2] += x[1]; \ + x[3] -= x[2] ^ ((~x[1]) << 19 ); \ + x[4] ^= x[3]; \ + x[5] += x[4]; \ + x[6] -= x[5] ^ ((~x[4]) >> 23 ); \ + x[7] ^= x[6]; \ + x[0] += x[7]; \ + x[1] -= x[0] ^ ((~x[7]) << 19 ); \ + x[2] ^= x[1]; \ + x[3] += x[2]; \ + x[4] -= x[3] ^ ((~x[2]) >> 23 ); \ + x[5] ^= x[4]; \ + x[6] += x[5]; \ + x[7] -= x[6] ^ 0x0123456789abcdefLL; } /**************** @@ -716,11 +692,11 @@ transform_blk ( void *ctx, const unsigned char *data ) b = bb = hd->b; c = cc = hd->c; - pass( &a, &b, &c, x, 5); + pass( a, b, c, x, 5); key_schedule( x ); - pass( &c, &a, &b, x, 7); + pass( c, a, b, x, 7); key_schedule( x ); - pass( &b, &c, &a, x, 9); + pass( b, c, a, x, 9); /* feedforward */ a ^= aa; commit a1cc7bb15473a2419b24ecac765ae0ce5989a13b Author: Jussi Kivilinna Date: Sun Nov 1 16:06:26 2015 +0200 Add ARMv7/NEON implementation of Keccak * cipher/Makefile.am: Add 'keccak-armv7-neon.S'. * cipher/keccak-armv7-neon.S: New. * cipher/keccak.c (USE_64BIT_ARM_NEON): New. (NEED_COMMON64): Select if USE_64BIT_ARM_NEON. [NEED_COMMON64] (round_consts_64bit): Rename to... 
[NEED_COMMON64] (_gcry_keccak_round_consts_64bit): ...this; Add terminator at end. [USE_64BIT_ARM_NEON] (_gcry_keccak_permute_armv7_neon) (_gcry_keccak_absorb_lanes64_armv7_neon, keccak_permute64_armv7_neon) (keccak_absorb_lanes64_armv7_neon, keccak_armv7_neon_64_ops): New. (keccak_init) [USE_64BIT_ARM_NEON]: Select ARM/NEON implementation if supported by HW. * cipher/keccak_permute_64.h (KECCAK_F1600_PERMUTE_FUNC_NAME): Update to use new round constant table. * configure.ac: Add 'keccak-armv7-neon.lo'. -- Patch adds ARMv7/NEON implementation of Keccak (SHAKE/SHA3). Patch is based on public-domain implementation by Ronny Van Keer from SUPERCOP package: https://github.com/floodyberry/supercop/blob/master/crypto_hash/\ keccakc1024/inplace-armv7a-neon/keccak2.s Benchmark results on Cortex-A8 @ 1008 Mhz: Before (generic 32-bit bit-interleaved impl.): | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 83.00 ns/B 11.49 MiB/s 83.67 c/B SHAKE256 | 101.7 ns/B 9.38 MiB/s 102.5 c/B SHA3-224 | 96.13 ns/B 9.92 MiB/s 96.90 c/B SHA3-256 | 101.5 ns/B 9.40 MiB/s 102.3 c/B SHA3-384 | 131.4 ns/B 7.26 MiB/s 132.5 c/B SHA3-512 | 189.1 ns/B 5.04 MiB/s 190.6 c/B After (ARM/NEON, ~3.2x faster): | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 25.09 ns/B 38.01 MiB/s 25.29 c/B SHAKE256 | 30.95 ns/B 30.82 MiB/s 31.19 c/B SHA3-224 | 29.24 ns/B 32.61 MiB/s 29.48 c/B SHA3-256 | 30.95 ns/B 30.82 MiB/s 31.19 c/B SHA3-384 | 40.42 ns/B 23.59 MiB/s 40.74 c/B SHA3-512 | 58.37 ns/B 16.34 MiB/s 58.84 c/B Signed-off-by: Jussi Kivilinna diff --git a/cipher/Makefile.am b/cipher/Makefile.am index be03d06..88c8fbf 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -90,7 +90,7 @@ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S sha256-avx2-bmi2-amd64.S \ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S sha512-avx2-bmi2-amd64.S \ sha512-armv7-neon.S \ -keccak.c keccak_permute_32.h keccak_permute_64.h \ +keccak.c 
keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ stribog.c \ tiger.c \ whirlpool.c whirlpool-sse2-amd64.S \ diff --git a/cipher/keccak-armv7-neon.S b/cipher/keccak-armv7-neon.S new file mode 100644 index 0000000..0bec8d5 --- /dev/null +++ b/cipher/keccak-armv7-neon.S @@ -0,0 +1,945 @@ +/* keccak-armv7-neon.S - ARMv7/NEON implementation of Keccak + * + * Copyright (C) 2015 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) + +/* Based on public-domain/CC0 implementation from SUPERCOP package + * (keccakc1024/inplace-armv7a-neon/keccak2.s) + * + * Original copyright header follows: + */ + +@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +@ Michaël Peeters and Gilles Van Assche. For more information, feedback or +@ questions, please refer to our website: http://keccak.noekeon.org/ +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file.
+@ http://creativecommons.org/publicdomain/zero/1.0/ + +.text + +.syntax unified +.fpu neon +.arm + + +.extern _gcry_keccak_round_consts_64bit; + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +@// --- offsets in state +.equ Aba, 0*8 +.equ Aga, 1*8 +.equ Aka, 2*8 +.equ Ama, 3*8 +.equ Asa, 4*8 + +@// --- macros + +.macro KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5 + + @Prepare Theta + @Ca = Aba^Aga^Aka^Ama^Asa@ + @Ce = Abe^Age^Ake^Ame^Ase@ + @Ci = Abi^Agi^Aki^Ami^Asi@ + @Co = Abo^Ago^Ako^Amo^Aso@ + @Cu = Abu^Agu^Aku^Amu^Asu@ + @De = Ca^ROL64(Ci, 1)@ + @Di = Ce^ROL64(Co, 1)@ + @Do = Ci^ROL64(Cu, 1)@ + @Du = Co^ROL64(Ca, 1)@ + @Da = Cu^ROL64(Ce, 1)@ + + veor.64 q4, q6, q7 + veor.64 q5, q9, q10 + veor.64 d8, d8, d9 + veor.64 d10, d10, d11 + veor.64 d1, d8, d16 + veor.64 d2, d10, d17 + + veor.64 q4, q11, q12 + veor.64 q5, q14, q15 + veor.64 d8, d8, d9 + veor.64 d10, d10, d11 + veor.64 d3, d8, d26 + + vadd.u64 q4, q1, q1 + veor.64 d4, d10, d27 + vmov.64 d0, d5 + vsri.64 q4, q1, #63 + + vadd.u64 q5, q2, q2 + veor.64 q4, q4, q0 + vsri.64 q5, q2, #63 + vadd.u64 d7, d1, d1 + veor.64 \argA2, \argA2, d8 + veor.64 q5, q5, q1 + + vsri.64 d7, d1, #63 + vshl.u64 d1, \argA2, #44 + veor.64 \argA3, \argA3, d9 + veor.64 d7, d7, d4 + + @Ba = argA1^Da@ + @Be = ROL64((argA2^De), 44)@ + @Bi = ROL64((argA3^Di), 43)@ + @Bo = ROL64((argA4^Do), 21)@ + @Bu = ROL64((argA5^Du), 14)@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + @argA1 = Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@ + vsri.64 d1, \argA2, #64-44 + vshl.u64 d2, \argA3, #43 + vldr.64 d0, [sp, #\argA1] + veor.64 \argA4, \argA4, d10 + vsri.64 d2, \argA3, #64-43 + vshl.u64 d3, \argA4, 
#21 + veor.64 \argA5, \argA5, d11 + veor.64 d0, d0, d7 + vsri.64 d3, \argA4, #64-21 + vbic.64 d5, d2, d1 + vshl.u64 d4, \argA5, #14 + vbic.64 \argA2, d3, d2 + vld1.64 d6, [ip]! + veor.64 d5, d0 + vsri.64 d4, \argA5, #64-14 + veor.64 d5, d6 + vbic.64 \argA5, d1, d0 + vbic.64 \argA3, d4, d3 + vbic.64 \argA4, d0, d4 + veor.64 \argA2, d1 + vstr.64 d5, [sp, #\argA1] + veor.64 \argA3, d2 + veor.64 \argA4, d3 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5 + + @d2 = ROL64((argA1^Da), 3)@ + @d3 = ROL64((argA2^De), 45)@ + @d4 = ROL64((argA3^Di), 61)@ + @d0 = ROL64((argA4^Do), 28)@ + @d1 = ROL64((argA5^Du), 20)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d3, \argA2, #45 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d4, \argA3, #61 + veor.64 \argA4, \argA4, d10 + vsri.64 d3, \argA2, #64-45 + veor.64 \argA5, \argA5, d11 + vsri.64 d4, \argA3, #64-61 + vshl.u64 d0, \argA4, #28 + veor.64 d6, d6, d7 + vshl.u64 d1, \argA5, #20 + vbic.64 \argA3, d4, d3 + vsri.64 d0, \argA4, #64-28 + vbic.64 \argA4, d0, d4 + vshl.u64 d2, d6, #3 + vsri.64 d1, \argA5, #64-20 + veor.64 \argA4, d3 + vsri.64 d2, d6, #64-3 + vbic.64 \argA5, d1, d0 + vbic.64 d6, d2, d1 + vbic.64 \argA2, d3, d2 + veor.64 d6, d0 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 \argA3, d2 + veor.64 d5, d6 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5 + + @d4 = ROL64((argA1^Da), 18)@ + @d0 = ROL64((argA2^De), 1)@ + @d1 = ROL64((argA3^Di), 6)@ + @d2 = ROL64((argA4^Do), 25)@ + @d3 = ROL64((argA5^Du), 8)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA3, \argA3, d9 + veor.64 \argA4, \argA4, d10 + vshl.u64 d1, \argA3, #6 + 
vldr.64 d6, [sp, #\argA1] + vshl.u64 d2, \argA4, #25 + veor.64 \argA5, \argA5, d11 + vsri.64 d1, \argA3, #64-6 + veor.64 \argA2, \argA2, d8 + vsri.64 d2, \argA4, #64-25 + vext.8 d3, \argA5, \argA5, #7 + veor.64 d6, d6, d7 + vbic.64 \argA3, d2, d1 + vadd.u64 d0, \argA2, \argA2 + vbic.64 \argA4, d3, d2 + vsri.64 d0, \argA2, #64-1 + vshl.u64 d4, d6, #18 + veor.64 \argA2, d1, \argA4 + veor.64 \argA3, d0 + vsri.64 d4, d6, #64-18 + vstr.64 \argA3, [sp, #\argA1] + veor.64 d5, \argA3 + vbic.64 \argA5, d1, d0 + vbic.64 \argA3, d4, d3 + vbic.64 \argA4, d0, d4 + veor.64 \argA3, d2 + veor.64 \argA4, d3 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5 + + @d1 = ROL64((argA1^Da), 36)@ + @d2 = ROL64((argA2^De), 10)@ + @d3 = ROL64((argA3^Di), 15)@ + @d4 = ROL64((argA4^Do), 56)@ + @d0 = ROL64((argA5^Du), 27)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d2, \argA2, #10 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d3, \argA3, #15 + veor.64 \argA4, \argA4, d10 + vsri.64 d2, \argA2, #64-10 + vsri.64 d3, \argA3, #64-15 + veor.64 \argA5, \argA5, d11 + vext.8 d4, \argA4, \argA4, #1 + vbic.64 \argA2, d3, d2 + vshl.u64 d0, \argA5, #27 + veor.64 d6, d6, d7 + vbic.64 \argA3, d4, d3 + vsri.64 d0, \argA5, #64-27 + vshl.u64 d1, d6, #36 + veor.64 \argA3, d2 + vbic.64 \argA4, d0, d4 + vsri.64 d1, d6, #64-36 + + veor.64 \argA4, d3 + vbic.64 d6, d2, d1 + vbic.64 \argA5, d1, d0 + veor.64 d6, d0 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 d5, d6 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5 + + @d3 = ROL64((argA1^Da), 41)@ + @d4 = ROL64((argA2^De), 2)@ + @d0 = ROL64((argA3^Di), 62)@ + @d1 = ROL64((argA4^Do), 55)@ + @d2 = ROL64((argA5^Du), 39)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + 
@argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d4, \argA2, #2 + veor.64 \argA5, \argA5, d11 + vshl.u64 d0, \argA3, #62 + vldr.64 d6, [sp, #\argA1] + vsri.64 d4, \argA2, #64-2 + veor.64 \argA4, \argA4, d10 + vsri.64 d0, \argA3, #64-62 + + vshl.u64 d1, \argA4, #55 + veor.64 d6, d6, d7 + vshl.u64 d2, \argA5, #39 + vsri.64 d1, \argA4, #64-55 + vbic.64 \argA4, d0, d4 + vsri.64 d2, \argA5, #64-39 + vbic.64 \argA2, d1, d0 + vshl.u64 d3, d6, #41 + veor.64 \argA5, d4, \argA2 + vbic.64 \argA2, d2, d1 + vsri.64 d3, d6, #64-41 + veor.64 d6, d0, \argA2 + + vbic.64 \argA2, d3, d2 + vbic.64 \argA3, d4, d3 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 d5, d6 + veor.64 \argA3, d2 + veor.64 \argA4, d3 + + .endm + + +@// --- code + +@not callable from C! +.p2align 3 +.type KeccakF_armv7a_neon_asm,%function; +KeccakF_armv7a_neon_asm: @ + +.LroundLoop: + + KeccakThetaRhoPiChiIota Aba, d13, d19, d25, d31 + KeccakThetaRhoPiChi1 Aka, d15, d21, d22, d28 + KeccakThetaRhoPiChi2 Asa, d12, d18, d24, d30 + KeccakThetaRhoPiChi3 Aga, d14, d20, d26, d27 + KeccakThetaRhoPiChi4 Ama, d16, d17, d23, d29 + + KeccakThetaRhoPiChiIota Aba, d15, d18, d26, d29 + KeccakThetaRhoPiChi1 Asa, d14, d17, d25, d28 + KeccakThetaRhoPiChi2 Ama, d13, d21, d24, d27 + KeccakThetaRhoPiChi3 Aka, d12, d20, d23, d31 + KeccakThetaRhoPiChi4 Aga, d16, d19, d22, d30 + + KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30 + KeccakThetaRhoPiChi1 Ama, d12, d19, d26, d28 + KeccakThetaRhoPiChi2 Aga, d15, d17, d24, d31 + KeccakThetaRhoPiChi3 Asa, d13, d20, d22, d29 + KeccakThetaRhoPiChi4 Aka, d16, d18, d25, d27 + + KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27 + KeccakThetaRhoPiChi1 Aga, d13, d18, d23, d28 + KeccakThetaRhoPiChi2 Aka, d14, d19, d24, d29 + ldr r0, [ip] + KeccakThetaRhoPiChi3 Ama, d15, d20, d25, d30 + cmp r0, #0xFFFFFFFF + KeccakThetaRhoPiChi4 Asa, d16, d21, d26, d31 + + bne .LroundLoop + sub ip,
#(8*24) + bx lr +.p2align 2 +.ltorg +.size KeccakF_armv7a_neon_asm,.-KeccakF_armv7a_neon_asm; + + +@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state) callable from C +.p2align 3 +.global _gcry_keccak_permute_armv7_neon +.type _gcry_keccak_permute_armv7_neon,%function; +_gcry_keccak_permute_armv7_neon: + + push {ip, lr} + vpush {q4-q7} + sub sp,sp, #5*8 + + vldr.64 d0, [r0, #0*8] + vldr.64 d12, [r0, #1*8] + vldr.64 d17, [r0, #2*8] + vldr.64 d22, [r0, #3*8] + vldr.64 d27, [r0, #4*8] + + GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); + + vldr.64 d1, [r0, #5*8] + vldr.64 d13, [r0, #6*8] + vldr.64 d18, [r0, #7*8] + vldr.64 d23, [r0, #8*8] + vldr.64 d28, [r0, #9*8] + + vldr.64 d2, [r0, #10*8] + vldr.64 d14, [r0, #11*8] + vldr.64 d19, [r0, #12*8] + vldr.64 d24, [r0, #13*8] + vldr.64 d29, [r0, #14*8] + + vldr.64 d3, [r0, #15*8] + vldr.64 d15, [r0, #16*8] + vldr.64 d20, [r0, #17*8] + vldr.64 d25, [r0, #18*8] + vldr.64 d30, [r0, #19*8] + + vldr.64 d4, [r0, #20*8] + vldr.64 d16, [r0, #21*8] + vldr.64 d21, [r0, #22*8] + vldr.64 d26, [r0, #23*8] + vldr.64 d31, [r0, #24*8] + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + mov r1, r0 + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + vpop.64 { d0- d4 } + + vstr.64 d0, [r1, #0*8] + vstr.64 d12, [r1, #1*8] + vstr.64 d17, [r1, #2*8] + vstr.64 d22, [r1, #3*8] + vstr.64 d27, [r1, #4*8] + + vstr.64 d1, [r1, #5*8] + vstr.64 d13, [r1, #6*8] + vstr.64 d18, [r1, #7*8] + vstr.64 d23, [r1, #8*8] + vstr.64 d28, [r1, #9*8] + + vstr.64 d2, [r1, #10*8] + vstr.64 d14, [r1, #11*8] + vstr.64 d19, [r1, #12*8] + vstr.64 d24, [r1, #13*8] + vstr.64 d29, [r1, #14*8] + + vstr.64 d3, [r1, #15*8] + vstr.64 d15, [r1, #16*8] + vstr.64 d20, [r1, #17*8] + vstr.64 d25, [r1, #18*8] + vstr.64 d30, [r1, #19*8] + + vstr.64 d4, [r1, #20*8] + vstr.64 d16, [r1, #21*8] + vstr.64 d21, [r1, #22*8] + vstr.64 d26, [r1, #23*8] + 
vstr.64 d31, [r1, #24*8] + + mov r0, #112 + vpop {q4-q7} + pop {ip, pc} +.p2align 2 +.ltorg +.size _gcry_keccak_permute_armv7_neon,.-_gcry_keccak_permute_armv7_neon; + +@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state, @r4 +@ int pos, @r1 +@ const byte *lanes, @r2 +@ unsigned int nlanes, @r3 +@ int blocklanes) @ r5 callable from C +.p2align 3 +.global _gcry_keccak_absorb_lanes64_armv7_neon +.type _gcry_keccak_absorb_lanes64_armv7_neon,%function; +_gcry_keccak_absorb_lanes64_armv7_neon: + + cmp r3, #0 @ nlanes == 0 + itt eq + moveq r0, #0 + bxeq lr + + push {r4-r5, ip, lr} + beq .Lout + mov r4, r0 + ldr r5, [sp, #(4*4)] + vpush {q4-q7} + + @ load state + vldr.64 d0, [r4, #0*8] + vldr.64 d12, [r4, #1*8] + vldr.64 d17, [r4, #2*8] + vldr.64 d22, [r4, #3*8] + vldr.64 d27, [r4, #4*8] + + GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); + + vldr.64 d1, [r4, #5*8] + vldr.64 d13, [r4, #6*8] + vldr.64 d18, [r4, #7*8] + vldr.64 d23, [r4, #8*8] + vldr.64 d28, [r4, #9*8] + + vldr.64 d2, [r4, #10*8] + vldr.64 d14, [r4, #11*8] + vldr.64 d19, [r4, #12*8] + vldr.64 d24, [r4, #13*8] + vldr.64 d29, [r4, #14*8] + + vldr.64 d3, [r4, #15*8] + vldr.64 d15, [r4, #16*8] + vldr.64 d20, [r4, #17*8] + vldr.64 d25, [r4, #18*8] + vldr.64 d30, [r4, #19*8] + + vldr.64 d4, [r4, #20*8] + vldr.64 d16, [r4, #21*8] + vldr.64 d21, [r4, #22*8] + vldr.64 d26, [r4, #23*8] + vldr.64 d31, [r4, #24*8] + +.Lmain_loop: + + @ detect absorb mode (full blocks vs lanes) + + cmp r1, #0 @ pos != 0 + bne .Llanes_loop + +.Lmain_loop_pos0: + + @ full blocks mode + + @ switch (blocksize) + cmp r5, #21 + beq .Lfull_block_21 + cmp r5, #18 + beq .Lfull_block_18 + cmp r5, #17 + beq .Lfull_block_17 + cmp r5, #13 + beq .Lfull_block_13 + cmp r5, #9 + beq .Lfull_block_9 + + @ unknown blocksize + b .Llanes_loop + +.Lfull_block_21: + + @ SHAKE128 + + cmp r3, #21 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! 
+ veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d8}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + vld1.64 {d9-d11}, [r2]! + veor d29, d5 + + veor d3, d6 + veor d15, d7 + veor d20, d8 + veor d25, d9 + veor d30, d10 + + veor d4, d11 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #21 @ nlanes -= 21 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_21 + +.Lfull_block_18: + + @ SHA3-224 + + cmp r3, #18 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d8}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + veor d29, d5 + + veor d3, d6 + veor d15, d7 + veor d20, d8 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #18 @ nlanes -= 18 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_18 + +.Lfull_block_17: + + @ SHA3-256 & SHAKE256 + + cmp r3, #17 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d7}, [r2]! 
+ veor d14, d9 + veor d19, d10 + veor d24, d11 + veor d29, d5 + + veor d3, d6 + veor d15, d7 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #17 @ nlanes -= 17 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_17 + +.Lfull_block_13: + + @ SHA3-384 + + cmp r3, #13 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d10}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + veor d14, d9 + veor d19, d10 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #13 @ nlanes -= 13 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_13 + +.Lfull_block_9: + + @ SHA3-512 + + cmp r3, #9 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d6}, [r2]! 
+ veor d27, d9 + + veor d1, d10 + veor d13, d11 + veor d18, d5 + veor d23, d6 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #9 @ nlanes -= 9 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_9 + +.Llanes_loop: + + @ per-lane mode + + @ switch (pos) + ldrb r0, [pc, r1] + add pc, pc, r0, lsl #2 +.Lswitch_table: + .byte (.Llane0-.Lswitch_table-4)/4 + .byte (.Llane1-.Lswitch_table-4)/4 + .byte (.Llane2-.Lswitch_table-4)/4 + .byte (.Llane3-.Lswitch_table-4)/4 + .byte (.Llane4-.Lswitch_table-4)/4 + .byte (.Llane5-.Lswitch_table-4)/4 + .byte (.Llane6-.Lswitch_table-4)/4 + .byte (.Llane7-.Lswitch_table-4)/4 + .byte (.Llane8-.Lswitch_table-4)/4 + .byte (.Llane9-.Lswitch_table-4)/4 + .byte (.Llane10-.Lswitch_table-4)/4 + .byte (.Llane11-.Lswitch_table-4)/4 + .byte (.Llane12-.Lswitch_table-4)/4 + .byte (.Llane13-.Lswitch_table-4)/4 + .byte (.Llane14-.Lswitch_table-4)/4 + .byte (.Llane15-.Lswitch_table-4)/4 + .byte (.Llane16-.Lswitch_table-4)/4 + .byte (.Llane17-.Lswitch_table-4)/4 + .byte (.Llane18-.Lswitch_table-4)/4 + .byte (.Llane19-.Lswitch_table-4)/4 + .byte (.Llane20-.Lswitch_table-4)/4 + .byte (.Llane21-.Lswitch_table-4)/4 + .byte (.Llane22-.Lswitch_table-4)/4 + .byte (.Llane23-.Lswitch_table-4)/4 + .byte (.Llane24-.Lswitch_table-4)/4 +.p2align 2 + +#define ABSORB_LANE(label, vreg) \ + label: \ + add r1, #1; \ + vld1.64 d5, [r2]!; \ + cmp r1, r5; /* pos == blocklanes */ \ + veor vreg, vreg, d5; \ + beq .Llanes_permute; \ + subs r3, #1; \ + beq .Ldone; + + ABSORB_LANE(.Llane0, d0) + ABSORB_LANE(.Llane1, d12) + ABSORB_LANE(.Llane2, d17) + ABSORB_LANE(.Llane3, d22) + ABSORB_LANE(.Llane4, d27) + + ABSORB_LANE(.Llane5, d1) + ABSORB_LANE(.Llane6, d13) + ABSORB_LANE(.Llane7, d18) + ABSORB_LANE(.Llane8, d23) + ABSORB_LANE(.Llane9, d28) + + ABSORB_LANE(.Llane10, d2) + ABSORB_LANE(.Llane11, 
d14) + ABSORB_LANE(.Llane12, d19) + ABSORB_LANE(.Llane13, d24) + ABSORB_LANE(.Llane14, d29) + + ABSORB_LANE(.Llane15, d3) + ABSORB_LANE(.Llane16, d15) + ABSORB_LANE(.Llane17, d20) + ABSORB_LANE(.Llane18, d25) + ABSORB_LANE(.Llane19, d30) + + ABSORB_LANE(.Llane20, d4) + ABSORB_LANE(.Llane21, d16) + ABSORB_LANE(.Llane22, d21) + ABSORB_LANE(.Llane23, d26) + ABSORB_LANE(.Llane24, d31) + + b .Llanes_loop + +.Llanes_permute: + + sub sp,sp, #5*8 + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + mov r1, #0 @ pos <= 0 + subs r3, #1 + + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lmain_loop_pos0 + +.Ldone: + + @ save state + vstr.64 d0, [r4, #0*8] + vstr.64 d12, [r4, #1*8] + vstr.64 d17, [r4, #2*8] + vstr.64 d22, [r4, #3*8] + vstr.64 d27, [r4, #4*8] + + vstr.64 d1, [r4, #5*8] + vstr.64 d13, [r4, #6*8] + vstr.64 d18, [r4, #7*8] + vstr.64 d23, [r4, #8*8] + vstr.64 d28, [r4, #9*8] + + vstr.64 d2, [r4, #10*8] + vstr.64 d14, [r4, #11*8] + vstr.64 d19, [r4, #12*8] + vstr.64 d24, [r4, #13*8] + vstr.64 d29, [r4, #14*8] + + vstr.64 d3, [r4, #15*8] + vstr.64 d15, [r4, #16*8] + vstr.64 d20, [r4, #17*8] + vstr.64 d25, [r4, #18*8] + vstr.64 d30, [r4, #19*8] + + vstr.64 d4, [r4, #20*8] + vstr.64 d16, [r4, #21*8] + vstr.64 d21, [r4, #22*8] + vstr.64 d26, [r4, #23*8] + vstr.64 d31, [r4, #24*8] + + mov r0, #120 + vpop {q4-q7} +.Lout: + pop {r4-r5, ip, pc} +.p2align 2 +.ltorg +.size _gcry_keccak_absorb_lanes64_armv7_neon,.-_gcry_keccak_absorb_lanes64_armv7_neon; + +#endif diff --git a/cipher/keccak.c b/cipher/keccak.c index ce57860..0bb3155 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -59,7 +59,19 @@ #endif -#ifdef USE_64BIT +/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly + * code. 
*/ +#undef USE_64BIT_ARM_NEON +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_64BIT_ARM_NEON 1 +# endif +#endif /*ENABLE_NEON_SUPPORT*/ + + +#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON) # define NEED_COMMON64 1 #endif @@ -109,7 +121,7 @@ typedef struct KECCAK_CONTEXT_S #ifdef NEED_COMMON64 -static const u64 round_consts_64bit[24] = +const u64 _gcry_keccak_round_consts_64bit[24 + 1] = { U64_C(0x0000000000000001), U64_C(0x0000000000008082), U64_C(0x800000000000808A), U64_C(0x8000000080008000), @@ -122,7 +134,8 @@ static const u64 round_consts_64bit[24] = U64_C(0x8000000000008002), U64_C(0x8000000000000080), U64_C(0x000000000000800A), U64_C(0x800000008000000A), U64_C(0x8000000080008081), U64_C(0x8000000000008080), - U64_C(0x0000000080000001), U64_C(0x8000000080008008) + U64_C(0x0000000080000001), U64_C(0x8000000080008008), + U64_C(0xFFFFFFFFFFFFFFFF) }; static unsigned int @@ -400,6 +413,54 @@ static const keccak_ops_t keccak_bmi2_64_ops = #endif /* USE_64BIT_BMI2 */ +/* 64-bit ARMv7/NEON implementation. */ +#ifdef USE_64BIT_ARM_NEON + +unsigned int _gcry_keccak_permute_armv7_neon(u64 *state); +unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos, + const byte *lanes, + unsigned int nlanes, + int blocklanes); + +static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd) +{ + return _gcry_keccak_permute_armv7_neon(hd->u.state64); +} + +static unsigned int +keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + if (blocklanes < 0) + { + /* blocklanes == -1, permutationless absorb from keccak_final. 
*/ + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + } + + return 0; + } + else + { + return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes, + nlanes, blocklanes); + } +} + +static const keccak_ops_t keccak_armv7_neon_64_ops = +{ + .permute = keccak_permute64_armv7_neon, + .absorb = keccak_absorb_lanes64_armv7_neon, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_ARM_NEON */ + + /* Construct generic 32-bit implementation. */ #ifdef USE_32BIT @@ -662,6 +723,10 @@ keccak_init (int algo, void *context, unsigned int flags) /* Select optimized implementation based in hw features. */ if (0) {} +#ifdef USE_64BIT_ARM_NEON + else if (features & HWF_ARM_NEON) + ctx->ops = &keccak_armv7_neon_64_ops; +#endif #ifdef USE_64BIT_BMI2 else if (features & HWF_INTEL_BMI2) ctx->ops = &keccak_bmi2_64_ops; diff --git a/cipher/keccak_permute_64.h b/cipher/keccak_permute_64.h index 6f24217..1a80192 100644 --- a/cipher/keccak_permute_64.h +++ b/cipher/keccak_permute_64.h @@ -25,7 +25,7 @@ static unsigned int KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) { - const u64 *round_consts = round_consts_64bit; + const u64 *round_consts = _gcry_keccak_round_consts_64bit; u64 Aba, Abe, Abi, Abo, Abu; u64 Aga, Age, Agi, Ago, Agu; u64 Aka, Ake, Aki, Ako, Aku; diff --git a/configure.ac b/configure.ac index 2acfa36..ed37ab5 100644 --- a/configure.ac +++ b/configure.ac @@ -2108,7 +2108,7 @@ if test "$found" = "1" ; then if test x"$neonsupport" = xyes ; then # Build with the NEON implementation - : + GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak-armv7-neon.lo" fi fi commit 2857cb89c6dc1c02266600bc1fd2967a3cd5cf88 Author: Jussi Kivilinna Date: Sat Oct 31 21:29:56 2015 +0200 Optimize Keccak 64-bit absorb functions * cipher/keccak.c [USE_64BIT] [__x86_64__] (absorb_lanes64_8) (absorb_lanes64_4, absorb_lanes64_2, absorb_lanes64_1): New. 
* cipher/keccak.c [USE_64BIT] [!__x86_64__] (absorb_lanes64_8) (absorb_lanes64_4, absorb_lanes64_2, absorb_lanes64_1): New. [USE_64BIT] (KECCAK_F1600_ABSORB_FUNC_NAME): New. [USE_64BIT] (keccak_absorb_lanes64): Remove. [USE_64BIT_SHLD] (KECCAK_F1600_ABSORB_FUNC_NAME): New. [USE_64BIT_SHLD] (keccak_absorb_lanes64_shld): Remove. [USE_64BIT_BMI2] (KECCAK_F1600_ABSORB_FUNC_NAME): New. [USE_64BIT_BMI2] (keccak_absorb_lanes64_bmi2): Remove. * cipher/keccak_permute_64.h (KECCAK_F1600_ABSORB_FUNC_NAME): New. -- Optimize 64-bit absorb functions for small speed-up. After this change, 64-bit BMI2 implementation matches speed of fastest results from SUPERCOP for Intel Haswell CPUs (long messages). Benchmark on Intel Haswell @ 3.2 Ghz: Before: | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 2.32 ns/B 411.7 MiB/s 7.41 c/B SHAKE256 | 2.84 ns/B 336.2 MiB/s 9.08 c/B SHA3-224 | 2.69 ns/B 354.9 MiB/s 8.60 c/B SHA3-256 | 2.84 ns/B 336.0 MiB/s 9.08 c/B SHA3-384 | 3.69 ns/B 258.4 MiB/s 11.81 c/B SHA3-512 | 5.30 ns/B 179.9 MiB/s 16.97 c/B After: | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 2.27 ns/B 420.6 MiB/s 7.26 c/B SHAKE256 | 2.79 ns/B 341.4 MiB/s 8.94 c/B SHA3-224 | 2.64 ns/B 361.7 MiB/s 8.44 c/B SHA3-256 | 2.79 ns/B 341.5 MiB/s 8.94 c/B SHA3-384 | 3.65 ns/B 261.4 MiB/s 11.68 c/B SHA3-512 | 5.27 ns/B 181.0 MiB/s 16.87 c/B Signed-off-by: Jussi Kivilinna diff --git a/cipher/keccak.c b/cipher/keccak.c index f4f0ef3..ce57860 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -223,38 +223,105 @@ keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1) /* Construct generic 64-bit implementation. 
*/ #ifdef USE_64BIT +#if __GNUC__ >= 4 && defined(__x86_64__) + +static inline void absorb_lanes64_8(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "movdqu 1*16(%[dst]), %%xmm1\n\t" + "movdqu 1*16(%[in]), %%xmm5\n\t" + "movdqu 2*16(%[dst]), %%xmm2\n\t" + "movdqu 3*16(%[dst]), %%xmm3\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu 2*16(%[in]), %%xmm4\n\t" + "movdqu 3*16(%[in]), %%xmm5\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + "pxor %%xmm4, %%xmm2\n\t" + "movdqu %%xmm1, 1*16(%[dst])\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm2, 2*16(%[dst])\n\t" + "movdqu %%xmm3, 3*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); +} + +static inline void absorb_lanes64_4(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "movdqu 1*16(%[dst]), %%xmm1\n\t" + "movdqu 1*16(%[in]), %%xmm5\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + "movdqu %%xmm1, 1*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm1", "xmm4", "xmm5", "memory"); +} + +static inline void absorb_lanes64_2(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm4", "memory"); +} + +#else /* __x86_64__ */ + +static inline void absorb_lanes64_8(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); + dst[2] ^= buf_get_le64(in + 8 * 2); + dst[3] ^= buf_get_le64(in + 8 * 3); + dst[4] ^= buf_get_le64(in + 8 * 4); + dst[5] ^= buf_get_le64(in + 8 * 5); + dst[6] ^= buf_get_le64(in + 8 * 6); + dst[7] ^= buf_get_le64(in + 8 * 7); +} + +static inline void absorb_lanes64_4(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); 
+ dst[1] ^= buf_get_le64(in + 8 * 1); + dst[2] ^= buf_get_le64(in + 8 * 2); + dst[3] ^= buf_get_le64(in + 8 * 3); +} + +static inline void absorb_lanes64_2(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); +} + +#endif /* !__x86_64__ */ + +static inline void absorb_lanes64_1(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); +} + + # define ANDN64(x, y) (~(x) & (y)) # define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \ ((x) >> ((64 - (unsigned int)(n)) & 63))) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64 +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64 # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME - -static unsigned int -keccak_absorb_lanes64(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) -{ - unsigned int burn = 0; - - while (nlanes) - { - hd->u.state64[pos] ^= buf_get_le64(lanes); - lanes += 8; - nlanes--; - - if (++pos == blocklanes) - { - burn = keccak_f1600_state_permute64(hd); - pos = 0; - } - } - - return burn; -} +# undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_generic64_ops = { @@ -279,33 +346,13 @@ static const keccak_ops_t keccak_generic64_ops = tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME - -static unsigned int -keccak_absorb_lanes64_shld(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) -{ - unsigned int burn = 0; - - while (nlanes) - { - hd->u.state64[pos] ^= buf_get_le64(lanes); - lanes += 8; - nlanes--; - - if (++pos == blocklanes) - { - burn = keccak_f1600_state_permute64_shld(hd); - pos = 0; - } - } - - return burn; -} +# undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t 
keccak_shld_64_ops = { @@ -335,33 +382,13 @@ static const keccak_ops_t keccak_shld_64_ops = tmp; }) # define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2 +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2 # include "keccak_permute_64.h" # undef ANDN64 # undef ROL64 # undef KECCAK_F1600_PERMUTE_FUNC_NAME - -static unsigned int -keccak_absorb_lanes64_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes, - unsigned int nlanes, int blocklanes) -{ - unsigned int burn = 0; - - while (nlanes) - { - hd->u.state64[pos] ^= buf_get_le64(lanes); - lanes += 8; - nlanes--; - - if (++pos == blocklanes) - { - burn = keccak_f1600_state_permute64_bmi2(hd); - pos = 0; - } - } - - return burn; -} +# undef KECCAK_F1600_ABSORB_FUNC_NAME static const keccak_ops_t keccak_bmi2_64_ops = { diff --git a/cipher/keccak_permute_64.h b/cipher/keccak_permute_64.h index 1264f19..6f24217 100644 --- a/cipher/keccak_permute_64.h +++ b/cipher/keccak_permute_64.h @@ -288,3 +288,102 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) return sizeof(void *) * 4 + sizeof(u64) * 12 * 5; } + +static unsigned int +KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + unsigned int burn = 0; + + while (nlanes) + { + switch (blocklanes) + { + case 21: + /* SHAKE128 */ + while (pos == 0 && nlanes >= 21) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8); + absorb_lanes64_8(&hd->u.state64[12], lanes + 8 * 12); + absorb_lanes64_1(&hd->u.state64[20], lanes + 8 * 20); + lanes += 8 * 21; + nlanes -= 21; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 18: + /* SHA3-224 */ + while (pos == 0 && nlanes >= 18) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_2(&hd->u.state64[8], lanes + 8 * 8); + absorb_lanes64_8(&hd->u.state64[10], lanes + 8 * 10); + lanes += 8 * 18; + nlanes -= 18; + + burn = 
KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 17: + /* SHA3-256 & SHAKE256 */ + while (pos == 0 && nlanes >= 17) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_8(&hd->u.state64[8], lanes + 8 * 8); + absorb_lanes64_1(&hd->u.state64[16], lanes + 8 * 16); + lanes += 8 * 17; + nlanes -= 17; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 13: + /* SHA3-384 */ + while (pos == 0 && nlanes >= 13) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8); + absorb_lanes64_1(&hd->u.state64[12], lanes + 8 * 12); + lanes += 8 * 13; + nlanes -= 13; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 9: + /* SHA3-512 */ + while (pos == 0 && nlanes >= 9) + { + absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); + absorb_lanes64_1(&hd->u.state64[8], lanes + 8 * 8); + lanes += 8 * 9; + nlanes -= 9; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + } + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + + if (++pos == blocklanes) + { + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + pos = 0; + break; + } + } + } + + return burn; +} commit 07e4839e75a7bca3a6c0a94aecfe75efe61d7ff2 Author: Jussi Kivilinna Date: Sat Oct 31 20:19:59 2015 +0200 Enable CRC test vectors with zero bytes * tests/basic.c (check_digests): Enable CRC test-vectors with zero bytes. 
-- Signed-off-by: Jussi Kivilinna diff --git a/tests/basic.c b/tests/basic.c index 0762a89..7d5de00 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -5851,16 +5851,12 @@ check_digests (void) { GCRY_MD_CRC32_RFC1510, "test0123456789", "\xb8\x3e\x88\xd6" }, { GCRY_MD_CRC32_RFC1510, "MASSACHVSETTS INSTITVTE OF TECHNOLOGY", "\xe3\x41\x80\xf7" }, -#if 0 - { GCRY_MD_CRC32_RFC1510, "\x80\x00", "\x3b\x83\x98\x4b" }, - { GCRY_MD_CRC32_RFC1510, "\x00\x08", "\x0e\xdb\x88\x32" }, - { GCRY_MD_CRC32_RFC1510, "\x00\x80", "\xed\xb8\x83\x20" }, -#endif + { GCRY_MD_CRC32_RFC1510, "\x80\x00", "\x3b\x83\x98\x4b", 2 }, + { GCRY_MD_CRC32_RFC1510, "\x00\x08", "\x0e\xdb\x88\x32", 2 }, + { GCRY_MD_CRC32_RFC1510, "\x00\x80", "\xed\xb8\x83\x20", 2 }, { GCRY_MD_CRC32_RFC1510, "\x80", "\xed\xb8\x83\x20" }, -#if 0 - { GCRY_MD_CRC32_RFC1510, "\x80\x00\x00\x00", "\xed\x59\xb6\x3b" }, - { GCRY_MD_CRC32_RFC1510, "\x00\x00\x00\x01", "\x77\x07\x30\x96" }, -#endif + { GCRY_MD_CRC32_RFC1510, "\x80\x00\x00\x00", "\xed\x59\xb6\x3b", 4 }, + { GCRY_MD_CRC32_RFC1510, "\x00\x00\x00\x01", "\x77\x07\x30\x96", 4 }, { GCRY_MD_CRC32_RFC1510, "123456789", "\x2d\xfd\x2d\x88" }, { GCRY_MD_CRC24_RFC2440, "", "\xb7\x04\xce" }, { GCRY_MD_CRC24_RFC2440, "foo", "\x4f\xc2\x55" }, ----------------------------------------------------------------------- Summary of changes: cipher/Makefile.am | 2 +- cipher/keccak-armv7-neon.S | 945 +++++++++++++++++++++++++++++++++++++++++++++ cipher/keccak.c | 220 ++++++++--- cipher/keccak_permute_64.h | 101 ++++- cipher/tiger.c | 104 ++--- configure.ac | 2 +- tests/basic.c | 14 +- 7 files changed, 1248 insertions(+), 140 deletions(-) create mode 100644 cipher/keccak-armv7-neon.S hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Sat Nov 7 16:20:29 2015 From: jussi.kivilinna at iki.fi (Jussi 
Kivilinna) Date: Sat, 07 Nov 2015 17:20:29 +0200 Subject: [PATCH] Tweak Keccak for small speed-up Message-ID: <20151107152029.18057.23575.stgit@localhost6.localdomain6> * cipher/keccak_permute_32.h (KECCAK_F1600_PERMUTE_FUNC_NAME): Track rounds with round constant pointer instead of separate round counter. * cipher/keccak_permute_64.h (KECCAK_F1600_PERMUTE_FUNC_NAME): Ditto. (KECCAK_F1600_ABSORB_FUNC_NAME): Tweak lanes pointer increment for bulk absorb loops. -- Patch makes small tweaks to improve performance. Benchmark on Intel Haswell @ 3.2 Ghz: Before: | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 2.27 ns/B 420.5 MiB/s 7.26 c/B SHAKE256 | 2.79 ns/B 341.4 MiB/s 8.94 c/B SHA3-224 | 2.64 ns/B 361.7 MiB/s 8.44 c/B SHA3-256 | 2.79 ns/B 341.4 MiB/s 8.94 c/B SHA3-384 | 3.65 ns/B 261.3 MiB/s 11.68 c/B SHA3-512 | 5.27 ns/B 181.0 MiB/s 16.86 c/B After: | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 2.25 ns/B 423.5 MiB/s 7.21 c/B SHAKE256 | 2.77 ns/B 343.9 MiB/s 8.88 c/B SHA3-224 | 2.62 ns/B 364.1 MiB/s 8.38 c/B SHA3-256 | 2.77 ns/B 343.8 MiB/s 8.88 c/B SHA3-384 | 3.63 ns/B 262.6 MiB/s 11.63 c/B SHA3-512 | 5.23 ns/B 182.3 MiB/s 16.75 c/B Signed-off-by: Jussi Kivilinna --- cipher/keccak_permute_32.h | 13 +++++++------ cipher/keccak_permute_64.h | 44 ++++++++++++++++++++------------------------ 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/cipher/keccak_permute_32.h b/cipher/keccak_permute_32.h index fed9383..1ce42a4 100644 --- a/cipher/keccak_permute_32.h +++ b/cipher/keccak_permute_32.h @@ -27,6 +27,7 @@ static unsigned int KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) { const u32 *round_consts = round_consts_32bit; + const u32 *round_consts_end = round_consts_32bit + 2 * 24; u32 Aba0, Abe0, Abi0, Abo0, Abu0; u32 Aba1, Abe1, Abi1, Abo1, Abu1; u32 Aga0, Age0, Agi0, Ago0, Agu0; @@ -52,7 +53,6 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) u32 Esa0, Ese0, Esi0, Eso0, Esu0; u32 Esa1, Ese1, Esi1, Eso1, Esu1; u32 *state = hd->u.state32bi; - 
unsigned int round; Aba0 = state[0]; Aba1 = state[1]; @@ -105,7 +105,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Asu0 = state[48]; Asu1 = state[49]; - for (round = 0; round < 24; round += 2) + do { /* prepareTheta */ BCa0 = Aba0 ^ Aga0 ^ Aka0 ^ Ama0 ^ Asa0; @@ -142,7 +142,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Asu0 ^= Du0; BCu0 = ROL32(Asu0, 7); Eba0 = BCa0 ^ ANDN32(BCe0, BCi0); - Eba0 ^= round_consts[round * 2 + 0]; + Eba0 ^= *(round_consts++); Ebe0 = BCe0 ^ ANDN32(BCi0, BCo0); Ebi0 = BCi0 ^ ANDN32(BCo0, BCu0); Ebo0 = BCo0 ^ ANDN32(BCu0, BCa0); @@ -159,7 +159,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Asu1 ^= Du1; BCu1 = ROL32(Asu1, 7); Eba1 = BCa1 ^ ANDN32(BCe1, BCi1); - Eba1 ^= round_consts[round * 2 + 1]; + Eba1 ^= *(round_consts++); Ebe1 = BCe1 ^ ANDN32(BCi1, BCo1); Ebi1 = BCi1 ^ ANDN32(BCo1, BCu1); Ebo1 = BCo1 ^ ANDN32(BCu1, BCa1); @@ -328,7 +328,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Esu0 ^= Du0; BCu0 = ROL32(Esu0, 7); Aba0 = BCa0 ^ ANDN32(BCe0, BCi0); - Aba0 ^= round_consts[round * 2 + 2]; + Aba0 ^= *(round_consts++); Abe0 = BCe0 ^ ANDN32(BCi0, BCo0); Abi0 = BCi0 ^ ANDN32(BCo0, BCu0); Abo0 = BCo0 ^ ANDN32(BCu0, BCa0); @@ -345,7 +345,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Esu1 ^= Du1; BCu1 = ROL32(Esu1, 7); Aba1 = BCa1 ^ ANDN32(BCe1, BCi1); - Aba1 ^= round_consts[round * 2 + 3]; + Aba1 ^= *(round_consts++); Abe1 = BCe1 ^ ANDN32(BCi1, BCo1); Abi1 = BCi1 ^ ANDN32(BCo1, BCu1); Abo1 = BCo1 ^ ANDN32(BCu1, BCa1); @@ -479,6 +479,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Aso1 = BCo1 ^ ANDN32(BCu1, BCa1); Asu1 = BCu1 ^ ANDN32(BCa1, BCe1); } + while (round_consts < round_consts_end); state[0] = Aba0; state[1] = Aba1; diff --git a/cipher/keccak_permute_64.h b/cipher/keccak_permute_64.h index 1a80192..b28c871 100644 --- a/cipher/keccak_permute_64.h +++ b/cipher/keccak_permute_64.h @@ -26,6 +26,7 @@ static unsigned int KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) { const u64 *round_consts = 
_gcry_keccak_round_consts_64bit; + const u64 *round_consts_end = _gcry_keccak_round_consts_64bit + 24; u64 Aba, Abe, Abi, Abo, Abu; u64 Aga, Age, Agi, Ago, Agu; u64 Aka, Ake, Aki, Ako, Aku; @@ -39,7 +40,6 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) u64 Ema, Eme, Emi, Emo, Emu; u64 Esa, Ese, Esi, Eso, Esu; u64 *state = hd->u.state64; - unsigned int round; Aba = state[0]; Abe = state[1]; @@ -67,7 +67,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Aso = state[23]; Asu = state[24]; - for (round = 0; round < 24; round += 2) + do { /* prepareTheta */ BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; @@ -94,7 +94,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Asu ^= Du; BCu = ROL64(Asu, 14); Eba = BCa ^ ANDN64(BCe, BCi); - Eba ^= (u64)round_consts[round]; + Eba ^= *(round_consts++); Ebe = BCe ^ ANDN64(BCi, BCo); Ebi = BCi ^ ANDN64(BCo, BCu); Ebo = BCo ^ ANDN64(BCu, BCa); @@ -189,7 +189,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Esu ^= Du; BCu = ROL64(Esu, 14); Aba = BCa ^ ANDN64(BCe, BCi); - Aba ^= (u64)round_consts[round + 1]; + Aba ^= *(round_consts++); Abe = BCe ^ ANDN64(BCi, BCo); Abi = BCi ^ ANDN64(BCo, BCu); Abo = BCo ^ ANDN64(BCu, BCa); @@ -259,6 +259,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Aso = BCo ^ ANDN64(BCu, BCa); Asu = BCu ^ ANDN64(BCa, BCe); } + while (round_consts < round_consts_end); state[0] = Aba; state[1] = Abe; @@ -303,12 +304,11 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHAKE128 */ while (pos == 0 && nlanes >= 21) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8); - absorb_lanes64_8(&hd->u.state64[12], lanes + 8 * 12); - absorb_lanes64_1(&hd->u.state64[20], lanes + 8 * 20); - lanes += 8 * 21; nlanes -= 21; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_4(&hd->u.state64[16], lanes); lanes += 8 * 4; + 
absorb_lanes64_1(&hd->u.state64[20], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } @@ -318,11 +318,10 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHA3-224 */ while (pos == 0 && nlanes >= 18) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_2(&hd->u.state64[8], lanes + 8 * 8); - absorb_lanes64_8(&hd->u.state64[10], lanes + 8 * 10); - lanes += 8 * 18; nlanes -= 18; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_2(&hd->u.state64[16], lanes); lanes += 8 * 2; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } @@ -332,11 +331,10 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHA3-256 & SHAKE256 */ while (pos == 0 && nlanes >= 17) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_8(&hd->u.state64[8], lanes + 8 * 8); - absorb_lanes64_1(&hd->u.state64[16], lanes + 8 * 16); - lanes += 8 * 17; nlanes -= 17; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_1(&hd->u.state64[16], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } @@ -346,11 +344,10 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHA3-384 */ while (pos == 0 && nlanes >= 13) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8); - absorb_lanes64_1(&hd->u.state64[12], lanes + 8 * 12); - lanes += 8 * 13; nlanes -= 13; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_4(&hd->u.state64[8], lanes); lanes += 8 * 4; + absorb_lanes64_1(&hd->u.state64[12], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } @@ -360,10 +357,9 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHA3-512 */ while (pos == 0 && nlanes >= 9) { - 
absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_1(&hd->u.state64[8], lanes + 8 * 8); - lanes += 8 * 9; nlanes -= 9; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_1(&hd->u.state64[8], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } From campbell+gcrypt at mumble.net Sat Nov 7 23:08:27 2015 From: campbell+gcrypt at mumble.net (Taylor R Campbell) Date: Sat, 7 Nov 2015 22:08:27 +0000 Subject: multiple timing side channels Message-ID: <20151107220814.9C7BB60322@jupiter.mumble.net> This morning I had occasion to glance at the libgcrypt source code, and to my unpleasant surprise I found a collection of independent, obvious timing side channels in the code: - In twisted Edwards scalar multiplication, _gcry_mpi_ec_mul_point branches depending on whether a bit in a secret scalar is set: (mpi/ec.c, _gcry_mpi_ec_mul_point) 1232 _gcry_mpi_ec_add_points (&tmppnt, result, point, ctx); 1233 if (mpi_test_bit (scalar, j)) 1234 point_set (result, &tmppnt); Presumably this is intended to run in constant time, because the scalar is a secret -- there's even a comment above saying `we use constant time operation' in that case. But secret-dependent branches are not that. (Easy fix: add point_swap_cond like point_set, using mpi_cond_swap, and use it here.) - In conditional mpi swapping, _gcry_mpi_cond_swap branches depending on the swap condition: (mpi/mpiutil.c, _gcry_mpi_cond_swap) 576 mpi_limb_t mask = ((mpi_limb_t)0) - !!swap; Some compilers may not turn this into a branch -- but some do. Presumably this is intended to run in constant time because it is used in code that operates on secrets, so I suggest it be documented as such. (Easy fix: saturate instead of !!, e.g. iterate swap |= (swap << (1<<i)) | (swap >> (1<<i)) [...]) - In mpi bit testing, _gcry_mpi_test_bit branches depending on the value of the limb: (mpi/mpi-bit.c, _gcry_mpi_test_bit) 109 return (limb & (A_LIMB_1 << bitno))? 1: 0; Again, some compilers may use a constant-time conditional move here -- but some will use a branch. (Easy fix: return 1 & (limb >> bitno).)
- ~All general-purpose modular reduction involves numerator- and denominator-dependent branches, e.g.: (mpi/mpih-div.c, _gcry_mpih_divrem) 319 if( n0 >= dX ) { Here n0 and dX are limbs of the numerator and the denominator. This includes modular reduction for elliptic curve arithmetic, in which the numerator is secret; and modular reduction for RSA, in which the numerator (plaintext message) or denominator (p, q) can be secret. - Many arithmetic routines normalize their inputs and outputs, so that secrets flow into nlimbs and thence into loop counts and memory reference patterns. I don't have exploits for these -- but I hope the world is at a stage in crypto engineering where it is not necessary to demonstrate remote exploitability of every obvious secret-dependent branch and memory reference. These are all I found in half an hour of code inspection, when until I found the first two I hadn't even thought to look for timing side channels. I started writing a patch, but the code is so riddled with secret-dependent branches and memory references that it's not an easy effort. In order to avoid a new paper year after year demonstrating a new timing side channel exploit on GnuPG, I suggest: 1. Avoid general-purpose division. For divisors fixed by an algorithm, divisor-specific reduction is usually better, especially for divisors chosen for it such as 2^255 - 19 or 2^448 - 2^224 - 1. For a priori unknown divisors, Montgomery reduction is likely much faster anyway -- and together with Barrett reduction, it is never necessary to combine attacker-controlled data with secrets in a general-purpose division. (For RSA, you need to divide a constant, 4^k, by p and by q, every time you load a key, but that's all.) 2. Eliminate mpi normalization. Most k-bit multiprecision integers that libgcrypt handles are uniformly distributed in [0, 2^k), or at least in [0, p) where 2^(k - 1) <= p <= 2^k. 
It's hard to imagine that there's much value in saving a handful of integer operations on the last limb once in every ~2^32 cases for an mpi operation -- but this frequency is high enough that it's not hard to imagine devising a timing attack where you learn something after a billion messages. 3. Eliminate mpi altogether for arithmetic in fixed finite fields, such as GF(2^255 - 19) as used in Curve25519. There's plenty of easy-to-use, high-quality, high-performance, constant-time code to compute it -- faster and more safely than the generic mpi code. 4. Eliminate the generic elliptic-curve abstraction, especially for new curves. For modern curve design, it offers no benefits over curve-specific code, and makes variable-time code much more tempting. Applications don't care that there are elliptic curves or points on them involved -- applications deal in opaque octet strings. 5. Eliminate mpi_is_secure. This interleaves code paths that may operate on secret or public data. Better to statically distinguish the code paths that operate on secrets. While it is OK to use code paths designed for secret data on public data, making the code paths conditional makes it harder to audit. Auditability is critical for code that millions of people rely on for crypto. 6. Aggressively reject all new code, and prune old code, that uses secret-dependent branches and memory references. Kocher's paper was published in 1996; it shouldn't take twenty years for the world to learn its lesson. I understand why the RSA code written long ago might be vulnerable -- but twisted Edwards arithmetic was designed from the beginning to make constant-time evaluation easy. 
From gniibe at fsij.org Tue Nov 10 09:00:37 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Tue, 10 Nov 2015 17:00:37 +0900 Subject: multiple timing side channels In-Reply-To: <20151107220814.9C7BB60322@jupiter.mumble.net> References: <20151107220814.9C7BB60322@jupiter.mumble.net> Message-ID: <5641A425.20103@fsij.org> On 11/08/2015 07:08 AM, Taylor R Campbell wrote: > This morning I had occasion to glance at the libgcrypt source code, > and to my unpleasant surprise I found a collection of independent, > obvious timing side channels in the code: Thank you for your review and suggestions. While I share your view, it's not that easy to fix all at once. I'd like to fix one by one, with my capability. I understand your suggestions, but it is not clear for me how to achieve that. When I implement Curve25519, I did my best on top of existing code. Let me fix where I can. > - In twisted Edwards scalar multiplication, _gcry_mpi_ec_mul_point > branches depending on whether a bit in a secret scalar is set: > > (mpi/ec.c, _gcry_mpi_ec_mul_point) > 1232 _gcry_mpi_ec_add_points (&tmppnt, result, point, ctx); > 1233 if (mpi_test_bit (scalar, j)) > 1234 point_set (result, &tmppnt); > > Presumably this is intended to run in constant time, because the > scalar is a secret -- there's even a comment above saying `we use > constant time operation' in that case. But secret-dependent branches > are not that. (Easy fix: add point_swap_cond like point_set, using > mpi_cond_swap, and use it here.) Yes. I'll fix that. > - In conditional mpi swapping, _gcry_mpi_cond_swap branches depending > on the swap condition: > > (mpi/mpiutil.c, _gcry_mpi_cond_swap) > 576 mpi_limb_t mask = ((mpi_limb_t)0) - !!swap; > > Some compilers may not turn this into a branch -- but some do. > Presumably this is intended to run in constant time because it is used > in code that operates on secrets, so I suggest it be documented as > such. (Easy fix: saturate instead of !!, e.g. 
iterate swap |= (swap > << (1<<i)) | (swap >> (1<<i)) [...]) I understand your point. It is possible for compilers to turn this into a branch, that's true, but I haven't had any experience for existing compilers for libgcrypt (for supported architectures), so far. Let me consider. It would be also good considering its API itself. > - In mpi bit testing, _gcry_mpi_test_bit branches depending on the > value of the limb: > > (mpi/mpi-bit.c, _gcry_mpi_test_bit) > 109 return (limb & (A_LIMB_1 << bitno))? 1: 0; > > Again, some compilers may use a constant-time conditional move here -- > but some will use a branch. (Easy fix: return 1 & (limb >> bitno).) I understand your point. While good compilers turns the expression to the one you suggest, I'm afraid it could not be constant time on some architecture which doesn't have barrel shifter in lower level. Let me consider. > - ~All general-purpose modular reduction involves numerator- and > denominator-dependent branches, e.g.: > > (mpi/mpih-div.c, _gcry_mpih_divrem) > 319 if( n0 >= dX ) { > > Here n0 and dX are limbs of the numerator and the denominator. This > includes modular reduction for elliptic curve arithmetic, in which the > numerator is secret; and modular reduction for RSA, in which the > numerator (plaintext message) or denominator (p, q) can be secret. > > - Many arithmetic routines normalize their inputs and outputs, so that > secrets flow into nlimbs and thence into loop counts and memory > reference patterns. Yes. I understand. For now, here is a patch to address the first issue. Built and tested. diff --git a/mpi/ec.c b/mpi/ec.c index 7266f2a..671cf78 100644 --- a/mpi/ec.c +++ b/mpi/ec.c @@ -138,6 +138,22 @@ point_set (mpi_point_t d, mpi_point_t s) mpi_set (d->z, s->z); } +static void +point_resize (mpi_point_t p, size_t nlimbs) +{ + mpi_resize (p->x, nlimbs); + mpi_resize (p->y, nlimbs); + mpi_resize (p->z, nlimbs); +} + +static void +point_swap_cond (mpi_point_t d, mpi_point_t s, unsigned long swap) +{ + mpi_swap_cond (d->x, s->x, swap); + mpi_swap_cond (d->y, s->y, swap); + mpi_swap_cond (d->z, s->z, swap); +} + /* Set the projective coordinates from POINT into X, Y, and Z. If a coordinate is not required, X, Y, or Z may be passed as NULL.
*/ @@ -1224,14 +1240,16 @@ _gcry_mpi_ec_mul_point (mpi_point_t result, /* If SCALAR is in secure memory we assume that it is the secret key we use constant time operation. */ mpi_point_struct tmppnt; + size_t nlimbs = 2*(nbits+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB+1; point_init (&tmppnt); + point_resize (result, nlimbs); + point_resize (&tmppnt, nlimbs); for (j=nbits-1; j >= 0; j--) { _gcry_mpi_ec_dup_point (result, result, ctx); _gcry_mpi_ec_add_points (&tmppnt, result, point, ctx); - if (mpi_test_bit (scalar, j)) - point_set (result, &tmppnt); + point_swap_cond (result, &tmppnt, mpi_test_bit (scalar, j)); } point_free (&tmppnt); } -- From campbell+gcrypt at mumble.net Tue Nov 10 18:49:12 2015 From: campbell+gcrypt at mumble.net (Taylor R Campbell) Date: Tue, 10 Nov 2015 17:49:12 +0000 Subject: multiple timing side channels In-Reply-To: <5641A425.20103@fsij.org> (gniibe@fsij.org) Message-ID: <20151110174857.33A75604DD@jupiter.mumble.net> Date: Tue, 10 Nov 2015 17:00:37 +0900 From: NIIBE Yutaka On 11/08/2015 07:08 AM, Taylor R Campbell wrote: > - In conditional mpi swapping, _gcry_mpi_cond_swap branches depending > on the swap condition: > > (mpi/mpiutil.c, _gcry_mpi_cond_swap) > 576 mpi_limb_t mask = ((mpi_limb_t)0) - !!swap; > > Some compilers may not turn this into a branch -- but some do. > Presumably this is intended to run in constant time because it is used > in code that operates on secrets, so I suggest it be documented as > such. (Easy fix: saturate instead of !!, e.g. iterate swap |= (swap > << (1<<i)) | (swap >> (1<<i)) [...]) I understand your point. It is possible for compilers to turn this into a branch, that's true, but I haven't had any experience for existing compilers for libgcrypt (for supported architectures), so far. Let me consider. It would be also good considering its API itself. It's certainly worth naming, with cheaper CPU-specific versions. The last time I ran into this, I wrote down that `it's not hard to find CPU/compiler combinations with branches for ``!res'' ' -- but I foolishly neglected to write down which combinations. Another possibly cheaper generic option, with fewer shifts, is: for (i = 1; i < CHAR_BIT*sizeof(swap); i <<= 1) swap |= swap >> i; swap = ~((swap & 1) - 1); (It is, of course, theoretically possible for a compiler would translate even this into a conditional branch -- but I've never heard of a compiler doing that, and that would be rather surprising to many people.) > - In mpi bit testing, _gcry_mpi_test_bit branches depending on the > value of the limb: > > (mpi/mpi-bit.c, _gcry_mpi_test_bit) > 109 return (limb & (A_LIMB_1 << bitno))?
1: 0; > > Again, some compilers may use a constant-time conditional move here -- > but some will use a branch. (Easy fix: return 1 & (limb >> bitno).) I understand your point. While good compilers turns the expression to the one you suggest, I'm afraid it could not be constant time on some architecture which doesn't have barrel shifter in lower level. Let me consider. In this case, bitno is not (or should not be) secret, so it's OK for the time of the shift to vary depending on bitno. What's not OK is when the time of an operation varies depending on the value of limb -- that's secret. The problem here is not the shift, but using the secret (limb) in the condition part of a ?: expression. For now, here is a patch to address the first issue. Built and tested. Great, thanks! That looks better. Some other parts of _gcry_mpi_ec_mul_point look likely to be problematic: - `if (p1.z->nlimbs == 0) ...' I don't see how this could be true -- but if it can be, that's probably secret-dependent and thus leads to a timing leak. - `if ( mpi_has_sign (k) )' Even allowing for negative scalars seems to me likely to be a mistake. - The rest of the routine, after branches for the twisted Edwards and Montgomery cases, I assume handles Weierstrass coordinates, for which timing side channels are not surprising -- but I started to list them before I realized it was for Weierstrass coordinates, since nothing in the routine says so. 
From jussi.kivilinna at iki.fi Tue Nov 10 21:09:20 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Tue, 10 Nov 2015 22:09:20 +0200 Subject: multiple timing side channels In-Reply-To: <20151110174857.33A75604DD@jupiter.mumble.net> References: <20151110174857.33A75604DD@jupiter.mumble.net> Message-ID: <56424EF0.2090707@iki.fi> On 10.11.2015 19:49, Taylor R Campbell wrote: > Date: Tue, 10 Nov 2015 17:00:37 +0900 > From: NIIBE Yutaka > > On 11/08/2015 07:08 AM, Taylor R Campbell wrote: > > - In conditional mpi swapping, _gcry_mpi_cond_swap branches depending > > on the swap condition: > > > > (mpi/mpiutil.c, _gcry_mpi_cond_swap) > > 576 mpi_limb_t mask = ((mpi_limb_t)0) - !!swap; > > > > Some compilers may not turn this into a branch -- but some do. > > Presumably this is intended to run in constant time because it is used > > in code that operates on secrets, so I suggest it be documented as > > such. (Easy fix: saturate instead of !!, e.g. iterate swap |= (swap > > << (1<<i)) | (swap >> (1<<i)) [...]) > > I understand your point. It is possible for compilers to turn this > into a branch, that's true, but I haven't had any experience for > existing compilers for libgcrypt (for supported architectures), so > far. Let me consider. It would be also good considering its API > itself. > > It's certainly worth naming, with cheaper CPU-specific versions. The > last time I ran into this, I wrote down that `it's not hard to find > CPU/compiler combinations with branches for ``!res'' ' -- but I > foolishly neglected to write down which combinations. > > Another possibly cheaper generic option, with fewer shifts, is: > > for (i = 1; i < CHAR_BIT*sizeof(swap); i <<= 1) > swap |= swap >> i; > swap = ~((swap & 1) - 1); > > (It is, of course, theoretically possible for a compiler would > translate even this into a conditional branch -- but I've never heard > of a compiler doing that, and that would be rather surprising to many > people.) > Another is to do '!!'
by bit-wise ORing number and its negative and extracting sign-bit, which will be set only if number was non-zero: /* Convert non-zero values to '1' and zero to '0'. */ static inline int is_not_zero(unsigned long val) { val |= -val; /* sign-bit will be set if 'val != 0' */ return (val >> (CHAR_BIT * sizeof(val) - 1)) & 1; } ... mpi_limb_t mask = ((mpi_limb_t)0) - is_not_zero(swap); With above GCC/x86-64 generates four instructions (in: swap = rdx, out: mask = rdx): mov %rdx,%rax neg %rax or %rax,%rdx sar $0x3f,%rdx Which is same amount as with original '!!swap' (in: swap = rdx, out: mask = r10): xor %r10d,%r10d test %rdx,%rdx setne %r10b neg %r10 -Jussi From yunlian at google.com Fri Nov 13 18:43:29 2015 From: yunlian at google.com (Yunlian Jiang) Date: Fri, 13 Nov 2015 09:43:29 -0800 Subject: [PATCH] Enable --noexecstack with -g for clang Message-ID: modify the configuration a little bit. When I try to build libgcrypt with clang, the --noexecstack is not used with CFLAGS="-g". This is a similar bug to https://sourceware.org/bugzilla/show_bug.cgi?id=6428 diff --git a/m4/noexecstack.m4 b/m4/noexecstack.m4 index 4aab484..ef1bafd 100644 --- a/m4/noexecstack.m4 +++ b/m4/noexecstack.m4 @@ -38,7 +38,7 @@ EOF if AC_TRY_COMMAND([${CC} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c >/dev/null]) \ && grep .note.GNU-stack conftest.s >/dev/null \ - && AC_TRY_COMMAND([${CCAS} $CCASFLAGS $CPPFLAGS -Wa,--noexecstack + && AC_TRY_COMMAND([${CCAS} $ASFLAGS -Wa,--noexecstack -c -o conftest.o conftest.s >/dev/null]) then cl_cv_as_noexecstack=yes From gniibe at fsij.org Mon Nov 16 04:20:56 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Mon, 16 Nov 2015 12:20:56 +0900 Subject: ecc: Montgomery curve always uses the prefix 0x40 Message-ID: <56494B98.4020103@fsij.org> Hello, I'm checking GnuPG's Curve25519 implementation. 
Currently, in the GnuPG implementation, ECDH shared point and ephemeral public key (with x-coordinate only) is represented in native little endian format with no prefix. I think that it should be prefixed by 0x40, so that it can be also accessed as a fixed size MPI. Here is the change for libgcrypt, so that libgcrypt always uses the prefix 0x40 for the representation of key for Montgomery curve (regerdless of PUBKEY_FLAG_COMP flag). (The change for GnuPG will be posted soon, after this patch will be committed.) OK to commit? diff --git a/cipher/ecc-misc.c b/cipher/ecc-misc.c index 2f2e593..79708f2 100644 --- a/cipher/ecc-misc.c +++ b/cipher/ecc-misc.c @@ -292,6 +292,7 @@ _gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec, gpg_err_code_t _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) { + unsigned char *a; unsigned char *rawmpi; unsigned int rawmpilen; @@ -311,8 +312,8 @@ _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) buf++; } - rawmpi = xtrymalloc (rawmpilen? rawmpilen:1); - if (!rawmpi) + a = rawmpi = xtrymalloc (rawmpilen? rawmpilen:1); + if (!a) return gpg_err_code_from_syserror (); p = rawmpi + rawmpilen; @@ -321,16 +322,19 @@ _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) } else { - /* Note: Without using an opaque MPI it is not reliable possible - to find out whether the public key has been given in - uncompressed format. Thus we expect native EdDSA format. */ - rawmpi = _gcry_mpi_get_buffer (pk, ctx->nbits/8, &rawmpilen, NULL); - if (!rawmpi) + a = rawmpi = _gcry_mpi_get_buffer (pk, ctx->nbits/8, &rawmpilen, NULL); + if (!a) return gpg_err_code_from_syserror (); + /* + * It is not reliable to assume that 0x40 means the prefix. + * Now, we always put 0x40 for X-only coordinate, it is reliable. 
+ */ + if (a[0] == 0x40 && (rawmpilen%2)) + rawmpi++; } _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0); - xfree (rawmpi); + xfree (a); mpi_set_ui (result->z, 1); return 0; diff --git a/cipher/ecc.c b/cipher/ecc.c index 4958fbb..6932489 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -606,17 +606,14 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) &encpk, &encpklen); else { - int off = !!(flags & PUBKEY_FLAG_COMP); - - encpk = _gcry_mpi_get_buffer_extra (Qx, ctx->nbits/8, off?-1:0, + encpk = _gcry_mpi_get_buffer_extra (Qx, ctx->nbits/8, -1, &encpklen, NULL); if (encpk == NULL) rc = gpg_err_code_from_syserror (); else { - if (off) - encpk[0] = 0x40; - encpklen += off; + encpk[0] = 0x40; + encpklen++; } } if (rc) @@ -1374,11 +1371,13 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p); else { - rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + rawmpi = _gcry_mpi_get_buffer_extra (x, ec->nbits/8, -1, + &rawmpilen, NULL); if (!rawmpi) rc = gpg_err_code_from_syserror (); else { + rawmpi[0] = 0x40; mpi_s = mpi_new (0); mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8); } @@ -1393,11 +1392,13 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p); else { - rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + rawmpi = _gcry_mpi_get_buffer_extra (x, ec->nbits/8, -1, + &rawmpilen, NULL); if (!rawmpi) rc = gpg_err_code_from_syserror (); else { + rawmpi[0] = 0x40; mpi_e = mpi_new (0); mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8); } @@ -1587,11 +1588,13 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) unsigned char *rawmpi; unsigned int rawmpilen; - rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + rawmpi = _gcry_mpi_get_buffer_extra (x, ec->nbits/8, -1, + &rawmpilen, NULL); if (!rawmpi) rc = gpg_err_code_from_syserror (); else { + rawmpi[0] 
= 0x40; r = mpi_new (0); mpi_set_opaque (r, rawmpi, rawmpilen*8); } -- From cvs at cvs.gnupg.org Tue Nov 17 11:20:22 2015 From: cvs at cvs.gnupg.org (by Justus Winter) Date: Tue, 17 Nov 2015 11:20:22 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-281-g0e39594 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 0e395944b70c7a92a6437f6bcc14f287c19ce9de (commit) from 89fa74d6b3e58cd4fcd6e0939a35e46cbaca2ea0 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 0e395944b70c7a92a6437f6bcc14f287c19ce9de Author: Justus Winter Date: Mon Nov 16 12:18:47 2015 +0100 Fix typos found using codespell * cipher/cipher-ocb.c: Fix typos. * cipher/des.c: Likewise. * cipher/dsa-common.c: Likewise. * cipher/ecc.c: Likewise. * cipher/pubkey.c: Likewise. * cipher/rsa-common.c: Likewise. * cipher/scrypt.c: Likewise. * random/random-csprng.c: Likewise. * random/random-fips.c: Likewise. * random/rndw32.c: Likewise. * src/cipher-proto.h: Likewise. * src/context.c: Likewise. * src/fips.c: Likewise. * src/gcrypt.h.in: Likewise. * src/global.c: Likewise. * src/sexp.c: Likewise. * tests/mpitests.c: Likewise. * tests/t-lock.c: Likewise. 
Signed-off-by: Justus Winter diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c index a3a2c9b..6db1db3 100644 --- a/cipher/cipher-ocb.c +++ b/cipher/cipher-ocb.c @@ -307,7 +307,7 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, c->spec->encrypt (&c->context.c, l_tmp, l_tmp); buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN); - /* Mark AAD as finalized to avoid accidently calling this + /* Mark AAD as finalized to avoid accidentally calling this function again after a non-full block has been processed. */ c->u_mode.ocb.aad_finalized = 1; } diff --git a/cipher/des.c b/cipher/des.c index be62763..5c99f50 100644 --- a/cipher/des.c +++ b/cipher/des.c @@ -49,7 +49,7 @@ * encrypt or decrypt data in 64bit blocks in Electronic Codebook Mode. * * (In the examples below the slashes at the beginning and ending of comments - * are omited.) + * are omitted.) * * DES Example * ----------- @@ -68,7 +68,7 @@ * * Encrypt the plaintext * * des_ecb_encrypt(context, plaintext, ciphertext); * - * * To recover the orginal plaintext from ciphertext use: * + * * To recover the original plaintext from ciphertext use: * * des_ecb_decrypt(context, ciphertext, recoverd); * * diff --git a/cipher/dsa-common.c b/cipher/dsa-common.c index a5e42a2..6f2c2f9 100644 --- a/cipher/dsa-common.c +++ b/cipher/dsa-common.c @@ -319,7 +319,7 @@ _gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k, /* The caller may have requested that we introduce some extra loops. This is for example useful if the caller wants another value for - K because the last returned one yielded an R of 0. Becuase this + K because the last returned one yielded an R of 0. Because this is very unlikely we implement it in a straightforward way. */ if (extraloops) { diff --git a/cipher/ecc.c b/cipher/ecc.c index 4958fbb..bd3e754 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -30,7 +30,7 @@ Ramiro Moreno Chiral Mikael Mylnikov (mmr) For use in Libgcrypt the code has been heavily modified and cleaned - up. 
In fact there is not much left of the orginally code except for + up. In fact there is not much left of the originally code except for some variable names and the text book implementaion of the sign and verification algorithms. The arithmetic functions have entirely been rewritten and moved to mpi/ec.c. diff --git a/cipher/pubkey.c b/cipher/pubkey.c index e3842c0..b321a89 100644 --- a/cipher/pubkey.c +++ b/cipher/pubkey.c @@ -114,7 +114,7 @@ spec_from_name (const char *name) * set the function will only succeed if a private key has been given. * On success the spec is stored at R_SPEC. On error NULL is stored * at R_SPEC and an error code returned. If R_PARMS is not NULL and - * the fucntion returns success, the parameter list below + * the function returns success, the parameter list below * "private-key" or "public-key" is stored there and the caller must * call gcry_sexp_release on it. */ @@ -945,7 +945,7 @@ _gcry_pk_selftest (int algo, int extended, selftest_report_func_t report) else { ec = GPG_ERR_PUBKEY_ALGO; - /* Fixme: We need to change the report fucntion to allow passing + /* Fixme: We need to change the report function to allow passing of an encryption mode (e.g. pkcs1, ecdsa, or ecdh). */ if (report) report ("pubkey", algo, "module", diff --git a/cipher/rsa-common.c b/cipher/rsa-common.c index b260142..7b56237 100644 --- a/cipher/rsa-common.c +++ b/cipher/rsa-common.c @@ -46,7 +46,7 @@ octet_string_from_mpi (unsigned char **r_frame, void *space, /* Encode {VALUE,VALUELEN} for an NBITS keys using the pkcs#1 block - type 2 padding. On sucess the result is stored as a new MPI at + type 2 padding. On success the result is stored as a new MPI at R_RESULT. On error the value at R_RESULT is undefined. 
If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as @@ -675,7 +675,7 @@ _gcry_rsa_oaep_decode (unsigned char **r_result, size_t *r_resultlen, } db = seed + hlen; - /* To avoid choosen ciphertext attacks from now on we make sure to + /* To avoid chosen ciphertext attacks from now on we make sure to run all code even in the error case; this avoids possible timing attacks as described by Manger. */ diff --git a/cipher/scrypt.c b/cipher/scrypt.c index 3c21c2a..a05b5bf 100644 --- a/cipher/scrypt.c +++ b/cipher/scrypt.c @@ -246,7 +246,7 @@ _gcry_kdf_scrypt (const unsigned char *passwd, size_t passwdlen, unsigned long iterations, size_t dkLen, unsigned char *DK) { - u64 N = subalgo; /* CPU/memory cost paramter. */ + u64 N = subalgo; /* CPU/memory cost parameter. */ u32 r; /* Block size. */ u32 p = iterations; /* Parallelization parameter. */ diff --git a/random/random-csprng.c b/random/random-csprng.c index da50fda..dbebe98 100644 --- a/random/random-csprng.c +++ b/random/random-csprng.c @@ -1234,7 +1234,7 @@ do_fast_random_poll (void) # endif /*!RUSAGE_SELF*/ #endif /*HAVE_GETRUSAGE*/ - /* Time and clock are availabe on all systems - so we better do it + /* Time and clock are available on all systems - so we better do it just in case one of the above functions didn't work. 
*/ { time_t x = time(NULL); @@ -1275,12 +1275,12 @@ _gcry_rngcsprng_fast_poll (void) static void -read_random_source (enum random_origins orgin, size_t length, int level ) +read_random_source (enum random_origins origin, size_t length, int level) { if ( !slow_gather_fnc ) log_fatal ("Slow entropy gathering module not yet initialized\n"); - if ( slow_gather_fnc (add_randomness, orgin, length, level) < 0) + if (slow_gather_fnc (add_randomness, origin, length, level) < 0) log_fatal ("No way to gather entropy for the RNG\n"); } diff --git a/random/random-fips.c b/random/random-fips.c index 0a76362..3a641b2 100644 --- a/random/random-fips.c +++ b/random/random-fips.c @@ -104,7 +104,7 @@ static size_t entropy_collect_buffer_size; /* Allocated length. */ /* This random context type is used to track properties of one random generator. Thee context are usually allocated in secure memory so that the seed value is well protected. There are a couble of guard - fields to help detecting applications accidently overwriting parts + fields to help detecting applications accidentally overwriting parts of the memory. */ struct rng_context { @@ -315,7 +315,7 @@ x931_get_dt (unsigned char *buffer, size_t length, rng_context_t rng_ctx) if (gettimeofday (&tv, NULL)) log_fatal ("gettimeofday() failed: %s\n", strerror (errno)); - /* The microseconds part is always less than 1 millon (0x0f4240). + /* The microseconds part is always less than 1 million (0x0f4240). Thus we don't care about the MSB and in addition shift it to the left by 4 bits. */ usec = tv.tv_usec; diff --git a/random/rndw32.c b/random/rndw32.c index 1325b18..1c0fc3d 100644 --- a/random/rndw32.c +++ b/random/rndw32.c @@ -955,7 +955,7 @@ _gcry_rndw32_gather_random_fast (void (*add)(const void*, size_t, However, the kernel appears to synchronise the TSCs across CPUs at boot time (it resets the TSC as part of its system init), so this shouldn't really be a problem. 
Under WinCE it's completely platform- - dependant, if there's no hardware performance counter available, it + dependent, if there's no hardware performance counter available, it uses the 1ms system timer. Another feature of the TSC (although it doesn't really affect us here) diff --git a/src/cipher-proto.h b/src/cipher-proto.h index 3bca9c7..d1ddc5d 100644 --- a/src/cipher-proto.h +++ b/src/cipher-proto.h @@ -92,7 +92,7 @@ typedef const char *(*pk_get_curve_t)(gcry_sexp_t keyparms, int iterator, typedef gcry_sexp_t (*pk_get_curve_param_t)(const char *name); -/* Module specification structure for public key algoritms. */ +/* Module specification structure for public key algorithms. */ typedef struct gcry_pk_spec { int algo; diff --git a/src/context.c b/src/context.c index 94e5be9..f77878b 100644 --- a/src/context.c +++ b/src/context.c @@ -47,7 +47,7 @@ struct gcry_context /* Allocate a fresh generic context of contect TYPE and allocate LENGTH extra bytes for private use of the type handler. DEINIT is a - fucntion used called to deinitialize the private part; it may be + function used called to deinitialize the private part; it may be NULL if de-initialization is not required. Returns NULL and sets ERRNO if memory allocation failed. */ gcry_ctx_t diff --git a/src/fips.c b/src/fips.c index 7939abd..edcbeac 100644 --- a/src/fips.c +++ b/src/fips.c @@ -102,7 +102,7 @@ _gcry_initialize_fips_mode (int force) static int done; gpg_error_t err; - /* Make sure we are not accidently called twice. */ + /* Make sure we are not accidentally called twice. */ if (done) { if ( fips_mode () ) @@ -377,7 +377,7 @@ _gcry_fips_is_operational (void) (GCRYCTL_INITIALIZATION_FINISHED) where the latter will run the selftests. The drawback of these on-demand self-tests are a small chance that self-tests are - performed by severeal threads; that is no problem because + performed by several threads; that is no problem because our FSM make sure that we won't oversee any error. 
*/ unlock_fsm (); _gcry_fips_run_selftests (0); diff --git a/src/gcrypt.h.in b/src/gcrypt.h.in index 5ddeee3..93b1f43 100644 --- a/src/gcrypt.h.in +++ b/src/gcrypt.h.in @@ -583,7 +583,7 @@ gcry_error_t gcry_mpi_print (enum gcry_mpi_format format, size_t *nwritten, const gcry_mpi_t a); -/* Convert the big integer A int the external representation described +/* Convert the big integer A into the external representation described by FORMAT and store it in a newly allocated buffer which address will be put into BUFFER. NWRITTEN receives the actual lengths of the external representation. */ @@ -1598,7 +1598,7 @@ gcry_error_t gcry_prime_generate (gcry_mpi_t *prime, /* Find a generator for PRIME where the factorization of (prime-1) is in the NULL terminated array FACTORS. Return the generator as a newly allocated MPI in R_G. If START_G is not NULL, use this as - teh start for the search. */ + the start for the search. */ gcry_error_t gcry_prime_group_generator (gcry_mpi_t *r_g, gcry_mpi_t prime, gcry_mpi_t *factors, diff --git a/src/global.c b/src/global.c index 2290393..889de4c 100644 --- a/src/global.c +++ b/src/global.c @@ -305,7 +305,7 @@ print_config ( int (*fnc)(FILE *fp, const char *format, ...), FILE *fp) fnc (fp, "%s:", s); fnc (fp, "\n"); /* We use y/n instead of 1/0 for the simple reason that Emacsen's - compile error parser would accidently flag that line when printed + compile error parser would accidentally flag that line when printed during "make check" as an error. */ fnc (fp, "fips-mode:%c:%c:\n", fips_mode ()? 'y':'n', @@ -867,7 +867,7 @@ _gcry_free (void *p) return; /* In case ERRNO is set we better save it so that the free machinery - may not accidently change ERRNO. We restore it only if it was + may not accidentally change ERRNO. We restore it only if it was already set to comply with the usual C semantic for ERRNO. 
*/ save_errno = errno; if (free_func) diff --git a/src/sexp.c b/src/sexp.c index f1bbffa..d063962 100644 --- a/src/sexp.c +++ b/src/sexp.c @@ -53,7 +53,7 @@ data) is required as well. The close_tag finishes the list and would actually be sufficient. For fail-safe reasons a final stop tag is always the last byte in a buffer; it has a value of 0 so - that string function accidently applied to an S-expression will + that string function accidentally applied to an S-expression will never access unallocated data. We do not support display hints and thus don't need to represent them. A list may have more an arbitrary number of data elements but at least one is required. diff --git a/tests/mpitests.c b/tests/mpitests.c index d75aca9..e6f8525 100644 --- a/tests/mpitests.c +++ b/tests/mpitests.c @@ -426,7 +426,7 @@ test_mul (void) /* What we test here is that we don't overwrite our args and that - using thne same mpi for several args works. */ + using the same mpi for several args works. */ static int test_powm (void) { diff --git a/tests/t-lock.c b/tests/t-lock.c index 22b67ef..815f63b 100644 --- a/tests/t-lock.c +++ b/tests/t-lock.c @@ -229,7 +229,7 @@ init_accounts (void) } -/* Check that the sum of all accounts matches the intial sum. */ +/* Check that the sum of all accounts matches the initial sum. */ static void check_accounts (void) { @@ -261,7 +261,7 @@ get_rand (int high) } -/* Pick a random account. Note that this fucntion is not +/* Pick a random account. Note that this function is not thread-safe. 
*/ static int pick_account (void) ----------------------------------------------------------------------- Summary of changes: cipher/cipher-ocb.c | 2 +- cipher/des.c | 4 ++-- cipher/dsa-common.c | 2 +- cipher/ecc.c | 2 +- cipher/pubkey.c | 4 ++-- cipher/rsa-common.c | 4 ++-- cipher/scrypt.c | 2 +- random/random-csprng.c | 6 +++--- random/random-fips.c | 4 ++-- random/rndw32.c | 2 +- src/cipher-proto.h | 2 +- src/context.c | 2 +- src/fips.c | 4 ++-- src/gcrypt.h.in | 4 ++-- src/global.c | 4 ++-- src/sexp.c | 2 +- tests/mpitests.c | 2 +- tests/t-lock.c | 4 ++-- 18 files changed, 28 insertions(+), 28 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From cvs at cvs.gnupg.org Wed Nov 18 08:52:43 2015 From: cvs at cvs.gnupg.org (by Jussi Kivilinna) Date: Wed, 18 Nov 2015 08:52:43 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-283-g6571a64 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 6571a64331839d7d952292163afbf34c8bef62e0 (commit) via 15ea0acf8bb0aa307eccc23024a0bd7878fb8080 (commit) from 0e395944b70c7a92a6437f6bcc14f287c19ce9de (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 6571a64331839d7d952292163afbf34c8bef62e0 Author: Jussi Kivilinna Date: Wed Nov 18 09:44:18 2015 +0200 Tweak Keccak for small speed-up * cipher/keccak_permute_32.h (KECCAK_F1600_PERMUTE_FUNC_NAME): Track rounds with round constant pointer instead of separate round counter. 
* cipher/keccak_permute_64.h (KECCAK_F1600_PERMUTE_FUNC_NAME): Ditto. (KECCAK_F1600_ABSORB_FUNC_NAME): Tweak lanes pointer increment for bulk absorb loops. -- Patch makes small tweaks to improve performance. Benchmark on Intel Haswell @ 3.2 Ghz: Before: | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 2.27 ns/B 420.5 MiB/s 7.26 c/B SHAKE256 | 2.79 ns/B 341.4 MiB/s 8.94 c/B SHA3-224 | 2.64 ns/B 361.7 MiB/s 8.44 c/B SHA3-256 | 2.79 ns/B 341.4 MiB/s 8.94 c/B SHA3-384 | 3.65 ns/B 261.3 MiB/s 11.68 c/B SHA3-512 | 5.27 ns/B 181.0 MiB/s 16.86 c/B After: | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 2.25 ns/B 423.5 MiB/s 7.21 c/B SHAKE256 | 2.77 ns/B 343.9 MiB/s 8.88 c/B SHA3-224 | 2.62 ns/B 364.1 MiB/s 8.38 c/B SHA3-256 | 2.77 ns/B 343.8 MiB/s 8.88 c/B SHA3-384 | 3.63 ns/B 262.6 MiB/s 11.63 c/B SHA3-512 | 5.23 ns/B 182.3 MiB/s 16.75 c/B Signed-off-by: Jussi Kivilinna diff --git a/cipher/keccak_permute_32.h b/cipher/keccak_permute_32.h index fed9383..1ce42a4 100644 --- a/cipher/keccak_permute_32.h +++ b/cipher/keccak_permute_32.h @@ -27,6 +27,7 @@ static unsigned int KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) { const u32 *round_consts = round_consts_32bit; + const u32 *round_consts_end = round_consts_32bit + 2 * 24; u32 Aba0, Abe0, Abi0, Abo0, Abu0; u32 Aba1, Abe1, Abi1, Abo1, Abu1; u32 Aga0, Age0, Agi0, Ago0, Agu0; @@ -52,7 +53,6 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) u32 Esa0, Ese0, Esi0, Eso0, Esu0; u32 Esa1, Ese1, Esi1, Eso1, Esu1; u32 *state = hd->u.state32bi; - unsigned int round; Aba0 = state[0]; Aba1 = state[1]; @@ -105,7 +105,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Asu0 = state[48]; Asu1 = state[49]; - for (round = 0; round < 24; round += 2) + do { /* prepareTheta */ BCa0 = Aba0 ^ Aga0 ^ Aka0 ^ Ama0 ^ Asa0; @@ -142,7 +142,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Asu0 ^= Du0; BCu0 = ROL32(Asu0, 7); Eba0 = BCa0 ^ ANDN32(BCe0, BCi0); - Eba0 ^= round_consts[round * 2 + 0]; + Eba0 ^= *(round_consts++); Ebe0 = 
BCe0 ^ ANDN32(BCi0, BCo0); Ebi0 = BCi0 ^ ANDN32(BCo0, BCu0); Ebo0 = BCo0 ^ ANDN32(BCu0, BCa0); @@ -159,7 +159,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Asu1 ^= Du1; BCu1 = ROL32(Asu1, 7); Eba1 = BCa1 ^ ANDN32(BCe1, BCi1); - Eba1 ^= round_consts[round * 2 + 1]; + Eba1 ^= *(round_consts++); Ebe1 = BCe1 ^ ANDN32(BCi1, BCo1); Ebi1 = BCi1 ^ ANDN32(BCo1, BCu1); Ebo1 = BCo1 ^ ANDN32(BCu1, BCa1); @@ -328,7 +328,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Esu0 ^= Du0; BCu0 = ROL32(Esu0, 7); Aba0 = BCa0 ^ ANDN32(BCe0, BCi0); - Aba0 ^= round_consts[round * 2 + 2]; + Aba0 ^= *(round_consts++); Abe0 = BCe0 ^ ANDN32(BCi0, BCo0); Abi0 = BCi0 ^ ANDN32(BCo0, BCu0); Abo0 = BCo0 ^ ANDN32(BCu0, BCa0); @@ -345,7 +345,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Esu1 ^= Du1; BCu1 = ROL32(Esu1, 7); Aba1 = BCa1 ^ ANDN32(BCe1, BCi1); - Aba1 ^= round_consts[round * 2 + 3]; + Aba1 ^= *(round_consts++); Abe1 = BCe1 ^ ANDN32(BCi1, BCo1); Abi1 = BCi1 ^ ANDN32(BCo1, BCu1); Abo1 = BCo1 ^ ANDN32(BCu1, BCa1); @@ -479,6 +479,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Aso1 = BCo1 ^ ANDN32(BCu1, BCa1); Asu1 = BCu1 ^ ANDN32(BCa1, BCe1); } + while (round_consts < round_consts_end); state[0] = Aba0; state[1] = Aba1; diff --git a/cipher/keccak_permute_64.h b/cipher/keccak_permute_64.h index 1a80192..b28c871 100644 --- a/cipher/keccak_permute_64.h +++ b/cipher/keccak_permute_64.h @@ -26,6 +26,7 @@ static unsigned int KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) { const u64 *round_consts = _gcry_keccak_round_consts_64bit; + const u64 *round_consts_end = _gcry_keccak_round_consts_64bit + 24; u64 Aba, Abe, Abi, Abo, Abu; u64 Aga, Age, Agi, Ago, Agu; u64 Aka, Ake, Aki, Ako, Aku; @@ -39,7 +40,6 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) u64 Ema, Eme, Emi, Emo, Emu; u64 Esa, Ese, Esi, Eso, Esu; u64 *state = hd->u.state64; - unsigned int round; Aba = state[0]; Abe = state[1]; @@ -67,7 +67,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Aso = state[23]; 
Asu = state[24]; - for (round = 0; round < 24; round += 2) + do { /* prepareTheta */ BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; @@ -94,7 +94,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Asu ^= Du; BCu = ROL64(Asu, 14); Eba = BCa ^ ANDN64(BCe, BCi); - Eba ^= (u64)round_consts[round]; + Eba ^= *(round_consts++); Ebe = BCe ^ ANDN64(BCi, BCo); Ebi = BCi ^ ANDN64(BCo, BCu); Ebo = BCo ^ ANDN64(BCu, BCa); @@ -189,7 +189,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Esu ^= Du; BCu = ROL64(Esu, 14); Aba = BCa ^ ANDN64(BCe, BCi); - Aba ^= (u64)round_consts[round + 1]; + Aba ^= *(round_consts++); Abe = BCe ^ ANDN64(BCi, BCo); Abi = BCi ^ ANDN64(BCo, BCu); Abo = BCo ^ ANDN64(BCu, BCa); @@ -259,6 +259,7 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) Aso = BCo ^ ANDN64(BCu, BCa); Asu = BCu ^ ANDN64(BCa, BCe); } + while (round_consts < round_consts_end); state[0] = Aba; state[1] = Abe; @@ -303,12 +304,11 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHAKE128 */ while (pos == 0 && nlanes >= 21) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8); - absorb_lanes64_8(&hd->u.state64[12], lanes + 8 * 12); - absorb_lanes64_1(&hd->u.state64[20], lanes + 8 * 20); - lanes += 8 * 21; nlanes -= 21; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_4(&hd->u.state64[16], lanes); lanes += 8 * 4; + absorb_lanes64_1(&hd->u.state64[20], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } @@ -318,11 +318,10 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHA3-224 */ while (pos == 0 && nlanes >= 18) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_2(&hd->u.state64[8], lanes + 8 * 8); - absorb_lanes64_8(&hd->u.state64[10], lanes + 8 * 10); - lanes += 8 * 18; nlanes -= 18; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 
8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_2(&hd->u.state64[16], lanes); lanes += 8 * 2; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } @@ -332,11 +331,10 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHA3-256 & SHAKE256 */ while (pos == 0 && nlanes >= 17) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_8(&hd->u.state64[8], lanes + 8 * 8); - absorb_lanes64_1(&hd->u.state64[16], lanes + 8 * 16); - lanes += 8 * 17; nlanes -= 17; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_1(&hd->u.state64[16], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } @@ -346,11 +344,10 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHA3-384 */ while (pos == 0 && nlanes >= 13) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8); - absorb_lanes64_1(&hd->u.state64[12], lanes + 8 * 12); - lanes += 8 * 13; nlanes -= 13; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_4(&hd->u.state64[8], lanes); lanes += 8 * 4; + absorb_lanes64_1(&hd->u.state64[12], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } @@ -360,10 +357,9 @@ KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, /* SHA3-512 */ while (pos == 0 && nlanes >= 9) { - absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0); - absorb_lanes64_1(&hd->u.state64[8], lanes + 8 * 8); - lanes += 8 * 9; nlanes -= 9; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_1(&hd->u.state64[8], lanes); lanes += 8 * 1; burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); } commit 15ea0acf8bb0aa307eccc23024a0bd7878fb8080 Author: Jussi Kivilinna Date: Wed Nov 18 09:44:18 2015 +0200 Update license information for CRC * LICENSES: Remove 'Simple permissive' and 'IETF permissive' 
licenses for 'cipher/crc.c' as result of rewrite of CRC implementations. -- Signed-off-by: Jussi Kivilinna diff --git a/LICENSES b/LICENSES index 6c09e1f..ff8b7fa 100644 --- a/LICENSES +++ b/LICENSES @@ -54,56 +54,6 @@ with any binary distributions derived from the GNU C Library. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #+end_quote -* Simple permissive licenses - - For files: - - cipher/crc.c - -#+begin_quote - Copyright (c) 1996 L. Peter Deutsch - - Permission is granted to copy and distribute this document for - any purpose and without charge, including translations into - other languages and incorporation into compilations, provided - that the copyright notice and this notice are preserved, and - that any substantive changes or deletions from the original are - clearly marked. -#+end_quote - -* IETF permissive licenses - - For files: - - cipher/crc.c - -#+begin_quote - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished - to others, and derivative works that comment on or otherwise - explain it or assist in its implementation may be prepared, - copied, published and distributed, in whole or in part, without - restriction of any kind, provided that the above copyright notice - and this paragraph are included on all such copies and derivative - works. However, this document itself may not be modified in any - way, such as by removing the copyright notice or references to - the Internet Society or other Internet organizations, except as - needed for the purpose of developing Internet standards in which - case the procedures for copyrights defined in the Internet - Standards process must be followed, or as required to translate - it into languages other than English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on - an "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE - OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY - IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR - PURPOSE. -#+end_quote - * X License For files: ----------------------------------------------------------------------- Summary of changes: LICENSES | 50 ---------------------------------------------- cipher/keccak_permute_32.h | 13 ++++++------ cipher/keccak_permute_64.h | 44 +++++++++++++++++++--------------------- 3 files changed, 27 insertions(+), 80 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From cvs at cvs.gnupg.org Wed Nov 18 17:35:43 2015 From: cvs at cvs.gnupg.org (by Justus Winter) Date: Wed, 18 Nov 2015 17:35:43 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-284-g940dc8a Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 940dc8adc034a6c6c38742f6bfd7d837a532d537 (commit) from 6571a64331839d7d952292163afbf34c8bef62e0 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 940dc8adc034a6c6c38742f6bfd7d837a532d537 Author: Justus Winter Date: Tue Nov 17 16:00:16 2015 +0100 cipher: Fix error handling. * cipher/cipher.c (_gcry_cipher_ctl): Fix error handling. -- Found using the Clang Static Analyzer. 
Signed-off-by: Justus Winter diff --git a/cipher/cipher.c b/cipher/cipher.c index ab9f0dc..f163bde 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -1359,6 +1359,7 @@ _gcry_cipher_ctl (gcry_cipher_hd_t h, int cmd, void *buffer, size_t buflen) (&h->context.c, GCRYCTL_SET_SBOX, buffer, buflen); else rc = GPG_ERR_NOT_SUPPORTED; + break; default: rc = GPG_ERR_INV_OP; ----------------------------------------------------------------------- Summary of changes: cipher/cipher.c | 1 + 1 file changed, 1 insertion(+) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From gniibe at fsij.org Mon Nov 23 04:11:31 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Mon, 23 Nov 2015 12:11:31 +0900 Subject: multiple timing side channels In-Reply-To: <56424EF0.2090707@iki.fi> References: <20151110174857.33A75604DD@jupiter.mumble.net> <56424EF0.2090707@iki.fi> Message-ID: <565283E3.5020008@fsij.org> On 11/11/2015 05:09 AM, Jussi Kivilinna wrote: > Another is to do '!!' by bit-wise ORing number and its negative and > extracting sign-bit, which will be set only if number was non-zero: > > /* Convert non-zero values to '1' and zero to '0'. */ > > static inline int is_not_zero(unsigned long val) > { > val |= -val; /* sign-bit will be set if 'val != 0' */ > return (val >> (CHAR_BIT * sizeof(val) - 1)) & 1; > } > > ... > > mpi_limb_t mask = ((mpi_limb_t)0) - is_not_zero(swap); > > With above GCC/x86-64 generates four instructions (in: swap = rdx, > out: mask = rdx): > > mov %rdx,%rax > neg %rax > or %rax,%rdx > sar $0x3f,%rdx > > Which is same amount as with original '!!swap' (in: swap = rdx, > out: mask = r10): Thank you for discussions. I'll be back to this issue. Before this fine-grained timing issue, I have to handle following, as we have attacks now (as you had already imagined). 
Please don't get me wrong when you see other fixes before this particular fix. I never ignore your point. Well, I am considering how to fix libgcrypt ECC to be constant-time. > 2. Eliminate mpi normalization. Most k-bit multiprecision integers > that libgcrypt handles are uniformly distributed in [0, 2^k), or at > least in [0, p) where 2^(k - 1) <= p <= 2^k. It's hard to imagine > that there's much value in saving a handful of integer operations on > the last limb once in every ~2^32 cases for an mpi operation -- but > this frequency is high enough that it's not hard to imagine devising a > timing attack where you learn something after a billion messages. Yes. When I did the implementation for Montgomery curve computation, I also thought that MPI normalization should be done at the highest layer only, and we should avoid the normalization in the middle of computation. A chosen-ciphertext attack would surely be possible. I'm going to fix functions in mpi/ec.c (ec_mod, ec_addm, ec_subm, ec_mulm, ec_mul2, and possibly ec_powm), so that those will use a fixed number of limbs. Perhaps, we need another implementation of mpi_invm which is constant-time. > 3. Eliminate mpi altogether for arithmetic in fixed finite fields, > such as GF(2^255 - 19) as used in Curve25519. There's plenty of > easy-to-use, high-quality, high-performance, constant-time code to > compute it -- faster and more safely than the generic mpi code. I basically agree with this view, and I do something like this for my own project (Gnuk). For the maintenance and development of libgcrypt itself, it's not that easy though, since we need to maintain the API for existing applications. > 4. Eliminate the generic elliptic-curve abstraction, especially for > new curves. For modern curve design, it offers no benefits over > curve-specific code, and makes variable-time code much more tempting. > Applications don't care that there are elliptic curves or points on > them involved -- applications deal in opaque octet strings. 
While I agree, this would require major API changes, I'm afraid of. I'm going to fix major timing difference of ec_* functions, small timing difference issues like point_set_cond, and micro timing difference issues like use of !!. -- From cvs at cvs.gnupg.org Wed Nov 25 04:48:27 2015 From: cvs at cvs.gnupg.org (by NIIBE Yutaka) Date: Wed, 25 Nov 2015 04:48:27 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-289-g88e1358 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 88e1358962e902ff1cbec8d53ba3eee46407851a (commit) via f88adee3e1f3e2de7d63f92f90bfb3078afd3b4f (commit) via 8ad682c412047d3b9196950709dbd7bd14ac8732 (commit) via 295b1c3540752af4fc5e6f41480e6db215222fba (commit) via b6015176df6bfae107ac82f9baa29ef2c175c9f9 (commit) from 940dc8adc034a6c6c38742f6bfd7d837a532d537 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 88e1358962e902ff1cbec8d53ba3eee46407851a Author: NIIBE Yutaka Date: Wed Nov 25 12:46:19 2015 +0900 ecc: Constant-time multiplication for Weierstrass curve. * mpi/ec.c (_gcry_mpi_ec_mul_point): Use simple left-to-right binary method for Weierstrass curve when SCALAR is secure. diff --git a/mpi/ec.c b/mpi/ec.c index 9394d89..4d59a7e 100644 --- a/mpi/ec.c +++ b/mpi/ec.c @@ -1236,16 +1236,27 @@ _gcry_mpi_ec_mul_point (mpi_point_t result, unsigned int i, loops; mpi_point_struct p1, p2, p1inv; - if (ctx->model == MPI_EC_EDWARDS) + if (ctx->model == MPI_EC_EDWARDS + || (ctx->model == MPI_EC_WEIERSTRASS + && mpi_is_secure (scalar))) { /* Simple left to right binary method. 
GECC Algorithm 3.27 */ unsigned int nbits; int j; nbits = mpi_get_nbits (scalar); - mpi_set_ui (result->x, 0); - mpi_set_ui (result->y, 1); - mpi_set_ui (result->z, 1); + if (ctx->model == MPI_EC_WEIERSTRASS) + { + mpi_set_ui (result->x, 1); + mpi_set_ui (result->y, 1); + mpi_set_ui (result->z, 0); + } + else + { + mpi_set_ui (result->x, 0); + mpi_set_ui (result->y, 1); + mpi_set_ui (result->z, 1); + } if (mpi_is_secure (scalar)) { commit f88adee3e1f3e2de7d63f92f90bfb3078afd3b4f Author: NIIBE Yutaka Date: Wed Nov 25 12:13:04 2015 +0900 mpi: fix gcry_mpi_swap_cond. * mpi/mpiutil.c (_gcry_mpi_swap_cond): Relax the condition. diff --git a/mpi/mpiutil.c b/mpi/mpiutil.c index d3264c7..99402b8 100644 --- a/mpi/mpiutil.c +++ b/mpi/mpiutil.c @@ -582,11 +582,15 @@ void _gcry_mpi_swap_cond (gcry_mpi_t a, gcry_mpi_t b, unsigned long swap) { mpi_size_t i; - mpi_size_t nlimbs = a->alloced; + mpi_size_t nlimbs; mpi_limb_t mask = ((mpi_limb_t)0) - swap; mpi_limb_t x; - if (a->alloced != b->alloced) + if (a->alloced > b->alloced) + nlimbs = b->alloced; + else + nlimbs = a->alloced; + if (a->nlimbs > nlimbs || b->nlimbs > nlimbs) log_bug ("mpi_swap_cond: different sizes\n"); for (i = 0; i < nlimbs; i++) commit 8ad682c412047d3b9196950709dbd7bd14ac8732 Author: NIIBE Yutaka Date: Wed Nov 25 10:52:57 2015 +0900 mpi: Fix mpi_set_cond and mpi_swap_cond . * mpi/mpiutil.c (_gcry_mpi_set_cond, _gcry_mpi_swap_cond): Don't use the operator of !!, but assume SET/SWAP is 0 or 1. -- If the code for !! would include a branch, it spoils the purpose of mpi_set_cond/mpi_swap_cond at all. It's better to make sure the use of this function to be called with 0 or 1 for SET/SWAP. Note that it conforms when SET/SWAP is the result of conditional expression of mpi_test_bit. Reported-by: Taylor R Campbell. 
diff --git a/mpi/mpiutil.c b/mpi/mpiutil.c index 71b3f1c..d3264c7 100644 --- a/mpi/mpiutil.c +++ b/mpi/mpiutil.c @@ -483,12 +483,17 @@ _gcry_mpi_set (gcry_mpi_t w, gcry_mpi_t u) return w; } +/**************** + * Set the value of W by the one of U, when SET is 1. + * Leave the value when SET is 0. + * This implementation should be constant-time regardless of SET. + */ gcry_mpi_t _gcry_mpi_set_cond (gcry_mpi_t w, const gcry_mpi_t u, unsigned long set) { mpi_size_t i; mpi_size_t nlimbs = u->alloced; - mpi_limb_t mask = ((mpi_limb_t)0) - !!set; + mpi_limb_t mask = ((mpi_limb_t)0) - set; mpi_limb_t x; if (w->alloced != u->alloced) @@ -568,12 +573,17 @@ _gcry_mpi_swap (gcry_mpi_t a, gcry_mpi_t b) } +/**************** + * Swap the value of A and B, when SWAP is 1. + * Leave the value when SWAP is 0. + * This implementation should be constant-time regardless of SWAP. + */ void _gcry_mpi_swap_cond (gcry_mpi_t a, gcry_mpi_t b, unsigned long swap) { mpi_size_t i; mpi_size_t nlimbs = a->alloced; - mpi_limb_t mask = ((mpi_limb_t)0) - !!swap; + mpi_limb_t mask = ((mpi_limb_t)0) - swap; mpi_limb_t x; if (a->alloced != b->alloced) commit 295b1c3540752af4fc5e6f41480e6db215222fba Author: NIIBE Yutaka Date: Wed Nov 25 10:42:47 2015 +0900 ecc: multiplication of Edwards curve to be constant-time. * mpi/ec.c (_gcry_mpi_ec_mul_point): Use point_swap_cond. -- Reported-by: Taylor R Campbell. 
diff --git a/mpi/ec.c b/mpi/ec.c index 1644942..9394d89 100644 --- a/mpi/ec.c +++ b/mpi/ec.c @@ -1254,12 +1254,13 @@ _gcry_mpi_ec_mul_point (mpi_point_t result, mpi_point_struct tmppnt; point_init (&tmppnt); + point_resize (result, ctx); + point_resize (&tmppnt, ctx); for (j=nbits-1; j >= 0; j--) { _gcry_mpi_ec_dup_point (result, result, ctx); _gcry_mpi_ec_add_points (&tmppnt, result, point, ctx); - if (mpi_test_bit (scalar, j)) - point_set (result, &tmppnt); + point_swap_cond (result, &tmppnt, mpi_test_bit (scalar, j), ctx); } point_free (&tmppnt); } commit b6015176df6bfae107ac82f9baa29ef2c175c9f9 Author: NIIBE Yutaka Date: Wed Nov 25 10:19:39 2015 +0900 ecc: Add point_resize and point_swap_cond. * mpi/ec.c (point_resize, point_swap_cond): New. (_gcry_mpi_ec_mul_point): Use point_resize and point_swap_cond. -- Thanks to Taylor R Campbell who suggests. diff --git a/mpi/ec.c b/mpi/ec.c index 7266f2a..1644942 100644 --- a/mpi/ec.c +++ b/mpi/ec.c @@ -139,6 +139,34 @@ point_set (mpi_point_t d, mpi_point_t s) } +static void +point_resize (mpi_point_t p, mpi_ec_t ctx) +{ + /* + * For now, we allocate enough limbs for our EC computation of ec_*. + * Once we will improve ec_* to be constant size (and constant + * time), NLIMBS can be ctx->p->nlimbs. + */ + size_t nlimbs = 2*ctx->p->nlimbs+1; + + mpi_resize (p->x, nlimbs); + if (ctx->model != MPI_EC_MONTGOMERY) + mpi_resize (p->y, nlimbs); + mpi_resize (p->z, nlimbs); +} + + +static void +point_swap_cond (mpi_point_t d, mpi_point_t s, unsigned long swap, + mpi_ec_t ctx) +{ + mpi_swap_cond (d->x, s->x, swap); + if (ctx->model != MPI_EC_MONTGOMERY) + mpi_swap_cond (d->y, s->y, swap); + mpi_swap_cond (d->z, s->z, swap); +} + + /* Set the projective coordinates from POINT into X, Y, and Z. If a coordinate is not required, X, Y, or Z may be passed as NULL. 
*/ void @@ -1253,7 +1281,6 @@ _gcry_mpi_ec_mul_point (mpi_point_t result, mpi_point_struct p1_, p2_; mpi_point_t q1, q2, prd, sum; unsigned long sw; - size_t nlimbs; /* Compute scalar point multiplication with Montgomery Ladder. Note that we don't use Y-coordinate in the points at all. @@ -1269,15 +1296,10 @@ _gcry_mpi_ec_mul_point (mpi_point_t result, p2.x = mpi_copy (point->x); mpi_set_ui (p2.z, 1); - nlimbs = 2*(nbits+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB+1; - mpi_resize (p1.x, nlimbs); - mpi_resize (p1.z, nlimbs); - mpi_resize (p2.x, nlimbs); - mpi_resize (p2.z, nlimbs); - mpi_resize (p1_.x, nlimbs); - mpi_resize (p1_.z, nlimbs); - mpi_resize (p2_.x, nlimbs); - mpi_resize (p2_.z, nlimbs); + point_resize (&p1, ctx); + point_resize (&p2, ctx); + point_resize (&p1_, ctx); + point_resize (&p2_, ctx); q1 = &p1; q2 = &p2; @@ -1289,19 +1311,16 @@ _gcry_mpi_ec_mul_point (mpi_point_t result, mpi_point_t t; sw = mpi_test_bit (scalar, j); - mpi_swap_cond (q1->x, q2->x, sw); - mpi_swap_cond (q1->z, q2->z, sw); + point_swap_cond (q1, q2, sw, ctx); montgomery_ladder (prd, sum, q1, q2, point->x, ctx); - mpi_swap_cond (prd->x, sum->x, sw); - mpi_swap_cond (prd->z, sum->z, sw); + point_swap_cond (prd, sum, sw, ctx); t = q1; q1 = prd; prd = t; t = q2; q2 = sum; sum = t; } mpi_clear (result->y); sw = (nbits & 1); - mpi_swap_cond (p1.x, p1_.x, sw); - mpi_swap_cond (p1.z, p1_.z, sw); + point_swap_cond (&p1, &p1_, sw, ctx); if (p1.z->nlimbs == 0) { ----------------------------------------------------------------------- Summary of changes: mpi/ec.c | 75 +++++++++++++++++++++++++++++++++++++++++------------------ mpi/mpiutil.c | 22 ++++++++++++++---- 2 files changed, 71 insertions(+), 26 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From gniibe at fsij.org Thu Nov 26 02:12:05 2015 From: gniibe at 
fsij.org (NIIBE Yutaka) Date: Thu, 26 Nov 2015 10:12:05 +0900 Subject: multiple timing side channels In-Reply-To: <565283E3.5020008@fsij.org> References: <20151110174857.33A75604DD@jupiter.mumble.net> <56424EF0.2090707@iki.fi> <565283E3.5020008@fsij.org> Message-ID: <56565C65.2020007@fsij.org> Hello, Please have a look at the development version. http://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git I committed five changes. I'll keep considering changes for ec_* implementation. commit 88e1358962e902ff1cbec8d53ba3eee46407851a Author: NIIBE Yutaka Date: Wed Nov 25 12:46:19 2015 +0900 ecc: Constant-time multiplication for Weierstrass curve. * mpi/ec.c (_gcry_mpi_ec_mul_point): Use simple left-to-right binary method for Weierstrass curve when SCALAR is secure. commit f88adee3e1f3e2de7d63f92f90bfb3078afd3b4f Author: NIIBE Yutaka Date: Wed Nov 25 12:13:04 2015 +0900 mpi: fix gcry_mpi_swap_cond. * mpi/mpiutil.c (_gcry_mpi_swap_cond): Relax the condition. commit 8ad682c412047d3b9196950709dbd7bd14ac8732 Author: NIIBE Yutaka Date: Wed Nov 25 10:52:57 2015 +0900 mpi: Fix mpi_set_cond and mpi_swap_cond . * mpi/mpiutil.c (_gcry_mpi_set_cond, _gcry_mpi_swap_cond): Don't use the operator of !!, but assume SET/SWAP is 0 or 1. -- If the code for !! would include a branch, it spoils the purpose of mpi_set_cond/mpi_swap_cond at all. It's better to make sure the use of this function to be called with 0 or 1 for SET/SWAP. Note that it conforms when SET/SWAP is the result of conditional expression of mpi_test_bit. Reported-by: Taylor R Campbell. commit 295b1c3540752af4fc5e6f41480e6db215222fba Author: NIIBE Yutaka Date: Wed Nov 25 10:42:47 2015 +0900 ecc: multiplication of Edwards curve to be constant-time. * mpi/ec.c (_gcry_mpi_ec_mul_point): Use point_swap_cond. -- Reported-by: Taylor R Campbell. commit b6015176df6bfae107ac82f9baa29ef2c175c9f9 Author: NIIBE Yutaka Date: Wed Nov 25 10:19:39 2015 +0900 ecc: Add point_resize and point_swap_cond. 
* mpi/ec.c (point_resize, point_swap_cond): New. (_gcry_mpi_ec_mul_point): Use point_resize and point_swap_cond. -- Thanks to Taylor R Campbell who suggests. -- From cvs at cvs.gnupg.org Thu Nov 26 03:41:00 2015 From: cvs at cvs.gnupg.org (by NIIBE Yutaka) Date: Thu, 26 Nov 2015 03:41:00 +0100 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-290-g3658afd Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 3658afd09c3b03b4398aaa5748387220c93b1a94 (commit) from 88e1358962e902ff1cbec8d53ba3eee46407851a (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 3658afd09c3b03b4398aaa5748387220c93b1a94 Author: NIIBE Yutaka Date: Thu Nov 26 11:37:47 2015 +0900 ecc: minor improvement of point multiplication. * mpi/ec.c (_gcry_mpi_ec_mul_point): Move ec_subm out of the loop. 
diff --git a/mpi/ec.c b/mpi/ec.c index 4d59a7e..40e09be 100644 --- a/mpi/ec.c +++ b/mpi/ec.c @@ -1415,6 +1415,10 @@ _gcry_mpi_ec_mul_point (mpi_point_t result, point_init (&p2); point_init (&p1inv); + /* Invert point: y = p - y mod p */ + point_set (&p1inv, &p1); + ec_subm (p1inv.y, ctx->p, p1inv.y, ctx); + for (i=loops-2; i > 0; i--) { _gcry_mpi_ec_dup_point (result, result, ctx); @@ -1426,9 +1430,6 @@ _gcry_mpi_ec_mul_point (mpi_point_t result, if (mpi_test_bit (h, i) == 0 && mpi_test_bit (k, i) == 1) { point_set (&p2, result); - /* Invert point: y = p - y mod p */ - point_set (&p1inv, &p1); - ec_subm (p1inv.y, ctx->p, p1inv.y, ctx); _gcry_mpi_ec_add_points (result, &p2, &p1inv, ctx); } } ----------------------------------------------------------------------- Summary of changes: mpi/ec.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From gniibe at fsij.org Thu Nov 26 03:48:15 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Thu, 26 Nov 2015 11:48:15 +0900 Subject: ecc: Montgomery curve always uses the prefix 0x40 In-Reply-To: <56494B98.4020103@fsij.org> References: <56494B98.4020103@fsij.org> Message-ID: <565672EF.10400@fsij.org> On 11/16/2015 12:20 PM, NIIBE Yutaka wrote: > I'm checking GnuPG's Curve25519 implementation. [..] > I think that it should be prefixed by 0x40, so that it can be also > accessed as a fixed size MPI. Here is update. This can handle old data with no prefix, too. 
diff --git a/cipher/ecc-misc.c b/cipher/ecc-misc.c index 2f2e593..b9ad060 100644 --- a/cipher/ecc-misc.c +++ b/cipher/ecc-misc.c @@ -292,6 +292,7 @@ _gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec, gpg_err_code_t _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) { + unsigned char *a; unsigned char *rawmpi; unsigned int rawmpilen; @@ -311,8 +312,8 @@ _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) buf++; } - rawmpi = xtrymalloc (rawmpilen? rawmpilen:1); - if (!rawmpi) + a = rawmpi = xtrymalloc (rawmpilen? rawmpilen:1); + if (!a) return gpg_err_code_from_syserror (); p = rawmpi + rawmpilen; @@ -321,16 +322,27 @@ _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) } else { - /* Note: Without using an opaque MPI it is not reliable possible - to find out whether the public key has been given in - uncompressed format. Thus we expect native EdDSA format. */ - rawmpi = _gcry_mpi_get_buffer (pk, ctx->nbits/8, &rawmpilen, NULL); - if (!rawmpi) + a = rawmpi = _gcry_mpi_get_buffer (pk, ctx->nbits/8, &rawmpilen, NULL); + if (!a) return gpg_err_code_from_syserror (); + /* + * It is not reliable to assume that 0x40 means the prefix. + * + * For newer implementation, it is reliable since we always put + * 0x40 for x-only coordinate. + * + * For older implementation (non-released development version), + * it is possibe to have the 0x40 as a part of data. + * + * So, we need to check if it's really the prefix or not. + * Only when it's the prefix, we remove it. 
+ */ + if (a[0] == 0x40 && ctx->nbits/8 == rawmpilen - 1) + rawmpi++; } _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0); - xfree (rawmpi); + xfree (a); mpi_set_ui (result->z, 1); return 0; diff --git a/cipher/ecc.c b/cipher/ecc.c index bd3e754..51621f8 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -606,17 +606,14 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) &encpk, &encpklen); else { - int off = !!(flags & PUBKEY_FLAG_COMP); - - encpk = _gcry_mpi_get_buffer_extra (Qx, ctx->nbits/8, off?-1:0, + encpk = _gcry_mpi_get_buffer_extra (Qx, ctx->nbits/8, -1, &encpklen, NULL); if (encpk == NULL) rc = gpg_err_code_from_syserror (); else { - if (off) - encpk[0] = 0x40; - encpklen += off; + encpk[0] = 0x40; + encpklen++; } } if (rc) @@ -1374,11 +1371,13 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p); else { - rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + rawmpi = _gcry_mpi_get_buffer_extra (x, ec->nbits/8, -1, + &rawmpilen, NULL); if (!rawmpi) rc = gpg_err_code_from_syserror (); else { + rawmpi[0] = 0x40; mpi_s = mpi_new (0); mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8); } @@ -1393,11 +1392,13 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p); else { - rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + rawmpi = _gcry_mpi_get_buffer_extra (x, ec->nbits/8, -1, + &rawmpilen, NULL); if (!rawmpi) rc = gpg_err_code_from_syserror (); else { + rawmpi[0] = 0x40; mpi_e = mpi_new (0); mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8); } @@ -1587,11 +1588,13 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) unsigned char *rawmpi; unsigned int rawmpilen; - rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + rawmpi = _gcry_mpi_get_buffer_extra (x, ec->nbits/8, -1, + &rawmpilen, NULL); if (!rawmpi) rc = gpg_err_code_from_syserror (); 
else { + rawmpi[0] = 0x40; r = mpi_new (0); mpi_set_opaque (r, rawmpi, rawmpilen*8); } -- From cpm at fbsd.es Thu Nov 26 10:37:29 2015 From: cpm at fbsd.es (Carlos J Puga Medina) Date: Thu, 26 Nov 2015 10:37:29 +0100 Subject: Patch to fix libgcrypt and KeepassX issue on FreeBSD Message-ID: <1448530649.1549.14.camel@fbsd.es> Hi people, The following patch fixes a current problem on FreeBSD. Please, can someone commit it for the next libgcrypt release? --- cipher/Makefile.in.orig 2015-09-08 06:32:11 UTC +++ cipher/Makefile.in @@ -818,13 +818,19 @@ uninstall-am: tags tags-am uninstall uninstall-am -# We need to lower the optimization for this module. +# We need to lower the optimization for these modules. tiger.o: $(srcdir)/tiger.c `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` tiger.lo: $(srcdir)/tiger.c `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` +salsa20.o: $(srcdir)/salsa20.c + `echo $(COMPILE) -c $(srcdir)/salsa20.c | $(o_flag_munging) ` + +salsa20.lo: $(srcdir)/salsa20.c + `echo $(LTCOMPILE) -c $(srcdir)/salsa20.c | $(o_flag_munging) ` + # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: See bug 204323 for further details: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=204323 Kind regards, -- Carlos Jacobo Puga Medina PGP fingerprint = C60E 9497 5302 793B CC2D BB89 A1F3 5D66 E6D0 5453 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 473 bytes Desc: This is a digitally signed message part URL: From jussi.kivilinna at iki.fi Thu Nov 26 19:25:28 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 26 Nov 2015 20:25:28 +0200 Subject: Patch to fix libgcrypt and KeepassX issue on FreeBSD In-Reply-To: <1448530649.1549.14.camel@fbsd.es> References: <1448530649.1549.14.camel@fbsd.es> Message-ID: <56574E98.50707@iki.fi> Hello, On 26.11.2015 11:37, Carlos J Puga Medina wrote: > Hi people, > > The following patch fixes a current problem on FreeBSD. Please, can > someone commit it for the next libgcrypt release? > I managed to reproduce this on Ubuntu/clang and found bug in salsa20 selftest code. Does attached patch fix the issue for you? -Jussi > --- cipher/Makefile.in.orig 2015-09-08 06:32:11 UTC > +++ cipher/Makefile.in > @@ -818,13 +818,19 @@ uninstall-am: > tags tags-am uninstall uninstall-am > > > -# We need to lower the optimization for this module. > +# We need to lower the optimization for these modules. > tiger.o: $(srcdir)/tiger.c > `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` > > tiger.lo: $(srcdir)/tiger.c > `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) ` > > +salsa20.o: $(srcdir)/salsa20.c > + `echo $(COMPILE) -c $(srcdir)/salsa20.c | $(o_flag_munging) ` > + > +salsa20.lo: $(srcdir)/salsa20.c > + `echo $(LTCOMPILE) -c $(srcdir)/salsa20.c | $(o_flag_munging) > ` > + > # Tell versions [3.59,3.63) of GNU make to not export all variables. > # Otherwise a system limit (for SysV at least) may be exceeded. > .NOEXPORT: > > See bug 204323 for further details: > > https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=204323 > > Kind regards, > > > > _______________________________________________ > Gcrypt-devel mailing list > Gcrypt-devel at gnupg.org > http://lists.gnupg.org/mailman/listinfo/gcrypt-devel > -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 01-salsa20-fix-alignment-of-self.patch Type: text/x-patch Size: 3109 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 648 bytes Desc: OpenPGP digital signature URL: From jussi.kivilinna at iki.fi Sun Nov 29 12:07:37 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 29 Nov 2015 13:07:37 +0200 Subject: [PATCH 1/2] salsa20: fix alignment of self-test context Message-ID: <20151129110737.20931.4361.stgit@localhost6.localdomain6> * cipher/salsa20.c (selftest): Ensure 16-byte alignment for salsa20 context structure. -- Reported-by: Carlos J Puga Medina Signed-off-by: Jussi Kivilinna --- cipher/salsa20.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/cipher/salsa20.c b/cipher/salsa20.c index fa3d23b..9768198 100644 --- a/cipher/salsa20.c +++ b/cipher/salsa20.c @@ -501,7 +501,8 @@ salsa20r12_encrypt_stream (void *context, static const char* selftest (void) { - SALSA20_context_t ctx; + byte ctxbuf[sizeof(SALSA20_context_t) + 15]; + SALSA20_context_t *ctx; byte scratch[8+1]; byte buf[256+64+4]; int i; @@ -518,32 +519,35 @@ selftest (void) static const byte ciphertext_1[] = { 0xE3, 0xBE, 0x8F, 0xDD, 0x8B, 0xEC, 0xA2, 0xE3}; - salsa20_setkey (&ctx, key_1, sizeof key_1); - salsa20_setiv (&ctx, nonce_1, sizeof nonce_1); + /* 16-byte alignment required for amd64 implementation. 
*/ + ctx = (SALSA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); + + salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); scratch[8] = 0; - salsa20_encrypt_stream (&ctx, scratch, plaintext_1, sizeof plaintext_1); + salsa20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) return "Salsa20 encryption test 1 failed."; if (scratch[8]) return "Salsa20 wrote too much."; - salsa20_setkey( &ctx, key_1, sizeof(key_1)); - salsa20_setiv (&ctx, nonce_1, sizeof nonce_1); - salsa20_encrypt_stream (&ctx, scratch, scratch, sizeof plaintext_1); + salsa20_setkey( ctx, key_1, sizeof(key_1)); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + salsa20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) return "Salsa20 decryption test 1 failed."; for (i = 0; i < sizeof buf; i++) buf[i] = i; - salsa20_setkey (&ctx, key_1, sizeof key_1); - salsa20_setiv (&ctx, nonce_1, sizeof nonce_1); + salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); /*encrypt*/ - salsa20_encrypt_stream (&ctx, buf, buf, sizeof buf); + salsa20_encrypt_stream (ctx, buf, buf, sizeof buf); /*decrypt*/ - salsa20_setkey (&ctx, key_1, sizeof key_1); - salsa20_setiv (&ctx, nonce_1, sizeof nonce_1); - salsa20_encrypt_stream (&ctx, buf, buf, 1); - salsa20_encrypt_stream (&ctx, buf+1, buf+1, (sizeof buf)-1-1); - salsa20_encrypt_stream (&ctx, buf+(sizeof buf)-1, buf+(sizeof buf)-1, 1); + salsa20_setkey (ctx, key_1, sizeof key_1); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + salsa20_encrypt_stream (ctx, buf, buf, 1); + salsa20_encrypt_stream (ctx, buf+1, buf+1, (sizeof buf)-1-1); + salsa20_encrypt_stream (ctx, buf+(sizeof buf)-1, buf+(sizeof buf)-1, 1); for (i = 0; i < sizeof buf; i++) if (buf[i] != (byte)i) return "Salsa20 encryption test 2 failed."; From jussi.kivilinna at iki.fi Sun Nov 29 12:07:42 2015 
From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Sun, 29 Nov 2015 13:07:42 +0200 Subject: [PATCH 2/2] chacha20: fix alignment of self-test context In-Reply-To: <20151129110737.20931.4361.stgit@localhost6.localdomain6> References: <20151129110737.20931.4361.stgit@localhost6.localdomain6> Message-ID: <20151129110742.20931.53162.stgit@localhost6.localdomain6> * cipher/chacha20.c (selftest): Ensure 16-byte alignment for chacha20 context structure. -- Signed-off-by: Jussi Kivilinna --- cipher/chacha20.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/cipher/chacha20.c b/cipher/chacha20.c index e25e239..613fa82 100644 --- a/cipher/chacha20.c +++ b/cipher/chacha20.c @@ -514,7 +514,8 @@ chacha20_encrypt_stream (void *context, byte * outbuf, const byte * inbuf, static const char * selftest (void) { - CHACHA20_context_t ctx; + byte ctxbuf[sizeof(CHACHA20_context_t) + 15]; + CHACHA20_context_t *ctx; byte scratch[127 + 1]; byte buf[512 + 64 + 4]; int i; @@ -565,46 +566,49 @@ selftest (void) 0x05, 0x3c, 0x84, 0xe4, 0x9a, 0x4a, 0x33 }; - chacha20_setkey (&ctx, key_1, sizeof key_1); - chacha20_setiv (&ctx, nonce_1, sizeof nonce_1); + /* 16-byte alignment required for amd64 implementation. 
*/ + ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); + + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); scratch[sizeof (scratch) - 1] = 0; - chacha20_encrypt_stream (&ctx, scratch, plaintext_1, sizeof plaintext_1); + chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) return "ChaCha20 encryption test 1 failed."; if (scratch[sizeof (scratch) - 1]) return "ChaCha20 wrote too much."; - chacha20_setkey (&ctx, key_1, sizeof (key_1)); - chacha20_setiv (&ctx, nonce_1, sizeof nonce_1); - chacha20_encrypt_stream (&ctx, scratch, scratch, sizeof plaintext_1); + chacha20_setkey (ctx, key_1, sizeof (key_1)); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) return "ChaCha20 decryption test 1 failed."; for (i = 0; i < sizeof buf; i++) buf[i] = i; - chacha20_setkey (&ctx, key_1, sizeof key_1); - chacha20_setiv (&ctx, nonce_1, sizeof nonce_1); + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); /*encrypt */ - chacha20_encrypt_stream (&ctx, buf, buf, sizeof buf); + chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); /*decrypt */ - chacha20_setkey (&ctx, key_1, sizeof key_1); - chacha20_setiv (&ctx, nonce_1, sizeof nonce_1); - chacha20_encrypt_stream (&ctx, buf, buf, 1); - chacha20_encrypt_stream (&ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1); - chacha20_encrypt_stream (&ctx, buf + (sizeof buf) - 1, + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, buf, buf, 1); + chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1); + chacha20_encrypt_stream (ctx, buf + (sizeof buf) - 1, buf + (sizeof buf) - 1, 1); for (i = 0; i < sizeof buf; i++) if (buf[i] != (byte) i) return "ChaCha20 
encryption test 2 failed."; - chacha20_setkey (&ctx, key_1, sizeof key_1); - chacha20_setiv (&ctx, nonce_1, sizeof nonce_1); + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); /* encrypt */ for (i = 0; i < sizeof buf; i++) - chacha20_encrypt_stream (&ctx, &buf[i], &buf[i], 1); + chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1); /* decrypt */ - chacha20_setkey (&ctx, key_1, sizeof key_1); - chacha20_setiv (&ctx, nonce_1, sizeof nonce_1); - chacha20_encrypt_stream (&ctx, buf, buf, sizeof buf); + chacha20_setkey (ctx, key_1, sizeof key_1); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); for (i = 0; i < sizeof buf; i++) if (buf[i] != (byte) i) return "ChaCha20 encryption test 3 failed.";