From jussi.kivilinna at iki.fi Mon Aug 1 16:25:34 2022 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 1 Aug 2022 17:25:34 +0300 Subject: [PATCH] sm4: add ARMv8 CE accelerated implementation for XTS mode In-Reply-To: <20220728082655.47697-1-tianjia.zhang@linux.alibaba.com> References: <20220728082655.47697-1-tianjia.zhang@linux.alibaba.com> Message-ID: <88833b2a-cdc8-8e81-75f7-de8a05a3b279@iki.fi> Hello, Patch applied to master, thanks. -Jussi On 28.7.2022 11.26, Tianjia Zhang via Gcrypt-devel wrote: > * cipher/sm4-armv8-aarch64-ce.S (_gcry_sm4_armv8_ce_xts_crypt): New. > * cipher/sm4.c (_gcry_sm4_armv8_ce_xts_crypt): New. > (_gcry_sm4_xts_crypt) [USE_ARM_CE]: Add ARMv8 CE implementation for XTS. > -- > > Benchmark on T-Head Yitian-710 2.75 GHz: > > Before: > SM4 | nanosecs/byte mebibytes/sec cycles/byte auto Mhz > XTS enc | 0.373 ns/B 2560 MiB/s 1.02 c/B 2749 > XTS dec | 0.372 ns/B 2562 MiB/s 1.02 c/B 2750 > > After (1.18x faster): > SM4 | nanosecs/byte mebibytes/sec cycles/byte auto Mhz > XTS enc | 0.314 ns/B 3038 MiB/s 0.863 c/B 2749 > XTS dec | 0.314 ns/B 3037 MiB/s 0.863 c/B 2749 > > Signed-off-by: Tianjia Zhang > --- > cipher/sm4-armv8-aarch64-ce.S | 151 ++++++++++++++++++++++++++++++++++ > cipher/sm4.c | 18 +++- > 2 files changed, 168 insertions(+), 1 deletion(-) > > diff --git a/cipher/sm4-armv8-aarch64-ce.S b/cipher/sm4-armv8-aarch64-ce.S > index 5fb55947edc1..1a4ff736ad27 100644 > --- a/cipher/sm4-armv8-aarch64-ce.S > +++ b/cipher/sm4-armv8-aarch64-ce.S > @@ -62,6 +62,7 @@ > #define RTMP3 v19 > > #define RIV v20 > +#define RMASK v21 > > /* Helper macros. */ > > @@ -69,6 +70,20 @@ > ld1 {v24.16b-v27.16b}, [ptr], #64; \ > ld1 {v28.16b-v31.16b}, [ptr]; > > +#define SM4_CRYPT_BLK(b0) \ > + rev32 b0.16b, b0.16b; \ > + sm4e(b0, v24); \ > + sm4e(b0, v25); \ > + sm4e(b0, v26); \ > + sm4e(b0, v27); \ > + sm4e(b0, v28); \ > + sm4e(b0, v29); \ > + sm4e(b0, v30); \ > + sm4e(b0, v31); \ > + rev64 b0.4s, b0.4s; \ > + ext b0.16b, b0.16b, b0.16b, #8; \ > + rev32 b0.16b, b0.16b; > + > #define crypt_blk4(b0, b1, b2, b3) \ > rev32 b0.16b, b0.16b; \ > rev32 b1.16b, b1.16b; \ > @@ -577,4 +592,140 @@ _gcry_sm4_armv8_ce_ctr_enc: > CFI_ENDPROC(); > ELF(.size _gcry_sm4_armv8_ce_ctr_enc,.-_gcry_sm4_armv8_ce_ctr_enc;) > > +.align 3 > +.global _gcry_sm4_armv8_ce_xts_crypt > +ELF(.type _gcry_sm4_armv8_ce_xts_crypt,%function;) > +_gcry_sm4_armv8_ce_xts_crypt: > + /* input: > + * x0: round key array, CTX > + * x1: dst > + * x2: src > + * x3: tweak (big endian, 128 bit) > + * x4: nblocks > + */ > + CFI_STARTPROC() > + VPUSH_ABI > + > + load_rkey(x0) > + > + mov x7, #0x87 > + mov x8, #0x1 > + mov RMASK.d[0], x7 > + mov RMASK.d[1], x8 > + > + ld1 {RIV.16b}, [x3] > + mov v8.16b, RIV.16b > + ext RIV.16b, RIV.16b, RIV.16b, #8 > + > +.Lxts_loop_blk: > + sub x4, x4, #8 > + tbnz x4, #63, .Lxts_tail8 > + > +#define tweak_next(vt, vin, RTMP) \ > + sshr RTMP.2d, RIV.2d, #63; \ > + add vt.2d, vin.2d, vin.2d; \ > + and RTMP.16b, RTMP.16b, RMASK.16b; \ > + add RIV.2d, RIV.2d, RIV.2d; \ > + eor vt.16b, vt.16b, RTMP.16b; > + > + tweak_next( v9, v8, RTMP0) > + tweak_next(v10, v9, RTMP1) > + tweak_next(v11, v10, RTMP2) > + tweak_next(v12, v11, RTMP3) > + tweak_next(v13, v12, RTMP0) > + tweak_next(v14, v13, RTMP1) > + tweak_next(v15, v14, RTMP2) > + > + ld1 {v0.16b-v3.16b}, [x2], #64 > + eor v0.16b, v0.16b, v8.16b > + eor v1.16b, v1.16b, v9.16b > + eor v2.16b, v2.16b, v10.16b > + eor v3.16b, v3.16b, v11.16b > + ld1 {v4.16b-v7.16b}, [x2], #64 > + eor v4.16b, v4.16b, v12.16b > + eor v5.16b, v5.16b, v13.16b > + eor v6.16b, v6.16b, v14.16b > + eor v7.16b, v7.16b, v15.16b > + > + crypt_blk8(v0, v1, v2, v3, v4, v5, v6, v7) > + > + eor v0.16b, v0.16b, v8.16b > + eor v1.16b, v1.16b, v9.16b > + eor v2.16b, v2.16b, v10.16b > + eor v3.16b, v3.16b, v11.16b > + st1 {v0.16b-v3.16b}, [x1], #64 > + eor v4.16b, v4.16b, v12.16b > + eor v5.16b, v5.16b, v13.16b > + eor v6.16b, v6.16b, v14.16b > + eor v7.16b, v7.16b, v15.16b > + st1 {v4.16b-v7.16b}, [x1], #64 > + > + tweak_next(v8, v15, RTMP3) > + > + cbz x4, .Lxts_end > + b .Lxts_loop_blk > + > +.Lxts_tail8: > + add x4, x4, #8 > + cmp x4, #4 > + blt .Lxts_tail4 > + > + sub x4, x4, #4 > + > + tweak_next( v9, v8, RTMP0) > + tweak_next(v10, v9, RTMP1) > + tweak_next(v11, v10, RTMP2) > + > + ld1 {v0.16b-v3.16b}, [x2], #64 > + eor v0.16b, v0.16b, v8.16b > + eor v1.16b, v1.16b, v9.16b > + eor v2.16b, v2.16b, v10.16b > + eor v3.16b, v3.16b, v11.16b > + > + crypt_blk4(v0, v1, v2, v3); > + > + eor v0.16b, v0.16b, v8.16b > + eor v1.16b, v1.16b, v9.16b > + eor v2.16b, v2.16b, v10.16b > + eor v3.16b, v3.16b, v11.16b > + st1 {v0.16b-v3.16b}, [x1], #64 > + > + tweak_next(v8, v11, RTMP3) > + > + cbz x4, .Lxts_end > + > +.Lxts_tail4: > + sub x4, x4, #1 > + > + ld1 {v0.16b}, [x2], #16 > + eor v0.16b, v0.16b, v8.16b > + > + SM4_CRYPT_BLK(v0) > + > + eor v0.16b, v0.16b, v8.16b > + st1 {v0.16b}, [x1], #16 > + > + tweak_next(v8, v8, RTMP0) > + > + cbnz x4, .Lxts_tail4 > + > +.Lxts_end: > + /* store new tweak */ > + st1 {v8.16b}, [x3] > + > + CLEAR_REG(v8) > + CLEAR_REG(v9) > + CLEAR_REG(v10) > + CLEAR_REG(v11) > + CLEAR_REG(v12) > + CLEAR_REG(v13) > + CLEAR_REG(v14) > + CLEAR_REG(v15) > + CLEAR_REG(RIV) > + > + VPOP_ABI > + ret_spec_stop > + CFI_ENDPROC() > +ELF(.size _gcry_sm4_armv8_ce_xts_crypt,.-_gcry_sm4_armv8_ce_xts_crypt;) > + > #endif > diff --git a/cipher/sm4.c b/cipher/sm4.c > index b5d4691ddbcb..4cac3b6c64b0 100644 > --- a/cipher/sm4.c > +++ b/cipher/sm4.c > @@ -1,6 +1,6 @@ > /* sm4.c - SM4 Cipher Algorithm > * Copyright (C) 2020 Alibaba Group. > - * Copyright (C) 2020 Tianjia Zhang > + * Copyright (C) 2020-2022 Tianjia Zhang > * Copyright (C) 2020-2022 Jussi Kivilinna > * > * This file is part of Libgcrypt. > @@ -539,6 +539,11 @@ extern void _gcry_sm4_armv8_ce_cfb_dec(const u32 *rk_enc, byte *out, > byte *iv, > size_t nblocks); > > +extern void _gcry_sm4_armv8_ce_xts_crypt(const u32 *rk, byte *out, > + const byte *in, > + byte *tweak, > + size_t nblocks); > + > extern void _gcry_sm4_armv8_ce_crypt_blk1_8(const u32 *rk, byte *out, > const byte *in, > size_t num_blocks); > @@ -1510,6 +1515,17 @@ _gcry_sm4_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg, > const unsigned char *inbuf = inbuf_arg; > int burn_stack_depth = 0; > > +#ifdef USE_ARM_CE > + if (ctx->use_arm_ce) > + { > + /* Process all blocks at a time. */ > + _gcry_sm4_armv8_ce_xts_crypt(encrypt ? ctx->rkey_enc : ctx->rkey_dec, > + outbuf, inbuf, tweak, nblocks); > + > + nblocks = 0; > + } > +#endif > + > /* Process remaining blocks. */ > if (nblocks) > { From cllang at redhat.com Tue Aug 2 13:26:37 2022 From: cllang at redhat.com (Clemens Lang) Date: Tue, 2 Aug 2022 13:26:37 +0200 Subject: [PATCH] tests/basic: Add ifdefs for SM4 and CAMELLIA tests Message-ID: <20220802112637.18868-1-cllang@redhat.com> * tests/basic.c (check_gcm_siv_cipher): Do not run test vectors that are known to fail when compiled without support for SM4 or CAMELLIA. -- Fixes test execution when configured --enable-ciphers excluding sm4 or camellia. Signed-off-by: Clemens Lang --- tests/basic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/basic.c b/tests/basic.c index 0d03636b..26980e15 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -6160,6 +6160,7 @@ check_gcm_siv_cipher (void) "\xee\x12\x55\x82\x25\x25\x30\xb9\xa6\xf8\x3c\x81\x36\xcd\xef", "\xce\xc3\x13\x6c\x40\x2a\xcc\x51\xa1\xce\xb3\xed\xe8\xa6\x5b\x04", }, +#if USE_SM4 { GCRY_CIPHER_SM4, FLAG_NOFIPS, "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -6298,6 +6299,8 @@ check_gcm_siv_cipher (void) "\x64\x15\x17\x55\x60\x71\xb4\xaf\xcd\xc8\x90\x25\xc8\xc8\x62", "\xe2\x32\xda\x3a\x5a\x0e\x45\x1b\x8e\xf8\xbb\xe6\x60\x71\x81\xeb", }, +#endif /* USE_SM4 */ +#if USE_CAMELLIA { GCRY_CIPHER_CAMELLIA128, FLAG_NOFIPS, "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" @@ -6564,6 +6567,7 @@ check_gcm_siv_cipher (void) "\xfb\x92\x1d\xd1\x31\x8d\x9d\xa1\xc3\x46\x69\xfa\x71\x2c\x42", "\xc6\x54\xf9\xf0\x22\x2c\xc3\xee\xdd\x13\x02\xb8\xe7\x5a\x2e\x7e" } +#endif /* USE_CAMELLIA */ }; gcry_cipher_hd_t hde, hdd; unsigned char out[MAX_DATA_LEN * 2]; -- 2.37.1 From jussi.kivilinna at iki.fi Tue Aug 2 18:05:12 2022 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Tue, 2 Aug 2022 19:05:12 +0300 Subject: [PATCH] tests/basic: Add ifdefs for SM4 and CAMELLIA tests In-Reply-To: <20220802112637.18868-1-cllang@redhat.com> References: <20220802112637.18868-1-cllang@redhat.com> Message-ID: <1812555c-903c-68f6-1163-a4a9d133aa7f@iki.fi> Hello, Patch applied to master, thanks. -Jussi On 2.8.2022 14.26, Clemens Lang via Gcrypt-devel wrote: > * tests/basic.c (check_gcm_siv_cipher): Do not run test vectors that > are known to fail when compiled without support for SM4 or CAMELLIA. > > -- > > Fixes test execution when configured --enable-ciphers excluding sm4 or > camellia. > > Signed-off-by: Clemens Lang > --- > tests/basic.c | 4 ++++ > 1 file changed, 4 insertions(+) > > diff --git a/tests/basic.c b/tests/basic.c > index 0d03636b..26980e15 100644 > --- a/tests/basic.c > +++ b/tests/basic.c > @@ -6160,6 +6160,7 @@ check_gcm_siv_cipher (void) > "\xee\x12\x55\x82\x25\x25\x30\xb9\xa6\xf8\x3c\x81\x36\xcd\xef", > "\xce\xc3\x13\x6c\x40\x2a\xcc\x51\xa1\xce\xb3\xed\xe8\xa6\x5b\x04", > }, > +#if USE_SM4 > { > GCRY_CIPHER_SM4, FLAG_NOFIPS, > "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" > @@ -6298,6 +6299,8 @@ check_gcm_siv_cipher (void) > "\x64\x15\x17\x55\x60\x71\xb4\xaf\xcd\xc8\x90\x25\xc8\xc8\x62", > "\xe2\x32\xda\x3a\x5a\x0e\x45\x1b\x8e\xf8\xbb\xe6\x60\x71\x81\xeb", > }, > +#endif /* USE_SM4 */ > +#if USE_CAMELLIA > { > GCRY_CIPHER_CAMELLIA128, FLAG_NOFIPS, > "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" > @@ -6564,6 +6567,7 @@ check_gcm_siv_cipher (void) > "\xfb\x92\x1d\xd1\x31\x8d\x9d\xa1\xc3\x46\x69\xfa\x71\x2c\x42", > "\xc6\x54\xf9\xf0\x22\x2c\xc3\xee\xdd\x13\x02\xb8\xe7\x5a\x2e\x7e" > } > +#endif /* USE_CAMELLIA */ > }; > gcry_cipher_hd_t hde, hdd; > unsigned char out[MAX_DATA_LEN * 2]; From gmazyland at gmail.com Thu Aug 11 21:54:26 2022 From: gmazyland at gmail.com (Milan Broz) Date: Thu, 11 Aug 2022 21:54:26 +0200 Subject: [PATCH] kdf: Allow empty password for Argon2 Message-ID: <20220811195426.525805-1-gmazyland@gmail.com> * cipher/kdf.c (_gcry_kdf_open) allow empty password for Argon2. -- This patch fixes libcryptsetup requirement to support empty passwords for PBKDF (and passes Argon2 test vectors there now). Signed-off-by: Milan Broz --- cipher/kdf.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cipher/kdf.c b/cipher/kdf.c index d371bdd7..3e51e115 100644 --- a/cipher/kdf.c +++ b/cipher/kdf.c @@ -402,10 +402,13 @@ argon2_fill_first_blocks (argon2_ctx_t a) iov[iov_count].len = 4 * 7; iov[iov_count].off = 0; iov_count++; - iov[iov_count].data = (void *)a->password; - iov[iov_count].len = a->passwordlen; - iov[iov_count].off = 0; - iov_count++; + if (a->passwordlen) + { + iov[iov_count].data = (void *)a->password; + iov[iov_count].len = a->passwordlen; + iov[iov_count].off = 0; + iov_count++; + } buf_put_le32 (buf[7], a->saltlen); iov[iov_count].data = buf[7]; @@ -1861,7 +1864,7 @@ _gcry_kdf_open (gcry_kdf_hd_t *hd, int algo, int subalgo, switch (algo) { case GCRY_KDF_ARGON2: - if (!inputlen || !saltlen) + if (!saltlen) ec = GPG_ERR_INV_VALUE; else ec = argon2_open (hd, subalgo, param, paramlen, -- 2.36.1