[PATCH v2 2/2] Add SM4 ARMv8/AArch64/CE assembly implementation

Jussi Kivilinna jussi.kivilinna at iki.fi
Tue Mar 1 08:26:28 CET 2022


Hello,

On 1.3.2022 6.38, Tianjia Zhang wrote:
> new file mode 100644
> index 00000000..57e84683
> --- /dev/null
> +++ b/cipher/sm4-armv8-aarch64-ce.S
> @@ -0,0 +1,568 @@
> +/* sm4-armv8-aarch64-ce.S  -  ARMv8/AArch64/CE accelerated SM4 cipher
> + *
> + * Copyright (C) 2022 Alibaba Group.
> + * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang at linux.alibaba.com>
> + *
> + * This file is part of Libgcrypt.
> + *
> + * Libgcrypt is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as
> + * published by the Free Software Foundation; either version 2.1 of
> + * the License, or (at your option) any later version.
> + *
> + * Libgcrypt is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "asm-common-aarch64.h"
> +
> +#if defined(__AARCH64EL__) && \
> +    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
> +    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO_SM) && \
> +    defined(USE_SM4)
> +
> +.cpu generic+simd+crypto
> +
> +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 24, 25, 26, 27, 28, 29, 30, 31
> +    .set .Lv\b\().4s, \b
> +.endr
> +
> +.macro sm4e, vd, vn
> +    .inst 0xcec08400 | (.L\vn << 5) | .L\vd
> +.endm
> +
> +.macro sm4ekey, vd, vn, vm
> +    .inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd
> +.endm

I meant that the problem is that ".macro"/".endm"/".set"/".irp" may not be supported by all compilers/assemblers. The implementation here could either:
- Rely on assembler supporting these instructions and use "sm4e" and "sm4ekey" directly or
- Use preprocessor #define macros instead of assembler .macros to provide these instructions. Something like this could work:

#define vecnum_v0 0
#define vecnum_v1 1
#define vecnum_v2 2
#define vecnum_v3 3
#define vecnum_v4 4
#define vecnum_v5 5
#define vecnum_v6 6
#define vecnum_v7 7
#define vecnum_v16 16
#define vecnum_v24 24
#define vecnum_v25 25
#define vecnum_v26 26
#define vecnum_v27 27
#define vecnum_v28 28
#define vecnum_v29 29
#define vecnum_v30 30
#define vecnum_v31 31

#define sm4e(vd,vn) \
   .inst (0xcec08400 | (vecnum_##vn << 5) | vecnum_##vd)

#define sm4ekey(vd, vn, vm) \
   .inst (0xce60c800 | (vecnum_##vm << 16) | (vecnum_##vn << 5) | vecnum_##vd)

...

#define crypt_blk4(b0, b1, b2, b3)         \
         rev32 b0.16b, b0.16b;              \
         rev32 b1.16b, b1.16b;              \
         rev32 b2.16b, b2.16b;              \
         rev32 b3.16b, b3.16b;              \
         sm4e(b0, v24);                     \
         sm4e(b1, v24);                     \

-Jussi



More information about the Gcrypt-devel mailing list