[PATCH 1/1] Improved whirlpool hash performance
And Sch
andsch at inbox.com
Fri Aug 22 18:27:51 CEST 2014
Hello again, I have signed the DCO now. Here is the whirlpool patch again, signed. I uploaded my public key to the keyserver as well.
before:
Hash:
                |  nanosecs/byte   mebibytes/sec   cycles/byte
 WHIRLPOOL      |      7.75 ns/B     123.0 MiB/s         - c/B
after:
Hash:
                |  nanosecs/byte   mebibytes/sec   cycles/byte
 WHIRLPOOL      |      6.70 ns/B     142.3 MiB/s         - c/B
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
diff -ruNp libgcrypt-1.6.2/cipher/whirlpool.c libgcrypt-1.6.3/cipher/whirlpool.c
- --- libgcrypt-1.6.2/cipher/whirlpool.c 2014-08-21 07:50:39.000000000 -0500
+++ libgcrypt-1.6.3/cipher/whirlpool.c 2014-08-22 11:17:06.496754032 -0500
@@ -87,6 +87,17 @@ typedef struct {
for (i = 0; i < 8; i++) \
block_dst[i] ^= block_src[i];
+/* XOR lookup boxes with index SRC [(SHIFT + n) & 7] >> x. */
+#define WHIRLPOOL_XOR(src, shift) \
+ C[((unsigned int)(src[ (shift) ] >> 56) ) ] ^ \
+ C[((unsigned int)(src[((shift) + 7) & 7] >> 48) & 0xff) + 256 ] ^ \
+ C[((unsigned int)(src[((shift) + 6) & 7] >> 40) & 0xff) + (256*2)] ^ \
+ C[((unsigned int)(src[((shift) + 5) & 7] >> 32) & 0xff) + (256*3)] ^ \
+ C[((unsigned int)(src[((shift) + 4) & 7] >> 24) & 0xff) + (256*4)] ^ \
+ C[((unsigned int)(src[((shift) + 3) & 7] >> 16) & 0xff) + (256*5)] ^ \
+ C[((unsigned int)(src[((shift) + 2) & 7] >> 8) & 0xff) + (256*6)] ^ \
+ C[((unsigned int)(src[((shift) + 1) & 7] ) & 0xff) + (256*7)] \
+
?
/* Round constants. */
@@ -107,7 +118,7 @@ static const u64 rc[R] =
?
/* Main lookup boxes. */
- -static const u64 C0[256] =
+static const u64 C[8*256] =
{
U64_C (0x18186018c07830d8), U64_C (0x23238c2305af4626),
U64_C (0xc6c63fc67ef991b8), U64_C (0xe8e887e8136fcdfb),
@@ -237,10 +248,7 @@ static const u64 C0[256] =
U64_C (0x98985a98b4c22d2c), U64_C (0xa4a4aaa4490e55ed),
U64_C (0x2828a0285d885075), U64_C (0x5c5c6d5cda31b886),
U64_C (0xf8f8c7f8933fed6b), U64_C (0x8686228644a411c2),
- - };
- -static const u64 C1[256] =
- - {
U64_C (0xd818186018c07830), U64_C (0x2623238c2305af46),
U64_C (0xb8c6c63fc67ef991), U64_C (0xfbe8e887e8136fcd),
U64_C (0xcb878726874ca113), U64_C (0x11b8b8dab8a9626d),
@@ -369,10 +377,7 @@ static const u64 C1[256] =
U64_C (0x2c98985a98b4c22d), U64_C (0xeda4a4aaa4490e55),
U64_C (0x752828a0285d8850), U64_C (0x865c5c6d5cda31b8),
U64_C (0x6bf8f8c7f8933fed), U64_C (0xc28686228644a411),
- - };
- -static const u64 C2[256] =
- - {
U64_C (0x30d818186018c078), U64_C (0x462623238c2305af),
U64_C (0x91b8c6c63fc67ef9), U64_C (0xcdfbe8e887e8136f),
U64_C (0x13cb878726874ca1), U64_C (0x6d11b8b8dab8a962),
@@ -501,10 +506,7 @@ static const u64 C2[256] =
U64_C (0x2d2c98985a98b4c2), U64_C (0x55eda4a4aaa4490e),
U64_C (0x50752828a0285d88), U64_C (0xb8865c5c6d5cda31),
U64_C (0xed6bf8f8c7f8933f), U64_C (0x11c28686228644a4),
- - };
- -static const u64 C3[256] =
- - {
U64_C (0x7830d818186018c0), U64_C (0xaf462623238c2305),
U64_C (0xf991b8c6c63fc67e), U64_C (0x6fcdfbe8e887e813),
U64_C (0xa113cb878726874c), U64_C (0x626d11b8b8dab8a9),
@@ -633,10 +635,7 @@ static const u64 C3[256] =
U64_C (0xc22d2c98985a98b4), U64_C (0x0e55eda4a4aaa449),
U64_C (0x8850752828a0285d), U64_C (0x31b8865c5c6d5cda),
U64_C (0x3fed6bf8f8c7f893), U64_C (0xa411c28686228644),
- - };
- -static const u64 C4[256] =
- - {
U64_C (0xc07830d818186018), U64_C (0x05af462623238c23),
U64_C (0x7ef991b8c6c63fc6), U64_C (0x136fcdfbe8e887e8),
U64_C (0x4ca113cb87872687), U64_C (0xa9626d11b8b8dab8),
@@ -765,10 +764,7 @@ static const u64 C4[256] =
U64_C (0xb4c22d2c98985a98), U64_C (0x490e55eda4a4aaa4),
U64_C (0x5d8850752828a028), U64_C (0xda31b8865c5c6d5c),
U64_C (0x933fed6bf8f8c7f8), U64_C (0x44a411c286862286),
- - };
- -static const u64 C5[256] =
- - {
U64_C (0x18c07830d8181860), U64_C (0x2305af462623238c),
U64_C (0xc67ef991b8c6c63f), U64_C (0xe8136fcdfbe8e887),
U64_C (0x874ca113cb878726), U64_C (0xb8a9626d11b8b8da),
@@ -897,10 +893,7 @@ static const u64 C5[256] =
U64_C (0x98b4c22d2c98985a), U64_C (0xa4490e55eda4a4aa),
U64_C (0x285d8850752828a0), U64_C (0x5cda31b8865c5c6d),
U64_C (0xf8933fed6bf8f8c7), U64_C (0x8644a411c2868622),
- - };
- -static const u64 C6[256] =
- - {
U64_C (0x6018c07830d81818), U64_C (0x8c2305af46262323),
U64_C (0x3fc67ef991b8c6c6), U64_C (0x87e8136fcdfbe8e8),
U64_C (0x26874ca113cb8787), U64_C (0xdab8a9626d11b8b8),
@@ -1029,10 +1022,7 @@ static const u64 C6[256] =
U64_C (0x5a98b4c22d2c9898), U64_C (0xaaa4490e55eda4a4),
U64_C (0xa0285d8850752828), U64_C (0x6d5cda31b8865c5c),
U64_C (0xc7f8933fed6bf8f8), U64_C (0x228644a411c28686),
- - };
- -static const u64 C7[256] =
- - {
U64_C (0x186018c07830d818), U64_C (0x238c2305af462623),
U64_C (0xc63fc67ef991b8c6), U64_C (0xe887e8136fcdfbe8),
U64_C (0x8726874ca113cb87), U64_C (0xb8dab8a9626d11b8),
@@ -1163,7 +1153,6 @@ static const u64 C7[256] =
U64_C (0xf8c7f8933fed6bf8), U64_C (0x86228644a411c286),
};
- -
?
/*
* Transform block.
@@ -1172,97 +1161,36 @@ static unsigned int
whirlpool_transform (void *ctx, const unsigned char *data)
{
whirlpool_context_t *context = ctx;
- - whirlpool_block_t data_block;
- - whirlpool_block_t key;
- - whirlpool_block_t state;
- - whirlpool_block_t block;
+ u64 key[2][BLOCK_SIZE / 8];
+ u64 state[2][BLOCK_SIZE / 8];
unsigned int r;
unsigned int i;
- - buffer_to_block (data, data_block, i);
- - block_copy (key, context->hash_state, i);
- - block_copy (state, context->hash_state, i);
- - block_xor (state, data_block, i);
+ /* buffer_to_block and block_xor at once */
+
+ for (i = 0; i < 8; i++)
+ state[0][i] = buf_get_be64((data) + i * 8) ^ context->hash_state[i];
+
+ block_copy (key[0], context->hash_state, i);
+ block_copy (context->hash_state, state[0], i);
- - for (r = 0; r < R; r++)
+ for (r = 0, i = 0; r < R; r++, i = !i)
{
- - /* Compute round key K^r. */
+ /* Compute round key K^r, and apply r-th round transformation, interleaved */
- - block[0] = (C0[(key[0] >> 56) & 0xFF] ^ C1[(key[7] >> 48) & 0xFF] ^
- - C2[(key[6] >> 40) & 0xFF] ^ C3[(key[5] >> 32) & 0xFF] ^
- - C4[(key[4] >> 24) & 0xFF] ^ C5[(key[3] >> 16) & 0xFF] ^
- - C6[(key[2] >> 8) & 0xFF] ^ C7[(key[1] >> 0) & 0xFF] ^ rc[r]);
- - block[1] = (C0[(key[1] >> 56) & 0xFF] ^ C1[(key[0] >> 48) & 0xFF] ^
- - C2[(key[7] >> 40) & 0xFF] ^ C3[(key[6] >> 32) & 0xFF] ^
- - C4[(key[5] >> 24) & 0xFF] ^ C5[(key[4] >> 16) & 0xFF] ^
- - C6[(key[3] >> 8) & 0xFF] ^ C7[(key[2] >> 0) & 0xFF]);
- - block[2] = (C0[(key[2] >> 56) & 0xFF] ^ C1[(key[1] >> 48) & 0xFF] ^
- - C2[(key[0] >> 40) & 0xFF] ^ C3[(key[7] >> 32) & 0xFF] ^
- - C4[(key[6] >> 24) & 0xFF] ^ C5[(key[5] >> 16) & 0xFF] ^
- - C6[(key[4] >> 8) & 0xFF] ^ C7[(key[3] >> 0) & 0xFF]);
- - block[3] = (C0[(key[3] >> 56) & 0xFF] ^ C1[(key[2] >> 48) & 0xFF] ^
- - C2[(key[1] >> 40) & 0xFF] ^ C3[(key[0] >> 32) & 0xFF] ^
- - C4[(key[7] >> 24) & 0xFF] ^ C5[(key[6] >> 16) & 0xFF] ^
- - C6[(key[5] >> 8) & 0xFF] ^ C7[(key[4] >> 0) & 0xFF]);
- - block[4] = (C0[(key[4] >> 56) & 0xFF] ^ C1[(key[3] >> 48) & 0xFF] ^
- - C2[(key[2] >> 40) & 0xFF] ^ C3[(key[1] >> 32) & 0xFF] ^
- - C4[(key[0] >> 24) & 0xFF] ^ C5[(key[7] >> 16) & 0xFF] ^
- - C6[(key[6] >> 8) & 0xFF] ^ C7[(key[5] >> 0) & 0xFF]);
- - block[5] = (C0[(key[5] >> 56) & 0xFF] ^ C1[(key[4] >> 48) & 0xFF] ^
- - C2[(key[3] >> 40) & 0xFF] ^ C3[(key[2] >> 32) & 0xFF] ^
- - C4[(key[1] >> 24) & 0xFF] ^ C5[(key[0] >> 16) & 0xFF] ^
- - C6[(key[7] >> 8) & 0xFF] ^ C7[(key[6] >> 0) & 0xFF]);
- - block[6] = (C0[(key[6] >> 56) & 0xFF] ^ C1[(key[5] >> 48) & 0xFF] ^
- - C2[(key[4] >> 40) & 0xFF] ^ C3[(key[3] >> 32) & 0xFF] ^
- - C4[(key[2] >> 24) & 0xFF] ^ C5[(key[1] >> 16) & 0xFF] ^
- - C6[(key[0] >> 8) & 0xFF] ^ C7[(key[7] >> 0) & 0xFF]);
- - block[7] = (C0[(key[7] >> 56) & 0xFF] ^ C1[(key[6] >> 48) & 0xFF] ^
- - C2[(key[5] >> 40) & 0xFF] ^ C3[(key[4] >> 32) & 0xFF] ^
- - C4[(key[3] >> 24) & 0xFF] ^ C5[(key[2] >> 16) & 0xFF] ^
- - C6[(key[1] >> 8) & 0xFF] ^ C7[(key[0] >> 0) & 0xFF]);
- - block_copy (key, block, i);
- -
- - /* Apply r-th round transformation. */
- -
- - block[0] = (C0[(state[0] >> 56) & 0xFF] ^ C1[(state[7] >> 48) & 0xFF] ^
- - C2[(state[6] >> 40) & 0xFF] ^ C3[(state[5] >> 32) & 0xFF] ^
- - C4[(state[4] >> 24) & 0xFF] ^ C5[(state[3] >> 16) & 0xFF] ^
- - C6[(state[2] >> 8) & 0xFF] ^ C7[(state[1] >> 0) & 0xFF] ^ key[0]);
- - block[1] = (C0[(state[1] >> 56) & 0xFF] ^ C1[(state[0] >> 48) & 0xFF] ^
- - C2[(state[7] >> 40) & 0xFF] ^ C3[(state[6] >> 32) & 0xFF] ^
- - C4[(state[5] >> 24) & 0xFF] ^ C5[(state[4] >> 16) & 0xFF] ^
- - C6[(state[3] >> 8) & 0xFF] ^ C7[(state[2] >> 0) & 0xFF] ^ key[1]);
- - block[2] = (C0[(state[2] >> 56) & 0xFF] ^ C1[(state[1] >> 48) & 0xFF] ^
- - C2[(state[0] >> 40) & 0xFF] ^ C3[(state[7] >> 32) & 0xFF] ^
- - C4[(state[6] >> 24) & 0xFF] ^ C5[(state[5] >> 16) & 0xFF] ^
- - C6[(state[4] >> 8) & 0xFF] ^ C7[(state[3] >> 0) & 0xFF] ^ key[2]);
- - block[3] = (C0[(state[3] >> 56) & 0xFF] ^ C1[(state[2] >> 48) & 0xFF] ^
- - C2[(state[1] >> 40) & 0xFF] ^ C3[(state[0] >> 32) & 0xFF] ^
- - C4[(state[7] >> 24) & 0xFF] ^ C5[(state[6] >> 16) & 0xFF] ^
- - C6[(state[5] >> 8) & 0xFF] ^ C7[(state[4] >> 0) & 0xFF] ^ key[3]);
- - block[4] = (C0[(state[4] >> 56) & 0xFF] ^ C1[(state[3] >> 48) & 0xFF] ^
- - C2[(state[2] >> 40) & 0xFF] ^ C3[(state[1] >> 32) & 0xFF] ^
- - C4[(state[0] >> 24) & 0xFF] ^ C5[(state[7] >> 16) & 0xFF] ^
- - C6[(state[6] >> 8) & 0xFF] ^ C7[(state[5] >> 0) & 0xFF] ^ key[4]);
- - block[5] = (C0[(state[5] >> 56) & 0xFF] ^ C1[(state[4] >> 48) & 0xFF] ^
- - C2[(state[3] >> 40) & 0xFF] ^ C3[(state[2] >> 32) & 0xFF] ^
- - C4[(state[1] >> 24) & 0xFF] ^ C5[(state[0] >> 16) & 0xFF] ^
- - C6[(state[7] >> 8) & 0xFF] ^ C7[(state[6] >> 0) & 0xFF] ^ key[5]);
- - block[6] = (C0[(state[6] >> 56) & 0xFF] ^ C1[(state[5] >> 48) & 0xFF] ^
- - C2[(state[4] >> 40) & 0xFF] ^ C3[(state[3] >> 32) & 0xFF] ^
- - C4[(state[2] >> 24) & 0xFF] ^ C5[(state[1] >> 16) & 0xFF] ^
- - C6[(state[0] >> 8) & 0xFF] ^ C7[(state[7] >> 0) & 0xFF] ^ key[6]);
- - block[7] = (C0[(state[7] >> 56) & 0xFF] ^ C1[(state[6] >> 48) & 0xFF] ^
- - C2[(state[5] >> 40) & 0xFF] ^ C3[(state[4] >> 32) & 0xFF] ^
- - C4[(state[3] >> 24) & 0xFF] ^ C5[(state[2] >> 16) & 0xFF] ^
- - C6[(state[1] >> 8) & 0xFF] ^ C7[(state[0] >> 0) & 0xFF] ^ key[7]);
- - block_copy (state, block, i);
+ state[!i][0] = WHIRLPOOL_XOR(state[i], 0) ^ (key[!i][0] = WHIRLPOOL_XOR(key[i], 0) ^ rc[r]);
+ state[!i][1] = WHIRLPOOL_XOR(state[i], 1) ^ (key[!i][1] = WHIRLPOOL_XOR(key[i], 1));
+ state[!i][2] = WHIRLPOOL_XOR(state[i], 2) ^ (key[!i][2] = WHIRLPOOL_XOR(key[i], 2));
+ state[!i][3] = WHIRLPOOL_XOR(state[i], 3) ^ (key[!i][3] = WHIRLPOOL_XOR(key[i], 3));
+ state[!i][4] = WHIRLPOOL_XOR(state[i], 4) ^ (key[!i][4] = WHIRLPOOL_XOR(key[i], 4));
+ state[!i][5] = WHIRLPOOL_XOR(state[i], 5) ^ (key[!i][5] = WHIRLPOOL_XOR(key[i], 5));
+ state[!i][6] = WHIRLPOOL_XOR(state[i], 6) ^ (key[!i][6] = WHIRLPOOL_XOR(key[i], 6));
+ state[!i][7] = WHIRLPOOL_XOR(state[i], 7) ^ (key[!i][7] = WHIRLPOOL_XOR(key[i], 7));
}
/* Compression. */
- - block_xor (context->hash_state, data_block, i);
- - block_xor (context->hash_state, state, i);
+ block_xor (context->hash_state, state[0], i);
return /*burn_stack*/ 4 * sizeof(whirlpool_block_t) + 2 * sizeof(int) +
4 * sizeof(void*);
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1
iQEcBAEBAgAGBQJT927aAAoJEClrD5EX5v64jR0H/Ax+eamg7Z1mfLz/E/HWmtZm
ibSEkN4Z5PnqCw7GedoguuoSnxmLJI3ovJ85hXvJGrxVHy/kIy2RstZZfqbrJIgi
xTypSuRCx81zebgmgQOn6MZ1MVQF1h3N8KYsJEwjtcVbOioGBKfEPZbcX7ySLSdX
vNWYhyoZwXBgICaluP6M8QSeLQov1CkmWl/TLk8P3r1o68djTcI4O1YeFvazfqWj
z1mMmVpmfIDZJGac/rrKR8rNL/HUW+sCNTaJ84OGBWrwFPqkf0ABTxCrMpDQrHCc
GVi8cAfiI0t2JfcAQB2zruU8girJamT7ox/94CMKwYfo2AaqltnHkRnlFqm4sAo=
=5Nuj
-----END PGP SIGNATURE-----
____________________________________________________________
Can't remember your password? Do you need a strong and secure password?
Use Password manager! It stores your passwords & protects your account.
Check it out at http://mysecurelogon.com/manager
More information about the Gcrypt-devel
mailing list