[PATCH 4/4] Improve performance of Tiger hash algorithms
Jussi Kivilinna
jussi.kivilinna at iki.fi
Sun Nov 1 20:06:25 CET 2015
* cipher/tiger.c (tiger_round, pass, key_schedule): Convert functions
to macros.
(transform_blk): Pass variable names instead of pointers to 'pass'.
--
Benchmark results on Intel Haswell @ 3.2 GHz:
Before:
| nanosecs/byte mebibytes/sec cycles/byte
TIGER | 3.25 ns/B 293.5 MiB/s 10.40 c/B
After (1.75x faster):
| nanosecs/byte mebibytes/sec cycles/byte
TIGER | 1.85 ns/B 515.3 MiB/s 5.92 c/B
Benchmark results on Cortex-A8 @ 1008 MHz:
Before:
| nanosecs/byte mebibytes/sec cycles/byte
TIGER | 63.42 ns/B 15.04 MiB/s 63.93 c/B
After (1.26x faster):
| nanosecs/byte mebibytes/sec cycles/byte
TIGER | 49.99 ns/B 19.08 MiB/s 50.39 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/tiger.c | 104 ++++++++++++++++++++++----------------------------------
1 file changed, 40 insertions(+), 64 deletions(-)
diff --git a/cipher/tiger.c b/cipher/tiger.c
index 078133a..516bd44 100644
--- a/cipher/tiger.c
+++ b/cipher/tiger.c
@@ -633,68 +633,44 @@ tiger2_init (void *context, unsigned int flags)
do_init (context, 2);
}
-static void
-tiger_round( u64 *ra, u64 *rb, u64 *rc, u64 x, int mul )
-{
- u64 a = *ra;
- u64 b = *rb;
- u64 c = *rc;
-
- c ^= x;
- a -= ( sbox1[ c & 0xff ] ^ sbox2[ (c >> 16) & 0xff ]
- ^ sbox3[ (c >> 32) & 0xff ] ^ sbox4[ (c >> 48) & 0xff ]);
- b += ( sbox4[ (c >> 8) & 0xff ] ^ sbox3[ (c >> 24) & 0xff ]
- ^ sbox2[ (c >> 40) & 0xff ] ^ sbox1[ (c >> 56) & 0xff ]);
- b *= mul;
-
- *ra = a;
- *rb = b;
- *rc = c;
-}
-
-
-static void
-pass( u64 *ra, u64 *rb, u64 *rc, u64 *x, int mul )
-{
- u64 a = *ra;
- u64 b = *rb;
- u64 c = *rc;
-
- tiger_round( &a, &b, &c, x[0], mul );
- tiger_round( &b, &c, &a, x[1], mul );
- tiger_round( &c, &a, &b, x[2], mul );
- tiger_round( &a, &b, &c, x[3], mul );
- tiger_round( &b, &c, &a, x[4], mul );
- tiger_round( &c, &a, &b, x[5], mul );
- tiger_round( &a, &b, &c, x[6], mul );
- tiger_round( &b, &c, &a, x[7], mul );
-
- *ra = a;
- *rb = b;
- *rc = c;
-}
-
-static void
-key_schedule( u64 *x )
-{
- x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL;
- x[1] ^= x[0];
- x[2] += x[1];
- x[3] -= x[2] ^ ((~x[1]) << 19 );
- x[4] ^= x[3];
- x[5] += x[4];
- x[6] -= x[5] ^ ((~x[4]) >> 23 );
- x[7] ^= x[6];
- x[0] += x[7];
- x[1] -= x[0] ^ ((~x[7]) << 19 );
- x[2] ^= x[1];
- x[3] += x[2];
- x[4] -= x[3] ^ ((~x[2]) >> 23 );
- x[5] ^= x[4];
- x[6] += x[5];
- x[7] -= x[6] ^ 0x0123456789abcdefLL;
-}
+#define tiger_round(xa, xb, xc, xx, xmul) { \
+ xc ^= xx; \
+ xa -= ( sbox1[ (xc) & 0xff ] ^ sbox2[ ((xc) >> 16) & 0xff ] \
+ ^ sbox3[ ((xc) >> 32) & 0xff ] ^ sbox4[ ((xc) >> 48) & 0xff ]); \
+ xb += ( sbox4[ ((xc) >> 8) & 0xff ] ^ sbox3[ ((xc) >> 24) & 0xff ] \
+ ^ sbox2[ ((xc) >> 40) & 0xff ] ^ sbox1[ ((xc) >> 56) & 0xff ]); \
+ xb *= xmul; }
+
+
+#define pass(ya, yb, yc, yx, ymul) { \
+ tiger_round( ya, yb, yc, yx[0], ymul ); \
+ tiger_round( yb, yc, ya, yx[1], ymul ); \
+ tiger_round( yc, ya, yb, yx[2], ymul ); \
+ tiger_round( ya, yb, yc, yx[3], ymul ); \
+ tiger_round( yb, yc, ya, yx[4], ymul ); \
+ tiger_round( yc, ya, yb, yx[5], ymul ); \
+ tiger_round( ya, yb, yc, yx[6], ymul ); \
+ tiger_round( yb, yc, ya, yx[7], ymul ); }
+
+
+#define key_schedule(x) { \
+ x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; \
+ x[1] ^= x[0]; \
+ x[2] += x[1]; \
+ x[3] -= x[2] ^ ((~x[1]) << 19 ); \
+ x[4] ^= x[3]; \
+ x[5] += x[4]; \
+ x[6] -= x[5] ^ ((~x[4]) >> 23 ); \
+ x[7] ^= x[6]; \
+ x[0] += x[7]; \
+ x[1] -= x[0] ^ ((~x[7]) << 19 ); \
+ x[2] ^= x[1]; \
+ x[3] += x[2]; \
+ x[4] -= x[3] ^ ((~x[2]) >> 23 ); \
+ x[5] ^= x[4]; \
+ x[6] += x[5]; \
+ x[7] -= x[6] ^ 0x0123456789abcdefLL; }
/****************
@@ -716,11 +692,11 @@ transform_blk ( void *ctx, const unsigned char *data )
b = bb = hd->b;
c = cc = hd->c;
- pass( &a, &b, &c, x, 5);
+ pass( a, b, c, x, 5);
key_schedule( x );
- pass( &c, &a, &b, x, 7);
+ pass( c, a, b, x, 7);
key_schedule( x );
- pass( &b, &c, &a, x, 9);
+ pass( b, c, a, x, 9);
/* feedforward */
a ^= aa;
More information about the Gcrypt-devel
mailing list