[PATCH 10/10] gostr3411_94: rewrite to use u32 arithmetic
Dmitry Eremin-Solenikov
dbaryshkov at gmail.com
Fri Jun 6 20:48:36 CEST 2014
* cipher/gost28147.c (_gcry_gost_enc_data): New.
* cipher/gostr3411-94.c: Rewrite implementation to use u32 arithmetic
internally.
* cipher/gost28147.c (_gcry_gost_enc_one): Remove.
--
On my box (Core2 Duo, i386) this significantly improves GOST R 34.11-94 speed.
Before:
GOSTR3411_94 | 55.04 ns/B 17.33 MiB/s - c/B
After:
GOSTR3411_94 | 36.70 ns/B 25.99 MiB/s - c/B
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov at gmail.com>
---
cipher/gost.h | 4 +-
cipher/gost28147.c | 36 ++++++---
cipher/gostr3411-94.c | 202 ++++++++++++++++++++++++++++----------------------
3 files changed, 139 insertions(+), 103 deletions(-)
diff --git a/cipher/gost.h b/cipher/gost.h
index caaf34b..025119c 100644
--- a/cipher/gost.h
+++ b/cipher/gost.h
@@ -26,7 +26,7 @@ typedef struct {
} GOST28147_context;
/* This is a simple interface that will be used by GOST R 34.11-94 */
-extern unsigned int _gcry_gost_enc_one (GOST28147_context *c, const byte *key,
- byte *out, byte *in, int cryptopro);
+unsigned int _gcry_gost_enc_data (GOST28147_context *c, const u32 *key,
+ u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro);
#endif
diff --git a/cipher/gost28147.c b/cipher/gost28147.c
index 0841722..5db9f0b 100644
--- a/cipher/gost28147.c
+++ b/cipher/gost28147.c
@@ -69,13 +69,9 @@ gost_val (GOST28147_context *ctx, u32 cm1, int subkey)
}
static unsigned int
-gost_encrypt_block (void *c, byte *outbuf, const byte *inbuf)
+_gost_encrypt_data (void *c, u32 *o1, u32 *o2, u32 n1, u32 n2)
{
GOST28147_context *ctx = c;
- u32 n1, n2;
-
- n1 = buf_get_le32 (inbuf);
- n2 = buf_get_le32 (inbuf+4);
n2 ^= gost_val (ctx, n1, 0); n1 ^= gost_val (ctx, n2, 1);
n2 ^= gost_val (ctx, n1, 2); n1 ^= gost_val (ctx, n2, 3);
@@ -97,23 +93,41 @@ gost_encrypt_block (void *c, byte *outbuf, const byte *inbuf)
n2 ^= gost_val (ctx, n1, 3); n1 ^= gost_val (ctx, n2, 2);
n2 ^= gost_val (ctx, n1, 1); n1 ^= gost_val (ctx, n2, 0);
- buf_put_le32 (outbuf+0, n2);
- buf_put_le32 (outbuf+4, n1);
+ *o1 = n2;
+ *o2 = n1;
return /* burn_stack */ 4*sizeof(void*) /* func call */ +
3*sizeof(void*) /* stack */ +
4*sizeof(void*) /* gost_val call */;
}
-unsigned int _gcry_gost_enc_one (GOST28147_context *c, const byte *key,
- byte *out, byte *in, int cryptopro)
+static unsigned int
+gost_encrypt_block (void *c, byte *outbuf, const byte *inbuf)
+{
+ GOST28147_context *ctx = c;
+ u32 n1, n2;
+ unsigned int burn;
+
+ n1 = buf_get_le32 (inbuf);
+ n2 = buf_get_le32 (inbuf+4);
+
+ burn = _gost_encrypt_data(ctx, &n1, &n2, n1, n2);
+
+ buf_put_le32 (outbuf+0, n1);
+ buf_put_le32 (outbuf+4, n2);
+
+ return /* burn_stack */ burn + 6*sizeof(void*) /* func call */;
+}
+
+unsigned int _gcry_gost_enc_data (GOST28147_context *c, const u32 *key,
+ u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro)
{
if (cryptopro)
c->sbox = sbox_CryptoPro_3411;
else
c->sbox = sbox_test_3411;
- gost_setkey (c, key, 32);
- return gost_encrypt_block (c, out, in) + 5 * sizeof(void *);
+ memcpy (c->key, key, 8*4);
+ return _gost_encrypt_data (c, o1, o2, n1, n2) + 7 * sizeof(void *);
}
static unsigned int
diff --git a/cipher/gostr3411-94.c b/cipher/gostr3411-94.c
index 9d065fb..91e5b4c 100644
--- a/cipher/gostr3411-94.c
+++ b/cipher/gostr3411-94.c
@@ -25,6 +25,7 @@
#include "g10lib.h"
#include "bithelp.h"
+#include "bufhelp.h"
#include "cipher.h"
#include "hash-common.h"
@@ -35,8 +36,11 @@
typedef struct {
gcry_md_block_ctx_t bctx;
GOST28147_context hd;
- byte h[32];
- byte sigma[32];
+ union {
+ u32 h[8];
+ byte result[32];
+ };
+ u32 sigma[8];
u32 len;
int cryptopro;
} GOSTR3411_CONTEXT;
@@ -71,102 +75,122 @@ gost3411_cp_init (void *context, unsigned int flags)
}
static void
-do_p (unsigned char *p, unsigned char *u, unsigned char *v)
+do_p (u32 *p, u32 *u, u32 *v)
{
- int i, k;
+ int k;
+ u32 t[8];
+
for (k = 0; k < 8; k++)
+ t[k] = u[k] ^ v[k];
+
+ for (k = 0; k < 4; k++)
{
- for (i = 0; i < 4; i++)
- {
- p[i + 4 * k] = u[8 * i + k] ^ v[8 * i + k];
- }
+ p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+ ((t[2] >> (8*k)) & 0xff) << 8 |
+ ((t[4] >> (8*k)) & 0xff) << 16 |
+ ((t[6] >> (8*k)) & 0xff) << 24;
+ p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+ ((t[3] >> (8*k)) & 0xff) << 8 |
+ ((t[5] >> (8*k)) & 0xff) << 16 |
+ ((t[7] >> (8*k)) & 0xff) << 24;
}
}
static void
-do_a (unsigned char *u)
+do_a (u32 *u)
{
- unsigned char temp[8];
+ u32 t[2];
int i;
- memcpy (temp, u, 8);
- memmove (u, u+8, 24);
- for (i = 0; i < 8; i++)
- {
- u[24 + i] = u[i] ^ temp[i];
- }
+ memcpy(t, u, 2*4);
+ for (i = 0; i < 6; i++)
+ u[i] = u[i+2];
+ u[6] = u[0] ^ t[0];
+ u[7] = u[1] ^ t[1];
}
/* apply do_a twice: 1 2 3 4 -> 3 4 1^2 2^3 */
static void
-do_a2 (unsigned char *u)
+do_a2 (u32 *u)
{
- unsigned char temp[16];
+ u32 t[4];
int i;
- memcpy (temp, u, 16);
- memcpy (u, u + 16, 16);
- for (i = 0; i < 8; i++)
+ memcpy (t, u, 16);
+ memcpy (u, u + 4, 16);
+ for (i = 0; i < 2; i++)
{
- u[16 + i] = temp[i] ^ temp[8 + i];
- u[24 + i] = u[i] ^ temp[8 + i];
+ u[4+i] = t[i] ^ t[i + 2];
+ u[6+i] = u[i] ^ t[i + 2];
}
}
static void
-do_apply_c2 (unsigned char *u)
+do_apply_c2 (u32 *u)
{
- u[ 1] ^= 0xff;
- u[ 3] ^= 0xff;
- u[ 5] ^= 0xff;
- u[ 7] ^= 0xff;
-
- u[ 8] ^= 0xff;
- u[10] ^= 0xff;
- u[12] ^= 0xff;
- u[14] ^= 0xff;
-
- u[17] ^= 0xff;
- u[18] ^= 0xff;
- u[20] ^= 0xff;
- u[23] ^= 0xff;
-
- u[24] ^= 0xff;
- u[28] ^= 0xff;
- u[29] ^= 0xff;
- u[31] ^= 0xff;
+ u[ 0] ^= 0xff00ff00;
+ u[ 1] ^= 0xff00ff00;
+ u[ 2] ^= 0x00ff00ff;
+ u[ 3] ^= 0x00ff00ff;
+ u[ 4] ^= 0x00ffff00;
+ u[ 5] ^= 0xff0000ff;
+ u[ 6] ^= 0x000000ff;
+ u[ 7] ^= 0xff00ffff;
}
-#define do_phi_step(e, i) \
- e[(0 + 2*i) % 32] ^= e[(2 + 2*i) % 32] ^ e[(4 + 2*i) % 32] ^ e[(6 + 2*i) % 32] ^ e[(24 + 2*i) % 32] ^ e[(30 + 2*i) % 32]; \
- e[(1 + 2*i) % 32] ^= e[(3 + 2*i) % 32] ^ e[(5 + 2*i) % 32] ^ e[(7 + 2*i) % 32] ^ e[(25 + 2*i) % 32] ^ e[(31 + 2*i) % 32];
+#define do_chi_step12(e) \
+ e[6] ^= ((e[6] >> 16) ^ e[7] ^ (e[7] >> 16) ^ e[4] ^ (e[5] >>16)) & 0xffff;
+
+#define do_chi_step13(e) \
+ e[6] ^= ((e[7] ^ (e[7] >> 16) ^ e[0] ^ (e[4] >> 16) ^ e[6]) & 0xffff) << 16;
+
+#define do_chi_doublestep(e, i) \
+ e[i] ^= (e[i] >> 16) ^ (e[(i+1)%8] << 16) ^ e[(i+1)%8] ^ (e[(i+1)%8] >> 16) ^ (e[(i+2)%8] << 16) ^ e[(i+6)%8] ^ (e[(i+7)%8] >> 16); \
+ e[i] ^= (e[i] << 16);
static void
-do_phi_submix (unsigned char *e, unsigned char *x, int round)
+do_chi_submix12 (u32 *e, u32 *x)
{
- int i;
- round *= 2;
- for (i = 0; i < 32; i++)
- {
- e[(i + round) % 32] ^= x[i];
- }
+ e[6] ^= x[0];
+ e[7] ^= x[1];
+ e[0] ^= x[2];
+ e[1] ^= x[3];
+ e[2] ^= x[4];
+ e[3] ^= x[5];
+ e[4] ^= x[6];
+ e[5] ^= x[7];
+}
+
+static void
+do_chi_submix13 (u32 *e, u32 *x)
+{
+ e[6] ^= (x[0] << 16) | (x[7] >> 16);
+ e[7] ^= (x[1] << 16) | (x[0] >> 16);
+ e[0] ^= (x[2] << 16) | (x[1] >> 16);
+ e[1] ^= (x[3] << 16) | (x[2] >> 16);
+ e[2] ^= (x[4] << 16) | (x[3] >> 16);
+ e[3] ^= (x[5] << 16) | (x[4] >> 16);
+ e[4] ^= (x[6] << 16) | (x[5] >> 16);
+ e[5] ^= (x[7] << 16) | (x[6] >> 16);
}
static void
-do_add (unsigned char *s, unsigned char *a)
+do_add (u32 *s, u32 *a)
{
- unsigned temp = 0;
+ u32 carry = 0;
int i;
- for (i = 0; i < 32; i++)
+ for (i = 0; i < 8; i++)
{
- temp = s[i] + a[i] + (temp >> 8);
- s[i] = temp & 0xff;
+ u32 op = carry + a[i];
+ s[i] += op;
+ carry = (a[i] > op) || (op > s[i]);
}
}
static unsigned int
-do_hash_step (GOSTR3411_CONTEXT *hd, unsigned char *h, unsigned char *m)
+do_hash_step (GOSTR3411_CONTEXT *hd, u32 *h, u32 *m)
{
- unsigned char u[32], v[32], s[32];
- unsigned char k[32];
+ u32 u[8], v[8];
+ u32 s[8];
+ u32 k[8];
unsigned int burn;
int i;
@@ -176,7 +200,7 @@ do_hash_step (GOSTR3411_CONTEXT *hd, unsigned char *h, unsigned char *m)
for (i = 0; i < 4; i++) {
do_p (k, u, v);
- burn = _gcry_gost_enc_one (&hd->hd, k, s + i*8, h + i*8, hd->cryptopro);
+ burn = _gcry_gost_enc_data (&hd->hd, k, &s[2*i], &s[2*i+1], h[2*i], h[2*i+1], hd->cryptopro);
do_a (u);
if (i == 1)
@@ -186,33 +210,26 @@ do_hash_step (GOSTR3411_CONTEXT *hd, unsigned char *h, unsigned char *m)
for (i = 0; i < 5; i++)
{
- do_phi_step (s, 0);
- do_phi_step (s, 1);
- do_phi_step (s, 2);
- do_phi_step (s, 3);
- do_phi_step (s, 4);
- do_phi_step (s, 5);
- do_phi_step (s, 6);
- do_phi_step (s, 7);
- do_phi_step (s, 8);
- do_phi_step (s, 9);
+ do_chi_doublestep (s, 0);
+ do_chi_doublestep (s, 1);
+ do_chi_doublestep (s, 2);
+ do_chi_doublestep (s, 3);
+ do_chi_doublestep (s, 4);
/* That is in total 12 + 1 + 61 = 74 = 16 * 4 + 10 rounds */
if (i == 4)
break;
- do_phi_step (s, 10);
- do_phi_step (s, 11);
+ do_chi_doublestep (s, 5);
if (i == 0)
- do_phi_submix(s, m, 12);
- do_phi_step (s, 12);
+ do_chi_submix12(s, m);
+ do_chi_step12 (s);
if (i == 0)
- do_phi_submix(s, h, 13);
- do_phi_step (s, 13);
- do_phi_step (s, 14);
- do_phi_step (s, 15);
+ do_chi_submix13(s, h);
+ do_chi_step13 (s);
+ do_chi_doublestep (s, 7);
}
- memcpy (h, s+20, 12);
- memcpy (h+12, s, 20);
+ memcpy (h, s+5, 12);
+ memcpy (h+3, s, 20);
return /* burn_stack */ 4 * sizeof(void*) /* func call (ret addr + args) */ +
4 * 32 + 2 * sizeof(int) /* stack */ +
@@ -221,15 +238,16 @@ do_hash_step (GOSTR3411_CONTEXT *hd, unsigned char *h, unsigned char *m)
16 + sizeof(int) /* do_a2 stack */ );
}
-
static unsigned int
transform_blk (void *ctx, const unsigned char *data)
{
GOSTR3411_CONTEXT *hd = ctx;
- byte m[32];
+ u32 m[8];
unsigned int burn;
+ int i;
- memcpy (m, data, 32);
+ for (i = 0; i < 8; i++)
+ m[i] = buf_get_le32(data + i*4);
burn = do_hash_step (hd, hd->h, m);
do_add (hd->sigma, m);
@@ -263,9 +281,9 @@ gost3411_final (void *context)
{
GOSTR3411_CONTEXT *hd = context;
size_t padlen = 0;
- byte l[32];
+ u32 l[8];
int i;
- u32 nblocks;
+ MD_NBLOCKS_TYPE nblocks;
if (hd->bctx.count > 0)
{
@@ -286,15 +304,19 @@ gost3411_final (void *context)
nblocks --;
l[0] = 256 - padlen * 8;
}
+ l[0] |= nblocks << 8;
+ nblocks >>= 24;
for (i = 1; i < 32 && nblocks != 0; i++)
{
- l[i] = nblocks % 256;
- nblocks /= 256;
+ l[i] = nblocks;
+ nblocks >>= 24;
}
do_hash_step (hd, hd->h, l);
do_hash_step (hd, hd->h, hd->sigma);
+ for (i = 0; i < 8; i++)
+ hd->h[i] = le_bswap32(hd->h[i]);
}
static byte *
@@ -302,7 +324,7 @@ gost3411_read (void *context)
{
GOSTR3411_CONTEXT *hd = context;
- return hd->h;
+ return hd->result;
}
static unsigned char asn[6] = /* Object ID is 1.2.643.2.2.3 */
--
2.0.0.rc2
More information about the Gcrypt-devel
mailing list