[PATCH 2/3] camellia.c: Prepare for AES-NI/AVX implementation
Jussi Kivilinna
jussi.kivilinna at mbnet.fi
Wed Jan 23 10:55:08 CET 2013
* cipher/camellia-glue.c (CAMELLIA_encrypt_stack_burn_size)
(CAMELLIA_decrypt_stack_burn_size): Increase stack burn size.
* cipher/camellia.c (CAMELLIA_ROUNDSM): Move key-material mixing to
the front.
(camellia_setup128, camellia_setup256): Remove now unneeded
key-material mangling.
(camellia_encrypt128, camellia_decrypt128, camellia_encrypt256)
(camellia_decrypt256): Copy block to stack, so that compiler can
optimize it for register usage.
--
The Camellia implementation needs to be modified slightly for compatibility
with the AES-NI/AVX version.
Before:
Running each test 100 times.
ECB/Stream CBC CFB OFB CTR
--------------- --------------- --------------- --------------- ---------------
CAMELLIA128 800ms 790ms 840ms 730ms 810ms 800ms 820ms 820ms 730ms 740ms
CAMELLIA192 1040ms 1040ms 1030ms 930ms 1000ms 1000ms 1020ms 1020ms 940ms 930ms
CAMELLIA256 1000ms 980ms 1040ms 930ms 1010ms 990ms 1040ms 1040ms 940ms 930ms
After:
Running each test 100 times.
ECB/Stream CBC CFB OFB CTR
--------------- --------------- --------------- --------------- ---------------
CAMELLIA128 780ms 750ms 810ms 690ms 780ms 770ms 810ms 810ms 700ms 690ms
CAMELLIA192 1020ms 990ms 1000ms 890ms 970ms 970ms 990ms 1000ms 890ms 900ms
CAMELLIA256 960ms 960ms 1000ms 900ms 970ms 970ms 990ms 1010ms 900ms 890ms
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at mbnet.fi>
---
cipher/camellia-glue.c | 4 +
cipher/camellia.c | 144 +++++++++++++++++-------------------------------
2 files changed, 52 insertions(+), 96 deletions(-)
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 686262a..ba8aa28 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -121,7 +121,7 @@ camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
#define CAMELLIA_encrypt_stack_burn_size \
(sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
- +4*sizeof(u32) \
+ +4*sizeof(u32)+4*sizeof(u32) \
+2*sizeof(u32*)+4*sizeof(u32) \
+2*2*sizeof(void*) /* Function calls. */ \
)
@@ -138,7 +138,7 @@ camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
#define CAMELLIA_decrypt_stack_burn_size \
(sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
- +4*sizeof(u32) \
+ +4*sizeof(u32)+4*sizeof(u32) \
+2*sizeof(u32*)+4*sizeof(u32) \
+2*2*sizeof(void*) /* Function calls. */ \
)
diff --git a/cipher/camellia.c b/cipher/camellia.c
index 2e28bce..42a9b73 100644
--- a/cipher/camellia.c
+++ b/cipher/camellia.c
@@ -151,6 +151,8 @@ typedef unsigned char u8;
#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
do { \
+ yl ^= kl; \
+ yr ^= kr; \
ir = CAMELLIA_SP1110(xr & 0xff) \
^ CAMELLIA_SP0222((xr >> 24) & 0xff) \
^ CAMELLIA_SP3033((xr >> 16) & 0xff) \
@@ -159,8 +161,6 @@ typedef unsigned char u8;
^ CAMELLIA_SP0222((xl >> 16) & 0xff) \
^ CAMELLIA_SP3033((xl >> 8) & 0xff) \
^ CAMELLIA_SP4404(xl & 0xff); \
- il ^= kl; \
- ir ^= kr; \
ir ^= il; \
il = CAMELLIA_RR8(il); \
il ^= ir; \
@@ -614,44 +614,6 @@ void camellia_setup128(const unsigned char *key, u32 *subkey)
CamelliaSubkeyL(24) = subl(24) ^ subl(23);
CamelliaSubkeyR(24) = subr(24) ^ subr(23);
- /* apply the inverse of the last half of P-function */
- dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
- dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
- dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
- dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
- dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
- dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
- dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
- dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
- dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
- dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
- dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
- dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
- dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
- dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
- dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
- dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
- dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
- dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
-
return;
}
@@ -888,56 +850,6 @@ void camellia_setup256(const unsigned char *key, u32 *subkey)
CamelliaSubkeyL(32) = subl(32) ^ subl(31);
CamelliaSubkeyR(32) = subr(32) ^ subr(31);
- /* apply the inverse of the last half of P-function */
- dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
- dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
- dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
- dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
- dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
- dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
- dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
- dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
- dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
- dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
- dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
- dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
- dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
- dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
- dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
- dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
- dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
- dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
- dw = CamelliaSubkeyL(26) ^ CamelliaSubkeyR(26), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(26) = CamelliaSubkeyL(26) ^ dw, CamelliaSubkeyL(26) = dw;
- dw = CamelliaSubkeyL(27) ^ CamelliaSubkeyR(27), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(27) = CamelliaSubkeyL(27) ^ dw, CamelliaSubkeyL(27) = dw;
- dw = CamelliaSubkeyL(28) ^ CamelliaSubkeyR(28), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(28) = CamelliaSubkeyL(28) ^ dw, CamelliaSubkeyL(28) = dw;
- dw = CamelliaSubkeyL(29) ^ CamelliaSubkeyR(29), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(29) = CamelliaSubkeyL(29) ^ dw, CamelliaSubkeyL(29) = dw;
- dw = CamelliaSubkeyL(30) ^ CamelliaSubkeyR(30), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw;
- dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw);
- CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw,CamelliaSubkeyL(31) = dw;
-
return;
}
@@ -963,9 +875,15 @@ void camellia_setup192(const unsigned char *key, u32 *subkey)
*
* "io" must be 4byte aligned and big-endian data.
*/
-void camellia_encrypt128(const u32 *subkey, u32 *io)
+void camellia_encrypt128(const u32 *subkey, u32 *blocks)
{
u32 il, ir, t0, t1;
+ u32 io[4];
+
+ io[0] = blocks[0];
+ io[1] = blocks[1];
+ io[2] = blocks[2];
+ io[3] = blocks[3];
/* pre whitening but absorb kw2*/
io[0] ^= CamelliaSubkeyL(0);
@@ -1050,13 +968,24 @@ void camellia_encrypt128(const u32 *subkey, u32 *io)
io[2] = t0;
io[3] = t1;
+ blocks[0] = io[0];
+ blocks[1] = io[1];
+ blocks[2] = io[2];
+ blocks[3] = io[3];
+
return;
}
-void camellia_decrypt128(const u32 *subkey, u32 *io)
+void camellia_decrypt128(const u32 *subkey, u32 *blocks)
{
u32 il,ir,t0,t1; /* temporary valiables */
+ u32 io[4];
+ io[0] = blocks[0];
+ io[1] = blocks[1];
+ io[2] = blocks[2];
+ io[3] = blocks[3];
+
/* pre whitening but absorb kw2*/
io[0] ^= CamelliaSubkeyL(24);
io[1] ^= CamelliaSubkeyR(24);
@@ -1140,15 +1069,26 @@ void camellia_decrypt128(const u32 *subkey, u32 *io)
io[2] = t0;
io[3] = t1;
+ blocks[0] = io[0];
+ blocks[1] = io[1];
+ blocks[2] = io[2];
+ blocks[3] = io[3];
+
return;
}
/**
* stuff for 192 and 256bit encryption/decryption
*/
-void camellia_encrypt256(const u32 *subkey, u32 *io)
+void camellia_encrypt256(const u32 *subkey, u32 *blocks)
{
u32 il,ir,t0,t1; /* temporary valiables */
+ u32 io[4];
+
+ io[0] = blocks[0];
+ io[1] = blocks[1];
+ io[2] = blocks[2];
+ io[3] = blocks[3];
/* pre whitening but absorb kw2*/
io[0] ^= CamelliaSubkeyL(0);
@@ -1257,12 +1197,23 @@ void camellia_encrypt256(const u32 *subkey, u32 *io)
io[2] = t0;
io[3] = t1;
+ blocks[0] = io[0];
+ blocks[1] = io[1];
+ blocks[2] = io[2];
+ blocks[3] = io[3];
+
return;
}
-void camellia_decrypt256(const u32 *subkey, u32 *io)
+void camellia_decrypt256(const u32 *subkey, u32 *blocks)
{
u32 il,ir,t0,t1; /* temporary valiables */
+ u32 io[4];
+
+ io[0] = blocks[0];
+ io[1] = blocks[1];
+ io[2] = blocks[2];
+ io[3] = blocks[3];
/* pre whitening but absorb kw2*/
io[0] ^= CamelliaSubkeyL(32);
@@ -1371,6 +1322,11 @@ void camellia_decrypt256(const u32 *subkey, u32 *io)
io[2] = t0;
io[3] = t1;
+ blocks[0] = io[0];
+ blocks[1] = io[1];
+ blocks[2] = io[2];
+ blocks[3] = io[3];
+
return;
}
More information about the Gcrypt-devel
mailing list