[svn] GnuPG - r4711 - in branches/STABLE-BRANCH-1-4: . cipher doc scripts

svn author wk cvs at cvs.gnupg.org
Sat Mar 22 18:01:40 CET 2008


Author: wk
Date: 2008-03-22 18:01:37 +0100 (Sat, 22 Mar 2008)
New Revision: 4711

Modified:
   branches/STABLE-BRANCH-1-4/NEWS
   branches/STABLE-BRANCH-1-4/cipher/ChangeLog
   branches/STABLE-BRANCH-1-4/cipher/algorithms.h
   branches/STABLE-BRANCH-1-4/cipher/cipher.c
   branches/STABLE-BRANCH-1-4/cipher/rijndael.c
   branches/STABLE-BRANCH-1-4/configure.ac
   branches/STABLE-BRANCH-1-4/doc/ChangeLog
   branches/STABLE-BRANCH-1-4/doc/README.W32
   branches/STABLE-BRANCH-1-4/scripts/ChangeLog
   branches/STABLE-BRANCH-1-4/scripts/w32installer.nsi
Log:
Improved AES performance.


[The diff below has been truncated]

Modified: branches/STABLE-BRANCH-1-4/cipher/ChangeLog
===================================================================
--- branches/STABLE-BRANCH-1-4/cipher/ChangeLog	2008-03-20 15:31:43 UTC (rev 4710)
+++ branches/STABLE-BRANCH-1-4/cipher/ChangeLog	2008-03-22 17:01:37 UTC (rev 4711)
@@ -1,3 +1,13 @@
+2008-03-22  Werner Koch  <wk at g10code.com>
+
+	* cipher.c (struct cipher_handle_s): Make sure IV is u32
+	aligned.  Change all users of IV.
+	(do_cfb_encrypt): Optimize and use bulk code for AES.
+	(do_cfb_decrypt): Ditto.
+	* rijndael.c (do_encrypt): Remove.
+	(do_encrypt_aligned, do_encrypt): New.  Taken from libgcrypt.
+	(rijndael_cfb_enc, rijndael_cfb_dec): New.
+
 2007-12-12  Werner Koch  <wk at g10code.com>
 
 	* pubkey.c (pubkey_encrypt, pubkey_decrypt): Allow type 20 keys.

Modified: branches/STABLE-BRANCH-1-4/doc/ChangeLog
===================================================================
--- branches/STABLE-BRANCH-1-4/doc/ChangeLog	2008-03-20 15:31:43 UTC (rev 4710)
+++ branches/STABLE-BRANCH-1-4/doc/ChangeLog	2008-03-22 17:01:37 UTC (rev 4711)
@@ -1,3 +1,9 @@
+2007-12-21  Werner Koch  <wk at g10code.com>
+
+	* README.W32: State that Vista is supported and that at least NT-4
+	is required.  It might still work on older systems, but I don't
+	know for sure.
+
 2007-12-12  Werner Koch  <wk at g10code.com>
 
 	* gpg.texi, specify-user-id.texi: Update from gnupg-2.

Modified: branches/STABLE-BRANCH-1-4/scripts/ChangeLog
===================================================================
--- branches/STABLE-BRANCH-1-4/scripts/ChangeLog	2008-03-20 15:31:43 UTC (rev 4710)
+++ branches/STABLE-BRANCH-1-4/scripts/ChangeLog	2008-03-22 17:01:37 UTC (rev 4711)
@@ -1,3 +1,7 @@
+2008-01-30  Werner Koch  <wk at g10code.com>
+
+	* w32installer.nsi: Set the OutPath back.
+
 2007-12-12  Werner Koch  <wk at g10code.com>
 
 	* config.sub, config.guess: Update to version 2007-11-19.

Modified: branches/STABLE-BRANCH-1-4/NEWS
===================================================================
--- branches/STABLE-BRANCH-1-4/NEWS	2008-03-20 15:31:43 UTC (rev 4710)
+++ branches/STABLE-BRANCH-1-4/NEWS	2008-03-22 17:01:37 UTC (rev 4711)
@@ -1,3 +1,11 @@
+Noteworthy changes in version 1.4.9 (unreleased)
+------------------------------------------------
+
+    * Improved AES encryption performance by more than 20% (on ia32).
+      Decryption is also a bit faster.
+
+
+
 Noteworthy changes in version 1.4.8 (2007-12-20)
 ------------------------------------------------
         

Modified: branches/STABLE-BRANCH-1-4/cipher/algorithms.h
===================================================================
--- branches/STABLE-BRANCH-1-4/cipher/algorithms.h	2008-03-20 15:31:43 UTC (rev 4710)
+++ branches/STABLE-BRANCH-1-4/cipher/algorithms.h	2008-03-22 17:01:37 UTC (rev 4711)
@@ -118,8 +118,6 @@
 		  void (**decryptf)( void *c, byte *outbuf, const byte *inbuf )
 		  );
 
-/* this is just a kludge for the time we have not yet changed the cipher
- * stuff to the scheme we use for random and digests */
 const char *
 rijndael_get_info( int algo, size_t *keylen,
 		   size_t *blocksize, size_t *contextsize,
@@ -127,6 +125,12 @@
 		   void (**encryptf)(void *c, byte *outbuf, const byte *inbuf),
 		   void (**decryptf)(void *c, byte *outbuf, const byte *inbuf)
 		   );
+void rijndael_cfb_enc (void *context, unsigned char *iv, 
+                       void *outbuf_arg, const void *inbuf_arg,
+                       unsigned int nblocks);
+void rijndael_cfb_dec (void *context, unsigned char *iv, 
+                       void *outbuf_arg, const void *inbuf_arg,
+                       unsigned int nblocks);
 
 const char *
 idea_get_info( int algo, size_t *keylen,

Modified: branches/STABLE-BRANCH-1-4/cipher/cipher.c
===================================================================
--- branches/STABLE-BRANCH-1-4/cipher/cipher.c	2008-03-20 15:31:43 UTC (rev 4710)
+++ branches/STABLE-BRANCH-1-4/cipher/cipher.c	2008-03-22 17:01:37 UTC (rev 4711)
@@ -1,6 +1,6 @@
 /* cipher.c  -	cipher dispatcher
  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
- *               2007 Free Software Foundation, Inc.
+ *               2007, 2008 Free Software Foundation, Inc.
  *
  * This file is part of GnuPG.
  *
@@ -52,17 +52,26 @@
 static int disabled_algos[TABLE_SIZE];
 
 
-struct cipher_handle_s {
-    int  algo;
-    int  mode;
-    size_t blocksize;
-    byte iv[MAX_BLOCKSIZE];	/* (this should be ulong aligned) */
-    byte lastiv[MAX_BLOCKSIZE];
-    int  unused;  /* in IV */
-    int  (*setkey)( void *c, const byte *key, unsigned keylen );
-    void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
-    void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
-    PROPERLY_ALIGNED_TYPE context;
+struct cipher_handle_s 
+{
+  int  algo;
+  int  mode;
+  size_t blocksize;
+  
+  /* The initialization vector.  To help code optimization we make
+     sure that it is aligned on an unsigned long and u32 boundary.  */
+  union {
+    unsigned long dummy_ul_iv;         
+    u32 dummy_u32_iv;
+    unsigned char iv[MAX_BLOCKSIZE];	
+  } u_iv;
+  
+  byte lastiv[MAX_BLOCKSIZE];
+  int  unused;  /* in IV */
+  int  (*setkey)( void *c, const byte *key, unsigned keylen );
+  void (*encrypt)( void *c, byte *outbuf, const byte *inbuf );
+  void (*decrypt)( void *c, byte *outbuf, const byte *inbuf );
+  PROPERLY_ALIGNED_TYPE context;
 };
 
 
@@ -459,14 +468,14 @@
 void
 cipher_setiv( CIPHER_HANDLE c, const byte *iv, unsigned ivlen )
 {
-    memset( c->iv, 0, c->blocksize );
+    memset( c->u_iv.iv, 0, c->blocksize );
     if( iv ) {
 	if( ivlen != c->blocksize )
 	    log_info("WARNING: cipher_setiv: ivlen=%u blklen=%u\n",
 					     ivlen, (unsigned)c->blocksize );
 	if( ivlen > c->blocksize )
 	    ivlen = c->blocksize;
-	memcpy( c->iv, iv, ivlen );
+	memcpy( c->u_iv.iv, iv, ivlen );
     }
     c->unused = 0;
 }
@@ -507,10 +516,10 @@
 	/* fixme: the xor should works on words and not on
 	 * bytes.  Maybe it is a good idea to enhance the cipher backend
 	 * API to allow for CBC handling in the backend */
-	for(ivp=c->iv,i=0; i < blocksize; i++ )
+	for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
 	    outbuf[i] = inbuf[i] ^ *ivp++;
 	(*c->encrypt)( &c->context.c, outbuf, outbuf );
-	memcpy(c->iv, outbuf, blocksize );
+	memcpy(c->u_iv.iv, outbuf, blocksize );
 	inbuf  += c->blocksize;
 	outbuf += c->blocksize;
     }
@@ -530,9 +539,9 @@
 	 * for this here because it is not used otherwise */
 	memcpy(c->lastiv, inbuf, blocksize );
 	(*c->decrypt)( &c->context.c, outbuf, inbuf );
-	for(ivp=c->iv,i=0; i < blocksize; i++ )
+	for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
 	    outbuf[i] ^= *ivp++;
-	memcpy(c->iv, c->lastiv, blocksize );
+	memcpy(c->u_iv.iv, c->lastiv, blocksize );
 	inbuf  += c->blocksize;
 	outbuf += c->blocksize;
     }
@@ -542,120 +551,182 @@
 static void
 do_cfb_encrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
 {
-    byte *ivp;
-    size_t blocksize = c->blocksize;
+  byte *ivp;
+  size_t blocksize = c->blocksize;
+  size_t blocksize_x_2 = blocksize + blocksize;
 
-    if( nbytes <= c->unused ) {
-	/* short enough to be encoded by the remaining XOR mask */
-	/* XOR the input with the IV and store input into IV */
-	for(ivp=c->iv+c->blocksize - c->unused; nbytes; nbytes--, c->unused-- )
+  if ( nbytes <= c->unused )
+    {
+      /* Short enough to be encoded by the remaining XOR mask.  XOR
+	 the input with the IV and store input into IV.  */
+      for (ivp=c->u_iv.iv+c->blocksize - c->unused; nbytes; 
+            nbytes--, c->unused-- )
 	    *outbuf++ = (*ivp++ ^= *inbuf++);
 	return;
     }
+  
+  if ( c->unused )
+    {
+      /* XOR the input with the IV and store input into IV.  */
+      nbytes -= c->unused;
+      for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
+        *outbuf++ = (*ivp++ ^= *inbuf++);
+    }
 
-    if( c->unused ) {
-	/* XOR the input with the IV and store input into IV */
-	nbytes -= c->unused;
-	for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- )
-	    *outbuf++ = (*ivp++ ^= *inbuf++);
+  /* Now we can process complete blocks.  We use a loop as long as we
+     have at least 2 blocks and use conditions for the rest.  This
+     also allows the use of a bulk encryption function if available.  */
+#ifdef USE_AES
+  if (nbytes >= blocksize_x_2 
+      && (c->algo == CIPHER_ALGO_AES
+          || c->algo == CIPHER_ALGO_AES256
+          || c->algo == CIPHER_ALGO_AES192))
+    {
+      unsigned int nblocks = nbytes / blocksize;
+      rijndael_cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); 
+      outbuf += nblocks * blocksize;
+      inbuf  += nblocks * blocksize;
+      nbytes -= nblocks * blocksize;
     }
+  else
+#endif /*USE_AES*/
+    {
+      while ( nbytes >= blocksize_x_2 )
+        {
+          int i;
+          /* Encrypt the IV. */
+          c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+          /* XOR the input with the IV and store input into IV.  */
+          for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+            *outbuf++ = (*ivp++ ^= *inbuf++);
+          nbytes -= blocksize;
+        }
+    }
 
-    /* Now we can process complete blocks. */
-#if 0 
-    /* Experimental code.  We may only use this for standard CFB
-       because for Phil's mode we need to save the IV of before the
-       last encryption - we don't want to do this in tghe fasf CFB
-       encryption routine.  */
-    if (c->algo == CIPHER_ALGO_AES
-        && nbytes >= blocksize 
-        && c->mode != CIPHER_MODE_PHILS_CFB) {
-        size_t n;
-
-	memcpy( c->lastiv, c->iv, blocksize );
-        n = (nbytes / blocksize) * blocksize;
-        rijndael_cfb_encrypt (&c->context.c, c->iv, outbuf, inbuf, n);
-        inbuf  += n;
-        outbuf += n;
-	nbytes -= n;
+  if ( nbytes >= blocksize )
+    {
+      int i;
+      /* Save the current IV and then encrypt the IV. */
+      memcpy( c->lastiv, c->u_iv.iv, blocksize );
+      c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      /* XOR the input with the IV and store input into IV */
+      for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+        *outbuf++ = (*ivp++ ^= *inbuf++);
+      nbytes -= blocksize;
     }
-#endif
-    while( nbytes >= blocksize ) {
-	int i;
-	/* encrypt the IV (and save the current one) */
-	memcpy( c->lastiv, c->iv, blocksize );
-	(*c->encrypt)( &c->context.c, c->iv, c->iv );
-	/* XOR the input with the IV and store input into IV */
-	for(ivp=c->iv,i=0; i < blocksize; i++ )
-	    *outbuf++ = (*ivp++ ^= *inbuf++);
-	nbytes -= blocksize;
+  if ( nbytes ) 
+    {
+      /* Save the current IV and then encrypt the IV. */
+      memcpy (c->lastiv, c->u_iv.iv, blocksize );
+      c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      c->unused = blocksize;
+      /* Apply the XOR. */
+      c->unused -= nbytes;
+      for(ivp=c->u_iv.iv; nbytes; nbytes-- )
+        *outbuf++ = (*ivp++ ^= *inbuf++);
     }
-    if( nbytes ) { /* process the remaining bytes */
-	/* encrypt the IV (and save the current one) */
-	memcpy( c->lastiv, c->iv, blocksize );
-	(*c->encrypt)( &c->context.c, c->iv, c->iv );
-	c->unused = blocksize;
-	/* and apply the xor */
-	c->unused -= nbytes;
-	for(ivp=c->iv; nbytes; nbytes-- )
-	    *outbuf++ = (*ivp++ ^= *inbuf++);
-    }
 }
 
+
 static void
 do_cfb_decrypt( CIPHER_HANDLE c, byte *outbuf, byte *inbuf, unsigned nbytes )
 {
-    byte *ivp;
-    ulong temp;
-    size_t blocksize = c->blocksize;
-
-    if( nbytes <= c->unused ) {
-	/* short enough to be encoded by the remaining XOR mask */
-	/* XOR the input with the IV and store input into IV */
-	for(ivp=c->iv+blocksize - c->unused; nbytes; nbytes--,c->unused--){
-	    temp = *inbuf++;
-	    *outbuf++ = *ivp ^ temp;
-	    *ivp++ = temp;
-	}
-	return;
+  unsigned char *ivp;
+  unsigned long temp;
+  int i;
+  size_t blocksize = c->blocksize;
+  size_t blocksize_x_2 = blocksize + blocksize;
+  
+  if (nbytes <= c->unused)
+    {
+      /* Short enough to be decoded by the remaining XOR mask. */
+      /* XOR the input with the IV and store input into IV. */
+      for (ivp=c->u_iv.iv+blocksize - c->unused;
+           nbytes; 
+           nbytes--, c->unused--)
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
+      return;
     }
+  
+  if (c->unused)
+    {
+      /* XOR the input with the IV and store input into IV. */
+      nbytes -= c->unused;
+      for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
+    }
+  
+  /* Now we can process complete blocks.  We use a loop as long as we
+     have at least 2 blocks and use conditions for the rest.  This
+     also allows the use of a bulk decryption function if available.  */
+#ifdef USE_AES
+  if (nbytes >= blocksize_x_2 
+      && (c->algo == CIPHER_ALGO_AES
+          || c->algo == CIPHER_ALGO_AES256
+          || c->algo == CIPHER_ALGO_AES192))
+    {
+      unsigned int nblocks = nbytes / blocksize;
+      rijndael_cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); 
+      outbuf += nblocks * blocksize;
+      inbuf  += nblocks * blocksize;
+      nbytes -= nblocks * blocksize;
+    }
+  else
+#endif /*USE_AES*/
+    {
+      while (nbytes >= blocksize_x_2 )
+        {
+          /* Encrypt the IV. */
+          c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+          /* XOR the input with the IV and store input into IV. */
+          for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+            {
+              temp = *inbuf++;
+              *outbuf++ = *ivp ^ temp;
+              *ivp++ = temp;
+            }
+          nbytes -= blocksize;
+        }
+    }
 
-    if( c->unused ) {
-	/* XOR the input with the IV and store input into IV */
-	nbytes -= c->unused;
-	for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) {
-	    temp = *inbuf++;
-	    *outbuf++ = *ivp ^ temp;
-	    *ivp++ = temp;
-	}
+  if (nbytes >= blocksize )
+    {
+      /* Save the current IV and then encrypt the IV. */
+      memcpy ( c->lastiv, c->u_iv.iv, blocksize);
+      c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      /* XOR the input with the IV and store input into IV */
+      for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
+      nbytes -= blocksize;
     }
 
-    /* now we can process complete blocks */
-    while( nbytes >= blocksize ) {
-	int i;
-	/* encrypt the IV (and save the current one) */
-	memcpy( c->lastiv, c->iv, blocksize );
-	(*c->encrypt)( &c->context.c, c->iv, c->iv );
-	/* XOR the input with the IV and store input into IV */
-	for(ivp=c->iv,i=0; i < blocksize; i++ ) {
-	    temp = *inbuf++;
-	    *outbuf++ = *ivp ^ temp;
-	    *ivp++ = temp;
-	}
-	nbytes -= blocksize;
+  if (nbytes)
+    { 
+      /* Save the current IV and then encrypt the IV. */
+      memcpy ( c->lastiv, c->u_iv.iv, blocksize );
+      c->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      c->unused = blocksize;
+      /* Apply the XOR. */
+      c->unused -= nbytes;
+      for (ivp=c->u_iv.iv; nbytes; nbytes-- )
+        {
+          temp = *inbuf++;
+          *outbuf++ = *ivp ^ temp;
+          *ivp++ = temp;
+        }
     }
-    if( nbytes ) { /* process the remaining bytes */
-	/* encrypt the IV (and save the current one) */
-	memcpy( c->lastiv, c->iv, blocksize );
-	(*c->encrypt)( &c->context.c, c->iv, c->iv );
-	c->unused = blocksize;
-	/* and apply the xor */
-	c->unused -= nbytes;
-	for(ivp=c->iv; nbytes; nbytes-- ) {
-	    temp = *inbuf++;
-	    *outbuf++ = *ivp ^ temp;
-	    *ivp++ = temp;
-	}
-    }
 }
 
 
@@ -732,8 +803,8 @@
 cipher_sync( CIPHER_HANDLE c )
 {
     if( c->mode == CIPHER_MODE_PHILS_CFB && c->unused ) {
-	memmove(c->iv + c->unused, c->iv, c->blocksize - c->unused );
-	memcpy(c->iv, c->lastiv + c->blocksize - c->unused, c->unused);
+	memmove(c->u_iv.iv + c->unused, c->u_iv.iv, c->blocksize - c->unused );
+	memcpy(c->u_iv.iv, c->lastiv + c->blocksize - c->unused, c->unused);
 	c->unused = 0;
     }
 }

Modified: branches/STABLE-BRANCH-1-4/cipher/rijndael.c
===================================================================
--- branches/STABLE-BRANCH-1-4/cipher/rijndael.c	2008-03-20 15:31:43 UTC (rev 4710)
+++ branches/STABLE-BRANCH-1-4/cipher/rijndael.c	2008-03-22 17:01:37 UTC (rev 4711)
@@ -1,5 +1,5 @@
 /* Rijndael (AES) for GnuPG
- *	Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+ *	Copyright (C) 2000, 2001, 2008 Free Software Foundation, Inc.
  *
  * This file is part of GnuPG.
  *
@@ -1866,144 +1866,127 @@
 
 
 
-/* Encrypt one block.  A and B may be the same. */
+/* Encrypt one block.  A and B need to be aligned on a 4 byte
+   boundary.  A and B may be the same. */
 static void
-do_encrypt (const RIJNDAEL_context *ctx, byte *b, const byte *a)
+do_encrypt_aligned (const RIJNDAEL_context *ctx, 
+                    unsigned char *b, const unsigned char *a)
 {
-    int r;
+#define rk (ctx->keySched)
+  int ROUNDS = ctx->ROUNDS;
+  int r;
+  union
+  {
+    u32  tempu32[4];  /* Force correct alignment. */
     byte temp[4][4];
-    int ROUNDS = ctx->ROUNDS;
-#define rk (ctx->keySched)
+  } u;
 
-    *((u32*)temp[0]) = *((u32*)(a   )) ^ *((u32*)rk[0][0]);
-    *((u32*)temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
-    *((u32*)temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
-    *((u32*)temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
-    *((u32*)(b    )) = *((u32*)T1[temp[0][0]])
-        ^ *((u32*)T2[temp[1][1]])
-        ^ *((u32*)T3[temp[2][2]]) 
-        ^ *((u32*)T4[temp[3][3]]);
-    *((u32*)(b + 4)) = *((u32*)T1[temp[1][0]])
-        ^ *((u32*)T2[temp[2][1]])
-        ^ *((u32*)T3[temp[3][2]]) 
-        ^ *((u32*)T4[temp[0][3]]);
-    *((u32*)(b + 8)) = *((u32*)T1[temp[2][0]])
-        ^ *((u32*)T2[temp[3][1]])
-        ^ *((u32*)T3[temp[0][2]]) 
-        ^ *((u32*)T4[temp[1][3]]);
-    *((u32*)(b +12)) = *((u32*)T1[temp[3][0]])
-        ^ *((u32*)T2[temp[0][1]])
-        ^ *((u32*)T3[temp[1][2]]) 
-        ^ *((u32*)T4[temp[2][3]]);
-    for (r = 1; r < ROUNDS-1; r++) {
-        *((u32*)temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[r][0]);
-        *((u32*)temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
-        *((u32*)temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
-        *((u32*)temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
+  *((u32*)u.temp[0]) = *((u32*)(a   )) ^ *((u32*)rk[0][0]);
+  *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
+  *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
+  *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
+  *((u32*)(b    ))   = (*((u32*)T1[u.temp[0][0]])
+                        ^ *((u32*)T2[u.temp[1][1]])
+                        ^ *((u32*)T3[u.temp[2][2]]) 
+                        ^ *((u32*)T4[u.temp[3][3]]));
+  *((u32*)(b + 4))   = (*((u32*)T1[u.temp[1][0]])
+                        ^ *((u32*)T2[u.temp[2][1]])
+                        ^ *((u32*)T3[u.temp[3][2]]) 
+                        ^ *((u32*)T4[u.temp[0][3]]));
+  *((u32*)(b + 8))   = (*((u32*)T1[u.temp[2][0]])
+                        ^ *((u32*)T2[u.temp[3][1]])




More information about the Gnupg-commits mailing list