[svn] gcry - r1417 - in branches/LIBGCRYPT-1-4-BRANCH: . cipher

svn author wk cvs at cvs.gnupg.org
Fri Dec 11 18:05:20 CET 2009


Author: wk
Date: 2009-12-11 18:05:20 +0100 (Fri, 11 Dec 2009)
New Revision: 1417

Modified:
   branches/LIBGCRYPT-1-4-BRANCH/ChangeLog
   branches/LIBGCRYPT-1-4-BRANCH/NEWS
   branches/LIBGCRYPT-1-4-BRANCH/README
   branches/LIBGCRYPT-1-4-BRANCH/cipher/ChangeLog
   branches/LIBGCRYPT-1-4-BRANCH/cipher/Makefile.am
   branches/LIBGCRYPT-1-4-BRANCH/cipher/sha256.c
   branches/LIBGCRYPT-1-4-BRANCH/cipher/sha512.c
   branches/LIBGCRYPT-1-4-BRANCH/configure.ac
Log:
Backported some changes from trunk.


Modified: branches/LIBGCRYPT-1-4-BRANCH/ChangeLog
===================================================================
--- branches/LIBGCRYPT-1-4-BRANCH/ChangeLog	2009-12-11 16:32:04 UTC (rev 1416)
+++ branches/LIBGCRYPT-1-4-BRANCH/ChangeLog	2009-12-11 17:05:20 UTC (rev 1417)
@@ -1,3 +1,11 @@
+2009-12-11  Werner Koch  <wk at g10code.com>
+
+	Release 1.4.5.
+
+	* configure.ac: Bump LT version to C16/A5/R3.
+
+	* configure.ac: Add option --disable-O-flag-munging.
+
 2009-01-22  Werner Koch  <wk at g10code.com>
 
 	Release 1.4.4.

Modified: branches/LIBGCRYPT-1-4-BRANCH/cipher/ChangeLog
===================================================================
--- branches/LIBGCRYPT-1-4-BRANCH/cipher/ChangeLog	2009-12-11 16:32:04 UTC (rev 1416)
+++ branches/LIBGCRYPT-1-4-BRANCH/cipher/ChangeLog	2009-12-11 17:05:20 UTC (rev 1417)
@@ -1,3 +1,17 @@
+2009-12-11  Werner Koch  <wk at g10code.com>
+
+	* Makefile.am (o_flag_munging): New.
+	(tiger.o, tiger.lo): Use it.
+
+	* sha256.c (Cho, Maj, Sum0, Sum1): Turn macros into inline
+	functions.
+	(transform): Partly unroll to interweave the chain variables.
+
+	* sha512.c (ROTR, Ch, Maj, Sum0, Sum1): Turn macros into inline
+	functions.
+	(transform): Partly unroll to interweave the chain variables.
+	Suggested by Christian Grothoff.
+
 2009-08-21  Werner Koch  <wk at g10code.com>
 
 	* dsa.c (dsa_generate_ext): Release retfactors array before

Modified: branches/LIBGCRYPT-1-4-BRANCH/NEWS
===================================================================
--- branches/LIBGCRYPT-1-4-BRANCH/NEWS	2009-12-11 16:32:04 UTC (rev 1416)
+++ branches/LIBGCRYPT-1-4-BRANCH/NEWS	2009-12-11 17:05:20 UTC (rev 1417)
@@ -1,15 +1,18 @@
-Noteworthy changes in version 1.4.5 (unreleased)
+Noteworthy changes in version 1.4.5 (2009-12-11)
 ------------------------------------------------
 
- * Fix minor memory leak in DSA key generation.
+ * Fixed minor memory leak in DSA key generation.
 
- * No switch into FIPS mode if /proc/version is not readable.
+ * No more switching to FIPS mode if /proc/version is not readable.
 
- * Fix sigill during Padlock detection on old CPUs.
+ * Fixed a sigill during Padlock detection on old CPUs.
 
- * Fix a hang on some W2000 machines.
+ * Fixed a hang on some W2000 machines.
 
+ * Boosted SHA-512 performance by 30% on ia32 boxes and gcc 4.3;
+   SHA-256 went up by 25%.
 
+
 Noteworthy changes in version 1.4.4 (2009-01-22)
 ------------------------------------------------
 

Modified: branches/LIBGCRYPT-1-4-BRANCH/README
===================================================================
--- branches/LIBGCRYPT-1-4-BRANCH/README	2009-12-11 16:32:04 UTC (rev 1416)
+++ branches/LIBGCRYPT-1-4-BRANCH/README	2009-12-11 17:05:20 UTC (rev 1417)
@@ -1,6 +1,6 @@
 		    Libgcrypt - The GNU Crypto Library
 		   ------------------------------------
-                            Version 1.4.4
+                            Version 1.4.5
  
 
     Copyright 2000, 2002, 2003, 2004, 2007, 2008,
@@ -130,7 +130,7 @@
                      time.  This is helpful to create OS X fat binaries.
 
      --enable-random-daemon
-                     Include support for a global random damon and
+                     Include support for a global random daemon and
                      build the daemon.  This is an experimental feature.
 
      --enable-mpi-path=EXTRA_PATH
@@ -168,9 +168,63 @@
                      available.  Try this if you get problems with
                      assembler code.
 
-       
+     --disable-O-flag-munging
+                     Some code is too complex for some compilers while
+                     in higher optimization modes, thus the compiler
+                     invocation is modified to use a lower
+                     optimization level.  Usually this works very well
+                     but on some platforms these rules break the
+                     invocation.  This option may be used to disable
+                     the feature under the assumption that either good
+                     CFLAGS are given or the compiler can grok the code.
+      
 
+    Build Problems
+    --------------
 
+    We can't check all assembler files, so if you have problems
+    assembling them (or the program crashes) use --disable-asm with
+    ./configure.  If you opt to delete individual replacement files in
+    hopes of using the remaining ones, be aware that the configure
+    scripts may consider several subdirectories to get all available
+    assembler files; be sure to delete the correct ones.  Never delete
+    udiv-qrnnd.S in any CPU directory, because there may be no C
+    substitute (in mpi/generic).  Don't forget to delete
+    "config.cache" and run "./config.status --recheck".  We got a few
+    reports about problems using versions of gcc earlier than 2.96
+    along with a non-GNU assembler (as).  If this applies to your
+    platform, you can either upgrade gcc to a more recent version, or
+    use the GNU assembler.
+
+    Some make tools are broken - the best solution is to use GNU's
+    make.  Try gmake or grab the sources from a GNU archive and
+    install them.
+
+    Specific problems on some machines:
+
+      * IBM RS/6000 running AIX
+
+	Due to a change in gcc (since version 2.8) the MPI stuff may
+	not build. In this case try to run configure using:
+	    CFLAGS="-g -O2 -mcpu=powerpc" ./configure
+
+      * SVR4.2 (ESIX V4.2 cc)
+
+        Due to problems with the ESIX as(1), you probably want to do:
+            CFLAGS="-O -K pentium" ./configure --disable-asm
+
+      * SunOS 4.1.4
+      
+         ./configure ac_cv_sys_symbol_underscore=yes
+
+      * Sparc64 CPUs
+
+        We have reports about failures in the AES module when
+        compiling using gcc (e.g. version 4.1.2) and the option -O3;
+        using -O2 solves the problem.
+
+
+
     License
     -------
     

Modified: branches/LIBGCRYPT-1-4-BRANCH/cipher/Makefile.am
===================================================================
--- branches/LIBGCRYPT-1-4-BRANCH/cipher/Makefile.am	2009-12-11 16:32:04 UTC (rev 1416)
+++ branches/LIBGCRYPT-1-4-BRANCH/cipher/Makefile.am	2009-12-11 17:05:20 UTC (rev 1417)
@@ -1,6 +1,6 @@
 # Makefile for cipher modules
 # Copyright (C) 1998, 1999, 2000, 2001, 2002,
-#               2003 Free Software Foundation, Inc.
+#               2003, 2009 Free Software Foundation, Inc.
 #
 # This file is part of Libgcrypt.
 #
@@ -67,10 +67,16 @@
 rfc2268.c \
 camellia.c camellia.h camellia-glue.c
 
+if ENABLE_O_FLAG_MUNGING
+o_flag_munging = sed -e 's/-O[2-9s]*/-O1/g'
+else
+o_flag_munging = cat
+endif
 
+
 # We need to lower the optimization for this module.
 tiger.o: $(srcdir)/tiger.c
-	`echo $(COMPILE) -c $(srcdir)/tiger.c | sed -e 's/-O[2-9s]*/-O1/g' `
+	`echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) `
 
 tiger.lo: $(srcdir)/tiger.c
-	`echo $(LTCOMPILE) -c $(srcdir)/tiger.c | sed -e 's/-O[2-9s]*/-O1/g' `
+	`echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) `

Modified: branches/LIBGCRYPT-1-4-BRANCH/cipher/sha256.c
===================================================================
--- branches/LIBGCRYPT-1-4-BRANCH/cipher/sha256.c	2009-12-11 16:32:04 UTC (rev 1416)
+++ branches/LIBGCRYPT-1-4-BRANCH/cipher/sha256.c	2009-12-11 17:05:20 UTC (rev 1417)
@@ -1,5 +1,5 @@
 /* sha256.c - SHA256 hash function
- *	Copyright (C) 2003, 2006, 2008 Free Software Foundation, Inc.
+ * Copyright (C) 2003, 2006, 2008, 2009 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
@@ -41,7 +41,6 @@
 #include <string.h>
 
 #include "g10lib.h"
-#include "memory.h"
 #include "bithelp.h"
 #include "cipher.h"
 #include "hash-common.h"
@@ -95,10 +94,6 @@
 /*
   Transform the message X which consists of 16 32-bit-words. See FIPS
   180-2 for details.  */
-#define Cho(x,y,z) (z ^ (x & (y ^ z)))      /* (4.2) same as SHA-1's F1 */
-#define Maj(x,y,z) ((x & y) | (z & (x|y)))  /* (4.3) same as SHA-1's F3 */
-#define Sum0(x) (ror ((x), 2) ^ ror ((x), 13) ^ ror ((x), 22))  /* (4.4) */
-#define Sum1(x) (ror ((x), 6) ^ ror ((x), 11) ^ ror ((x), 25))  /* (4.5) */
 #define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3))       /* (4.6) */
 #define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10))     /* (4.7) */
 #define R(a,b,c,d,e,f,g,h,k,w) do                                 \
@@ -114,6 +109,35 @@
             b = a;                                                \
             a = t1 + t2;                                          \
           } while (0)
+
+/* (4.2) same as SHA-1's F1.  */
+static inline u32
+Cho (u32 x, u32 y, u32 z)
+{
+  return (z ^ (x & (y ^ z)));
+}
+
+/* (4.3) same as SHA-1's F3 */
+static inline u32
+Maj (u32 x, u32 y, u32 z)
+{
+  return ((x & y) | (z & (x|y)));
+}
+  
+/* (4.4) */
+static inline u32
+Sum0 (u32 x)
+{
+  return (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22));
+}
+
+/* (4.5) */
+static inline u32
+Sum1 (u32 x)
+{
+  return (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25));
+}
+
  
 static void
 transform (SHA256_CONTEXT *hd, const unsigned char *data)
@@ -172,9 +196,56 @@
   for (; i < 64; i++)
     w[i] = S1(w[i-2]) + w[i-7] + S0(w[i-15]) + w[i-16];
 
-  for (i=0; i < 64; i++)
-    R(a,b,c,d,e,f,g,h,K[i],w[i]);
+  for (i=0; i < 64;)
+    {
+#if 0
+      R(a,b,c,d,e,f,g,h,K[i],w[i]);
+      i++;
+#else
+      t1 = h + Sum1 (e) + Cho (e, f, g) + K[i] + w[i];   
+      t2 = Sum0 (a) + Maj (a, b, c);
+      d += t1;
+      h  = t1 + t2;
 
+      t1 = g + Sum1 (d) + Cho (d, e, f) + K[i+1] + w[i+1];
+      t2 = Sum0 (h) + Maj (h, a, b);
+      c += t1;
+      g  = t1 + t2;
+
+      t1 = f + Sum1 (c) + Cho (c, d, e) + K[i+2] + w[i+2];
+      t2 = Sum0 (g) + Maj (g, h, a);
+      b += t1;
+      f  = t1 + t2;
+
+      t1 = e + Sum1 (b) + Cho (b, c, d) + K[i+3] + w[i+3];
+      t2 = Sum0 (f) + Maj (f, g, h);
+      a += t1;
+      e  = t1 + t2;
+
+      t1 = d + Sum1 (a) + Cho (a, b, c) + K[i+4] + w[i+4];
+      t2 = Sum0 (e) + Maj (e, f, g);
+      h += t1;
+      d  = t1 + t2;
+
+      t1 = c + Sum1 (h) + Cho (h, a, b) + K[i+5] + w[i+5];
+      t2 = Sum0 (d) + Maj (d, e, f);
+      g += t1;
+      c  = t1 + t2;
+
+      t1 = b + Sum1 (g) + Cho (g, h, a) + K[i+6] + w[i+6];
+      t2 = Sum0 (c) + Maj (c, d, e);
+      f += t1;
+      b  = t1 + t2;
+
+      t1 = a + Sum1 (f) + Cho (f, g, h) + K[i+7] + w[i+7];
+      t2 = Sum0 (b) + Maj (b, c, d);
+      e += t1;
+      a  = t1 + t2;
+
+      i += 8;
+#endif
+    }
+
   hd->h0 += a;
   hd->h1 += b;
   hd->h2 += c;
@@ -184,10 +255,6 @@
   hd->h6 += g;
   hd->h7 += h;
 }
-#undef Cho
-#undef Maj
-#undef Sum0
-#undef Sum1
 #undef S0
 #undef S1
 #undef R

Modified: branches/LIBGCRYPT-1-4-BRANCH/cipher/sha512.c
===================================================================
--- branches/LIBGCRYPT-1-4-BRANCH/cipher/sha512.c	2009-12-11 16:32:04 UTC (rev 1416)
+++ branches/LIBGCRYPT-1-4-BRANCH/cipher/sha512.c	2009-12-11 17:05:20 UTC (rev 1417)
@@ -1,5 +1,5 @@
 /* sha512.c - SHA384 and SHA512 hash functions
- *	Copyright (C) 2003, 2008 Free Software Foundation, Inc.
+ * Copyright (C) 2003, 2008, 2009 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
@@ -98,6 +98,36 @@
 }
 
 
+static inline u64
+ROTR (u64 x, u64 n)
+{
+  return ((x >> n) | (x << (64 - n)));
+}
+
+static inline u64
+Ch (u64 x, u64 y, u64 z)
+{
+  return ((x & y) ^ ( ~x & z));
+}
+
+static inline u64
+Maj (u64 x, u64 y, u64 z)
+{
+  return ((x & y) ^ (x & z) ^ (y & z));
+}
+
+static inline u64
+Sum0 (u64 x)
+{
+  return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
+}
+
+static inline u64
+Sum1 (u64 x)
+{
+  return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
+}
+
 /****************
  * Transform the message W which consists of 16 64-bit-words
  */
@@ -182,21 +212,26 @@
   }
 #endif
 
-#define ROTR(x,n) (((x)>>(n)) | ((x)<<(64-(n))))
-#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
-#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
-#define Sum0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
-#define Sum1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
 #define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
 #define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
 
   for (t = 16; t < 80; t++)
     w[t] = S1 (w[t - 2]) + w[t - 7] + S0 (w[t - 15]) + w[t - 16];
 
-  for (t = 0; t < 80; t++)
+
+  for (t = 0; t < 80; )
     {
       u64 t1, t2;
 
+      /* Performance on an AMD Athlon(tm) Dual Core Processor 4050e
+         with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes
+         initialized to 0,1,2,3...255,0,... and 1000 iterations:
+
+         Not unrolled with macros:  440ms
+         Unrolled with macros:      350ms
+         Unrolled with inline:      330ms
+      */
+#if 0 /* Not unrolled.  */
       t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
       t2 = Sum0 (a) + Maj (a, b, c);
       h = g;
@@ -207,12 +242,53 @@
       c = b;
       b = a;
       a = t1 + t2;
+      t++;
+#else /* Unrolled to interweave the chain variables.  */
+      t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
+      t2 = Sum0 (a) + Maj (a, b, c);
+      d += t1;
+      h  = t1 + t2;
 
-      /* printf("t=%d a=%016llX b=%016llX c=%016llX d=%016llX "
-          "e=%016llX f=%016llX g=%016llX h=%016llX\n",t,a,b,c,d,e,f,g,h); */
+      t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[t+1];
+      t2 = Sum0 (h) + Maj (h, a, b);
+      c += t1;
+      g  = t1 + t2;
+
+      t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[t+2];
+      t2 = Sum0 (g) + Maj (g, h, a);
+      b += t1;
+      f  = t1 + t2;
+
+      t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[t+3];
+      t2 = Sum0 (f) + Maj (f, g, h);
+      a += t1;
+      e  = t1 + t2;
+
+      t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[t+4];
+      t2 = Sum0 (e) + Maj (e, f, g);
+      h += t1;
+      d  = t1 + t2;
+
+      t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[t+5];
+      t2 = Sum0 (d) + Maj (d, e, f);
+      g += t1;
+      c  = t1 + t2;
+
+      t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[t+6];
+      t2 = Sum0 (c) + Maj (c, d, e);
+      f += t1;
+      b  = t1 + t2;
+
+      t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[t+7];
+      t2 = Sum0 (b) + Maj (b, c, d);
+      e += t1;
+      a  = t1 + t2;
+      
+      t += 8;
+#endif
     }
 
-  /* update chaining vars */
+  /* Update chaining vars.  */
   hd->h0 += a;
   hd->h1 += b;
   hd->h2 += c;

Modified: branches/LIBGCRYPT-1-4-BRANCH/configure.ac
===================================================================
--- branches/LIBGCRYPT-1-4-BRANCH/configure.ac	2009-12-11 16:32:04 UTC (rev 1416)
+++ branches/LIBGCRYPT-1-4-BRANCH/configure.ac	2009-12-11 17:05:20 UTC (rev 1417)
@@ -27,7 +27,7 @@
 # Set my_issvn to "yes" for non-released code.  Remember to run an
 # "svn up" and "autogen.sh" right before creating a distribution.
 m4_define([my_version], [1.4.5])
-m4_define([my_issvn], [yes])
+m4_define([my_issvn], [no])
 
 m4_define([svn_revision], m4_esyscmd([printf "%d" $(svn info 2>/dev/null \
           | sed -n '/^Revision:/ s/[^0-9]//gp'|head -1)]))
@@ -40,7 +40,7 @@
 #   (No interfaces changed:                   REVISION++)
 LIBGCRYPT_LT_CURRENT=16
 LIBGCRYPT_LT_AGE=5
-LIBGCRYPT_LT_REVISION=2
+LIBGCRYPT_LT_REVISION=3
 
 
 # If the API is changed in an incompatible way: increment the next counter.
@@ -475,6 +475,15 @@
             [Enable support for the PadLock engine.])
 fi
 
+# Implementation of the --disable-O-flag-munging switch.
+AC_MSG_CHECKING([whether a -O flag munging is requested])
+AC_ARG_ENABLE([O-flag-munging],
+              AC_HELP_STRING([--disable-O-flag-munging],
+                 [Disable modification of the cc -O flag]),
+              [enable_o_flag_munging=$enableval],
+              [enable_o_flag_munging=yes])
+AC_MSG_RESULT($enable_o_flag_munging)
+AM_CONDITIONAL(ENABLE_O_FLAG_MUNGING, test "$enable_o_flag_munging" = "yes")
 
 
 AC_DEFINE_UNQUOTED(PRINTABLE_OS_NAME, "$PRINTABLE_OS_NAME",




More information about the Gnupg-commits mailing list