[svn] gcry - r1272 - in trunk: . cipher src

svn author wk cvs at cvs.gnupg.org
Thu Nov 29 11:55:48 CET 2007


Author: wk
Date: 2007-11-29 11:55:46 +0100 (Thu, 29 Nov 2007)
New Revision: 1272

Modified:
   trunk/NEWS
   trunk/cipher/ChangeLog
   trunk/cipher/cipher.c
   trunk/cipher/rijndael.c
   trunk/src/ChangeLog
   trunk/src/g10lib.h
   trunk/src/hwfeatures.c
Log:
The ACE engine of VIA processors is now used for AES-128.


Modified: trunk/NEWS
===================================================================
--- trunk/NEWS	2007-11-28 10:37:20 UTC (rev 1271)
+++ trunk/NEWS	2007-11-29 10:55:46 UTC (rev 1272)
@@ -3,7 +3,9 @@
 
  * Make use of the visibility attribute if supported.
 
+ * The ACE engine of VIA processors is now used for AES-128.
 
+
 Noteworthy changes in version 1.3.1 (2007-10-26)
 ------------------------------------------------
 

Modified: trunk/cipher/ChangeLog
===================================================================
--- trunk/cipher/ChangeLog	2007-11-28 10:37:20 UTC (rev 1271)
+++ trunk/cipher/ChangeLog	2007-11-29 10:55:46 UTC (rev 1272)
@@ -1,3 +1,20 @@
+2007-11-29  Werner Koch  <wk at g10code.com>
+
+	* rijndael.c (USE_PADLOCK): Define new macro used for ia32.
+	(RIJNDAEL_context) [USE_PADLOCK]: Add fields USE_PADLOCK and
+	PADLOCK_KEY.
+	(do_setkey) [USE_PADLOCK]: Enable padlock if available for 128 bit
+	AES.
+	(do_padlock) [USE_PADLOCK]: New.
+	(rijndael_encrypt, rijndael_decrypt) [USE_PADLOCK]: Divert to
+	do_padlock.
+	* cipher.c (cipher_context_alignment_t): New.  Use it in this
+	module in place of PROPERLY_ALIGNED_TYPE.
+	(NEED_16BYTE_ALIGNED_CONTEXT): Define macro for ia32.
+	(struct gcry_cipher_handle): Add field HANDLE_OFFSET.
+	(gcry_cipher_open): Take care of increased alignment requirements.
+	(gcry_cipher_close): Ditto.
+
 2007-11-28  Werner Koch  <wk at g10code.com>
 
 	* sha256.c (asn224): Fixed wrong template.  It happened due to a

Modified: trunk/cipher/cipher.c
===================================================================
--- trunk/cipher/cipher.c	2007-11-28 10:37:20 UTC (rev 1271)
+++ trunk/cipher/cipher.c	2007-11-29 10:55:46 UTC (rev 1272)
@@ -1,6 +1,6 @@
 /* cipher.c  -	cipher dispatcher
  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
- *               2005, Free Software Foundation, Inc.
+ *               2005, 2007 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
@@ -15,8 +15,7 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <config.h>
@@ -34,6 +33,11 @@
 #define CTX_MAGIC_NORMAL 0x24091964
 #define CTX_MAGIC_SECURE 0x46919042
 
+#undef NEED_16BYTE_ALIGNED_CONTEXT
+#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+#define NEED_16BYTE_ALIGNED_CONTEXT 1
+#endif
+
 /* This is the list of the default ciphers, which are included in
    libgcrypt.  */
 static struct cipher_table_entry
@@ -107,11 +111,30 @@
     }                                              \
   while (0)
 
+
+/* A VIA processor with the Padlock engine requires an alignment of
+   most data on a 16 byte boundary.  Because we trick out the compiler
+   while allocating the context, the align attribute as used in
+   rijndael.c does not work on its own.  Thus we need to make sure
+   that the entire context structure is aligned on that boundary.
+   We achieve this by defining a new type and use that instead of our
+   usual alignment type.  */
+typedef union 
+{
+  PROPERLY_ALIGNED_TYPE foo;
+#ifdef NEED_16BYTE_ALIGNED_CONTEXT
+  char bar[16] __attribute__ ((aligned (16)));
+#endif  
+  char c[1];
+} cipher_context_alignment_t;
+
+
 /* The handle structure.  */
 struct gcry_cipher_handle
 {
   int magic;
   size_t actual_handle_size;     /* Allocated size of this handle. */
+  size_t handle_offset;          /* Offset to the malloced block.  */
   gcry_cipher_spec_t *cipher;
   gcry_module_t module;
   int mode;
@@ -120,7 +143,12 @@
   unsigned char lastiv[MAX_BLOCKSIZE];
   int unused;  /* in IV */
   unsigned char ctr[MAX_BLOCKSIZE];     /* For Counter (CTR) mode. */
-  PROPERLY_ALIGNED_TYPE context;
+  /* What follows are two contexts of the cipher in use.  The first
+     one needs to be aligned well enough for the cipher operation
+     whereas the second one is a copy created by cipher_setkey and
+     used by cipher_reset.  That second copy has no need for proper
+     aligment because it is only accessed by memcpy.  */
+  cipher_context_alignment_t context;
 };
 
 
@@ -635,14 +663,21 @@
 	err = GPG_ERR_INV_CIPHER_MODE;
       }
 
-  /* ? FIXME: perform selftest here and mark this with a flag in
-     cipher_table ? */
+  /* Perform selftest here and mark this with a flag in cipher_table?
+     No, we should not do this as it takes too long.  Further it does
+     not make sense to exclude algorithms with failing selftests at
+     runtime: If a selftest fails there is something seriously wrong
+     with the system and thus we better die immediately. */
 
   if (! err)
     {
       size_t size = (sizeof (*h)
                      + 2 * cipher->contextsize
-                     - sizeof (PROPERLY_ALIGNED_TYPE));
+                     - sizeof (cipher_context_alignment_t)
+#ifdef NEED_16BYTE_ALIGNED_CONTEXT
+                     + 15  /* Space for leading alignment gap.  */
+#endif /*NEED_16BYTE_ALIGNED_CONTEXT*/
+                     );
 
       if (secure)
 	h = gcry_calloc_secure (1, size);
@@ -653,8 +688,21 @@
 	err = gpg_err_code_from_errno (errno);
       else
 	{
+          size_t off = 0;
+
+#ifdef NEED_16BYTE_ALIGNED_CONTEXT
+          if ( ((unsigned long)h & 0x0f) )
+            {
+              /* The malloced block is not aligned on a 16 byte
+                 boundary.  Correct for this.  */
+              off = 16 - ((unsigned long)h & 0x0f);
+              h = (void*)((char*)h + off);
+            }
+#endif /*NEED_16BYTE_ALIGNED_CONTEXT*/
+
 	  h->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL;
-          h->actual_handle_size = size;
+          h->actual_handle_size = size - off;
+          h->handle_offset = off;
 	  h->cipher = cipher;
 	  h->module = module;
 	  h->mode = mode;
@@ -686,6 +734,8 @@
 void
 gcry_cipher_close (gcry_cipher_hd_t h)
 {
+  size_t off;
+
   if (! h)
     return;
 
@@ -707,9 +757,10 @@
      do the wiping.  To accomplish this we need to keep track of the
      actual size of this structure because we have no way to known
      how large the allocated area was when using a standard malloc. */
+  off = h->handle_offset;
   wipememory (h, h->actual_handle_size);
 
-  gcry_free (h);
+  gcry_free ((char*)h - off);
 }
 
 
@@ -749,7 +800,7 @@
 }
 
 
-/* Reset the cipher context to the initial contex.  This is basically
+/* Reset the cipher context to the initial context.  This is basically
    the same as a release followed by a new. */
 static void
 cipher_reset (gcry_cipher_hd_t c)

Modified: trunk/cipher/rijndael.c
===================================================================
--- trunk/cipher/rijndael.c	2007-11-28 10:37:20 UTC (rev 1271)
+++ trunk/cipher/rijndael.c	2007-11-29 10:55:46 UTC (rev 1272)
@@ -14,8 +14,7 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  *******************************************************************
  * The code here is based on the optimized implementation taken from
  * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000,
@@ -46,13 +45,26 @@
 #define MAXROUNDS		14
 
 
+/* USE_PADLOCK indicates whether to compile the padlock specific
+   code.  */
+#undef USE_PADLOCK
+#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+#define USE_PADLOCK
+#endif
+
+
 static const char *selftest(void);
 
 typedef struct 
 {
-  int   ROUNDS;                   /* key-length-dependent number of rounds */
-  int decryption_prepared;
-  union 
+  int   ROUNDS;             /* Key-length-dependent number of rounds.  */
+  int decryption_prepared;  /* The decryption key schedule is available.  */
+#ifdef USE_PADLOCK
+  int use_padlock;          /* Padlock shall be used.  */
+  /* The key as passed to the padlock engine.  */
+  unsigned char padlock_key[16] __attribute__ ((aligned (16)));
+#endif
+  union
   {
     PROPERLY_ALIGNED_TYPE dummy;
     byte keyschedule[MAXROUNDS+1][4][4];
@@ -69,26 +81,26 @@
 
 
 static const byte S[256] = {
-    99, 124, 119, 123, 242, 107, 111, 197,
-    48,   1, 103,  43, 254, 215, 171, 118, 
+     99, 124, 119, 123, 242, 107, 111, 197,
+     48,   1, 103,  43, 254, 215, 171, 118, 
     202, 130, 201, 125, 250,  89,  71, 240,
     173, 212, 162, 175, 156, 164, 114, 192, 
     183, 253, 147,  38,  54,  63, 247, 204,
-    52, 165, 229, 241, 113, 216,  49,  21, 
-    4, 199,  35, 195,  24, 150,   5, 154,
-    7,  18, 128, 226, 235,  39, 178, 117, 
-    9, 131,  44,  26,  27, 110,  90, 160, 
-    82,  59, 214, 179,  41, 227,  47, 132, 
-    83, 209,   0, 237,  32, 252, 177,  91,
+     52, 165, 229, 241, 113, 216,  49,  21, 
+      4, 199,  35, 195,  24, 150,   5, 154,
+      7,  18, 128, 226, 235,  39, 178, 117, 
+      9, 131,  44,  26,  27, 110,  90, 160, 
+     82,  59, 214, 179,  41, 227,  47, 132, 
+     83, 209,   0, 237,  32, 252, 177,  91,
     106, 203, 190,  57,  74,  76,  88, 207, 
     208, 239, 170, 251,  67,  77,  51, 133,
-    69, 249,   2, 127,  80,  60, 159, 168, 
-    81, 163,  64, 143, 146, 157,  56, 245,
+     69, 249,   2, 127,  80,  60, 159, 168, 
+     81, 163,  64, 143, 146, 157,  56, 245,
     188, 182, 218,  33,  16, 255, 243, 210, 
     205,  12,  19, 236,  95, 151,  68,  23,
     196, 167, 126,  61, 100,  93,  25, 115, 
-    96, 129,  79, 220,  34,  42, 144, 136,
-    70, 238, 184,  20, 222,  94,  11, 219, 
+     96, 129,  79, 220,  34,  42, 144, 136,
+     70, 238, 184,  20, 222,  94,  11, 219, 
     224,  50,  58,  10,  73,   6,  36,  92,
     194, 211, 172,  98, 145, 149, 228, 121, 
     231, 200,  55, 109, 141, 213,  78, 169, 
@@ -96,11 +108,11 @@
     186, 120,  37,  46,  28, 166, 180, 198, 
     232, 221, 116,  31,  75, 189, 139, 138, 
     112,  62, 181, 102,  72,   3, 246,  14,
-    97,  53,  87, 185, 134, 193,  29, 158, 
+     97,  53,  87, 185, 134, 193,  29, 158, 
     225, 248, 152,  17, 105, 217, 142, 148,
     155,  30, 135, 233, 206,  85,  40, 223, 
     140, 161, 137,  13, 191, 230,  66, 104, 
-    65, 153,  45,  15, 176,  84, 187,  22
+     65, 153,  45,  15, 176,  84, 187,  22
 };
 
 
@@ -1743,10 +1755,22 @@
   if( selftest_failed )
     return GPG_ERR_SELFTEST_FAILED;
 
+  ctx->decryption_prepared = 0;
+#ifdef USE_PADLOCK
+  ctx->use_padlock = 0;
+#endif
+
   if( keylen == 128/8 )
     {
       ROUNDS = 10;
       KC = 4;
+#ifdef USE_PADLOCK
+      if ((_gcry_get_hw_features () & HWF_PADLOCK_AES))
+        {
+          ctx->use_padlock = 1;
+          memcpy (ctx->padlock_key, key, keylen);
+        }
+#endif
     }
   else if ( keylen == 192/8 )
     {
@@ -1762,68 +1786,29 @@
     return GPG_ERR_INV_KEYLEN;
 
   ctx->ROUNDS = ROUNDS;
-  ctx->decryption_prepared = 0;
 
-  for (i = 0; i < keylen; i++) 
+#ifdef USE_PADLOCK
+  if (ctx->use_padlock)
     {
-      k[i >> 2][i & 3] = key[i]; 
+      /* Nothing to do as we support only hardware key generation for
+         now.  */
     }
+  else
+#endif /*USE_PADLOCK*/
+    {
 #define W (ctx->keySched)
-
-  for (j = KC-1; j >= 0; j--) 
-    {
-      *((u32*)tk[j]) = *((u32*)k[j]);
-    }
-  r = 0;
-  t = 0;
-  /* copy values into round key array */
-  for (j = 0; (j < KC) && (r < ROUNDS + 1); )
-    {
-      for (; (j < KC) && (t < 4); j++, t++)
+      for (i = 0; i < keylen; i++) 
         {
-          *((u32*)W[r][t]) = *((u32*)tk[j]);
+          k[i >> 2][i & 3] = key[i]; 
         }
-      if (t == 4)
+      
+      for (j = KC-1; j >= 0; j--) 
         {
-          r++;
-          t = 0;
+          *((u32*)tk[j]) = *((u32*)k[j]);
         }
-    }
-		
-  while (r < ROUNDS + 1)
-    {
-      /* While not enough round key material calculated */
-      /* calculate new values. */
-      tk[0][0] ^= S[tk[KC-1][1]];
-      tk[0][1] ^= S[tk[KC-1][2]];
-      tk[0][2] ^= S[tk[KC-1][3]];
-      tk[0][3] ^= S[tk[KC-1][0]];
-      tk[0][0] ^= rcon[rconpointer++];
-        
-      if (KC != 8)
-        {
-          for (j = 1; j < KC; j++) 
-            {
-              *((u32*)tk[j]) ^= *((u32*)tk[j-1]);
-            }
-        } 
-      else 
-        {
-          for (j = 1; j < KC/2; j++)
-            {
-              *((u32*)tk[j]) ^= *((u32*)tk[j-1]);
-            }
-          tk[KC/2][0] ^= S[tk[KC/2 - 1][0]];
-          tk[KC/2][1] ^= S[tk[KC/2 - 1][1]];
-          tk[KC/2][2] ^= S[tk[KC/2 - 1][2]];
-          tk[KC/2][3] ^= S[tk[KC/2 - 1][3]];
-          for (j = KC/2 + 1; j < KC; j++)
-            {
-              *((u32*)tk[j]) ^= *((u32*)tk[j-1]);
-            }
-        }
-
-      /* Copy values into round key array. */
+      r = 0;
+      t = 0;
+      /* Copy values into round key array.  */
       for (j = 0; (j < KC) && (r < ROUNDS + 1); )
         {
           for (; (j < KC) && (t < 4); j++, t++)
@@ -1836,12 +1821,61 @@
               t = 0;
             }
         }
-    }		
-    
+      
+      while (r < ROUNDS + 1)
+        {
+          /* While not enough round key material calculated calculate
+             new values.  */
+          tk[0][0] ^= S[tk[KC-1][1]];
+          tk[0][1] ^= S[tk[KC-1][2]];
+          tk[0][2] ^= S[tk[KC-1][3]];
+          tk[0][3] ^= S[tk[KC-1][0]];
+          tk[0][0] ^= rcon[rconpointer++];
+          
+          if (KC != 8)
+            {
+              for (j = 1; j < KC; j++) 
+                {
+                  *((u32*)tk[j]) ^= *((u32*)tk[j-1]);
+                }
+            } 
+          else 
+            {
+              for (j = 1; j < KC/2; j++)
+                {
+                  *((u32*)tk[j]) ^= *((u32*)tk[j-1]);
+                }
+              tk[KC/2][0] ^= S[tk[KC/2 - 1][0]];
+              tk[KC/2][1] ^= S[tk[KC/2 - 1][1]];
+              tk[KC/2][2] ^= S[tk[KC/2 - 1][2]];
+              tk[KC/2][3] ^= S[tk[KC/2 - 1][3]];
+              for (j = KC/2 + 1; j < KC; j++)
+                {
+                  *((u32*)tk[j]) ^= *((u32*)tk[j-1]);
+                }
+            }
+          
+          /* Copy values into round key array.  */
+          for (j = 0; (j < KC) && (r < ROUNDS + 1); )
+            {
+              for (; (j < KC) && (t < 4); j++, t++)
+                {
+                  *((u32*)W[r][t]) = *((u32*)tk[j]);
+                }
+              if (t == 4)
+                {
+                  r++;
+                  t = 0;
+                }
+            }
+        }		
 #undef W    
+    }
+
   return 0;
 }
 
+
 static gcry_err_code_t
 rijndael_setkey (void *context, const byte *key, const unsigned keylen)
 {
@@ -1998,13 +2032,70 @@
 #undef rk
 }
 
+
+/* Encrypt or decrypt one block using the padlock engine.  BX and AX
+   may be the same. */
+#ifdef USE_PADLOCK
 static void
+do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,
+            unsigned char *bx, const unsigned char *ax)
+{
+  /* BX and AX are not necessarily correctly aligned.  Thus we need to
+     copy them here. */
+  unsigned char a[16] __attribute__ ((aligned (16)));
+  unsigned char b[16] __attribute__ ((aligned (16)));
+  unsigned int cword[4] __attribute__ ((aligned (16)));
+
+  /* The control word fields are:
+      127:12   11:10 9     8     7     6     5     4     3:0
+      RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND  */
+  cword[0] = (ctx->ROUNDS & 15);  /* (The mask is just a safeguard.)  */
+  cword[1] = 0;
+  cword[2] = 0;
+  cword[3] = 0;
+  if (decrypt_flag)
+    cword[0] |= 0x00000200;
+
+  memcpy (a, ax, 16);
+   
+  asm volatile 
+    ("pushfl\n\t"          /* Force key reload.  */            
+     "popfl\n\t"
+     "pushl %%ebx\n\t"     /* Save GOT register.  */
+     "movl %0, %%esi\n\t"  /* Load input.  */
+     "movl %1, %%edi\n\t"  /* Load output.  */
+     "movl %2, %%edx\n\t"  /* Load control word.  */
+     "movl %3, %%ebx\n\t"  /* Load key.  */
+     "movl $1, %%ecx\n\t"  /* Init counter for just one block.  */
+     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
+     "popl %%ebx\n"         /* Restore GOT register.  */
+     : /* No output */
+     : "g" (a), "g" (b), "g" (cword), "g" (ctx->padlock_key)
+     : "%esi", "%edi", "%edx", "%ecx"
+     );
+
+  memcpy (bx, b, 16);
+}
+#endif /*USE_PADLOCK*/
+
+
+static void
 rijndael_encrypt (void *context, byte *b, const byte *a)
 {
   RIJNDAEL_context *ctx = context;
 
-  do_encrypt (ctx, b, a);
-  _gcry_burn_stack (48 + 2*sizeof(int));
+#ifdef USE_PADLOCK
+  if (ctx->use_padlock)
+    {
+      do_padlock (ctx, 0, b, a);
+      _gcry_burn_stack (48 + 15 /* possible padding for alignment */);
+    }
+  else
+#endif /*USE_PADLOCK*/
+    {
+      do_encrypt (ctx, b, a);
+      _gcry_burn_stack (48 + 2*sizeof(int));
+    }
 }
 
 
@@ -2124,10 +2215,21 @@
 {
   RIJNDAEL_context *ctx = context;
 
-  do_decrypt (ctx, b, a);
-  _gcry_burn_stack (48+2*sizeof(int));
+#ifdef USE_PADLOCK
+  if (ctx->use_padlock)
+    {
+      do_padlock (ctx, 1, b, a);
+      _gcry_burn_stack (48 + 2*sizeof(int) /* FIXME */);
+    }
+  else
+#endif /*USE_PADLOCK*/
+    {
+      do_decrypt (ctx, b, a);
+      _gcry_burn_stack (48+2*sizeof(int));
+    }
 }
 
+
 
 /* Test a single encryption and decryption with each key size. */
 static const char*

Modified: trunk/src/ChangeLog
===================================================================
--- trunk/src/ChangeLog	2007-11-28 10:37:20 UTC (rev 1271)
+++ trunk/src/ChangeLog	2007-11-29 10:55:46 UTC (rev 1272)
@@ -1,3 +1,7 @@
+2007-11-29  Werner Koch  <wk at g10code.com>
+
+	* hwfeatures.c (detect_ia32_gnuc): Detect Padlock engine.
+
 2007-11-13  Werner Koch  <wk at g10code.com>
 
 	* gcrypt.h.in (_GCRY_GCC_ATTR_MALLOC): Fixed gcc version check.

Modified: trunk/src/g10lib.h
===================================================================
--- trunk/src/g10lib.h	2007-11-28 10:37:20 UTC (rev 1271)
+++ trunk/src/g10lib.h	2007-11-29 10:55:46 UTC (rev 1272)
@@ -110,6 +110,7 @@
 
 
 /*-- src/hwfeatures.c --*/
+/* (Do not change these values unless synced with the asm code.)  */
 #define HWF_PADLOCK_RNG  1
 #define HWF_PADLOCK_AES  2
 #define HWF_PADLOCK_SHA  4

Modified: trunk/src/hwfeatures.c
===================================================================
--- trunk/src/hwfeatures.c	2007-11-28 10:37:20 UTC (rev 1271)
+++ trunk/src/hwfeatures.c	2007-11-29 10:55:46 UTC (rev 1272)
@@ -44,10 +44,102 @@
 static void
 detect_ia32_gnuc (void)
 {
+  int has_cpuid = 0;
+  char vendor_id[12+1];
   
+  /* Detect the CPUID feature by testing some undefined behaviour (16
+     vs 32 bit pushf/popf). */
+  asm volatile
+    ("pushf\n\t"                 /* Copy flags to EAX.  */
+     "popl %%eax\n\t"
+     "movl %%eax, %%ecx\n\t"     /* Save flags into ECX.  */
+     "xorl $0x200000, %%eax\n\t" /* Toggle ID bit and copy it to the flags.  */
+     "pushl %%eax\n\t"            
+     "popf\n\t"                
+     "pushf\n\t"                 /* Copy changed flags again to EAX.  */    
+     "popl %%eax\n\t"
+     "pushl %%ecx\n\t"           /* Restore flags from ECX.  */
+     "popf\n\t"
+     "xorl %%eax, %%ecx\n\t"     /* Compare flags against saved flags.  */
+     "jz .Lno_cpuid%=\n\t"       /* Toggling did not work, thus no CPUID.  */
+     "movl $1, %0\n"             /* Worked. true -> HAS_CPUID.  */
+     ".Lno_cpuid%=:\n\t"
+     : "=r" (has_cpuid)
+     :
+     : "%eax", "%ecx", "cc"
+     );
+  
+  if (!has_cpuid)
+    return;  /* No way.  */
+           
+  asm volatile
+    ("pushl %%ebx\n\t"           /* Save GOT register.  */
+     "xorl  %%eax, %%eax\n\t"    /* 0 -> EAX.  */
+     "cpuid\n\t"                 /* Get vendor ID.  */
+     "leal  %0, %%eax\n\t"       /* EBX,EDX,ECX -> VENDOR_ID.  */
+     "movl  %%ebx, (%%eax)\n\t"
+     "movl  %%edx, 4(%%eax)\n\t"
+     "movl  %%ecx, 8(%%eax)\n\t"
+     "popl  %%ebx\n"
+     : "=m" (vendor_id)
+     :
+     : "%eax", "%ecx", "%edx", "cc"
+     );
+  vendor_id[12] = 0;
 
+  /* Check whether this is a VIA CPU and what PadLock features we
+     have.  */
+  if (!strcmp (vendor_id, "CentaurHauls"))
+    {
+      asm volatile 
+        ("pushl %%ebx\n\t"	        /* Save GOT register.  */
+         "movl $0xC0000000, %%eax\n\t"  /* Check for extended centaur  */
+         "cpuid\n\t"                    /* feature flags.              */
+         "popl %%ebx\n\t"	        /* Restore GOT register. */
+         "cmpl $0xC0000001, %%eax\n\t"
+         "jb .Lready%=\n\t"             /* EAX < 0xC0000001 => no padlock.  */
 
+         "pushl %%ebx\n\t"	        /* Save GOT register. */
+         "movl $0xC0000001, %%eax\n\t"  /* Ask for the extended */
+         "cpuid\n\t"                    /* feature flags.       */
+         "popl %%ebx\n\t"	        /* Restore GOT register. */
+
+         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
+         "andl $0x0C, %%eax\n\t"        /* Test bits 2 and 3 to see whether */
+         "cmpl $0x0C, %%eax\n\t"        /* the RNG exists and is enabled.   */
+         "jnz .Lno_rng%=\n\t"
+         "orl $1, %0\n"                 /* Set our HWF_PADLOCK_RNG bit.  */
+
+         ".Lno_rng%=:\n\t"
+         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
+         "andl $0xC0, %%eax\n\t"        /* Test bits 6 and 7 to see whether */
+         "cmpl $0xC0, %%eax\n\t"        /* the ACE exists and is enabled.   */
+         "jnz .Lno_ace%=\n\t"
+         "orl $2, %0\n"                 /* Set our HWF_PADLOCK_AES bit.  */
+
+         ".Lno_ace%=:\n\t"
+         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
+         "andl $0xC00, %%eax\n\t"       /* Test bits 10, 11 to see whether  */
+         "cmpl $0xC00, %%eax\n\t"       /* the PHE exists and is enabled.   */
+         "jnz .Lno_phe%=\n\t"
+         "orl $4, %0\n"                 /* Set our HWF_PADLOCK_SHA bit.  */
+
+         ".Lno_phe%=:\n\t"
+         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
+         "andl $0x3000, %%eax\n\t"      /* Test bits 12, 13 to see whether  */
+         "cmpl $0x3000, %%eax\n\t"      /* MONTMUL exists and is enabled.   */
+         "jnz .Lready%=\n\t"
+         "orl $8, %0\n"                 /* Set our HWF_PADLOCK_MMUL bit.  */
+
+         ".Lready%=:\n"
+         : "+r" (hw_features)
+         :
+         : "%eax", "%edx", "cc"
+         );
+    }
+
 }
+
 #endif /* __i386__ && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ */
 
 
@@ -58,6 +150,8 @@
 void
 _gcry_detect_hw_features (void)
 {
+  hw_features = 0;
+
 #if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4
 #ifdef __GNUC__  
   detect_ia32_gnuc ();




More information about the Gnupg-commits mailing list