[PATCH] Enable VIA Padlock on x86_64 platforms

Rafaël Carré funman at videolan.org
Wed Apr 11 06:20:14 CEST 2012


Convert the existing 32-bit asm to 64-bit:
    - *l -> *q (long->quad)
    - e** registers -> r** registers
    - don't save/restore ebx (only needed on 32 bits, where it is the GOT register)

Tested with make check on VIA Nano X2 L4350
---
Note: make check actually fails, but this is not a regression: on 32 bits
the same test fails with exactly the same output.

% ./tests/basic 
aes-ctr, encrypt mismatch entry 0:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 0:1
expected: 98 06 f6 6b 79 70 fd ff 86 17 18 7b b9 ff fd ff
computed: 2b 2d 2c db 8c 5b d3 ee d1 57 79 37 9d 09 10 ed
aes-ctr, encrypt mismatch entry 0:2
expected: 5a e4 df 3e db d5 d3 5e 5b 4f 09 02 0d b0 3e ab
computed: 39 1e cc a8 f9 d0 62 bf 4c 60 4b 82 1d 8e 32 90
aes-ctr, encrypt mismatch entry 0:3
expected: 1e 03 1d da 2f be 03 d1 79 21 70 a0 f3 00 9c ee
computed: fb 7d c4 ce 68 9e 5d dc 74 32 1a 71 35 8a cf 49
aes-ctr, encrypt mismatch entry 3:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 4:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 7:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 7:2
expected: ff 5a e4 df 3e db d5 d3 5e 5b 4f 09 02 0d b0 3e ab
computed: ff 39 1e cc a8 f9 d0 62 bf 4c 60 4b 82 1d 8e 32 90
aes-ctr, encrypt mismatch entry 7:3
expected: 1e 03 1d da 2f be 03 d1 79 21 70 a0 f3 00 9c ee
computed: fb 7d c4 ce 68 9e 5d dc 74 32 1a 71 35 8a cf 49
encrypt mismatch (algo 7, mode 6)

 cipher/rijndael.c |   21 +++++++++---
 random/rndhw.c    |   13 ++++++-
 src/hwfeatures.c  |   97 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 6 deletions(-)

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2df8ea9..c2e0a77 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -63,7 +63,7 @@
    code.  */
 #undef USE_PADLOCK
 #ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( ( defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 ) || defined(__x86_64__) ) && defined (__GNUC__)
 #  define USE_PADLOCK 1
 # endif
 #endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -663,17 +663,28 @@ do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,
 
   memcpy (a, ax, 16);
 
+  int blocks = 1; /* Init counter for just one block.  */
+#ifdef __x86_64__
+  asm volatile
+    ("pushfq\n\t"          /* Force key reload.  */
+     "popfq\n\t"
+     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+     : /* No output */
+     : "S" (a), "D" (b), "d" (cword), "b" (ctx->padlockkey), "c" (blocks)
+     : "cc", "memory"
+     );
+#else
   asm volatile
     ("pushfl\n\t"          /* Force key reload.  */
      "popfl\n\t"
      "xchg %3, %%ebx\n\t"  /* Load key.  */
-     "movl $1, %%ecx\n\t"  /* Init counter for just one block.  */
-     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
+     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
      "xchg %3, %%ebx\n"    /* Restore GOT register.  */
      : /* No output */
-     : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
-     : "%ecx", "cc", "memory"
+     : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey), "c" (blocks)
+     : "cc", "memory"
      );
+#endif
 
   memcpy (bx, b, 16);
 
diff --git a/random/rndhw.c b/random/rndhw.c
index 82faab4..c933cf9 100644
--- a/random/rndhw.c
+++ b/random/rndhw.c
@@ -27,7 +27,7 @@
 
 #undef USE_PADLOCK
 #ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__) ) && defined (__GNUC__)
 # define USE_PADLOCK
 # endif
 #endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -55,6 +55,16 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
   nbytes = 0;
   while (nbytes < 64)
     {
+#ifdef __x86_64__
+      asm volatile
+        ("movq %1, %%rdi\n\t"         /* Set buffer.  */
+         "xorq %%rdx, %%rdx\n\t"      /* Request up to 8 bytes.  */
+         ".byte 0x0f, 0xa7, 0xc0\n\t" /* XSTORE RNG. */
+         : "=a" (status)
+         : "g" (p)
+         : "%rdx", "%rdi", "cc"
+         );
+#else
       asm volatile
         ("movl %1, %%edi\n\t"         /* Set buffer.  */
          "xorl %%edx, %%edx\n\t"      /* Request up to 8 bytes.  */
@@ -63,6 +73,7 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
          : "g" (p)
          : "%edx", "%edi", "cc"
          );
+#endif
       if ((status & (1<<6))         /* RNG still enabled.  */
           && !(status & (1<<13))    /* von Neumann corrector is enabled.  */
           && !(status & (1<<14))    /* String filter is disabled.  */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index c356798..73db917 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -40,6 +40,99 @@ _gcry_get_hw_features (void)
 }
 
 
+#if defined (__x86_64__) && defined (__GNUC__)
+static void
+detect_x86_64_gnuc (void)
+{
+  /* Identify the CPU vendor with CPUID leaf 0 and OR the feature bits
+     found for that vendor into hw_features.  */
+  char vendor_id[12+1];
+
+  asm volatile
+    ("xorl  %%eax, %%eax\n\t"    /* 0 -> EAX.  */
+     "cpuid\n\t"                 /* Get vendor ID.  */
+     "movl  %%ebx, (%0)\n\t"     /* EBX,EDX,ECX -> VENDOR_ID.  */
+     "movl  %%edx, 4(%0)\n\t"
+     "movl  %%ecx, 8(%0)\n\t"
+     : /* No output operand: VENDOR_ID is written through %0.  */
+     : "S" (&vendor_id[0])
+     : "%eax", "%ebx", "%ecx", "%edx", "cc", "memory"
+     );
+  vendor_id[12] = 0;
+
+  if (0)
+    ; /* Just to make "else if" and ifdef macros look pretty.  */
+#ifdef ENABLE_PADLOCK_SUPPORT
+  else if (!strcmp (vendor_id, "CentaurHauls"))
+    {
+      /* This is a VIA CPU.  Check what PadLock features we have.  */
+      asm volatile
+        ("movl $0xC0000000, %%eax\n\t"  /* Check for extended centaur  */
+         "cpuid\n\t"                    /* feature flags.              */
+         "cmpl $0xC0000001, %%eax\n\t"
+         "jb .Lready%=\n\t"             /* EAX < 0xC0000001 => no padlock.  */
+
+         "movl $0xC0000001, %%eax\n\t"  /* Ask for the extended */
+         "cpuid\n\t"                    /* feature flags.       */
+
+         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
+         "andl $0x0C, %%eax\n\t"        /* Test bits 2 and 3 to see whether */
+         "cmpl $0x0C, %%eax\n\t"        /* the RNG exists and is enabled.   */
+         "jnz .Lno_rng%=\n\t"
+         "orl $1, %0\n"                 /* Set our HWF_PADLOCK_RNG bit.  */
+
+         ".Lno_rng%=:\n\t"
+         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
+         "andl $0xC0, %%eax\n\t"        /* Test bits 6 and 7 to see whether */
+         "cmpl $0xC0, %%eax\n\t"        /* the ACE exists and is enabled.   */
+         "jnz .Lno_ace%=\n\t"
+         "orl $2, %0\n"                 /* Set our HWF_PADLOCK_AES bit.  */
+
+         ".Lno_ace%=:\n\t"
+         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
+         "andl $0xC00, %%eax\n\t"       /* Test bits 10, 11 to see whether  */
+         "cmpl $0xC00, %%eax\n\t"       /* the PHE exists and is enabled.   */
+         "jnz .Lno_phe%=\n\t"
+         "orl $4, %0\n"                 /* Set our HWF_PADLOCK_SHA bit.  */
+
+         ".Lno_phe%=:\n\t"
+         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
+         "andl $0x3000, %%eax\n\t"      /* Test bits 12, 13 to see whether  */
+         "cmpl $0x3000, %%eax\n\t"      /* MONTMUL exists and is enabled.   */
+         "jnz .Lready%=\n\t"
+         "orl $8, %0\n"                 /* Set our HWF_PADLOCK_MMUL bit.  */
+
+         ".Lready%=:\n"
+         : "+r" (hw_features)
+         : /* No input; CPUID overwrites EAX, EBX, ECX and EDX.  */
+         : "%eax", "%ebx", "%ecx", "%edx", "cc"
+         );
+    }
+#endif /*ENABLE_PADLOCK_SUPPORT*/
+  else if (!strcmp (vendor_id, "GenuineIntel"))
+    {
+      /* This is an Intel CPU.  */
+      asm volatile
+        ("movl $1, %%eax\n\t"           /* Get CPU info and feature flags.  */
+         "cpuid\n"
+         "testl $0x02000000, %%ecx\n\t" /* Test bit 25.  */
+         "jz .Lno_aes%=\n\t"            /* No AES support.  */
+         "orl $256, %0\n"               /* Set our HWF_INTEL_AES bit.  */
+
+         ".Lno_aes%=:\n"
+         : "+r" (hw_features)
+         : /* No input; CPUID overwrites EAX, EBX, ECX and EDX.  */
+         : "%eax", "%ebx", "%ecx", "%edx", "cc"
+         );
+    }
+  else if (!strcmp (vendor_id, "AuthenticAMD"))
+    {
+      /* This is an AMD CPU.  */
+
+    }
+}
+#endif /* __x86_64__ && __GNUC__ */
+
 #if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
 static void
 detect_ia32_gnuc (void)
@@ -186,6 +279,10 @@ _gcry_detect_hw_features (unsigned int disabled_features)
 #elif defined (__i386__) && SIZEOF_UNSIGNED_LONG == 8
 #ifdef __GNUC__
 #endif
+#elif defined (__x86_64__)
+#ifdef __GNUC__
+  detect_x86_64_gnuc ();
+#endif
 #endif
 
   hw_features &= ~disabled_features;
-- 
1.7.9.1



More information about the Gcrypt-devel mailing list