[git] GCRYPT - branch, master, updated. post-nuke-of-trailing-ws-15-g2674140

From: Werner Koch <cvs at cvs.gnupg.org>
Date: Tue Feb 22 16:32:13 CET 2011


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  2674140cdfdc59ce5ad0238177da1542f5df6e00 (commit)
      from  2442ca23030c8900c907166ca3f5d31d1907ead8 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
commit 2674140cdfdc59ce5ad0238177da1542f5df6e00
Author: Werner Koch <wk at gnupg.org>
Date:   Tue Feb 22 16:08:13 2011 +0100

    Use AES-NI insns for CTR mode.
    
    This substantially boosts the performance of CTR mode.
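
As a minimal sketch of how an application reaches this fast path
through the public API (not part of the commit; library
initialisation and error handling are omitted, and the helper name is
illustrative).  gcry_cipher_open installs _gcry_aes_ctr_enc as the
bulk ctr_enc handler for AES, and do_ctr_encrypt below hands all
complete blocks to it in a single call:

  #include <gcrypt.h>

  /* Sketch only: in-place AES-128 CTR encryption.  On an AES-NI
     capable CPU this now runs through the bulk ctr_enc hook added by
     this commit.  LEN must be a multiple of the 16 byte block size,
     since do_ctr_encrypt still rejects partial blocks.  */
  static void
  ctr_encrypt_demo (const unsigned char key[16], const unsigned char ctr[16],
                    unsigned char *buf, size_t len)
  {
    gcry_cipher_hd_t hd;

    gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_CTR, 0);
    gcry_cipher_setkey (hd, key, 16);
    gcry_cipher_setctr (hd, ctr, 16);
    gcry_cipher_encrypt (hd, buf, len, NULL, 0);  /* in-place */
    gcry_cipher_close (hd);
  }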

diff --git a/NEWS b/NEWS
index ffbdc84..979ad40 100644
--- a/NEWS
+++ b/NEWS
@@ -40,15 +40,18 @@ Noteworthy changes in version 1.5.x (unreleased)
  * Boosted SHA-512 performance by 30% on ia32 boxes and gcc 4.3;
    SHA-256 went up by 25%.  [also in 1.4.5]
 
- * Interface changes relative to the 1.4.2 release:
+ * Interface changes relative to the 1.4.6 release:
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  GCRY_PK_ECDH               NEW.
  gcry_pk_get_curve          NEW.
  gcry_pk_get_param          NEW.
  GCRYCTL_DISABLE_HWF        NEW.
- GCRY_CIPHER_MODE_AESWRAP   NEW. [also in 1.4.6]
- GCRY_MD_TIGER1             NEW. [also in 1.4.6]
- GCRY_MD_TIGER2             NEW. [also in 1.4.6]
+
+ * Interface changes relative to the 1.4.2 release:
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ GCRY_CIPHER_MODE_AESWRAP   NEW.
+ GCRY_MD_TIGER1             NEW.
+ GCRY_MD_TIGER2             NEW.
 
 
 Noteworthy changes in version 1.4.4 (2009-01-22)
diff --git a/cipher/ChangeLog b/cipher/ChangeLog
index d10ce07..7e00da7 100644
--- a/cipher/ChangeLog
+++ b/cipher/ChangeLog
@@ -1,3 +1,13 @@
+2011-02-22  Werner Koch  <wk at g10code.com>
+
+	* rijndael.c (aesni_cleanup_2_4): New.
+	(aesenc_xmm1_xmm0, do_aesni_ctr_4): New.
+	(_gcry_aes_ctr_enc): New.
+	* cipher.c (struct gcry_cipher_handle): Add CTR_ENC.  Move field
+	CTR into an u_ctr union and adjust all users.
+	(gcry_cipher_open): Use _gcry_aes_ctr_enc.
+	(do_ctr_encrypt): Use bulk mode.
+
 2011-02-18  Werner Koch  <wk at g10code.com>
 
 	* rijndael.c (u32_a_t): New.
diff --git a/cipher/cipher.c b/cipher/cipher.c
index 92b3698..a2f8bb9 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -190,6 +190,9 @@ struct gcry_cipher_handle
     void (*cbc_dec)(void *context, unsigned char *iv,
                     void *outbuf_arg, const void *inbuf_arg,
                     unsigned int nblocks);
+    void (*ctr_enc)(void *context, unsigned char *iv,
+                    void *outbuf_arg, const void *inbuf_arg,
+                    unsigned int nblocks);
   } bulk;
 
 
@@ -209,12 +212,16 @@ struct gcry_cipher_handle
     unsigned char iv[MAX_BLOCKSIZE];
   } u_iv;
 
+  /* The counter for CTR mode.  This field is also used by AESWRAP and
+     thus we can't use the U_IV union.  */
+  union {
+    cipher_context_alignment_t iv_align;
+    unsigned char ctr[MAX_BLOCKSIZE];
+  } u_ctr;
+
   unsigned char lastiv[MAX_BLOCKSIZE];
   int unused;  /* Number of unused bytes in the IV. */
 
-  unsigned char ctr[MAX_BLOCKSIZE];     /* For Counter (CTR) mode. */
-
-
   /* What follows are two contexts of the cipher in use.  The first
      one needs to be aligned well enough for the cipher operation
      whereas the second one is a copy created by cipher_setkey and
@@ -814,6 +821,7 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
               h->bulk.cfb_dec = _gcry_aes_cfb_dec;
               h->bulk.cbc_enc = _gcry_aes_cbc_enc;
               h->bulk.cbc_dec = _gcry_aes_cbc_dec;
+              h->bulk.ctr_enc = _gcry_aes_ctr_enc;
               break;
 #endif /*USE_AES*/
 
@@ -936,7 +944,7 @@ cipher_reset (gcry_cipher_hd_t c)
   memset (&c->marks, 0, sizeof c->marks);
   memset (c->u_iv.iv, 0, c->cipher->blocksize);
   memset (c->lastiv, 0, c->cipher->blocksize);
-  memset (c->ctr, 0, c->cipher->blocksize);
+  memset (c->u_ctr.ctr, 0, c->cipher->blocksize);
 }
 
 
@@ -1441,9 +1449,11 @@ do_ctr_encrypt (gcry_cipher_hd_t c,
                 const unsigned char *inbuf, unsigned int inbuflen)
 {
   unsigned int n;
-  unsigned char tmp[MAX_BLOCKSIZE];
   int i;
   unsigned int blocksize = c->cipher->blocksize;
+  unsigned int nblocks;
+
+  /* FIXME: This code only works on complete blocks.  */
 
   if (outbuflen < inbuflen)
     return GPG_ERR_BUFFER_TOO_SHORT;
@@ -1451,25 +1461,38 @@ do_ctr_encrypt (gcry_cipher_hd_t c,
   if ((inbuflen % blocksize))
     return GPG_ERR_INV_LENGTH;
 
-  for (n=0; n < inbuflen; n++)
+  nblocks = inbuflen / blocksize;
+  if (nblocks && c->bulk.ctr_enc)
     {
-      if ((n % blocksize) == 0)
-	{
-	  c->cipher->encrypt (&c->context.c, tmp, c->ctr);
+      c->bulk.ctr_enc (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks);
+      inbuf  += nblocks * blocksize;
+      outbuf += nblocks * blocksize;
+    }
+  else
+    {
+      unsigned char tmp[MAX_BLOCKSIZE];
 
-	  for (i = blocksize; i > 0; i--)
-	    {
-	      c->ctr[i-1]++;
-	      if (c->ctr[i-1] != 0)
-		break;
-	    }
-	}
+      for (n=0; n < inbuflen; n++)
+        {
+          if ((n % blocksize) == 0)
+            {
+              c->cipher->encrypt (&c->context.c, tmp, c->u_ctr.ctr);
+
+              for (i = blocksize; i > 0; i--)
+                {
+                  c->u_ctr.ctr[i-1]++;
+                  if (c->u_ctr.ctr[i-1] != 0)
+                    break;
+                }
+            }
+
+          /* XOR input with encrypted counter and store in output.  */
+          outbuf[n] = inbuf[n] ^ tmp[n % blocksize];
+        }
 
-      /* XOR input with encrypted counter and store in output.  */
-      outbuf[n] = inbuf[n] ^ tmp[n % blocksize];
+      wipememory (tmp, sizeof tmp);
     }
 
-  wipememory (tmp, sizeof tmp);
   return 0;
 }
 
@@ -1517,7 +1540,7 @@ do_aeswrap_encrypt (gcry_cipher_hd_t c, byte *outbuf, unsigned int outbuflen,
 
   r = outbuf;
   a = outbuf;  /* We store A directly in OUTBUF.  */
-  b = c->ctr;  /* B is also used to concatenate stuff.  */
+  b = c->u_ctr.ctr;  /* B is also used to concatenate stuff.  */
 
   /* If an IV has been set we use that IV as the Alternative Initial
      Value; if it has not been set we use the standard value.  */
@@ -1593,7 +1616,7 @@ do_aeswrap_decrypt (gcry_cipher_hd_t c, byte *outbuf, unsigned int outbuflen,
 
   r = outbuf;
   a = c->lastiv;  /* We use c->LASTIV as buffer for A.  */
-  b = c->ctr;     /* B is also used to concatenate stuff.  */
+  b = c->u_ctr.ctr;     /* B is also used to concatenate stuff.  */
 
   /* Copy the inbuf to the outbuf and save A. */
   memcpy (a, inbuf, 8);
@@ -1861,9 +1884,9 @@ gpg_error_t
 _gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, size_t ctrlen)
 {
   if (ctr && ctrlen == hd->cipher->blocksize)
-    memcpy (hd->ctr, ctr, hd->cipher->blocksize);
+    memcpy (hd->u_ctr.ctr, ctr, hd->cipher->blocksize);
   else if (!ctr || !ctrlen)
-    memset (hd->ctr, 0, hd->cipher->blocksize);
+    memset (hd->u_ctr.ctr, 0, hd->cipher->blocksize);
   else
     return gpg_error (GPG_ERR_INV_ARG);
   return 0;
@@ -1923,9 +1946,9 @@ gcry_cipher_ctl( gcry_cipher_hd_t h, int cmd, void *buffer, size_t buflen)
 
     case GCRYCTL_SET_CTR: /* Deprecated; use gcry_cipher_setctr.  */
       if (buffer && buflen == h->cipher->blocksize)
-	memcpy (h->ctr, buffer, h->cipher->blocksize);
+	memcpy (h->u_ctr.ctr, buffer, h->cipher->blocksize);
       else if (buffer == NULL || buflen == 0)
-	memset (h->ctr, 0, h->cipher->blocksize);
+	memset (h->u_ctr.ctr, 0, h->cipher->blocksize);
       else
 	rc = GPG_ERR_INV_ARG;
       break;
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index fb97274..2df8ea9 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -90,9 +90,7 @@ typedef u32           u32_a_t;
 #endif
 
 
-static const char *selftest(void);
-
-
+
 /* Our context object.  */
 typedef struct
 {
@@ -144,6 +142,11 @@ typedef struct
   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                          \
                      "pxor %%xmm1, %%xmm1\n" :: );                      \
   } while (0)
+# define aesni_cleanup_2_4()                                            \
+  do { asm volatile ("pxor %%xmm2, %%xmm2\n\t"                          \
+                     "pxor %%xmm3, %%xmm3\n"                            \
+                     "pxor %%xmm4, %%xmm4\n":: );                       \
+  } while (0)
 #else
 # define aesni_prepare() do { } while (0)
 # define aesni_cleanup() do { } while (0)
@@ -154,6 +157,23 @@ typedef struct
 #include "rijndael-tables.h"
 
 
+
+/* Function prototypes.  */
+#ifdef USE_AESNI
+/* We don't want to inline these functions to help gcc allocate enough
+   registers.  */
+static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr,
+                          unsigned char *b, const unsigned char *a)
+  __attribute__ ((__noinline__));
+static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr,
+                            unsigned char *b, const unsigned char *a)
+  __attribute__ ((__noinline__));
+#endif /*USE_AESNI*/
+
+static const char *selftest(void);
+
+
+
 /* Perform the key setup.  */
 static gcry_err_code_t
 do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
@@ -272,7 +292,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
   else if (ctx->use_aesni && ctx->rounds == 10)
     {
       /* Note: This code works for AES-128 but it is not much better
-         than than using the standard key schedule.  We disable it for
+         than using the standard key schedule.  We disable it for
          now and don't put any effort into implementing this for
          AES-192 and AES-256.  */
       asm volatile ("movl   %[key], %%esi\n\t"
@@ -860,6 +880,239 @@ do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag,
 #undef aesenclast_xmm1_xmm0
 }
 
+/* Perform a CTR encryption round using the counter CTR and the input
+   block A.  Write the result to the output block B and update CTR.
+   CTR needs to be a 16 byte aligned big-endian value.  */
+static void
+do_aesni_ctr (const RIJNDAEL_context *ctx,
+              unsigned char *ctr, unsigned char *b, const unsigned char *a)
+{
+#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+  static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+
+  asm volatile ("movdqa %[ctr], %%xmm0\n\t"     /* xmm0, xmm2 := CTR   */
+                "movaps %%xmm0, %%xmm2\n\t"
+                "mov    $1, %%esi\n\t"          /* xmm2++ (big-endian) */
+                "movd   %%esi, %%xmm1\n\t"
+                "pshufb %[mask], %%xmm2\n\t"
+                "paddq  %%xmm1, %%xmm2\n\t"
+                "pshufb %[mask], %%xmm2\n\t"
+                "movdqa %%xmm2, %[ctr]\n"       /* Update CTR.         */
+
+                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
+                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0]    */
+                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
+                "movdqa 0x10(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x20(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x30(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x40(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x50(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x60(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x70(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x80(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x90(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xa0(%%esi), %%xmm1\n\t"
+                "cmp $10, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xb0(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xc0(%%esi), %%xmm1\n\t"
+                "cmp $12, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xd0(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xe0(%%esi), %%xmm1\n"
+
+                ".Lenclast%=:\n\t"
+                aesenclast_xmm1_xmm0
+                "movdqu %[src], %%xmm1\n\t"      /* xmm1 := input   */
+                "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR ^= input  */
+                "movdqu %%xmm0, %[dst]"          /* Store EncCTR.    */
+
+                : [ctr] "+m" (*ctr), [dst] "=m" (*b)
+                : [src] "m" (*a),
+                  [key] "g" (ctx->keyschenc),
+                  [rounds] "g" (ctx->rounds),
+                  [mask] "m" (*be_mask)
+                : "%esi", "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenclast_xmm1_xmm0
+}
+
+
+/* Four blocks at a time variant of do_aesni_ctr.  */
+static void
+do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
+                unsigned char *ctr, unsigned char *b, const unsigned char *a)
+{
+#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenc_xmm1_xmm2      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
+#define aesenc_xmm1_xmm3      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
+#define aesenc_xmm1_xmm4      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t"
+#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+#define aesenclast_xmm1_xmm2  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t"
+#define aesenclast_xmm1_xmm3  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t"
+#define aesenclast_xmm1_xmm4  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t"
+
+  static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+
+  /* Register usage:
+      esi   keyschedule
+      xmm0  CTR-0
+      xmm1  temp / round key
+      xmm2  CTR-1
+      xmm3  CTR-2
+      xmm4  CTR-3
+      xmm5  temp
+   */
+
+  asm volatile ("movdqa %[ctr], %%xmm0\n\t"     /* xmm0, xmm2 := CTR   */
+                "movaps %%xmm0, %%xmm2\n\t"
+                "mov    $1, %%esi\n\t"          /* xmm1 := 1 */
+                "movd   %%esi, %%xmm1\n\t"
+                "pshufb %[mask], %%xmm2\n\t"    /* xmm2 := le(xmm2) */
+                "paddq  %%xmm1, %%xmm2\n\t"     /* xmm2++           */
+                "movaps %%xmm2, %%xmm3\n\t"     /* xmm3 := xmm2     */
+                "paddq  %%xmm1, %%xmm3\n\t"     /* xmm3++           */
+                "movaps %%xmm3, %%xmm4\n\t"     /* xmm4 := xmm3     */
+                "paddq  %%xmm1, %%xmm4\n\t"     /* xmm4++           */
+                "movaps %%xmm4, %%xmm5\n\t"     /* xmm5 := xmm4     */
+                "paddq  %%xmm1, %%xmm5\n\t"     /* xmm5++           */
+                "pshufb %[mask], %%xmm2\n\t"    /* xmm2 := be(xmm2) */
+                "pshufb %[mask], %%xmm3\n\t"    /* xmm3 := be(xmm3) */
+                "pshufb %[mask], %%xmm4\n\t"    /* xmm4 := be(xmm4) */
+                "pshufb %[mask], %%xmm5\n\t"    /* xmm5 := be(xmm5) */
+                "movdqa %%xmm5, %[ctr]\n"       /* Update CTR.      */
+
+                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
+                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0]    */
+                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm4\n\t"     /* xmm4 ^= key[0]    */
+                "movdqa 0x10(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x20(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x30(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x40(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x50(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x60(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x70(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x80(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x90(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xa0(%%esi), %%xmm1\n\t"
+                "cmp $10, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xb0(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xc0(%%esi), %%xmm1\n\t"
+                "cmp $12, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xd0(%%esi), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xe0(%%esi), %%xmm1\n"
+
+                ".Lenclast%=:\n\t"
+                aesenclast_xmm1_xmm0
+                aesenclast_xmm1_xmm2
+                aesenclast_xmm1_xmm3
+                aesenclast_xmm1_xmm4
+
+                "movdqu %[src], %%xmm1\n\t"      /* Get block 1.      */
+                "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR-1 ^= input */
+                "movdqu %%xmm0, %[dst]\n\t"      /* Store block 1     */
+
+                "movdqu (16)%[src], %%xmm1\n\t"  /* Get block 2.      */
+                "pxor %%xmm1, %%xmm2\n\t"        /* EncCTR-2 ^= input */
+                "movdqu %%xmm2, (16)%[dst]\n\t"  /* Store block 2.    */
+
+                "movdqu (32)%[src], %%xmm1\n\t"  /* Get block 3.      */
+                "pxor %%xmm1, %%xmm3\n\t"        /* EncCTR-3 ^= input */
+                "movdqu %%xmm3, (32)%[dst]\n\t"  /* Store block 3.    */
+
+                "movdqu (48)%[src], %%xmm1\n\t"  /* Get block 4.      */
+                "pxor %%xmm1, %%xmm4\n\t"        /* EncCTR-4 ^= input */
+                "movdqu %%xmm4, (48)%[dst]"      /* Store block 4.   */
+
+                : [ctr] "+m" (*ctr), [dst] "=m" (*b)
+                : [src] "m" (*a),
+                  [key] "g" (ctx->keyschenc),
+                  [rounds] "g" (ctx->rounds),
+                  [mask] "m" (*be_mask)
+                : "%esi", "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenc_xmm1_xmm2
+#undef aesenc_xmm1_xmm3
+#undef aesenc_xmm1_xmm4
+#undef aesenclast_xmm1_xmm0
+#undef aesenclast_xmm1_xmm2
+#undef aesenclast_xmm1_xmm3
+#undef aesenclast_xmm1_xmm4
+}
+
 
 static void
 do_aesni (RIJNDAEL_context *ctx, int decrypt_flag,
@@ -1016,6 +1269,69 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
 }
 
 
+/* Bulk encryption of complete blocks in CTR mode.  Caller needs to
+   make sure that CTR is aligned on a 16 byte boundary if AESNI; the
+   minimum alignment is for an u32.  This function is only intended
+   for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size BLOCKSIZE. */
+void
+_gcry_aes_ctr_enc (void *context, unsigned char *ctr,
+                   void *outbuf_arg, const void *inbuf_arg,
+                   unsigned int nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char *p;
+  int i;
+
+  if (0)
+    ;
+#ifdef USE_AESNI
+  else if (ctx->use_aesni)
+    {
+      aesni_prepare ();
+      for ( ;nblocks > 3 ; nblocks -= 4 )
+        {
+          do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf);
+          outbuf += 4*BLOCKSIZE;
+          inbuf  += 4*BLOCKSIZE;
+        }
+      for ( ;nblocks; nblocks-- )
+        {
+          do_aesni_ctr (ctx, ctr, outbuf, inbuf);
+          outbuf += BLOCKSIZE;
+          inbuf  += BLOCKSIZE;
+        }
+      aesni_cleanup ();
+      aesni_cleanup_2_4 ();
+    }
+#endif /*USE_AESNI*/
+  else
+    {
+      union { unsigned char x1[16]; u32 x32[4]; } tmp;
+
+      for ( ;nblocks; nblocks-- )
+        {
+          /* Encrypt the counter. */
+          do_encrypt_aligned (ctx, tmp.x1, ctr);
+          /* XOR the input with the encrypted counter and store in output.  */
+          for (p=tmp.x1, i=0; i < BLOCKSIZE; i++)
+            *outbuf++ = (*p++ ^= *inbuf++);
+          /* Increment the counter.  */
+          for (i = BLOCKSIZE; i > 0; i--)
+            {
+              ctr[i-1]++;
+              if (ctr[i-1])
+                break;
+            }
+        }
+    }
+
+  _gcry_burn_stack (48 + 2*sizeof(int));
+}
+
+
 
 /* Decrypt one block.  A and B need to be aligned on a 4 byte boundary
    and the decryption must have been prepared.  A and B may be the
diff --git a/configure.ac b/configure.ac
index a28ea83..013ff3a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -26,8 +26,8 @@ min_automake_version="1.10"
 # Remember to change the version number immediately *after* a release.
 # Set my_issvn to "yes" for non-released code.  Remember to run an
 # "svn up" and "autogen.sh" right before creating a distribution.
-m4_define([my_version], [1.5.0-beta1])
-m4_define([my_issvn], [no])
+m4_define([my_version], [1.5.0])
+m4_define([my_issvn], [yes])
 
 m4_define([svn_revision], m4_esyscmd([printf "%d" $(svn info 2>/dev/null \
           | sed -n '/^Revision:/ s/[^0-9]//gp'|head -1)]))
diff --git a/src/cipher.h b/src/cipher.h
index ca2d956..a568800 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -53,6 +53,9 @@ void _gcry_aes_cbc_enc (void *context, unsigned char *iv,
 void _gcry_aes_cbc_dec (void *context, unsigned char *iv,
                         void *outbuf_arg, const void *inbuf_arg,
                         unsigned int nblocks);
+void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
+                        void *outbuf_arg, const void *inbuf_arg,
+                        unsigned int nblocks);
 
 
 /*-- dsa.c --*/

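The AES-NI code instead increments the counter inside an XMM
register: pshufb with be_mask byte-reverses the counter so that
paddq can add 1, then reverses it back before storing.  The same
idiom rendered with intrinsics (a sketch, assuming SSSE3; the helper
name is made up):

  #include <tmmintrin.h>  /* SSSE3; compile with -mssse3 */

  /* Sketch of the counter update in do_aesni_ctr.  CTR_BE points to
     the 16 byte big-endian counter.  Like the paddq in the assembly,
     the addition stays within 64-bit lanes, so a carry out of the
     low 64 bits of the counter is not propagated.  */
  static void
  ctr_increment_sse (unsigned char *ctr_be)
  {
    static const unsigned char be_mask[16] =
      { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
    __m128i mask = _mm_loadu_si128 ((const __m128i *)be_mask);
    __m128i one  = _mm_set_epi64x (0, 1);
    __m128i c    = _mm_loadu_si128 ((const __m128i *)ctr_be);

    c = _mm_shuffle_epi8 (c, mask);   /* byte-reverse to little-endian */
    c = _mm_add_epi64 (c, one);       /* increment the low 64-bit lane */
    c = _mm_shuffle_epi8 (c, mask);   /* reverse back to big-endian    */
    _mm_storeu_si128 ((__m128i *)ctr_be, c);
  }
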
-----------------------------------------------------------------------

Summary of changes:
 NEWS              |   11 +-
 cipher/ChangeLog  |   10 ++
 cipher/cipher.c   |   73 ++++++++----
 cipher/rijndael.c |  324 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 configure.ac      |    4 +-
 src/cipher.h      |    3 +
 6 files changed, 390 insertions(+), 35 deletions(-)


hooks/post-receive
-- 
The GNU crypto library
http://git.gnupg.org



