[git] GCRYPT - branch, master, updated. libgcrypt-1.8.1-123-g23f56d3

From: Jussi Kivilinna <cvs at cvs.gnupg.org>
Date: Mon Nov 5 19:58:26 CET 2018


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "The GNU crypto library".

The branch, master has been updated
       via  23f56d3359ca7d152aa87874ddd6305171a91408 (commit)
       via  d6c6680ca31c05bafbb8becda56da051346eceb3 (commit)
       via  4faeaa1cbd235a2560fa04a8ac3766a07029acd8 (commit)
       via  0068d41d9304ebcdb2caba1fa8848925e2bfaac7 (commit)
       via  30e783ec487466132324673f197d36b85a91b060 (commit)
       via  ec49013d23d9a7b874c42d77ceb08bd313ba69e1 (commit)
       via  2aece89d3967e692743541cea857f2e4771b0b62 (commit)
      from  a2e0cb1542818ad8a71de34ccbf191adab0a0b86 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 23f56d3359ca7d152aa87874ddd6305171a91408
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Mon Nov 5 20:42:58 2018 +0200

    stdmem: free: only call _gcry_secmem_free if needed
    
    * src/stdmem.c (_gcry_private_free): Check if memory is secure before
    calling _gcry_secmem_free to avoid unnecessarily taking the secmem lock.
    --
    
    Unnecessarily taking the secmem lock on non-secure memory can result
    in poor performance on multi-threaded workloads:
      https://lists.gnupg.org/pipermail/gcrypt-devel/2018-August/004535.html
    
    Reported-by: Christian Grothoff <grothoff at gnunet.org>
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/src/stdmem.c b/src/stdmem.c
index cbda8d8..04ce64f 100644
--- a/src/stdmem.c
+++ b/src/stdmem.c
@@ -224,19 +224,23 @@ void
 _gcry_private_free (void *a)
 {
   unsigned char *p = a;
+  unsigned char *freep;
 
   if (!p)
     return;
-  if (use_m_guard )
+  if (use_m_guard)
     {
-      _gcry_private_check_heap(p);
-      if (! _gcry_secmem_free (p - EXTRA_ALIGN - 4))
-        {
-          free (p - EXTRA_ALIGN - 4);
-	}
+      _gcry_private_check_heap (p);
+      freep = p - EXTRA_ALIGN - 4;
+    }
+  else
+    {
+      freep = p;
     }
-  else if (!_gcry_secmem_free (p))
+
+  if (!_gcry_private_is_secure (freep) ||
+      !_gcry_secmem_free (freep))
     {
-      free(p);
+      free (freep);
     }
 }
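
The pattern this change applies is general: test a lock-free predicate
first and take the lock only on the path that actually needs it.  A
minimal sketch in plain C with hypothetical names (the unlocked bounds
test is safe because the pool bounds never change once threads are
running; this is an illustration, not libgcrypt code):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static char *pool_start, *pool_end;   /* set once before threads start */

static int
ptr_in_pool (const void *p)
{
  /* Safe without the lock: the bounds are fixed after startup. */
  return (const char *)p >= pool_start && (const char *)p < pool_end;
}

void
pool_free (void *p)
{
  if (!p)
    return;
  if (!ptr_in_pool (p))
    {
      free (p);                       /* fast path: no lock taken */
      return;
    }
  pthread_mutex_lock (&pool_lock);    /* slow path: pool bookkeeping */
  /* ... return the block to the pool ... */
  pthread_mutex_unlock (&pool_lock);
}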

commit d6c6680ca31c05bafbb8becda56da051346eceb3
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Mon Nov 5 20:42:58 2018 +0200

    secmem: fix potential memory visibility issue
    
    * configure.ac (gcry_cv_have_sync_synchronize): New check.
    * src/secmem.c (pooldesc_s): Make next pointer volatile.
    (memory_barrier): New.
    (_gcry_secmem_malloc_internal): Insert memory barrier between
    pool->next and mainpool.next assignments.
    (_gcry_private_is_secure): Update comments.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/configure.ac b/configure.ac
index 4e4f1f7..9803d51 100644
--- a/configure.ac
+++ b/configure.ac
@@ -859,6 +859,21 @@ fi
 
 
 #
+# Check for __sync_synchronize intrinsic.
+#
+AC_CACHE_CHECK(for __sync_synchronize,
+       [gcry_cv_have_sync_synchronize],
+       [gcry_cv_have_sync_synchronize=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+          [__sync_synchronize(); return 0;])],
+          [gcry_cv_have_sync_synchronize=yes])])
+if test "$gcry_cv_have_sync_synchronize" = "yes" ; then
+   AC_DEFINE(HAVE_SYNC_SYNCHRONIZE, 1,
+             [Defined if compiler has '__sync_synchronize' intrinsic])
+fi
+
+
+#
 # Check for VLA support (variable length arrays).
 #
 AC_CACHE_CHECK(whether the variable length arrays are supported,
diff --git a/src/secmem.c b/src/secmem.c
index 3e091d8..b6f07c5 100644
--- a/src/secmem.c
+++ b/src/secmem.c
@@ -64,7 +64,7 @@ typedef struct pooldesc_s
 {
   /* A link to the next pool.  This is used to connect the overflow
    * pools.  */
-  struct pooldesc_s *next;
+  struct pooldesc_s * volatile next;
 
   /* A memory buffer used as allocation pool.  */
   void *mem;
@@ -118,6 +118,29 @@ GPGRT_LOCK_DEFINE (secmem_lock);
 #define ADDR_TO_BLOCK(addr) \
   (memblock_t *) (void *) ((char *) addr - BLOCK_HEAD_SIZE)
 
+
+/* Memory barrier */
+static inline void
+memory_barrier(void)
+{
+#ifdef HAVE_SYNC_SYNCHRONIZE
+#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
+  asm volatile ("":::"memory");
+#endif
+  /* Use GCC / clang intrinsic for memory barrier. */
+  __sync_synchronize();
+#else
+  /* Slow portable alternative, implement memory barrier by using mutex. */
+  gpgrt_lock_t tmp;
+  memset (&tmp, 0, sizeof(tmp));
+  gpgrt_lock_init (&tmp);
+  gpgrt_lock_lock (&tmp);
+  gpgrt_lock_unlock (&tmp);
+  gpgrt_lock_destroy (&tmp);
+#endif
+}
+
+
 /* Check whether P points into POOL.  */
 static inline int
 ptr_into_pool_p (pooldesc_t *pool, const void *p)
@@ -520,7 +543,7 @@ _gcry_secmem_get_flags (void)
 }
 
 
-/* This function initializes the main memory pool MAINPOOL.  Itis
+/* This function initializes the main memory pool MAINPOOL.  It is
  * expected to be called with the secmem lock held.  */
 static void
 _gcry_secmem_init_internal (size_t n)
@@ -670,8 +693,12 @@ _gcry_secmem_malloc_internal (size_t size, int xhint)
       pool->okay = 1;
 
       /* Take care: in _gcry_private_is_secure we do not lock and thus
-       * we assume that the second assignment below is atomic.  */
+       * we assume that the second assignment below is atomic.  Memory
+       * barrier prevents reordering of stores to new pool structure after
+       * MAINPOOL.NEXT assignment and prevents _gcry_private_is_secure from seeing
+       * non-initialized POOL->NEXT pointers.  */
       pool->next = mainpool.next;
+      memory_barrier();
       mainpool.next = pool;
 
       /* After the first time we allocated an overflow pool, print a
@@ -811,9 +838,13 @@ _gcry_private_is_secure (const void *p)
 {
   pooldesc_t *pool;
 
-  /* We do no lock here because once a pool is allocatred it will not
-   * be removed anymore (except for gcry_secmem_term).  Further,
-   * adding a new pool to the list should be atomic.  */
+  /* We do no lock here because once a pool is allocated it will not
+   * be removed anymore (except for gcry_secmem_term).  Further, as
+   * assignment of POOL->NEXT in new pool structure is visible in
+   * this thread before assignment of MAINPOOL.NEXT, pool list can be
+   * iterated locklessly.  This visibility is ensured by the memory barrier
+   * between POOL->NEXT and MAINPOOL.NEXT assignments in
+   * _gcry_secmem_malloc_internal. */
   for (pool = &mainpool; pool; pool = pool->next)
     if (pool->okay && ptr_into_pool_p (pool, p))
       return 1;
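
The barrier placement above follows the classic publish pattern: fully
initialize the new node, issue a full barrier, then make the node
reachable.  A self-contained sketch with hypothetical names, using the
GCC/clang __sync_synchronize builtin (like the code above, it assumes
the publishing pointer store itself is atomic):

#include <stddef.h>

struct node
{
  int value;
  struct node * volatile next;
};

static struct node head;      /* list head; zero-initialized */

void
publish (struct node *n, int value)
{
  n->value = value;           /* 1. initialize all fields */
  n->next = head.next;        /* 2. link into the current list */
  __sync_synchronize ();      /* 3. make stores 1-2 visible before 4 */
  head.next = n;              /* 4. publish the node */
}

int
contains (int value)          /* lock-free reader */
{
  struct node *n;

  for (n = head.next; n; n = n->next)
    if (n->value == value)
      return 1;
  return 0;
}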

commit 4faeaa1cbd235a2560fa04a8ac3766a07029acd8
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Mon Nov 5 20:42:58 2018 +0200

    wipememory: use memset for non-constant length or large buffer wipes
    
    * src/g10lib.h (CONSTANT_P): New.
    (_gcry_wipememory2): New prototype.
    (wipememory2): Use _gcry_wipememory2 if _len is not a constant
    expression or length is larger than 64 bytes.
    (FASTWIPE_T, FASTWIPE_MULT, fast_wipememory2_unaligned_head): Remove.
    (fast_wipememory2): Always handle buffer as unaligned.
    * src/misc.c (__gcry_burn_stack): Move memset_ptr variable to...
    (memset_ptr): ... here. New.
    (_gcry_wipememory2): New.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/src/g10lib.h b/src/g10lib.h
index c64cbcf..9b21478 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -76,11 +76,13 @@
 #endif
 
 #if __GNUC__ >= 3
-#define LIKELY( expr )    __builtin_expect( !!(expr), 1 )
-#define UNLIKELY( expr )  __builtin_expect( !!(expr), 0 )
+#define LIKELY(expr)      __builtin_expect( !!(expr), 1 )
+#define UNLIKELY(expr)    __builtin_expect( !!(expr), 0 )
+#define CONSTANT_P(expr)  __builtin_constant_p( expr )
 #else
-#define LIKELY( expr )    (!!(expr))
-#define UNLIKELY( expr )  (!!(expr))
+#define LIKELY(expr)      (!!(expr))
+#define UNLIKELY(expr)    (!!(expr))
+#define CONSTANT_P(expr)  (0)
 #endif
 
 /* Gettext macros.  */
@@ -334,60 +336,50 @@ void __gcry_burn_stack (unsigned int bytes);
 
 
 /* To avoid that a compiler optimizes certain memset calls away, these
-   macros may be used instead. */
+   macros may be used instead.  For small constant length buffers,
+   memory wiping is inlined.  For non-constant or large length buffers,
+   memory is wiped with memset through _gcry_wipememory2. */
+void _gcry_wipememory2(void *ptr, int set, size_t len);
 #define wipememory2(_ptr,_set,_len) do { \
-              volatile char *_vptr=(volatile char *)(_ptr); \
-              size_t _vlen=(_len); \
-              unsigned char _vset=(_set); \
-              fast_wipememory2(_vptr,_vset,_vlen); \
-              while(_vlen) { *_vptr=(_vset); _vptr++; _vlen--; } \
-                  } while(0)
+	      if (!CONSTANT_P(_len) || _len > 64) { \
+		_gcry_wipememory2((void *)_ptr, _set, _len); \
+	      } else {\
+		volatile char *_vptr = (volatile char *)(_ptr); \
+		size_t _vlen = (_len); \
+		const unsigned char _vset = (_set); \
+		fast_wipememory2(_vptr, _vset, _vlen); \
+		while(_vlen) { *_vptr = (_vset); _vptr++; _vlen--; } \
+	      } \
+	    } while(0)
 #define wipememory(_ptr,_len) wipememory2(_ptr,0,_len)
 
-#define FASTWIPE_T u64
-#define FASTWIPE_MULT (U64_C(0x0101010101010101))
-
-/* Following architectures can handle unaligned accesses fast.  */
 #if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
     defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \
-    defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS) && \
-    (defined(__i386__) || defined(__x86_64__) || \
-     defined(__powerpc__) || defined(__powerpc64__) || \
-     (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
-     defined(__aarch64__))
-#define fast_wipememory2_unaligned_head(_ptr,_set,_len) /*do nothing*/
+    defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS)
 typedef struct fast_wipememory_s
 {
-  FASTWIPE_T a;
+  u64 a;
 } __attribute__((packed, aligned(1), may_alias)) fast_wipememory_t;
+/* fast_wipememory may leave tail bytes unhandled, in which case tail bytes
+   are handled by wipememory. */
+# define fast_wipememory2(_vptr,_vset,_vlen) do { \
+	      fast_wipememory_t _vset_long; \
+	      if (_vlen < sizeof(fast_wipememory_t)) \
+		break; \
+	      _vset_long.a = (_vset); \
+	      _vset_long.a *= U64_C(0x0101010101010101); \
+	      do { \
+		volatile fast_wipememory_t *_vptr_long = \
+		  (volatile void *)_vptr; \
+		_vptr_long->a = _vset_long.a; \
+		_vlen -= sizeof(fast_wipememory_t); \
+		_vptr += sizeof(fast_wipememory_t); \
+	      } while (_vlen >= sizeof(fast_wipememory_t)); \
+	    } while (0)
 #else
-#define fast_wipememory2_unaligned_head(_vptr,_vset,_vlen) do { \
-              while(UNLIKELY((size_t)(_vptr)&(sizeof(FASTWIPE_T)-1)) && _vlen) \
-                { *_vptr=(_vset); _vptr++; _vlen--; } \
-                  } while(0)
-typedef struct fast_wipememory_s
-{
-  FASTWIPE_T a;
-} fast_wipememory_t;
+# define fast_wipememory2(_vptr,_vset,_vlen)
 #endif
 
-/* fast_wipememory2 may leave tail bytes unhandled, in which case tail bytes
-   are handled by wipememory2. */
-#define fast_wipememory2(_vptr,_vset,_vlen) do { \
-              FASTWIPE_T _vset_long = _vset; \
-              fast_wipememory2_unaligned_head(_vptr,_vset,_vlen); \
-              if (_vlen < sizeof(FASTWIPE_T)) \
-                break; \
-              _vset_long *= FASTWIPE_MULT; \
-              do { \
-                volatile fast_wipememory_t *_vptr_long = \
-                  (volatile void *)_vptr; \
-                _vptr_long->a = _vset_long; \
-                _vlen -= sizeof(FASTWIPE_T); \
-                _vptr += sizeof(FASTWIPE_T); \
-              } while (_vlen >= sizeof(FASTWIPE_T)); \
-                  } while (0)
-
 
 /* Digit predicates.  */
 
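The CONSTANT_P test above is resolved at compile time, so each
wipememory call site statically selects one of the two paths.  A usage
sketch (hypothetical function, assuming the macros above are in scope):

static void
wipe_demo (char *buf, size_t n)
{
  wipememory (buf, 16);   /* constant and <= 64: inlined fast wipe */
  wipememory (buf, n);    /* not a constant: _gcry_wipememory2/memset */
}
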
diff --git a/src/misc.c b/src/misc.c
index 47d2dc7..420ce74 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -32,6 +32,8 @@
 
 static int verbosity_level = 0;
 
+static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset;
+
 static void (*fatal_error_handler)(void*,int, const char*) = NULL;
 static void *fatal_error_handler_value = 0;
 static void (*log_handler)(void*,int, const char*, va_list) = NULL;
@@ -498,22 +500,28 @@ _gcry_strtokenize (const char *string, const char *delim)
 
 
 void
+_gcry_wipememory2 (void *ptr, int set, size_t len)
+{
+  memset_ptr (ptr, set, len);
+}
+
+
+void
 __gcry_burn_stack (unsigned int bytes)
 {
 #ifdef HAVE_VLA
-    static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset;
-    /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */
-    unsigned int buflen = ((!bytes + bytes) + 63) & ~63;
-    char buf[buflen];
+  /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */
+  unsigned int buflen = ((!bytes + bytes) + 63) & ~63;
+  char buf[buflen];
 
-    memset_ptr (buf, 0, sizeof buf);
+  memset_ptr (buf, 0, buflen);
 #else
-    volatile char buf[64];
+  volatile char buf[64];
 
-    wipememory (buf, sizeof buf);
+  wipememory (buf, sizeof buf);
 
-    if (bytes > sizeof buf)
-        _gcry_burn_stack (bytes - sizeof buf);
+  if (bytes > sizeof buf)
+      _gcry_burn_stack (bytes - sizeof buf);
 #endif
 }
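
The volatile function pointer above is what keeps the wipe alive: a
direct memset on a buffer that is about to go out of scope may be
elided as a dead store, but a call through a volatile pointer cannot be
assumed to reach memset at all, so the compiler must keep it.  A
minimal sketch (hypothetical function, standard C semantics):

#include <string.h>

static void *(*volatile wipe_fn)(void *, int, size_t) = memset;

void
handle_secret (void)
{
  char key[32];

  /* ... use key ... */
  wipe_fn (key, 0, sizeof key);   /* not removable: the callee is
                                     unknown at this call site */
}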
 

commit 0068d41d9304ebcdb2caba1fa8848925e2bfaac7
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Mon Nov 5 20:42:58 2018 +0200

    Change buf_cpy and buf_xor* functions to use buf_put/buf_get helpers
    
    * cipher/bufhelp.h (BUFHELP_FAST_UNALIGNED_ACCESS)
    (bufhelp_int_s, buf_xor_1): Remove.
    (buf_cpy, buf_xor, buf_xor_2dst, buf_xor_n_copy_2): Use
    buf_put/buf_get helpers to handle unaligned memory accesses.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index 4e97c4d..0e8f599 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -35,277 +35,6 @@
 #endif
 
 
-#undef BUFHELP_FAST_UNALIGNED_ACCESS
-#if defined(BUFHELP_UNALIGNED_ACCESS) && \
-    (defined(__i386__) || defined(__x86_64__) || \
-     (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
-     defined(__aarch64__))
-/* These architectures are able of unaligned memory accesses and can
-   handle those fast.
- */
-# define BUFHELP_FAST_UNALIGNED_ACCESS 1
-#endif
-
-
-#ifdef BUFHELP_FAST_UNALIGNED_ACCESS
-/* Define type with one-byte alignment on architectures with fast unaligned
-   memory accesses.
- */
-typedef struct bufhelp_int_s
-{
-  uintptr_t a;
-} __attribute__((packed, aligned(1), may_alias)) bufhelp_int_t;
-#else
-/* Define type with default alignment for other architectures (unaligned
-   accessed handled in per byte loops).
- */
-#ifdef HAVE_GCC_ATTRIBUTE_MAY_ALIAS
-typedef struct bufhelp_int_s
-{
-  uintptr_t a;
-} __attribute__((may_alias)) bufhelp_int_t;
-#else
-typedef struct bufhelp_int_s
-{
-  uintptr_t a;
-} bufhelp_int_t;
-#endif
-#endif
-
-
-/* Optimized function for small buffer copying */
-static inline void
-buf_cpy(void *_dst, const void *_src, size_t len)
-{
-#if __GNUC__ >= 4 && (defined(__x86_64__) || defined(__i386__))
-  /* For AMD64 and i386, memcpy is faster.  */
-  memcpy(_dst, _src, len);
-#else
-  byte *dst = _dst;
-  const byte *src = _src;
-  bufhelp_int_t *ldst;
-  const bufhelp_int_t *lsrc;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src) & longmask))
-    goto do_bytes;
-#endif
-
-  ldst = (bufhelp_int_t *)(void *)dst;
-  lsrc = (const bufhelp_int_t *)(const void *)src;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    (ldst++)->a = (lsrc++)->a;
-
-  dst = (byte *)ldst;
-  src = (const byte *)lsrc;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    *dst++ = *src++;
-#endif /*__GNUC__ >= 4 && (__x86_64__ || __i386__)*/
-}
-
-
-/* Optimized function for buffer xoring */
-static inline void
-buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
-{
-  byte *dst = _dst;
-  const byte *src1 = _src1;
-  const byte *src2 = _src2;
-  bufhelp_int_t *ldst;
-  const bufhelp_int_t *lsrc1, *lsrc2;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask))
-    goto do_bytes;
-#endif
-
-  ldst = (bufhelp_int_t *)(void *)dst;
-  lsrc1 = (const bufhelp_int_t *)(const void *)src1;
-  lsrc2 = (const bufhelp_int_t *)(const void *)src2;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a;
-
-  dst = (byte *)ldst;
-  src1 = (const byte *)lsrc1;
-  src2 = (const byte *)lsrc2;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    *dst++ = *src1++ ^ *src2++;
-}
-
-
-/* Optimized function for in-place buffer xoring. */
-static inline void
-buf_xor_1(void *_dst, const void *_src, size_t len)
-{
-  byte *dst = _dst;
-  const byte *src = _src;
-  bufhelp_int_t *ldst;
-  const bufhelp_int_t *lsrc;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (UNLIKELY(((uintptr_t)dst | (uintptr_t)src) & longmask))
-    goto do_bytes;
-#endif
-
-  ldst = (bufhelp_int_t *)(void *)dst;
-  lsrc = (const bufhelp_int_t *)(const void *)src;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    (ldst++)->a ^= (lsrc++)->a;
-
-  dst = (byte *)ldst;
-  src = (const byte *)lsrc;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    *dst++ ^= *src++;
-}
-
-
-/* Optimized function for buffer xoring with two destination buffers.  Used
-   mainly by CFB mode encryption.  */
-static inline void
-buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
-{
-  byte *dst1 = _dst1;
-  byte *dst2 = _dst2;
-  const byte *src = _src;
-  bufhelp_int_t *ldst1, *ldst2;
-  const bufhelp_int_t *lsrc;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (UNLIKELY(((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask))
-    goto do_bytes;
-#endif
-
-  ldst1 = (bufhelp_int_t *)(void *)dst1;
-  ldst2 = (bufhelp_int_t *)(void *)dst2;
-  lsrc = (const bufhelp_int_t *)(const void *)src;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a);
-
-  dst1 = (byte *)ldst1;
-  dst2 = (byte *)ldst2;
-  src = (const byte *)lsrc;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    *dst1++ = (*dst2++ ^= *src++);
-}
-
-
-/* Optimized function for combined buffer xoring and copying.  Used by mainly
-   CBC mode decryption.  */
-static inline void
-buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
-		 const void *_src_cpy, size_t len)
-{
-  byte *dst_xor = _dst_xor;
-  byte *srcdst_cpy = _srcdst_cpy;
-  const byte *src_xor = _src_xor;
-  const byte *src_cpy = _src_cpy;
-  byte temp;
-  bufhelp_int_t *ldst_xor, *lsrcdst_cpy;
-  const bufhelp_int_t *lsrc_cpy, *lsrc_xor;
-  uintptr_t ltemp;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (UNLIKELY(((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor |
-       (uintptr_t)srcdst_cpy) & longmask))
-    goto do_bytes;
-#endif
-
-  ldst_xor = (bufhelp_int_t *)(void *)dst_xor;
-  lsrc_xor = (const bufhelp_int_t *)(void *)src_xor;
-  lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy;
-  lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    {
-      ltemp = (lsrc_cpy++)->a;
-      (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a;
-      (lsrcdst_cpy++)->a = ltemp;
-    }
-
-  dst_xor = (byte *)ldst_xor;
-  src_xor = (const byte *)lsrc_xor;
-  srcdst_cpy = (byte *)lsrcdst_cpy;
-  src_cpy = (const byte *)lsrc_cpy;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    {
-      temp = *src_cpy++;
-      *dst_xor++ = *srcdst_cpy ^ *src_xor++;
-      *srcdst_cpy++ = temp;
-    }
-}
-
-
-/* Optimized function for combined buffer xoring and copying.  Used by mainly
-   CFB mode decryption.  */
-static inline void
-buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len)
-{
-  buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len);
-}
-
-
-/* Constant-time compare of two buffers.  Returns 1 if buffers are equal,
-   and 0 if buffers differ.  */
-static inline int
-buf_eq_const(const void *_a, const void *_b, size_t len)
-{
-  const byte *a = _a;
-  const byte *b = _b;
-  int ab, ba;
-  size_t i;
-
-  /* Constant-time compare. */
-  for (i = 0, ab = 0, ba = 0; i < len; i++)
-    {
-      /* If a[i] != b[i], either ab or ba will be negative. */
-      ab |= a[i] - b[i];
-      ba |= b[i] - a[i];
-    }
-
-  /* 'ab | ba' is negative when buffers are not equal. */
-  return (ab | ba) >= 0;
-}
-
-
 #ifndef BUFHELP_UNALIGNED_ACCESS
 
 /* Functions for loading and storing unaligned u32 values of different
@@ -467,4 +196,188 @@ static inline void buf_put_le64(void *_buf, u64 val)
 #endif
 
 
+
+/* Optimized function for small buffer copying */
+static inline void
+buf_cpy(void *_dst, const void *_src, size_t len)
+{
+  byte *dst = _dst;
+  const byte *src = _src;
+
+#if __GNUC__ >= 4
+  if (!__builtin_constant_p (len))
+    {
+      memcpy(_dst, _src, len);
+      return;
+    }
+#endif
+
+  while (len >= sizeof(u64))
+    {
+      buf_put_he64(dst, buf_get_he64(src));
+      dst += sizeof(u64);
+      src += sizeof(u64);
+      len -= sizeof(u64);
+    }
+
+  if (len >= sizeof(u32))
+    {
+      buf_put_he32(dst, buf_get_he32(src));
+      dst += sizeof(u32);
+      src += sizeof(u32);
+      len -= sizeof(u32);
+    }
+
+  /* Handle tail.  */
+  for (; len; len--)
+    *dst++ = *src++;
+}
+
+
+/* Optimized function for buffer xoring */
+static inline void
+buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
+{
+  byte *dst = _dst;
+  const byte *src1 = _src1;
+  const byte *src2 = _src2;
+
+  while (len >= sizeof(u64))
+    {
+      buf_put_he64(dst, buf_get_he64(src1) ^ buf_get_he64(src2));
+      dst += sizeof(u64);
+      src1 += sizeof(u64);
+      src2 += sizeof(u64);
+      len -= sizeof(u64);
+    }
+
+  if (len > sizeof(u32))
+    {
+      buf_put_he32(dst, buf_get_he32(src1) ^ buf_get_he32(src2));
+      dst += sizeof(u32);
+      src1 += sizeof(u32);
+      src2 += sizeof(u32);
+      len -= sizeof(u32);
+    }
+
+  /* Handle tail.  */
+  for (; len; len--)
+    *dst++ = *src1++ ^ *src2++;
+}
+
+
+/* Optimized function for buffer xoring with two destination buffers.  Used
+   mainly by CFB mode encryption.  */
+static inline void
+buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
+{
+  byte *dst1 = _dst1;
+  byte *dst2 = _dst2;
+  const byte *src = _src;
+
+  while (len >= sizeof(u64))
+    {
+      u64 temp = buf_get_he64(dst2) ^ buf_get_he64(src);
+      buf_put_he64(dst2, temp);
+      buf_put_he64(dst1, temp);
+      dst2 += sizeof(u64);
+      dst1 += sizeof(u64);
+      src += sizeof(u64);
+      len -= sizeof(u64);
+    }
+
+  if (len >= sizeof(u32))
+    {
+      u32 temp = buf_get_he32(dst2) ^ buf_get_he32(src);
+      buf_put_he32(dst2, temp);
+      buf_put_he32(dst1, temp);
+      dst2 += sizeof(u32);
+      dst1 += sizeof(u32);
+      src += sizeof(u32);
+      len -= sizeof(u32);
+    }
+
+  /* Handle tail.  */
+  for (; len; len--)
+    *dst1++ = (*dst2++ ^= *src++);
+}
+
+
+/* Optimized function for combined buffer xoring and copying.  Used by mainly
+   CBC mode decryption.  */
+static inline void
+buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
+		 const void *_src_cpy, size_t len)
+{
+  byte *dst_xor = _dst_xor;
+  byte *srcdst_cpy = _srcdst_cpy;
+  const byte *src_xor = _src_xor;
+  const byte *src_cpy = _src_cpy;
+
+  while (len >= sizeof(u64))
+    {
+      u64 temp = buf_get_he64(src_cpy);
+      buf_put_he64(dst_xor, buf_get_he64(srcdst_cpy) ^ buf_get_he64(src_xor));
+      buf_put_he64(srcdst_cpy, temp);
+      dst_xor += sizeof(u64);
+      srcdst_cpy += sizeof(u64);
+      src_xor += sizeof(u64);
+      src_cpy += sizeof(u64);
+      len -= sizeof(u64);
+    }
+
+  if (len >= sizeof(u32))
+    {
+      u32 temp = buf_get_he32(src_cpy);
+      buf_put_he32(dst_xor, buf_get_he32(srcdst_cpy) ^ buf_get_he32(src_xor));
+      buf_put_he32(srcdst_cpy, temp);
+      dst_xor += sizeof(u32);
+      srcdst_cpy += sizeof(u32);
+      src_xor += sizeof(u32);
+      src_cpy += sizeof(u32);
+      len -= sizeof(u32);
+    }
+
+  /* Handle tail.  */
+  for (; len; len--)
+    {
+      byte temp = *src_cpy++;
+      *dst_xor++ = *srcdst_cpy ^ *src_xor++;
+      *srcdst_cpy++ = temp;
+    }
+}
+
+
+/* Optimized function for combined buffer xoring and copying.  Used by mainly
+   CFB mode decryption.  */
+static inline void
+buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len)
+{
+  buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len);
+}
+
+
+/* Constant-time compare of two buffers.  Returns 1 if buffers are equal,
+   and 0 if buffers differ.  */
+static inline int
+buf_eq_const(const void *_a, const void *_b, size_t len)
+{
+  const byte *a = _a;
+  const byte *b = _b;
+  int ab, ba;
+  size_t i;
+
+  /* Constant-time compare. */
+  for (i = 0, ab = 0, ba = 0; i < len; i++)
+    {
+      /* If a[i] != b[i], either ab or ba will be negative. */
+      ab |= a[i] - b[i];
+      ba |= b[i] - a[i];
+    }
+
+  /* 'ab | ba' is negative when buffers are not equal. */
+  return (ab | ba) >= 0;
+}
+
+
 #endif /*GCRYPT_BUFHELP_H*/
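
The buf_get_he64/buf_put_he64 helpers used above ("he" for host
endian) are defined in the part of bufhelp.h not shown in this diff.
One portable way to express such unaligned host-endian accesses is the
fixed-size memcpy idiom sketched below; this is an illustration, not
necessarily how bufhelp.h implements them.  Compilers reduce it to a
single load or store on targets with fast unaligned access:

#include <stdint.h>
#include <string.h>

static inline uint64_t
get_he64 (const void *p)
{
  uint64_t v;
  memcpy (&v, p, sizeof v);   /* no alignment or aliasing violation */
  return v;
}

static inline void
put_he64 (void *p, uint64_t v)
{
  memcpy (p, &v, sizeof v);
}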

commit 30e783ec487466132324673f197d36b85a91b060
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Mon Nov 5 20:42:58 2018 +0200

    rijndael: fix unused parameter warning
    
    * cipher/rijndael.c (do_setkey): Silence unused 'hd' warning.
    --
    
    This commit fixes the "unused parameter 'hd'" warning seen on
    architectures that do not have alternative AES implementations.
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index d126f88..1bc8b0f 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -264,6 +264,8 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
   unsigned int hwfeatures;
 #endif
 
+  (void)hd;
+
   /* The on-the-fly self tests are only run in non-fips mode. In fips
      mode explicit self-tests are required.  Actually the on-the-fly
      self-tests are not fully thread-safe and it might happen that a
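
The (void) cast is the standard idiom for a parameter that is used
only under some preprocessor configurations: the cast references the
parameter unconditionally, so -Wunused-parameter stays quiet when the
conditional code is compiled out.  A minimal illustration (hypothetical
names, not libgcrypt code):

#include <stdio.h>

static void
example (int used, int config_dependent)
{
  (void)config_dependent;   /* evaluated and discarded */
#ifdef WITH_FEATURE
  printf ("%d\n", config_dependent);
#endif
  printf ("%d\n", used);
}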

commit ec49013d23d9a7b874c42d77ceb08bd313ba69e1
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Mon Nov 5 20:42:58 2018 +0200

    mpi/longlong.h: enable inline assembly for powerpc64
    
    * mpi/longlong.h [__powerpc__ && W_TYPE_SIZE == 64]: Remove '#if 0'.
    --
    
    PowerPC64 inline assembly was tested on QEMU ('make check' passes).
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/mpi/longlong.h b/mpi/longlong.h
index d6958f3..c0f24c8 100644
--- a/mpi/longlong.h
+++ b/mpi/longlong.h
@@ -1088,7 +1088,6 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
 /* Powerpc 64 bit support taken from gmp-4.1.2. */
 /* We should test _IBMR2 here when we add assembly support for the system
    vendor compilers.  */
-#if 0 /* Not yet enabled because we don't have hardware for a test. */
 #if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
@@ -1141,7 +1140,6 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
 #define SMUL_TIME 14  /* ??? */
 #define UDIV_TIME 120 /* ??? */
 #endif /* 64-bit PowerPC.  */
-#endif /* if 0 */
 
 /***************************************
  **************  PYR  ******************
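
For reference, add_ssaaaa computes a double-word sum with carry
propagation, (sh:sl) = (ah:al) + (bh:bl); the enabled PowerPC macros do
this with carry-propagating add instructions.  A portable C sketch of
the same semantics (an illustration, not the macro itself):

#include <stdint.h>

static void
add_ssaaaa_sketch (uint64_t *sh, uint64_t *sl,
                   uint64_t ah, uint64_t al,
                   uint64_t bh, uint64_t bl)
{
  uint64_t lo = al + bl;        /* may wrap around */

  *sh = ah + bh + (lo < al);    /* (lo < al) is the carry out of the
                                   low word */
  *sl = lo;
}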

commit 2aece89d3967e692743541cea857f2e4771b0b62
Author: Jussi Kivilinna <jussi.kivilinna at iki.fi>
Date:   Mon Nov 5 20:42:58 2018 +0200

    Change remaining users of _gcry_fips_mode to use fips_mode
    
    * src/fips.c (_gcry_fips_mode): Remove.
    (_gcry_enforced_fips_mode, _gcry_inactivate_fips_mode)
    (_gcry_is_fips_mode_inactive): Use fips_mode.
    * src/g10lib.h (_gcry_fips_mode): Remove.
    --
    
    Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>

diff --git a/src/fips.c b/src/fips.c
index 2b3a0af..36358bf 100644
--- a/src/fips.c
+++ b/src/fips.c
@@ -255,25 +255,11 @@ unlock_fsm (void)
 }
 
 
-/* This function returns true if fips mode is enabled.  This is
-   independent of the fips required finite state machine and only used
-   to enable fips specific code.  Please use the fips_mode macro
-   instead of calling this function directly. */
-int
-_gcry_fips_mode (void)
-{
-  /* No locking is required because we have the requirement that this
-     variable is only initialized once with no other threads
-     existing.  */
-  return !_gcry_no_fips_mode_required;
-}
-
-
 /* Return a flag telling whether we are in the enforced fips mode.  */
 int
 _gcry_enforced_fips_mode (void)
 {
-  if (!_gcry_fips_mode ())
+  if (!fips_mode ())
     return 0;
   return enforced_fips_mode;
 }
@@ -292,7 +278,7 @@ _gcry_set_enforced_fips_mode (void)
 void
 _gcry_inactivate_fips_mode (const char *text)
 {
-  gcry_assert (_gcry_fips_mode ());
+  gcry_assert (fips_mode ());
 
   if (_gcry_enforced_fips_mode () )
     {
@@ -323,7 +309,7 @@ _gcry_is_fips_mode_inactive (void)
 {
   int flag;
 
-  if (!_gcry_fips_mode ())
+  if (!fips_mode ())
     return 0;
   lock_fsm ();
   flag = inactive_fips_mode;
diff --git a/src/g10lib.h b/src/g10lib.h
index c1f84ee..c64cbcf 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -427,8 +427,6 @@ extern int _gcry_no_fips_mode_required;
 
 void _gcry_initialize_fips_mode (int force);
 
-int _gcry_fips_mode (void);
-
 /* This macro returns true if fips mode is enabled.  This is
    independent of the fips required finite state machine and only used
    to enable fips specific code.

-----------------------------------------------------------------------

Summary of changes:
 cipher/bufhelp.h  | 455 ++++++++++++++++++++++--------------------------------
 cipher/rijndael.c |   2 +
 configure.ac      |  15 ++
 mpi/longlong.h    |   2 -
 src/fips.c        |  20 +--
 src/g10lib.h      |  88 +++++------
 src/misc.c        |  26 ++--
 src/secmem.c      |  43 +++++-
 src/stdmem.c      |  20 ++-
 9 files changed, 309 insertions(+), 362 deletions(-)


hooks/post-receive
-- 
The GNU crypto library
http://git.gnupg.org


_______________________________________________
Gnupg-commits mailing list
Gnupg-commits at gnupg.org
http://lists.gnupg.org/mailman/listinfo/gnupg-commits