[PATCH 2/3] bufhelp: use one-byte aligned type for unaligned memory accesses

Jussi Kivilinna jussi.kivilinna at iki.fi
Tue Mar 10 17:57:32 CET 2015


* cipher/bufhelp.h (BUFHELP_FAST_UNALIGNED_ACCESS): Enable only when
HAVE_GCC_ATTRIBUTE_PACKED is defined.
(bufhelp_int_t): New type.
(buf_cpy, buf_xor, buf_xor_1, buf_xor_2dst, buf_xor_n_copy_2): Use
'bufhelp_int_t'.
* configure.ac (gcry_cv_gcc_attribute_packed): New.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/bufhelp.h |  111 ++++++++++++++++++++++++++++++++----------------------
 configure.ac     |   19 +++++++++
 2 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index a372acb..bd81937 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -33,10 +33,12 @@
 #include "bithelp.h"
 
 
-#if defined(__i386__) || defined(__x86_64__) || \
-    defined(__powerpc__) || defined(__powerpc64__) || \
-    (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
-    defined(__aarch64__)
+#undef BUFHELP_FAST_UNALIGNED_ACCESS
+#if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
+    (defined(__i386__) || defined(__x86_64__) || \
+     defined(__powerpc__) || defined(__powerpc64__) || \
+     (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
+     defined(__aarch64__))
 /* These architectures are able of unaligned memory accesses and can
    handle those fast.
  */
@@ -44,6 +46,25 @@
 #endif
 
 
+#ifdef BUFHELP_FAST_UNALIGNED_ACCESS
+/* Word-sized type with one-byte alignment (via the packed attribute), used
+   for accesses to possibly unaligned buffers on architectures where
+   unaligned memory accesses are fast.  */
+typedef struct bufhelp_int_s
+{
+  uintptr_t a;
+} __attribute__((packed)) bufhelp_int_t;
+#else
+/* Same type with default alignment for other architectures; there,
+   unaligned buffers are handled in per-byte loops instead.
+ */
+typedef struct bufhelp_int_s
+{
+  uintptr_t a;
+} bufhelp_int_t;
+#endif
+
+
 /* Optimized function for small buffer copying */
 static inline void
 buf_cpy(void *_dst, const void *_src, size_t len)
@@ -54,21 +75,21 @@ buf_cpy(void *_dst, const void *_src, size_t len)
 #else
   byte *dst = _dst;
   const byte *src = _src;
-  uintptr_t *ldst;
-  const uintptr_t *lsrc;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)dst | (uintptr_t)src) & longmask)
     goto do_bytes;
 #endif
 
-  ldst = (uintptr_t *)(void *)dst;
-  lsrc = (const uintptr_t *)(const void *)src;
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
-    *ldst++ = *lsrc++;
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a = (lsrc++)->a;
 
   dst = (byte *)ldst;
   src = (const byte *)lsrc;
@@ -90,22 +111,22 @@ buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
   byte *dst = _dst;
   const byte *src1 = _src1;
   const byte *src2 = _src2;
-  uintptr_t *ldst;
-  const uintptr_t *lsrc1, *lsrc2;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc1, *lsrc2;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask)
     goto do_bytes;
 #endif
 
-  ldst = (uintptr_t *)(void *)dst;
-  lsrc1 = (const uintptr_t *)(const void *)src1;
-  lsrc2 = (const uintptr_t *)(const void *)src2;
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc1 = (const bufhelp_int_t *)(const void *)src1;
+  lsrc2 = (const bufhelp_int_t *)(const void *)src2;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
-    *ldst++ = *lsrc1++ ^ *lsrc2++;
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a;
 
   dst = (byte *)ldst;
   src1 = (const byte *)lsrc1;
@@ -126,21 +147,21 @@ buf_xor_1(void *_dst, const void *_src, size_t len)
 {
   byte *dst = _dst;
   const byte *src = _src;
-  uintptr_t *ldst;
-  const uintptr_t *lsrc;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)dst | (uintptr_t)src) & longmask)
     goto do_bytes;
 #endif
 
-  ldst = (uintptr_t *)(void *)dst;
-  lsrc = (const uintptr_t *)(const void *)src;
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
-    *ldst++ ^= *lsrc++;
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a ^= (lsrc++)->a;
 
   dst = (byte *)ldst;
   src = (const byte *)lsrc;
@@ -162,22 +183,22 @@ buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
   byte *dst1 = _dst1;
   byte *dst2 = _dst2;
   const byte *src = _src;
-  uintptr_t *ldst1, *ldst2;
-  const uintptr_t *lsrc;
+  bufhelp_int_t *ldst1, *ldst2;
+  const bufhelp_int_t *lsrc;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask)
     goto do_bytes;
 #endif
 
-  ldst1 = (uintptr_t *)(void *)dst1;
-  ldst2 = (uintptr_t *)(void *)dst2;
-  lsrc = (const uintptr_t *)(const void *)src;
+  ldst1 = (bufhelp_int_t *)(void *)dst1;
+  ldst2 = (bufhelp_int_t *)(void *)dst2;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
-    *ldst1++ = (*ldst2++ ^= *lsrc++);
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a);
 
   dst1 = (byte *)ldst1;
   dst2 = (byte *)ldst2;
@@ -203,11 +224,11 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
   const byte *src_xor = _src_xor;
   const byte *src_cpy = _src_cpy;
   byte temp;
-  uintptr_t *ldst_xor, *lsrcdst_cpy;
-  const uintptr_t *lsrc_cpy, *lsrc_xor;
+  bufhelp_int_t *ldst_xor, *lsrcdst_cpy;
+  const bufhelp_int_t *lsrc_cpy, *lsrc_xor;
   uintptr_t ltemp;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor |
@@ -215,16 +236,16 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
     goto do_bytes;
 #endif
 
-  ldst_xor = (uintptr_t *)(void *)dst_xor;
-  lsrc_xor = (const uintptr_t *)(void *)src_xor;
-  lsrcdst_cpy = (uintptr_t *)(void *)srcdst_cpy;
-  lsrc_cpy = (const uintptr_t *)(const void *)src_cpy;
+  ldst_xor = (bufhelp_int_t *)(void *)dst_xor;
+  lsrc_xor = (const bufhelp_int_t *)(void *)src_xor;
+  lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy;
+  lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
     {
-      ltemp = *lsrc_cpy++;
-      *ldst_xor++ = *lsrcdst_cpy ^ *lsrc_xor++;
-      *lsrcdst_cpy++ = ltemp;
+      ltemp = (lsrc_cpy++)->a;
+      (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a;
+      (lsrcdst_cpy++)->a = ltemp;
     }
 
   dst_xor = (byte *)ldst_xor;
diff --git a/configure.ac b/configure.ac
index 4bbd686..865e116 100644
--- a/configure.ac
+++ b/configure.ac
@@ -958,6 +958,25 @@ fi
 
 
 #
+# Check whether the compiler supports the GCC style packed attribute
+#
+AC_CACHE_CHECK([whether the GCC style packed attribute is supported],
+       [gcry_cv_gcc_attribute_packed],
+       [gcry_cv_gcc_attribute_packed=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[struct foo_s { char a; long b; } __attribute__ ((packed));
+            enum bar {
+              FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))),
+            };
+          ]])],
+          [gcry_cv_gcc_attribute_packed=yes])])
+if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1,
+     [Defined if a GCC style "__attribute__ ((packed))" is supported])
+fi
+
+
+#
 # Check whether the compiler supports 'asm' or '__asm__' keyword for
 # assembler blocks.
 #




More information about the Gcrypt-devel mailing list