[PATCH v2 1/2] bufhelp: use one-byte aligned type for unaligned memory accesses

Jussi Kivilinna jussi.kivilinna at iki.fi
Wed Mar 11 18:04:46 CET 2015


* cipher/bufhelp.h (BUFHELP_FAST_UNALIGNED_ACCESS): Enable only when
HAVE_GCC_ATTRIBUTE_PACKED and HAVE_GCC_ATTRIBUTE_ALIGNED are defined.
(bufhelp_int_t): New type.
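(buf_cpy, buf_xor, buf_xor_1, buf_xor_2dst, buf_xor_n_copy_2): Use
'bufhelp_int_t'.
(bufhelp_u32_t, bufhelp_u64_t): New types.
(buf_get_be32, buf_get_le32, buf_put_be32, buf_put_le32, buf_get_be64)
(buf_get_le64, buf_put_be64, buf_put_le64): Use 'bufhelp_u32_t' and
'bufhelp_u64_t'.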
* configure.ac (gcry_cv_gcc_attribute_packed): New.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 cipher/bufhelp.h |  147 +++++++++++++++++++++++++++++++++---------------------
 configure.ac     |   18 +++++++
 2 files changed, 108 insertions(+), 57 deletions(-)

diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index a372acb..252d3bc 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -33,10 +33,13 @@
 #include "bithelp.h"
 
 
-#if defined(__i386__) || defined(__x86_64__) || \
-    defined(__powerpc__) || defined(__powerpc64__) || \
-    (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
-    defined(__aarch64__)
+#undef BUFHELP_FAST_UNALIGNED_ACCESS
+#if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
+    defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \
+    (defined(__i386__) || defined(__x86_64__) || \
+     defined(__powerpc__) || defined(__powerpc64__) || \
+     (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
+     defined(__aarch64__))
 /* These architectures are able of unaligned memory accesses and can
    handle those fast.
  */
@@ -44,6 +47,25 @@
 #endif
 
 
+#ifdef BUFHELP_FAST_UNALIGNED_ACCESS
+/* Define type with one-byte alignment on architectures with fast unaligned
+   memory accesses.
+ */
+typedef struct bufhelp_int_s
+{
+  uintptr_t a;
+} __attribute__((packed, aligned(1))) bufhelp_int_t;
+#else
+/* Define type with default alignment for other architectures (unaligned
+   accesses are handled in per-byte loops).
+ */
+typedef struct bufhelp_int_s
+{
+  uintptr_t a;
+} bufhelp_int_t;
+#endif
+
+
 /* Optimized function for small buffer copying */
 static inline void
 buf_cpy(void *_dst, const void *_src, size_t len)
@@ -54,21 +76,21 @@ buf_cpy(void *_dst, const void *_src, size_t len)
 #else
   byte *dst = _dst;
   const byte *src = _src;
-  uintptr_t *ldst;
-  const uintptr_t *lsrc;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)dst | (uintptr_t)src) & longmask)
     goto do_bytes;
 #endif
 
-  ldst = (uintptr_t *)(void *)dst;
-  lsrc = (const uintptr_t *)(const void *)src;
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
-    *ldst++ = *lsrc++;
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a = (lsrc++)->a;
 
   dst = (byte *)ldst;
   src = (const byte *)lsrc;
@@ -90,22 +112,22 @@ buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
   byte *dst = _dst;
   const byte *src1 = _src1;
   const byte *src2 = _src2;
-  uintptr_t *ldst;
-  const uintptr_t *lsrc1, *lsrc2;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc1, *lsrc2;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask)
     goto do_bytes;
 #endif
 
-  ldst = (uintptr_t *)(void *)dst;
-  lsrc1 = (const uintptr_t *)(const void *)src1;
-  lsrc2 = (const uintptr_t *)(const void *)src2;
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc1 = (const bufhelp_int_t *)(const void *)src1;
+  lsrc2 = (const bufhelp_int_t *)(const void *)src2;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
-    *ldst++ = *lsrc1++ ^ *lsrc2++;
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a;
 
   dst = (byte *)ldst;
   src1 = (const byte *)lsrc1;
@@ -126,21 +148,21 @@ buf_xor_1(void *_dst, const void *_src, size_t len)
 {
   byte *dst = _dst;
   const byte *src = _src;
-  uintptr_t *ldst;
-  const uintptr_t *lsrc;
+  bufhelp_int_t *ldst;
+  const bufhelp_int_t *lsrc;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)dst | (uintptr_t)src) & longmask)
     goto do_bytes;
 #endif
 
-  ldst = (uintptr_t *)(void *)dst;
-  lsrc = (const uintptr_t *)(const void *)src;
+  ldst = (bufhelp_int_t *)(void *)dst;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
-    *ldst++ ^= *lsrc++;
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst++)->a ^= (lsrc++)->a;
 
   dst = (byte *)ldst;
   src = (const byte *)lsrc;
@@ -162,22 +184,22 @@ buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
   byte *dst1 = _dst1;
   byte *dst2 = _dst2;
   const byte *src = _src;
-  uintptr_t *ldst1, *ldst2;
-  const uintptr_t *lsrc;
+  bufhelp_int_t *ldst1, *ldst2;
+  const bufhelp_int_t *lsrc;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask)
     goto do_bytes;
 #endif
 
-  ldst1 = (uintptr_t *)(void *)dst1;
-  ldst2 = (uintptr_t *)(void *)dst2;
-  lsrc = (const uintptr_t *)(const void *)src;
+  ldst1 = (bufhelp_int_t *)(void *)dst1;
+  ldst2 = (bufhelp_int_t *)(void *)dst2;
+  lsrc = (const bufhelp_int_t *)(const void *)src;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
-    *ldst1++ = (*ldst2++ ^= *lsrc++);
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+    (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a);
 
   dst1 = (byte *)ldst1;
   dst2 = (byte *)ldst2;
@@ -203,11 +225,11 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
   const byte *src_xor = _src_xor;
   const byte *src_cpy = _src_cpy;
   byte temp;
-  uintptr_t *ldst_xor, *lsrcdst_cpy;
-  const uintptr_t *lsrc_cpy, *lsrc_xor;
+  bufhelp_int_t *ldst_xor, *lsrcdst_cpy;
+  const bufhelp_int_t *lsrc_cpy, *lsrc_xor;
   uintptr_t ltemp;
 #ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(uintptr_t) - 1;
+  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
 
   /* Skip fast processing if buffers are unaligned.  */
   if (((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor |
@@ -215,16 +237,16 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
     goto do_bytes;
 #endif
 
-  ldst_xor = (uintptr_t *)(void *)dst_xor;
-  lsrc_xor = (const uintptr_t *)(void *)src_xor;
-  lsrcdst_cpy = (uintptr_t *)(void *)srcdst_cpy;
-  lsrc_cpy = (const uintptr_t *)(const void *)src_cpy;
+  ldst_xor = (bufhelp_int_t *)(void *)dst_xor;
+  lsrc_xor = (const bufhelp_int_t *)(void *)src_xor;
+  lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy;
+  lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy;
 
-  for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
+  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
     {
-      ltemp = *lsrc_cpy++;
-      *ldst_xor++ = *lsrcdst_cpy ^ *lsrc_xor++;
-      *lsrcdst_cpy++ = ltemp;
+      ltemp = (lsrc_cpy++)->a;
+      (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a;
+      (lsrcdst_cpy++)->a = ltemp;
     }
 
   dst_xor = (byte *)ldst_xor;
@@ -357,53 +379,64 @@ static inline void buf_put_le64(void *_buf, u64 val)
 
 #else /*BUFHELP_FAST_UNALIGNED_ACCESS*/
 
+typedef struct bufhelp_u32_s
+{
+  u32 a;
+} __attribute__((packed, aligned(1))) bufhelp_u32_t;
+
 /* Functions for loading and storing unaligned u32 values of different
    endianness.  */
 static inline u32 buf_get_be32(const void *_buf)
 {
-  return be_bswap32(*(const u32 *)_buf);
+  return be_bswap32(((const bufhelp_u32_t *)_buf)->a);
 }
 
 static inline u32 buf_get_le32(const void *_buf)
 {
-  return le_bswap32(*(const u32 *)_buf);
+  return le_bswap32(((const bufhelp_u32_t *)_buf)->a);
 }
 
 static inline void buf_put_be32(void *_buf, u32 val)
 {
-  u32 *out = _buf;
-  *out = be_bswap32(val);
+  bufhelp_u32_t *out = _buf;
+  out->a = be_bswap32(val);
 }
 
 static inline void buf_put_le32(void *_buf, u32 val)
 {
-  u32 *out = _buf;
-  *out = le_bswap32(val);
+  bufhelp_u32_t *out = _buf;
+  out->a = le_bswap32(val);
 }
 
 #ifdef HAVE_U64_TYPEDEF
+
+typedef struct bufhelp_u64_s
+{
+  u64 a;
+} __attribute__((packed, aligned(1))) bufhelp_u64_t;
+
 /* Functions for loading and storing unaligned u64 values of different
    endianness.  */
 static inline u64 buf_get_be64(const void *_buf)
 {
-  return be_bswap64(*(const u64 *)_buf);
+  return be_bswap64(((const bufhelp_u64_t *)_buf)->a);
 }
 
 static inline u64 buf_get_le64(const void *_buf)
 {
-  return le_bswap64(*(const u64 *)_buf);
+  return le_bswap64(((const bufhelp_u64_t *)_buf)->a);
 }
 
 static inline void buf_put_be64(void *_buf, u64 val)
 {
-  u64 *out = _buf;
-  *out = be_bswap64(val);
+  bufhelp_u64_t *out = _buf;
+  out->a = be_bswap64(val);
 }
 
 static inline void buf_put_le64(void *_buf, u64 val)
 {
-  u64 *out = _buf;
-  *out = le_bswap64(val);
+  bufhelp_u64_t *out = _buf;
+  out->a = le_bswap64(val);
 }
 #endif /*HAVE_U64_TYPEDEF*/
 
diff --git a/configure.ac b/configure.ac
index 4bbd686..16f6a21 100644
--- a/configure.ac
+++ b/configure.ac
@@ -958,6 +958,24 @@ fi
 
 
 #
+# Check whether the compiler supports the GCC style packed attribute
+#
+AC_CACHE_CHECK([whether the GCC style packed attribute is supported],
+       [gcry_cv_gcc_attribute_packed],
+       [gcry_cv_gcc_attribute_packed=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[struct foo_s { char a; long b; } __attribute__ ((packed));
+            enum bar {
+              FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))),
+            };]])],
+          [gcry_cv_gcc_attribute_packed=yes])])
+if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1,
+     [Defined if a GCC style "__attribute__ ((packed))" is supported])
+fi
+
+
+#
 # Check whether the compiler supports 'asm' or '__asm__' keyword for
 # assembler blocks.
 #




More information about the Gcrypt-devel mailing list