[PATCH 2/3] bufhelp: use one-byte aligned type for unaligned memory accesses
Jussi Kivilinna
jussi.kivilinna at iki.fi
Tue Mar 10 17:57:32 CET 2015
* cipher/bufhelp.h (BUFHELP_FAST_UNALIGNED_ACCESS): Enable only when
HAVE_GCC_ATTRIBUTE_PACKED.
(bufhelp_int_t): New type.
(buf_cpy, buf_xor, buf_xor_1, buf_xor_2dst, buf_xor_n_copy_2): Use
'bufhelp_int_t'.
* configure.ac (gcry_cv_gcc_attribute_packed): New.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/bufhelp.h | 111 ++++++++++++++++++++++++++++++++----------------------
configure.ac | 19 +++++++++
2 files changed, 85 insertions(+), 45 deletions(-)
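
Note for reviewers, not part of the patch: casting a byte pointer to
'uintptr_t *' and dereferencing it is undefined when the address is
misaligned, and the compiler is free to assume natural alignment for
such an access. Wrapping the word in a packed struct drops the type's
alignment to one byte, so the compiler itself emits an unaligned
load/store on targets that handle those fast. A minimal sketch of the
idiom, assuming a GCC-compatible compiler; 'word_ua_t', 'load_word_ua'
and 'store_word_ua' are made-up names for the illustration only:

  #include <stdint.h>

  /* Illustration only.  The packed attribute reduces the struct's
     alignment requirement to 1, so accesses through it may be
     misaligned.  */
  typedef struct { uintptr_t a; } __attribute__((packed)) word_ua_t;

  static inline uintptr_t
  load_word_ua (const void *p)
  {
    return ((const word_ua_t *)p)->a;   /* unaligned load */
  }

  static inline void
  store_word_ua (void *p, uintptr_t v)
  {
    ((word_ua_t *)p)->a = v;            /* unaligned store */
  }

On targets without fast unaligned access, or with compilers lacking the
attribute, BUFHELP_FAST_UNALIGNED_ACCESS stays undefined and the
existing per-byte fall-back paths are used as before.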
diff --git a/cipher/bufhelp.h b/cipher/bufhelp.h
index a372acb..bd81937 100644
--- a/cipher/bufhelp.h
+++ b/cipher/bufhelp.h
@@ -33,10 +33,12 @@
#include "bithelp.h"
-#if defined(__i386__) || defined(__x86_64__) || \
- defined(__powerpc__) || defined(__powerpc64__) || \
- (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
- defined(__aarch64__)
+#undef BUFHELP_FAST_UNALIGNED_ACCESS
+#if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
+ (defined(__i386__) || defined(__x86_64__) || \
+ defined(__powerpc__) || defined(__powerpc64__) || \
+ (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
+ defined(__aarch64__))
/* These architectures are capable of unaligned memory accesses and can
handle those fast.
*/
@@ -44,6 +46,25 @@
#endif
+#ifdef BUFHELP_FAST_UNALIGNED_ACCESS
+/* Define type with one-byte alignment on architectures with fast unaligned
+ memory accesses.
+ */
+typedef struct bufhelp_int_s
+{
+ uintptr_t a;
+} __attribute__((packed)) bufhelp_int_t;
+#else
+/* Define type with default alignment for other architectures (unaligned
+ accesses are handled in per-byte loops).
+ */
+typedef struct bufhelp_int_s
+{
+ uintptr_t a;
+} bufhelp_int_t;
+#endif
+
+
/* Optimized function for small buffer copying */
static inline void
buf_cpy(void *_dst, const void *_src, size_t len)
@@ -54,21 +75,21 @@ buf_cpy(void *_dst, const void *_src, size_t len)
#else
byte *dst = _dst;
const byte *src = _src;
- uintptr_t *ldst;
- const uintptr_t *lsrc;
+ bufhelp_int_t *ldst;
+ const bufhelp_int_t *lsrc;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)dst | (uintptr_t)src) & longmask)
goto do_bytes;
#endif
- ldst = (uintptr_t *)(void *)dst;
- lsrc = (const uintptr_t *)(const void *)src;
+ ldst = (bufhelp_int_t *)(void *)dst;
+ lsrc = (const bufhelp_int_t *)(const void *)src;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
- *ldst++ = *lsrc++;
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+ (ldst++)->a = (lsrc++)->a;
dst = (byte *)ldst;
src = (const byte *)lsrc;
@@ -90,22 +111,22 @@ buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
byte *dst = _dst;
const byte *src1 = _src1;
const byte *src2 = _src2;
- uintptr_t *ldst;
- const uintptr_t *lsrc1, *lsrc2;
+ bufhelp_int_t *ldst;
+ const bufhelp_int_t *lsrc1, *lsrc2;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask)
goto do_bytes;
#endif
- ldst = (uintptr_t *)(void *)dst;
- lsrc1 = (const uintptr_t *)(const void *)src1;
- lsrc2 = (const uintptr_t *)(const void *)src2;
+ ldst = (bufhelp_int_t *)(void *)dst;
+ lsrc1 = (const bufhelp_int_t *)(const void *)src1;
+ lsrc2 = (const bufhelp_int_t *)(const void *)src2;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
- *ldst++ = *lsrc1++ ^ *lsrc2++;
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+ (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a;
dst = (byte *)ldst;
src1 = (const byte *)lsrc1;
@@ -126,21 +147,21 @@ buf_xor_1(void *_dst, const void *_src, size_t len)
{
byte *dst = _dst;
const byte *src = _src;
- uintptr_t *ldst;
- const uintptr_t *lsrc;
+ bufhelp_int_t *ldst;
+ const bufhelp_int_t *lsrc;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)dst | (uintptr_t)src) & longmask)
goto do_bytes;
#endif
- ldst = (uintptr_t *)(void *)dst;
- lsrc = (const uintptr_t *)(const void *)src;
+ ldst = (bufhelp_int_t *)(void *)dst;
+ lsrc = (const bufhelp_int_t *)(const void *)src;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
- *ldst++ ^= *lsrc++;
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+ (ldst++)->a ^= (lsrc++)->a;
dst = (byte *)ldst;
src = (const byte *)lsrc;
@@ -162,22 +183,22 @@ buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
byte *dst1 = _dst1;
byte *dst2 = _dst2;
const byte *src = _src;
- uintptr_t *ldst1, *ldst2;
- const uintptr_t *lsrc;
+ bufhelp_int_t *ldst1, *ldst2;
+ const bufhelp_int_t *lsrc;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask)
goto do_bytes;
#endif
- ldst1 = (uintptr_t *)(void *)dst1;
- ldst2 = (uintptr_t *)(void *)dst2;
- lsrc = (const uintptr_t *)(const void *)src;
+ ldst1 = (bufhelp_int_t *)(void *)dst1;
+ ldst2 = (bufhelp_int_t *)(void *)dst2;
+ lsrc = (const bufhelp_int_t *)(const void *)src;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
- *ldst1++ = (*ldst2++ ^= *lsrc++);
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
+ (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a);
dst1 = (byte *)ldst1;
dst2 = (byte *)ldst2;
@@ -203,11 +224,11 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
const byte *src_xor = _src_xor;
const byte *src_cpy = _src_cpy;
byte temp;
- uintptr_t *ldst_xor, *lsrcdst_cpy;
- const uintptr_t *lsrc_cpy, *lsrc_xor;
+ bufhelp_int_t *ldst_xor, *lsrcdst_cpy;
+ const bufhelp_int_t *lsrc_cpy, *lsrc_xor;
uintptr_t ltemp;
#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
- const unsigned int longmask = sizeof(uintptr_t) - 1;
+ const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
/* Skip fast processing if buffers are unaligned. */
if (((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor |
@@ -215,16 +236,16 @@ buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
goto do_bytes;
#endif
- ldst_xor = (uintptr_t *)(void *)dst_xor;
- lsrc_xor = (const uintptr_t *)(void *)src_xor;
- lsrcdst_cpy = (uintptr_t *)(void *)srcdst_cpy;
- lsrc_cpy = (const uintptr_t *)(const void *)src_cpy;
+ ldst_xor = (bufhelp_int_t *)(void *)dst_xor;
+ lsrc_xor = (const bufhelp_int_t *)(void *)src_xor;
+ lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy;
+ lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy;
- for (; len >= sizeof(uintptr_t); len -= sizeof(uintptr_t))
+ for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
{
- ltemp = *lsrc_cpy++;
- *ldst_xor++ = *lsrcdst_cpy ^ *lsrc_xor++;
- *lsrcdst_cpy++ = ltemp;
+ ltemp = (lsrc_cpy++)->a;
+ (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a;
+ (lsrcdst_cpy++)->a = ltemp;
}
dst_xor = (byte *)ldst_xor;
diff --git a/configure.ac b/configure.ac
index 4bbd686..865e116 100644
--- a/configure.ac
+++ b/configure.ac
@@ -958,6 +958,25 @@ fi
#
+# Check whether the compiler supports the GCC style packed attribute
+#
+AC_CACHE_CHECK([whether the GCC style packed attribute is supported],
+ [gcry_cv_gcc_attribute_packed],
+ [gcry_cv_gcc_attribute_packed=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[struct foo_s { char a; long b; } __attribute__ ((packed));
+ enum bar {
+ FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))),
+ };
+ ]])],
+ [gcry_cv_gcc_attribute_packed=yes])])
+if test "gcry_cv_gcc_attribute_packed" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1,
+ [Defined if a GCC style "__attribute__ ((packed))" is supported])
+fi
+
+
+#
# Check whether the compiler supports 'asm' or '__asm__' keyword for
# assembler blocks.
#
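
Side note on the configure probe, also not part of the patch: it relies
on a compile-time division by zero. When the attribute is honoured,
sizeof(struct foo_s) equals sizeof(char) + sizeof(long) and FOO becomes
1/1; when it is ignored, the padding typically makes the comparison
false and the 1/0 in the enumerator is a hard compile error, so the
cached result stays "no". A standalone version one could compile by
hand looks roughly like this (sketch only):

  /* Compiles only when __attribute__ ((packed)) is actually applied.  */
  struct foo_s { char a; long b; } __attribute__ ((packed));

  enum bar
    {
      /* Division by zero in a constant expression is rejected, so an
         ignored attribute makes this translation unit fail to build.  */
      FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + sizeof(long))),
    };

  int main (void) { return FOO != 1; }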