[PATCH 2/2] Replace architecture-specific fast_wipememory2 with generic

Jussi Kivilinna jussi.kivilinna at iki.fi
Wed Oct 23 14:26:42 CEST 2013


* src/g10lib.h (fast_wipememory2): Remove architecture-specific
implementations and add a generic implementation.
--

This reduces code size, adds support for other architectures, and GCC
appears to generate better code without the inline assembly parts.
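
Note for reviewers: the generic version relies on the usual byte-replication
trick. Multiplying the 8-bit fill value by 0x0101010101010101 copies it into
every byte of the word (e.g. 0xAB * 0x0101010101010101 = 0xABABABABABABABAB),
so a single wide store clears sizeof(FASTWIPE_T) bytes at once. A minimal
standalone sketch of the idea (function and variable names here are
illustrative, not part of the patch):

#include <stdint.h>
#include <stddef.h>

static void
sketch_fast_wipe (void *ptr, uint8_t set, size_t len)
{
  volatile uint8_t *p = ptr;
  uint64_t set_long = (uint64_t) set * UINT64_C (0x0101010101010101);

  /* Byte-wise head until the pointer is word-aligned (skipped on
     architectures with fast unaligned access).  */
  while (((uintptr_t) p & (sizeof (uint64_t) - 1)) && len)
    {
      *p++ = set;
      len--;
    }

  /* Word-wide body; the volatile store keeps the compiler from
     optimizing the wipe away.  */
  while (len >= sizeof (uint64_t))
    {
      *(volatile uint64_t *) p = set_long;
      p += sizeof (uint64_t);
      len -= sizeof (uint64_t);
    }

  /* Byte-wise tail; in the real macros this is left to wipememory2.  */
  while (len--)
    *p++ = set;
}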

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 src/g10lib.h |   85 +++++++++++++++++-----------------------------------------
 1 file changed, 25 insertions(+), 60 deletions(-)

diff --git a/src/g10lib.h b/src/g10lib.h
index 3b09448..80c73ee 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -275,77 +275,42 @@ void __gcry_burn_stack (unsigned int bytes);
                   } while(0)
 #define wipememory(_ptr,_len) wipememory2(_ptr,0,_len)
 
+#ifdef HAVE_U64_TYPEDEF
+  #define FASTWIPE_T u64
+  #define FASTWIPE_MULT (U64_C(0x0101010101010101))
+#else
+  #define FASTWIPE_T u32
+  #define FASTWIPE_MULT (0x01010101U)
+#endif
 
-/* Optimized fast_wipememory2 for i386, x86-64 and arm architectures.  May leave
-   tail bytes unhandled, in which case tail bytes are handled by wipememory2.
- */
-#if defined(__x86_64__) && __GNUC__ >= 4
-#define fast_wipememory2(_vptr,_vset,_vlen) do { \
-              unsigned long long int _vset8 = _vset; \
-              if (_vlen < 8) \
-                break; \
-              _vset8 *= 0x0101010101010101ULL; \
-              do { \
-                asm volatile("movq %[set], %[ptr]\n\t" \
-                             : /**/ \
-                             : [set] "Cr" (_vset8), \
-                               [ptr] "m" (*_vptr) \
-                             : "memory"); \
-                _vlen -= 8; \
-                _vptr += 8; \
-              } while (_vlen >= 8); \
-                  } while (0)
-#elif defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4
-#define fast_wipememory2(_ptr,_set,_len) do { \
-              unsigned long _vset4 = _vset; \
-              if (_vlen < 4) \
-                break; \
-              _vset4 *= 0x01010101; \
-              do { \
-                asm volatile("movl %[set], %[ptr]\n\t" \
-                             : /**/ \
-                             : [set] "Cr" (_vset4), \
-                               [ptr] "m" (*_vptr) \
-                             : "memory"); \
-                _vlen -= 4; \
-                _vptr += 4; \
-              } while (_vlen >= 4); \
-                  } while (0)
-#elif defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) && \
-	__GNUC__ >= 4
-
-#ifdef __ARM_FEATURE_UNALIGNED
+/* The following architectures can handle unaligned accesses fast.  */
+#if defined(__i386__) || defined(__x86_64__) || \
+    defined(__powerpc__) || defined(__powerpc64__) || \
+    (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
+    defined(__aarch64__)
 #define fast_wipememory2_unaligned_head(_ptr,_set,_len) /*do nothing*/
 #else
 #define fast_wipememory2_unaligned_head(_vptr,_vset,_vlen) do { \
-              while((size_t)(_vptr)&3 && _vlen) \
-	        { *_vptr=(_vset); _vptr++; _vlen--; } \
+              while((size_t)(_vptr)&(sizeof(FASTWIPE_T)-1) && _vlen) \
+                { *_vptr=(_vset); _vptr++; _vlen--; } \
                   } while(0)
 #endif
 
+/* fast_wipememory2 may leave tail bytes unhandled, in which case tail bytes
+   are handled by wipememory2. */
 #define fast_wipememory2(_vptr,_vset,_vlen) do { \
-              unsigned long _vset4 = _vset; \
+              FASTWIPE_T _vset_long = _vset; \
               fast_wipememory2_unaligned_head(_vptr,_vset,_vlen); \
-              if (_vlen < 8) \
+              if (_vlen < sizeof(FASTWIPE_T)) \
                 break; \
-              _vset4 *= 0x01010101; \
-              asm volatile( \
-                "mov %%r4, %[set];\n\t" \
-                "mov %%r5, %[set];\n\t" \
-                "1:;\n\t" \
-                "stm %[ptr]!, {%%r4, %%r5};\n\t" \
-                "cmp %[end], %[ptr];\n\t" \
-                "bne 1b;\n\t" \
-                : [ptr] "=r" (_vptr) \
-                : [set] "r" (_vset4), \
-                  [end] "r" (_vptr+(_vlen&(~0x7))), \
-                  "0" (_vptr) \
-                : "memory", "r4", "r5", "cc"); \
-              _vlen &= 0x7; \
+              _vset_long *= FASTWIPE_MULT; \
+              do { \
+                volatile FASTWIPE_T *_vptr_long = (volatile void *)_vptr; \
+                *_vptr_long = _vset_long; \
+                _vlen -= sizeof(FASTWIPE_T); \
+                _vptr += sizeof(FASTWIPE_T); \
+              } while (_vlen >= sizeof(FASTWIPE_T)); \
                   } while (0)
-#else
-#define fast_wipememory2(_ptr,_set,_len)
-#endif
 
 
 /* Digit predicates.  */
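
For completeness, a small test harness (not part of the patch) that checks
the generic head/body/tail handling over odd lengths and misaligned starts;
it reuses the illustrative sketch_fast_wipe function from above:

#include <assert.h>
#include <string.h>

int
main (void)
{
  unsigned char buf[64];

  for (size_t off = 0; off < 8; off++)
    for (size_t len = 0; len <= 32; len++)
      {
        memset (buf, 0xff, sizeof buf);
        sketch_fast_wipe (buf + off, 0, len);
        for (size_t i = 0; i < len; i++)
          assert (buf[off + i] == 0);     /* wiped range is zero */
        assert (buf[off + len] == 0xff);  /* byte past the range untouched */
      }
  return 0;
}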



