[PATCH] tests/bench-slope: improve CPU frequency auto-detection

Jussi Kivilinna jussi.kivilinna at iki.fi
Thu Jul 23 18:25:34 CEST 2020


* configure.ac (gcry_cv_have_asm_volatile_memory): Check also if
assembly memory barrier with input/output register is supported.
* tests/bench-slope.c (auto_ghz_bench): Use a base operation that
takes two CPU cycles and unroll the loop to 1024 operations.
--

CPU frequency is now correctly detected on the AWS Graviton CPU (2.3 GHz).

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 configure.ac        | 11 +++++++--
 tests/bench-slope.c | 57 ++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/configure.ac b/configure.ac
index 9a5359c2..96a18d19 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1117,7 +1117,11 @@ if test "$gcry_cv_have_asm" = "no" ; then
           [gcry_cv_have_asm_volatile_memory],
           [gcry_cv_have_asm_volatile_memory=no
            AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-             [[void a(void) { __asm__ volatile("":::"memory"); }]])],
+             [[void a(int x)
+               {
+                 __asm__ volatile("":::"memory");
+                 __asm__ volatile("":"+r"(x)::"memory");
+               }]])],
              [gcry_cv_have_asm_volatile_memory=yes])])
    fi
 else
@@ -1125,7 +1129,10 @@ else
        [gcry_cv_have_asm_volatile_memory],
        [gcry_cv_have_asm_volatile_memory=no
         AC_COMPILE_IFELSE([AC_LANG_SOURCE(
-          [[void a(void) { asm volatile("":::"memory"); }]])],
+          [[void a(int x)
+            {
+              asm volatile("":::"memory");
+              asm volatile("":"+r"(x)::"memory"); }]])],
           [gcry_cv_have_asm_volatile_memory=yes])])
 fi
 if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then
diff --git a/tests/bench-slope.c b/tests/bench-slope.c
index 63f8f7ae..cfb3dd66 100644
--- a/tests/bench-slope.c
+++ b/tests/bench-slope.c
@@ -509,18 +509,59 @@ auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen)
    * function will give cycles/iteration result 1024.0 on high-end CPUs.
    * With turbo, result will be less and can be used detect turbo-clock. */
 
-  do
-    {
 #ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
-      /* Use memory barrier to prevent compiler from optimizing this loop
-       * away. */
-
-      asm volatile ("":::"memory");
+  /* Auto-ghz operation takes two CPU cycles to perform. Memory barriers
+   * are used to prevent compiler from optimizing this loop away. */
+  #define AUTO_GHZ_OPERATION \
+	asm volatile ("":"+r"(buflen)::"memory"); \
+	buflen ^= 1; \
+	asm volatile ("":"+r"(buflen)::"memory"); \
+	buflen -= 2
 #else
-      /* TODO: Needs alternative way. */
+  /* TODO: Needs alternative way of preventing compiler optimizations.
+   *       Mix of XOR and subtraction appears to do the trick for now. */
+  #define AUTO_GHZ_OPERATION \
+	buflen ^= 1; \
+	buflen -= 2
 #endif
+
+#define AUTO_GHZ_OPERATION_2 \
+	AUTO_GHZ_OPERATION; \
+	AUTO_GHZ_OPERATION
+
+#define AUTO_GHZ_OPERATION_4 \
+	AUTO_GHZ_OPERATION_2; \
+	AUTO_GHZ_OPERATION_2
+
+#define AUTO_GHZ_OPERATION_8 \
+	AUTO_GHZ_OPERATION_4; \
+	AUTO_GHZ_OPERATION_4
+
+#define AUTO_GHZ_OPERATION_16 \
+	AUTO_GHZ_OPERATION_8; \
+	AUTO_GHZ_OPERATION_8
+
+#define AUTO_GHZ_OPERATION_32 \
+	AUTO_GHZ_OPERATION_16; \
+	AUTO_GHZ_OPERATION_16
+
+#define AUTO_GHZ_OPERATION_64 \
+	AUTO_GHZ_OPERATION_32; \
+	AUTO_GHZ_OPERATION_32
+
+#define AUTO_GHZ_OPERATION_128 \
+	AUTO_GHZ_OPERATION_64; \
+	AUTO_GHZ_OPERATION_64
+
+  do
+    {
+      /* 1024 auto-ghz operations per loop, total 2048 instructions. */
+      AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+      AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+      AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+      AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
     }
-  while (--buflen);
+  while (buflen);
 }
 
 static struct bench_ops auto_ghz_detect_ops = {
-- 
2.25.1




More information about the Gcrypt-devel mailing list