[PATCH] tests/bench-slope: improve CPU frequency auto-detection
Jussi Kivilinna
jussi.kivilinna at iki.fi
Thu Jul 23 18:25:34 CEST 2020
* configure.ac (gcry_cv_have_asm_volatile_memory): Also check whether an
assembly memory barrier with an input/output register operand is supported.
* tests/bench-slope.c (auto_ghz_bench): Change to use a base operation
that takes two CPU cycles and unroll the loop by 1024 operations.
--
CPU frequency is now correctly detected on the AWS Graviton CPU (2.3 GHz).
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
configure.ac | 11 +++++++--
tests/bench-slope.c | 57 ++++++++++++++++++++++++++++++++++++++-------
2 files changed, 58 insertions(+), 10 deletions(-)
diff --git a/configure.ac b/configure.ac
index 9a5359c2..96a18d19 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1117,7 +1117,11 @@ if test "$gcry_cv_have_asm" = "no" ; then
[gcry_cv_have_asm_volatile_memory],
[gcry_cv_have_asm_volatile_memory=no
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
- [[void a(void) { __asm__ volatile("":::"memory"); }]])],
+ [[void a(int x)
+ {
+ __asm__ volatile("":::"memory");
+ __asm__ volatile("":"+r"(x)::"memory");
+ }]])],
[gcry_cv_have_asm_volatile_memory=yes])])
fi
else
@@ -1125,7 +1129,10 @@ else
[gcry_cv_have_asm_volatile_memory],
[gcry_cv_have_asm_volatile_memory=no
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
- [[void a(void) { asm volatile("":::"memory"); }]])],
+ [[void a(int x)
+ {
+ asm volatile("":::"memory");
+ asm volatile("":"+r"(x)::"memory"); }]])],
[gcry_cv_have_asm_volatile_memory=yes])])
fi
if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then
diff --git a/tests/bench-slope.c b/tests/bench-slope.c
index 63f8f7ae..cfb3dd66 100644
--- a/tests/bench-slope.c
+++ b/tests/bench-slope.c
@@ -509,18 +509,59 @@ auto_ghz_bench (struct bench_obj *obj, void *buf, size_t buflen)
* function will give cycles/iteration result 1024.0 on high-end CPUs.
* With turbo, result will be less and can be used detect turbo-clock. */
- do
- {
#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
- /* Use memory barrier to prevent compiler from optimizing this loop
- * away. */
-
- asm volatile ("":::"memory");
+ /* Auto-ghz operation takes two CPU cycles to perform. Memory barriers
+ * are used to prevent compiler from optimizing this loop away. */
+ #define AUTO_GHZ_OPERATION \
+ asm volatile ("":"+r"(buflen)::"memory"); \
+ buflen ^= 1; \
+ asm volatile ("":"+r"(buflen)::"memory"); \
+ buflen -= 2
#else
- /* TODO: Needs alternative way. */
+ /* TODO: Needs alternative way of preventing compiler optimizations.
+ * Mix of XOR and subtraction appears to do the trick for now. */
+ #define AUTO_GHZ_OPERATION \
+ buflen ^= 1; \
+ buflen -= 2
#endif
+
+#define AUTO_GHZ_OPERATION_2 \
+ AUTO_GHZ_OPERATION; \
+ AUTO_GHZ_OPERATION
+
+#define AUTO_GHZ_OPERATION_4 \
+ AUTO_GHZ_OPERATION_2; \
+ AUTO_GHZ_OPERATION_2
+
+#define AUTO_GHZ_OPERATION_8 \
+ AUTO_GHZ_OPERATION_4; \
+ AUTO_GHZ_OPERATION_4
+
+#define AUTO_GHZ_OPERATION_16 \
+ AUTO_GHZ_OPERATION_8; \
+ AUTO_GHZ_OPERATION_8
+
+#define AUTO_GHZ_OPERATION_32 \
+ AUTO_GHZ_OPERATION_16; \
+ AUTO_GHZ_OPERATION_16
+
+#define AUTO_GHZ_OPERATION_64 \
+ AUTO_GHZ_OPERATION_32; \
+ AUTO_GHZ_OPERATION_32
+
+#define AUTO_GHZ_OPERATION_128 \
+ AUTO_GHZ_OPERATION_64; \
+ AUTO_GHZ_OPERATION_64
+
+ do
+ {
+ /* 1024 auto-ghz operations per loop, total 2048 instructions. */
+ AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+ AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+ AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
+ AUTO_GHZ_OPERATION_128; AUTO_GHZ_OPERATION_128;
}
- while (--buflen);
+ while (buflen);
}
static struct bench_ops auto_ghz_detect_ops = {
--
2.25.1
More information about the Gcrypt-devel
mailing list