[PATCH 3/7] Burn stack in transform functions for SHA2 AMD64 implementations
Jussi Kivilinna
jussi.kivilinna at iki.fi
Fri Apr 5 19:25:54 CEST 2019
* cipher/sha256-avx-amd64.S: Burn stack inside transform functions.
* cipher/sha256-avx2-bmi2-amd64.S: Ditto.
* cipher/sha256-ssse3-amd64.S: Ditto.
* cipher/sha512-avx-amd64.S: Ditto.
* cipher/sha512-avx2-bmi2-amd64.S: Ditto.
* cipher/sha512-ssse3-amd64.S: Ditto.
--
This change reduces per-call overhead for SHA-256 and SHA-512: the transform
functions now wipe their own stack frame before returning and report a burn
depth of zero instead of a large constant (see the caller-side sketch below).
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
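
For reference, this is the convention the patch relies on: the assembly
transform returns the number of stack bytes that may still hold sensitive
data, and the generic C code wipes that many bytes afterwards. A minimal
sketch of that caller-side pattern follows; the wrapper shape, the state
argument and the exact prototype are illustrative, not copied from
cipher/sha256.c, but _gcry_burn_stack() and the transform symbol are the
real ones.

    /* Sketch only: 'data', 'state' and 'nblks' stand in for whatever the
     * real wrapper in cipher/sha256.c passes through.                   */
    unsigned int nburn;

    nburn = _gcry_sha256_transform_amd64_avx (data, state, nblks);

    /* Old behaviour: nburn == STACK_SIZE + 5*8 (local frame plus saved
     * GPRs), so every call was followed by a fairly large wipe.
     * New behaviour: the asm clears its own frame (vmovdqa of zeroed
     * registers over _XFER / frame_W) and returns 0 via "xor eax, eax",
     * so the burn below becomes a no-op.                                */
    if (nburn)
      _gcry_burn_stack (nburn);
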
---
diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S
index 6953855bb..b8b01b15b 100644
--- a/cipher/sha256-avx-amd64.S
+++ b/cipher/sha256-avx-amd64.S
@@ -480,9 +480,12 @@ _gcry_sha256_transform_amd64_avx:
cmp INP, [rsp + _INP_END]
jne .Loop0
+.Ldone_hash:
vzeroall
-.Ldone_hash:
+ vmovdqa [rsp + _XFER], XFER
+ xor eax, eax
+
add rsp, STACK_SIZE
pop r15
@@ -491,8 +494,6 @@ _gcry_sha256_transform_amd64_avx:
pop rbp
pop rbx
- mov eax, STACK_SIZE + 5*8
-
ret
diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S
index 85e663fef..598f93821 100644
--- a/cipher/sha256-avx2-bmi2-amd64.S
+++ b/cipher/sha256-avx2-bmi2-amd64.S
@@ -747,10 +747,29 @@ _gcry_sha256_transform_amd64_avx2:
jmp .Ldo_last_block
.Ldone_hash:
- mov rsp, [rsp + _RSP]
-
vzeroall
+ /* burn stack */
+ vmovdqa [rsp + _XFER + 0 * 32], ymm0
+ vmovdqa [rsp + _XFER + 1 * 32], ymm0
+ vmovdqa [rsp + _XFER + 2 * 32], ymm0
+ vmovdqa [rsp + _XFER + 3 * 32], ymm0
+ vmovdqa [rsp + _XFER + 4 * 32], ymm0
+ vmovdqa [rsp + _XFER + 5 * 32], ymm0
+ vmovdqa [rsp + _XFER + 6 * 32], ymm0
+ vmovdqa [rsp + _XFER + 7 * 32], ymm0
+ vmovdqa [rsp + _XFER + 8 * 32], ymm0
+ vmovdqa [rsp + _XFER + 9 * 32], ymm0
+ vmovdqa [rsp + _XFER + 10 * 32], ymm0
+ vmovdqa [rsp + _XFER + 11 * 32], ymm0
+ vmovdqa [rsp + _XFER + 12 * 32], ymm0
+ vmovdqa [rsp + _XFER + 13 * 32], ymm0
+ vmovdqa [rsp + _XFER + 14 * 32], ymm0
+ vmovdqa [rsp + _XFER + 15 * 32], ymm0
+ xor eax, eax
+
+ mov rsp, [rsp + _RSP]
+
pop r15
pop r14
pop r13
@@ -758,9 +777,6 @@ _gcry_sha256_transform_amd64_avx2:
pop rbp
pop rbx
- /* stack burn depth */
- mov eax, STACK_SIZE + 6*8 + 31
-
ret
.align 64
diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S
index a9213e419..ca5c9fd1d 100644
--- a/cipher/sha256-ssse3-amd64.S
+++ b/cipher/sha256-ssse3-amd64.S
@@ -503,6 +503,10 @@ _gcry_sha256_transform_amd64_ssse3:
pxor xmm12, xmm12
.Ldone_hash:
+ pxor XFER, XFER
+ movdqa [rsp + _XFER], XFER
+ xor eax, eax
+
add rsp, STACK_SIZE
pop r15
@@ -511,8 +515,6 @@ _gcry_sha256_transform_amd64_ssse3:
pop rbp
pop rbx
- mov eax, STACK_SIZE + 5*8
-
ret
diff --git a/cipher/sha512-avx-amd64.S b/cipher/sha512-avx-amd64.S
index 446a8b4e5..534351e44 100644
--- a/cipher/sha512-avx-amd64.S
+++ b/cipher/sha512-avx-amd64.S
@@ -352,13 +352,19 @@ _gcry_sha512_transform_amd64_avx:
mov r14, [rsp + frame_GPRSAVE + 8 * 3]
mov r15, [rsp + frame_GPRSAVE + 8 * 4]
- /* Restore Stack Pointer */
- add rsp, frame_size
-
vzeroall
- /* Return stack burn depth */
- mov rax, frame_size
+ /* Burn stack */
+ t = 0
+ .rept frame_W_size / 32
+ vmovups [rsp + frame_W + (t) * 32], ymm0
+ t = ((t)+1)
+ .endr
+ vmovdqu [rsp + frame_WK], xmm0
+ xor eax, eax
+
+ /* Restore Stack Pointer */
+ add rsp, frame_size
.Lnowork:
ret
diff --git a/cipher/sha512-avx2-bmi2-amd64.S b/cipher/sha512-avx2-bmi2-amd64.S
index 05bef64cf..914f920af 100644
--- a/cipher/sha512-avx2-bmi2-amd64.S
+++ b/cipher/sha512-avx2-bmi2-amd64.S
@@ -714,6 +714,7 @@ _gcry_sha512_transform_amd64_avx2:
jne .Loop0
.Ldone_hash:
+ vzeroall
/* Restore GPRs */
mov rbp, [rsp + frame_GPRSAVE + 8 * 0]
@@ -723,12 +724,12 @@ _gcry_sha512_transform_amd64_avx2:
mov r14, [rsp + frame_GPRSAVE + 8 * 4]
mov r15, [rsp + frame_GPRSAVE + 8 * 5]
+ /* Burn stack */
+ vmovdqa [rsp + frame_XFER], XFER
+ xor eax, eax
+
/* Restore Stack Pointer */
mov rsp, [rsp + frame_RSPSAVE]
-
- vzeroall
-
- mov eax, frame_size + 31
.Lnowork:
ret
diff --git a/cipher/sha512-ssse3-amd64.S b/cipher/sha512-ssse3-amd64.S
index 51193b361..8e950e0e4 100644
--- a/cipher/sha512-ssse3-amd64.S
+++ b/cipher/sha512-ssse3-amd64.S
@@ -352,9 +352,6 @@ _gcry_sha512_transform_amd64_ssse3:
mov r14, [rsp + frame_GPRSAVE + 8 * 3]
mov r15, [rsp + frame_GPRSAVE + 8 * 4]
- /* Restore Stack Pointer */
- add rsp, frame_size
-
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
@@ -362,8 +359,17 @@ _gcry_sha512_transform_amd64_ssse3:
pxor xmm4, xmm4
pxor xmm5, xmm5
- /* Return stack burn depth */
- mov rax, frame_size
+ /* Burn stack */
+ t = 0
+ .rept frame_W_size / 16
+ movdqu [rsp + frame_W + (t) * 16], xmm0
+ t = ((t)+1)
+ .endr
+ movdqu [rsp + frame_WK], xmm0
+ xor eax, eax
+
+ /* Restore Stack Pointer */
+ add rsp, frame_size
.Lnowork:
ret