[PATCH 3/7] Burn stack in transform functions for SHA2 AMD64 implementations

Jussi Kivilinna jussi.kivilinna at iki.fi
Fri Apr 5 19:25:54 CEST 2019


* cipher/sha256-avx-amd64.S: Burn stack inside transform functions.
* cipher/sha256-avx2-bmi2-amd64.S: Ditto.
* cipher/sha256-ssse3-amd64.S: Ditto.
* cipher/sha512-avx-amd64.S: Ditto.
* cipher/sha512-avx2-bmi2-amd64.S: Ditto.
* cipher/sha512-ssse3-amd64.S: Ditto.
--

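This change reduces per-call overhead for SHA256 & SHA512: each transform
function now overwrites its own stack scratch area with zeros before
returning and reports a stack burn depth of zero in eax, instead of
returning the frame size as the depth to burn.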

Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
 0 files changed

diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S
index 6953855bb..b8b01b15b 100644
--- a/cipher/sha256-avx-amd64.S
+++ b/cipher/sha256-avx-amd64.S
@@ -480,9 +480,12 @@ _gcry_sha256_transform_amd64_avx:
 	cmp	INP, [rsp + _INP_END]
 	jne	.Loop0
 
+.Ldone_hash:
 	vzeroall
 
-.Ldone_hash:
+	vmovdqa	[rsp + _XFER], XFER
+	xor     eax, eax
+
 	add	rsp, STACK_SIZE
 
 	pop	r15
@@ -491,8 +494,6 @@ _gcry_sha256_transform_amd64_avx:
 	pop	rbp
 	pop	rbx
 
-	mov     eax, STACK_SIZE + 5*8
-
 	ret
 
 
diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S
index 85e663fef..598f93821 100644
--- a/cipher/sha256-avx2-bmi2-amd64.S
+++ b/cipher/sha256-avx2-bmi2-amd64.S
@@ -747,10 +747,29 @@ _gcry_sha256_transform_amd64_avx2:
 	jmp	.Ldo_last_block
 
 .Ldone_hash:
-	mov	rsp, [rsp + _RSP]
-
 	vzeroall
 
+	/* burn stack */
+	vmovdqa [rsp + _XFER + 0 * 32], ymm0
+	vmovdqa [rsp + _XFER + 1 * 32], ymm0
+	vmovdqa [rsp + _XFER + 2 * 32], ymm0
+	vmovdqa [rsp + _XFER + 3 * 32], ymm0
+	vmovdqa [rsp + _XFER + 4 * 32], ymm0
+	vmovdqa [rsp + _XFER + 5 * 32], ymm0
+	vmovdqa [rsp + _XFER + 6 * 32], ymm0
+	vmovdqa [rsp + _XFER + 7 * 32], ymm0
+	vmovdqa [rsp + _XFER + 8 * 32], ymm0
+	vmovdqa [rsp + _XFER + 9 * 32], ymm0
+	vmovdqa [rsp + _XFER + 10 * 32], ymm0
+	vmovdqa [rsp + _XFER + 11 * 32], ymm0
+	vmovdqa [rsp + _XFER + 12 * 32], ymm0
+	vmovdqa [rsp + _XFER + 13 * 32], ymm0
+	vmovdqa [rsp + _XFER + 14 * 32], ymm0
+	vmovdqa [rsp + _XFER + 15 * 32], ymm0
+	xor     eax, eax
+
+	mov	rsp, [rsp + _RSP]
+
 	pop	r15
 	pop	r14
 	pop	r13
@@ -758,9 +777,6 @@ _gcry_sha256_transform_amd64_avx2:
 	pop	rbp
 	pop	rbx
 
-	/* stack burn depth */
-	mov	eax, STACK_SIZE + 6*8 + 31
-
 	ret
 
 .align 64
diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S
index a9213e419..ca5c9fd1d 100644
--- a/cipher/sha256-ssse3-amd64.S
+++ b/cipher/sha256-ssse3-amd64.S
@@ -503,6 +503,10 @@ _gcry_sha256_transform_amd64_ssse3:
 	pxor	xmm12, xmm12
 
 .Ldone_hash:
+	pxor	XFER, XFER
+	movdqa	[rsp + _XFER], XFER
+	xor     eax, eax
+
 	add	rsp, STACK_SIZE
 
 	pop	r15
@@ -511,8 +515,6 @@ _gcry_sha256_transform_amd64_ssse3:
 	pop	rbp
 	pop	rbx
 
-	mov     eax, STACK_SIZE + 5*8
-
 	ret
 
 
diff --git a/cipher/sha512-avx-amd64.S b/cipher/sha512-avx-amd64.S
index 446a8b4e5..534351e44 100644
--- a/cipher/sha512-avx-amd64.S
+++ b/cipher/sha512-avx-amd64.S
@@ -352,13 +352,19 @@ _gcry_sha512_transform_amd64_avx:
 	mov	r14, [rsp + frame_GPRSAVE + 8 * 3]
 	mov	r15, [rsp + frame_GPRSAVE + 8 * 4]
 
-	/* Restore Stack Pointer */
-	add	rsp, frame_size
-
 	vzeroall
 
-	/* Return stack burn depth */
-	mov	rax, frame_size
+	/* Burn stack */
+	t = 0
+	.rept frame_W_size / 32
+		vmovups [rsp + frame_W + (t) * 32], ymm0
+		t = ((t)+1)
+	.endr
+	vmovdqu [rsp + frame_WK], xmm0
+	xor     eax, eax
+
+	/* Restore Stack Pointer */
+	add	rsp, frame_size
 
 .Lnowork:
 	ret
diff --git a/cipher/sha512-avx2-bmi2-amd64.S b/cipher/sha512-avx2-bmi2-amd64.S
index 05bef64cf..914f920af 100644
--- a/cipher/sha512-avx2-bmi2-amd64.S
+++ b/cipher/sha512-avx2-bmi2-amd64.S
@@ -714,6 +714,7 @@ _gcry_sha512_transform_amd64_avx2:
 	jne	.Loop0
 
 .Ldone_hash:
+	vzeroall
 
 	/* Restore GPRs */
 	mov	rbp, [rsp + frame_GPRSAVE + 8 * 0]
@@ -723,12 +724,12 @@ _gcry_sha512_transform_amd64_avx2:
 	mov	r14, [rsp + frame_GPRSAVE + 8 * 4]
 	mov	r15, [rsp + frame_GPRSAVE + 8 * 5]
 
+	/* Burn stack */
+	vmovdqa	[rsp + frame_XFER], XFER
+	xor     eax, eax
+
 	/* Restore Stack Pointer */
 	mov	rsp, [rsp + frame_RSPSAVE]
-
-	vzeroall
-
-	mov	eax, frame_size + 31
 .Lnowork:
 	ret
 
diff --git a/cipher/sha512-ssse3-amd64.S b/cipher/sha512-ssse3-amd64.S
index 51193b361..8e950e0e4 100644
--- a/cipher/sha512-ssse3-amd64.S
+++ b/cipher/sha512-ssse3-amd64.S
@@ -352,9 +352,6 @@ _gcry_sha512_transform_amd64_ssse3:
 	mov	r14, [rsp + frame_GPRSAVE + 8 * 3]
 	mov	r15, [rsp + frame_GPRSAVE + 8 * 4]
 
-	/* Restore Stack Pointer */
-	add	rsp, frame_size
-
 	pxor	xmm0, xmm0
 	pxor	xmm1, xmm1
 	pxor	xmm2, xmm2
@@ -362,8 +359,17 @@ _gcry_sha512_transform_amd64_ssse3:
 	pxor	xmm4, xmm4
 	pxor	xmm5, xmm5
 
-	/* Return stack burn depth */
-	mov	rax, frame_size
+	/* Burn stack */
+	t = 0
+	.rept frame_W_size / 16
+		movdqu [rsp + frame_W + (t) * 16], xmm0
+		t = ((t)+1)
+	.endr
+	movdqu [rsp + frame_WK], xmm0
+	xor     eax, eax
+
+	/* Restore Stack Pointer */
+	add	rsp, frame_size
 
 .Lnowork:
 	ret




More information about the Gcrypt-devel mailing list