[PATCH 2/3] OCB: Move large L handling from bottom to upper level
Jussi Kivilinna
jussi.kivilinna at iki.fi
Mon Dec 5 15:14:29 CET 2016
* cipher/cipher-ocb.c (_gcry_cipher_ocb_get_l): Remove.
(ocb_get_L_big): New.
(_gcry_cipher_ocb_authenticate): L-big handling done in upper
processing loop, so that lower level never sees the case where
'aad_nblocks % 65536 == 0'; Add missing stack burn.
(ocb_aad_finalize): Add missing stack burn.
(ocb_crypt): L-big handling done in upper processing loop, so that
lower level never sees the case where 'data_nblocks % 65536 == 0'.
* cipher/cipher-internal.h (_gcry_cipher_ocb_get_l): Remove.
(ocb_get_l): Remove 'l_tmp' usage and simplify, since the input is
now more limited: 'N is never a multiple of 65536'.
* cipher/rijndael-aesni.c (get_l): Remove.
(aesni_ocb_enc, aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Remove
l_tmp; Use 'ocb_get_l'.
* cipher/rijndael-ssse3-amd64.c (get_l): Remove.
(ssse3_ocb_enc, ssse3_ocb_dec, _gcry_aes_ssse3_ocb_auth): Remove
l_tmp; Use 'ocb_get_l'.
* cipher/camellia-glue.c: Remove OCB l_tmp usage.
* cipher/rijndael-armv8-ce.c: Ditto.
* cipher/rijndael.c: Ditto.
* cipher/serpent.c: Ditto.
* cipher/twofish.c: Ditto.
--
Move large L value generation to the upper-most level to simplify the
lower-level ocb_get_l, for greater performance and a simpler
implementation. This helps with implementing OCB in assembly, as
'ocb_get_l' no longer makes a function call on its slow path.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna at iki.fi>
---
cipher/camellia-glue.c | 18 +--
cipher/cipher-internal.h | 36 +++--
cipher/cipher-ocb.c | 271 +++++++++++++++++++++++++++++------------
cipher/rijndael-aesni.c | 96 +--------------
cipher/rijndael-armv8-ce.c | 20 +--
cipher/rijndael-ssse3-amd64.c | 96 ---------------
cipher/rijndael.c | 6 -
cipher/serpent.c | 24 +---
cipher/twofish.c | 20 +--
9 files changed, 248 insertions(+), 339 deletions(-)
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 1be35c9..7687094 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -619,7 +619,6 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
CAMELLIA_context *ctx = (void *)&c->context.c;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
- unsigned char l_tmp[CAMELLIA_BLOCK_SIZE];
int burn_stack_depth;
u64 blkn = c->u_mode.ocb.data_nblocks;
@@ -664,9 +663,8 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
/* Process data in 32 block chunks. */
while (nblocks >= 32)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 32;
- *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 32);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
if (encrypt)
_gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -725,9 +723,8 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
/* Process data in 16 block chunks. */
while (nblocks >= 16)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 16;
- *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
if (encrypt)
_gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -759,8 +756,6 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
c->u_mode.ocb.data_nblocks = blkn;
- wipememory(&l_tmp, sizeof(l_tmp));
-
if (burn_stack_depth)
_gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif
@@ -776,7 +771,6 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
CAMELLIA_context *ctx = (void *)&c->context.c;
const unsigned char *abuf = abuf_arg;
- unsigned char l_tmp[CAMELLIA_BLOCK_SIZE];
int burn_stack_depth;
u64 blkn = c->u_mode.ocb.aad_nblocks;
@@ -818,9 +812,8 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
/* Process data in 32 block chunks. */
while (nblocks >= 32)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 32;
- *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 32);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
_gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
c->u_mode.ocb.aad_offset,
@@ -875,9 +868,8 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
/* Process data in 16 block chunks. */
while (nblocks >= 16)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 16;
- *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
_gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
c->u_mode.ocb.aad_offset,
@@ -905,8 +897,6 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
c->u_mode.ocb.aad_nblocks = blkn;
- wipememory(&l_tmp, sizeof(l_tmp));
-
if (burn_stack_depth)
_gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 01352f3..7204d48 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -459,28 +459,28 @@ gcry_err_code_t _gcry_cipher_ocb_get_tag
gcry_err_code_t _gcry_cipher_ocb_check_tag
/* */ (gcry_cipher_hd_t c,
const unsigned char *intag, size_t taglen);
-const unsigned char *_gcry_cipher_ocb_get_l
-/* */ (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n);
-/* Inline version of _gcry_cipher_ocb_get_l, with hard-coded fast paths for
- most common cases. */
+/* Return the L-value for block N. Note: 'cipher_ocb.c' ensures that N
+ * will never be multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can
+ * be directly passed to _gcry_ctz() function and resulting index will
+ * never overflow the table. */
static inline const unsigned char *
-ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
+ocb_get_l (gcry_cipher_hd_t c, u64 n)
{
- if (n & 1)
- return c->u_mode.ocb.L[0];
- else if (n & 2)
- return c->u_mode.ocb.L[1];
- else
- {
- unsigned int ntz = _gcry_ctz64 (n);
-
- if (ntz < OCB_L_TABLE_SIZE)
- return c->u_mode.ocb.L[ntz];
- else
- return _gcry_cipher_ocb_get_l (c, l_tmp, n);
- }
+ unsigned long ntz;
+
+#if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4)
+ /* Assumes that N != 0. */
+ asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+ : [ntz] "=r" (ntz)
+ : [low] "r" ((unsigned long)n)
+ : "cc");
+#else
+ ntz = _gcry_ctz (n);
+#endif
+
+ return c->u_mode.ocb.L[ntz];
}
#endif /*G10_CIPHER_INTERNAL_H*/
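The safety argument behind the new ocb_get_l is a small arithmetic fact:
if n is not a multiple of 2^16, some bit below bit 16 is set, so
ctz(n) <= 15 and the index stays inside L[0..15]. A hypothetical
standalone check (the ctz64 helper stands in for _gcry_ctz64):

  #include <assert.h>
  #include <stdint.h>

  static unsigned int
  ctz64 (uint64_t n)
  {
    return __builtin_ctzll (n);   /* n must be non-zero */
  }

  int
  main (void)
  {
    uint64_t n;

    for (n = 1; n < (1 << 20); n++)
      if (n % 65536 != 0)
        assert (ctz64 (n) < 16);  /* never overflows a 16-entry table */
    return 0;
  }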
diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index d1f01d5..db42aaf 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -109,25 +109,17 @@ bit_copy (unsigned char *d, const unsigned char *s,
}
-/* Return the L-value for block N. In most cases we use the table;
- only if the lower OCB_L_TABLE_SIZE bits of N are zero we need to
- compute it. With a table size of 16 we need to this this only
- every 65536-th block. L_TMP is a helper buffer of size
- OCB_BLOCK_LEN which is used to hold the computation if not taken
- from the table. */
-const unsigned char *
-_gcry_cipher_ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
+/* Get L_big value for block N, where N is multiple of 65536. */
+static void
+ocb_get_L_big (gcry_cipher_hd_t c, u64 n, unsigned char *l_buf)
{
int ntz = _gcry_ctz64 (n);
- if (ntz < OCB_L_TABLE_SIZE)
- return c->u_mode.ocb.L[ntz];
+ gcry_assert(ntz >= OCB_L_TABLE_SIZE);
- double_block_cpy (l_tmp, c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
+ double_block_cpy (l_buf, c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
for (ntz -= OCB_L_TABLE_SIZE; ntz; ntz--)
- double_block (l_tmp);
-
- return l_tmp;
+ double_block (l_buf);
}
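ocb_get_L_big walks from the last table entry, L[OCB_L_TABLE_SIZE - 1],
up to L_{ntz(n)} by repeated doubling. The doubling is OCB's standard
multiply-by-x in GF(2^128); a sketch of what double_block computes,
assuming the big-endian block layout of RFC 7253 (an illustration, not
the patch's actual implementation):

  static void
  double_block_sketch (unsigned char b[16])
  {
    unsigned char carry = b[0] >> 7;   /* top bit before the shift */
    int i;

    /* Shift the 128-bit value left by one bit...  */
    for (i = 0; i < 15; i++)
      b[i] = (b[i] << 1) | (b[i + 1] >> 7);
    /* ...and reduce by x^128 + x^7 + x^2 + x + 1 if it overflowed.  */
    b[15] = (b[15] << 1) ^ (carry ? 0x87 : 0);
  }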
@@ -241,7 +233,11 @@ gcry_err_code_t
_gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
size_t abuflen)
{
+ const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+ const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
unsigned char l_tmp[OCB_BLOCK_LEN];
+ unsigned int burn = 0;
+ unsigned int nburn;
/* Check that a nonce and thus a key has been set and that we have
not yet computed the tag. We also return an error if the aad has
@@ -264,14 +260,24 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
{
c->u_mode.ocb.aad_nblocks++;
+ if ((c->u_mode.ocb.aad_nblocks % table_maxblks) == 0)
+ {
+ /* Table overflow, L needs to be generated. */
+ ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks + 1, l_tmp);
+ }
+ else
+ {
+ buf_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+ OCB_BLOCK_LEN);
+ }
+
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- buf_xor_1 (c->u_mode.ocb.aad_offset,
- ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks),
- OCB_BLOCK_LEN);
+ buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
/* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
buf_xor (l_tmp, c->u_mode.ocb.aad_offset,
c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
- c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ burn = nburn > burn ? nburn : burn;
buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
c->u_mode.ocb.aad_nleftover = 0;
@@ -279,40 +285,83 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
}
if (!abuflen)
- return 0;
-
- /* Use a bulk method if available. */
- if (abuflen >= OCB_BLOCK_LEN && c->bulk.ocb_auth)
{
- size_t nblks;
- size_t nleft;
- size_t ndone;
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
- nblks = abuflen / OCB_BLOCK_LEN;
- nleft = c->bulk.ocb_auth (c, abuf, nblks);
- ndone = nblks - nleft;
-
- abuf += ndone * OCB_BLOCK_LEN;
- abuflen -= ndone * OCB_BLOCK_LEN;
- nblks = nleft;
+ return 0;
}
- /* Hash all full blocks. */
+ /* Full blocks handling. */
while (abuflen >= OCB_BLOCK_LEN)
{
- c->u_mode.ocb.aad_nblocks++;
+ size_t nblks = abuflen / OCB_BLOCK_LEN;
+ size_t nmaxblks;
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- buf_xor_1 (c->u_mode.ocb.aad_offset,
- ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks),
- OCB_BLOCK_LEN);
- /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
- buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
- c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
- buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+ /* Check how many blocks to process till table overflow. */
+ nmaxblks = (c->u_mode.ocb.aad_nblocks + 1) % table_maxblks;
+ nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+ if (nmaxblks == 0)
+ {
+ /* Table overflow, generate L and process one block. */
+ c->u_mode.ocb.aad_nblocks++;
+ ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ buf_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ burn = nburn > burn ? nburn : burn;
+ buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+ abuf += OCB_BLOCK_LEN;
+ abuflen -= OCB_BLOCK_LEN;
+ nblks--;
+
+ /* With overflow handled, retry loop again. Next overflow will
+ * happen after 65535 blocks. */
+ continue;
+ }
+
+ nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+ /* Use a bulk method if available. */
+ if (nblks && c->bulk.ocb_auth)
+ {
+ size_t nleft;
+ size_t ndone;
+
+ nleft = c->bulk.ocb_auth (c, abuf, nblks);
+ ndone = nblks - nleft;
+
+ abuf += ndone * OCB_BLOCK_LEN;
+ abuflen -= ndone * OCB_BLOCK_LEN;
+ nblks = nleft;
+ }
+
+ /* Hash all full blocks. */
+ while (nblks)
+ {
+ c->u_mode.ocb.aad_nblocks++;
+
+ gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ buf_xor_1 (c->u_mode.ocb.aad_offset,
+ ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+ OCB_BLOCK_LEN);
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
+ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ burn = nburn > burn ? nburn : burn;
+ buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
- abuf += OCB_BLOCK_LEN;
- abuflen -= OCB_BLOCK_LEN;
+ abuf += OCB_BLOCK_LEN;
+ abuflen -= OCB_BLOCK_LEN;
+ nblks--;
+ }
}
/* Store away the remaining data. */
@@ -321,6 +370,9 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover++] = *abuf;
gcry_assert (!abuflen);
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
+
return 0;
}
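The nmaxblks arithmetic above is worth tracing once. Starting from
aad_nblocks = 65534, the first pass allows exactly one more block
(index 65535), and the next pass yields nmaxblks == 0, which routes
block 65536 through ocb_get_L_big. A hypothetical standalone trace:

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    const uint64_t table_maxblks = 65536;  /* 1 << OCB_L_TABLE_SIZE */
    uint64_t nblocks = 65534;              /* blocks processed so far */
    uint64_t nmax;

    /* Blocks remaining until an index hits a multiple of 65536.  */
    nmax = (nblocks + 1) % table_maxblks;
    nmax = (table_maxblks - nmax) % table_maxblks;
    assert (nmax == 1);                 /* only index 65535 is table-safe */

    nblocks += nmax;                    /* now at 65535 */
    nmax = (nblocks + 1) % table_maxblks;
    nmax = (table_maxblks - nmax) % table_maxblks;
    assert (nmax == 0);                 /* index 65536 needs L_big */
    return 0;
  }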
@@ -330,6 +382,8 @@ static void
ocb_aad_finalize (gcry_cipher_hd_t c)
{
unsigned char l_tmp[OCB_BLOCK_LEN];
+ unsigned int burn = 0;
+ unsigned int nburn;
/* Check that a nonce and thus a key has been set and that we have
not yet computed the tag. We also skip this if the aad has been
@@ -352,7 +406,8 @@ ocb_aad_finalize (gcry_cipher_hd_t c)
l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
buf_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
/* Sum = Sum_m xor ENCIPHER(K, CipherInput) */
- c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+ burn = nburn > burn ? nburn : burn;
buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
c->u_mode.ocb.aad_nleftover = 0;
@@ -361,6 +416,9 @@ ocb_aad_finalize (gcry_cipher_hd_t c)
/* Mark AAD as finalized so that gcry_cipher_ocb_authenticate can
* return an error when called again. */
c->u_mode.ocb.aad_finalized = 1;
+
+ if (burn > 0)
+ _gcry_burn_stack (burn + 4*sizeof(void*));
}
@@ -387,10 +445,13 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
unsigned char *outbuf, size_t outbuflen,
const unsigned char *inbuf, size_t inbuflen)
{
+ const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+ const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
unsigned char l_tmp[OCB_BLOCK_LEN];
unsigned int burn = 0;
unsigned int nburn;
- size_t nblks = inbuflen / OCB_BLOCK_LEN;
+ gcry_cipher_encrypt_t crypt_fn =
+ encrypt ? c->spec->encrypt : c->spec->decrypt;
/* Check that a nonce and thus a key has been set and that we are
not yet in end of data state. */
@@ -407,58 +468,112 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
else if ((inbuflen % OCB_BLOCK_LEN))
return GPG_ERR_INV_LENGTH; /* We support only full blocks for now. */
- /* Use a bulk method if available. */
- if (nblks && c->bulk.ocb_crypt)
- {
- size_t nleft;
- size_t ndone;
-
- nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
- ndone = nblks - nleft;
-
- inbuf += ndone * OCB_BLOCK_LEN;
- outbuf += ndone * OCB_BLOCK_LEN;
- inbuflen -= ndone * OCB_BLOCK_LEN;
- outbuflen -= ndone * OCB_BLOCK_LEN;
- nblks = nleft;
- }
-
- if (nblks)
+ /* Full blocks handling. */
+ while (inbuflen >= OCB_BLOCK_LEN)
{
- gcry_cipher_encrypt_t crypt_fn =
- encrypt ? c->spec->encrypt : c->spec->decrypt;
+ size_t nblks = inbuflen / OCB_BLOCK_LEN;
+ size_t nmaxblks;
- if (encrypt)
- {
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ocb_checksum (c->u_ctr.ctr, inbuf, nblks);
- }
+ /* Check how many blocks to process till table overflow. */
+ nmaxblks = (c->u_mode.ocb.data_nblocks + 1) % table_maxblks;
+ nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
- /* Encrypt all full blocks. */
- while (inbuflen >= OCB_BLOCK_LEN)
+ if (nmaxblks == 0)
{
+ /* Table overflow, generate L and process one block. */
c->u_mode.ocb.data_nblocks++;
+ ocb_get_L_big(c, c->u_mode.ocb.data_nblocks, l_tmp);
+
+ if (encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr, inbuf, 1);
+ }
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- buf_xor_1 (c->u_iv.iv,
- ocb_get_l (c, l_tmp, c->u_mode.ocb.data_nblocks),
- OCB_BLOCK_LEN);
+ buf_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
nburn = crypt_fn (&c->context.c, outbuf, outbuf);
burn = nburn > burn ? nburn : burn;
buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+ if (!encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr, outbuf, 1);
+ }
+
inbuf += OCB_BLOCK_LEN;
inbuflen -= OCB_BLOCK_LEN;
outbuf += OCB_BLOCK_LEN;
outbuflen -= OCB_BLOCK_LEN;
+ nblks--;
+
+ /* With overflow handled, retry loop again. Next overflow will
+ * happen after 65535 blocks. */
+ continue;
+ }
+
+ nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+ /* Use a bulk method if available. */
+ if (nblks && c->bulk.ocb_crypt)
+ {
+ size_t nleft;
+ size_t ndone;
+
+ nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+ ndone = nblks - nleft;
+
+ inbuf += ndone * OCB_BLOCK_LEN;
+ outbuf += ndone * OCB_BLOCK_LEN;
+ inbuflen -= ndone * OCB_BLOCK_LEN;
+ outbuflen -= ndone * OCB_BLOCK_LEN;
+ nblks = nleft;
}
- if (!encrypt)
+ if (nblks)
{
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks);
+ size_t nblks_chksum = nblks;
+
+ if (encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr, inbuf, nblks_chksum);
+ }
+
+ /* Encrypt all full blocks. */
+ while (nblks)
+ {
+ c->u_mode.ocb.data_nblocks++;
+
+ gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ buf_xor_1 (c->u_iv.iv,
+ ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+ OCB_BLOCK_LEN);
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+ nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+ burn = nburn > burn ? nburn : burn;
+ buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+ inbuf += OCB_BLOCK_LEN;
+ inbuflen -= OCB_BLOCK_LEN;
+ outbuf += OCB_BLOCK_LEN;
+ outbuflen -= OCB_BLOCK_LEN;
+ nblks--;
+ }
+
+ if (!encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr,
+ outbuf - nblks_chksum * OCB_BLOCK_LEN,
+ nblks_chksum);
+ }
}
}
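One subtlety in the restructured ocb_crypt: the checksum is defined over
plaintext, so each batch checksums inbuf before enciphering but outbuf
after deciphering. A condensed paraphrase of the per-batch shape (not
verbatim patch code):

  if (encrypt)
    /* Checksum_i = Checksum_{i-1} xor P_i; plaintext known up front.  */
    ocb_checksum (c->u_ctr.ctr, inbuf, nblks);

  /* ... per block: update Offset, ENCIPHER/DECIPHER, write outbuf ... */

  if (!encrypt)
    /* Plaintext exists only after deciphering; checksum the output,
     * stepping back over the nblks blocks just written.  */
    ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks);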
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 8b28b3a..7852e19 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -1331,74 +1331,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
}
-static inline const unsigned char *
-get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
- unsigned char *ctr)
-{
- const unsigned char *l;
- unsigned int ntz;
-
- if (i & 0xffffffffU)
- {
- asm ("rep;bsf %k[low], %k[ntz]\n\t"
- : [ntz] "=r" (ntz)
- : [low] "r" (i & 0xffffffffU)
- : "cc");
- }
- else
- {
- if (OCB_L_TABLE_SIZE < 32)
- {
- ntz = 32;
- }
- else if (i)
- {
- asm ("rep;bsf %k[high], %k[ntz]\n\t"
- : [ntz] "=r" (ntz)
- : [high] "r" (i >> 32)
- : "cc");
- ntz += 32;
- }
- else
- {
- ntz = 64;
- }
- }
-
- if (ntz < OCB_L_TABLE_SIZE)
- {
- l = c->u_mode.ocb.L[ntz];
- }
- else
- {
- /* Store Offset & Checksum before calling external function */
- asm volatile ("movdqu %%xmm5, %[iv]\n\t"
- "movdqu %%xmm6, %[ctr]\n\t"
- : [iv] "=m" (*iv),
- [ctr] "=m" (*ctr)
- :
- : "memory" );
-
- l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
-
- /* Restore Offset & Checksum */
- asm volatile ("movdqu %[iv], %%xmm5\n\t"
- "movdqu %[ctr], %%xmm6\n\t"
- : /* No output */
- : [iv] "m" (*iv),
- [ctr] "m" (*ctr)
- : "memory" );
- }
-
- return l;
-}
-
-
static void
aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
const void *inbuf_arg, size_t nblocks)
{
- union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
RIJNDAEL_context *ctx = (void *)&c->context.c;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
@@ -1420,7 +1356,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
for ( ;nblocks && n % 4; nblocks-- )
{
- l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* Checksum_i = Checksum_{i-1} xor P_i */
@@ -1449,9 +1385,8 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
for ( ;nblocks > 3 ; nblocks -= 4 )
{
- /* l_tmp will be used only every 65536-th block. */
n += 4;
- l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+ l = ocb_get_l(c, n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* Checksum_i = Checksum_{i-1} xor P_i */
@@ -1522,7 +1457,7 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
for ( ;nblocks; nblocks-- )
{
- l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* Checksum_i = Checksum_{i-1} xor P_i */
@@ -1559,8 +1494,6 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
aesni_cleanup ();
aesni_cleanup_2_6 ();
-
- wipememory(&l_tmp, sizeof(l_tmp));
}
@@ -1568,7 +1501,6 @@ static void
aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
const void *inbuf_arg, size_t nblocks)
{
- union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
RIJNDAEL_context *ctx = (void *)&c->context.c;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
@@ -1589,7 +1521,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
for ( ;nblocks && n % 4; nblocks-- )
{
- l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
@@ -1618,9 +1550,8 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
for ( ;nblocks > 3 ; nblocks -= 4 )
{
- /* l_tmp will be used only every 65536-th block. */
n += 4;
- l = get_l(c, l_tmp.x1, n, c->u_iv.iv, c->u_ctr.ctr);
+ l = ocb_get_l(c, n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
@@ -1691,7 +1622,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
for ( ;nblocks; nblocks-- )
{
- l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
@@ -1728,8 +1659,6 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
aesni_cleanup ();
aesni_cleanup_2_6 ();
-
- wipememory(&l_tmp, sizeof(l_tmp));
}
@@ -1748,7 +1677,6 @@ void
_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
size_t nblocks)
{
- union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
RIJNDAEL_context *ctx = (void *)&c->context.c;
const unsigned char *abuf = abuf_arg;
u64 n = c->u_mode.ocb.aad_nblocks;
@@ -1768,8 +1696,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
for ( ;nblocks && n % 4; nblocks-- )
{
- l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
@@ -1794,10 +1721,8 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
for ( ;nblocks > 3 ; nblocks -= 4 )
{
- /* l_tmp will be used only every 65536-th block. */
n += 4;
- l = get_l(c, l_tmp.x1, n, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum);
+ l = ocb_get_l(c, n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
@@ -1849,8 +1774,7 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
for ( ;nblocks; nblocks-- )
{
- l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
@@ -1883,8 +1807,6 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
aesni_cleanup ();
aesni_cleanup_2_6 ();
-
- wipememory(&l_tmp, sizeof(l_tmp));
}
diff --git a/cipher/rijndael-armv8-ce.c b/cipher/rijndael-armv8-ce.c
index bed4066..1bf74da 100644
--- a/cipher/rijndael-armv8-ce.c
+++ b/cipher/rijndael-armv8-ce.c
@@ -336,7 +336,6 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
u64 blkn = c->u_mode.ocb.data_nblocks;
u64 blkn_offs = blkn - blkn % 32;
unsigned int n = 32 - blkn % 32;
- unsigned char l_tmp[16];
void *Ls[32];
void **l;
size_t i;
@@ -364,9 +363,8 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
/* Process data in 32 block chunks. */
while (nblocks >= 32)
{
- /* l_tmp will be used only every 65536-th block. */
blkn_offs += 32;
- *l = (void *)ocb_get_l(c, l_tmp, blkn_offs);
+ *l = (void *)ocb_get_l(c, blkn_offs);
crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, 32,
nrounds);
@@ -378,13 +376,13 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
if (nblocks && l < &Ls[nblocks])
{
- *l = (void *)ocb_get_l(c, l_tmp, 32 + blkn_offs);
+ *l = (void *)ocb_get_l(c, 32 + blkn_offs);
}
}
else
{
for (i = 0; i < nblocks; i++)
- Ls[i] = (void *)ocb_get_l(c, l_tmp, ++blkn);
+ Ls[i] = (void *)ocb_get_l(c, ++blkn);
}
if (nblocks)
@@ -392,8 +390,6 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, nblocks,
nrounds);
}
-
- wipememory(&l_tmp, sizeof(l_tmp));
}
void
@@ -407,7 +403,6 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
u64 blkn = c->u_mode.ocb.aad_nblocks;
u64 blkn_offs = blkn - blkn % 32;
unsigned int n = 32 - blkn % 32;
- unsigned char l_tmp[16];
void *Ls[32];
void **l;
size_t i;
@@ -435,9 +430,8 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
/* Process data in 32 block chunks. */
while (nblocks >= 32)
{
- /* l_tmp will be used only every 65536-th block. */
blkn_offs += 32;
- *l = (void *)ocb_get_l(c, l_tmp, blkn_offs);
+ *l = (void *)ocb_get_l(c, blkn_offs);
_gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
c->u_mode.ocb.aad_sum, Ls, 32, nrounds);
@@ -448,13 +442,13 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
if (nblocks && l < &Ls[nblocks])
{
- *l = (void *)ocb_get_l(c, l_tmp, 32 + blkn_offs);
+ *l = (void *)ocb_get_l(c, 32 + blkn_offs);
}
}
else
{
for (i = 0; i < nblocks; i++)
- Ls[i] = (void *)ocb_get_l(c, l_tmp, ++blkn);
+ Ls[i] = (void *)ocb_get_l(c, ++blkn);
}
if (nblocks)
@@ -462,8 +456,6 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
_gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
c->u_mode.ocb.aad_sum, Ls, nblocks, nrounds);
}
-
- wipememory(&l_tmp, sizeof(l_tmp));
}
#endif /* USE_ARM_CE */
diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c
index 937d868..a8e89d4 100644
--- a/cipher/rijndael-ssse3-amd64.c
+++ b/cipher/rijndael-ssse3-amd64.c
@@ -527,92 +527,10 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
}
-static inline const unsigned char *
-get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
- unsigned char *ctr, const void **aes_const_ptr,
- byte ssse3_state[SSSE3_STATE_SIZE], int encrypt)
-{
- const unsigned char *l;
- unsigned int ntz;
-
- if (i & 1)
- return c->u_mode.ocb.L[0];
- else if (i & 2)
- return c->u_mode.ocb.L[1];
- else if (i & 0xffffffffU)
- {
- asm ("rep;bsf %k[low], %k[ntz]\n\t"
- : [ntz] "=r" (ntz)
- : [low] "r" (i & 0xffffffffU)
- : "cc");
- }
- else
- {
- if (OCB_L_TABLE_SIZE < 32)
- {
- ntz = 32;
- }
- else if (i)
- {
- asm ("rep;bsf %k[high], %k[ntz]\n\t"
- : [ntz] "=r" (ntz)
- : [high] "r" (i >> 32)
- : "cc");
- ntz += 32;
- }
- else
- {
- ntz = 64;
- }
- }
-
- if (ntz < OCB_L_TABLE_SIZE)
- {
- l = c->u_mode.ocb.L[ntz];
- }
- else
- {
- /* Store Offset & Checksum before calling external function */
- asm volatile ("movdqu %%xmm7, %[iv]\n\t"
- "movdqu %%xmm6, %[ctr]\n\t"
- : [iv] "=m" (*iv),
- [ctr] "=m" (*ctr)
- :
- : "memory" );
-
- /* Restore SSSE3 state. */
- vpaes_ssse3_cleanup();
-
- l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
-
- /* Save SSSE3 state. */
- if (encrypt)
- {
- vpaes_ssse3_prepare_enc (*aes_const_ptr);
- }
- else
- {
- vpaes_ssse3_prepare_dec (*aes_const_ptr);
- }
-
- /* Restore Offset & Checksum */
- asm volatile ("movdqu %[iv], %%xmm7\n\t"
- "movdqu %[ctr], %%xmm6\n\t"
- : /* No output */
- : [iv] "m" (*iv),
- [ctr] "m" (*ctr)
- : "memory" );
- }
-
- return l;
-}
-
-
static void
ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
const void *inbuf_arg, size_t nblocks)
{
- union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
RIJNDAEL_context *ctx = (void *)&c->context.c;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
@@ -635,8 +553,7 @@ ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
{
const unsigned char *l;
- l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr, &aes_const_ptr,
- ssse3_state, 1);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* Checksum_i = Checksum_{i-1} xor P_i */
@@ -671,7 +588,6 @@ ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
:
: "memory" );
- wipememory(&l_tmp, sizeof(l_tmp));
vpaes_ssse3_cleanup ();
}
@@ -679,7 +595,6 @@ static void
ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
const void *inbuf_arg, size_t nblocks)
{
- union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
RIJNDAEL_context *ctx = (void *)&c->context.c;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
@@ -702,8 +617,7 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
{
const unsigned char *l;
- l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr, &aes_const_ptr,
- ssse3_state, 0);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
@@ -738,7 +652,6 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
:
: "memory" );
- wipememory(&l_tmp, sizeof(l_tmp));
vpaes_ssse3_cleanup ();
}
@@ -758,7 +671,6 @@ void
_gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
size_t nblocks)
{
- union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
RIJNDAEL_context *ctx = (void *)&c->context.c;
const unsigned char *abuf = abuf_arg;
u64 n = c->u_mode.ocb.aad_nblocks;
@@ -780,8 +692,7 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
const unsigned char *l;
- l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum, &aes_const_ptr, ssse3_state, 1);
+ l = ocb_get_l(c, ++n);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
/* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
@@ -812,7 +723,6 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
:
: "memory" );
- wipememory(&l_tmp, sizeof(l_tmp));
vpaes_ssse3_cleanup ();
}
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index cc6a722..66ea0f3 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1353,7 +1353,7 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
for ( ;nblocks; nblocks-- )
{
u64 i = ++c->u_mode.ocb.data_nblocks;
- const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+ const unsigned char *l = ocb_get_l(c, i);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
@@ -1378,7 +1378,7 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
for ( ;nblocks; nblocks-- )
{
u64 i = ++c->u_mode.ocb.data_nblocks;
- const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+ const unsigned char *l = ocb_get_l(c, i);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
@@ -1445,7 +1445,7 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
for ( ;nblocks; nblocks-- )
{
u64 i = ++c->u_mode.ocb.aad_nblocks;
- const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
+ const unsigned char *l = ocb_get_l(c, i);
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
diff --git a/cipher/serpent.c b/cipher/serpent.c
index ef19d3b..ea4b8ed 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -1235,7 +1235,6 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
serpent_context_t *ctx = (void *)&c->context.c;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
- unsigned char l_tmp[sizeof(serpent_block_t)];
int burn_stack_depth = 2 * sizeof (serpent_block_t);
u64 blkn = c->u_mode.ocb.data_nblocks;
#else
@@ -1275,9 +1274,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
/* Process data in 16 block chunks. */
while (nblocks >= 16)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 16;
- *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
if (encrypt)
_gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1327,9 +1325,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
/* Process data in 8 block chunks. */
while (nblocks >= 8)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 8;
- *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 8);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
if (encrypt)
_gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1378,9 +1375,8 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
/* Process data in 8 block chunks. */
while (nblocks >= 8)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 8;
- *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+ *l = ocb_get_l(c, blkn - blkn % 8);
if (encrypt)
_gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
@@ -1410,8 +1406,6 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
c->u_mode.ocb.data_nblocks = blkn;
- wipememory(&l_tmp, sizeof(l_tmp));
-
if (burn_stack_depth)
_gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif
@@ -1427,7 +1421,6 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
serpent_context_t *ctx = (void *)&c->context.c;
const unsigned char *abuf = abuf_arg;
- unsigned char l_tmp[sizeof(serpent_block_t)];
int burn_stack_depth = 2 * sizeof(serpent_block_t);
u64 blkn = c->u_mode.ocb.aad_nblocks;
#else
@@ -1465,9 +1458,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
/* Process data in 16 block chunks. */
while (nblocks >= 16)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 16;
- *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
_gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
c->u_mode.ocb.aad_sum, Ls);
@@ -1512,9 +1504,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
/* Process data in 8 block chunks. */
while (nblocks >= 8)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 8;
- *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 8);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
_gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
c->u_mode.ocb.aad_sum, Ls);
@@ -1558,9 +1549,8 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
/* Process data in 8 block chunks. */
while (nblocks >= 8)
{
- /* l_tmp will be used only every 65536-th block. */
blkn += 8;
- *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+ *l = ocb_get_l(c, blkn - blkn % 8);
_gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
c->u_mode.ocb.aad_sum, Ls);
@@ -1585,8 +1575,6 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
c->u_mode.ocb.aad_nblocks = blkn;
- wipememory(&l_tmp, sizeof(l_tmp));
-
if (burn_stack_depth)
_gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif
diff --git a/cipher/twofish.c b/cipher/twofish.c
index 7a4d26a..55f6fb9 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -1261,7 +1261,6 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
TWOFISH_context *ctx = (void *)&c->context.c;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
- unsigned char l_tmp[TWOFISH_BLOCKSIZE];
unsigned int burn, burn_stack_depth = 0;
u64 blkn = c->u_mode.ocb.data_nblocks;
@@ -1273,10 +1272,9 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
- /* l_tmp will be used only every 65536-th block. */
- Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 1);
- Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 2);
- Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 3);
+ Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+ Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+ Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
blkn += 3;
if (encrypt)
@@ -1300,8 +1298,6 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
c->u_mode.ocb.data_nblocks = blkn;
- wipememory(&l_tmp, sizeof(l_tmp));
-
if (burn_stack_depth)
_gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#else
@@ -1322,7 +1318,6 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
#ifdef USE_AMD64_ASM
TWOFISH_context *ctx = (void *)&c->context.c;
const unsigned char *abuf = abuf_arg;
- unsigned char l_tmp[TWOFISH_BLOCKSIZE];
unsigned int burn, burn_stack_depth = 0;
u64 blkn = c->u_mode.ocb.aad_nblocks;
@@ -1334,10 +1329,9 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
- /* l_tmp will be used only every 65536-th block. */
- Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 1);
- Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 2);
- Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, l_tmp, blkn + 3);
+ Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+ Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+ Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
blkn += 3;
twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
@@ -1356,8 +1350,6 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
c->u_mode.ocb.aad_nblocks = blkn;
- wipememory(&l_tmp, sizeof(l_tmp));
-
if (burn_stack_depth)
_gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#else