From gniibe at fsij.org Fri Jul 3 11:27:19 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Fri, 03 Jul 2015 18:27:19 +0900 Subject: [PATCH] Curve25519 encryption support (experimental) Message-ID: <55965577.7020306@fsij.org> Hello, This is pretty immature experimental patch for Curve25519 encryption support. Since Montgomery curve is available in libgcrypt, it is used. I assume that key generation is done with: (genkey(ecc(curve Curve25519)(flags eddsa))) Then, '(flags eddsa)' means that public key is in DJB format with the prefix 0x40 like EdDSA. I tested with modified version of GnuPG 2.1. I'm going to submit the patch for GnuPG now. Please note that this is highly experimental. The format is not yet decided. diff --git a/cipher/ecc-common.h b/cipher/ecc-common.h index f0d97ea..6b3b063 100644 --- a/cipher/ecc-common.h +++ b/cipher/ecc-common.h @@ -132,6 +132,8 @@ gpg_err_code_t _gcry_ecc_eddsa_verify (gcry_mpi_t input, ECC_public_key *pk, gcry_mpi_t r, gcry_mpi_t s, int hashalgo, gcry_mpi_t pkmpi); +gpg_err_code_t _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, + mpi_point_t result); /*-- ecc-gost.c --*/ gpg_err_code_t _gcry_ecc_gost_sign (gcry_mpi_t input, ECC_secret_key *skey, diff --git a/cipher/ecc-curves.c b/cipher/ecc-curves.c index 9975bb4..5d855bd 100644 --- a/cipher/ecc-curves.c +++ b/cipher/ecc-curves.c @@ -40,7 +40,7 @@ static const struct const char *other; /* Other name. 
*/ } curve_aliases[] = { - /*{ "Curve25519", "1.3.6.1.4.1.3029.1.5.1" },*/ + { "Curve25519", "1.3.6.1.4.1.3029.1.5.1" }, { "Ed25519", "1.3.6.1.4.1.11591.15.1" }, { "NIST P-192", "1.2.840.10045.3.1.1" }, /* X9.62 OID */ @@ -129,6 +129,18 @@ static const ecc_domain_parms_t domain_parms[] = "0x6666666666666666666666666666666666666666666666666666666666666658", "0x08" }, + { + /* (y^2 = x^3 + 486662*x^2 + x) */ + "Curve25519", 256, 0, + MPI_EC_MONTGOMERY, ECC_DIALECT_ED25519, + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + "0x01DB41", + "0x01", + "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED", + "0x0000000000000000000000000000000000000000000000000000000000000009", + "0x20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9", + "0x08" + }, #if 0 /* No real specs yet found. */ { /* x^2 + y^2 = 1 + 3617x^2y^2 mod 2^414 - 17 */ diff --git a/cipher/ecc-eddsa.c b/cipher/ecc-eddsa.c index 4323d8e..72481ba 100644 --- a/cipher/ecc-eddsa.c +++ b/cipher/ecc-eddsa.c @@ -400,6 +400,51 @@ _gcry_ecc_eddsa_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result, } +gpg_err_code_t +_gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + + if (mpi_is_opaque (pk)) + { + const unsigned char *buf; + + buf = mpi_get_opaque (pk, &rawmpilen); + if (!buf) + return GPG_ERR_INV_OBJ; + rawmpilen = (rawmpilen + 7)/8; + + if (rawmpilen > 1 && (rawmpilen%2) && buf[0] == 0x40) + { + rawmpilen--; + buf++; + } + + rawmpi = xtrymalloc (rawmpilen? rawmpilen:1); + if (!rawmpi) + return gpg_err_code_from_syserror (); + memcpy (rawmpi, buf, rawmpilen); + reverse_buffer (rawmpi, rawmpilen); + } + else + { + /* Note: Without using an opaque MPI it is not reliable possible + to find out whether the public key has been given in + uncompressed format. Thus we expect native EdDSA format. 
*/ + rawmpi = _gcry_mpi_get_buffer (pk, ctx->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + } + + _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0); + xfree (rawmpi); + mpi_set_ui (result->z, 1); + + return 0; +} + + /* Compute the A value as used by EdDSA. The caller needs to provide the context EC and the actual secret D as an MPI. The function returns a newly allocated 64 byte buffer at r_digest; the first 32 diff --git a/cipher/ecc.c b/cipher/ecc.c index 5ffe84b..e5b3459 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -174,7 +174,10 @@ nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx, point_init (&sk->Q); x = mpi_new (pbits); - y = mpi_new (pbits); + if (r_y == NULL) + y = NULL; + else + y = mpi_new (pbits); if (_gcry_mpi_ec_get_affine (x, y, &Q, ctx)) log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q"); @@ -187,7 +190,7 @@ nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx, * possibilities without any loss of security. Note that we don't * do that for Ed25519 so that we do not violate the special * construction of the secret key. */ - if (E->dialect == ECC_DIALECT_ED25519) + if (E->dialect == ECC_DIALECT_ED25519 || r_y == NULL) point_set (&sk->Q, &Q); else { @@ -231,7 +234,8 @@ nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx, } *r_x = x; - *r_y = y; + if (r_y) + *r_y = y; point_free (&Q); /* Now we can test our keys (this should never fail!). 
*/ @@ -307,7 +311,7 @@ test_ecdh_only_keys (ECC_secret_key *sk, unsigned int nbits) mpi_ec_t ec; if (DBG_CIPHER) - log_debug ("Testing key.\n"); + log_debug ("Testing ECDH only key.\n"); point_init (&R_); @@ -572,7 +576,9 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) ctx = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, 0, E.p, E.a, E.b); - if ((flags & PUBKEY_FLAG_EDDSA)) + if (E.model == MPI_EC_MONTGOMERY) + rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, NULL); + else if ((flags & PUBKEY_FLAG_EDDSA)) rc = _gcry_ecc_eddsa_genkey (&sk, &E, ctx, flags); else rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, &Qy); @@ -582,26 +588,41 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) /* Copy data to the result. */ Gx = mpi_new (0); Gy = mpi_new (0); - if (_gcry_mpi_ec_get_affine (Gx, Gy, &sk.E.G, ctx)) - log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G"); - base = _gcry_ecc_ec2os (Gx, Gy, sk.E.p); if (sk.E.dialect == ECC_DIALECT_ED25519 && !(flags & PUBKEY_FLAG_NOCOMP)) { unsigned char *encpk; unsigned int encpklen; - /* (Gx and Gy are used as scratch variables) */ - rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, Gx, Gy, - !!(flags & PUBKEY_FLAG_COMP), - &encpk, &encpklen); + if (E.model != MPI_EC_MONTGOMERY) + /* (Gx and Gy are used as scratch variables) */ + rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, Gx, Gy, + !!(flags & PUBKEY_FLAG_COMP), + &encpk, &encpklen); + else + { + int off = !!(flags & PUBKEY_FLAG_COMP); + + encpk = _gcry_mpi_get_buffer_extra (Qx, ctx->nbits/8, off?-1:0, + &encpklen, NULL); + if (encpk == NULL) + rc = gpg_err_code_from_syserror (); + else + { + if (off) + encpk[0] = 0x40; + encpklen += off; + } + } if (rc) return rc; public = mpi_new (0); mpi_set_opaque (public, encpk, encpklen*8); - encpk = NULL; } else { + if (_gcry_mpi_ec_get_affine (Gx, Gy, &sk.E.G, ctx)) + log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G"); + base = _gcry_ecc_ec2os (Gx, Gy, 
sk.E.p); if (!Qx) { /* This is the case for a key from _gcry_ecc_eddsa_generate @@ -1216,6 +1237,18 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) gcry_mpi_t data = NULL; ECC_public_key pk; mpi_ec_t ec = NULL; + int flags; + + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + sexp_release (l1); + l1 = NULL; memset (&pk, 0, sizeof pk); _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, @@ -1239,7 +1272,9 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* * Extract the key. */ - rc = sexp_extract_param (keyparms, NULL, "-p?a?b?g?n?h?+q", + rc = sexp_extract_param (keyparms, NULL, + (flags & PUBKEY_FLAG_EDDSA)? + "-p?a?b?g?n?h?/q" : "-p?a?b?g?n?h?+q", &pk.E.p, &pk.E.a, &pk.E.b, &mpi_g, &pk.E.n, &pk.E.h, &mpi_q, NULL); if (rc) @@ -1252,7 +1287,6 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. */ - sexp_release (l1); l1 = sexp_find_token (keyparms, "curve", 5); if (l1) { @@ -1261,7 +1295,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. */ @@ -1292,42 +1326,73 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + /* Compute the encrypted value. */ + ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, 0, + pk.E.p, pk.E.a, pk.E.b); + /* Convert the public key. 
*/ if (mpi_q) { point_init (&pk.Q); - rc = _gcry_ecc_os2ec (&pk.Q, mpi_q); + if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (mpi_q, ec, &pk.Q); + else + rc = _gcry_ecc_os2ec (&pk.Q, mpi_q); if (rc) goto leave; } - /* Compute the encrypted value. */ - ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, 0, - pk.E.p, pk.E.a, pk.E.b); - /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */ { mpi_point_struct R; /* Result that we return. */ gcry_mpi_t x, y; + unsigned char *rawmpi; + unsigned int rawmpilen; x = mpi_new (0); - y = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y = NULL; + else + y = mpi_new (0); point_init (&R); /* R = kQ <=> R = kdG */ _gcry_mpi_ec_mul_point (&R, data, &pk.Q, ec); - if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) log_fatal ("ecdh: Failed to get affine coordinates for kdG\n"); - mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p); + if (y) + mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p); + else + { + rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + rc = gpg_err_code_from_syserror (); + else + { + mpi_s = mpi_new (0); + mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8); + } + } /* R = kG */ _gcry_mpi_ec_mul_point (&R, data, &pk.E.G, ec); if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) log_fatal ("ecdh: Failed to get affine coordinates for kG\n"); - mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p); + if (y) + mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p); + else + { + rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + rc = gpg_err_code_from_syserror (); + else + { + mpi_e = mpi_new (0); + mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8); + } + } mpi_free (x); mpi_free (y); @@ -1335,7 +1400,8 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) point_free (&R); } - rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); + if (!rc) + rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); leave: _gcry_mpi_release 
(pk.E.p); @@ -1351,6 +1417,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_mpi_release (mpi_s); _gcry_mpi_release (mpi_e); xfree (curvename); + sexp_release (l1); _gcry_mpi_ec_free (ec); _gcry_pk_util_free_encoding_ctx (&ctx); if (DBG_CIPHER) @@ -1380,6 +1447,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_point_struct kG; mpi_point_struct R; gcry_mpi_t r = NULL; + int flags = 0; memset (&sk, 0, sizeof sk); point_init (&kG); @@ -1388,6 +1456,17 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, ecc_get_nbits (keyparms)); + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + sexp_release (l1); + l1 = NULL; + /* * Extract the data. */ @@ -1430,7 +1509,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. */ @@ -1462,18 +1541,19 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) } + ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0, + sk.E.p, sk.E.a, sk.E.b); + /* * Compute the plaintext. 
*/ - rc = _gcry_ecc_os2ec (&kG, data_e); + if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (data_e, ec, &kG); + else + rc = _gcry_ecc_os2ec (&kG, data_e); if (rc) - { - point_free (&kG); - return rc; - } + return rc; - ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0, - sk.E.p, sk.E.a, sk.E.b); /* R = dkG */ _gcry_mpi_ec_mul_point (&R, sk.d, &kG, ec); @@ -1483,12 +1563,30 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) gcry_mpi_t x, y; x = mpi_new (0); - y = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y = NULL; + else + y = mpi_new (0); if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) log_fatal ("ecdh: Failed to get affine coordinates\n"); - r = _gcry_ecc_ec2os (x, y, sk.E.p); + if (y) + r = _gcry_ecc_ec2os (x, y, sk.E.p); + else + { + unsigned char *rawmpi; + unsigned int rawmpilen; + + rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + rc = gpg_err_code_from_syserror (); + else + { + r = mpi_new (0); + mpi_set_opaque (r, rawmpi, rawmpilen*8); + } + } if (!r) rc = gpg_err_code_from_syserror (); else -- From gniibe at fsij.org Mon Jul 6 05:04:41 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Mon, 06 Jul 2015 12:04:41 +0900 Subject: [PATCH] ecc: fix memory leaks. Message-ID: <5599F049.8060205@fsij.org> Hello, While modifying ecc for Curve25519, I've found some errors in ecc.c. Here are changes. cipher/ecc.c (ecc_generate): Fix memory leak on error of _gcry_pk_util_parse_flaglist and _gcry_ecc_eddsa_encodepoint. (ecc_check_secret_key): Fix memory leak on error of _gcry_ecc_update_curve_param. (ecc_sign, ecc_verify, ecc_encrypt_raw, ecc_decrypt_raw): Remove unnecessary sexp_release and fix memory leak on error of _gcry_ecc_fill_in_curve. (ecc_decrypt_raw): Fix double free of the point kG and memory leak on error of _gcry_ecc_os2ec. 
diff --git a/cipher/ecc.c b/cipher/ecc.c index 5ffe84b..f5bc50a 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -551,7 +551,6 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) return GPG_ERR_NO_OBJ; /* No NBITS parameter. */ rc = _gcry_ecc_fill_in_curve (nbits, curve_name, &E, &nbits); - xfree (curve_name); curve_name = NULL; if (rc) goto leave; @@ -595,10 +594,9 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) !!(flags & PUBKEY_FLAG_COMP), &encpk, &encpklen); if (rc) - return rc; + goto leave; public = mpi_new (0); mpi_set_opaque (public, encpk, encpklen*8); - encpk = NULL; } else { @@ -691,6 +689,7 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) mpi_free (Qx); mpi_free (Qy); _gcry_mpi_ec_free (ctx); + xfree (curve_name); sexp_release (curve_flags); sexp_release (curve_info); return rc; @@ -744,7 +743,7 @@ ecc_check_secret_key (gcry_sexp_t keyparms) &sk.E.p, &sk.E.a, &sk.E.b, &mpi_g, &sk.E.n, &sk.E.h); if (rc) - return rc; + goto leave; } } if (mpi_g) @@ -877,7 +876,6 @@ ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. */ - sexp_release (l1); l1 = sexp_find_token (keyparms, "curve", 5); if (l1) { @@ -886,7 +884,7 @@ ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. @@ -1043,7 +1041,6 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. 
*/ - sexp_release (l1); l1 = sexp_find_token (s_keyparms, "curve", 5); if (l1) { @@ -1052,7 +1049,7 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. @@ -1252,7 +1249,6 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. */ - sexp_release (l1); l1 = sexp_find_token (keyparms, "curve", 5); if (l1) { @@ -1261,7 +1257,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. */ @@ -1421,7 +1417,6 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. */ - sexp_release (l1); l1 = sexp_find_token (keyparms, "curve", 5); if (l1) { @@ -1430,7 +1425,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. */ @@ -1467,10 +1462,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) */ rc = _gcry_ecc_os2ec (&kG, data_e); if (rc) - { - point_free (&kG); - return rc; - } + goto leave; ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0, sk.E.p, sk.E.a, sk.E.b); -- From cvs at cvs.gnupg.org Tue Jul 7 04:09:54 2015 From: cvs at cvs.gnupg.org (by NIIBE Yutaka) Date: Tue, 07 Jul 2015 04:09:54 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-231-g0a7547e Message-ID: This is an automated email from the git hooks/post-receive script. 
It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 0a7547e487a8bc4e7ac9599c55579eb2e4a13f06 (commit) from a36ee7501f68ad7ebcfe31f9659430b9d2c3ddd1 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 0a7547e487a8bc4e7ac9599c55579eb2e4a13f06 Author: NIIBE Yutaka Date: Mon Jul 6 12:01:00 2015 +0900 ecc: fix memory leaks. cipher/ecc.c (ecc_generate): Fix memory leak on error of _gcry_pk_util_parse_flaglist and _gcry_ecc_eddsa_encodepoint. (ecc_check_secret_key): Fix memory leak on error of _gcry_ecc_update_curve_param. (ecc_sign, ecc_verify, ecc_encrypt_raw, ecc_decrypt_raw): Remove unnecessary sexp_release and fix memory leak on error of _gcry_ecc_fill_in_curve. (ecc_decrypt_raw): Fix double free of the point kG and memory leak on error of _gcry_ecc_os2ec. diff --git a/cipher/ecc.c b/cipher/ecc.c index 5ffe84b..f5bc50a 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -551,7 +551,6 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) return GPG_ERR_NO_OBJ; /* No NBITS parameter. 
*/ rc = _gcry_ecc_fill_in_curve (nbits, curve_name, &E, &nbits); - xfree (curve_name); curve_name = NULL; if (rc) goto leave; @@ -595,10 +594,9 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) !!(flags & PUBKEY_FLAG_COMP), &encpk, &encpklen); if (rc) - return rc; + goto leave; public = mpi_new (0); mpi_set_opaque (public, encpk, encpklen*8); - encpk = NULL; } else { @@ -691,6 +689,7 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) mpi_free (Qx); mpi_free (Qy); _gcry_mpi_ec_free (ctx); + xfree (curve_name); sexp_release (curve_flags); sexp_release (curve_info); return rc; @@ -744,7 +743,7 @@ ecc_check_secret_key (gcry_sexp_t keyparms) &sk.E.p, &sk.E.a, &sk.E.b, &mpi_g, &sk.E.n, &sk.E.h); if (rc) - return rc; + goto leave; } } if (mpi_g) @@ -877,7 +876,6 @@ ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. */ - sexp_release (l1); l1 = sexp_find_token (keyparms, "curve", 5); if (l1) { @@ -886,7 +884,7 @@ ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. @@ -1043,7 +1041,6 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. */ - sexp_release (l1); l1 = sexp_find_token (s_keyparms, "curve", 5); if (l1) { @@ -1052,7 +1049,7 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. @@ -1252,7 +1249,6 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. 
*/ - sexp_release (l1); l1 = sexp_find_token (keyparms, "curve", 5); if (l1) { @@ -1261,7 +1257,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &pk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. */ @@ -1421,7 +1417,6 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. */ - sexp_release (l1); l1 = sexp_find_token (keyparms, "curve", 5); if (l1) { @@ -1430,7 +1425,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) { rc = _gcry_ecc_fill_in_curve (0, curvename, &sk.E, NULL); if (rc) - return rc; + goto leave; } } /* Guess required fields if a curve parameter has not been given. */ @@ -1467,10 +1462,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) */ rc = _gcry_ecc_os2ec (&kG, data_e); if (rc) - { - point_free (&kG); - return rc; - } + goto leave; ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0, sk.E.p, sk.E.a, sk.E.b); ----------------------------------------------------------------------- Summary of changes: cipher/ecc.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From ed at prote.us Tue Jul 7 17:52:59 2015 From: ed at prote.us (Ed Johns) Date: Tue, 7 Jul 2015 11:52:59 -0400 Subject: [PATCH] Curve25519 encryption support (experimental) Message-ID: Hello, I was interested in trying your Curve25519 encryption support. I've downloaded the latest alpha code. I see that the patch exists. Would you have a test case that I could used to get started with this patch? 
Thanks Ed Johns -------------- next part -------------- An HTML attachment was scrubbed... URL: From gniibe at fsij.org Wed Jul 8 02:34:24 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Wed, 08 Jul 2015 09:34:24 +0900 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <55965577.7020306@fsij.org> References: <55965577.7020306@fsij.org> Message-ID: <559C7010.6040700@fsij.org> Hello, Here is an update of the patch against the current master branch. This is an experimental patch for Curve25519 encryption support. Key generation is done with the SEXP: (genkey(ecc(curve Curve25519)(flags eddsa comp))) Here, I changed the meaning of '(flags eddsa)' a bit. Now, it means that the key is in DJB format and is processed in the DJB way: Little endian Private key: MSB is 0, (MS-1)B is 1, multiple of cofactor ECDH: Assume that the private key conforms to the "Private key" format above. That is, '(flags eddsa)' also makes sense for Curve25519. '(flags comp)' is also a bit different now. It means that the key comes with the prefix 0x40, like EdDSA in GnuPG. (Since the computation for Curve25519 is done with the x-coordinate only, there is no format other than the shorter one. So, it is not "compression", but a request for the prefix.) These changes to the meaning of the flags may be confusing and need to be improved. I tested with a modified version of GnuPG 2.1 and Gnuk. I don't think all possible combinations work. The following are not implemented/tested, and I am not sure about their semantics: (genkey(ecc(curve Curve25519))) (genkey(ecc(curve Curve25519)(flags comp))) (genkey(ecc(curve Curve25519)(flags eddsa))) Please note that this is highly experimental. The SEXP format is not yet decided.
diff --git a/cipher/ecc-common.h b/cipher/ecc-common.h index f0d97ea..6b3b063 100644 --- a/cipher/ecc-common.h +++ b/cipher/ecc-common.h @@ -132,6 +132,8 @@ gpg_err_code_t _gcry_ecc_eddsa_verify (gcry_mpi_t input, ECC_public_key *pk, gcry_mpi_t r, gcry_mpi_t s, int hashalgo, gcry_mpi_t pkmpi); +gpg_err_code_t _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, + mpi_point_t result); /*-- ecc-gost.c --*/ gpg_err_code_t _gcry_ecc_gost_sign (gcry_mpi_t input, ECC_secret_key *skey, diff --git a/cipher/ecc-curves.c b/cipher/ecc-curves.c index 9975bb4..5d855bd 100644 --- a/cipher/ecc-curves.c +++ b/cipher/ecc-curves.c @@ -40,7 +40,7 @@ static const struct const char *other; /* Other name. */ } curve_aliases[] = { - /*{ "Curve25519", "1.3.6.1.4.1.3029.1.5.1" },*/ + { "Curve25519", "1.3.6.1.4.1.3029.1.5.1" }, { "Ed25519", "1.3.6.1.4.1.11591.15.1" }, { "NIST P-192", "1.2.840.10045.3.1.1" }, /* X9.62 OID */ @@ -129,6 +129,18 @@ static const ecc_domain_parms_t domain_parms[] = "0x6666666666666666666666666666666666666666666666666666666666666658", "0x08" }, + { + /* (y^2 = x^3 + 486662*x^2 + x) */ + "Curve25519", 256, 0, + MPI_EC_MONTGOMERY, ECC_DIALECT_ED25519, + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + "0x01DB41", + "0x01", + "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED", + "0x0000000000000000000000000000000000000000000000000000000000000009", + "0x20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9", + "0x08" + }, #if 0 /* No real specs yet found. 
*/ { /* x^2 + y^2 = 1 + 3617x^2y^2 mod 2^414 - 17 */ diff --git a/cipher/ecc-eddsa.c b/cipher/ecc-eddsa.c index 4323d8e..72481ba 100644 --- a/cipher/ecc-eddsa.c +++ b/cipher/ecc-eddsa.c @@ -400,6 +400,51 @@ _gcry_ecc_eddsa_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result, } +gpg_err_code_t +_gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + + if (mpi_is_opaque (pk)) + { + const unsigned char *buf; + + buf = mpi_get_opaque (pk, &rawmpilen); + if (!buf) + return GPG_ERR_INV_OBJ; + rawmpilen = (rawmpilen + 7)/8; + + if (rawmpilen > 1 && (rawmpilen%2) && buf[0] == 0x40) + { + rawmpilen--; + buf++; + } + + rawmpi = xtrymalloc (rawmpilen? rawmpilen:1); + if (!rawmpi) + return gpg_err_code_from_syserror (); + memcpy (rawmpi, buf, rawmpilen); + reverse_buffer (rawmpi, rawmpilen); + } + else + { + /* Note: Without using an opaque MPI it is not reliable possible + to find out whether the public key has been given in + uncompressed format. Thus we expect native EdDSA format. */ + rawmpi = _gcry_mpi_get_buffer (pk, ctx->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + } + + _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0); + xfree (rawmpi); + mpi_set_ui (result->z, 1); + + return 0; +} + + /* Compute the A value as used by EdDSA. The caller needs to provide the context EC and the actual secret D as an MPI. 
The function returns a newly allocated 64 byte buffer at r_digest; the first 32 diff --git a/cipher/ecc.c b/cipher/ecc.c index f5bc50a..de4fdbd 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -174,7 +174,10 @@ nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx, point_init (&sk->Q); x = mpi_new (pbits); - y = mpi_new (pbits); + if (r_y == NULL) + y = NULL; + else + y = mpi_new (pbits); if (_gcry_mpi_ec_get_affine (x, y, &Q, ctx)) log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q"); @@ -187,7 +190,7 @@ nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx, * possibilities without any loss of security. Note that we don't * do that for Ed25519 so that we do not violate the special * construction of the secret key. */ - if (E->dialect == ECC_DIALECT_ED25519) + if (E->dialect == ECC_DIALECT_ED25519 || r_y == NULL) point_set (&sk->Q, &Q); else { @@ -231,7 +234,8 @@ nist_generate_key (ECC_secret_key *sk, elliptic_curve_t *E, mpi_ec_t ctx, } *r_x = x; - *r_y = y; + if (r_y) + *r_y = y; point_free (&Q); /* Now we can test our keys (this should never fail!). */ @@ -307,7 +311,7 @@ test_ecdh_only_keys (ECC_secret_key *sk, unsigned int nbits) mpi_ec_t ec; if (DBG_CIPHER) - log_debug ("Testing key.\n"); + log_debug ("Testing ECDH only key.\n"); point_init (&R_); @@ -571,7 +575,9 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) ctx = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, 0, E.p, E.a, E.b); - if ((flags & PUBKEY_FLAG_EDDSA)) + if (E.model == MPI_EC_MONTGOMERY) + rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, NULL); + else if ((flags & PUBKEY_FLAG_EDDSA)) rc = _gcry_ecc_eddsa_genkey (&sk, &E, ctx, flags); else rc = nist_generate_key (&sk, &E, ctx, flags, nbits, &Qx, &Qy); @@ -581,18 +587,31 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) /* Copy data to the result. 
*/ Gx = mpi_new (0); Gy = mpi_new (0); - if (_gcry_mpi_ec_get_affine (Gx, Gy, &sk.E.G, ctx)) - log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G"); - base = _gcry_ecc_ec2os (Gx, Gy, sk.E.p); if (sk.E.dialect == ECC_DIALECT_ED25519 && !(flags & PUBKEY_FLAG_NOCOMP)) { unsigned char *encpk; unsigned int encpklen; - /* (Gx and Gy are used as scratch variables) */ - rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, Gx, Gy, - !!(flags & PUBKEY_FLAG_COMP), - &encpk, &encpklen); + if (E.model != MPI_EC_MONTGOMERY) + /* (Gx and Gy are used as scratch variables) */ + rc = _gcry_ecc_eddsa_encodepoint (&sk.Q, ctx, Gx, Gy, + !!(flags & PUBKEY_FLAG_COMP), + &encpk, &encpklen); + else + { + int off = !!(flags & PUBKEY_FLAG_COMP); + + encpk = _gcry_mpi_get_buffer_extra (Qx, ctx->nbits/8, off?-1:0, + &encpklen, NULL); + if (encpk == NULL) + rc = gpg_err_code_from_syserror (); + else + { + if (off) + encpk[0] = 0x40; + encpklen += off; + } + } if (rc) goto leave; public = mpi_new (0); @@ -600,6 +619,9 @@ ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) } else { + if (_gcry_mpi_ec_get_affine (Gx, Gy, &sk.E.G, ctx)) + log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G"); + base = _gcry_ecc_ec2os (Gx, Gy, sk.E.p); if (!Qx) { /* This is the case for a key from _gcry_ecc_eddsa_generate @@ -1213,6 +1235,18 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) gcry_mpi_t data = NULL; ECC_public_key pk; mpi_ec_t ec = NULL; + int flags; + + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + sexp_release (l1); + l1 = NULL; memset (&pk, 0, sizeof pk); _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, @@ -1236,7 +1270,9 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) /* * Extract the key. 
*/ - rc = sexp_extract_param (keyparms, NULL, "-p?a?b?g?n?h?+q", + rc = sexp_extract_param (keyparms, NULL, + (flags & PUBKEY_FLAG_EDDSA)? + "-p?a?b?g?n?h?/q" : "-p?a?b?g?n?h?+q", &pk.E.p, &pk.E.a, &pk.E.b, &mpi_g, &pk.E.n, &pk.E.h, &mpi_q, NULL); if (rc) @@ -1288,26 +1324,34 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } + /* Compute the encrypted value. */ + ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, 0, + pk.E.p, pk.E.a, pk.E.b); + /* Convert the public key. */ if (mpi_q) { point_init (&pk.Q); - rc = _gcry_ecc_os2ec (&pk.Q, mpi_q); + if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (mpi_q, ec, &pk.Q); + else + rc = _gcry_ecc_os2ec (&pk.Q, mpi_q); if (rc) goto leave; } - /* Compute the encrypted value. */ - ec = _gcry_mpi_ec_p_internal_new (pk.E.model, pk.E.dialect, 0, - pk.E.p, pk.E.a, pk.E.b); - /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */ { mpi_point_struct R; /* Result that we return. 
*/ gcry_mpi_t x, y; + unsigned char *rawmpi; + unsigned int rawmpilen; x = mpi_new (0); - y = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y = NULL; + else + y = mpi_new (0); point_init (&R); @@ -1316,14 +1360,38 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) log_fatal ("ecdh: Failed to get affine coordinates for kdG\n"); - mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p); + if (y) + mpi_s = _gcry_ecc_ec2os (x, y, pk.E.p); + else + { + rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + rc = gpg_err_code_from_syserror (); + else + { + mpi_s = mpi_new (0); + mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8); + } + } /* R = kG */ _gcry_mpi_ec_mul_point (&R, data, &pk.E.G, ec); if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) log_fatal ("ecdh: Failed to get affine coordinates for kG\n"); - mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p); + if (y) + mpi_e = _gcry_ecc_ec2os (x, y, pk.E.p); + else + { + rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + rc = gpg_err_code_from_syserror (); + else + { + mpi_e = mpi_new (0); + mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8); + } + } mpi_free (x); mpi_free (y); @@ -1331,7 +1399,8 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) point_free (&R); } - rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); + if (!rc) + rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); leave: _gcry_mpi_release (pk.E.p); @@ -1347,6 +1416,7 @@ ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_mpi_release (mpi_s); _gcry_mpi_release (mpi_e); xfree (curvename); + sexp_release (l1); _gcry_mpi_ec_free (ec); _gcry_pk_util_free_encoding_ctx (&ctx); if (DBG_CIPHER) @@ -1376,6 +1446,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) mpi_point_struct kG; mpi_point_struct R; gcry_mpi_t r = NULL; + 
int flags = 0; memset (&sk, 0, sizeof sk); point_init (&kG); @@ -1384,6 +1455,17 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, ecc_get_nbits (keyparms)); + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + sexp_release (l1); + l1 = NULL; + /* * Extract the data. */ @@ -1457,15 +1539,19 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) } + ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0, + sk.E.p, sk.E.a, sk.E.b); + /* * Compute the plaintext. */ - rc = _gcry_ecc_os2ec (&kG, data_e); + if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (data_e, ec, &kG); + else + rc = _gcry_ecc_os2ec (&kG, data_e); if (rc) goto leave; - ec = _gcry_mpi_ec_p_internal_new (sk.E.model, sk.E.dialect, 0, - sk.E.p, sk.E.a, sk.E.b); /* R = dkG */ _gcry_mpi_ec_mul_point (&R, sk.d, &kG, ec); @@ -1475,12 +1561,30 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) gcry_mpi_t x, y; x = mpi_new (0); - y = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y = NULL; + else + y = mpi_new (0); if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) log_fatal ("ecdh: Failed to get affine coordinates\n"); - r = _gcry_ecc_ec2os (x, y, sk.E.p); + if (y) + r = _gcry_ecc_ec2os (x, y, sk.E.p); + else + { + unsigned char *rawmpi; + unsigned int rawmpilen; + + rawmpi = _gcry_mpi_get_buffer (x, ec->nbits/8, &rawmpilen, NULL); + if (!rawmpi) + rc = gpg_err_code_from_syserror (); + else + { + r = mpi_new (0); + mpi_set_opaque (r, rawmpi, rawmpilen*8); + } + } if (!r) rc = gpg_err_code_from_syserror (); else -- From gniibe at fsij.org Wed Jul 8 03:32:05 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Wed, 08 Jul 2015 10:32:05 +0900 Subject: [PATCH] Curve25519 encryption support (experimental) In-Reply-To: 
References: Message-ID: <559C7D95.9020409@fsij.org> Hello, Thank you for your interests. On 07/08/2015 12:52 AM, Ed Johns wrote: > I was interested in trying your Curve25519 encryption support. I've > downloaded the latest alpha code. I see that the patch exists. Today, I posted new updated patch. https://lists.gnupg.org/pipermail/gcrypt-devel/2015-July/003464.html The patch is intended to apply to the master (development) branch of libgcrypt. The code is available at git.gnupg.org, you can browse it; http://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=summary > Would you have a test case that I could used to get started with > this patch? Simple tests are not yet available. Sure, I'll add test cases, so that features can be tested by 'make check'. THE example is available, as GnuPG. My post is: https://lists.gnupg.org/pipermail/gnupg-devel/2015-July/030118.html The ECDH encryption/decryption code is divided into GnuPG and libgcrypt. It would be good to see both to study. ECDH encryption/decryption for GnuPG is described in: http://tools.ietf.org/html/rfc6637#section-8 For Curve25519, only x-coordinate is used in the computation. -- From peter at lekensteyn.nl Thu Jul 9 17:11:30 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Thu, 9 Jul 2015 17:11:30 +0200 Subject: [PATCH 0/6] Memory leaks and undefined behavior fixes Message-ID: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> Hi, Here are some small patches which fixes memory leaks and undefined behavior (copying from/to a NULL buffer). These were caught by running the test suite with ASAN and ubsan enabled. There is also a patch to the commit-msg hook such that it does not complain for the diff in `git commit -v`. The "Add LSan annotation to ignore a memory leak" patch "works" in the sense that the memory leak gets ignored. Finally there is a patch to clarify the RSA secret calculation. This was the original reason why I dug into the libgcrypt code. 
(I suspected a bug in this code, but it turns out that Wireshark had an issue by passing u = q^-1 mod p in the private key instead of u = p^-1 mod q.) Kind regards, Peter -- Peter Wu (6): sexp: fix invalid deallocation in error path ecc: fix memory leak build: ignore scissor line for the commit-msg hook Fix undefined behavior wrt memcpy Add LSan annotation to ignore a memory leak rsa: clarify the RSA secret parameters build-aux/git-hooks/commit-msg | 6 ++++++ cipher/cipher-gcm.c | 2 +- cipher/ecc.c | 1 + cipher/mac-poly1305.c | 3 +++ cipher/rsa.c | 9 +++++++-- mpi/mpiutil.c | 5 ++++- src/g10lib.h | 17 +++++++++++++++++ src/sexp.c | 2 +- 8 files changed, 40 insertions(+), 5 deletions(-) -- 2.4.4 From peter at lekensteyn.nl Thu Jul 9 17:11:31 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Thu, 9 Jul 2015 17:11:31 +0200 Subject: [PATCH 1/6] sexp: fix invalid deallocation in error path In-Reply-To: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> Message-ID: <1436454696-20362-2-git-send-email-peter@lekensteyn.nl> * src/sexp.c: Fix wrong condition in error path. -- This appears to be a copy and paste error and could result in wrong memory being freed in the error path (when arrayisdesc[idx] == 2, the condition ((!2) == 1) is false). Signed-off-by: Peter Wu --- src/sexp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sexp.c b/src/sexp.c index 9bc13ca..1c014e0 100644 --- a/src/sexp.c +++ b/src/sexp.c @@ -2405,7 +2405,7 @@ _gcry_sexp_vextract_param (gcry_sexp_t sexp, const char *path, _gcry_mpi_release (*array[idx]); *array[idx] = NULL; } - else if (!arrayisdesc[idx] == 1) + else if (arrayisdesc[idx] == 1) { /* Caller provided buffer. 
*/ gcry_buffer_t *spec = (gcry_buffer_t*)array[idx]; -- 2.4.4 From peter at lekensteyn.nl Thu Jul 9 17:11:36 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Thu, 9 Jul 2015 17:11:36 +0200 Subject: [PATCH 6/6] rsa: clarify the RSA secret parameters In-Reply-To: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> Message-ID: <1436454696-20362-7-git-send-email-peter@lekensteyn.nl> * cipher/rsa.c: Clarify meaning of the 'u' parameter. Fix error in comments. Signed-off-by: Peter Wu --- cipher/rsa.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cipher/rsa.c b/cipher/rsa.c index 9a8d235..25e9d10 100644 --- a/cipher/rsa.c +++ b/cipher/rsa.c @@ -700,7 +700,12 @@ stronger_key_check ( RSA_secret_key *skey ) * * m = c^d mod n * - * Or faster: + * Or faster using Garner's Algorithm. Note that u is *not* the + * coefficient from RFC 3447 (PKCS#1), but the multiplicative inverse of + * p, mod q, from RFC 4880 (OpenPGP). + * + * (precomputed:) + * u = p ^ -1 mod q * * m1 = c ^ (d mod (p-1)) mod p * m2 = c ^ (d mod (q-1)) mod q @@ -738,7 +743,7 @@ secret (gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey ) if ( mpi_has_sign ( h ) ) mpi_add ( h, h, skey->q ); mpi_mulm( h, skey->u, h, skey->q ); - /* m = m2 + h * p */ + /* m = m1 + h * p */ mpi_mul ( h, h, skey->p ); mpi_add ( output, m1, h ); -- 2.4.4 From peter at lekensteyn.nl Thu Jul 9 17:11:33 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Thu, 9 Jul 2015 17:11:33 +0200 Subject: [PATCH 3/6] build: ignore scissor line for the commit-msg hook In-Reply-To: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> Message-ID: <1436454696-20362-4-git-send-email-peter@lekensteyn.nl> * build-aux/git-hooks/commit-msg: Stop processing more lines when the scissor line is encountered. 
-- This allows the command `git commit -v` to work even if the code is longer than 72 characters. Note that comments are already ignored by the previous line. Signed-off-by: Peter Wu --- build-aux/git-hooks/commit-msg | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/build-aux/git-hooks/commit-msg b/build-aux/git-hooks/commit-msg index 5a697c7..3ca918b 100755 --- a/build-aux/git-hooks/commit-msg +++ b/build-aux/git-hooks/commit-msg @@ -86,11 +86,17 @@ sub check_msg($$) 2 <= @line && length $line[1] and return 'second line must be empty'; + # See git-commit(1), this is the --cleanup=scissors option. Everything + # after and including this line gets ignored. + my $marker = '# ------------------------ >8 ------------------------'; + # Limit line length to allow for the ChangeLog's leading TAB. foreach my $line (@line) { 72 < length $line && $line =~ /^[^#]/ and return 'line longer than 72 characters'; + + last if $line eq $marker; } return ''; -- 2.4.4 From peter at lekensteyn.nl Thu Jul 9 17:11:32 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Thu, 9 Jul 2015 17:11:32 +0200 Subject: [PATCH 2/6] ecc: fix memory leak In-Reply-To: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> Message-ID: <1436454696-20362-3-git-send-email-peter@lekensteyn.nl> * cipher/ecc.c: Release memory which was allocated before by _gcry_pk_util_preparse_sigval. -- Caught by LeakSanitizer (LSan). Now the test suite (make check) passes with no memleaks. Signed-off-by: Peter Wu --- cipher/ecc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cipher/ecc.c b/cipher/ecc.c index f5bc50a..c17a553 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -1040,6 +1040,7 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) if (rc) goto leave; } + sexp_release (l1); /* Add missing parameters using the optional curve parameter. 
*/ l1 = sexp_find_token (s_keyparms, "curve", 5); if (l1) -- 2.4.4 From peter at lekensteyn.nl Thu Jul 9 17:11:35 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Thu, 9 Jul 2015 17:11:35 +0200 Subject: [PATCH 5/6] Add LSan annotation to ignore a memory leak In-Reply-To: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> Message-ID: <1436454696-20362-6-git-send-email-peter@lekensteyn.nl> * src/g10lib.h: Add annotate_leaked_object macro that ignores leaked objects. This avoids LSan from reporting deliberately leaked memory. * mpi/mpiutil.c: Mark "constant" MPIs as leaked. Signed-off-by: Peter Wu --- I am not happy with the method to detect LSan availability, but here I hope to solicit for some feedback. Should the code really be added to mpiutil? Or let the caller (tests/mpitests.c) handle leaks? --- mpi/mpiutil.c | 5 ++++- src/g10lib.h | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/mpi/mpiutil.c b/mpi/mpiutil.c index 71b3f1c..9a796c6 100644 --- a/mpi/mpiutil.c +++ b/mpi/mpiutil.c @@ -211,7 +211,10 @@ _gcry_mpi_free( gcry_mpi_t a ) if (!a ) return; if ((a->flags & 32)) - return; /* Never release a constant. */ + { + annotate_leaked_object(a); + return; /* Never release a constant. */ + } if ((a->flags & 4)) xfree( a->d ); else diff --git a/src/g10lib.h b/src/g10lib.h index 50a08ec..5793f8c 100644 --- a/src/g10lib.h +++ b/src/g10lib.h @@ -88,6 +88,16 @@ #define DIM(v) (sizeof(v)/sizeof((v)[0])) #define DIMof(type,member) DIM(((type *)0)->member) +/* Detect LeakSanitizer (LSan) support for GCC and Clang based on the + availability of AddressSanitizer (ASAN). 
*/ +#ifdef __SANITIZE_ADDRESS__ +# define LEAK_SANITIZER +#elif defined(__has_feature) +# if __has_feature(address_sanitizer) +# define LEAK_SANITIZER +# endif +#endif + /*-- src/global.c -*/ @@ -126,6 +136,13 @@ int _gcry_is_secure (const void *a) _GCRY_GCC_ATTR_PURE; #define xstrdup(a) _gcry_xstrdup ((a)) #define xfree(a) _gcry_free ((a)) +/* Allows "constant" MPIs to be annotated as memory leak. */ +#ifdef LEAK_SANITIZER +# include +# define annotate_leaked_object(a) __lsan_ignore_object((a)) +#else +# define annotate_leaked_object(a) do { } while (0) +#endif /*-- src/misc.c --*/ -- 2.4.4 From peter at lekensteyn.nl Thu Jul 9 17:11:34 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Thu, 9 Jul 2015 17:11:34 +0200 Subject: [PATCH 4/6] Fix undefined behavior wrt memcpy In-Reply-To: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> Message-ID: <1436454696-20362-5-git-send-email-peter@lekensteyn.nl> * cipher/cipher-gcm.c: Do not copy zero bytes from an empty buffer. Let the function continue to add padding as needed though. * cipher/mac-poly1305.c: If the caller requested to finish the hash function without a copy of the result, return immediately. -- Caught by UndefinedBehaviorSanitizer. Signed-off-by: Peter Wu --- cipher/cipher-gcm.c | 2 +- cipher/mac-poly1305.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c index 6b13fc5..3711a1d 100644 --- a/cipher/cipher-gcm.c +++ b/cipher/cipher-gcm.c @@ -474,7 +474,7 @@ do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf, do { - if (buflen + unused < blocksize || unused > 0) + if (buflen > 0 && (buflen + unused < blocksize || unused > 0)) { n = blocksize - unused; n = n < buflen ? 
n : buflen; diff --git a/cipher/mac-poly1305.c b/cipher/mac-poly1305.c index 76b369a..b80f87d 100644 --- a/cipher/mac-poly1305.c +++ b/cipher/mac-poly1305.c @@ -260,6 +260,9 @@ poly1305mac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t *outlen) mac_ctx->marks.tag = 1; } + if (*outlen == 0) + return 0; + if (*outlen <= POLY1305_TAGLEN) buf_cpy (outbuf, mac_ctx->tag, *outlen); else -- 2.4.4 From gniibe at fsij.org Fri Jul 10 03:01:58 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Fri, 10 Jul 2015 10:01:58 +0900 Subject: [PATCH 2/6] ecc: fix memory leak In-Reply-To: <1436454696-20362-3-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-3-git-send-email-peter@lekensteyn.nl> Message-ID: <559F1986.5070902@fsij.org> Hello, Thank you for your check and patches. On 07/10/2015 12:11 AM, Peter Wu wrote: > * cipher/ecc.c: Release memory which was allocated before by > _gcry_pk_util_preparse_sigval. > -- > Caught by LeakSanitizer (LSan). Now the test suite (make check) passes > with no memleaks. This one was introduced by my last commit. You're right. I'm going to commit the fix, but the line inserted will be after the comment as before. -- From cvs at cvs.gnupg.org Fri Jul 10 03:30:37 2015 From: cvs at cvs.gnupg.org (by Peter Wu) Date: Fri, 10 Jul 2015 03:30:37 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-232-g2a7aa3e Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 2a7aa3ea4d03a9c808d5888f5509c08cd27aa27c (commit) from 0a7547e487a8bc4e7ac9599c55579eb2e4a13f06 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. 
- Log ----------------------------------------------------------------- commit 2a7aa3ea4d03a9c808d5888f5509c08cd27aa27c Author: Peter Wu Date: Fri Jul 10 10:15:26 2015 +0900 ecc: fix memory leak. * cipher/ecc.c (ecc_verify): Release memory which was allocated before by _gcry_pk_util_preparse_sigval. (ecc_decrypt_raw): Likewise. -- Caught by LeakSanitizer (LSan). Now the test suite (make check) passes with no memleaks. Signed-off-by: Peter Wu The last commit (0a7547e487a8bc4e7ac9599c55579eb2e4a13f06) includes wrong fixes for sexp_release. ecc_decrypt_raw fix added by gniibe. diff --git a/cipher/ecc.c b/cipher/ecc.c index f5bc50a..e33f999 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -1041,6 +1041,7 @@ ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. */ + sexp_release (l1); l1 = sexp_find_token (s_keyparms, "curve", 5); if (l1) { @@ -1417,6 +1418,7 @@ ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) goto leave; } /* Add missing parameters using the optional curve parameter. 
*/ + sexp_release (l1); l1 = sexp_find_token (keyparms, "curve", 5); if (l1) { ----------------------------------------------------------------------- Summary of changes: cipher/ecc.c | 2 ++ 1 file changed, 2 insertions(+) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From gniibe at fsij.org Fri Jul 10 04:11:08 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Fri, 10 Jul 2015 11:11:08 +0900 Subject: [PATCH 1/6] sexp: fix invalid deallocation in error path In-Reply-To: <1436454696-20362-2-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-2-git-send-email-peter@lekensteyn.nl> Message-ID: <559F29BC.40507@fsij.org> Hello, Thank you for the fix. I'd like to commit your fix without your comment since it is not accurate. Is it OK for you? On 07/10/2015 12:11 AM, Peter Wu wrote: > -- > This appears to be a copy and paste error and could result in wrong > memory being freed in the error path (when arrayisdesc[idx] == 2, the > condition ((!2) == 1) is false). When arrayisdesc[idx] == 1, it means that the buffer is provided by caller. When arrayisdesc[idx] == 2, it means that the buffer is allocated here in _gcry_sexp_vextract_param. When arrayisdesc[idx] == 1, the wrong expression '!arrayisdesc[idx] == 1' is evaluated to !1 == 1 -> 0 == 1 -> 0 (false) and it doesn't go into the statements to set spec->len = 0 but goes into 'else' clause to free the buffer. THIS IS A PROBLEM. When arrayisdesc[idx] == 2, the wrong expression '!arrayisdesc[idx] == 1' is evaluated to !2 == 1 -> 0 == 1 -> 0 (false) and it goes into 'else' clause (which is correct behaviour). 
-- From peter at lekensteyn.nl Sat Jul 11 01:02:23 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Sat, 11 Jul 2015 01:02:23 +0200 Subject: [PATCH 1/6] sexp: fix invalid deallocation in error path In-Reply-To: <559F29BC.40507@fsij.org> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-2-git-send-email-peter@lekensteyn.nl> <559F29BC.40507@fsij.org> Message-ID: <20150710230223.GA22106@al> Hi, On Fri, Jul 10, 2015 at 11:11:08AM +0900, NIIBE Yutaka wrote: > Thank you for the fix. I'd like to commit your fix without your > comment since it is not accurate. Is it OK for you? You are right, I swapped the case. Feel free to adjust it to whatever you deem appropriate. > On 07/10/2015 12:11 AM, Peter Wu wrote: > > -- > > This appears to be a copy and paste error and could result in wrong > > memory being freed in the error path (when arrayisdesc[idx] == 2, the > > condition ((!2) == 1) is false). > > > When arrayisdesc[idx] == 1, it means that the buffer is provided by > caller. > > When arrayisdesc[idx] == 2, it means that the buffer is allocated here > in _gcry_sexp_vextract_param. > > When arrayisdesc[idx] == 1, the wrong expression '!arrayisdesc[idx] == 1' > is evaluated to !1 == 1 -> 0 == 1 -> 0 (false) and it doesn't go into > the statements to set spec->len = 0 but goes into 'else' clause to > free the buffer. THIS IS A PROBLEM. Right, thanks for laying this down. > When arrayisdesc[idx] == 2, the wrong expression '!arrayisdesc[idx] == 1' > is evaluated to !2 == 1 -> 0 == 1 -> 0 (false) and it goes into 'else' > clause (which is correct behaviour). By accident, yes. This patch corrects the 1 case. Kind regards, Peter From cvs at cvs.gnupg.org Tue Jul 14 02:55:43 2015 From: cvs at cvs.gnupg.org (by Peter Wu) Date: Tue, 14 Jul 2015 02:55:43 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-233-g0f9532b Message-ID: This is an automated email from the git hooks/post-receive script. 
It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 0f9532b186c1e0b54d7e7a6d76bce82b6226122b (commit) from 2a7aa3ea4d03a9c808d5888f5509c08cd27aa27c (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 0f9532b186c1e0b54d7e7a6d76bce82b6226122b Author: Peter Wu Date: Tue Jul 14 09:53:38 2015 +0900 sexp: Fix invalid deallocation in error path. * src/sexp.c: Fix wrong condition. -- Signed-off-by: Peter Wu diff --git a/src/sexp.c b/src/sexp.c index 9bc13ca..1c014e0 100644 --- a/src/sexp.c +++ b/src/sexp.c @@ -2405,7 +2405,7 @@ _gcry_sexp_vextract_param (gcry_sexp_t sexp, const char *path, _gcry_mpi_release (*array[idx]); *array[idx] = NULL; } - else if (!arrayisdesc[idx] == 1) + else if (arrayisdesc[idx] == 1) { /* Caller provided buffer. */ gcry_buffer_t *spec = (gcry_buffer_t*)array[idx]; ----------------------------------------------------------------------- Summary of changes: src/sexp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From gniibe at fsij.org Thu Jul 16 06:26:33 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Thu, 16 Jul 2015 13:26:33 +0900 Subject: [PATCH 4/6] Fix undefined behavior wrt memcpy In-Reply-To: <1436454696-20362-5-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-5-git-send-email-peter@lekensteyn.nl> Message-ID: <55A73279.1010607@fsij.org> Hello, Jussi, Last week, following patch was submitted to gcrypt-devel. 
Since it's the code you wrote, I write to you. I think that memcpy can be called with 0 length, but pointers should be valid one (not NULL), even though most implementations works well. So, it is worth to consider the patch for the correctness of the code. On 07/10/2015 12:11 AM, Peter Wu wrote: > * cipher/cipher-gcm.c: Do not copy zero bytes from an empty buffer. Let > the function continue to add padding as needed though. > * cipher/mac-poly1305.c: If the caller requested to finish the hash > function without a copy of the result, return immediately. > -- > Caught by UndefinedBehaviorSanitizer. > > Signed-off-by: Peter Wu > --- > cipher/cipher-gcm.c | 2 +- > cipher/mac-poly1305.c | 3 +++ > 2 files changed, 4 insertions(+), 1 deletion(-) > > diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c > index 6b13fc5..3711a1d 100644 > --- a/cipher/cipher-gcm.c > +++ b/cipher/cipher-gcm.c > @@ -474,7 +474,7 @@ do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf, > > do > { > - if (buflen + unused < blocksize || unused > 0) > + if (buflen > 0 && (buflen + unused < blocksize || unused > 0)) > { > n = blocksize - unused; > n = n < buflen ? 
n : buflen; > diff --git a/cipher/mac-poly1305.c b/cipher/mac-poly1305.c > index 76b369a..b80f87d 100644 > --- a/cipher/mac-poly1305.c > +++ b/cipher/mac-poly1305.c > @@ -260,6 +260,9 @@ poly1305mac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t *outlen) > mac_ctx->marks.tag = 1; > } > > + if (*outlen == 0) > + return 0; > + > if (*outlen <= POLY1305_TAGLEN) > buf_cpy (outbuf, mac_ctx->tag, *outlen); > else > From gniibe at fsij.org Thu Jul 16 06:57:17 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Thu, 16 Jul 2015 13:57:17 +0900 Subject: [PATCH 6/6] rsa: clarify the RSA secret parameters In-Reply-To: <1436454696-20362-7-git-send-email-peter@lekensteyn.nl> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-7-git-send-email-peter@lekensteyn.nl> Message-ID: <55A739AD.1030306@fsij.org> Hello, Thank you for the patch. On 07/10/2015 12:11 AM, Peter Wu wrote: > * cipher/rsa.c: Clarify meaning of the 'u' parameter. Fix error in > comments. For the first part, I think that it's correctly described in the documentation: (gcrypt)RSA key parameters It even has an example: Note that OpenSSL uses slightly different parameters: q < p and u = q^{-1} \bmod p. To use these parameters you will need to swap the values and recompute u. Here is example code to do this: if (gcry_mpi_cmp (p, q) > 0) { gcry_mpi_swap (p, q); gcry_mpi_invm (u, p, q); } I'm not sure (and wondering) if we need more. I know that it's a pitfall of libgcrypt (something common). For your reference, I know this one in Fedora: http://pkgs.fedoraproject.org/cgit/libgcrypt.git/diff/?id=376991d05a1a0e2911242061c41ca5c5a915e339&id2=f56a95f03b711eac70ddc8673b6417a93a45c2bd That was the same mistake. -- From cvs at cvs.gnupg.org Thu Jul 16 07:12:00 2015 From: cvs at cvs.gnupg.org (by Peter Wu) Date: Thu, 16 Jul 2015 07:12:00 +0200 Subject: [git] GCRYPT - branch, master, updated.
libgcrypt-1.6.0-234-g9cd55e8 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 9cd55e8e948f0049cb23495f536decf797d072f7 (commit) from 0f9532b186c1e0b54d7e7a6d76bce82b6226122b (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 9cd55e8e948f0049cb23495f536decf797d072f7 Author: Peter Wu Date: Thu Jul 16 13:59:44 2015 +0900 rsa: Fix error in comments. * cipher/rsa.c: Fix. -- Signed-off-by: Peter Wu diff --git a/cipher/rsa.c b/cipher/rsa.c index 9a8d235..e4f73d5 100644 --- a/cipher/rsa.c +++ b/cipher/rsa.c @@ -738,7 +738,7 @@ secret (gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey ) if ( mpi_has_sign ( h ) ) mpi_add ( h, h, skey->q ); mpi_mulm( h, skey->u, h, skey->q ); - /* m = m2 + h * p */ + /* m = m1 + h * p */ mpi_mul ( h, h, skey->p ); mpi_add ( output, m1, h ); ----------------------------------------------------------------------- Summary of changes: cipher/rsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From jussi.kivilinna at iki.fi Thu Jul 16 16:37:57 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Thu, 16 Jul 2015 17:37:57 +0300 Subject: [PATCH 4/6] Fix undefined behavior wrt memcpy In-Reply-To: <55A73279.1010607@fsij.org> Message-ID: <8577d624-a9d4-4f57-a009-3e0968b738ec@email.android.com> Hello, Patch looks good and should definitely be merged. I will push it to repo when I get to my computer in a week or two. 
-Jussi 16.7.2015 7.26 NIIBE Yutaka kirjoitti: > > Hello, Jussi, > > Last week, following patch was submitted to gcrypt-devel. Since it's > the code you wrote, I write to you. > > I think that memcpy can be called with 0 length, but pointers should > be valid one (not NULL), even though most implementations works well. > So, it is worth to consider the patch for the correctness of the code. > > On 07/10/2015 12:11 AM, Peter Wu wrote: > > * cipher/cipher-gcm.c: Do not copy zero bytes from an empty buffer. Let > > the function continue to add padding as needed though. > > * cipher/mac-poly1305.c: If the caller requested to finish the hash > > function without a copy of the result, return immediately. > > -- > > Caught by UndefinedBehaviorSanitizer. > > > > Signed-off-by: Peter Wu > > --- > > cipher/cipher-gcm.c | 2 +- > > cipher/mac-poly1305.c | 3 +++ > > 2 files changed, 4 insertions(+), 1 deletion(-) > > > > diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c > > index 6b13fc5..3711a1d 100644 > > --- a/cipher/cipher-gcm.c > > +++ b/cipher/cipher-gcm.c > > @@ -474,7 +474,7 @@ do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf, > > > > do > > { > > - if (buflen + unused < blocksize || unused > 0) > > + if (buflen > 0 && (buflen + unused < blocksize || unused > 0)) > > { > > n = blocksize - unused; > > n = n < buflen ? n : buflen; > > diff --git a/cipher/mac-poly1305.c b/cipher/mac-poly1305.c > > index 76b369a..b80f87d 100644 > > --- a/cipher/mac-poly1305.c > > +++ b/cipher/mac-poly1305.c > > @@ -260,6 +260,9 @@ poly1305mac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t *outlen) > > mac_ctx->marks.tag = 1; > > } > > > > + if (*outlen == 0) > > + return 0; > > + > > if (*outlen <= POLY1305_TAGLEN) > > buf_cpy (outbuf, mac_ctx->tag, *outlen); > >
else > > > > > _______________________________________________ > Gcrypt-devel mailing list > Gcrypt-devel at gnupg.org > http://lists.gnupg.org/mailman/listinfo/gcrypt-devel > From peter at lekensteyn.nl Thu Jul 16 21:40:50 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Thu, 16 Jul 2015 21:40:50 +0200 Subject: [PATCH 6/6] rsa: clarify the RSA secret parameters In-Reply-To: <55A739AD.1030306@fsij.org> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-7-git-send-email-peter@lekensteyn.nl> <55A739AD.1030306@fsij.org> Message-ID: <20150716194050.GA31777@al> On Thu, Jul 16, 2015 at 01:57:17PM +0900, NIIBE Yutaka wrote: > Hello, > > Thank you for the patch. > > On 07/10/2015 12:11 AM, Peter Wu wrote: > > * cipher/rsa.c: Clarify meaning of the 'u' parameter. Fix error in > > comments. > > For the first part, I think that it's correctly described in the > documentation: (gcrypt)RSA key parameters > > Even it has an example as: > > Note that OpenSSL uses slighly different parameters: q < p and u = > q^{-1} \bmod p. To use these parameters you will need to swap the > values and recompute u. Here is example code to do this: > > if (gcry_mpi_cmp (p, q) > 0) > { > gcry_mpi_swap (p, q); > gcry_mpi_invm (u, p, q); > } > > I'm not sure (and wondering) if we need more. That is documented in a different place. Repeating the same does not hurt, especially when noting why it differs from other common RSA implementations (presumably due to the origin from OpenPGP). > I know that it's a pitfall of libgcrypt (something common). For your > reference, I know this one in Fedora: > > http://pkgs.fedoraproject.org/cgit/libgcrypt.git/diff/?id=376991d05a1a0e2911242061c41ca5c5a915e339&id2=f56a95f03b711eac70ddc8673b6417a93a45c2bd > > That's was same mistake. Given this mistake, why not add the comment to save some hours from other reviewers? In my case it was an unexpected user error where $u = q^{-1} mod p$ was used. 
Maybe the function that imports the RSA parameters should first check for u * p == 1 mod q as a sanity check? -- Kind regards, Peter Wu https://lekensteyn.nl From gniibe at fsij.org Fri Jul 17 02:31:36 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Fri, 17 Jul 2015 09:31:36 +0900 Subject: [PATCH 6/6] rsa: clarify the RSA secret parameters In-Reply-To: <20150716194050.GA31777@al> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-7-git-send-email-peter@lekensteyn.nl> <55A739AD.1030306@fsij.org> <20150716194050.GA31777@al> Message-ID: <55A84CE8.6010703@fsij.org> On 07/17/2015 04:40 AM, Peter Wu wrote: > That is documented in a different place. Repeating the same does not > hurt, especially when noting why it differs from other common RSA > implementations (presumably due to the origin from OpenPGP). > >> I know that it's a pitfall of libgcrypt (something common). For your >> reference, I know this one in Fedora: >> >> http://pkgs.fedoraproject.org/cgit/libgcrypt.git/diff/?id=376991d05a1a0e2911242061c41ca5c5a915e339&id2=f56a95f03b711eac70ddc8673b6417a93a45c2bd >> >> That's was same mistake. > > Given this mistake, why not add the comment to save some hours from > other reviewers? > > In my case it was an unexpected user error where $u = q^{-1} mod p$ was > used. Maybe the function that imports the RSA parameters should first > check for u * p == 1 mod q as a sanity check? For myself, I support your opinion toward better/meaningful comments. Well, I have such a tendency to look into the code directly (instead of documentation), too. Currently, I'm not sure how we can improve the comment. Every code has its context. It would be better to describe its important context/assumption. Let us wait to ask Werner's opinion. 
-- From wk at gnupg.org Wed Jul 22 13:54:09 2015 From: wk at gnupg.org (Werner Koch) Date: Wed, 22 Jul 2015 13:54:09 +0200 Subject: [PATCH 3/6] build: ignore scissor line for the commit-msg hook In-Reply-To: <1436454696-20362-4-git-send-email-peter@lekensteyn.nl> (Peter Wu's message of "Thu, 9 Jul 2015 17:11:33 +0200") References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-4-git-send-email-peter@lekensteyn.nl> Message-ID: <87oaj4e7by.fsf@vigenere.g10code.de> On Thu, 9 Jul 2015 17:11, peter at lekensteyn.nl said: > * build-aux/git-hooks/commit-msg: Stop processing more lines when the > scissor line is encountered. > -- > This allows the command `git commit -v` to work even if the code is > longer than 72 characters. Note that comments are already ignored by the > previous line. So this is to allow overlong lines in commit messages? --cleanup=scissor seems to be a newer option. I think this is useful in some cases. I will propagate your patch to the other packages. -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From wk at gnupg.org Wed Jul 22 15:06:53 2015 From: wk at gnupg.org (Werner Koch) Date: Wed, 22 Jul 2015 15:06:53 +0200 Subject: [PATCH 5/6] Add LSan annotation to ignore a memory leak In-Reply-To: <1436454696-20362-6-git-send-email-peter@lekensteyn.nl> (Peter Wu's message of "Thu, 9 Jul 2015 17:11:35 +0200") References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-6-git-send-email-peter@lekensteyn.nl> Message-ID: <87fv4ge3yq.fsf@vigenere.g10code.de> On Thu, 9 Jul 2015 17:11, peter at lekensteyn.nl said: > I am not happy with the method to detect LSan availability, but here I > hope to solicit for some feedback. Should the code really be added to Adding a __GNUC__ condition would make it more portable. I like this style of annotations and they are useful for other code as well. 
libgpg-error is used by all GnuPG code and thus it might be useful to add this annotation macros to gpg-error.h. > mpiutil? Or let the caller (tests/mpitests.c) handle leaks? In GnuPG we have a register_mem_cleanup_func to run free with atexit but that is not appropriate for a library. I think such annotations are a better way. Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From wk at gnupg.org Wed Jul 22 15:10:28 2015 From: wk at gnupg.org (Werner Koch) Date: Wed, 22 Jul 2015 15:10:28 +0200 Subject: [PATCH 6/6] rsa: clarify the RSA secret parameters In-Reply-To: <20150716194050.GA31777@al> (Peter Wu's message of "Thu, 16 Jul 2015 21:40:50 +0200") References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-7-git-send-email-peter@lekensteyn.nl> <55A739AD.1030306@fsij.org> <20150716194050.GA31777@al> Message-ID: <87bnf4e3sr.fsf@vigenere.g10code.de> On Thu, 16 Jul 2015 21:40, peter at lekensteyn.nl said: > That is documented in a different place. Repeating the same does not > hurt, especially when noting why it differs from other common RSA > implementations (presumably due to the origin from OpenPGP). Actually PGP why of using the CRT is older than SSLeay ;-) > Given this mistake, why not add the comment to save some hours from > other reviewers? I agree; it does not harm. Shalom-Salam, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From wk at gnupg.org Wed Jul 22 15:21:24 2015 From: wk at gnupg.org (Werner Koch) Date: Wed, 22 Jul 2015 15:21:24 +0200 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <559C7010.6040700@fsij.org> (NIIBE Yutaka's message of "Wed, 08 Jul 2015 09:34:24 +0900") References: <55965577.7020306@fsij.org> <559C7010.6040700@fsij.org> Message-ID: <877fpse3aj.fsf@vigenere.g10code.de> On Wed, 8 Jul 2015 02:34, gniibe at fsij.org said: > Here, I changed the meaning of '(flags eddsa)' a bit. 
Now, it means > that the key is in DJB format and under DJB processing: We should not overload that flag with a new meaning: @item eddsa @cindex EdDSA Use the EdDSA scheme signing instead of the default ECDSA algorithm. Note that the EdDSA uses a special form of the public key. This flag describes the EdDSA algorithm and not the encoding of the points. Right, the default for that algorithm are those from Bernstein et al's paper but the idea is to use it also for future versions of EdDSA. For example: @misc{cryptoeprint:2015:677, author = {Daniel J. Bernstein and Simon Josefsson and Tanja Lange and Peter Schwabe and Bo-Yin Yang}, title = {EdDSA for more curves}, howpublished = {Cryptology ePrint Archive, Report 2015/677}, year = {2015}, note = {\url{http://eprint.iacr.org/}}, } We should keep the encoding separate. What about an "x-only" or "mont" flag to indicate that we only convey the x-ccordinate? Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From peter at lekensteyn.nl Wed Jul 22 21:13:25 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Wed, 22 Jul 2015 21:13:25 +0200 Subject: DCO signature Message-ID: <20150722191325.GA8113@al> Libgcrypt Developer's Certificate of Origin. 
Version 1.0 ========================================================= By making a contribution to the Libgcrypt project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the free software license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate free software license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same free software license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the free software license(s) involved. Signed-off-by: Peter Wu -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 473 bytes Desc: not available URL: From peter at lekensteyn.nl Wed Jul 22 21:17:13 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Wed, 22 Jul 2015 21:17:13 +0200 Subject: [PATCH 3/6] build: ignore scissor line for the commit-msg hook In-Reply-To: <87oaj4e7by.fsf@vigenere.g10code.de> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-4-git-send-email-peter@lekensteyn.nl> <87oaj4e7by.fsf@vigenere.g10code.de> Message-ID: <20150722191713.GB8113@al> On Wed, Jul 22, 2015 at 01:54:09PM +0200, Werner Koch wrote: > On Thu, 9 Jul 2015 17:11, peter at lekensteyn.nl said: > > * build-aux/git-hooks/commit-msg: Stop processing more lines when the > > scissor line is encountered. 
> > -- > > This allows the command `git commit -v` to work even if the code is > > longer than 72 characters. Note that comments are already ignored by the > > previous line. > > So this is to allow overlong lines in commit messages? Yes, it allows overlong lines in the draft commit message which are removed on exit. > --cleanup=scissor seems to be a newer option. I think this is useful in > some cases. I will propagate your patch to the other packages. This option was introduced with git v1.8.5-rc2-7-g1a72cfd. -- Kind regards, Peter Wu https://lekensteyn.nl From peter at lekensteyn.nl Wed Jul 22 21:40:41 2015 From: peter at lekensteyn.nl (Peter Wu) Date: Wed, 22 Jul 2015 21:40:41 +0200 Subject: [PATCH 5/6] Add LSan annotation to ignore a memory leak In-Reply-To: <87fv4ge3yq.fsf@vigenere.g10code.de> References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-6-git-send-email-peter@lekensteyn.nl> <87fv4ge3yq.fsf@vigenere.g10code.de> Message-ID: <20150722194041.GC8113@al> On Wed, Jul 22, 2015 at 03:06:53PM +0200, Werner Koch wrote: > On Thu, 9 Jul 2015 17:11, peter at lekensteyn.nl said: > > > I am not happy with the method to detect LSan availability, but here I > > hope to solicit for some feedback. Should the code really be added to > > Adding a __GNUC__ condition would make it more portable. I like this > style of annotations and they are useful for other code as well. LSan is also supported by the Clang compiler, how would adding __GNUC__ make the definition more portable? > libgpg-error is used by all GnuPG code and thus it might be useful to > add this annotation macros to gpg-error.h. I thought that LSan detection could be improved (http://stackoverflow.com/q/31273016/427545), but looking at Chromium, they seem to set cflags, ldflags (-fsanitize=leak) and macros (including LEAK_SANITIZER) depending on the build config. Ah well, those who use -fsanitize=threads or -fsanitize=leaks can also set -DLEAK_SANITIZER themselves. 
I should probably add a comment for that in the patch. > > mpiutil? Or let the caller (tests/mpitests.c) handle leaks? > > In GnuPG we have a register_mem_cleanup_func to run free with atexit but > that is not appropriate for a library. I think such annotations are a > better way. I will look at adding this to libgpg-error. Is src/gpg-error.h.in the appropriate file for this? Can the patch be submitted to this list? -- Kind regards, Peter Wu https://lekensteyn.nl From gniibe at fsij.org Thu Jul 23 10:02:58 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Thu, 23 Jul 2015 17:02:58 +0900 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <877fpse3aj.fsf@vigenere.g10code.de> References: <55965577.7020306@fsij.org> <559C7010.6040700@fsij.org> <877fpse3aj.fsf@vigenere.g10code.de> Message-ID: <55B09FB2.9060403@fsij.org> On 07/22/2015 10:21 PM, Werner Koch wrote: > On Wed, 8 Jul 2015 02:34, gniibe at fsij.org said: > >> Here, I changed the meaning of '(flags eddsa)' a bit. Now, it means >> that the key is in DJB format and under DJB processing: > > We should not overload that flag with a new meaning: OK, I see. Let me explain my point. When I said "the key is in DJB format and under DJB processing", I meant: Secret key is multiplied by cofactor and its msb is set. Encryption/signature processing assumes such a secret key. Well, let me call this "sec-is-multiplied-by-cofactor-and-msb-set", for now. For Curve25519, its model is MPI_EC_MONTGOMERY and its seret key should come with a flag of sec-is-multiplied-by-cofactor-and-msb-set. The eddsa flag already implies something like sec-is-multiplied-by-cofactor-and-msb-set flag, since EdDSA computation also assumes similar. I think that when we introduce another curve for EdDSA, we will fix current hard-wired cofactor handling and msb handling. > We should keep the encoding separate. I see, I will. 
It is possible to define co-factor ECDH with Montgomery curve where secret key is 1 <= d <= n, and ECDH computation is done by hkdG (multiplied by cofactor). It's not implemented yet. So, its meaning is sec-is-multiplied-by-cofactor-and-msb-set (not mont or x-only, which is defined by curve's model or compression). I don't have good naming for the flag though. -- From wk at gnupg.org Thu Jul 23 14:32:55 2015 From: wk at gnupg.org (Werner Koch) Date: Thu, 23 Jul 2015 14:32:55 +0200 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <55B09FB2.9060403@fsij.org> (NIIBE Yutaka's message of "Thu, 23 Jul 2015 17:02:58 +0900") References: <55965577.7020306@fsij.org> <559C7010.6040700@fsij.org> <877fpse3aj.fsf@vigenere.g10code.de> <55B09FB2.9060403@fsij.org> Message-ID: <87y4i7awaw.fsf@vigenere.g10code.de> On Thu, 23 Jul 2015 10:02, gniibe at fsij.org said: > So, its meaning is sec-is-multiplied-by-cofactor-and-msb-set (not mont > or x-only, which is defined by curve's model or compression). > > I don't have good naming for the flag though. "djb" :-) Anyone else with a suggestion for the name of such a flag? Shalom-Salam, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From wk at gnupg.org Thu Jul 23 14:35:47 2015 From: wk at gnupg.org (Werner Koch) Date: Thu, 23 Jul 2015 14:35:47 +0200 Subject: [PATCH 5/6] Add LSan annotation to ignore a memory leak In-Reply-To: <20150722194041.GC8113@al> (Peter Wu's message of "Wed, 22 Jul 2015 21:40:41 +0200") References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-6-git-send-email-peter@lekensteyn.nl> <87fv4ge3yq.fsf@vigenere.g10code.de> <20150722194041.GC8113@al> Message-ID: <87twsvaw64.fsf@vigenere.g10code.de> On Wed, 22 Jul 2015 21:40, peter at lekensteyn.nl said: > LSan is also supported by the Clang compiler, how would adding __GNUC__ > make the definition more portable? Although I do not like that but Clang also defined __GNUC__. 
Thus __GNUC__ is sufficient to enable the annotation for those tow while it won't be enabled for other compilers. Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From cvs at cvs.gnupg.org Thu Jul 23 14:47:34 2015 From: cvs at cvs.gnupg.org (by Werner Koch) Date: Thu, 23 Jul 2015 14:47:34 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-235-gda500a0 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via da500a030481a3c534e933b642e6f051aa064b97 (commit) from 9cd55e8e948f0049cb23495f536decf797d072f7 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit da500a030481a3c534e933b642e6f051aa064b97 Author: Werner Koch Date: Thu Jul 23 14:38:49 2015 +0200 Register DCO for Peter Wu. -- diff --git a/AUTHORS b/AUTHORS index 1e34aed..42d5ee6 100644 --- a/AUTHORS +++ b/AUTHORS @@ -165,6 +165,9 @@ Markus Teich Milan Broz 2014-01-13:52D44CC6.4050707 at gmail.com: +Peter Wu +2015-07-22:20150722191325.GA8113 at al: + Rafa?l Carr? 2012-04-20:4F91988B.1080502 at videolan.org: ----------------------------------------------------------------------- Summary of changes: AUTHORS | 3 +++ 1 file changed, 3 insertions(+) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From cvs at cvs.gnupg.org Thu Jul 23 14:51:15 2015 From: cvs at cvs.gnupg.org (by Peter Wu) Date: Thu, 23 Jul 2015 14:51:15 +0200 Subject: [git] GCRYPT - branch, master, updated. 
libgcrypt-1.6.0-236-gada0a7d Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via ada0a7d302cca97b327faaacac7a5d0b8043df88 (commit) from da500a030481a3c534e933b642e6f051aa064b97 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit ada0a7d302cca97b327faaacac7a5d0b8043df88 Author: Peter Wu Date: Thu Jul 9 17:11:33 2015 +0200 build: ignore scissor line for the commit-msg hook * build-aux/git-hooks/commit-msg: Stop processing more lines when the scissor line is encountered. -- This allows the command `git commit -v` to work even if the code is longer than 72 characters. Note that comments are already ignored by the previous line. Signed-off-by: Peter Wu diff --git a/build-aux/git-hooks/commit-msg b/build-aux/git-hooks/commit-msg index 5a697c7..3ca918b 100755 --- a/build-aux/git-hooks/commit-msg +++ b/build-aux/git-hooks/commit-msg @@ -86,11 +86,17 @@ sub check_msg($$) 2 <= @line && length $line[1] and return 'second line must be empty'; + # See git-commit(1), this is the --cleanup=scissors option. Everything + # after and including this line gets ignored. + my $marker = '# ------------------------ >8 ------------------------'; + # Limit line length to allow for the ChangeLog's leading TAB. 
foreach my $line (@line) { 72 < length $line && $line =~ /^[^#]/ and return 'line longer than 72 characters'; + + last if $line eq $marker; } return ''; ----------------------------------------------------------------------- Summary of changes: build-aux/git-hooks/commit-msg | 6 ++++++ 1 file changed, 6 insertions(+) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From gniibe at fsij.org Fri Jul 24 08:32:22 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Fri, 24 Jul 2015 15:32:22 +0900 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <87y4i7awaw.fsf@vigenere.g10code.de> References: <55965577.7020306@fsij.org> <559C7010.6040700@fsij.org> <877fpse3aj.fsf@vigenere.g10code.de> <55B09FB2.9060403@fsij.org> <87y4i7awaw.fsf@vigenere.g10code.de> Message-ID: <55B1DBF6.1010109@fsij.org> On 07/23/2015 09:32 PM, Werner Koch wrote: > On Thu, 23 Jul 2015 10:02, gniibe at fsij.org said: > >> So, its meaning is sec-is-multiplied-by-cofactor-and-msb-set (not mont >> or x-only, which is defined by curve's model or compression). >> >> I don't have good naming for the flag though. > > "djb" :-) It is good for us. :-) It would require some more explanation for other people. > Anyone else with a suggestion for the name of such a flag? >From poor vocabulary of non-native speaker, trim, rational, legitimate, validated, solid, come up. I think that the practice makes much sense because it encourages constant time implementation. I wonder why it wasn't common for the standardization of ECC before safe curves. How about "advance"? In some sense, a secret key with this flag is like a ticket sold in advance; For both sides (buy & sell), it eliminates a possibility of failures (of payment). When we see the flag, it means that it's advanced ECC with safe curve. 
My point is: It would be good it has better connotation. -- From grothoff at gnunet.org Fri Jul 24 08:38:39 2015 From: grothoff at gnunet.org (Christian Grothoff) Date: Fri, 24 Jul 2015 08:38:39 +0200 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <55B1DBF6.1010109@fsij.org> References: <55965577.7020306@fsij.org> <559C7010.6040700@fsij.org> <877fpse3aj.fsf@vigenere.g10code.de> <55B09FB2.9060403@fsij.org> <87y4i7awaw.fsf@vigenere.g10code.de> <55B1DBF6.1010109@fsij.org> Message-ID: <55B1DD6F.6000106@gnunet.org> Why have a flag for the sane/safe behaviour? If we need a flag at all, shouldn't we have it for the unsafe behaviour? (and then we can just call it 'unsafe', to be appropriately discouraging). AFAIK encryption support is kind-of new anyway, so hopefully this isn't needed to avoid breaking backwards-compatibility with anything that has been deployed... On 07/24/2015 08:32 AM, NIIBE Yutaka wrote: > On 07/23/2015 09:32 PM, Werner Koch wrote: >> On Thu, 23 Jul 2015 10:02, gniibe at fsij.org said: >> >>> So, its meaning is sec-is-multiplied-by-cofactor-and-msb-set (not mont >>> or x-only, which is defined by curve's model or compression). >>> >>> I don't have good naming for the flag though. >> >> "djb" :-) > > It is good for us. :-) It would require some more explanation > for other people. > >> Anyone else with a suggestion for the name of such a flag? > > From poor vocabulary of non-native speaker, > > trim, rational, legitimate, validated, solid, > > come up. > > I think that the practice makes much sense because it encourages > constant time implementation. I wonder why it wasn't common for > the standardization of ECC before safe curves. > > > How about "advance"? In some sense, a secret key with this flag is > like a ticket sold in advance; For both sides (buy & sell), it > eliminates a possibility of failures (of payment). > > When we see the flag, it means that it's advanced ECC with safe curve. 
> > My point is: It would be good it has better connotation. > From gniibe at fsij.org Fri Jul 24 11:15:38 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Fri, 24 Jul 2015 18:15:38 +0900 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <55B1DD6F.6000106@gnunet.org> References: <55965577.7020306@fsij.org> <559C7010.6040700@fsij.org> <877fpse3aj.fsf@vigenere.g10code.de> <55B09FB2.9060403@fsij.org> <87y4i7awaw.fsf@vigenere.g10code.de> <55B1DBF6.1010109@fsij.org> <55B1DD6F.6000106@gnunet.org> Message-ID: <55B2023A.8010402@fsij.org> Hello, Thank you for your comment. Let me clarify. On 07/24/2015 03:38 PM, Christian Grothoff wrote: > Why have a flag for the sane/safe behaviour? If we need a flag at all, > shouldn't we have it for the unsafe behaviour? (and then we can just > call it 'unsafe', to be appropriately discouraging). AFAIK encryption > support is kind-of new anyway, so hopefully this isn't needed to avoid > breaking backwards-compatibility with anything that has been deployed... We already have "classic" ECC (including ECDH encryption) with the NIST, Brainpool, and GOST curves. Well, I'd say, it is not-that-safe if we compare modern ECC with safe curve. Its deployment (libgcrypt feature of classic ECC) is not that popular now, but it's published somehow by GnuPG 2.1's ECC support. With Curve25519, we are introducing new safer practice of sec-is-multiplied-by-cofactor-and-msb-set. I think that this practice can be applied to existing ECC code (since all existing curves have cofactor=1, only "msb-set" part is relevant), if/when we want to improve existing ECC code to be constant time. 
-- From wk at gnupg.org Fri Jul 24 16:46:13 2015 From: wk at gnupg.org (Werner Koch) Date: Fri, 24 Jul 2015 16:46:13 +0200 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <55B1DD6F.6000106@gnunet.org> (Christian Grothoff's message of "Fri, 24 Jul 2015 08:38:39 +0200") References: <55965577.7020306@fsij.org> <559C7010.6040700@fsij.org> <877fpse3aj.fsf@vigenere.g10code.de> <55B09FB2.9060403@fsij.org> <87y4i7awaw.fsf@vigenere.g10code.de> <55B1DBF6.1010109@fsij.org> <55B1DD6F.6000106@gnunet.org> Message-ID: <87k2tpaa16.fsf@vigenere.g10code.de> On Fri, 24 Jul 2015 08:38, grothoff at gnunet.org said: >> It is good for us. :-) It would require some more explanation >> for other people. What about le-tweak because it is a tweak in the little endian representation. Yeah, I know that it sounds like "let weak" ;-) Or le-twist le-highbit le-msb twistle Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From gniibe at fsij.org Sat Jul 25 05:08:24 2015 From: gniibe at fsij.org (NIIBE Yutaka) Date: Sat, 25 Jul 2015 12:08:24 +0900 Subject: [EXPERIMENTAL-PATCH] Curve25519 encryption support (updated) In-Reply-To: <87k2tpaa16.fsf@vigenere.g10code.de> References: <55965577.7020306@fsij.org> <559C7010.6040700@fsij.org> <877fpse3aj.fsf@vigenere.g10code.de> <55B09FB2.9060403@fsij.org> <87y4i7awaw.fsf@vigenere.g10code.de> <55B1DBF6.1010109@fsij.org> <55B1DD6F.6000106@gnunet.org> <87k2tpaa16.fsf@vigenere.g10code.de> Message-ID: <55B2FDA8.8090505@fsij.org> On 07/24/2015 11:46 PM, Werner Koch wrote: > What about > > le-tweak > > because it is a tweak in the little endian representation. Yeah, I know > that it sounds like "let weak" ;-) > > Or > > le-twist > le-highbit > le-msb > twistle I feel that "twist" is a bit confusing, because we also use the term "twist" for curves. tweak would be better. I think that it's not specific to little endian. 
Last year, when I tested Curve25519, the key (secret and public) was in big endian format. For cofactor multiplied secret key, I refer the site: SafeCurves: choosing safe curves for elliptic-curve cryptography Twist security http://safecurves.cr.yp.to/twist.html In the section: Background: small-subgroup attacks, it says: A protocol designer can protect against this type of attack for any curve by specifying n=hs. Here, h is the cofactor, n is the secret key, and I think that s is something secret. For secret key with MSB=1, its obvious that it's against timing attack. Since it's so obvious, I don't have good reference. Here is an explanation I found in Q&A site: When using Curve25519, why does the private key always have a fixed bit at 2^254? crypto.stackexchange.com/questions/11810/when-using-curve25519-why-does-the-private-key-always-have-a-fixed-bit-at-2254 (As I said yesterday, this can be applied to computation with other curves.) Yes, the practice of secret key is a tweak against such attacks. -- From cvs at cvs.gnupg.org Sun Jul 26 16:00:42 2015 From: cvs at cvs.gnupg.org (by Peter Wu) Date: Sun, 26 Jul 2015 16:00:42 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-237-g46c0726 Message-ID: This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "The GNU crypto library". The branch, master has been updated via 46c072669eb81ed610cc5b3c0dc0c75a143afbb4 (commit) from ada0a7d302cca97b327faaacac7a5d0b8043df88 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 46c072669eb81ed610cc5b3c0dc0c75a143afbb4 Author: Peter Wu Date: Sun Jul 26 16:50:33 2015 +0300 Fix undefined behavior wrt memcpy * cipher/cipher-gcm.c: Do not copy zero bytes from an empty buffer. 
Let the function continue to add padding as needed though. * cipher/mac-poly1305.c: If the caller requested to finish the hash function without a copy of the result, return immediately. -- Caught by UndefinedBehaviorSanitizer. Signed-off-by: Peter Wu diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c index 6b13fc5..3711a1d 100644 --- a/cipher/cipher-gcm.c +++ b/cipher/cipher-gcm.c @@ -474,7 +474,7 @@ do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf, do { - if (buflen + unused < blocksize || unused > 0) + if (buflen > 0 && (buflen + unused < blocksize || unused > 0)) { n = blocksize - unused; n = n < buflen ? n : buflen; diff --git a/cipher/mac-poly1305.c b/cipher/mac-poly1305.c index 76b369a..b80f87d 100644 --- a/cipher/mac-poly1305.c +++ b/cipher/mac-poly1305.c @@ -260,6 +260,9 @@ poly1305mac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t *outlen) mac_ctx->marks.tag = 1; } + if (*outlen == 0) + return 0; + if (*outlen <= POLY1305_TAGLEN) buf_cpy (outbuf, mac_ctx->tag, *outlen); else ----------------------------------------------------------------------- Summary of changes: cipher/cipher-gcm.c | 2 +- cipher/mac-poly1305.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) hooks/post-receive -- The GNU crypto library http://git.gnupg.org _______________________________________________ Gnupg-commits mailing list Gnupg-commits at gnupg.org http://lists.gnupg.org/mailman/listinfo/gnupg-commits From wk at gnupg.org Mon Jul 27 07:45:25 2015 From: wk at gnupg.org (Werner Koch) Date: Mon, 27 Jul 2015 07:45:25 +0200 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-237-g46c0726 In-Reply-To: (by Peter Wu's message of "Sun, 26 Jul 2015 16:00:42 +0200") References: Message-ID: <87zj2i887e.fsf@vigenere.g10code.de> On Sun, 26 Jul 2015 16:00, cvs at cvs.gnupg.org said: > commit 46c072669eb81ed610cc5b3c0dc0c75a143afbb4 > Fix undefined behavior wrt memcpy > > * cipher/cipher-gcm.c: Do not copy zero bytes from an empty buffer. 
Let > the function continue to add padding as needed though. I think it is a bit surprising that buf_cpy does not behave similar to memcpy and requires the caller to check that LEN is greater than zero. Would it be a noticeable loss of speed if buf_cpy would do the test instead of the caller? Shalom-Salam, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From jussi.kivilinna at iki.fi Mon Jul 27 10:15:41 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 27 Jul 2015 11:15:41 +0300 Subject: [git] GCRYPT - branch, master, updated. libgcrypt-1.6.0-237-g46c0726 In-Reply-To: <87zj2i887e.fsf@vigenere.g10code.de> References: <87zj2i887e.fsf@vigenere.g10code.de> Message-ID: <55B5E8AD.3010102@iki.fi> On 27.07.2015 08:45, Werner Koch wrote: > On Sun, 26 Jul 2015 16:00, cvs at cvs.gnupg.org said: > >> commit 46c072669eb81ed610cc5b3c0dc0c75a143afbb4 > >> Fix undefined behavior wrt memcpy >> >> * cipher/cipher-gcm.c: Do not copy zero bytes from an empty buffer. Let >> the function continue to add padding as needed though. > > I think it is a bit surprising that buf_cpy does not behave similar to > memcpy and requires the caller to check that LEN is greater than zero. > Would it be a noticeable loss of speed if buf_cpy would do the test > instead of the caller? > Regular buf_cpy can handle the srcbuf==NULL && len==0 case (undefined for memcpy) without problem. Issue is that on x86, buf_cpy just uses memcpy directly (for faster code generation) and Peter caught undefined memcpy usage (srcbuf==NULL && len==0) with UndefinedBehaviorSanitizer. I guess buffer length check could be add to x86 version of buf_cpy. 
-Jussi > > Shalom-Salam, > > Werner > > From jussi.kivilinna at iki.fi Mon Jul 27 11:04:15 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 27 Jul 2015 12:04:15 +0300 Subject: [PATCH 1/6] Add OCB bulk mode for AES SSSE3 implementation Message-ID: <20150727090415.18742.12674.stgit@localhost6.localdomain6> * cipher/rijndael-ssse3-amd64.c (SSSE3_STATE_SIZE): New. [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (vpaes_ssse3_prepare): Use 'ssse3_state' for storing current SSSE3 state. [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (vpaes_ssse3_cleanup): Restore SSSE3 state from 'ssse3_state'. (_gcry_aes_ssse3_do_setkey, _gcry_aes_ssse3_prepare_decryption) (_gcry_aes_ssse3_encrypt, _gcry_aes_ssse3_cfb_enc) (_gcry_aes_ssse3_cbc_enc, _gcry_aes_ssse3_ctr_enc) (_gcry_aes_ssse3_decrypt, _gcry_aes_ssse3_cfb_dec) (_gcry_aes_ssse3_cbc_dec, _gcry_aes_ssse3_cbc_dec): Add 'ssse3_state' array. (get_l, ssse3_ocb_enc, ssse3_ocb_dec, _gcry_aes_ssse3_ocb_crypt) (_gcry_aes_ssse3_ocb_auth): New. * cipher/rijndael.c (_gcry_aes_ssse3_ocb_crypt) (_gcry_aes_ssse3_ocb_auth): New. (_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth) [USE_SSSE3]: Use SSSE3 implementation for OCB. -- Signed-off-by: Jussi Kivilinna --- cipher/rijndael-ssse3-amd64.c | 305 ++++++++++++++++++++++++++++++++++++++++- cipher/rijndael.c | 19 +++ 2 files changed, 320 insertions(+), 4 deletions(-) diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c index 21438dc..0cdb532 100644 --- a/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -45,6 +45,7 @@ #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" +#include "./cipher-internal.h" #ifdef USE_SSSE3 @@ -62,9 +63,9 @@ SSE registers are cleared and won't reveal any information about the key or the data. */ #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define SSSE3_STATE_SIZE (16 * 10) /* XMM6-XMM15 are callee-saved registers on WIN64. 
*/ # define vpaes_ssse3_prepare() \ - char win64tmp[16 * 10]; \ asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" \ "movdqu %%xmm7, 1*16(%0)\n\t" \ "movdqu %%xmm8, 2*16(%0)\n\t" \ @@ -76,7 +77,7 @@ "movdqu %%xmm14, 8*16(%0)\n\t" \ "movdqu %%xmm15, 9*16(%0)\n\t" \ : \ - : "r" (win64tmp) \ + : "r" (ssse3_state) \ : "memory" ) # define vpaes_ssse3_cleanup() \ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \ @@ -96,10 +97,11 @@ "movdqu 8*16(%0), %%xmm14 \n\t" \ "movdqu 9*16(%0), %%xmm15 \n\t" \ : \ - : "r" (win64tmp) \ + : "r" (ssse3_state) \ : "memory" ) #else -# define vpaes_ssse3_prepare() /*_*/ +# define SSSE3_STATE_SIZE 1 +# define vpaes_ssse3_prepare() (void)ssse3_state # define vpaes_ssse3_cleanup() \ asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \ "pxor %%xmm1, %%xmm1 \n\t" \ @@ -148,6 +150,7 @@ void _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; + byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare(); @@ -178,6 +181,7 @@ void _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx) { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; + byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare(); @@ -238,6 +242,7 @@ _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, { unsigned int nrounds = ctx->rounds; const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (aes_const_ptr); asm volatile ("movdqu %[src], %%xmm0\n\t" @@ -261,6 +266,7 @@ _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, { unsigned int nrounds = ctx->rounds; const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (aes_const_ptr); @@ -300,6 +306,7 @@ _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, { unsigned int nrounds = ctx->rounds; const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (aes_const_ptr); @@ -347,6 +354,7 @@ _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context 
*ctx, unsigned char *outbuf, { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; unsigned int nrounds = ctx->rounds; const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; u64 ctrlow; vpaes_ssse3_prepare_enc (aes_const_ptr); @@ -411,6 +419,7 @@ _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, { unsigned int nrounds = ctx->rounds; const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_dec (aes_const_ptr); asm volatile ("movdqu %[src], %%xmm0\n\t" @@ -434,6 +443,7 @@ _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, { unsigned int nrounds = ctx->rounds; const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_enc (aes_const_ptr); @@ -474,6 +484,7 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, { unsigned int nrounds = ctx->rounds; const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; vpaes_ssse3_prepare_dec (aes_const_ptr); @@ -516,6 +527,292 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, } +static inline const unsigned char * +get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv, + unsigned char *ctr, const void **aes_const_ptr, + byte ssse3_state[SSSE3_STATE_SIZE], int encrypt) +{ + const unsigned char *l; + unsigned int ntz; + + if (i & 0xffffffffU) + { + asm ("rep;bsf %k[low], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [low] "r" (i & 0xffffffffU) + : "cc"); + } + else + { + if (OCB_L_TABLE_SIZE < 32) + { + ntz = 32; + } + else if (i) + { + asm ("rep;bsf %k[high], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [high] "r" (i >> 32) + : "cc"); + ntz += 32; + } + else + { + ntz = 64; + } + } + + if (ntz < OCB_L_TABLE_SIZE) + { + l = c->u_mode.ocb.L[ntz]; + } + else + { + /* Store Offset & Checksum before calling external function */ + asm volatile ("movdqu %%xmm7, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*iv), + [ctr] "=m" (*ctr) + : + : "memory" ); + + /* Restore 
SSSE3 state. */ + vpaes_ssse3_cleanup(); + + l = _gcry_cipher_ocb_get_l (c, l_tmp, i); + + /* Save SSSE3 state. */ + if (encrypt) + { + vpaes_ssse3_prepare_enc (*aes_const_ptr); + } + else + { + vpaes_ssse3_prepare_dec (*aes_const_ptr); + } + + /* Restore Offset & Checksum */ + asm volatile ("movdqu %[iv], %%xmm7\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*iv), + [ctr] "m" (*ctr) + : "memory" ); + } + + return l; +} + + +static void +ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + u64 n = c->u_mode.ocb.data_nblocks; + unsigned int nrounds = ctx->rounds; + const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_enc (aes_const_ptr); + + /* Preload Offset and Checksum */ + asm volatile ("movdqu %[iv], %%xmm7\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_iv.iv), + [ctr] "m" (*c->u_ctr.ctr) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr, &aes_const_ptr, + ssse3_state, 1); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm7\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "pxor %%xmm7, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_vpaes_ssse3_enc (ctx, nrounds, aes_const_ptr); + + asm volatile ("pxor %%xmm7, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + c->u_mode.ocb.data_nblocks = n; + asm volatile ("movdqu %%xmm7, %[iv]\n\t" + "movdqu 
%%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_iv.iv), + [ctr] "=m" (*c->u_ctr.ctr) + : + : "memory" ); + + wipememory(&l_tmp, sizeof(l_tmp)); + vpaes_ssse3_cleanup (); +} + +static void +ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + u64 n = c->u_mode.ocb.data_nblocks; + unsigned int nrounds = ctx->rounds; + const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_dec (aes_const_ptr); + + /* Preload Offset and Checksum */ + asm volatile ("movdqu %[iv], %%xmm7\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_iv.iv), + [ctr] "m" (*c->u_ctr.ctr) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr, &aes_const_ptr, + ssse3_state, 0); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm7\n\t" + "pxor %%xmm7, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_vpaes_ssse3_dec (ctx, nrounds, aes_const_ptr); + + asm volatile ("pxor %%xmm7, %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + c->u_mode.ocb.data_nblocks = n; + asm volatile ("movdqu %%xmm7, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_iv.iv), + [ctr] "=m" (*c->u_ctr.ctr) + : + : "memory" ); + + wipememory(&l_tmp, sizeof(l_tmp)); + vpaes_ssse3_cleanup (); +} + + +void +_gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int 
encrypt) +{ + if (encrypt) + ssse3_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); + else + ssse3_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); +} + + +void +_gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + RIJNDAEL_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + u64 n = c->u_mode.ocb.aad_nblocks; + unsigned int nrounds = ctx->rounds; + const void *aes_const_ptr; + byte ssse3_state[SSSE3_STATE_SIZE]; + + vpaes_ssse3_prepare_enc (aes_const_ptr); + + /* Preload Offset and Sum */ + asm volatile ("movdqu %[iv], %%xmm7\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_mode.ocb.aad_offset), + [ctr] "m" (*c->u_mode.ocb.aad_sum) + : "memory" ); + + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, &aes_const_ptr, ssse3_state, 1); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[abuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm7\n\t" + "pxor %%xmm7, %%xmm0\n\t" + : + : [l] "m" (*l), + [abuf] "m" (*abuf) + : "memory" ); + + do_vpaes_ssse3_enc (ctx, nrounds, aes_const_ptr); + + asm volatile ("pxor %%xmm0, %%xmm6\n\t" + : + : + : "memory" ); + + abuf += BLOCKSIZE; + } + + c->u_mode.ocb.aad_nblocks = n; + asm volatile ("movdqu %%xmm7, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_mode.ocb.aad_offset), + [ctr] "=m" (*c->u_mode.ocb.aad_sum) + : + : "memory" ); + + wipememory(&l_tmp, sizeof(l_tmp)); + vpaes_ssse3_cleanup (); +} + + #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS # define X(...) 
#else diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 4f063c4..1fe16d6 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -137,6 +137,11 @@ extern void _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); +extern void _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern void _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); #endif #ifdef USE_PADLOCK @@ -1226,6 +1231,13 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, burn_depth = 0; } #endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ else if (encrypt) { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; @@ -1314,6 +1326,13 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) burn_depth = 0; } #endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; From jussi.kivilinna at iki.fi Mon Jul 27 11:04:25 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 27 Jul 2015 12:04:25 +0300 Subject: [PATCH 3/6] Add bulk OCB for Twofish AMD64 implementation In-Reply-To: <20150727090415.18742.12674.stgit@localhost6.localdomain6> References: <20150727090415.18742.12674.stgit@localhost6.localdomain6> Message-ID: <20150727090425.18742.7458.stgit@localhost6.localdomain6> * cipher/cipher.c (_gcry_cipher_open_internal): Setup OCB bulk functions for Twofish. * cipher/twofish-amd64.S: Add OCB assembly functions. * cipher/twofish.c (_gcry_twofish_amd64_ocb_enc) (_gcry_twofish_amd64_ocb_dec, _gcry_twofish_amd64_ocb_auth): New prototypes. 
(call_sysv_fn5, call_sysv_fn6, twofish_amd64_ocb_enc) (twofish_amd64_ocb_dec, twofish_amd64_ocb_auth, get_l) (_gcry_twofish_ocb_crypt, _gcry_twofish_ocb_auth): New. * src/cipher.h (_gcry_twofish_ocb_crypt) (_gcry_twofish_ocb_auth): New. * tests/basic.c (check_ocb_cipher): Add test-vector for Twofish. -- Signed-off-by: Jussi Kivilinna --- cipher/cipher.c | 2 cipher/twofish-amd64.S | 310 ++++++++++++++++++++++++++++++++++++++++++++++++ cipher/twofish.c | 259 ++++++++++++++++++++++++++++++++++++++++ src/cipher.h | 5 + tests/basic.c | 20 ++- 5 files changed, 588 insertions(+), 8 deletions(-) diff --git a/cipher/cipher.c b/cipher/cipher.c index 2d2b0ad..8483c5f 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -561,6 +561,8 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, h->bulk.cbc_dec = _gcry_twofish_cbc_dec; h->bulk.cfb_dec = _gcry_twofish_cfb_dec; h->bulk.ctr_enc = _gcry_twofish_ctr_enc; + h->bulk.ocb_crypt = _gcry_twofish_ocb_crypt; + h->bulk.ocb_auth = _gcry_twofish_ocb_auth; break; #endif /*USE_TWOFISH*/ diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S index ea88b94..aa964e0 100644 --- a/cipher/twofish-amd64.S +++ b/cipher/twofish-amd64.S @@ -1,6 +1,6 @@ /* twofish-amd64.S - AMD64 assembly implementation of Twofish cipher * - * Copyright (C) 2013 Jussi Kivilinna + * Copyright (C) 2013-2015 Jussi Kivilinna * * This file is part of Libgcrypt. 
* @@ -734,5 +734,313 @@ _gcry_twofish_amd64_cfb_dec: ret; ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;) +.align 8 +.globl _gcry_twofish_amd64_ocb_enc +ELF(.type _gcry_twofish_amd64_ocb_enc, at function;) +_gcry_twofish_amd64_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %rdx, RX0; + movq %rcx, RX1; + movq %r8, RX2; + movq %r9, RY0; + movq %rsi, RY1; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* Store Offset_i */ + movq RT0, (0 * 8)(RY1); + movq RT1, (1 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB0, (0 * 8)(RX2); + xor RCD0, (1 * 8)(RX2); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* Store Offset_i */ + movq RT0, (2 * 8)(RY1); + movq RT1, (3 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB1, (0 * 8)(RX2); + xor RCD1, (1 * 8)(RX2); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* Store Offset_i */ + movq RT0, (4 * 8)(RY1); + movq RT1, (5 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB2, (0 * 8)(RX2); + xor RCD2, (1 * 8)(RX2); + /* PX_i = P_i xor Offset_i 
*/ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* CX_i = ENCIPHER(K, PX_i) */ + call __twofish_enc_blk3; + + movq (6 * 8)(%rsp), RX1; /*dst*/ + + /* C_i = CX_i xor Offset_i */ + xorq RCD0, (0 * 8)(RX1); + xorq RAB0, (1 * 8)(RX1); + xorq RCD1, (2 * 8)(RX1); + xorq RAB1, (3 * 8)(RX1); + xorq RCD2, (4 * 8)(RX1); + xorq RAB2, (5 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;) + +.align 8 +.globl _gcry_twofish_amd64_ocb_dec +ELF(.type _gcry_twofish_amd64_ocb_dec, at function;) +_gcry_twofish_amd64_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %r8, (7 * 8)(%rsp); + movq %rdx, RX0; + movq %rcx, RX1; + movq %r9, RY0; + movq %rsi, RY1; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* Store Offset_i */ + movq RT0, (0 * 8)(RY1); + movq RT1, (1 * 8)(RY1); + /* CX_i = C_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* Store Offset_i */ + movq RT0, (2 * 8)(RY1); + movq RT1, (3 * 8)(RY1); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + 
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* Store Offset_i */ + movq RT0, (4 * 8)(RY1); + movq RT1, (5 * 8)(RY1); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* PX_i = DECIPHER(K, CX_i) */ + call __twofish_dec_blk3; + + movq (7 * 8)(%rsp), RX2; /*checksum*/ + movq (6 * 8)(%rsp), RX1; /*dst*/ + + /* Load checksum */ + movq (0 * 8)(RX2), RT0; + movq (1 * 8)(RX2), RT1; + + /* P_i = PX_i xor Offset_i */ + xorq RCD0, (0 * 8)(RX1); + xorq RAB0, (1 * 8)(RX1); + xorq RCD1, (2 * 8)(RX1); + xorq RAB1, (3 * 8)(RX1); + xorq RCD2, (4 * 8)(RX1); + xorq RAB2, (5 * 8)(RX1); + + /* Checksum_i = Checksum_{i-1} xor P_i */ + xorq (0 * 8)(RX1), RT0; + xorq (1 * 8)(RX1), RT1; + xorq (2 * 8)(RX1), RT0; + xorq (3 * 8)(RX1), RT1; + xorq (4 * 8)(RX1), RT0; + xorq (5 * 8)(RX1), RT1; + + /* Store checksum */ + movq RT0, (0 * 8)(RX2); + movq RT1, (1 * 8)(RX2); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;) + +.align 8 +.globl _gcry_twofish_amd64_ocb_auth +ELF(.type _gcry_twofish_amd64_ocb_auth, at function;) +_gcry_twofish_amd64_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (3 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rcx, (6 * 8)(%rsp); + movq %rsi, RX0; + movq %rdx, RX1; + movq %r8, RY0; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = 
Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* C_i = ENCIPHER(K, PX_i) */ + call __twofish_enc_blk3; + + movq (6 * 8)(%rsp), RX1; /*checksum*/ + + /* Checksum_i = C_i xor Checksum_i */ + xorq RCD0, RCD1; + xorq RAB0, RAB1; + xorq RCD1, RCD2; + xorq RAB1, RAB2; + xorq RCD2, (0 * 8)(RX1); + xorq RAB2, (1 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;) + #endif /*USE_TWOFISH*/ #endif /*__x86_64*/ diff --git a/cipher/twofish.c b/cipher/twofish.c index ce83fad..9b9c35f 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -45,6 +45,7 @@ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" +#include "cipher-internal.h" #include "cipher-selftest.h" @@ -755,6 +756,18 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv); +extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, + const byte *in, byte *offset, + byte *checksum, const void *Ls[3]); + 
+extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, + const byte *in, byte *offset, + byte *checksum, const void *Ls[3]); + +extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx, + const byte *abuf, byte *offset, + byte *checksum, const void *Ls[3]); + #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS static inline void call_sysv_fn (const void *fn, const void *arg1, const void *arg2, @@ -771,6 +784,43 @@ call_sysv_fn (const void *fn, const void *arg1, const void *arg2, : : "cc", "memory", "r8", "r9", "r10", "r11"); } + +static inline void +call_sysv_fn5 (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4, const void *arg5) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. */ + asm volatile ("movq %[arg5], %%r8\n\t" + "callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : [arg5] "g" (arg5) + : "cc", "memory", "r8", "r9", "r10", "r11"); +} + +static inline void +call_sysv_fn6 (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4, const void *arg5, + const void *arg6) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. 
*/ + asm volatile ("movq %[arg5], %%r8\n\t" + "movq %[arg6], %%r9\n\t" + "callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : [arg5] "g" (arg5), + [arg6] "g" (arg6) + : "cc", "memory", "r8", "r9", "r10", "r11"); +} #endif static inline void @@ -826,6 +876,39 @@ twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, #endif } +static inline void +twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in, + byte *offset, byte *checksum, const void *Ls[3]) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn6(_gcry_twofish_amd64_ocb_enc, ctx, out, in, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls); +#endif +} + +static inline void +twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in, + byte *offset, byte *checksum, const void *Ls[3]) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn6(_gcry_twofish_amd64_ocb_dec, ctx, out, in, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls); +#endif +} + +static inline void +twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf, + byte *offset, byte *checksum, const void *Ls[3]) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn5(_gcry_twofish_amd64_ocb_auth, ctx, abuf, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls); +#endif +} + #elif defined(USE_ARM_ASM) /* Assembly implementations of Twofish. 
*/ @@ -1188,6 +1271,182 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, _gcry_burn_stack(burn_stack_depth); } +static inline const unsigned char * +get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) +{ + unsigned int ntz = _gcry_ctz64 (i); + + if (ntz < OCB_L_TABLE_SIZE) + return c->u_mode.ocb.L[ntz]; + else + return _gcry_cipher_ocb_get_l (c, l_tmp, i); +} + +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +void +_gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + TWOFISH_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char l_tmp[TWOFISH_BLOCKSIZE]; + const unsigned char *l; + unsigned int burn, burn_stack_depth = 0; + u64 blkn = c->u_mode.ocb.data_nblocks; + +#ifdef USE_AMD64_ASM + { + const void *Ls[3]; + + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + /* l_tmp will be used only every 65536-th block. */ + Ls[0] = get_l(c, l_tmp, blkn + 1); + Ls[1] = get_l(c, l_tmp, blkn + 2); + Ls[2] = get_l(c, l_tmp, blkn + 3); + blkn += 3; + + if (encrypt) + twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, + Ls); + else + twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, + Ls); + + nblocks -= 3; + outbuf += 3 * TWOFISH_BLOCKSIZE; + inbuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 8 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + } + + /* Use generic code to handle smaller chunks... 
*/ + } +#endif + + if (encrypt) + { + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, TWOFISH_BLOCKSIZE); + buf_cpy (l_tmp, inbuf, TWOFISH_BLOCKSIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp, TWOFISH_BLOCKSIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); + burn = twofish_encrypt(ctx, l_tmp, l_tmp); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); + buf_cpy (outbuf, l_tmp, TWOFISH_BLOCKSIZE); + + inbuf += TWOFISH_BLOCKSIZE; + outbuf += TWOFISH_BLOCKSIZE; + } + } + else + { + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, TWOFISH_BLOCKSIZE); + buf_cpy (l_tmp, inbuf, TWOFISH_BLOCKSIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); + burn = twofish_decrypt(ctx, l_tmp, l_tmp); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp, TWOFISH_BLOCKSIZE); + buf_cpy (outbuf, l_tmp, TWOFISH_BLOCKSIZE); + + inbuf += TWOFISH_BLOCKSIZE; + outbuf += TWOFISH_BLOCKSIZE; + } + } + + c->u_mode.ocb.data_nblocks = blkn; + + wipememory(&l_tmp, sizeof(l_tmp)); + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +} + +/* Bulk authentication of complete blocks in OCB mode. 
*/ +void +_gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ + TWOFISH_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + unsigned char l_tmp[TWOFISH_BLOCKSIZE]; + const unsigned char *l; + unsigned int burn, burn_stack_depth = 0; + u64 blkn = c->u_mode.ocb.aad_nblocks; + +#ifdef USE_AMD64_ASM + { + const void *Ls[3]; + + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + /* l_tmp will be used only every 65536-th block. */ + Ls[0] = get_l(c, l_tmp, blkn + 1); + Ls[1] = get_l(c, l_tmp, blkn + 2); + Ls[2] = get_l(c, l_tmp, blkn + 3); + blkn += 3; + + twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 3; + abuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 8 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_mode.ocb.aad_offset, l, TWOFISH_BLOCKSIZE); + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, TWOFISH_BLOCKSIZE); + burn = twofish_encrypt(ctx, l_tmp, l_tmp); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, TWOFISH_BLOCKSIZE); + + abuf += TWOFISH_BLOCKSIZE; + } + + c->u_mode.ocb.aad_nblocks = blkn; + + wipememory(&l_tmp, sizeof(l_tmp)); + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +} + /* Run the self-tests for TWOFISH-CTR, tests IV increment of bulk CTR diff --git a/src/cipher.h b/src/cipher.h index a0aac51..1a66f6d 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -217,6 +217,11 @@ void _gcry_twofish_cbc_dec (void *context, unsigned char *iv, void _gcry_twofish_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); +void 
_gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +void _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); /*-- dsa.c --*/ void _gcry_register_pk_dsa_progress (gcry_handler_progress_t cbc, void *cb_data); diff --git a/tests/basic.c b/tests/basic.c index e3f4bfd..124df55 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -3330,20 +3330,26 @@ check_ocb_cipher (void) /* Check large buffer encryption/decryption. */ check_ocb_cipher_largebuf(GCRY_CIPHER_AES, 16, - "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8" - "\x33\xfd\x7a\x4f\x42\x60\x5d\x20"); + "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8" + "\x33\xfd\x7a\x4f\x42\x60\x5d\x20"); check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32, - "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d" - "\xfe\x96\x67\xc9\xc8\x41\x03\x51"); + "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d" + "\xfe\x96\x67\xc9\xc8\x41\x03\x51"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA128, 16, - "\x28\x23\x38\x45\x2b\xfd\x42\x45" + "\x28\x23\x38\x45\x2b\xfd\x42\x45" "\x43\x64\x7e\x67\x7f\xf4\x8b\xcd"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA192, 24, - "\xee\xca\xe5\x39\x27\x2d\x33\xe7" + "\xee\xca\xe5\x39\x27\x2d\x33\xe7" "\x79\x74\xb0\x1d\x37\x12\xd5\x6c"); check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA256, 32, - "\x39\x39\xd0\x2d\x05\x68\x74\xee" + "\x39\x39\xd0\x2d\x05\x68\x74\xee" "\x18\x6b\xea\x3d\x0b\xd3\x58\xae"); + check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 16, + "\x63\xe3\x0e\xb9\x11\x6f\x14\xba" + "\x79\xe4\xa7\x9e\xad\x3c\x02\x0c"); + check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 32, + "\xf6\xd4\xfe\x4e\x50\x85\x13\x59" + "\x69\x0e\x4c\x67\x3e\xdd\x47\x90"); } From jussi.kivilinna at iki.fi Mon Jul 27 11:04:35 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 27 Jul 2015 12:04:35 +0300 Subject: [PATCH 5/6] Reduce amount of duplicated code in OCB bulk implementations In-Reply-To: <20150727090415.18742.12674.stgit@localhost6.localdomain6> References: 
<20150727090415.18742.12674.stgit@localhost6.localdomain6> Message-ID: <20150727090435.18742.24848.stgit@localhost6.localdomain6> * cipher/cipher-ocb.c (_gcry_cipher_ocb_authenticate) (ocb_crypt): Change bulk function to return number of unprocessed blocks. * src/cipher.h (_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth) (_gcry_camellia_ocb_crypt, _gcry_camellia_ocb_auth) (_gcry_serpent_ocb_crypt, _gcry_serpent_ocb_auth) (_gcry_twofish_ocb_crypt, _gcry_twofish_ocb_auth): Change return type to 'size_t'. * cipher/camellia-glue.c (get_l): Only if USE_AESNI_AVX or USE_AESNI_AVX2 defined. (_gcry_camellia_ocb_crypt, _gcry_camellia_ocb_auth): Change return type to 'size_t' and return remaining blocks; Remove unaccelerated common code path. Enable remaining common code only if USE_AESNI_AVX or USE_AESNI_AVX2 defined; Remove unaccelerated common code. * cipher/rijndael.c (_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth): Change return type to 'size_t' and return zero. * cipher/serpent.c (get_l): Only if USE_SSE2, USE_AVX2 or USE_NEON defined. (_gcry_serpent_ocb_crypt, _gcry_serpent_ocb_auth): Change return type to 'size_t' and return remaining blocks; Remove unaccelerated common code path. Enable remaining common code only if USE_SSE2, USE_AVX2 or USE_NEON defined; Remove unaccelerated common code. * cipher/twofish.c (get_l): Only if USE_AMD64_ASM defined. (_gcry_twofish_ocb_crypt, _gcry_twofish_ocb_auth): Change return type to 'size_t' and return remaining blocks; Remove unaccelerated common code path. Enable remaining common code only if USE_AMD64_ASM defined; Remove unaccelerated common code. 
-- Signed-off-by: Jussi Kivilinna --- cipher/camellia-glue.c | 87 +++++++++++++------------------------------- cipher/cipher-internal.h | 7 ++-- cipher/cipher-ocb.c | 32 +++++++++++----- cipher/rijndael.c | 8 +++- cipher/serpent.c | 85 ++++++++++++------------------------------- cipher/twofish.c | 91 ++++++++++------------------------------------ src/cipher.h | 38 ++++++++++--------- 7 files changed, 120 insertions(+), 228 deletions(-) diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c index 197e1b3..99516fc 100644 --- a/cipher/camellia-glue.c +++ b/cipher/camellia-glue.c @@ -604,6 +604,7 @@ _gcry_camellia_cfb_dec(void *context, unsigned char *iv, _gcry_burn_stack(burn_stack_depth); } +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) static inline const unsigned char * get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) { @@ -614,22 +615,29 @@ get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) else return _gcry_cipher_ocb_get_l (c, l_tmp, i); } +#endif /* Bulk encryption/decryption of complete blocks in OCB mode. */ -void +size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) CAMELLIA_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char l_tmp[CAMELLIA_BLOCK_SIZE]; - const unsigned char *l; int burn_stack_depth; u64 blkn = c->u_mode.ocb.data_nblocks; burn_stack_depth = encrypt ? 
CAMELLIA_encrypt_stack_burn_size : CAMELLIA_decrypt_stack_burn_size; +#else + (void)c; + (void)outbuf_arg; + (void)inbuf_arg; + (void)encrypt; +#endif #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2) @@ -723,70 +731,35 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, } #endif - if (encrypt) - { - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l, CAMELLIA_BLOCK_SIZE); - buf_cpy (l_tmp, inbuf, CAMELLIA_BLOCK_SIZE); - /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1 (c->u_ctr.ctr, l_tmp, CAMELLIA_BLOCK_SIZE); - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor_1 (l_tmp, c->u_iv.iv, CAMELLIA_BLOCK_SIZE); - Camellia_EncryptBlock(ctx->keybitlength, l_tmp, ctx->keytable, l_tmp); - buf_xor_1 (l_tmp, c->u_iv.iv, CAMELLIA_BLOCK_SIZE); - buf_cpy (outbuf, l_tmp, CAMELLIA_BLOCK_SIZE); - - inbuf += CAMELLIA_BLOCK_SIZE; - outbuf += CAMELLIA_BLOCK_SIZE; - } - } - else - { - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l, CAMELLIA_BLOCK_SIZE); - buf_cpy (l_tmp, inbuf, CAMELLIA_BLOCK_SIZE); - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor_1 (l_tmp, c->u_iv.iv, CAMELLIA_BLOCK_SIZE); - Camellia_DecryptBlock(ctx->keybitlength, l_tmp, ctx->keytable, l_tmp); - buf_xor_1 (l_tmp, c->u_iv.iv, CAMELLIA_BLOCK_SIZE); - /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1 (c->u_ctr.ctr, l_tmp, CAMELLIA_BLOCK_SIZE); - buf_cpy (outbuf, l_tmp, CAMELLIA_BLOCK_SIZE); - - inbuf += CAMELLIA_BLOCK_SIZE; - outbuf += CAMELLIA_BLOCK_SIZE; - } - } - +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) c->u_mode.ocb.data_nblocks = blkn; wipememory(&l_tmp, sizeof(l_tmp)); if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; } /* Bulk authentication of complete blocks in OCB mode. 
*/ -void +size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, - size_t nblocks) + size_t nblocks) { +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) CAMELLIA_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; unsigned char l_tmp[CAMELLIA_BLOCK_SIZE]; - const unsigned char *l; int burn_stack_depth; u64 blkn = c->u_mode.ocb.aad_nblocks; burn_stack_depth = CAMELLIA_encrypt_stack_burn_size; +#else + (void)c; + (void)abuf_arg; +#endif #ifdef USE_AESNI_AVX2 if (ctx->use_aesni_avx2) @@ -870,26 +843,16 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, } #endif - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_mode.ocb.aad_offset, l, CAMELLIA_BLOCK_SIZE); - /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, CAMELLIA_BLOCK_SIZE); - Camellia_EncryptBlock(ctx->keybitlength, l_tmp, ctx->keytable, l_tmp); - buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, CAMELLIA_BLOCK_SIZE); - - abuf += CAMELLIA_BLOCK_SIZE; - } - +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) c->u_mode.ocb.aad_nblocks = blkn; wipememory(&l_tmp, sizeof(l_tmp)); if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; } /* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h index e20ea56..bb86d37 100644 --- a/cipher/cipher-internal.h +++ b/cipher/cipher-internal.h @@ -128,9 +128,10 @@ struct gcry_cipher_handle void (*ctr_enc)(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); - void (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, int encrypt); - void (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); + size_t (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg, + const 
void *inbuf_arg, size_t nblocks, int encrypt); + size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); } bulk; diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c index bc6fd87..096975a 100644 --- a/cipher/cipher-ocb.c +++ b/cipher/cipher-ocb.c @@ -260,10 +260,17 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, /* Use a bulk method if available. */ if (abuflen >= OCB_BLOCK_LEN && c->bulk.ocb_auth) { - size_t nblks = abuflen / OCB_BLOCK_LEN; - c->bulk.ocb_auth (c, abuf, nblks); - abuf += nblks * OCB_BLOCK_LEN; - abuflen -= nblks * OCB_BLOCK_LEN; + size_t nblks; + size_t nleft; + size_t ndone; + + nblks = abuflen / OCB_BLOCK_LEN; + nleft = c->bulk.ocb_auth (c, abuf, nblks); + ndone = nblks - nleft; + + abuf += ndone * OCB_BLOCK_LEN; + abuflen -= ndone * OCB_BLOCK_LEN; + nblks = nleft; } /* Hash all full blocks. */ @@ -354,12 +361,17 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, /* Use a bulk method if available. */ if (nblks && c->bulk.ocb_crypt) { - c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt); - inbuf += nblks * OCB_BLOCK_LEN; - outbuf += nblks * OCB_BLOCK_LEN; - inbuflen -= nblks * OCB_BLOCK_LEN; - outbuflen -= nblks * OCB_BLOCK_LEN; - nblks = 0; + size_t nleft; + size_t ndone; + + nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt); + ndone = nblks - nleft; + + inbuf += ndone * OCB_BLOCK_LEN; + outbuf += ndone * OCB_BLOCK_LEN; + inbuflen -= ndone * OCB_BLOCK_LEN; + outbuflen -= ndone * OCB_BLOCK_LEN; + nblks = nleft; } if (nblks) diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 1fe16d6..4368c6d 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -1200,7 +1200,7 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv, /* Bulk encryption/decryption of complete blocks in OCB mode. 
*/ -void +size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { @@ -1303,11 +1303,13 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); + + return 0; } /* Bulk authentication of complete blocks in OCB mode. */ -void +size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { RIJNDAEL_context *ctx = (void *)&c->context.c; @@ -1364,6 +1366,8 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) if (burn_depth) _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); + + return 0; } diff --git a/cipher/serpent.c b/cipher/serpent.c index eb491aa..0a54a17 100644 --- a/cipher/serpent.c +++ b/cipher/serpent.c @@ -1226,6 +1226,7 @@ _gcry_serpent_cfb_dec(void *context, unsigned char *iv, _gcry_burn_stack(burn_stack_depth); } +#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) static inline const unsigned char * get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) { @@ -1236,19 +1237,26 @@ get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) else return _gcry_cipher_ocb_get_l (c, l_tmp, i); } +#endif /* Bulk encryption/decryption of complete blocks in OCB mode. 
*/ -void +size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { +#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) serpent_context_t *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char l_tmp[sizeof(serpent_block_t)]; - const unsigned char *l; int burn_stack_depth = 2 * sizeof (serpent_block_t); u64 blkn = c->u_mode.ocb.data_nblocks; +#else + (void)c; + (void)outbuf_arg; + (void)inbuf_arg; + (void)encrypt; +#endif #ifdef USE_AVX2 if (ctx->use_avx2) @@ -1381,68 +1389,33 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, } #endif - if (encrypt) - { - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l, sizeof(serpent_block_t)); - buf_cpy (l_tmp, inbuf, sizeof(serpent_block_t)); - /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1 (c->u_ctr.ctr, l_tmp, sizeof(serpent_block_t)); - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor_1 (l_tmp, c->u_iv.iv, sizeof(serpent_block_t)); - serpent_encrypt_internal(ctx, l_tmp, l_tmp); - buf_xor_1 (l_tmp, c->u_iv.iv, sizeof(serpent_block_t)); - buf_cpy (outbuf, l_tmp, sizeof(serpent_block_t)); - - inbuf += sizeof(serpent_block_t); - outbuf += sizeof(serpent_block_t); - } - } - else - { - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l, sizeof(serpent_block_t)); - buf_cpy (l_tmp, inbuf, sizeof(serpent_block_t)); - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor_1 (l_tmp, c->u_iv.iv, sizeof(serpent_block_t)); - serpent_decrypt_internal(ctx, l_tmp, l_tmp); - buf_xor_1 (l_tmp, c->u_iv.iv, sizeof(serpent_block_t)); - /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1 (c->u_ctr.ctr, l_tmp, sizeof(serpent_block_t)); - buf_cpy (outbuf, l_tmp, 
sizeof(serpent_block_t)); - - inbuf += sizeof(serpent_block_t); - outbuf += sizeof(serpent_block_t); - } - } - +#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) c->u_mode.ocb.data_nblocks = blkn; wipememory(&l_tmp, sizeof(l_tmp)); if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; } /* Bulk authentication of complete blocks in OCB mode. */ -void +size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { +#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) serpent_context_t *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; unsigned char l_tmp[sizeof(serpent_block_t)]; - const unsigned char *l; int burn_stack_depth = 2 * sizeof(serpent_block_t); u64 blkn = c->u_mode.ocb.aad_nblocks; +#else + (void)c; + (void)abuf_arg; +#endif #ifdef USE_AVX2 if (ctx->use_avx2) @@ -1560,26 +1533,16 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, } #endif - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_mode.ocb.aad_offset, l, sizeof(serpent_block_t)); - /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, sizeof(serpent_block_t)); - serpent_encrypt_internal(ctx, l_tmp, l_tmp); - buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, sizeof(serpent_block_t)); - - abuf += sizeof(serpent_block_t); - } - +#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON) c->u_mode.ocb.aad_nblocks = blkn; wipememory(&l_tmp, sizeof(l_tmp)); if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; } diff --git a/cipher/twofish.c b/cipher/twofish.c index 9b9c35f..3ee2be5 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -1271,6 +1271,7 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, _gcry_burn_stack(burn_stack_depth); } +#ifdef 
USE_AMD64_ASM static inline const unsigned char * get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) { @@ -1281,21 +1282,21 @@ get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) else return _gcry_cipher_ocb_get_l (c, l_tmp, i); } +#endif /* Bulk encryption/decryption of complete blocks in OCB mode. */ -void +size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg, size_t nblocks, int encrypt) { +#ifdef USE_AMD64_ASM TWOFISH_context *ctx = (void *)&c->context.c; unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; unsigned char l_tmp[TWOFISH_BLOCKSIZE]; - const unsigned char *l; unsigned int burn, burn_stack_depth = 0; u64 blkn = c->u_mode.ocb.data_nblocks; -#ifdef USE_AMD64_ASM { const void *Ls[3]; @@ -1326,54 +1327,6 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ } -#endif - - if (encrypt) - { - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l, TWOFISH_BLOCKSIZE); - buf_cpy (l_tmp, inbuf, TWOFISH_BLOCKSIZE); - /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1 (c->u_ctr.ctr, l_tmp, TWOFISH_BLOCKSIZE); - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); - burn = twofish_encrypt(ctx, l_tmp, l_tmp); - if (burn > burn_stack_depth) - burn_stack_depth = burn; - buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); - buf_cpy (outbuf, l_tmp, TWOFISH_BLOCKSIZE); - - inbuf += TWOFISH_BLOCKSIZE; - outbuf += TWOFISH_BLOCKSIZE; - } - } - else - { - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, l, TWOFISH_BLOCKSIZE); - buf_cpy (l_tmp, inbuf, TWOFISH_BLOCKSIZE); - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); - burn = twofish_decrypt(ctx, l_tmp, 
l_tmp); - if (burn > burn_stack_depth) - burn_stack_depth = burn; - buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); - /* Checksum_i = Checksum_{i-1} xor P_i */ - buf_xor_1 (c->u_ctr.ctr, l_tmp, TWOFISH_BLOCKSIZE); - buf_cpy (outbuf, l_tmp, TWOFISH_BLOCKSIZE); - - inbuf += TWOFISH_BLOCKSIZE; - outbuf += TWOFISH_BLOCKSIZE; - } - } c->u_mode.ocb.data_nblocks = blkn; @@ -1381,21 +1334,28 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#else + (void)c; + (void)outbuf_arg; + (void)inbuf_arg; + (void)encrypt; +#endif + + return nblocks; } /* Bulk authentication of complete blocks in OCB mode. */ -void +size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) { +#ifdef USE_AMD64_ASM TWOFISH_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; unsigned char l_tmp[TWOFISH_BLOCKSIZE]; - const unsigned char *l; unsigned int burn, burn_stack_depth = 0; u64 blkn = c->u_mode.ocb.aad_nblocks; -#ifdef USE_AMD64_ASM { const void *Ls[3]; @@ -1421,23 +1381,6 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, /* Use generic code to handle smaller chunks... 
*/ } -#endif - - for (; nblocks; nblocks--) - { - l = get_l(c, l_tmp, ++blkn); - - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_mode.ocb.aad_offset, l, TWOFISH_BLOCKSIZE); - /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ - buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, TWOFISH_BLOCKSIZE); - burn = twofish_encrypt(ctx, l_tmp, l_tmp); - if (burn > burn_stack_depth) - burn_stack_depth = burn; - buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, TWOFISH_BLOCKSIZE); - - abuf += TWOFISH_BLOCKSIZE; - } c->u_mode.ocb.aad_nblocks = blkn; @@ -1445,6 +1388,12 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, if (burn_stack_depth) _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#else + (void)c; + (void)abuf_arg; +#endif + + return nblocks; } diff --git a/src/cipher.h b/src/cipher.h index d16746a..52f2695 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -136,10 +136,10 @@ void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); -void _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, int encrypt); -void _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, - size_t nblocks); +size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt); +size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); /*-- blowfish.c --*/ void _gcry_blowfish_cfb_dec (void *context, unsigned char *iv, @@ -177,11 +177,11 @@ void _gcry_camellia_cbc_dec (void *context, unsigned char *iv, void _gcry_camellia_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); -void _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, - int encrypt); -void _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void 
*abuf_arg, - size_t nblocks); +size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); /*-- des.c --*/ void _gcry_3des_ctr_enc (void *context, unsigned char *ctr, @@ -206,11 +206,11 @@ void _gcry_serpent_cbc_dec (void *context, unsigned char *iv, void _gcry_serpent_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); -void _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, - int encrypt); -void _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, - size_t nblocks); +size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); /*-- twofish.c --*/ void _gcry_twofish_ctr_enc (void *context, unsigned char *ctr, @@ -222,11 +222,11 @@ void _gcry_twofish_cbc_dec (void *context, unsigned char *iv, void _gcry_twofish_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); -void _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, - const void *inbuf_arg, size_t nblocks, - int encrypt); -void _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, - size_t nblocks); +size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); /*-- dsa.c --*/ void _gcry_register_pk_dsa_progress (gcry_handler_progress_t cbc, void *cb_data); From jussi.kivilinna at iki.fi Mon Jul 27 11:04:30 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 27 Jul 2015 12:04:30 +0300 Subject: [PATCH 4/6] Add bulk OCB for Serpent SSE2, AVX2 and 
NEON implementations In-Reply-To: <20150727090415.18742.12674.stgit@localhost6.localdomain6> References: <20150727090415.18742.12674.stgit@localhost6.localdomain6> Message-ID: <20150727090430.18742.9807.stgit@localhost6.localdomain6> * cipher/cipher.c (_gcry_cipher_open_internal): Setup OCB bulk functions for Serpent. * cipher/serpent-armv7-neon.S: Add OCB assembly functions. * cipher/serpent-avx2-amd64.S: Add OCB assembly functions. * cipher/serpent-sse2-amd64.S: Add OCB assembly functions. * cipher/serpent.c (_gcry_serpent_sse2_ocb_enc) (_gcry_serpent_sse2_ocb_dec, _gcry_serpent_sse2_ocb_auth) (_gcry_serpent_neon_ocb_enc, _gcry_serpent_neon_ocb_dec) (_gcry_serpent_neon_ocb_auth, _gcry_serpent_avx2_ocb_enc) (_gcry_serpent_avx2_ocb_dec, _gcry_serpent_avx2_ocb_auth): New prototypes. (get_l, _gcry_serpent_ocb_crypt, _gcry_serpent_ocb_auth): New. * src/cipher.h (_gcry_serpent_ocb_crypt) (_gcry_serpent_ocb_auth): New. * tests/basic.c (check_ocb_cipher): Add test-vector for serpent. -- Signed-off-by: Jussi Kivilinna --- cipher/cipher.c | 2 cipher/serpent-armv7-neon.S | 255 ++++++++++++++++++++++++++ cipher/serpent-avx2-amd64.S | 307 +++++++++++++++++++++++++++++++- cipher/serpent-sse2-amd64.S | 307 +++++++++++++++++++++++++++++++- cipher/serpent.c | 419 +++++++++++++++++++++++++++++++++++++++++++ src/cipher.h | 5 + tests/basic.c | 9 + 7 files changed, 1301 insertions(+), 3 deletions(-) diff --git a/cipher/cipher.c b/cipher/cipher.c index 8483c5f..30c2f48 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -553,6 +553,8 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, h->bulk.cbc_dec = _gcry_serpent_cbc_dec; h->bulk.cfb_dec = _gcry_serpent_cfb_dec; h->bulk.ctr_enc = _gcry_serpent_ctr_enc; + h->bulk.ocb_crypt = _gcry_serpent_ocb_crypt; + h->bulk.ocb_auth = _gcry_serpent_ocb_auth; break; #endif /*USE_SERPENT*/ #ifdef USE_TWOFISH diff --git a/cipher/serpent-armv7-neon.S b/cipher/serpent-armv7-neon.S index 3559558..adff639 100644 --- a/cipher/serpent-armv7-neon.S 
+++ b/cipher/serpent-armv7-neon.S @@ -866,4 +866,259 @@ _gcry_serpent_neon_cbc_dec: pop {pc}; .size _gcry_serpent_neon_cbc_dec,.-_gcry_serpent_neon_cbc_dec; +.align 3 +.globl _gcry_serpent_neon_ocb_enc +.type _gcry_serpent_neon_ocb_enc,%function; +_gcry_serpent_neon_ocb_enc: + /* input: + * r0 : ctx, CTX + * r1 : dst (8 blocks) + * r2 : src (8 blocks) + * r3 : offset + * sp+0: checksum + * sp+4: L pointers (void *L[8]) + */ + + push {r4-r11, ip, lr}; + add ip, sp, #(10*4); + + vpush {RA4-RB2}; + + ldm ip, {r4, lr}; + + vld1.8 {RT0}, [r3]; + vld1.8 {RT1}, [r4]; + + /* Load L pointers */ + ldm lr!, {r5, r6, r7, r8}; + ldm lr, {r9, r10, r11, ip}; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + + vld1.8 {RA0, RA1}, [r2]!; + vld1.8 {RA2, RA3}, [r2]!; + vld1.8 {RB0, RB1}, [r2]!; + vld1.8 {RB2, RB3}, [r2]; + +#define OCB_INPUT(lreg, vreg) \ + vld1.8 {RT3}, [lreg]; \ + veor RT0, RT3; \ + veor RT1, vreg; \ + veor vreg, RT0; \ + vst1.8 {RT0}, [r1]!; + + OCB_INPUT(r5, RA0); + OCB_INPUT(r6, RA1); + OCB_INPUT(r7, RA2); + OCB_INPUT(r8, RA3); + OCB_INPUT(r9, RB0); + OCB_INPUT(r10, RB1); + OCB_INPUT(r11, RB2); + OCB_INPUT(ip, RB3); +#undef OCB_INPUT + + sub r1, r1, #(8*16); + vst1.8 {RT0}, [r3]; + vst1.8 {RT1}, [r4]; + mov r2, r1; + + bl __serpent_enc_blk8; + + vld1.8 {RT0, RT1}, [r1]!; + veor RT0, RA4, RT0; + veor RT1, RA1, RT1; + vld1.8 {RT2, RT3}, [r1]!; + vst1.8 {RT0, RT1}, [r2]!; + veor RT2, RA2, RT2; + veor RT3, RA0, RT3; + vld1.8 {RT0, RT1}, [r1]!; + vst1.8 {RT2, RT3}, [r2]!; + veor RT0, RB4, RT0; + veor RT1, RB1, RT1; + vld1.8 {RT2, RT3}, [r1]!; + vst1.8 {RT0, RT1}, [r2]!; + veor RT2, RB2, RT2; + veor RT3, RB0, RT3; + vst1.8 {RT2, RT3}, [r2]!; + + vpop {RA4-RB2}; + + /* clear the used registers */ + veor RA3, RA3; + veor RB3, RB3; + + pop {r4-r11, ip, pc}; +.size _gcry_serpent_neon_ocb_enc,.-_gcry_serpent_neon_ocb_enc; + +.align 3 +.globl _gcry_serpent_neon_ocb_dec 
+.type _gcry_serpent_neon_ocb_dec,%function; +_gcry_serpent_neon_ocb_dec: + /* input: + * r0 : ctx, CTX + * r1 : dst (8 blocks) + * r2 : src (8 blocks) + * r3 : offset + * sp+0: checksum + * sp+4: L pointers (void *L[8]) + */ + + push {r4-r11, ip, lr}; + add ip, sp, #(10*4); + + vpush {RA4-RB2}; + + ldm ip, {r4, lr}; + + vld1.8 {RT0}, [r3]; + + /* Load L pointers */ + ldm lr!, {r5, r6, r7, r8}; + ldm lr, {r9, r10, r11, ip}; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + + vld1.8 {RA0, RA1}, [r2]!; + vld1.8 {RA2, RA3}, [r2]!; + vld1.8 {RB0, RB1}, [r2]!; + vld1.8 {RB2, RB3}, [r2]; + +#define OCB_INPUT(lreg, vreg) \ + vld1.8 {RT3}, [lreg]; \ + veor RT0, RT3; \ + veor vreg, RT0; \ + vst1.8 {RT0}, [r1]!; + + OCB_INPUT(r5, RA0); + OCB_INPUT(r6, RA1); + OCB_INPUT(r7, RA2); + OCB_INPUT(r8, RA3); + OCB_INPUT(r9, RB0); + OCB_INPUT(r10, RB1); + OCB_INPUT(r11, RB2); + OCB_INPUT(ip, RB3); +#undef OCB_INPUT + + sub r1, r1, #(8*16); + vst1.8 {RT0}, [r3]; + mov r2, r1; + + bl __serpent_dec_blk8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vld1.8 {RA4}, [r4]; + + vld1.8 {RT0, RT1}, [r1]!; + veor RA0, RA0, RT0; + veor RA1, RA1, RT1; + vld1.8 {RT2, RT3}, [r1]!; + veor RA4, RA4, RA0; + vst1.8 {RA0, RA1}, [r2]!; + veor RA4, RA4, RA1; + veor RA2, RA2, RT2; + veor RA3, RA3, RT3; + vld1.8 {RT0, RT1}, [r1]!; + veor RA4, RA4, RA2; + vst1.8 {RA2, RA3}, [r2]!; + veor RA4, RA4, RA3; + veor RB0, RB0, RT0; + veor RB1, RB1, RT1; + vld1.8 {RT2, RT3}, [r1]!; + veor RA4, RA4, RB0; + vst1.8 {RB0, RB1}, [r2]!; + veor RA4, RA4, RB1; + veor RB2, RB2, RT2; + veor RB3, RB3, RT3; + veor RA4, RA4, RB2; + vst1.8 {RB2, RB3}, [r2]!; + + veor RA4, RA4, RB3; + vst1.8 {RA4}, [r4]; + + vpop {RA4-RB2}; + + /* clear the used registers */ + veor RB4, RB4; + + pop {r4-r11, ip, pc}; +.size _gcry_serpent_neon_ocb_dec,.-_gcry_serpent_neon_ocb_dec; + +.align 3 +.globl _gcry_serpent_neon_ocb_auth +.type _gcry_serpent_neon_ocb_auth,%function; 
+_gcry_serpent_neon_ocb_auth: + /* input: + * r0 : ctx, CTX + * r1 : abuf (8 blocks) + * r2 : offset + * r3 : checksum + * sp+0: L pointers (void *L[8]) + */ + + push {r5-r11, ip, lr}; + ldr lr, [sp, #(9*4)]; + + vpush {RA4-RB2}; + + vld1.8 {RT0}, [r2]; + + /* Load L pointers */ + ldm lr!, {r5, r6, r7, r8}; + ldm lr, {r9, r10, r11, ip}; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + + vld1.8 {RA0, RA1}, [r1]!; + vld1.8 {RA2, RA3}, [r1]!; + vld1.8 {RB0, RB1}, [r1]!; + vld1.8 {RB2, RB3}, [r1]; + +#define OCB_INPUT(lreg, vreg) \ + vld1.8 {RT3}, [lreg]; \ + veor RT0, RT3; \ + veor vreg, RT0; + + OCB_INPUT(r5, RA0); + OCB_INPUT(r6, RA1); + OCB_INPUT(r7, RA2); + OCB_INPUT(r8, RA3); + OCB_INPUT(r9, RB0); + OCB_INPUT(r10, RB1); + OCB_INPUT(r11, RB2); + OCB_INPUT(ip, RB3); +#undef OCB_INPUT + + vst1.8 {RT0}, [r2]; + + bl __serpent_enc_blk8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + vld1.8 {RT0}, [r3]; + + veor RA4, RB4; + veor RA1, RB1; + veor RA2, RB2; + veor RA0, RB0; + + veor RA2, RT0; + veor RA1, RA4; + veor RA0, RA2; + + veor RA0, RA1; + + vst1.8 {RA0}, [r3]; + + vpop {RA4-RB2}; + + /* clear the used registers */ + veor RA3, RA3; + veor RB3, RB3; + + pop {r5-r11, ip, pc}; +.size _gcry_serpent_neon_ocb_auth,.-_gcry_serpent_neon_ocb_auth; + #endif diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S index 3f59f06..2902dab 100644 --- a/cipher/serpent-avx2-amd64.S +++ b/cipher/serpent-avx2-amd64.S @@ -1,6 +1,6 @@ /* serpent-avx2-amd64.S - AVX2 implementation of Serpent cipher * - * Copyright (C) 2013 Jussi Kivilinna + * Copyright (C) 2013-2015 Jussi Kivilinna * * This file is part of Libgcrypt. 
* @@ -808,6 +808,311 @@ _gcry_serpent_avx2_cfb_dec: ret ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;) +.align 8 +.globl _gcry_serpent_avx2_ocb_enc +ELF(.type _gcry_serpent_avx2_ocb_enc,@function;) + +_gcry_serpent_avx2_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rcx), RTMP0x; + vmovdqu (%r8), RTMP1x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RTMP1, RTMP1; \ + vpxor yreg, RNOT, yreg; \ + vmovdqu RNOT, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RA1); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, RA2); + OCB_INPUT(3, %r12, %r13, RA3); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, RB0); + OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, RB2); + OCB_INPUT(7, %r12, %r13, RB3); +#undef OCB_INPUT + + vextracti128 $1, RTMP1, RNOTx; + vmovdqu RTMP0x, (%rcx); + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%r8); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp),
%r13; + + call __serpent_enc_blk16; + + addq $(4 * 8), %rsp; + + vpxor (0 * 32)(%rsi), RA4, RA4; + vpxor (1 * 32)(%rsi), RA1, RA1; + vpxor (2 * 32)(%rsi), RA2, RA2; + vpxor (3 * 32)(%rsi), RA0, RA0; + vpxor (4 * 32)(%rsi), RB4, RB4; + vpxor (5 * 32)(%rsi), RB1, RB1; + vpxor (6 * 32)(%rsi), RB2, RB2; + vpxor (7 * 32)(%rsi), RB0, RB0; + + vmovdqu RA4, (0 * 32)(%rsi); + vmovdqu RA1, (1 * 32)(%rsi); + vmovdqu RA2, (2 * 32)(%rsi); + vmovdqu RA0, (3 * 32)(%rsi); + vmovdqu RB4, (4 * 32)(%rsi); + vmovdqu RB1, (5 * 32)(%rsi); + vmovdqu RB2, (6 * 32)(%rsi); + vmovdqu RB0, (7 * 32)(%rsi); + + vzeroall; + + ret; +ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;) + +.align 8 +.globl _gcry_serpent_avx2_ocb_dec +ELF(.type _gcry_serpent_avx2_ocb_dec,@function;) + +_gcry_serpent_avx2_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rcx), RTMP0x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RNOT, yreg; \ + vmovdqu RNOT, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RA1); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, RA2); + OCB_INPUT(3, %r12, %r13, RA3); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, RB0); +
OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, RB2); + OCB_INPUT(7, %r12, %r13, RB3); +#undef OCB_INPUT + + vmovdqu RTMP0x, (%rcx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_dec_blk16; + + addq $(4 * 8), %rsp; + + vmovdqu (%r8), RTMP1x; + + vpxor (0 * 32)(%rsi), RA0, RA0; + vpxor (1 * 32)(%rsi), RA1, RA1; + vpxor (2 * 32)(%rsi), RA2, RA2; + vpxor (3 * 32)(%rsi), RA3, RA3; + vpxor (4 * 32)(%rsi), RB0, RB0; + vpxor (5 * 32)(%rsi), RB1, RB1; + vpxor (6 * 32)(%rsi), RB2, RB2; + vpxor (7 * 32)(%rsi), RB3, RB3; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vmovdqu RA0, (0 * 32)(%rsi); + vpxor RA0, RTMP1, RTMP1; + vmovdqu RA1, (1 * 32)(%rsi); + vpxor RA1, RTMP1, RTMP1; + vmovdqu RA2, (2 * 32)(%rsi); + vpxor RA2, RTMP1, RTMP1; + vmovdqu RA3, (3 * 32)(%rsi); + vpxor RA3, RTMP1, RTMP1; + vmovdqu RB0, (4 * 32)(%rsi); + vpxor RB0, RTMP1, RTMP1; + vmovdqu RB1, (5 * 32)(%rsi); + vpxor RB1, RTMP1, RTMP1; + vmovdqu RB2, (6 * 32)(%rsi); + vpxor RB2, RTMP1, RTMP1; + vmovdqu RB3, (7 * 32)(%rsi); + vpxor RB3, RTMP1, RTMP1; + + vextracti128 $1, RTMP1, RNOTx; + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%r8); + + vzeroall; + + ret; +ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;) + +.align 8 +.globl _gcry_serpent_avx2_ocb_auth +ELF(.type _gcry_serpent_avx2_ocb_auth,@function;) + +_gcry_serpent_avx2_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (16 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[16]) + */ + + vzeroupper; + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + vmovdqu (%rdx), RTMP0x; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + +#define
OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rsi), yreg; \ + vpxor (l0reg), RTMP0x, RNOTx; \ + vpxor (l1reg), RNOTx, RTMP0x; \ + vinserti128 $1, RTMP0x, RNOT, RNOT; \ + vpxor yreg, RNOT, yreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %r11, RA0); + OCB_INPUT(1, %r12, %r13, RA1); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(2, %r10, %r11, RA2); + OCB_INPUT(3, %r12, %r13, RA3); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %r11, RB0); + OCB_INPUT(5, %r12, %r13, RB1); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(6, %r10, %r11, RB2); + OCB_INPUT(7, %r12, %r13, RB3); +#undef OCB_INPUT + + vmovdqu RTMP0x, (%rdx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_enc_blk16; + + addq $(4 * 8), %rsp; + + vpxor RA4, RB4, RA4; + vpxor RA1, RB1, RA1; + vpxor RA2, RB2, RA2; + vpxor RA0, RB0, RA0; + + vpxor RA4, RA1, RA1; + vpxor RA2, RA0, RA0; + + vpxor RA1, RA0, RTMP1; + + vextracti128 $1, RTMP1, RNOTx; + vpxor (%rcx), RTMP1x, RTMP1x; + vpxor RNOTx, RTMP1x, RTMP1x; + vmovdqu RTMP1x, (%rcx); + + vzeroall; + + ret; +ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;) + .data .align 16 diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S index adbf4e2..b149af2 100644 --- a/cipher/serpent-sse2-amd64.S +++ b/cipher/serpent-sse2-amd64.S @@ -1,6 +1,6 @@ /* serpent-sse2-amd64.S - SSE2 implementation of Serpent cipher * - * Copyright (C) 2013 Jussi Kivilinna + * Copyright (C) 2013-2015 Jussi Kivilinna * * This file is part of Libgcrypt. 
* @@ -866,5 +866,310 @@ _gcry_serpent_sse2_cfb_dec: ret ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;) +.align 8 +.globl _gcry_serpent_sse2_ocb_enc +ELF(.type _gcry_serpent_sse2_ocb_enc, at function;) + +_gcry_serpent_sse2_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[8]) + */ + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + movdqu (%rcx), RTMP0; + movdqu (%r8), RTMP1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + movdqu (n * 16)(%rdx), xreg; \ + movdqu (lreg), RNOT; \ + pxor RNOT, RTMP0; \ + pxor xreg, RTMP1; \ + pxor RTMP0, xreg; \ + movdqu RTMP0, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, RA0); + OCB_INPUT(1, %r11, RA1); + OCB_INPUT(2, %r12, RA2); + OCB_INPUT(3, %r13, RA3); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, RB0); + OCB_INPUT(5, %r11, RB1); + OCB_INPUT(6, %r12, RB2); + OCB_INPUT(7, %r13, RB3); +#undef OCB_INPUT + + movdqu RTMP0, (%rcx); + movdqu RTMP1, (%r8); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_enc_blk8; + + addq $(4 * 8), %rsp; + + pxor_u((0 * 16)(%rsi), RA4, RTMP0); + pxor_u((1 * 16)(%rsi), RA1, RTMP0); + pxor_u((2 * 16)(%rsi), RA2, RTMP0); + pxor_u((3 * 16)(%rsi), RA0, RTMP0); + pxor_u((4 * 16)(%rsi), RB4, RTMP0); + pxor_u((5 * 16)(%rsi), RB1, RTMP0); + pxor_u((6 * 16)(%rsi), RB2, RTMP0); + pxor_u((7 * 16)(%rsi), RB0, RTMP0); + + movdqu RA4, (0 * 16)(%rsi); + movdqu RA1, (1 * 16)(%rsi); + movdqu RA2, (2 * 
16)(%rsi); + movdqu RA0, (3 * 16)(%rsi); + movdqu RB4, (4 * 16)(%rsi); + movdqu RB1, (5 * 16)(%rsi); + movdqu RB2, (6 * 16)(%rsi); + movdqu RB0, (7 * 16)(%rsi); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret; +ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;) + +.align 8 +.globl _gcry_serpent_sse2_ocb_dec +ELF(.type _gcry_serpent_sse2_ocb_dec, at function;) + +_gcry_serpent_sse2_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[8]) + */ + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + movdqu (%rcx), RTMP0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + movdqu (n * 16)(%rdx), xreg; \ + movdqu (lreg), RNOT; \ + pxor RNOT, RTMP0; \ + pxor RTMP0, xreg; \ + movdqu RTMP0, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, RA0); + OCB_INPUT(1, %r11, RA1); + OCB_INPUT(2, %r12, RA2); + OCB_INPUT(3, %r13, RA3); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, RB0); + OCB_INPUT(5, %r11, RB1); + OCB_INPUT(6, %r12, RB2); + OCB_INPUT(7, %r13, RB3); +#undef OCB_INPUT + + movdqu RTMP0, (%rcx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_dec_blk8; + + addq $(4 * 8), %rsp; + + movdqu (%r8), RTMP0; + + pxor_u((0 * 16)(%rsi), RA0, RTMP1); + pxor_u((1 * 16)(%rsi), RA1, 
RTMP1); + pxor_u((2 * 16)(%rsi), RA2, RTMP1); + pxor_u((3 * 16)(%rsi), RA3, RTMP1); + pxor_u((4 * 16)(%rsi), RB0, RTMP1); + pxor_u((5 * 16)(%rsi), RB1, RTMP1); + pxor_u((6 * 16)(%rsi), RB2, RTMP1); + pxor_u((7 * 16)(%rsi), RB3, RTMP1); + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + movdqu RA0, (0 * 16)(%rsi); + pxor RA0, RTMP0; + movdqu RA1, (1 * 16)(%rsi); + pxor RA1, RTMP0; + movdqu RA2, (2 * 16)(%rsi); + pxor RA2, RTMP0; + movdqu RA3, (3 * 16)(%rsi); + pxor RA3, RTMP0; + movdqu RB0, (4 * 16)(%rsi); + pxor RB0, RTMP0; + movdqu RB1, (5 * 16)(%rsi); + pxor RB1, RTMP0; + movdqu RB2, (6 * 16)(%rsi); + pxor RB2, RTMP0; + movdqu RB3, (7 * 16)(%rsi); + pxor RB3, RTMP0; + + movdqu RTMP0, (%r8); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret; +ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;) + +.align 8 +.globl _gcry_serpent_sse2_ocb_auth +ELF(.type _gcry_serpent_sse2_ocb_auth, at function;) + +_gcry_serpent_sse2_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (8 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[8]) + */ + + subq $(4 * 8), %rsp; + + movq %r10, (0 * 8)(%rsp); + movq %r11, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + + movdqu (%rdx), RTMP0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + movdqu (n * 16)(%rsi), xreg; \ + movdqu (lreg), RNOT; \ + pxor RNOT, RTMP0; \ + pxor RTMP0, xreg; + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, RA0); + OCB_INPUT(1, %r11, RA1); + OCB_INPUT(2, %r12, RA2); + OCB_INPUT(3, %r13, RA3); + movq (4 * 8)(%r8), %r10; + movq (5 * 
8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, RB0); + OCB_INPUT(5, %r11, RB1); + OCB_INPUT(6, %r12, RB2); + OCB_INPUT(7, %r13, RB3); +#undef OCB_INPUT + + movdqu RTMP0, (%rdx); + + movq (0 * 8)(%rsp), %r10; + movq (1 * 8)(%rsp), %r11; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + + call __serpent_enc_blk8; + + addq $(4 * 8), %rsp; + + movdqu (%rcx), RTMP0; + pxor RB4, RA4; + pxor RB1, RA1; + pxor RB2, RA2; + pxor RB0, RA0; + + pxor RTMP0, RA2; + pxor RA4, RA1; + pxor RA2, RA0; + + pxor RA1, RA0; + movdqu RA0, (%rcx); + + /* clear the used registers */ + pxor RA0, RA0; + pxor RA1, RA1; + pxor RA2, RA2; + pxor RA3, RA3; + pxor RA4, RA4; + pxor RB0, RB0; + pxor RB1, RB1; + pxor RB2, RB2; + pxor RB3, RB3; + pxor RB4, RB4; + pxor RTMP0, RTMP0; + pxor RTMP1, RTMP1; + pxor RTMP2, RTMP2; + pxor RNOT, RNOT; + + ret; +ELF(.size _gcry_serpent_sse2_ocb_auth,.-_gcry_serpent_sse2_ocb_auth;) + #endif /*defined(USE_SERPENT)*/ #endif /*__x86_64*/ diff --git a/cipher/serpent.c b/cipher/serpent.c index 7d0e112..eb491aa 100644 --- a/cipher/serpent.c +++ b/cipher/serpent.c @@ -29,6 +29,7 @@ #include "cipher.h" #include "bithelp.h" #include "bufhelp.h" +#include "cipher-internal.h" #include "cipher-selftest.h" @@ -118,10 +119,30 @@ extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; + +extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[8]) ASM_FUNC_ABI; + +extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[8]) ASM_FUNC_ABI; + +extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx, + const unsigned char *abuf, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[8]) 
ASM_FUNC_ABI; #endif #ifdef USE_AVX2 -/* Assembler implementations of Serpent using SSE2. Process 16 block in +/* Assembler implementations of Serpent using AVX2. Process 16 block in parallel. */ extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx, @@ -138,6 +159,26 @@ extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; + +extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[16]) ASM_FUNC_ABI; + +extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[16]) ASM_FUNC_ABI; + +extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx, + const unsigned char *abuf, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[16]) ASM_FUNC_ABI; #endif #ifdef USE_NEON @@ -158,6 +199,26 @@ extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv); + +extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[8]); + +extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[8]); + +extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx, + const unsigned char *abuf, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[8]); #endif @@ -1165,6 +1226,362 @@ _gcry_serpent_cfb_dec(void *context, unsigned char *iv, _gcry_burn_stack(burn_stack_depth); } +static inline const unsigned char * +get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) +{ + unsigned int ntz = _gcry_ctz64 (i); + + if 
(ntz < OCB_L_TABLE_SIZE) + return c->u_mode.ocb.L[ntz]; + else + return _gcry_cipher_ocb_get_l (c, l_tmp, i); +} + +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +void +_gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + serpent_context_t *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char l_tmp[sizeof(serpent_block_t)]; + const unsigned char *l; + int burn_stack_depth = 2 * sizeof (serpent_block_t); + u64 blkn = c->u_mode.ocb.data_nblocks; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + const void *Ls[16]; + int i; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + /* l_tmp will be used only every 65536-th block. */ + for (i = 0; i < 16; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + if (encrypt) + _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 16; + outbuf += 16 * sizeof(serpent_block_t); + inbuf += 16 * sizeof(serpent_block_t); + did_use_avx2 = 1; + } + + if (did_use_avx2) + { + /* serpent-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_SSE2 + { + int did_use_sse2 = 0; + const void *Ls[8]; + int i; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + /* l_tmp will be used only every 65536-th block. 
*/ + for (i = 0; i < 8; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + if (encrypt) + _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_sse2 = 1; + } + + if (did_use_sse2) + { + /* serpent-sse2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_NEON + if (ctx->use_neon) + { + int did_use_neon = 0; + const void *Ls[8]; + int i; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + /* l_tmp will be used only every 65536-th block. */ + for (i = 0; i < 8; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + if (encrypt) + _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_neon = 1; + } + + if (did_use_neon) + { + /* serpent-neon assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... 
*/ + } +#endif + + if (encrypt) + { + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, sizeof(serpent_block_t)); + buf_cpy (l_tmp, inbuf, sizeof(serpent_block_t)); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp, sizeof(serpent_block_t)); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp, c->u_iv.iv, sizeof(serpent_block_t)); + serpent_encrypt_internal(ctx, l_tmp, l_tmp); + buf_xor_1 (l_tmp, c->u_iv.iv, sizeof(serpent_block_t)); + buf_cpy (outbuf, l_tmp, sizeof(serpent_block_t)); + + inbuf += sizeof(serpent_block_t); + outbuf += sizeof(serpent_block_t); + } + } + else + { + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, sizeof(serpent_block_t)); + buf_cpy (l_tmp, inbuf, sizeof(serpent_block_t)); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp, c->u_iv.iv, sizeof(serpent_block_t)); + serpent_decrypt_internal(ctx, l_tmp, l_tmp); + buf_xor_1 (l_tmp, c->u_iv.iv, sizeof(serpent_block_t)); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp, sizeof(serpent_block_t)); + buf_cpy (outbuf, l_tmp, sizeof(serpent_block_t)); + + inbuf += sizeof(serpent_block_t); + outbuf += sizeof(serpent_block_t); + } + } + + c->u_mode.ocb.data_nblocks = blkn; + + wipememory(&l_tmp, sizeof(l_tmp)); + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +} + +/* Bulk authentication of complete blocks in OCB mode. 
*/ +void +_gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ + serpent_context_t *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + unsigned char l_tmp[sizeof(serpent_block_t)]; + const unsigned char *l; + int burn_stack_depth = 2 * sizeof(serpent_block_t); + u64 blkn = c->u_mode.ocb.aad_nblocks; + +#ifdef USE_AVX2 + if (ctx->use_avx2) + { + int did_use_avx2 = 0; + const void *Ls[16]; + int i; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + /* l_tmp will be used only every 65536-th block. */ + for (i = 0; i < 16; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 16; + abuf += 16 * sizeof(serpent_block_t); + did_use_avx2 = 1; + } + + if (did_use_avx2) + { + /* serpent-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_SSE2 + { + int did_use_sse2 = 0; + const void *Ls[8]; + int i; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + /* l_tmp will be used only every 65536-th block. */ + for (i = 0; i < 8; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 8; + abuf += 8 * sizeof(serpent_block_t); + did_use_sse2 = 1; + } + + if (did_use_sse2) + { + /* serpent-avx2 assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... 
*/ + } +#endif + +#ifdef USE_NEON + if (ctx->use_neon) + { + int did_use_neon = 0; + const void *Ls[8]; + int i; + + /* Process data in 8 block chunks. */ + while (nblocks >= 8) + { + /* l_tmp will be used only every 65536-th block. */ + for (i = 0; i < 8; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 8; + abuf += 8 * sizeof(serpent_block_t); + did_use_neon = 1; + } + + if (did_use_neon) + { + /* serpent-neon assembly code does not use stack */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_mode.ocb.aad_offset, l, sizeof(serpent_block_t)); + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, sizeof(serpent_block_t)); + serpent_encrypt_internal(ctx, l_tmp, l_tmp); + buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, sizeof(serpent_block_t)); + + abuf += sizeof(serpent_block_t); + } + + c->u_mode.ocb.aad_nblocks = blkn; + + wipememory(&l_tmp, sizeof(l_tmp)); + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +} + /* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR diff --git a/src/cipher.h b/src/cipher.h index 1a66f6d..d16746a 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -206,6 +206,11 @@ void _gcry_serpent_cbc_dec (void *context, unsigned char *iv, void _gcry_serpent_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); +void _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +void 
_gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); /*-- twofish.c --*/ void _gcry_twofish_ctr_enc (void *context, unsigned char *ctr, diff --git a/tests/basic.c b/tests/basic.c index 124df55..3ad05a4 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -3350,6 +3350,15 @@ check_ocb_cipher (void) check_ocb_cipher_largebuf(GCRY_CIPHER_TWOFISH, 32, "\xf6\xd4\xfe\x4e\x50\x85\x13\x59" "\x69\x0e\x4c\x67\x3e\xdd\x47\x90"); + check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT128, 16, + "\x3c\xfb\x66\x14\x3c\xc8\x6c\x67" + "\x26\xb8\x23\xeb\xaf\x43\x98\x69"); + check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT192, 24, + "\x5e\x62\x27\xc5\x32\xc3\x1d\xe6" + "\x2e\x65\xe7\xd6\xfb\x05\xd7\xb2"); + check_ocb_cipher_largebuf(GCRY_CIPHER_SERPENT256, 32, + "\xe7\x8b\xe6\xd4\x2f\x7a\x36\x4c" + "\xba\xee\x20\xe2\x68\xf4\xcb\xcc"); } From jussi.kivilinna at iki.fi Mon Jul 27 11:04:40 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 27 Jul 2015 12:04:40 +0300 Subject: [PATCH 6/6] Reduce code size for Twofish key-setup and remove key dependend branch In-Reply-To: <20150727090415.18742.12674.stgit@localhost6.localdomain6> References: <20150727090415.18742.12674.stgit@localhost6.localdomain6> Message-ID: <20150727090440.18742.74531.stgit@localhost6.localdomain6> * cipher/twofish.c (poly_to_exp): Increase size by one, change type from byte to u16 and insert '492' to index 0. (exp_to_poly): Increase size by 256, let new cells have zero value. (CALC_S): Execute unconditionally with help of modified tables. (do_twofish_setkey): Change type for 'tmp' to 'unsigned int'; Un-unroll CALC_K256 and CALC_K phases to reduce generated object size. 
-- Signed-off-by: Jussi Kivilinna --- cipher/twofish.c | 76 ++++++++++++++++++------------------------------------ 1 file changed, 26 insertions(+), 50 deletions(-) diff --git a/cipher/twofish.c b/cipher/twofish.c index 3ee2be5..11e60a7 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -356,7 +356,8 @@ static const u32 mds[4][256] = { * see a non-horrible way of avoiding them, and I did manage to group the * statements so that each if covers four group multiplications. */ -static const byte poly_to_exp[255] = { +static const u16 poly_to_exp[256] = { + 492, 0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19, 0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A, 0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C, @@ -381,7 +382,7 @@ static const byte poly_to_exp[255] = { 0x85, 0xC8, 0xA1 }; -static const byte exp_to_poly[492] = { +static const byte exp_to_poly[492 + 256] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6, @@ -422,7 +423,7 @@ static const byte exp_to_poly[492] = { 0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, - 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB + 0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, }; @@ -494,14 +495,15 @@ static byte calc_sb_tbl[512] = { 0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56, 0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91 }; + /* Macro to perform one column of the RS matrix multiplication. 
The * parameters a, b, c, and d are the four bytes of output; i is the index * of the key bytes, and w, x, y, and z, are the column of constants from * the RS matrix, preprocessed through the poly_to_exp table. */ #define CALC_S(a, b, c, d, i, w, x, y, z) \ - if (key[i]) { \ - tmp = poly_to_exp[key[i] - 1]; \ + { \ + tmp = poly_to_exp[key[i]]; \ (a) ^= exp_to_poly[tmp + (w)]; \ (b) ^= exp_to_poly[tmp + (x)]; \ (c) ^= exp_to_poly[tmp + (y)]; \ @@ -600,7 +602,7 @@ do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen) byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0; /* Temporary for CALC_S. */ - byte tmp; + unsigned int tmp; /* Flags for self-test. */ static int initialized = 0; @@ -668,28 +670,15 @@ do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen) CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); } - /* Calculate whitening and round subkeys. The constants are - * indices of subkeys, preprocessed through q0 and q1. */ - CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3); - CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); - CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); - CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); - CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); - CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B); - CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D); - CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B); - CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32); - CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD); - CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71); - CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); - CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F); - CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B); - CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA); - CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F); - CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); - CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B); - CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00); - CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D); + /* Calculate whitening and round subkeys. 
*/ + for (i = 0; i < 8; i += 2) + { + CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] ); + } + for (j = 0; j < 32; j += 2, i += 2) + { + CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] ); + } } else { @@ -699,28 +688,15 @@ do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen) CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); } - /* Calculate whitening and round subkeys. The constants are - * indices of subkeys, preprocessed through q0 and q1. */ - CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3); - CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); - CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B); - CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8); - CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3); - CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B); - CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D); - CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B); - CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32); - CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD); - CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71); - CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); - CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F); - CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B); - CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA); - CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F); - CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); - CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B); - CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00); - CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D); + /* Calculate whitening and round subkeys. */ + for (i = 0; i < 8; i += 2) + { + CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] ); + } + for (j = 0; j < 32; j += 2, i += 2) + { + CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] ); + } } return 0; From wk at gnupg.org Mon Jul 27 12:19:54 2015 From: wk at gnupg.org (Werner Koch) Date: Mon, 27 Jul 2015 12:19:54 +0200 Subject: [git] GCRYPT - branch, master, updated. 
libgcrypt-1.6.0-237-g46c0726 In-Reply-To: <55B5E8AD.3010102@iki.fi> (Jussi Kivilinna's message of "Mon, 27 Jul 2015 11:15:41 +0300") References: <87zj2i887e.fsf@vigenere.g10code.de> <55B5E8AD.3010102@iki.fi> Message-ID: <87a8uh9a2d.fsf@vigenere.g10code.de> On Mon, 27 Jul 2015 10:15, jussi.kivilinna at iki.fi said: >>> * cipher/cipher-gcm.c: Do not copy zero bytes from an empty buffer. Let > Regular buf_cpy can handle the srcbuf==NULL && len==0 case (undefined for > memcpy) without problem. Okay, I missed that it was about !SRCBUF. I falsely assumed "empty" denotes a zero length string. Salam-Shalom, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. From wk at gnupg.org Mon Jul 27 12:22:23 2015 From: wk at gnupg.org (Werner Koch) Date: Mon, 27 Jul 2015 12:22:23 +0200 Subject: [PATCH 5/6] Add LSan annotation to ignore a memory leak In-Reply-To: <20150722194041.GC8113@al> (Peter Wu's message of "Wed, 22 Jul 2015 21:40:41 +0200") References: <1436454696-20362-1-git-send-email-peter@lekensteyn.nl> <1436454696-20362-6-git-send-email-peter@lekensteyn.nl> <87fv4ge3yq.fsf@vigenere.g10code.de> <20150722194041.GC8113@al> Message-ID: <87615599y8.fsf@vigenere.g10code.de> On Wed, 22 Jul 2015 21:40, peter at lekensteyn.nl said: > I will look at adding this to libgpg-error. Is src/gpg-error.h.in the > appropriate file for this? Can the patch be submitted to this list? Meanwhile I moved the GCC attribute stuff to gpg-error.h. Feel free to send a patch to implement your annotation (e.g. using GPGRT_ANNOTATE_* or gpgrt_annotate_*). No extra DCO required. Shalom-Salam, Werner -- Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz. 
From jussi.kivilinna at iki.fi Mon Jul 27 11:04:20 2015 From: jussi.kivilinna at iki.fi (Jussi Kivilinna) Date: Mon, 27 Jul 2015 12:04:20 +0300 Subject: [PATCH 2/6] Add bulk OCB for Camellia AES-NI/AVX and AES-NI/AVX2 implementations In-Reply-To: <20150727090415.18742.12674.stgit@localhost6.localdomain6> References: <20150727090415.18742.12674.stgit@localhost6.localdomain6> Message-ID: <20150727090420.18742.23093.stgit@localhost6.localdomain6> * cipher/camellia-aesni-avx-amd64.S: Add OCB assembly functions. * cipher/camellia-aesni-avx2-amd64.S: Add OCB assembly functions. * cipher/camellia-glue.c (_gcry_camellia_aesni_avx_ocb_enc) (_gcry_camellia_aesni_avx_ocb_dec, _gcry_camellia_aesni_avx_ocb_auth) (_gcry_camellia_aesni_avx2_ocb_enc, _gcry_camellia_aesni_avx2_ocb_dec) (_gcry_camellia_aesni_avx2_ocb_auth): New prototypes. (get_l, _gcry_camellia_ocb_crypt, _gcry_camellia_ocb_auth): New. * cipher/cipher.c (_gcry_cipher_open_internal): Setup OCB bulk functions for Camellia. * src/cipher.h (_gcry_camellia_ocb_crypt) (_gcry_camellia_ocb_auth): New. * tests/basic.c (check_ocb_cipher): Add test-vector for Camellia. -- Signed-off-by: Jussi Kivilinna --- cipher/camellia-aesni-avx-amd64.S | 424 ++++++++++++++++++++++++++++++ cipher/camellia-aesni-avx2-amd64.S | 503 ++++++++++++++++++++++++++++++++++++ cipher/camellia-glue.c | 329 ++++++++++++++++++++++++ cipher/cipher.c | 2 src/cipher.h | 5 tests/basic.c | 9 + 6 files changed, 1266 insertions(+), 6 deletions(-) diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S index c047a21..5a3a3cb 100644 --- a/cipher/camellia-aesni-avx-amd64.S +++ b/cipher/camellia-aesni-avx-amd64.S @@ -1,6 +1,6 @@ /* camellia-avx-aesni-amd64.S - AES-NI/AVX implementation of Camellia cipher * - * Copyright (C) 2013 Jussi Kivilinna + * Copyright (C) 2013-2015 Jussi Kivilinna * * This file is part of Libgcrypt. 
* @@ -1211,6 +1211,428 @@ _gcry_camellia_aesni_avx_cfb_dec: ret; ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;) +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_enc +ELF(.type _gcry_camellia_aesni_avx_ocb_enc, at function;) + +_gcry_camellia_aesni_avx_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rax); + movq %r11, (16 * 16 + 1 * 8)(%rax); + movq %r12, (16 * 16 + 2 * 8)(%rax); + movq %r13, (16 * 16 + 3 * 8)(%rax); + + vmovdqu (%rcx), %xmm14; + vmovdqu (%r8), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rdx), xreg; \ + vpxor (lreg), %xmm14, %xmm14; \ + vpxor xreg, %xmm15, %xmm15; \ + vpxor xreg, %xmm14, xreg; \ + vmovdqu %xmm14, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 * 16)(%rax); + OCB_INPUT(1, %r11, %xmm0); + vmovdqu %xmm0, (14 * 16)(%rax); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + 
OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rcx); + vmovdqu %xmm15, (%r8); + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm15; + vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor 14 * 16(%rax), %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_enc_blk16; + + vpxor 0 * 16(%rsi), %xmm7, %xmm7; + vpxor 1 * 16(%rsi), %xmm6, %xmm6; + vpxor 2 * 16(%rsi), %xmm5, %xmm5; + vpxor 3 * 16(%rsi), %xmm4, %xmm4; + vpxor 4 * 16(%rsi), %xmm3, %xmm3; + vpxor 5 * 16(%rsi), %xmm2, %xmm2; + vpxor 6 * 16(%rsi), %xmm1, %xmm1; + vpxor 7 * 16(%rsi), %xmm0, %xmm0; + vpxor 8 * 16(%rsi), %xmm15, %xmm15; + vpxor 9 * 16(%rsi), %xmm14, %xmm14; + vpxor 10 * 16(%rsi), %xmm13, %xmm13; + vpxor 11 * 16(%rsi), %xmm12, %xmm12; + vpxor 12 * 16(%rsi), %xmm11, %xmm11; + vpxor 13 * 16(%rsi), %xmm10, %xmm10; + vpxor 14 * 16(%rsi), %xmm9, %xmm9; + vpxor 15 * 16(%rsi), %xmm8, %xmm8; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rax), %r10; + movq (16 * 16 + 1 * 8)(%rax), %r11; + movq (16 * 16 + 2 * 8)(%rax), %r12; + movq (16 * 16 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_dec +ELF(.type _gcry_camellia_aesni_avx_ocb_dec, at function;) + +_gcry_camellia_aesni_avx_ocb_dec: + /* input: + 
* %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rax); + movq %r11, (16 * 16 + 1 * 8)(%rax); + movq %r12, (16 * 16 + 2 * 8)(%rax); + movq %r13, (16 * 16 + 3 * 8)(%rax); + + vmovdqu (%rcx), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rdx), xreg; \ + vpxor (lreg), %xmm15, %xmm15; \ + vpxor xreg, %xmm15, xreg; \ + vmovdqu %xmm15, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 * 16)(%rax); + OCB_INPUT(1, %r11, %xmm14); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + vmovdqu %xmm15, (%rcx); + + movq %r8, %r10; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r9d; + cmovel %r9d, %r8d; /* max */ + + /* inpack16_pre: */ + vmovq (key_table)(CTX, %r8, 8), %xmm15; + vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, 
%xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor %xmm14, %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_dec_blk16; + + vpxor 0 * 16(%rsi), %xmm7, %xmm7; + vpxor 1 * 16(%rsi), %xmm6, %xmm6; + vpxor 2 * 16(%rsi), %xmm5, %xmm5; + vpxor 3 * 16(%rsi), %xmm4, %xmm4; + vpxor 4 * 16(%rsi), %xmm3, %xmm3; + vpxor 5 * 16(%rsi), %xmm2, %xmm2; + vpxor 6 * 16(%rsi), %xmm1, %xmm1; + vpxor 7 * 16(%rsi), %xmm0, %xmm0; + vmovdqu %xmm7, (7 * 16)(%rax); + vpxor 8 * 16(%rsi), %xmm15, %xmm15; + vpxor 9 * 16(%rsi), %xmm14, %xmm14; + vpxor 10 * 16(%rsi), %xmm13, %xmm13; + vpxor 11 * 16(%rsi), %xmm12, %xmm12; + vpxor 12 * 16(%rsi), %xmm11, %xmm11; + vpxor 13 * 16(%rsi), %xmm10, %xmm10; + vpxor 14 * 16(%rsi), %xmm9, %xmm9; + vpxor 15 * 16(%rsi), %xmm8, %xmm8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vpxor (%r10), %xmm7, %xmm7; + vpxor %xmm6, %xmm7, %xmm7; + vpxor %xmm5, %xmm7, %xmm7; + vpxor %xmm4, %xmm7, %xmm7; + vpxor %xmm3, %xmm7, %xmm7; + vpxor %xmm2, %xmm7, %xmm7; + vpxor %xmm1, %xmm7, %xmm7; + vpxor %xmm0, %xmm7, %xmm7; + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm14, %xmm7, %xmm7; + vpxor %xmm13, %xmm7, %xmm7; + vpxor %xmm12, %xmm7, %xmm7; + vpxor %xmm11, %xmm7, %xmm7; + vpxor %xmm10, %xmm7, %xmm7; + vpxor %xmm9, %xmm7, %xmm7; + vpxor %xmm8, %xmm7, %xmm7; + vmovdqu %xmm7, (%r10); + vmovdqu (7 * 16)(%rax), %xmm7; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rax), %r10; + movq (16 * 16 + 1 * 8)(%rax), %r11; + movq (16 * 16 + 2 * 8)(%rax), %r12; + movq (16 * 16 + 3 * 8)(%rax), 
%r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_auth +ELF(.type _gcry_camellia_aesni_avx_ocb_auth,@function;) + +_gcry_camellia_aesni_avx_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (16 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[16]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rax); + movq %r11, (16 * 16 + 1 * 8)(%rax); + movq %r12, (16 * 16 + 2 * 8)(%rax); + movq %r13, (16 * 16 + 3 * 8)(%rax); + + vmovdqu (%rdx), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rsi), xreg; \ + vpxor (lreg), %xmm15, %xmm15; \ + vpxor xreg, %xmm15, xreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 * 16)(%rax); + OCB_INPUT(1, %r11, %xmm14); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(8, %r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + vmovdqu %xmm15, (%rdx); + + movq %rcx, %r10; + + /* inpack16_pre: */ + vmovq
(key_table)(CTX), %xmm15; + vpshufb .Lpack_bswap RIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor %xmm14, %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_enc_blk16; + + vpxor %xmm7, %xmm6, %xmm6; + vpxor %xmm5, %xmm4, %xmm4; + vpxor %xmm3, %xmm2, %xmm2; + vpxor %xmm1, %xmm0, %xmm0; + vpxor %xmm15, %xmm14, %xmm14; + vpxor %xmm13, %xmm12, %xmm12; + vpxor %xmm11, %xmm10, %xmm10; + vpxor %xmm9, %xmm8, %xmm8; + + vpxor %xmm6, %xmm4, %xmm4; + vpxor %xmm2, %xmm0, %xmm0; + vpxor %xmm14, %xmm12, %xmm12; + vpxor %xmm10, %xmm8, %xmm8; + + vpxor %xmm4, %xmm0, %xmm0; + vpxor %xmm12, %xmm8, %xmm8; + + vpxor %xmm0, %xmm8, %xmm0; + vpxor (%r10), %xmm0, %xmm0; + vmovdqu %xmm0, (%r10); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rax), %r10; + movq (16 * 16 + 1 * 8)(%rax), %r11; + movq (16 * 16 + 2 * 8)(%rax), %r12; + movq (16 * 16 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;) + /* * IN: * ab: 64-bit AB state diff --git a/cipher/camellia-aesni-avx2-amd64.S b/cipher/camellia-aesni-avx2-amd64.S index a3fa229..26381df 100644 --- a/cipher/camellia-aesni-avx2-amd64.S +++ b/cipher/camellia-aesni-avx2-amd64.S @@ -1,6 +1,6 @@ /* camellia-avx2-aesni-amd64.S - AES-NI/AVX2 implementation of Camellia cipher * - * Copyright (C) 2013 Jussi Kivilinna + * Copyright (C) 2013-2015 Jussi Kivilinna * * This file is part of Libgcrypt. 
* @@ -1127,8 +1127,8 @@ ELF(.type _gcry_camellia_aesni_avx2_cbc_dec,@function;) _gcry_camellia_aesni_avx2_cbc_dec: /* input: * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) * %rcx: iv */ @@ -1199,8 +1199,8 @@ ELF(.type _gcry_camellia_aesni_avx2_cfb_dec,@function;) _gcry_camellia_aesni_avx2_cfb_dec: /* input: * %rdi: ctx, CTX - * %rsi: dst (16 blocks) - * %rdx: src (16 blocks) + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) * %rcx: iv */ @@ -1266,5 +1266,498 @@ _gcry_camellia_aesni_avx2_cfb_dec: ret; ELF(.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;) +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_enc +ELF(.type _gcry_camellia_aesni_avx2_ocb_enc,@function;) + +_gcry_camellia_aesni_avx2_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[32]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rax); + movq %r11, (16 * 32 + 1 * 8)(%rax); + movq %r12, (16 * 32 + 2 * 8)(%rax); + movq %r13, (16 * 32 + 3 * 8)(%rax); + + vmovdqu (%rcx), %xmm14; + vmovdqu (%r8), %xmm13; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128 $1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm13, %ymm13; \ + vpxor yreg, %ymm15, yreg; \ + vmovdqu %ymm15, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); +
movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, %ymm0); + vmovdqu %ymm0, (13 * 32)(%rax); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r9), %r10; + movq (17 * 8)(%r9), %r11; + movq (18 * 8)(%r9), %r12; + movq (19 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r9), %r10; + movq (21 * 8)(%r9), %r11; + movq (22 * 8)(%r9), %r12; + movq (23 * 8)(%r9), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r9), %r10; + movq (25 * 8)(%r9), %r11; + movq (26 * 8)(%r9), %r12; + movq (27 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, %ymm2); + movq (28 * 8)(%r9), %r10; + movq (29 * 8)(%r9), %r11; + movq (30 * 8)(%r9), %r12; + movq (31 * 8)(%r9), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vextracti128 $1, %ymm13, %xmm15; + vmovdqu %xmm14, (%rcx); + vpxor %xmm13, %xmm15, %xmm15; + vmovdqu %xmm15, (%r8); + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm15; + vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor 13 * 32(%rax), %ymm15, %ymm13; + vpxor 
14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_enc_blk32; + + vpxor 0 * 32(%rsi), %ymm7, %ymm7; + vpxor 1 * 32(%rsi), %ymm6, %ymm6; + vpxor 2 * 32(%rsi), %ymm5, %ymm5; + vpxor 3 * 32(%rsi), %ymm4, %ymm4; + vpxor 4 * 32(%rsi), %ymm3, %ymm3; + vpxor 5 * 32(%rsi), %ymm2, %ymm2; + vpxor 6 * 32(%rsi), %ymm1, %ymm1; + vpxor 7 * 32(%rsi), %ymm0, %ymm0; + vpxor 8 * 32(%rsi), %ymm15, %ymm15; + vpxor 9 * 32(%rsi), %ymm14, %ymm14; + vpxor 10 * 32(%rsi), %ymm13, %ymm13; + vpxor 11 * 32(%rsi), %ymm12, %ymm12; + vpxor 12 * 32(%rsi), %ymm11, %ymm11; + vpxor 13 * 32(%rsi), %ymm10, %ymm10; + vpxor 14 * 32(%rsi), %ymm9, %ymm9; + vpxor 15 * 32(%rsi), %ymm8, %ymm8; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + movq (16 * 32 + 0 * 8)(%rax), %r10; + movq (16 * 32 + 1 * 8)(%rax), %r11; + movq (16 * 32 + 2 * 8)(%rax), %r12; + movq (16 * 32 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_ocb_enc,.-_gcry_camellia_aesni_avx2_ocb_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_dec +ELF(.type _gcry_camellia_aesni_avx2_ocb_dec,@function;) + +_gcry_camellia_aesni_avx2_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[32]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rax); + movq %r11, (16 * 32 + 1 * 8)(%rax); + movq %r12, (16 * 32 + 2 * 8)(%rax); + movq %r13, (16 * 32 + 3 * 8)(%rax); + + vmovdqu (%rcx), %xmm14; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128
$1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm15, yreg; \ + vmovdqu %ymm15, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, %ymm13); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r9), %r10; + movq (17 * 8)(%r9), %r11; + movq (18 * 8)(%r9), %r12; + movq (19 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r9), %r10; + movq (21 * 8)(%r9), %r11; + movq (22 * 8)(%r9), %r12; + movq (23 * 8)(%r9), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r9), %r10; + movq (25 * 8)(%r9), %r11; + movq (26 * 8)(%r9), %r12; + movq (27 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, %ymm2); + movq (28 * 8)(%r9), %r10; + movq (29 * 8)(%r9), %r11; + movq (30 * 8)(%r9), %r12; + movq (31 * 8)(%r9), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rcx); + + movq %r8, %r10; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r9d; + cmovel %r9d, %r8d; /* max */ + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15; + vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + 
vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor %ymm13, %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_dec_blk32; + + vpxor 0 * 32(%rsi), %ymm7, %ymm7; + vpxor 1 * 32(%rsi), %ymm6, %ymm6; + vpxor 2 * 32(%rsi), %ymm5, %ymm5; + vpxor 3 * 32(%rsi), %ymm4, %ymm4; + vpxor 4 * 32(%rsi), %ymm3, %ymm3; + vpxor 5 * 32(%rsi), %ymm2, %ymm2; + vpxor 6 * 32(%rsi), %ymm1, %ymm1; + vpxor 7 * 32(%rsi), %ymm0, %ymm0; + vmovdqu %ymm7, (7 * 32)(%rax); + vmovdqu %ymm6, (6 * 32)(%rax); + vpxor 8 * 32(%rsi), %ymm15, %ymm15; + vpxor 9 * 32(%rsi), %ymm14, %ymm14; + vpxor 10 * 32(%rsi), %ymm13, %ymm13; + vpxor 11 * 32(%rsi), %ymm12, %ymm12; + vpxor 12 * 32(%rsi), %ymm11, %ymm11; + vpxor 13 * 32(%rsi), %ymm10, %ymm10; + vpxor 14 * 32(%rsi), %ymm9, %ymm9; + vpxor 15 * 32(%rsi), %ymm8, %ymm8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vpxor %ymm5, %ymm7, %ymm7; + vpxor %ymm4, %ymm6, %ymm6; + vpxor %ymm3, %ymm7, %ymm7; + vpxor %ymm2, %ymm6, %ymm6; + vpxor %ymm1, %ymm7, %ymm7; + vpxor %ymm0, %ymm6, %ymm6; + vpxor %ymm15, %ymm7, %ymm7; + vpxor %ymm14, %ymm6, %ymm6; + vpxor %ymm13, %ymm7, %ymm7; + vpxor %ymm12, %ymm6, %ymm6; + vpxor %ymm11, %ymm7, %ymm7; + vpxor %ymm10, %ymm6, %ymm6; + vpxor %ymm9, %ymm7, %ymm7; + vpxor %ymm8, %ymm6, %ymm6; + vpxor %ymm7, %ymm6, %ymm7; + + vextracti128 $1, %ymm7, %xmm6; + vpxor %xmm6, %xmm7, %xmm7; + vpxor (%r10), %xmm7, %xmm7; + vmovdqu %xmm7, (%r10); + + vmovdqu 7 * 32(%rax), %ymm7; + vmovdqu 6 * 32(%rax), %ymm6; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + movq (16 * 32 + 0 * 8)(%rax), %r10; + movq (16 * 32 + 1 
* 8)(%rax), %r11; + movq (16 * 32 + 2 * 8)(%rax), %r12; + movq (16 * 32 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_ocb_dec,.-_gcry_camellia_aesni_avx2_ocb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_auth +ELF(.type _gcry_camellia_aesni_avx2_ocb_auth,@function;) + +_gcry_camellia_aesni_avx2_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (32 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[32]) + */ + + pushq %rbp; + movq %rsp, %rbp; + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rax); + movq %r11, (16 * 32 + 1 * 8)(%rax); + movq %r12, (16 * 32 + 2 * 8)(%rax); + movq %r13, (16 * 32 + 3 * 8)(%rax); + + vmovdqu (%rdx), %xmm14; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + /* (A_i is the i-th associated-data block read from abuf) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rsi), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128 $1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm15, yreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(2, %r10, %r11, %ymm13); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r8), %r10; + movq (17 *
8)(%r8), %r11; + movq (18 * 8)(%r8), %r12; + movq (19 * 8)(%r8), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r8), %r10; + movq (21 * 8)(%r8), %r11; + movq (22 * 8)(%r8), %r12; + movq (23 * 8)(%r8), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r8), %r10; + movq (25 * 8)(%r8), %r11; + movq (26 * 8)(%r8), %r12; + movq (27 * 8)(%r8), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, %ymm2); + movq (28 * 8)(%r8), %r10; + movq (29 * 8)(%r8), %r11; + movq (30 * 8)(%r8), %r12; + movq (31 * 8)(%r8), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rdx); + + movq %rcx, %r10; + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm15; + vpshufb .Lpack_bswap RIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor %ymm13, %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_enc_blk32; + + vpxor %ymm7, %ymm6, %ymm6; + vpxor %ymm5, %ymm4, %ymm4; + vpxor %ymm3, %ymm2, %ymm2; + vpxor %ymm1, %ymm0, %ymm0; + vpxor %ymm15, %ymm14, %ymm14; + vpxor %ymm13, %ymm12, %ymm12; + vpxor %ymm11, %ymm10, %ymm10; + vpxor %ymm9, %ymm8, %ymm8; + + vpxor %ymm6, %ymm4, %ymm4; + vpxor %ymm2, %ymm0, %ymm0; + vpxor %ymm14, %ymm12, %ymm12; + vpxor %ymm10, %ymm8, %ymm8; + + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm12, %ymm8, %ymm8; + + vpxor %ymm0, %ymm8, %ymm0; + + vextracti128 $1, %ymm0, %xmm1; + vpxor (%r10), %xmm0, %xmm0; + vpxor %xmm0, %xmm1, %xmm0; + vmovdqu %xmm0, (%r10); + + vzeroall; + + 
movq (16 * 32 + 0 * 8)(%rax), %r10; + movq (16 * 32 + 1 * 8)(%rax), %r11; + movq (16 * 32 + 2 * 8)(%rax), %r12; + movq (16 * 32 + 3 * 8)(%rax), %r13; + + leave; + ret; +ELF(.size _gcry_camellia_aesni_avx2_ocb_auth,.-_gcry_camellia_aesni_avx2_ocb_auth;) + #endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)*/ #endif /*__x86_64*/ diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c index 5032321..197e1b3 100644 --- a/cipher/camellia-glue.c +++ b/cipher/camellia-glue.c @@ -63,6 +63,7 @@ #include "cipher.h" #include "camellia.h" #include "bufhelp.h" +#include "cipher-internal.h" #include "cipher-selftest.h" /* Helper macro to force alignment to 16 bytes. */ @@ -135,6 +136,26 @@ extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; +extern void _gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[16]) ASM_FUNC_ABI; + +extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[16]) ASM_FUNC_ABI; + +extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx, + const unsigned char *abuf, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[16]) ASM_FUNC_ABI; + extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx, const unsigned char *key, unsigned int keylen) ASM_FUNC_ABI; @@ -158,6 +179,26 @@ extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv) ASM_FUNC_ABI; + +extern void _gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[32]) ASM_FUNC_ABI; + +extern void 
_gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[32]) ASM_FUNC_ABI; + +extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx, + const unsigned char *abuf, + unsigned char *offset, + unsigned char *checksum, + const void *Ls[32]) ASM_FUNC_ABI; #endif static const char *selftest(void); @@ -563,6 +604,294 @@ _gcry_camellia_cfb_dec(void *context, unsigned char *iv, _gcry_burn_stack(burn_stack_depth); } +static inline const unsigned char * +get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) +{ + unsigned int ntz = _gcry_ctz64 (i); + + if (ntz < OCB_L_TABLE_SIZE) + return c->u_mode.ocb.L[ntz]; + else + return _gcry_cipher_ocb_get_l (c, l_tmp, i); +} + +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +void +_gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + CAMELLIA_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char l_tmp[CAMELLIA_BLOCK_SIZE]; + const unsigned char *l; + int burn_stack_depth; + u64 blkn = c->u_mode.ocb.data_nblocks; + + burn_stack_depth = encrypt ? CAMELLIA_encrypt_stack_burn_size : + CAMELLIA_decrypt_stack_burn_size; + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + const void *Ls[32]; + int i; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + /* l_tmp will be used only every 65536-th block. 
*/ + for (i = 0; i < 32; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + if (encrypt) + _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 32; + outbuf += 32 * CAMELLIA_BLOCK_SIZE; + inbuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + const void *Ls[16]; + int i; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + /* l_tmp will be used only every 65536-th block. */ + for (i = 0; i < 16; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + if (encrypt) + _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 16; + outbuf += 16 * CAMELLIA_BLOCK_SIZE; + inbuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... 
*/ + } +#endif + + if (encrypt) + { + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, CAMELLIA_BLOCK_SIZE); + buf_cpy (l_tmp, inbuf, CAMELLIA_BLOCK_SIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp, CAMELLIA_BLOCK_SIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp, c->u_iv.iv, CAMELLIA_BLOCK_SIZE); + Camellia_EncryptBlock(ctx->keybitlength, l_tmp, ctx->keytable, l_tmp); + buf_xor_1 (l_tmp, c->u_iv.iv, CAMELLIA_BLOCK_SIZE); + buf_cpy (outbuf, l_tmp, CAMELLIA_BLOCK_SIZE); + + inbuf += CAMELLIA_BLOCK_SIZE; + outbuf += CAMELLIA_BLOCK_SIZE; + } + } + else + { + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, CAMELLIA_BLOCK_SIZE); + buf_cpy (l_tmp, inbuf, CAMELLIA_BLOCK_SIZE); + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + buf_xor_1 (l_tmp, c->u_iv.iv, CAMELLIA_BLOCK_SIZE); + Camellia_DecryptBlock(ctx->keybitlength, l_tmp, ctx->keytable, l_tmp); + buf_xor_1 (l_tmp, c->u_iv.iv, CAMELLIA_BLOCK_SIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp, CAMELLIA_BLOCK_SIZE); + buf_cpy (outbuf, l_tmp, CAMELLIA_BLOCK_SIZE); + + inbuf += CAMELLIA_BLOCK_SIZE; + outbuf += CAMELLIA_BLOCK_SIZE; + } + } + + c->u_mode.ocb.data_nblocks = blkn; + + wipememory(&l_tmp, sizeof(l_tmp)); + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +} + +/* Bulk authentication of complete blocks in OCB mode.
*/ +void +_gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ + CAMELLIA_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + unsigned char l_tmp[CAMELLIA_BLOCK_SIZE]; + const unsigned char *l; + int burn_stack_depth; + u64 blkn = c->u_mode.ocb.aad_nblocks; + + burn_stack_depth = CAMELLIA_encrypt_stack_burn_size; + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + const void *Ls[32]; + int i; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + /* l_tmp will be used only every 65536-th block. */ + for (i = 0; i < 32; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 32; + abuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + const void *Ls[16]; + int i; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + /* l_tmp will be used only every 65536-th block. 
*/ + for (i = 0; i < 16; i += 4) + { + Ls[i + 0] = get_l(c, l_tmp, blkn + 1); + Ls[i + 1] = get_l(c, l_tmp, blkn + 2); + Ls[i + 2] = get_l(c, l_tmp, blkn + 3); + Ls[i + 3] = get_l(c, l_tmp, blkn + 4); + blkn += 4; + } + + _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 16; + abuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_mode.ocb.aad_offset, l, CAMELLIA_BLOCK_SIZE); + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, CAMELLIA_BLOCK_SIZE); + Camellia_EncryptBlock(ctx->keybitlength, l_tmp, ctx->keytable, l_tmp); + buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, CAMELLIA_BLOCK_SIZE); + + abuf += CAMELLIA_BLOCK_SIZE; + } + + c->u_mode.ocb.aad_nblocks = blkn; + + wipememory(&l_tmp, sizeof(l_tmp)); + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +} + /* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR encryption. Returns NULL on success. 
*/ static const char* diff --git a/cipher/cipher.c b/cipher/cipher.c index 7a29824..2d2b0ad 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -535,6 +535,8 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, h->bulk.cbc_dec = _gcry_camellia_cbc_dec; h->bulk.cfb_dec = _gcry_camellia_cfb_dec; h->bulk.ctr_enc = _gcry_camellia_ctr_enc; + h->bulk.ocb_crypt = _gcry_camellia_ocb_crypt; + h->bulk.ocb_auth = _gcry_camellia_ocb_auth; break; #endif /*USE_CAMELLIA*/ #ifdef USE_DES diff --git a/src/cipher.h b/src/cipher.h index ef183fd..a0aac51 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -177,6 +177,11 @@ void _gcry_camellia_cbc_dec (void *context, unsigned char *iv, void _gcry_camellia_cfb_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); +void _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +void _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); /*-- des.c --*/ void _gcry_3des_ctr_enc (void *context, unsigned char *ctr, diff --git a/tests/basic.c b/tests/basic.c index 2c664c0..e3f4bfd 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -3335,6 +3335,15 @@ check_ocb_cipher (void) check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32, "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d" "\xfe\x96\x67\xc9\xc8\x41\x03\x51"); + check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA128, 16, + "\x28\x23\x38\x45\x2b\xfd\x42\x45" + "\x43\x64\x7e\x67\x7f\xf4\x8b\xcd"); + check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA192, 24, + "\xee\xca\xe5\x39\x27\x2d\x33\xe7" + "\x79\x74\xb0\x1d\x37\x12\xd5\x6c"); + check_ocb_cipher_largebuf(GCRY_CIPHER_CAMELLIA256, 32, + "\x39\x39\xd0\x2d\x05\x68\x74\xee" + "\x18\x6b\xea\x3d\x0b\xd3\x58\xae"); }