@@ -22,15 +22,21 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc,
if (srclen < 64 || !crypto_simd_usable())
return crypto_nhpoly1305_update(desc, src, srclen);
- do {
- unsigned int n = min_t(unsigned int, srclen, SZ_4K);
+ kernel_fpu_begin();
+ for (;;) {
+ const unsigned int chunk = min(srclen, 4096U);
+
+ crypto_nhpoly1305_update_helper(desc, src, chunk, nh_avx2);
+ srclen -= chunk;
+
+ if (!srclen)
+ break;
+
+ src += chunk;
+ kernel_fpu_yield();
+ }
+ kernel_fpu_end();
- kernel_fpu_begin();
- crypto_nhpoly1305_update_helper(desc, src, n, nh_avx2);
- kernel_fpu_end();
- src += n;
- srclen -= n;
- } while (srclen);
return 0;
}
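
The converted loop takes the FPU section once for the whole request and only yields between 4 KiB chunks, instead of paying a full kernel_fpu_begin()/kernel_fpu_end() round trip per chunk. kernel_fpu_yield() itself is not shown in this excerpt; a minimal sketch of the semantics the loop relies on, assuming it drops the section only when a reschedule is actually pending, would be:

	/*
	 * Assumed implementation of kernel_fpu_yield(); not part of this
	 * excerpt. Needs <asm/fpu/api.h> and <linux/sched.h>.
	 */
	static inline void kernel_fpu_yield(void)
	{
		if (need_resched()) {
			kernel_fpu_end();	/* re-enable preemption */
			cond_resched();		/* let the scheduler run */
			kernel_fpu_begin();	/* save user FPU state again */
		}
	}

On a CPU with no pending work this makes the yield nearly free: one need_resched() test per 4 KiB of input.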
@@ -22,15 +22,21 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc,
if (srclen < 64 || !crypto_simd_usable())
return crypto_nhpoly1305_update(desc, src, srclen);
- do {
- unsigned int n = min_t(unsigned int, srclen, SZ_4K);
+ kernel_fpu_begin();
+ for (;;) {
+ const unsigned int chunk = min(srclen, 4096U);
+
+ crypto_nhpoly1305_update_helper(desc, src, chunk, nh_sse2);
+ srclen -= chunk;
+
+ if (!srclen)
+ break;
+
+ src += chunk;
+ kernel_fpu_yield();
+ }
+ kernel_fpu_end();
- kernel_fpu_begin();
- crypto_nhpoly1305_update_helper(desc, src, n, nh_sse2);
- kernel_fpu_end();
- src += n;
- srclen -= n;
- } while (srclen);
return 0;
}
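
The SSE2 conversion is structurally identical to the AVX2 one. The loop shape is easy to sanity-check outside the kernel; this throwaway user-space model (hypothetical, not part of the patch) confirms every byte is processed exactly once and that no yield follows the final chunk:

	/* Hypothetical user-space model of the chunked-update loop above. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int srclen = 10000;	/* arbitrary request size */
		unsigned int off = 0, yields = 0;

		for (;;) {
			const unsigned int chunk = srclen < 4096U ? srclen : 4096U;

			printf("process bytes [%u, %u)\n", off, off + chunk);
			srclen -= chunk;

			if (!srclen)
				break;

			off += chunk;
			yields++;	/* kernel_fpu_yield() would sit here */
		}
		printf("yield points: %u\n", yields);	/* chunks 4096+4096+1808 -> 2 */
		return 0;
	}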
@@ -15,20 +15,13 @@
#include <asm/intel-family.h>
#include <asm/simd.h>
-asmlinkage void poly1305_init_x86_64(void *ctx,
- const u8 key[POLY1305_BLOCK_SIZE]);
-asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
- const size_t len, const u32 padbit);
-asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
- const u32 nonce[4]);
-asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
- const u32 nonce[4]);
-asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
- const u32 padbit);
-asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
- const u32 padbit);
-asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
- const size_t len, const u32 padbit);
+asmlinkage void poly1305_init_x86_64(void *ctx, const u8 key[POLY1305_BLOCK_SIZE]);
+asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, unsigned int len, u32 padbit);
+asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]);
+asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]);
+asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, unsigned int len, u32 padbit);
+asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, unsigned int len, u32 padbit);
+asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp, unsigned int len, u32 padbit);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
@@ -86,7 +79,7 @@ static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE])
poly1305_init_x86_64(ctx, key);
}
-static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
+static void poly1305_simd_blocks(void *ctx, const u8 *inp, unsigned int len,
const u32 padbit)
{
struct poly1305_arch_internal *state = ctx;
@@ -103,21 +96,25 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
return;
}
- do {
- const size_t bytes = min_t(size_t, len, SZ_4K);
+ kernel_fpu_begin();
+ for (;;) {
+ const unsigned int chunk = min(len, 4096U);
- kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
- poly1305_blocks_avx512(ctx, inp, bytes, padbit);
+ poly1305_blocks_avx512(ctx, inp, chunk, padbit);
else if (static_branch_likely(&poly1305_use_avx2))
- poly1305_blocks_avx2(ctx, inp, bytes, padbit);
+ poly1305_blocks_avx2(ctx, inp, chunk, padbit);
else
- poly1305_blocks_avx(ctx, inp, bytes, padbit);
- kernel_fpu_end();
+ poly1305_blocks_avx(ctx, inp, chunk, padbit);
+ len -= chunk;
- len -= bytes;
- inp += bytes;
- } while (len);
+ if (!len)
+ break;
+
+ inp += chunk;
+ kernel_fpu_yield();
+ }
+ kernel_fpu_end();
}
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
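
poly1305_simd_blocks() now follows the same begin/chunk/yield/end shape as the two NHPoly1305 updates above. A possible follow-up, purely illustrative and not proposed by this patch, would factor the walk into a shared helper; the helper name and callback type here are invented:

	/* Hypothetical consolidation of the three chunk-and-yield loops. */
	typedef void (*fpu_chunk_fn)(void *ctx, const u8 *inp, unsigned int len);

	static void fpu_chunked_walk(void *ctx, const u8 *inp, unsigned int len,
				     fpu_chunk_fn fn)
	{
		kernel_fpu_begin();
		for (;;) {
			const unsigned int chunk = min(len, 4096U);

			fn(ctx, inp, chunk);	/* per-chunk SIMD body */
			len -= chunk;

			if (!len)
				break;

			inp += chunk;
			kernel_fpu_yield();
		}
		kernel_fpu_end();
	}

The trade-off is an indirect call per chunk, which would also defeat the static-branch dispatch poly1305 does inside its loop, so keeping per-site copies as the patch does is a defensible choice.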
@@ -45,8 +45,8 @@ struct polyval_desc_ctx {
u32 bytes;
};
-asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
- const u8 *in, size_t nblocks, u8 *accumulator);
+asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys, const u8 *in,
+ unsigned int nblocks, u8 *accumulator);
asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2);
static inline struct polyval_tfm_ctx *polyval_tfm_ctx(struct crypto_shash *tfm)
@@ -55,27 +55,40 @@ static inline struct polyval_tfm_ctx *polyval_tfm_ctx(struct crypto_shash *tfm)
}
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
- const u8 *in, size_t nblocks, u8 *accumulator)
+ const u8 *in, unsigned int nblocks, u8 *accumulator)
{
- if (likely(crypto_simd_usable())) {
- kernel_fpu_begin();
- clmul_polyval_update(keys, in, nblocks, accumulator);
- kernel_fpu_end();
- } else {
+ if (!crypto_simd_usable()) {
polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
nblocks, accumulator);
+ return;
}
+
+ kernel_fpu_begin();
+ for (;;) {
+ const unsigned int blocks = min(nblocks, 4096U / POLYVAL_BLOCK_SIZE);
+
+ clmul_polyval_update(keys, in, blocks, accumulator);
+ nblocks -= blocks;
+
+ if (!nblocks)
+ break;
+
+ in += blocks * POLYVAL_BLOCK_SIZE;
+ kernel_fpu_yield();
+ }
+ kernel_fpu_end();
}
static void internal_polyval_mul(u8 *op1, const u8 *op2)
{
- if (likely(crypto_simd_usable())) {
- kernel_fpu_begin();
- clmul_polyval_mul(op1, op2);
- kernel_fpu_end();
- } else {
+ if (!crypto_simd_usable()) {
polyval_mul_non4k(op1, op2);
+ return;
}
+
+ kernel_fpu_begin();
+ clmul_polyval_mul(op1, op2);
+ kernel_fpu_end();
}
static int polyval_x86_setkey(struct crypto_shash *tfm,
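
With POLYVAL_BLOCK_SIZE being 16 bytes, the bound in internal_polyval_update() works out to 256 blocks per FPU slice, i.e. the same 4 KiB yield granularity as the byte-oriented loops above. As a standalone illustration (the EXAMPLE_ macro stands in for the real header constant):

	/* Illustrative check of the yield granularity. */
	#define EXAMPLE_POLYVAL_BLOCK_SIZE 16	/* POLYVAL_BLOCK_SIZE in <crypto/polyval.h> */
	_Static_assert(4096U / EXAMPLE_POLYVAL_BLOCK_SIZE == 256,
		       "one FPU slice covers 256 blocks == 4 KiB of input");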
@@ -113,7 +126,6 @@ static int polyval_x86_update(struct shash_desc *desc,
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = polyval_tfm_ctx(desc->tfm);
u8 *pos;
- unsigned int nblocks;
unsigned int n;
if (dctx->bytes) {
@@ -131,9 +143,9 @@ static int polyval_x86_update(struct shash_desc *desc,
tctx->key_powers[NUM_KEY_POWERS-1]);
}
- while (srclen >= POLYVAL_BLOCK_SIZE) {
- /* Allow rescheduling every 4K bytes. */
- nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
+ if (srclen >= POLYVAL_BLOCK_SIZE) {
+ const unsigned int nblocks = srclen / POLYVAL_BLOCK_SIZE;
+
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
src += nblocks * POLYVAL_BLOCK_SIZE;
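
The hunk ends right after the full-block pass, so at most POLYVAL_BLOCK_SIZE - 1 bytes of srclen can remain. The unchanged tail of polyval_x86_update() is not shown here; presumably it stashes that remainder for a later update or the final call, along the lines of the usual partial-block idiom (a sketch, not the verbatim source):

	/* Assumed tail: fold the sub-block remainder into the buffer. */
	if (srclen) {
		dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
		pos = dctx->buffer;
		while (srclen--)
			*pos++ ^= *src++;
	}

	return 0;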