@@ -12,46 +12,47 @@
#include <linux/sizes.h>
#include <asm/cpufeature.h>
-#include <asm/fpu/api.h>
#include <asm/processor.h>
#include <asm/simd.h>
-asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
- const u8 *block, const size_t nblocks,
- const u32 inc);
-asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
- const u8 *block, const size_t nblocks,
- const u32 inc);
+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, const u8 *data,
+ unsigned int nblocks, u32 inc);
+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, const u8 *data,
+ unsigned int nblocks, u32 inc);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
-void blake2s_compress(struct blake2s_state *state, const u8 *block,
- size_t nblocks, const u32 inc)
+void blake2s_compress(struct blake2s_state *state, const u8 *data,
+ unsigned int nblocks, u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
- blake2s_compress_generic(state, block, nblocks, inc);
+ blake2s_compress_generic(state, data, nblocks, inc);
return;
}
- do {
- const size_t blocks = min_t(size_t, nblocks,
- SZ_4K / BLAKE2S_BLOCK_SIZE);
+ kernel_fpu_begin();
+ for (;;) {
+ const unsigned int chunks = min(nblocks, 4096U / BLAKE2S_BLOCK_SIZE);
- kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) &&
static_branch_likely(&blake2s_use_avx512))
- blake2s_compress_avx512(state, block, blocks, inc);
+ blake2s_compress_avx512(state, data, chunks, inc);
else
- blake2s_compress_ssse3(state, block, blocks, inc);
- kernel_fpu_end();
+ blake2s_compress_ssse3(state, data, chunks, inc);
- nblocks -= blocks;
- block += blocks * BLAKE2S_BLOCK_SIZE;
- } while (nblocks);
+ nblocks -= chunks;
+
+ if (!nblocks)
+ break;
+
+ data += chunks * BLAKE2S_BLOCK_SIZE;
+ kernel_fpu_yield();
+ }
+ kernel_fpu_end();
}
EXPORT_SYMBOL(blake2s_compress);
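Side note, not part of the patch: the exported blake2s_compress() above is normally reached through the lib/crypto BLAKE2s helpers rather than called directly. A minimal sketch, assuming the standard <crypto/blake2s.h> API (blake2s_init/blake2s_update/blake2s_final); the helper name is hypothetical:

#include <crypto/blake2s.h>

/* Illustration only: hash a buffer; blake2s_update() feeds full
 * 64-byte blocks into blake2s_compress() behind the scenes. */
static void blake2s_digest_buf(const u8 *buf, size_t len,
			       u8 out[BLAKE2S_HASH_SIZE])
{
	struct blake2s_state state;

	blake2s_init(&state, BLAKE2S_HASH_SIZE);
	blake2s_update(&state, buf, len);
	blake2s_final(&state, out);
}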
@@ -10,11 +10,11 @@
#include <crypto/blake2s.h>
#include <linux/string.h>
-void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
- size_t nblocks, const u32 inc);
+void blake2s_compress_generic(struct blake2s_state *state, const u8 *data,
+ unsigned int nblocks, u32 inc);
-void blake2s_compress(struct blake2s_state *state, const u8 *block,
- size_t nblocks, const u32 inc);
+void blake2s_compress(struct blake2s_state *state, const u8 *data,
+ unsigned int nblocks, u32 inc);
bool blake2s_selftest(void);
@@ -37,12 +37,12 @@ static inline void blake2s_increment_counter(struct blake2s_state *state,
state->t[1] += (state->t[0] < inc);
}
-void blake2s_compress(struct blake2s_state *state, const u8 *block,
- size_t nblocks, const u32 inc)
+void blake2s_compress(struct blake2s_state *state, const u8 *data,
+ unsigned int nblocks, u32 inc)
__weak __alias(blake2s_compress_generic);
-void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
- size_t nblocks, const u32 inc)
+void blake2s_compress_generic(struct blake2s_state *state, const u8 *data,
+ unsigned int nblocks, u32 inc)
{
u32 m[16];
u32 v[16];
@@ -53,7 +53,7 @@ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
while (nblocks > 0) {
blake2s_increment_counter(state, inc);
- memcpy(m, block, BLAKE2S_BLOCK_SIZE);
+ memcpy(m, data, BLAKE2S_BLOCK_SIZE);
le32_to_cpu_array(m, ARRAY_SIZE(m));
memcpy(v, state->h, 32);
v[ 8] = BLAKE2S_IV0;
@@ -103,7 +103,7 @@ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
for (i = 0; i < 8; ++i)
state->h[i] ^= v[i] ^ v[i + 8];
- block += BLAKE2S_BLOCK_SIZE;
+ data += BLAKE2S_BLOCK_SIZE;
--nblocks;
}
}
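For context, not part of the patch: the `state->t[1] += (state->t[0] < inc);` line kept as context in the last hunk is the carry of a 64-bit block counter split across two u32 words. A standalone sketch of the same idiom, with a hypothetical function name:

/* Illustration of the split-counter carry used by
 * blake2s_increment_counter(): t[0] is the low word, t[1] the high word. */
static inline void counter64_add(u32 t[2], u32 inc)
{
	t[0] += inc;
	t[1] += (t[0] < inc);	/* unsigned wraparound implies a carry */
}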