[v4,11/24] crypto: x86/aegis - limit FPU preemption

Message ID 20221116041342.3841-12-elliott@hpe.com
State New
Headers
Series crypto: fix RCU stalls |

Commit Message

Elliott, Robert (Servers) Nov. 16, 2022, 4:13 a.m. UTC
  Make kernel_fpu_begin() and kernel_fpu_end() calls around each
assembly language function that uses FPU context, rather than
around the entire set (init, ad, crypt, final).

Limit the processing of bulk data based on a module parameter,
so multiple blocks are processed within one FPU context
(associated data is not limited).

Allow the skcipher_walk functions to sleep again, since they are
no longer called inside FPU context.

Motivation: calling crypto_aead_encrypt() with a single scatter-gather
list entry pointing to a 1 MiB plaintext buffer caused the aesni_encrypt
function to receive a length of 1048576 bytes and consume 306348 cycles
within FPU context to process that data.

Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations")
Fixes: ba6771c0a0bc ("crypto: x86/aegis - fix handling chunked inputs and MAY_SLEEP")
Signed-off-by: Robert Elliott <elliott@hpe.com>
---
 arch/x86/crypto/aegis128-aesni-glue.c | 39 ++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 10 deletions(-)
  

Patch

diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 4623189000d8..6e96bdda2811 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -23,6 +23,9 @@ 
 #define AEGIS128_MIN_AUTH_SIZE 8
 #define AEGIS128_MAX_AUTH_SIZE 16
 
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu = 4 * 1024;
+
 asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
 
 asmlinkage void crypto_aegis128_aesni_ad(
@@ -85,15 +88,19 @@  static void crypto_aegis128_aesni_process_ad(
 			if (pos > 0) {
 				unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
 				memcpy(buf.bytes + pos, src, fill);
-				crypto_aegis128_aesni_ad(state,
+				kernel_fpu_begin();
+				crypto_aegis128_aesni_ad(state->blocks,
 							 AEGIS128_BLOCK_SIZE,
 							 buf.bytes);
+				kernel_fpu_end();
 				pos = 0;
 				left -= fill;
 				src += fill;
 			}
 
-			crypto_aegis128_aesni_ad(state, left, src);
+			kernel_fpu_begin();
+			crypto_aegis128_aesni_ad(state->blocks, left, src);
+			kernel_fpu_end();
 
 			src += left & ~(AEGIS128_BLOCK_SIZE - 1);
 			left &= AEGIS128_BLOCK_SIZE - 1;
@@ -110,7 +117,9 @@  static void crypto_aegis128_aesni_process_ad(
 
 	if (pos > 0) {
 		memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
-		crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
+		kernel_fpu_begin();
+		crypto_aegis128_aesni_ad(state->blocks, AEGIS128_BLOCK_SIZE, buf.bytes);
+		kernel_fpu_end();
 	}
 }
 
@@ -119,15 +128,23 @@  static void crypto_aegis128_aesni_process_crypt(
 		const struct aegis_crypt_ops *ops)
 {
 	while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
-		ops->crypt_blocks(state,
-				  round_down(walk->nbytes, AEGIS128_BLOCK_SIZE),
+		unsigned int chunk = min(walk->nbytes, bytes_per_fpu);
+
+		chunk = round_down(chunk, AEGIS128_BLOCK_SIZE);
+
+		kernel_fpu_begin();
+		ops->crypt_blocks(state->blocks, chunk,
 				  walk->src.virt.addr, walk->dst.virt.addr);
-		skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
+		kernel_fpu_end();
+
+		skcipher_walk_done(walk, walk->nbytes - chunk);
 	}
 
 	if (walk->nbytes) {
-		ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+		kernel_fpu_begin();
+		ops->crypt_tail(state->blocks, walk->nbytes, walk->src.virt.addr,
 				walk->dst.virt.addr);
+		kernel_fpu_end();
 		skcipher_walk_done(walk, 0);
 	}
 }
@@ -172,15 +189,17 @@  static void crypto_aegis128_aesni_crypt(struct aead_request *req,
 	struct skcipher_walk walk;
 	struct aegis_state state;
 
-	ops->skcipher_walk_init(&walk, req, true);
+	ops->skcipher_walk_init(&walk, req, false);
 
 	kernel_fpu_begin();
+	crypto_aegis128_aesni_init(&state.blocks, ctx->key.bytes, req->iv);
+	kernel_fpu_end();
 
-	crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
 	crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
 	crypto_aegis128_aesni_process_crypt(&state, &walk, ops);
-	crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
 
+	kernel_fpu_begin();
+	crypto_aegis128_aesni_final(&state.blocks, tag_xor, req->assoclen, cryptlen);
 	kernel_fpu_end();
 }