[v3] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

Message ID 20221118100011.2634-1-zhengqi.arch@bytedance.com
State New
Headers
Series [v3] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr |

Commit Message

Qi Zheng Nov. 18, 2022, 10 a.m. UTC
  When we specify __GFP_NOWARN, we only expect that no warnings
will be issued for current caller. But in the __should_failslab()
and __should_fail_alloc_page(), the local GFP flags alter the
global {failslab|fail_page_alloc}.attr, which is persistent and
shared by all tasks. This is not what we expected, let's fix it.

Cc: stable@vger.kernel.org
Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
---
 v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/
 v2: https://lore.kernel.org/lkml/20221108035232.87180-1-zhengqi.arch@bytedance.com/

 Changelog in v2 -> v3:
 - collect Reviewed-by
 - rebase onto the next-20221118

 Changelog in v1 -> v2:
  - add comment for __should_failslab() and __should_fail_alloc_page()
    (suggested by Jason)

 include/linux/fault-inject.h |  7 +++++--
 lib/fault-inject.c           | 14 +++++++++-----
 mm/failslab.c                | 12 ++++++++++--
 mm/page_alloc.c              |  7 +++++--
 4 files changed, 29 insertions(+), 11 deletions(-)
  

Comments

Jason Gunthorpe Nov. 18, 2022, 12:47 p.m. UTC | #1
On Fri, Nov 18, 2022 at 06:00:11PM +0800, Qi Zheng wrote:
> When we specify __GFP_NOWARN, we only expect that no warnings
> will be issued for current caller. But in the __should_failslab()
> and __should_fail_alloc_page(), the local GFP flags alter the
> global {failslab|fail_page_alloc}.attr, which is persistent and
> shared by all tasks. This is not what we expected, let's fix it.
> 
> Cc: stable@vger.kernel.org
> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
> ---
>  v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/
>  v2: https://lore.kernel.org/lkml/20221108035232.87180-1-zhengqi.arch@bytedance.com/

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
  
Andrew Morton Nov. 18, 2022, 9:42 p.m. UTC | #2
On Fri, 18 Nov 2022 18:00:11 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:

> When we specify __GFP_NOWARN, we only expect that no warnings
> will be issued for current caller. But in the __should_failslab()
> and __should_fail_alloc_page(), the local GFP flags alter the
> global {failslab|fail_page_alloc}.attr, which is persistent and
> shared by all tasks. This is not what we expected, let's fix it.
> 
> Cc: stable@vger.kernel.org
> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
>
> ...
>
> -bool should_fail(struct fault_attr *attr, ssize_t size)
> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>  {
>  	bool stack_checked = false;
>  
> @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>  		return false;
>  
>  fail:
> -	fail_dump(attr);
> +	if (!(flags & FAULT_NOWARN))
> +		fail_dump(attr);
>  
>  	if (atomic_read(&attr->times) != -1)
>  		atomic_dec_not_zero(&attr->times);
>  
>  	return true;
>  }
> +EXPORT_SYMBOL_GPL(should_fail_ex);

I don't see a need to export this?
  
Qi Zheng Nov. 19, 2022, 12:48 a.m. UTC | #3
On 2022/11/19 05:42, Andrew Morton wrote:
> On Fri, 18 Nov 2022 18:00:11 +0800 Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> 
>> When we specify __GFP_NOWARN, we only expect that no warnings
>> will be issued for current caller. But in the __should_failslab()
>> and __should_fail_alloc_page(), the local GFP flags alter the
>> global {failslab|fail_page_alloc}.attr, which is persistent and
>> shared by all tasks. This is not what we expected, let's fix it.
>>
>> Cc: stable@vger.kernel.org
>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>> Reported-by: Dmitry Vyukov <dvyukov@google.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
>>
>> ...
>>
>> -bool should_fail(struct fault_attr *attr, ssize_t size)
>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>>   {
>>   	bool stack_checked = false;
>>   
>> @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>>   		return false;
>>   
>>   fail:
>> -	fail_dump(attr);
>> +	if (!(flags & FAULT_NOWARN))
>> +		fail_dump(attr);
>>   
>>   	if (atomic_read(&attr->times) != -1)
>>   		atomic_dec_not_zero(&attr->times);
>>   
>>   	return true;
>>   }
>> +EXPORT_SYMBOL_GPL(should_fail_ex);
> 
> I don't see a need to export this?

Yes, my initial thought was that there might be a driver using this
function, but there really isn't one yet.

And I see you've helped remove this, thanks a lot. :)

> 
>
  

Patch

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 9f6e25467844..444236dadcf0 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,7 +20,6 @@  struct fault_attr {
 	atomic_t space;
 	unsigned long verbose;
 	bool task_filter;
-	bool no_warn;
 	unsigned long stacktrace_depth;
 	unsigned long require_start;
 	unsigned long require_end;
@@ -32,6 +31,10 @@  struct fault_attr {
 	struct dentry *dname;
 };
 
+enum fault_flags {
+	FAULT_NOWARN =	1 << 0,
+};
+
 #define FAULT_ATTR_INITIALIZER {					\
 		.interval = 1,						\
 		.times = ATOMIC_INIT(1),				\
@@ -40,11 +43,11 @@  struct fault_attr {
 		.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,	\
 		.verbose = 2,						\
 		.dname = NULL,						\
-		.no_warn = false,					\
 	}
 
 #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
 int setup_fault_attr(struct fault_attr *attr, char *str);
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
 bool should_fail(struct fault_attr *attr, ssize_t size);
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4b8fafce415c..5971f7c3e49e 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,9 +41,6 @@  EXPORT_SYMBOL_GPL(setup_fault_attr);
 
 static void fail_dump(struct fault_attr *attr)
 {
-	if (attr->no_warn)
-		return;
-
 	if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
 		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
 		       "name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@  static inline bool fail_stacktrace(struct fault_attr *attr)
  * http://www.nongnu.org/failmalloc/
  */
 
-bool should_fail(struct fault_attr *attr, ssize_t size)
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
 {
 	bool stack_checked = false;
 
@@ -152,13 +149,20 @@  bool should_fail(struct fault_attr *attr, ssize_t size)
 		return false;
 
 fail:
-	fail_dump(attr);
+	if (!(flags & FAULT_NOWARN))
+		fail_dump(attr);
 
 	if (atomic_read(&attr->times) != -1)
 		atomic_dec_not_zero(&attr->times);
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(should_fail_ex);
+
+bool should_fail(struct fault_attr *attr, ssize_t size)
+{
+	return should_fail_ex(attr, size, 0);
+}
 EXPORT_SYMBOL_GPL(should_fail);
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/mm/failslab.c b/mm/failslab.c
index 58df9789f1d2..ffc420c0e767 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -16,6 +16,8 @@  static struct {
 
 bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
+	int flags = 0;
+
 	/* No fault-injection for bootstrap cache */
 	if (unlikely(s == kmem_cache))
 		return false;
@@ -30,10 +32,16 @@  bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 	if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
 		return false;
 
+	/*
+	 * In some cases, it expects to specify __GFP_NOWARN
+	 * to avoid printing any information(not just a warning),
+	 * thus avoiding deadlocks. See commit 6b9dbedbe349 for
+	 * details.
+	 */
 	if (gfpflags & __GFP_NOWARN)
-		failslab.attr.no_warn = true;
+		flags |= FAULT_NOWARN;
 
-	return should_fail(&failslab.attr, s->object_size);
+	return should_fail_ex(&failslab.attr, s->object_size, flags);
 }
 
 static int __init setup_failslab(char *str)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f7a63684e6c4..baf97166172c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3909,6 +3909,8 @@  __setup("fail_page_alloc=", setup_fail_page_alloc);
 
 static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 {
+	int flags = 0;
+
 	if (order < fail_page_alloc.min_order)
 		return false;
 	if (gfp_mask & __GFP_NOFAIL)
@@ -3919,10 +3921,11 @@  static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 			(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return false;
 
+	/* See comment in __should_failslab() */
 	if (gfp_mask & __GFP_NOWARN)
-		fail_page_alloc.attr.no_warn = true;
+		flags |= FAULT_NOWARN;
 
-	return should_fail(&fail_page_alloc.attr, 1 << order);
+	return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
 }
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS