[v2,1/1] mm: ALLOC_HIGHATOMIC flag allocation issue

Message ID 20231109073133.792-2-justinjiang@vivo.com
State New
Headers
Series mm: ALLOC_HIGHATOMIC flag allocation issue |

Commit Message

Zhiguo Jiang Nov. 9, 2023, 7:31 a.m. UTC
  Add a new bool * argument to rmqueue() that returns the flag instead of
passing it back through *alloc_flags, and add the related comments.

Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---

Changelog:
v1->v2:
1. Delete the ALLOC_PCPLIST macro.
2. In rmqueue(), add a new bool * argument to return the flag
   instead of passing it back through alloc_flags.
3. Add comments to aid reader understanding.

 mm/internal.h   |  1 -
 mm/page_alloc.c | 41 ++++++++++++++++++++++++++++++-----------
 2 files changed, 30 insertions(+), 12 deletions(-)
  

Comments

Andrew Morton Nov. 9, 2023, 5:49 p.m. UTC | #1
On Thu,  9 Nov 2023 15:31:33 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:

> Add a new bool* argument to pass return flag instead of *alloc_flags
> and add the related comments.

Please retain (and update) the changelog with each version of a patch.

For reviewers (please), here's the v1 changelog:

: When alloc_flags contains ALLOC_HIGHATOMIC and the allocation order is
: 1/2/3/10 in rmqueue(), a page that is allocated successfully from the
: pcplist cache still causes a free pageblock to be moved from the
: allocated migratetype freelist to the MIGRATE_HIGHATOMIC freelist,
: rather than the allocation being served from the MIGRATE_HIGHATOMIC
: freelist first.  As a result, the number of pages on the
: MIGRATE_HIGHATOMIC freelist keeps increasing, the pages on the other
: migratetype freelists are reduced, and allocations from those
: freelists become more likely to fail.
: 
: Currently the sequence of ALLOC_HIGHATOMIC allocation is:
: pcplist cache --> buddy (batch >> order) allocation migratetype
: freelist --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation
: migratetype freelist.
: 
: Because requesting pages from the pcplist cache is faster than
: requesting them from buddy, the modified ALLOC_HIGHATOMIC allocation
: sequence is:
: pcplist --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation
: migrate freelist.
: 
: This patch fixes the allocation failures seen by non-ALLOC_HIGHATOMIC
: allocations that are caused by excessive page reservations on the
: MIGRATE_HIGHATOMIC freelists.

> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -905,7 +905,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
>  #endif
>  #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
>  #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
> -#define ALLOC_PCPLIST		0x1000 /* Allocations from pcplist */
>  
>  /* Flags that allow allocations below the min watermark. */
>  #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 67cec88164b1..3c84c3e3eeb0 100755
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2854,6 +2854,11 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
>  			int batch = nr_pcp_alloc(pcp, zone, order);
>  			int alloced;
>  
> +			/*
> +			 * If pcplist is empty and alloc_flags is with ALLOC_HIGHATOMIC,
> +			 * it should alloc from buddy highatomic migrate freelist firstly
> +			 * to ensure quick and successful allocation.
> +			 */
>  			if (alloc_flags & ALLOC_HIGHATOMIC)
>  				goto out;
>  
> @@ -2925,8 +2930,8 @@ __no_sanitize_memory
>  static inline
>  struct page *rmqueue(struct zone *preferred_zone,
>  			struct zone *zone, unsigned int order,
> -			gfp_t gfp_flags, unsigned int *alloc_flags,
> -			int migratetype)
> +			gfp_t gfp_flags, unsigned int alloc_flags,
> +			int migratetype, bool *highatomc_allocation)
>  {
>  	struct page *page;
>  
> @@ -2938,19 +2943,33 @@ struct page *rmqueue(struct zone *preferred_zone,
>  
>  	if (likely(pcp_allowed_order(order))) {
>  		page = rmqueue_pcplist(preferred_zone, zone, order,
> -				       migratetype, *alloc_flags);
> -		if (likely(page)) {
> -			*alloc_flags |= ALLOC_PCPLIST;
> +				       migratetype, alloc_flags);
> +		if (likely(page))
>  			goto out;
> -		}
>  	}
>  
> -	page = rmqueue_buddy(preferred_zone, zone, order, *alloc_flags,
> +	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
>  							migratetype);
>  
> +	/*
> +	 * The high-order atomic allocation pageblock reserved conditions:
> +	 *
> +	 * If the high-order atomic allocation page is alloced from pcplist,
> +	 * the highatomic pageblock does not need to be reserved, which can
> +	 * void to migrate an increasing number of pages into buddy
> +	 * MIGRATE_HIGHATOMIC freelist and lead to an increasing risk of
> +	 * allocation failure on other buddy migrate freelists.
> +	 *
> +	 * If the high-order atomic allocation page is alloced from buddy
> +	 * highatomic migrate freelist, regardless of whether the allocation
> +	 * is successful or not, the highatomic pageblock can try to be
> +	 * reserved.
> +	 */
> +	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
> +		*highatomc_allocation = true;
>  out:
>  	/* Separate test+clear to avoid unnecessary atomics */
> -	if ((*alloc_flags & ALLOC_KSWAPD) &&
> +	if ((alloc_flags & ALLOC_KSWAPD) &&
>  	    unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) {
>  		clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
>  		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
> @@ -3218,6 +3237,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>  	struct pglist_data *last_pgdat = NULL;
>  	bool last_pgdat_dirty_ok = false;
>  	bool no_fallback;
> +	bool highatomc_allocation = false;
>  
>  retry:
>  	/*
> @@ -3349,7 +3369,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>  
>  try_this_zone:
>  		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
> -				gfp_mask, &alloc_flags, ac->migratetype);
> +				gfp_mask, alloc_flags, ac->migratetype, &highatomc_allocation);
>  		if (page) {
>  			prep_new_page(page, order, gfp_mask, alloc_flags);
>  
> @@ -3357,8 +3377,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>  			 * If this is a high-order atomic allocation then check
>  			 * if the pageblock should be reserved for the future
>  			 */
> -			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC) &&
> -				unlikely(!(alloc_flags & ALLOC_PCPLIST)))
> +			if (unlikely(highatomc_allocation))
>  				reserve_highatomic_pageblock(page, zone);
>  
>  			return page;
> -- 
> 2.39.0
  
Zhiguo Jiang Nov. 10, 2023, 1:51 a.m. UTC | #2
在 2023/11/10 1:49, Andrew Morton 写道:
> On Thu,  9 Nov 2023 15:31:33 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:
>
>> Add a new bool* argument to pass return flag instead of *alloc_flags
>> and add the related comments.
> Please retain (and update) the changelog with each version of a patch.
OK, I will add it if there is a later version.
>
> For reviewers (please), here's the v1 changelog:
>
> : In case of alloc_flags contain ALLOC_HIGHATOMIC and alloc order
> : is order1/2/3/10 in rmqueue(), if pages are alloced successfully
> : from pcplist cache, and move a free pageblock from the alloced
> : migratetype freelist to MIGRATE_HIGHATOMIC freelist, rather than
> : alloc from MIGRATE_HIGHATOMIC freelist firstly, so this will result
> : in an increasing number of pages on the MIGRATE_HIGHATOMIC freelist,
> : pages in other migratetype freelist are reduced and more likely to
> : allocation failure.
> :
> : Currently the sequence of ALLOC_HIGHATOMIC allocation is:
> : pcplist cache --> buddy (batch >> order) allocation migratetype
> : freelist --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation
> : migratetype freelist.
> :
> : Due to the fact that requesting pages from the pcplist cache is faster
> : than buddy, the sequence of modifying the ALLOC_HIGHATOMIC allocation is:
> : pcplist --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation
> : migrate freelist.
> :
> : This patch can solve the allocation failure of the Non-ALLOC_HIGHATOMIC
> : alloc_flag due to excessive pages reservations in MIGRATE_HIGHATOMIC
> : freelists.
>
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -905,7 +905,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
>>   #endif
>>   #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
>>   #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
>> -#define ALLOC_PCPLIST		0x1000 /* Allocations from pcplist */
>>   
>>   /* Flags that allow allocations below the min watermark. */
>>   #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 67cec88164b1..3c84c3e3eeb0 100755
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -2854,6 +2854,11 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
>>   			int batch = nr_pcp_alloc(pcp, zone, order);
>>   			int alloced;
>>   
>> +			/*
>> +			 * If pcplist is empty and alloc_flags is with ALLOC_HIGHATOMIC,
>> +			 * it should alloc from buddy highatomic migrate freelist firstly
>> +			 * to ensure quick and successful allocation.
>> +			 */
>>   			if (alloc_flags & ALLOC_HIGHATOMIC)
>>   				goto out;
>>   
>> @@ -2925,8 +2930,8 @@ __no_sanitize_memory
>>   static inline
>>   struct page *rmqueue(struct zone *preferred_zone,
>>   			struct zone *zone, unsigned int order,
>> -			gfp_t gfp_flags, unsigned int *alloc_flags,
>> -			int migratetype)
>> +			gfp_t gfp_flags, unsigned int alloc_flags,
>> +			int migratetype, bool *highatomc_allocation)
>>   {
>>   	struct page *page;
>>   
>> @@ -2938,19 +2943,33 @@ struct page *rmqueue(struct zone *preferred_zone,
>>   
>>   	if (likely(pcp_allowed_order(order))) {
>>   		page = rmqueue_pcplist(preferred_zone, zone, order,
>> -				       migratetype, *alloc_flags);
>> -		if (likely(page)) {
>> -			*alloc_flags |= ALLOC_PCPLIST;
>> +				       migratetype, alloc_flags);
>> +		if (likely(page))
>>   			goto out;
>> -		}
>>   	}
>>   
>> -	page = rmqueue_buddy(preferred_zone, zone, order, *alloc_flags,
>> +	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
>>   							migratetype);
>>   
>> +	/*
>> +	 * The high-order atomic allocation pageblock reserved conditions:
>> +	 *
>> +	 * If the high-order atomic allocation page is alloced from pcplist,
>> +	 * the highatomic pageblock does not need to be reserved, which can
>> +	 * void to migrate an increasing number of pages into buddy
>> +	 * MIGRATE_HIGHATOMIC freelist and lead to an increasing risk of
>> +	 * allocation failure on other buddy migrate freelists.
>> +	 *
>> +	 * If the high-order atomic allocation page is alloced from buddy
>> +	 * highatomic migrate freelist, regardless of whether the allocation
>> +	 * is successful or not, the highatomic pageblock can try to be
>> +	 * reserved.
>> +	 */
>> +	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
>> +		*highatomc_allocation = true;
>>   out:
>>   	/* Separate test+clear to avoid unnecessary atomics */
>> -	if ((*alloc_flags & ALLOC_KSWAPD) &&
>> +	if ((alloc_flags & ALLOC_KSWAPD) &&
>>   	    unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) {
>>   		clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
>>   		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
>> @@ -3218,6 +3237,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>>   	struct pglist_data *last_pgdat = NULL;
>>   	bool last_pgdat_dirty_ok = false;
>>   	bool no_fallback;
>> +	bool highatomc_allocation = false;
>>   
>>   retry:
>>   	/*
>> @@ -3349,7 +3369,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>>   
>>   try_this_zone:
>>   		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
>> -				gfp_mask, &alloc_flags, ac->migratetype);
>> +				gfp_mask, alloc_flags, ac->migratetype, &highatomc_allocation);
>>   		if (page) {
>>   			prep_new_page(page, order, gfp_mask, alloc_flags);
>>   
>> @@ -3357,8 +3377,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>>   			 * If this is a high-order atomic allocation then check
>>   			 * if the pageblock should be reserved for the future
>>   			 */
>> -			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC) &&
>> -				unlikely(!(alloc_flags & ALLOC_PCPLIST)))
>> +			if (unlikely(highatomc_allocation))
>>   				reserve_highatomic_pageblock(page, zone);
>>   
>>   			return page;
>> -- 
>> 2.39.0
  
Zhiguo Jiang Nov. 10, 2023, 2:13 a.m. UTC | #3
在 2023/11/10 1:49, Andrew Morton 写道:
> On Thu,  9 Nov 2023 15:31:33 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:
>
>> Add a new bool* argument to pass return flag instead of *alloc_flags
>> and add the related comments.
> Please retain (and update) the changelog with each version of a patch.
OK, it has been added in the v3 patch:
https://lore.kernel.org/all/20231110020840.1031-1-justinjiang@vivo.com/
>
> For reviewers (please), here's the v1 changelog:
>
> : In case of alloc_flags contain ALLOC_HIGHATOMIC and alloc order
> : is order1/2/3/10 in rmqueue(), if pages are alloced successfully
> : from pcplist cache, and move a free pageblock from the alloced
> : migratetype freelist to MIGRATE_HIGHATOMIC freelist, rather than
> : alloc from MIGRATE_HIGHATOMIC freelist firstly, so this will result
> : in an increasing number of pages on the MIGRATE_HIGHATOMIC freelist,
> : pages in other migratetype freelist are reduced and more likely to
> : allocation failure.
> :
> : Currently the sequence of ALLOC_HIGHATOMIC allocation is:
> : pcplist cache --> buddy (batch >> order) allocation migratetype
> : freelist --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation
> : migratetype freelist.
> :
> : Due to the fact that requesting pages from the pcplist cache is faster
> : than buddy, the sequence of modifying the ALLOC_HIGHATOMIC allocation is:
> : pcplist --> buddy MIGRATE_HIGHATOMIC freelist --> buddy allocation
> : migrate freelist.
> :
> : This patch can solve the allocation failure of the Non-ALLOC_HIGHATOMIC
> : alloc_flag due to excessive pages reservations in MIGRATE_HIGHATOMIC
> : freelists.
>
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -905,7 +905,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
>>   #endif
>>   #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
>>   #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
>> -#define ALLOC_PCPLIST		0x1000 /* Allocations from pcplist */
>>   
>>   /* Flags that allow allocations below the min watermark. */
>>   #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 67cec88164b1..3c84c3e3eeb0 100755
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -2854,6 +2854,11 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
>>   			int batch = nr_pcp_alloc(pcp, zone, order);
>>   			int alloced;
>>   
>> +			/*
>> +			 * If pcplist is empty and alloc_flags is with ALLOC_HIGHATOMIC,
>> +			 * it should alloc from buddy highatomic migrate freelist firstly
>> +			 * to ensure quick and successful allocation.
>> +			 */
>>   			if (alloc_flags & ALLOC_HIGHATOMIC)
>>   				goto out;
>>   
>> @@ -2925,8 +2930,8 @@ __no_sanitize_memory
>>   static inline
>>   struct page *rmqueue(struct zone *preferred_zone,
>>   			struct zone *zone, unsigned int order,
>> -			gfp_t gfp_flags, unsigned int *alloc_flags,
>> -			int migratetype)
>> +			gfp_t gfp_flags, unsigned int alloc_flags,
>> +			int migratetype, bool *highatomc_allocation)
>>   {
>>   	struct page *page;
>>   
>> @@ -2938,19 +2943,33 @@ struct page *rmqueue(struct zone *preferred_zone,
>>   
>>   	if (likely(pcp_allowed_order(order))) {
>>   		page = rmqueue_pcplist(preferred_zone, zone, order,
>> -				       migratetype, *alloc_flags);
>> -		if (likely(page)) {
>> -			*alloc_flags |= ALLOC_PCPLIST;
>> +				       migratetype, alloc_flags);
>> +		if (likely(page))
>>   			goto out;
>> -		}
>>   	}
>>   
>> -	page = rmqueue_buddy(preferred_zone, zone, order, *alloc_flags,
>> +	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
>>   							migratetype);
>>   
>> +	/*
>> +	 * The high-order atomic allocation pageblock reserved conditions:
>> +	 *
>> +	 * If the high-order atomic allocation page is alloced from pcplist,
>> +	 * the highatomic pageblock does not need to be reserved, which can
>> +	 * void to migrate an increasing number of pages into buddy
>> +	 * MIGRATE_HIGHATOMIC freelist and lead to an increasing risk of
>> +	 * allocation failure on other buddy migrate freelists.
>> +	 *
>> +	 * If the high-order atomic allocation page is alloced from buddy
>> +	 * highatomic migrate freelist, regardless of whether the allocation
>> +	 * is successful or not, the highatomic pageblock can try to be
>> +	 * reserved.
>> +	 */
>> +	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
>> +		*highatomc_allocation = true;
>>   out:
>>   	/* Separate test+clear to avoid unnecessary atomics */
>> -	if ((*alloc_flags & ALLOC_KSWAPD) &&
>> +	if ((alloc_flags & ALLOC_KSWAPD) &&
>>   	    unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) {
>>   		clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
>>   		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
>> @@ -3218,6 +3237,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>>   	struct pglist_data *last_pgdat = NULL;
>>   	bool last_pgdat_dirty_ok = false;
>>   	bool no_fallback;
>> +	bool highatomc_allocation = false;
>>   
>>   retry:
>>   	/*
>> @@ -3349,7 +3369,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>>   
>>   try_this_zone:
>>   		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
>> -				gfp_mask, &alloc_flags, ac->migratetype);
>> +				gfp_mask, alloc_flags, ac->migratetype, &highatomc_allocation);
>>   		if (page) {
>>   			prep_new_page(page, order, gfp_mask, alloc_flags);
>>   
>> @@ -3357,8 +3377,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
>>   			 * If this is a high-order atomic allocation then check
>>   			 * if the pageblock should be reserved for the future
>>   			 */
>> -			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC) &&
>> -				unlikely(!(alloc_flags & ALLOC_PCPLIST)))
>> +			if (unlikely(highatomc_allocation))
>>   				reserve_highatomic_pageblock(page, zone);
>>   
>>   			return page;
>> -- 
>> 2.39.0
  
Matthew Wilcox Nov. 11, 2023, 6:25 a.m. UTC | #4
On Thu, Nov 09, 2023 at 09:49:54AM -0800, Andrew Morton wrote:
> On Thu,  9 Nov 2023 15:31:33 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:
> 
> > Add a new bool* argument to pass return flag instead of *alloc_flags
> > and add the related comments.
> 
> Please retain (and update) the changelog with each version of a patch.
> 
> For reviewers (please), here's the v1 changelog:

This patch isn't diffed against the current tree.  It can't be reviewed.
  
Zhiguo Jiang Nov. 13, 2023, 1:04 a.m. UTC | #5
在 2023/11/11 14:25, Matthew Wilcox 写道:
> On Thu, Nov 09, 2023 at 09:49:54AM -0800, Andrew Morton wrote:
>> On Thu,  9 Nov 2023 15:31:33 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:
>>
>>> Add a new bool* argument to pass return flag instead of *alloc_flags
>>> and add the related comments.
>> Please retain (and update) the changelog with each version of a patch.
>>
>> For reviewers (please), here's the v1 changelog:
> This patch isn't diffed against the current tree.  It can't be reviewed.
patch v1:
https://lore.kernel.org/all/20231108065408.1861-1-justinjiang@vivo.com/
patch v2:
https://lore.kernel.org/all/20231109073133.792-2-justinjiang@vivo.com/
patch v3:
https://lore.kernel.org/all/20231110020840.1031-1-justinjiang@vivo.com/


>
  
Matthew Wilcox Nov. 13, 2023, 1:36 a.m. UTC | #6
On Mon, Nov 13, 2023 at 09:04:49AM +0800, zhiguojiang wrote:
> 
> 
> 在 2023/11/11 14:25, Matthew Wilcox 写道:
> > On Thu, Nov 09, 2023 at 09:49:54AM -0800, Andrew Morton wrote:
> > > On Thu,  9 Nov 2023 15:31:33 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:
> > > 
> > > > Add a new bool* argument to pass return flag instead of *alloc_flags
> > > > and add the related comments.
> > > Please retain (and update) the changelog with each version of a patch.
> > > 
> > > For reviewers (please), here's the v1 changelog:
> > This patch isn't diffed against the current tree.  It can't be reviewed.
> patch v1:
> https://lore.kernel.org/all/20231108065408.1861-1-justinjiang@vivo.com/
> patch v2:
> https://lore.kernel.org/all/20231109073133.792-2-justinjiang@vivo.com/
> patch v3:
> https://lore.kernel.org/all/20231110020840.1031-1-justinjiang@vivo.com/

None of those are diffed against the current tree.  You need to send a
patch that applies cleanly to either linux-next or current Linus head.
Not a patch on top of an earlier version of your patch.
  
Zhiguo Jiang Nov. 13, 2023, 3:09 a.m. UTC | #7
在 2023/11/13 9:36, Matthew Wilcox 写道:
> On Mon, Nov 13, 2023 at 09:04:49AM +0800, zhiguojiang wrote:
>>
>> 在 2023/11/11 14:25, Matthew Wilcox 写道:
>>> On Thu, Nov 09, 2023 at 09:49:54AM -0800, Andrew Morton wrote:
>>>> On Thu,  9 Nov 2023 15:31:33 +0800 Zhiguo Jiang <justinjiang@vivo.com> wrote:
>>>>
>>>>> Add a new bool* argument to pass return flag instead of *alloc_flags
>>>>> and add the related comments.
>>>> Please retain (and update) the changelog with each version of a patch.
>>>>
>>>> For reviewers (please), here's the v1 changelog:
>>> This patch isn't diffed against the current tree.  It can't be reviewed.
>> patch v1:
>> https://lore.kernel.org/all/20231108065408.1861-1-justinjiang@vivo.com/
>> patch v2:
>> https://lore.kernel.org/all/20231109073133.792-2-justinjiang@vivo.com/
>> patch v3:
>> https://lore.kernel.org/all/20231110020840.1031-1-justinjiang@vivo.com/
> None of those are diffed against the current tree.  You need to send a
> patch that applies cleanly to either linux-next or current Linus head.
> Not a patch on top of an earlier version of your patch.
Modification has been made based on today's latest patch from 
linux-next, thanks.
Patch:
https://lore.kernel.org/all/20231113030343.1984-1-justinjiang@vivo.com/
  

Patch

diff --git a/mm/internal.h b/mm/internal.h
index 98c14b16ce81..1d67c141902d 100755
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -905,7 +905,6 @@  unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 #endif
 #define ALLOC_HIGHATOMIC	0x200 /* Allows access to MIGRATE_HIGHATOMIC */
 #define ALLOC_KSWAPD		0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
-#define ALLOC_PCPLIST		0x1000 /* Allocations from pcplist */
 
 /* Flags that allow allocations below the min watermark. */
 #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 67cec88164b1..3c84c3e3eeb0 100755
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2854,6 +2854,11 @@  struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 			int batch = nr_pcp_alloc(pcp, zone, order);
 			int alloced;
 
+			/*
+			 * If the pcplist is empty and alloc_flags has ALLOC_HIGHATOMIC set,
+			 * allocate from the buddy highatomic migrate freelist first to
+			 * ensure a quick and successful allocation.
+			 */
 			if (alloc_flags & ALLOC_HIGHATOMIC)
 				goto out;
 
@@ -2925,8 +2930,8 @@  __no_sanitize_memory
 static inline
 struct page *rmqueue(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
-			gfp_t gfp_flags, unsigned int *alloc_flags,
-			int migratetype)
+			gfp_t gfp_flags, unsigned int alloc_flags,
+			int migratetype, bool *highatomc_allocation)
 {
 	struct page *page;
 
@@ -2938,19 +2943,33 @@  struct page *rmqueue(struct zone *preferred_zone,
 
 	if (likely(pcp_allowed_order(order))) {
 		page = rmqueue_pcplist(preferred_zone, zone, order,
-				       migratetype, *alloc_flags);
-		if (likely(page)) {
-			*alloc_flags |= ALLOC_PCPLIST;
+				       migratetype, alloc_flags);
+		if (likely(page))
 			goto out;
-		}
 	}
 
-	page = rmqueue_buddy(preferred_zone, zone, order, *alloc_flags,
+	page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
 							migratetype);
 
+	/*
+	 * The high-order atomic allocation pageblock reserved conditions:
+	 *
+	 * If the high-order atomic allocation page is allocated from pcplist,
+	 * the highatomic pageblock does not need to be reserved. This avoids
+	 * migrating an increasing number of pages into the buddy
+	 * MIGRATE_HIGHATOMIC freelist, which would increase the risk of
+	 * allocation failure on the other buddy migrate freelists.
+	 *
+	 * If the high-order atomic allocation page is alloced from buddy
+	 * highatomic migrate freelist, regardless of whether the allocation
+	 * is successful or not, the highatomic pageblock can try to be
+	 * reserved.
+	 */
+	if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+		*highatomc_allocation = true;
 out:
 	/* Separate test+clear to avoid unnecessary atomics */
-	if ((*alloc_flags & ALLOC_KSWAPD) &&
+	if ((alloc_flags & ALLOC_KSWAPD) &&
 	    unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) {
 		clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
 		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
@@ -3218,6 +3237,7 @@  get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct pglist_data *last_pgdat = NULL;
 	bool last_pgdat_dirty_ok = false;
 	bool no_fallback;
+	bool highatomc_allocation = false;
 
 retry:
 	/*
@@ -3349,7 +3369,7 @@  get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 
 try_this_zone:
 		page = rmqueue(ac->preferred_zoneref->zone, zone, order,
-				gfp_mask, &alloc_flags, ac->migratetype);
+				gfp_mask, alloc_flags, ac->migratetype, &highatomc_allocation);
 		if (page) {
 			prep_new_page(page, order, gfp_mask, alloc_flags);
 
@@ -3357,8 +3377,7 @@  get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			 * If this is a high-order atomic allocation then check
 			 * if the pageblock should be reserved for the future
 			 */
-			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC) &&
-				unlikely(!(alloc_flags & ALLOC_PCPLIST)))
+			if (unlikely(highatomc_allocation))
 				reserve_highatomic_pageblock(page, zone);
 
 			return page;