mm: optimization on page allocation when CMA enabled

Message ID 1682679641-13652-1-git-send-email-zhaoyang.huang@unisoc.com
State New
Headers
Series mm: optimization on page allocation when CMA enabled |

Commit Message

zhaoyang.huang April 28, 2023, 11 a.m. UTC
  From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>

Please be notice bellowing typical scenario that commit 168676649 introduce,
that is, 12MB free cma pages 'help' GFP_MOVABLE to keep draining/fragmenting
U&R page blocks until they shrink to 12MB without enter slowpath which against
current reclaiming policy. This commit change the criteria from hard coded '1/2'
to watermark check which leave U&R free pages stay around WMARK_LOW when being
fallback.

DMA32 free:25900kB boost:0kB min:4176kB low:25856kB high:29516kB

Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
 mm/page_alloc.c | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)
  

Comments

kernel test robot April 28, 2023, 2:05 p.m. UTC | #1
Hi zhaoyang.huang,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/zhaoyang-huang/mm-optimization-on-page-allocation-when-CMA-enabled/20230428-190140
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/1682679641-13652-1-git-send-email-zhaoyang.huang%40unisoc.com
patch subject: [PATCH] mm: optimization on page allocation when CMA enabled
config: nios2-randconfig-r003-20230428 (https://download.01.org/0day-ci/archive/20230428/202304282107.O93rPndb-lkp@intel.com/config)
compiler: nios2-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/dbda57eee661a0c9b47f23720bcc9741495d00a5
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review zhaoyang-huang/mm-optimization-on-page-allocation-when-CMA-enabled/20230428-190140
        git checkout dbda57eee661a0c9b47f23720bcc9741495d00a5
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=nios2 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=nios2 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304282107.O93rPndb-lkp@intel.com/

All errors (new ones prefixed by >>):

   mm/page_alloc.c: In function '__rmqueue':
>> mm/page_alloc.c:2328:42: error: implicit declaration of function '__if_use_cma_first' [-Werror=implicit-function-declaration]
    2328 |                         bool cma_first = __if_use_cma_first(zone, order, alloc_flags);
         |                                          ^~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/__if_use_cma_first +2328 mm/page_alloc.c

  2277	
  2278	#ifdef CONFIG_CMA
  2279	static bool __if_use_cma_first(struct zone *zone, unsigned int order, unsigned int alloc_flags)
  2280	{
  2281		unsigned long cma_proportion = 0;
  2282		unsigned long cma_free_proportion = 0;
  2283		unsigned long watermark = 0;
  2284		unsigned long wm_fact[ALLOC_WMARK_MASK] = {1, 1, 2};
  2285		long count = 0;
  2286		bool cma_first = false;
  2287	
  2288		watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
  2289		/*check if GFP_MOVABLE pass previous watermark check via the help of CMA*/
  2290		if (!zone_watermark_ok(zone, order, watermark, 0, alloc_flags & (~ALLOC_CMA)))
  2291		{
  2292			alloc_flags &= ALLOC_WMARK_MASK;
  2293			/* WMARK_LOW failed lead to using cma first, this helps U&R stay
  2294			 * around low when being drained by GFP_MOVABLE
  2295			 */
  2296			if (alloc_flags <= ALLOC_WMARK_LOW)
  2297				cma_first = true;
  2298			/*check proportion for WMARK_HIGH*/
  2299			else {
  2300				count = atomic_long_read(&zone->managed_pages);
  2301				cma_proportion = zone->cma_pages * 100 / count;
  2302				cma_free_proportion = zone_page_state(zone, NR_FREE_CMA_PAGES) * 100
  2303					/  zone_page_state(zone, NR_FREE_PAGES);
  2304				cma_first = (cma_free_proportion >= wm_fact[alloc_flags] * cma_proportion
  2305						|| cma_free_proportion >= 50);
  2306			}
  2307		}
  2308		return cma_first;
  2309	}
  2310	#endif
  2311	/*
  2312	 * Do the hard work of removing an element from the buddy allocator.
  2313	 * Call me with the zone->lock already held.
  2314	 */
  2315	static __always_inline struct page *
  2316	__rmqueue(struct zone *zone, unsigned int order, int migratetype,
  2317							unsigned int alloc_flags)
  2318	{
  2319		struct page *page;
  2320	
  2321		if (IS_ENABLED(CONFIG_CMA)) {
  2322			/*
  2323			 * Balance movable allocations between regular and CMA areas by
  2324			 * allocating from CMA when over half of the zone's free memory
  2325			 * is in the CMA area.
  2326			 */
  2327			if (migratetype == MIGRATE_MOVABLE) {
> 2328				bool cma_first = __if_use_cma_first(zone, order, alloc_flags);
  2329				page = cma_first ? __rmqueue_cma_fallback(zone, order) : NULL;
  2330				if (page)
  2331					return page;
  2332			}
  2333		}
  2334	retry:
  2335		page = __rmqueue_smallest(zone, order, migratetype);
  2336		if (unlikely(!page)) {
  2337			if (alloc_flags & ALLOC_CMA)
  2338				page = __rmqueue_cma_fallback(zone, order);
  2339	
  2340			if (!page && __rmqueue_fallback(zone, order, migratetype,
  2341									alloc_flags))
  2342				goto retry;
  2343		}
  2344		return page;
  2345	}
  2346
  
kernel test robot April 28, 2023, 2:16 p.m. UTC | #2
Hi zhaoyang.huang,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/zhaoyang-huang/mm-optimization-on-page-allocation-when-CMA-enabled/20230428-190140
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/1682679641-13652-1-git-send-email-zhaoyang.huang%40unisoc.com
patch subject: [PATCH] mm: optimization on page allocation when CMA enabled
config: alpha-randconfig-r014-20230428 (https://download.01.org/0day-ci/archive/20230428/202304282118.os2SZpZS-lkp@intel.com/config)
compiler: alpha-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/dbda57eee661a0c9b47f23720bcc9741495d00a5
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review zhaoyang-huang/mm-optimization-on-page-allocation-when-CMA-enabled/20230428-190140
        git checkout dbda57eee661a0c9b47f23720bcc9741495d00a5
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=alpha olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=alpha SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304282118.os2SZpZS-lkp@intel.com/

All errors (new ones prefixed by >>):

   mm/page_alloc.c: In function '__rmqueue':
>> mm/page_alloc.c:2328:42: error: implicit declaration of function '__if_use_cma_first' [-Werror=implicit-function-declaration]
    2328 |                         bool cma_first = __if_use_cma_first(zone, order, alloc_flags);
         |                                          ^~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/__if_use_cma_first +2328 mm/page_alloc.c

  2277	
  2278	#ifdef CONFIG_CMA
  2279	static bool __if_use_cma_first(struct zone *zone, unsigned int order, unsigned int alloc_flags)
  2280	{
  2281		unsigned long cma_proportion = 0;
  2282		unsigned long cma_free_proportion = 0;
  2283		unsigned long watermark = 0;
  2284		unsigned long wm_fact[ALLOC_WMARK_MASK] = {1, 1, 2};
  2285		long count = 0;
  2286		bool cma_first = false;
  2287	
  2288		watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
  2289		/*check if GFP_MOVABLE pass previous watermark check via the help of CMA*/
  2290		if (!zone_watermark_ok(zone, order, watermark, 0, alloc_flags & (~ALLOC_CMA)))
  2291		{
  2292			alloc_flags &= ALLOC_WMARK_MASK;
  2293			/* WMARK_LOW failed lead to using cma first, this helps U&R stay
  2294			 * around low when being drained by GFP_MOVABLE
  2295			 */
  2296			if (alloc_flags <= ALLOC_WMARK_LOW)
  2297				cma_first = true;
  2298			/*check proportion for WMARK_HIGH*/
  2299			else {
  2300				count = atomic_long_read(&zone->managed_pages);
  2301				cma_proportion = zone->cma_pages * 100 / count;
  2302				cma_free_proportion = zone_page_state(zone, NR_FREE_CMA_PAGES) * 100
  2303					/  zone_page_state(zone, NR_FREE_PAGES);
  2304				cma_first = (cma_free_proportion >= wm_fact[alloc_flags] * cma_proportion
  2305						|| cma_free_proportion >= 50);
  2306			}
  2307		}
  2308		return cma_first;
  2309	}
  2310	#endif
  2311	/*
  2312	 * Do the hard work of removing an element from the buddy allocator.
  2313	 * Call me with the zone->lock already held.
  2314	 */
  2315	static __always_inline struct page *
  2316	__rmqueue(struct zone *zone, unsigned int order, int migratetype,
  2317							unsigned int alloc_flags)
  2318	{
  2319		struct page *page;
  2320	
  2321		if (IS_ENABLED(CONFIG_CMA)) {
  2322			/*
  2323			 * Balance movable allocations between regular and CMA areas by
  2324			 * allocating from CMA when over half of the zone's free memory
  2325			 * is in the CMA area.
  2326			 */
  2327			if (migratetype == MIGRATE_MOVABLE) {
> 2328				bool cma_first = __if_use_cma_first(zone, order, alloc_flags);
  2329				page = cma_first ? __rmqueue_cma_fallback(zone, order) : NULL;
  2330				if (page)
  2331					return page;
  2332			}
  2333		}
  2334	retry:
  2335		page = __rmqueue_smallest(zone, order, migratetype);
  2336		if (unlikely(!page)) {
  2337			if (alloc_flags & ALLOC_CMA)
  2338				page = __rmqueue_cma_fallback(zone, order);
  2339	
  2340			if (!page && __rmqueue_fallback(zone, order, migratetype,
  2341									alloc_flags))
  2342				goto retry;
  2343		}
  2344		return page;
  2345	}
  2346
  
Roman Gushchin May 1, 2023, 5:12 p.m. UTC | #3
Hi Zhaoyang!

On Fri, Apr 28, 2023 at 07:00:41PM +0800, zhaoyang.huang wrote:
> From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> 
> Please be notice bellowing typical scenario that commit 168676649 introduce,
> that is, 12MB free cma pages 'help' GFP_MOVABLE to keep draining/fragmenting
> U&R page blocks until they shrink to 12MB without enter slowpath which against
> current reclaiming policy. This commit change the criteria from hard coded '1/2'
> to watermark check which leave U&R free pages stay around WMARK_LOW when being
> fallback.

Can you, please, explain the problem you're solving in more details?

If I understand your code correctly, you're effectively reducing the
use of cma areas for movable allocations. Why it's good?
Also, this is a hot path, please, make sure you're not adding
much overhead.

And please use scripts/checkpatch.pl next time, there are many
code style issues.

Thanks!

> 
> DMA32 free:25900kB boost:0kB min:4176kB low:25856kB high:29516kB
> 
> Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> ---
>  mm/page_alloc.c | 40 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 36 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 0745aed..97768fe 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3071,6 +3071,39 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
>  
>  }
>  
> +#ifdef CONFIG_CMA
> +static bool __if_use_cma_first(struct zone *zone, unsigned int order, unsigned int alloc_flags)
> +{
> +	unsigned long cma_proportion = 0;
> +	unsigned long cma_free_proportion = 0;
> +	unsigned long watermark = 0;
> +	unsigned long wm_fact[ALLOC_WMARK_MASK] = {1, 1, 2};
> +	long count = 0;
> +	bool cma_first = false;
> +
> +	watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
> +	/*check if GFP_MOVABLE pass previous watermark check via the help of CMA*/
> +	if (!zone_watermark_ok(zone, order, watermark, 0, alloc_flags & (~ALLOC_CMA)))
> +	{
> +		alloc_flags &= ALLOC_WMARK_MASK;
> +		/* WMARK_LOW failed lead to using cma first, this helps U&R stay
> +		 * around low when being drained by GFP_MOVABLE
> +		 */
> +		if (alloc_flags <= ALLOC_WMARK_LOW)
> +			cma_first = true;
> +		/*check proportion for WMARK_HIGH*/
> +		else {
> +			count = atomic_long_read(&zone->managed_pages);
> +			cma_proportion = zone->cma_pages * 100 / count;
> +			cma_free_proportion = zone_page_state(zone, NR_FREE_CMA_PAGES) * 100
> +				/  zone_page_state(zone, NR_FREE_PAGES);
> +			cma_first = (cma_free_proportion >= wm_fact[alloc_flags] * cma_proportion
> +					|| cma_free_proportion >= 50);
> +		}
> +	}
> +	return cma_first;
> +}
> +#endif
>  /*
>   * Do the hard work of removing an element from the buddy allocator.
>   * Call me with the zone->lock already held.
> @@ -3087,10 +3120,9 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
>  		 * allocating from CMA when over half of the zone's free memory
>  		 * is in the CMA area.
>  		 */
> -		if (alloc_flags & ALLOC_CMA &&
> -		    zone_page_state(zone, NR_FREE_CMA_PAGES) >
> -		    zone_page_state(zone, NR_FREE_PAGES) / 2) {
> -			page = __rmqueue_cma_fallback(zone, order);
> +		if (migratetype == MIGRATE_MOVABLE) {
> +			bool cma_first = __if_use_cma_first(zone, order, alloc_flags);
> +			page = cma_first ? __rmqueue_cma_fallback(zone, order) : NULL;
>  			if (page)
>  				return page;
>  		}
> -- 
> 1.9.1
> 
>
  
zhaoyang.huang May 2, 2023, 12:12 p.m. UTC | #4
> Hi Zhaoyang!
> 
> On Fri, Apr 28, 2023 at 07:00:41PM +0800, zhaoyang.huang wrote:
> > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> >
> > Please be notice bellowing typical scenario that commit 168676649
> > introduce, that is, 12MB free cma pages 'help' GFP_MOVABLE to keep
> > draining/fragmenting U&R page blocks until they shrink to 12MB without
> > enter slowpath which against current reclaiming policy. This commit change
> the criteria from hard coded '1/2'
> > to watermark check which leave U&R free pages stay around WMARK_LOW
> > when being fallback.
> 
> Can you, please, explain the problem you're solving in more details?
I am trying to solve a OOM problem caused by slab allocation fail as all free pages are MIGRATE_CMA by applying 168676649, which could help to reduce the fault ration from 12/20 to 2/20. I noticed it introduce the phenomenon which I describe above.
> 
> If I understand your code correctly, you're effectively reducing the use of cma
> areas for movable allocations. Why it's good?
Not exactly. In fact, this commit lead to the use of cma early than it is now, which could help to protect U&R be 'stolen' by GFP_MOVABLE. Imagine this scenario, 30MB total free pages composed of 10MB CMA and 20MB U&R, while zone's watermark low is 25MB. An GFP_MOVABLE allocation can keep stealing U&R pages(don't meet 1/2 criteria) without enter slowpath(zone_watermark_ok(WMARK_LOW) is true) until they shrink to 15MB. In my opinion, it makes more sense to have CMA take its duty to help movable allocation when U&R lower to certain zone's watermark instead of when their size become smaller than CMA.
> Also, this is a hot path, please, make sure you're not adding much overhead.
I would like to take more thought.
> 
> And please use scripts/checkpatch.pl next time, there are many code style
> issues.
ok

> 
> Thanks!
> 
> >
> > DMA32 free:25900kB boost:0kB min:4176kB low:25856kB high:29516kB
> >
> > Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > ---
> >  mm/page_alloc.c | 40 ++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 36 insertions(+), 4 deletions(-)
> >
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0745aed..97768fe
> > 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -3071,6 +3071,39 @@ static bool
> > unreserve_highatomic_pageblock(const struct alloc_context *ac,
> >
> >  }
> >
> > +#ifdef CONFIG_CMA
> > +static bool __if_use_cma_first(struct zone *zone, unsigned int order,
> > +unsigned int alloc_flags) {
> > +     unsigned long cma_proportion = 0;
> > +     unsigned long cma_free_proportion = 0;
> > +     unsigned long watermark = 0;
> > +     unsigned long wm_fact[ALLOC_WMARK_MASK] = {1, 1, 2};
> > +     long count = 0;
> > +     bool cma_first = false;
> > +
> > +     watermark = wmark_pages(zone, alloc_flags &
> ALLOC_WMARK_MASK);
> > +     /*check if GFP_MOVABLE pass previous watermark check via the help
> of CMA*/
> > +     if (!zone_watermark_ok(zone, order, watermark, 0, alloc_flags &
> (~ALLOC_CMA)))
> > +     {
> > +             alloc_flags &= ALLOC_WMARK_MASK;
> > +             /* WMARK_LOW failed lead to using cma first, this helps
> U&R stay
> > +              * around low when being drained by GFP_MOVABLE
> > +              */
> > +             if (alloc_flags <= ALLOC_WMARK_LOW)
> > +                     cma_first = true;
> > +             /*check proportion for WMARK_HIGH*/
> > +             else {
> > +                     count =
> atomic_long_read(&zone->managed_pages);
> > +                     cma_proportion = zone->cma_pages * 100 / count;
> > +                     cma_free_proportion = zone_page_state(zone,
> NR_FREE_CMA_PAGES) * 100
> > +                             /  zone_page_state(zone,
> NR_FREE_PAGES);
> > +                     cma_first = (cma_free_proportion >=
> wm_fact[alloc_flags] * cma_proportion
> > +                                     || cma_free_proportion >=
> 50);
> > +             }
> > +     }
> > +     return cma_first;
> > +}
> > +#endif
> >  /*
> >   * Do the hard work of removing an element from the buddy allocator.
> >   * Call me with the zone->lock already held.
> > @@ -3087,10 +3120,9 @@ static bool
> unreserve_highatomic_pageblock(const struct alloc_context *ac,
> >                * allocating from CMA when over half of the zone's free
> memory
> >                * is in the CMA area.
> >                */
> > -             if (alloc_flags & ALLOC_CMA &&
> > -                 zone_page_state(zone, NR_FREE_CMA_PAGES) >
> > -                 zone_page_state(zone, NR_FREE_PAGES) / 2) {
> > -                     page = __rmqueue_cma_fallback(zone, order);
> > +             if (migratetype == MIGRATE_MOVABLE) {
> > +                     bool cma_first = __if_use_cma_first(zone, order,
> alloc_flags);
> > +                     page = cma_first ? __rmqueue_cma_fallback(zone,
> > + order) : NULL;
> >                       if (page)
> >                               return page;
> >               }
> > --
> > 1.9.1
> >
> >
  
Roman Gushchin May 2, 2023, 10:01 p.m. UTC | #5
On Tue, May 02, 2023 at 12:12:28PM +0000, 黄朝阳 (Zhaoyang Huang) wrote:
> > Hi Zhaoyang!
> > 
> > On Fri, Apr 28, 2023 at 07:00:41PM +0800, zhaoyang.huang wrote:
> > > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > >
> > > Please be notice bellowing typical scenario that commit 168676649
> > > introduce, that is, 12MB free cma pages 'help' GFP_MOVABLE to keep
> > > draining/fragmenting U&R page blocks until they shrink to 12MB without
> > > enter slowpath which against current reclaiming policy. This commit change
> > the criteria from hard coded '1/2'
> > > to watermark check which leave U&R free pages stay around WMARK_LOW
> > > when being fallback.
> > 
> > Can you, please, explain the problem you're solving in more details?
> I am trying to solve a OOM problem caused by slab allocation fail as all free pages are MIGRATE_CMA by applying 168676649, which could help to reduce the fault ration from 12/20 to 2/20. I noticed it introduce the phenomenon which I describe above.
> > 
> > If I understand your code correctly, you're effectively reducing the use of cma
> > areas for movable allocations. Why it's good?
> Not exactly. In fact, this commit lead to the use of cma early than it is now, which could help to protect U&R be 'stolen' by GFP_MOVABLE. Imagine this scenario, 30MB total free pages composed of 10MB CMA and 20MB U&R, while zone's watermark low is 25MB. An GFP_MOVABLE allocation can keep stealing U&R pages(don't meet 1/2 criteria) without enter slowpath(zone_watermark_ok(WMARK_LOW) is true) until they shrink to 15MB. In my opinion, it makes more sense to have CMA take its duty to help movable allocation when U&R lower to certain zone's watermark instead of when their size become smaller than CMA.
> > Also, this is a hot path, please, make sure you're not adding much overhead.
> I would like to take more thought.

Got it, thank you for the explanation!

How about the following approach (completely untested)?

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6da423ec356f..4b50f497c09d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2279,12 +2279,13 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 	if (IS_ENABLED(CONFIG_CMA)) {
 		/*
 		 * Balance movable allocations between regular and CMA areas by
-		 * allocating from CMA when over half of the zone's free memory
-		 * is in the CMA area.
+		 * allocating from CMA when over half of the zone's easily
+		 * available free memory is in the CMA area.
 		 */
 		if (alloc_flags & ALLOC_CMA &&
 		    zone_page_state(zone, NR_FREE_CMA_PAGES) >
-		    zone_page_state(zone, NR_FREE_PAGES) / 2) {
+		    (zone_page_state(zone, NR_FREE_PAGES) -
+		     zone->_watermark[WMARK_LOW]) / 2) {
 			page = __rmqueue_cma_fallback(zone, order);
 			if (page)
 				return page;

Basically the idea is to keep free space equally split between cma and non-cma areas.
Will it work for you?

Thanks!
  
Zhaoyang Huang May 3, 2023, 7:58 a.m. UTC | #6
On Wed, May 3, 2023 at 6:01 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> On Tue, May 02, 2023 at 12:12:28PM +0000, 黄朝阳 (Zhaoyang Huang) wrote:
> > > Hi Zhaoyang!
> > >
> > > On Fri, Apr 28, 2023 at 07:00:41PM +0800, zhaoyang.huang wrote:
> > > > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > > >
> > > > Please be notice bellowing typical scenario that commit 168676649
> > > > introduce, that is, 12MB free cma pages 'help' GFP_MOVABLE to keep
> > > > draining/fragmenting U&R page blocks until they shrink to 12MB without
> > > > enter slowpath which against current reclaiming policy. This commit change
> > > the criteria from hard coded '1/2'
> > > > to watermark check which leave U&R free pages stay around WMARK_LOW
> > > > when being fallback.
> > >
> > > Can you, please, explain the problem you're solving in more details?
> > I am trying to solve a OOM problem caused by slab allocation fail as all free pages are MIGRATE_CMA by applying 168676649, which could help to reduce the fault ration from 12/20 to 2/20. I noticed it introduce the phenomenon which I describe above.
> > >
> > > If I understand your code correctly, you're effectively reducing the use of cma
> > > areas for movable allocations. Why it's good?
> > Not exactly. In fact, this commit lead to the use of cma early than it is now, which could help to protect U&R be 'stolen' by GFP_MOVABLE. Imagine this scenario, 30MB total free pages composed of 10MB CMA and 20MB U&R, while zone's watermark low is 25MB. An GFP_MOVABLE allocation can keep stealing U&R pages(don't meet 1/2 criteria) without enter slowpath(zone_watermark_ok(WMARK_LOW) is true) until they shrink to 15MB. In my opinion, it makes more sense to have CMA take its duty to help movable allocation when U&R lower to certain zone's watermark instead of when their size become smaller than CMA.
> > > Also, this is a hot path, please, make sure you're not adding much overhead.
> > I would like to take more thought.
>
> Got it, thank you for the explanation!
>
> How about the following approach (completely untested)?
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6da423ec356f..4b50f497c09d 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2279,12 +2279,13 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
>         if (IS_ENABLED(CONFIG_CMA)) {
>                 /*
>                  * Balance movable allocations between regular and CMA areas by
> -                * allocating from CMA when over half of the zone's free memory
> -                * is in the CMA area.
> +                * allocating from CMA when over half of the zone's easily
> +                * available free memory is in the CMA area.
>                  */
>                 if (alloc_flags & ALLOC_CMA &&
>                     zone_page_state(zone, NR_FREE_CMA_PAGES) >
> -                   zone_page_state(zone, NR_FREE_PAGES) / 2) {
> +                   (zone_page_state(zone, NR_FREE_PAGES) -
> +                    zone->_watermark[WMARK_LOW]) / 2) {
IMO, we should focus on non-cma area which trigger use of cma when
they are lower than corresponding watermark(there is still
WMARK_MIN/HIGH to deal with within slowpath)
>                         page = __rmqueue_cma_fallback(zone, order);
>                         if (page)
>                                 return page;
>
> Basically the idea is to keep free space equally split between cma and non-cma areas.
> Will it work for you?
I don't think it makes sense to 'equally split' cma and non-cma areas
over free space while cma could occupy various proportions in a single
zone. This fixed 1/2 could lead to different situation on 20% or 50%
cma occupation.
>
> Thanks!
  
Roman Gushchin May 3, 2023, 4:30 p.m. UTC | #7
On Wed, May 03, 2023 at 03:58:21PM +0800, Zhaoyang Huang wrote:
> On Wed, May 3, 2023 at 6:01 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
> >
> > On Tue, May 02, 2023 at 12:12:28PM +0000, 黄朝阳 (Zhaoyang Huang) wrote:
> > > > Hi Zhaoyang!
> > > >
> > > > On Fri, Apr 28, 2023 at 07:00:41PM +0800, zhaoyang.huang wrote:
> > > > > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > > > >
> > > > > Please be notice bellowing typical scenario that commit 168676649
> > > > > introduce, that is, 12MB free cma pages 'help' GFP_MOVABLE to keep
> > > > > draining/fragmenting U&R page blocks until they shrink to 12MB without
> > > > > enter slowpath which against current reclaiming policy. This commit change
> > > > the criteria from hard coded '1/2'
> > > > > to watermark check which leave U&R free pages stay around WMARK_LOW
> > > > > when being fallback.
> > > >
> > > > Can you, please, explain the problem you're solving in more details?
> > > I am trying to solve a OOM problem caused by slab allocation fail as all free pages are MIGRATE_CMA by applying 168676649, which could help to reduce the fault ration from 12/20 to 2/20. I noticed it introduce the phenomenon which I describe above.
> > > >
> > > > If I understand your code correctly, you're effectively reducing the use of cma
> > > > areas for movable allocations. Why it's good?
> > > Not exactly. In fact, this commit lead to the use of cma early than it is now, which could help to protect U&R be 'stolen' by GFP_MOVABLE. Imagine this scenario, 30MB total free pages composed of 10MB CMA and 20MB U&R, while zone's watermark low is 25MB. An GFP_MOVABLE allocation can keep stealing U&R pages(don't meet 1/2 criteria) without enter slowpath(zone_watermark_ok(WMARK_LOW) is true) until they shrink to 15MB. In my opinion, it makes more sense to have CMA take its duty to help movable allocation when U&R lower to certain zone's watermark instead of when their size become smaller than CMA.
> > > > Also, this is a hot path, please, make sure you're not adding much overhead.
> > > I would like to take more thought.
> >
> > Got it, thank you for the explanation!
> >
> > How about the following approach (completely untested)?
> >
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 6da423ec356f..4b50f497c09d 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -2279,12 +2279,13 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
> >         if (IS_ENABLED(CONFIG_CMA)) {
> >                 /*
> >                  * Balance movable allocations between regular and CMA areas by
> > -                * allocating from CMA when over half of the zone's free memory
> > -                * is in the CMA area.
> > +                * allocating from CMA when over half of the zone's easily
> > +                * available free memory is in the CMA area.
> >                  */
> >                 if (alloc_flags & ALLOC_CMA &&
> >                     zone_page_state(zone, NR_FREE_CMA_PAGES) >
> > -                   zone_page_state(zone, NR_FREE_PAGES) / 2) {
> > +                   (zone_page_state(zone, NR_FREE_PAGES) -
> > +                    zone->_watermark[WMARK_LOW]) / 2) {
> IMO, we should focus on non-cma area which trigger use of cma when
> they are lower than corresponding watermark(there is still
> WMARK_MIN/HIGH to deal with within slowpath)
> >                         page = __rmqueue_cma_fallback(zone, order);
> >                         if (page)
> >                                 return page;
> >
> > Basically the idea is to keep free space equally split between cma and non-cma areas.
> > Will it work for you?
> I don't think it makes sense to 'equally split' cma and non-cma areas
> over free space while cma could occupy various proportions in a single
> zone. This fixed 1/2 could lead to different situation on 20% or 50%
> cma occupation.

Can you then, please, explain in details what you're proposing instead
and why it's better (not only in your case, but generally as well)?
For the context, my original usecase was cma size under 10G and
the total memory size between 32G and 128G.

Looking at your original patch, I see that __if_use_cma_first() always returns
false if zone_watermark_ok() returns true. It worries me.

Thanks!
  
Zhaoyang Huang May 4, 2023, 6:23 a.m. UTC | #8
On Thu, May 4, 2023 at 12:30 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> On Wed, May 03, 2023 at 03:58:21PM +0800, Zhaoyang Huang wrote:
> > On Wed, May 3, 2023 at 6:01 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
> > >
> > > On Tue, May 02, 2023 at 12:12:28PM +0000, 黄朝阳 (Zhaoyang Huang) wrote:
> > > > > Hi Zhaoyang!
> > > > >
> > > > > On Fri, Apr 28, 2023 at 07:00:41PM +0800, zhaoyang.huang wrote:
> > > > > > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > > > > >
> > > > > > Please be notice bellowing typical scenario that commit 168676649
> > > > > > introduce, that is, 12MB free cma pages 'help' GFP_MOVABLE to keep
> > > > > > draining/fragmenting U&R page blocks until they shrink to 12MB without
> > > > > > enter slowpath which against current reclaiming policy. This commit change
> > > > > the criteria from hard coded '1/2'
> > > > > > to watermark check which leave U&R free pages stay around WMARK_LOW
> > > > > > when being fallback.
> > > > >
> > > > > Can you, please, explain the problem you're solving in more details?
> > > > I am trying to solve a OOM problem caused by slab allocation fail as all free pages are MIGRATE_CMA by applying 168676649, which could help to reduce the fault ration from 12/20 to 2/20. I noticed it introduce the phenomenon which I describe above.
> > > > >
> > > > > If I understand your code correctly, you're effectively reducing the use of cma
> > > > > areas for movable allocations. Why it's good?
> > > > Not exactly. In fact, this commit lead to the use of cma early than it is now, which could help to protect U&R be 'stolen' by GFP_MOVABLE. Imagine this scenario, 30MB total free pages composed of 10MB CMA and 20MB U&R, while zone's watermark low is 25MB. An GFP_MOVABLE allocation can keep stealing U&R pages(don't meet 1/2 criteria) without enter slowpath(zone_watermark_ok(WMARK_LOW) is true) until they shrink to 15MB. In my opinion, it makes more sense to have CMA take its duty to help movable allocation when U&R lower to certain zone's watermark instead of when their size become smaller than CMA.
> > > > > Also, this is a hot path, please, make sure you're not adding much overhead.
> > > > I would like to take more thought.
> > >
> > > Got it, thank you for the explanation!
> > >
> > > How about the following approach (completely untested)?
> > >
> > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > index 6da423ec356f..4b50f497c09d 100644
> > > --- a/mm/page_alloc.c
> > > +++ b/mm/page_alloc.c
> > > @@ -2279,12 +2279,13 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
> > >         if (IS_ENABLED(CONFIG_CMA)) {
> > >                 /*
> > >                  * Balance movable allocations between regular and CMA areas by
> > > -                * allocating from CMA when over half of the zone's free memory
> > > -                * is in the CMA area.
> > > +                * allocating from CMA when over half of the zone's easily
> > > +                * available free memory is in the CMA area.
> > >                  */
> > >                 if (alloc_flags & ALLOC_CMA &&
> > >                     zone_page_state(zone, NR_FREE_CMA_PAGES) >
> > > -                   zone_page_state(zone, NR_FREE_PAGES) / 2) {
> > > +                   (zone_page_state(zone, NR_FREE_PAGES) -
> > > +                    zone->_watermark[WMARK_LOW]) / 2) {
> > IMO, we should focus on non-cma area which trigger use of cma when
> > they are lower than corresponding watermark(there is still
> > WMARK_MIN/HIGH to deal with within slowpath)
> > >                         page = __rmqueue_cma_fallback(zone, order);
> > >                         if (page)
> > >                                 return page;
> > >
> > > Basically the idea is to keep free space equally split between cma and non-cma areas.
> > > Will it work for you?
> > I don't think it makes sense to 'equally split' cma and non-cma areas
> > over free space while cma could occupy various proportions in a single
> > zone. This fixed 1/2 could lead to different situation on 20% or 50%
> > cma occupation.
>
> Can you then, please, explain in details what you're proposing instead
> and why it's better (not only in your case, but generally as well)?
> For the context, my original usecase was cma size under 10G and
> the total memory size between 32G and 128G.

Let us look at the series of scenarios below with
WMARK_LOW=25MB,WMARK_MIN=5MB(managed pages 1.9GB). We can know that
1. optimized 1/2 method start to use CMA since D which actually has
caused U&R lower than WMARK_LOW (this could be deemed as against
current memory policy, that is, U&R should either keeping stay  around
WATERMARK_LOW or enter slowpath to do reclaiming)
2. it keep using CMA as long as free pages keep shrinking (this is
against the original desire of balance cma & none-cma area)

free_cma/free_pages(MB)        A(12/30)     B(10/25)     C(8/25)
D(8/20)     E(8/16)     F(5/12)     F(5/10)   G(4/8)
optimized 1/2                                N                 N
         N             Y               Y                Y
Y            Y
!zone_watermark_ok                    Y                  Y
   Y             N               N                N            Y
     Y

>
> Looking at your original patch, I see that __if_use_cma_first() always returns
> false if zone_watermark_ok() returns true. It worries me.
ok. we could deal with the scenario here when free pages is higher
than WMARK_HIGH
>
> Thanks!
  
Zhaoyang Huang May 4, 2023, 6:30 a.m. UTC | #9
On Thu, May 4, 2023 at 2:23 PM Zhaoyang Huang <huangzhaoyang@gmail.com> wrote:
>
> On Thu, May 4, 2023 at 12:30 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
> >
> > On Wed, May 03, 2023 at 03:58:21PM +0800, Zhaoyang Huang wrote:
> > > On Wed, May 3, 2023 at 6:01 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
> > > >
> > > > On Tue, May 02, 2023 at 12:12:28PM +0000, 黄朝阳 (Zhaoyang Huang) wrote:
> > > > > > Hi Zhaoyang!
> > > > > >
> > > > > > On Fri, Apr 28, 2023 at 07:00:41PM +0800, zhaoyang.huang wrote:
> > > > > > > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > > > > > >
> > > > > > > Please be notice bellowing typical scenario that commit 168676649
> > > > > > > introduce, that is, 12MB free cma pages 'help' GFP_MOVABLE to keep
> > > > > > > draining/fragmenting U&R page blocks until they shrink to 12MB without
> > > > > > > enter slowpath which against current reclaiming policy. This commit change
> > > > > > the criteria from hard coded '1/2'
> > > > > > > to watermark check which leave U&R free pages stay around WMARK_LOW
> > > > > > > when being fallback.
> > > > > >
> > > > > > Can you, please, explain the problem you're solving in more details?
> > > > > I am trying to solve a OOM problem caused by slab allocation fail as all free pages are MIGRATE_CMA by applying 168676649, which could help to reduce the fault ration from 12/20 to 2/20. I noticed it introduce the phenomenon which I describe above.
> > > > > >
> > > > > > If I understand your code correctly, you're effectively reducing the use of cma
> > > > > > areas for movable allocations. Why it's good?
> > > > > Not exactly. In fact, this commit lead to the use of cma early than it is now, which could help to protect U&R be 'stolen' by GFP_MOVABLE. Imagine this scenario, 30MB total free pages composed of 10MB CMA and 20MB U&R, while zone's watermark low is 25MB. An GFP_MOVABLE allocation can keep stealing U&R pages(don't meet 1/2 criteria) without enter slowpath(zone_watermark_ok(WMARK_LOW) is true) until they shrink to 15MB. In my opinion, it makes more sense to have CMA take its duty to help movable allocation when U&R lower to certain zone's watermark instead of when their size become smaller than CMA.
> > > > > > Also, this is a hot path, please, make sure you're not adding much overhead.
> > > > > I would like to take more thought.
> > > >
> > > > Got it, thank you for the explanation!
> > > >
> > > > How about the following approach (completely untested)?
> > > >
> > > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > > index 6da423ec356f..4b50f497c09d 100644
> > > > --- a/mm/page_alloc.c
> > > > +++ b/mm/page_alloc.c
> > > > @@ -2279,12 +2279,13 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
> > > >         if (IS_ENABLED(CONFIG_CMA)) {
> > > >                 /*
> > > >                  * Balance movable allocations between regular and CMA areas by
> > > > -                * allocating from CMA when over half of the zone's free memory
> > > > -                * is in the CMA area.
> > > > +                * allocating from CMA when over half of the zone's easily
> > > > +                * available free memory is in the CMA area.
> > > >                  */
> > > >                 if (alloc_flags & ALLOC_CMA &&
> > > >                     zone_page_state(zone, NR_FREE_CMA_PAGES) >
> > > > -                   zone_page_state(zone, NR_FREE_PAGES) / 2) {
> > > > +                   (zone_page_state(zone, NR_FREE_PAGES) -
> > > > +                    zone->_watermark[WMARK_LOW]) / 2) {
> > > IMO, we should focus on non-cma area which trigger use of cma when
> > > they are lower than corresponding watermark(there is still
> > > WMARK_MIN/HIGH to deal with within slowpath)
> > > >                         page = __rmqueue_cma_fallback(zone, order);
> > > >                         if (page)
> > > >                                 return page;
> > > >
> > > > Basically the idea is to keep free space equally split between cma and non-cma areas.
> > > > Will it work for you?
> > > I don't think it makes sense to 'equally split' cma and non-cma areas
> > > over free space while cma could occupy various proportions in a single
> > > zone. This fixed 1/2 could lead to different situation on 20% or 50%
> > > cma occupation.
> >
> > Can you then, please, explain in details what you're proposing instead
> > and why it's better (not only in your case, but generally as well)?
> > For the context, my original usecase was cma size under 10G and
> > the total memory size between 32G and 128G.
>
> Let us look at the series of scenarios below with
> WMARK_LOW=25MB,WMARK_MIN=5MB(managed pages 1.9GB). We can know that
> 1. optimized 1/2 method start to use CMA since D which actually has
> caused U&R lower than WMARK_LOW (this could be deemed as against
> current memory policy, that is, U&R should either keeping stay  around
> WATERMARK_LOW or enter slowpath to do reclaiming)
> 2. it keep using CMA as long as free pages keep shrinking (this is
> against the original desire of balance cma & none-cma area)
>
> free_cma/free_pages(MB)        A(12/30)     B(10/25)     C(8/25)
> D(8/20)     E(8/16)     F(5/12)     F(5/10)   G(4/8)
> optimized 1/2                                N                 N
>          N             Y               Y                Y
> Y            Y
> !zone_watermark_ok                    Y                  Y
>    Y             N               N                N            Y
>      Y
sorry for the broken graph format of previous reply, repost it here.N
stands for not use CMA while Y stands for using

 free_cma/free_pages(MB)        A(12/30)     B(10/25)     C(8/25)
 optimized 1/2                                N                 N
          N
 !zone_watermark_ok                    Y                  Y                Y

D(8/20)     E(8/16)     F(5/12)     F(5/10)   G(4/8)
    Y               Y                Y            Y            Y
    N               N                N            Y           Y


> >
> > Looking at your original patch, I see that __if_use_cma_first() always returns
> > false if zone_watermark_ok() returns true. It worries me.
> ok. we could deal with the scenario here when free pages is higher
> than WMARK_HIGH
> >
> > Thanks!
  

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0745aed..97768fe 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3071,6 +3071,39 @@  static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 
 }
 
+#ifdef CONFIG_CMA
+static bool __if_use_cma_first(struct zone *zone, unsigned int order, unsigned int alloc_flags)
+{
+	unsigned long cma_proportion = 0;
+	unsigned long cma_free_proportion = 0;
+	unsigned long watermark = 0;
+	unsigned long wm_fact[ALLOC_WMARK_MASK] = {1, 1, 2};
+	long count = 0;
+	bool cma_first = false;
+
+	watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
+	/*check if GFP_MOVABLE pass previous watermark check via the help of CMA*/
+	if (!zone_watermark_ok(zone, order, watermark, 0, alloc_flags & (~ALLOC_CMA)))
+	{
+		alloc_flags &= ALLOC_WMARK_MASK;
+		/* WMARK_LOW failed lead to using cma first, this helps U&R stay
+		 * around low when being drained by GFP_MOVABLE
+		 */
+		if (alloc_flags <= ALLOC_WMARK_LOW)
+			cma_first = true;
+		/*check proportion for WMARK_HIGH*/
+		else {
+			count = atomic_long_read(&zone->managed_pages);
+			cma_proportion = zone->cma_pages * 100 / count;
+			cma_free_proportion = zone_page_state(zone, NR_FREE_CMA_PAGES) * 100
+				/  zone_page_state(zone, NR_FREE_PAGES);
+			cma_first = (cma_free_proportion >= wm_fact[alloc_flags] * cma_proportion
+					|| cma_free_proportion >= 50);
+		}
+	}
+	return cma_first;
+}
+#endif
 /*
  * Do the hard work of removing an element from the buddy allocator.
  * Call me with the zone->lock already held.
@@ -3087,10 +3120,9 @@  static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 		 * allocating from CMA when over half of the zone's free memory
 		 * is in the CMA area.
 		 */
-		if (alloc_flags & ALLOC_CMA &&
-		    zone_page_state(zone, NR_FREE_CMA_PAGES) >
-		    zone_page_state(zone, NR_FREE_PAGES) / 2) {
-			page = __rmqueue_cma_fallback(zone, order);
+		if (migratetype == MIGRATE_MOVABLE) {
+			bool cma_first = __if_use_cma_first(zone, order, alloc_flags);
+			page = cma_first ? __rmqueue_cma_fallback(zone, order) : NULL;
 			if (page)
 				return page;
 		}