mm: skip CMA pages when they are not available

Message ID 1681882824-17532-1-git-send-email-zhaoyang.huang@unisoc.com
State New
Series: mm: skip CMA pages when they are not available

Commit Message

zhaoyang.huang April 19, 2023, 5:40 a.m. UTC
  From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>

It is a waste of effort to reclaim CMA pages if they are not available
to the current context during direct reclaim. Skip them under such
circumstances.

Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
 mm/vmscan.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)
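
The change amounts to a single predicate: during direct reclaim on
behalf of an allocation that cannot be served from CMA pageblocks,
folios backed by MIGRATE_CMA are skipped rather than reclaimed. A
minimal standalone sketch of that predicate (skip_cma() is an
illustrative name here, not a helper the patch adds; the kernel
helpers it calls are real):

	/*
	 * Illustrative sketch, not part of the posted patch. A folio in
	 * a MIGRATE_CMA pageblock can only satisfy movable allocations,
	 * so direct reclaim on behalf of any other migratetype gains
	 * nothing from reclaiming it. kswapd is exempt because it
	 * reclaims for the zone as a whole, not for one allocation.
	 */
	static bool skip_cma(struct folio *folio, struct scan_control *sc)
	{
		return !current_is_kswapd() &&
		       gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
		       get_pageblock_migratetype(&folio->page) == MIGRATE_CMA;
	}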
  

Comments

Huang, Ying April 19, 2023, 6:06 a.m. UTC | #1
"zhaoyang.huang" <zhaoyang.huang@unisoc.com> writes:

> From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
>
> It is a waste of effort to reclaim CMA pages if they are not available
> to the current context during direct reclaim. Skip them under such
> circumstances.

Do you have any performance number for this change?

Best Regards,
Huang, Ying

> Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> ---
>  mm/vmscan.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index bd6637f..04424d9 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2225,10 +2225,16 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>  	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
>  	unsigned long skipped = 0;
>  	unsigned long scan, total_scan, nr_pages;
> +	bool cma_cap = true;
> +	struct page *page;
>  	LIST_HEAD(folios_skipped);
>  
>  	total_scan = 0;
>  	scan = 0;
> +	if ((IS_ENABLED(CONFIG_CMA)) && !current_is_kswapd()
> +		&& (gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE))
> +		cma_cap = false;
> +
>  	while (scan < nr_to_scan && !list_empty(src)) {
>  		struct list_head *move_to = src;
>  		struct folio *folio;
> @@ -2239,7 +2245,10 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>  		nr_pages = folio_nr_pages(folio);
>  		total_scan += nr_pages;
>  
> -		if (folio_zonenum(folio) > sc->reclaim_idx) {
> +		page = &folio->page;
> +
> +		if (folio_zonenum(folio) > sc->reclaim_idx ||
> +			(get_pageblock_migratetype(page) == MIGRATE_CMA && !cma_cap)) {
>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
>  			move_to = &folios_skipped;
>  			goto move;
  
Zhaoyang Huang April 19, 2023, 6:52 a.m. UTC | #2
On Wed, Apr 19, 2023 at 2:07 PM Huang, Ying <ying.huang@intel.com> wrote:
>
> "zhaoyang.huang" <zhaoyang.huang@unisoc.com> writes:
>
> > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> >
> > It is a waste of effort to reclaim CMA pages if they are not available
> > to the current context during direct reclaim. Skip them under such
> > circumstances.
>
> Do you have any performance number for this change?
Sorry, no. This patch arose from the OOM issue below, which was caused
by MIGRATE_CMA occupying almost 100 percent of the zone's free pages and
was solved by commit "168676649 mm,page_alloc,cma: conditionally prefer cma
pageblocks for movable allocations". This could be a common scenario for
a zone that has a large proportion of CMA-reserved pageblocks, which
needs to be considered from both the allocation and the reclaim
perspectives. (In the log below, the "(C)" annotations mark free pages
in CMA pageblocks: nearly all of DMA32's free memory was in CMA, while
the order-0 GFP_NOIO allocation, lacking __GFP_MOVABLE, could not use
it.)

04166 < 4> [   36.172486] [03-19 10:05:52.172] ActivityManager: page
allocation failure: order:0, mode:0xc00(GFP_NOIO),
nodemask=(null),cpuset=foreground,mems_allowed=0
0419C < 4> [   36.189447] [03-19 10:05:52.189] DMA32: 0*4kB 447*8kB
(C) 217*16kB (C) 124*32kB (C) 136*64kB (C) 70*128kB (C) 22*256kB (C)
3*512kB (C) 0*1024kB 0*2048kB 0*4096kB = 35848kB
0419D < 4> [   36.193125] [03-19 10:05:52.193] Normal: 231*4kB (UMEH)
49*8kB (MEH) 14*16kB (H) 13*32kB (H) 8*64kB (H) 2*128kB (H) 0*256kB
1*512kB (H) 0*1024kB 0*2048kB 0*4096kB = 3236kB
......
041EA < 4> [   36.234447] [03-19 10:05:52.234] SLUB: Unable to
allocate memory on node -1, gfp=0xa20(GFP_ATOMIC)
041EB < 4> [   36.234455] [03-19 10:05:52.234] cache: ext4_io_end,
object size: 64, buffer size: 64, default order: 0, min order: 0
041EC < 4> [   36.234459] [03-19 10:05:52.234] node 0: slabs: 53,
objs: 3392, free: 0
>
> Best Regards,
> Huang, Ying
>
> > Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > ---
> >  mm/vmscan.c | 11 ++++++++++-
> >  1 file changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index bd6637f..04424d9 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -2225,10 +2225,16 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
> >       unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
> >       unsigned long skipped = 0;
> >       unsigned long scan, total_scan, nr_pages;
> > +     bool cma_cap = true;
> > +     struct page *page;
> >       LIST_HEAD(folios_skipped);
> >
> >       total_scan = 0;
> >       scan = 0;
> > +     if ((IS_ENABLED(CONFIG_CMA)) && !current_is_kswapd()
> > +             && (gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE))
> > +             cma_cap = false;
> > +
> >       while (scan < nr_to_scan && !list_empty(src)) {
> >               struct list_head *move_to = src;
> >               struct folio *folio;
> > @@ -2239,7 +2245,10 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
> >               nr_pages = folio_nr_pages(folio);
> >               total_scan += nr_pages;
> >
> > -             if (folio_zonenum(folio) > sc->reclaim_idx) {
> > +             page = &folio->page;
> > +
> > +             if (folio_zonenum(folio) > sc->reclaim_idx ||
> > +                     (get_pageblock_migratetype(page) == MIGRATE_CMA && !cma_cap)) {
> >                       nr_skipped[folio_zonenum(folio)] += nr_pages;
> >                       move_to = &folios_skipped;
> >                       goto move;
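
For reference, that commit addressed the allocation side of the same
imbalance. Its core logic in __rmqueue() is roughly the following
(paraphrased; see mm/page_alloc.c for the exact code, which varies
across kernel versions):

	/*
	 * Roughly the balancing added by commit 168676649
	 * ("mm,page_alloc,cma: conditionally prefer cma pageblocks for
	 * movable allocations"): when over half of a zone's free pages
	 * sit in CMA pageblocks, movable allocations are steered into
	 * CMA first, preserving regular pageblocks for allocations
	 * that cannot use CMA.
	 */
	if (IS_ENABLED(CONFIG_CMA) && (alloc_flags & ALLOC_CMA) &&
	    zone_page_state(zone, NR_FREE_CMA_PAGES) >
	    zone_page_state(zone, NR_FREE_PAGES) / 2) {
		page = __rmqueue_cma_fallback(zone, order);
		if (page)
			return page;
	}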
  
Huang, Ying April 19, 2023, 7:34 a.m. UTC | #3
Zhaoyang Huang <huangzhaoyang@gmail.com> writes:

> On Wed, Apr 19, 2023 at 2:07 PM Huang, Ying <ying.huang@intel.com> wrote:
>>
>> "zhaoyang.huang" <zhaoyang.huang@unisoc.com> writes:
>>
>> > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
>> >
>> > It is a waste of effort to reclaim CMA pages if they are not available
>> > to the current context during direct reclaim. Skip them under such
>> > circumstances.
>>
>> Do you have any performance number for this change?
> Sorry, no. This patch arose from the OOM issue below, which was caused
> by MIGRATE_CMA occupying almost 100 percent of the zone's free pages and
> was solved by commit "168676649 mm,page_alloc,cma: conditionally prefer cma
> pageblocks for movable allocations". This could be a common scenario for
> a zone that has a large proportion of CMA-reserved pageblocks, which
> needs to be considered from both the allocation and the reclaim
> perspectives. (In the log below, the "(C)" annotations mark free pages
> in CMA pageblocks: nearly all of DMA32's free memory was in CMA, while
> the order-0 GFP_NOIO allocation, lacking __GFP_MOVABLE, could not use
> it.)

IIUC, your patch is inspired by the OOM issue and commit 168676649?

Anyway, I think it's better for you to describe the issue you want to
address in the patch description, and to show how your patch addresses it
with some tests if possible.  A performance number is just one way to show
it.

Best Regards,
Huang, Ying

> 04166 < 4> [   36.172486] [03-19 10:05:52.172] ActivityManager: page
> allocation failure: order:0, mode:0xc00(GFP_NOIO),
> nodemask=(null),cpuset=foreground,mems_allowed=0
> 0419C < 4> [   36.189447] [03-19 10:05:52.189] DMA32: 0*4kB 447*8kB
> (C) 217*16kB (C) 124*32kB (C) 136*64kB (C) 70*128kB (C) 22*256kB (C)
> 3*512kB (C) 0*1024kB 0*2048kB 0*4096kB = 35848kB
> 0419D < 4> [   36.193125] [03-19 10:05:52.193] Normal: 231*4kB (UMEH)
> 49*8kB (MEH) 14*16kB (H) 13*32kB (H) 8*64kB (H) 2*128kB (H) 0*256kB
> 1*512kB (H) 0*1024kB 0*2048kB 0*4096kB = 3236kB
> ......
> 041EA < 4> [   36.234447] [03-19 10:05:52.234] SLUB: Unable to
> allocate memory on node -1, gfp=0xa20(GFP_ATOMIC)
> 041EB < 4> [   36.234455] [03-19 10:05:52.234] cache: ext4_io_end,
> object size: 64, buffer size: 64, default order: 0, min order: 0
> 041EC < 4> [   36.234459] [03-19 10:05:52.234] node 0: slabs: 53,
> objs: 3392, free: 0
>>
>> Best Regards,
>> Huang, Ying
>>
>> > Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
>> > ---
>> >  mm/vmscan.c | 11 ++++++++++-
>> >  1 file changed, 10 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/mm/vmscan.c b/mm/vmscan.c
>> > index bd6637f..04424d9 100644
>> > --- a/mm/vmscan.c
>> > +++ b/mm/vmscan.c
>> > @@ -2225,10 +2225,16 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>> >       unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
>> >       unsigned long skipped = 0;
>> >       unsigned long scan, total_scan, nr_pages;
>> > +     bool cma_cap = true;
>> > +     struct page *page;
>> >       LIST_HEAD(folios_skipped);
>> >
>> >       total_scan = 0;
>> >       scan = 0;
>> > +     if ((IS_ENABLED(CONFIG_CMA)) && !current_is_kswapd()
>> > +             && (gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE))
>> > +             cma_cap = false;
>> > +
>> >       while (scan < nr_to_scan && !list_empty(src)) {
>> >               struct list_head *move_to = src;
>> >               struct folio *folio;
>> > @@ -2239,7 +2245,10 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>> >               nr_pages = folio_nr_pages(folio);
>> >               total_scan += nr_pages;
>> >
>> > -             if (folio_zonenum(folio) > sc->reclaim_idx) {
>> > +             page = &folio->page;
>> > +
>> > +             if (folio_zonenum(folio) > sc->reclaim_idx ||
>> > +                     (get_pageblock_migratetype(page) == MIGRATE_CMA && !cma_cap)) {
>> >                       nr_skipped[folio_zonenum(folio)] += nr_pages;
>> >                       move_to = &folios_skipped;
>> >                       goto move;
  
kernel test robot April 19, 2023, 11:36 a.m. UTC | #4
Hi zhaoyang.huang,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/zhaoyang-huang/mm-skip-CMA-pages-when-they-are-not-available/20230419-134421
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/1681882824-17532-1-git-send-email-zhaoyang.huang%40unisoc.com
patch subject: [PATCH] mm: skip CMA pages when they are not available
config: i386-randconfig-a012-20230417 (https://download.01.org/0day-ci/archive/20230419/202304191908.Bqx6phWT-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/346b74f2bf2155cb8be6af66b346157c7681b9c9
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review zhaoyang-huang/mm-skip-CMA-pages-when-they-are-not-available/20230419-134421
        git checkout 346b74f2bf2155cb8be6af66b346157c7681b9c9
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304191908.Bqx6phWT-lkp@intel.com/

All errors (new ones prefixed by >>):

>> mm/vmscan.c:2317:40: error: use of undeclared identifier 'MIGRATE_CMA'; did you mean 'MIGRATE_SYNC'?
                           (get_pageblock_migratetype(page) == MIGRATE_CMA && !cma_cap)) {
                                                               ^~~~~~~~~~~
                                                               MIGRATE_SYNC
   include/linux/migrate_mode.h:18:2: note: 'MIGRATE_SYNC' declared here
           MIGRATE_SYNC,
           ^
   1 error generated.


vim +2317 mm/vmscan.c

  2261	
  2262	/*
  2263	 * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
  2264	 *
  2265	 * lruvec->lru_lock is heavily contended.  Some of the functions that
  2266	 * shrink the lists perform better by taking out a batch of pages
  2267	 * and working on them outside the LRU lock.
  2268	 *
  2269	 * For pagecache intensive workloads, this function is the hottest
  2270	 * spot in the kernel (apart from copy_*_user functions).
  2271	 *
  2272	 * Lru_lock must be held before calling this function.
  2273	 *
  2274	 * @nr_to_scan:	The number of eligible pages to look through on the list.
  2275	 * @lruvec:	The LRU vector to pull pages from.
  2276	 * @dst:	The temp list to put pages on to.
  2277	 * @nr_scanned:	The number of pages that were scanned.
  2278	 * @sc:		The scan_control struct for this reclaim session
  2279	 * @lru:	LRU list id for isolating
  2280	 *
  2281	 * returns how many pages were moved onto *@dst.
  2282	 */
  2283	static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
  2284			struct lruvec *lruvec, struct list_head *dst,
  2285			unsigned long *nr_scanned, struct scan_control *sc,
  2286			enum lru_list lru)
  2287	{
  2288		struct list_head *src = &lruvec->lists[lru];
  2289		unsigned long nr_taken = 0;
  2290		unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
  2291		unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
  2292		unsigned long skipped = 0;
  2293		unsigned long scan, total_scan, nr_pages;
  2294		bool cma_cap = true;
  2295		struct page *page;
  2296		LIST_HEAD(folios_skipped);
  2297	
  2298		total_scan = 0;
  2299		scan = 0;
  2300		if ((IS_ENABLED(CONFIG_CMA)) && !current_is_kswapd()
  2301			&& (gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE))
  2302			cma_cap = false;
  2303	
  2304		while (scan < nr_to_scan && !list_empty(src)) {
  2305			struct list_head *move_to = src;
  2306			struct folio *folio;
  2307	
  2308			folio = lru_to_folio(src);
  2309			prefetchw_prev_lru_folio(folio, src, flags);
  2310	
  2311			nr_pages = folio_nr_pages(folio);
  2312			total_scan += nr_pages;
  2313	
  2314			page = &folio->page;
  2315	
  2316			if (folio_zonenum(folio) > sc->reclaim_idx ||
> 2317				(get_pageblock_migratetype(page) == MIGRATE_CMA && !cma_cap)) {
  2318				nr_skipped[folio_zonenum(folio)] += nr_pages;
  2319				move_to = &folios_skipped;
  2320				goto move;
  2321			}
  2322	
  2323			/*
  2324			 * Do not count skipped folios because that makes the function
  2325			 * return with no isolated folios if the LRU mostly contains
  2326			 * ineligible folios.  This causes the VM to not reclaim any
  2327			 * folios, triggering a premature OOM.
  2328			 * Account all pages in a folio.
  2329			 */
  2330			scan += nr_pages;
  2331	
  2332			if (!folio_test_lru(folio))
  2333				goto move;
  2334			if (!sc->may_unmap && folio_mapped(folio))
  2335				goto move;
  2336	
  2337			/*
  2338			 * Be careful not to clear the lru flag until after we're
  2339			 * sure the folio is not being freed elsewhere -- the
  2340			 * folio release code relies on it.
  2341			 */
  2342			if (unlikely(!folio_try_get(folio)))
  2343				goto move;
  2344	
  2345			if (!folio_test_clear_lru(folio)) {
  2346				/* Another thread is already isolating this folio */
  2347				folio_put(folio);
  2348				goto move;
  2349			}
  2350	
  2351			nr_taken += nr_pages;
  2352			nr_zone_taken[folio_zonenum(folio)] += nr_pages;
  2353			move_to = dst;
  2354	move:
  2355			list_move(&folio->lru, move_to);
  2356		}
  2357	
  2358		/*
  2359		 * Splice any skipped folios to the start of the LRU list. Note that
  2360		 * this disrupts the LRU order when reclaiming for lower zones but
  2361		 * we cannot splice to the tail. If we did then the SWAP_CLUSTER_MAX
  2362		 * scanning would soon rescan the same folios to skip and waste lots
  2363		 * of cpu cycles.
  2364		 */
  2365		if (!list_empty(&folios_skipped)) {
  2366			int zid;
  2367	
  2368			list_splice(&folios_skipped, src);
  2369			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
  2370				if (!nr_skipped[zid])
  2371					continue;
  2372	
  2373				__count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);
  2374				skipped += nr_skipped[zid];
  2375			}
  2376		}
  2377		*nr_scanned = total_scan;
  2378		trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
  2379					    total_scan, skipped, nr_taken,
  2380					    sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru);
  2381		update_lru_sizes(lruvec, lru, nr_zone_taken);
  2382		return nr_taken;
  2383	}
  2384
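
The error is expected with CONFIG_CMA=n, where MIGRATE_CMA does not
exist, so the new check cannot name it directly. One possible fix (a
sketch, not a posted revision) is to route the comparison through the
is_migrate_cma() helper from include/linux/mmzone.h, which expands to
false when CONFIG_CMA is disabled:

	/*
	 * Sketch of one possible fix for the CONFIG_CMA=n build error;
	 * not part of the posted patch. With CONFIG_CMA disabled,
	 * is_migrate_cma() is defined as plain false, so MIGRATE_CMA is
	 * never referenced and the migratetype lookup is compiled out.
	 */
	if (folio_zonenum(folio) > sc->reclaim_idx ||
	    (!cma_cap && is_migrate_cma(get_pageblock_migratetype(page)))) {
		nr_skipped[folio_zonenum(folio)] += nr_pages;
		move_to = &folios_skipped;
		goto move;
	}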
  

Patch

diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd6637f..04424d9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2225,10 +2225,16 @@  static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
 	unsigned long skipped = 0;
 	unsigned long scan, total_scan, nr_pages;
+	bool cma_cap = true;
+	struct page *page;
 	LIST_HEAD(folios_skipped);
 
 	total_scan = 0;
 	scan = 0;
+	if ((IS_ENABLED(CONFIG_CMA)) && !current_is_kswapd()
+		&& (gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE))
+		cma_cap = false;
+
 	while (scan < nr_to_scan && !list_empty(src)) {
 		struct list_head *move_to = src;
 		struct folio *folio;
@@ -2239,7 +2245,10 @@  static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx) {
+		page = &folio->page;
+
+		if (folio_zonenum(folio) > sc->reclaim_idx ||
+			(get_pageblock_migratetype(page) == MIGRATE_CMA && !cma_cap)) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
 			goto move;