[PATCHv4] mm: skip CMA pages when they are not available

Message ID 1684737363-31554-1-git-send-email-zhaoyang.huang@unisoc.com
State New
Series [PATCHv4] mm: skip CMA pages when they are not available

Commit Message

zhaoyang.huang May 22, 2023, 6:36 a.m. UTC
  From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>

This patch fixes unproductive reclaiming of CMA pages by skipping them when
they are not available to the current allocation context. It arises from the
OOM issue below, which is caused by a large proportion of MIGRATE_CMA pages
among the free pages.

[   36.172486] [03-19 10:05:52.172] ActivityManager: page allocation failure: order:0, mode:0xc00(GFP_NOIO), nodemask=(null),cpuset=foreground,mems_allowed=0
[   36.189447] [03-19 10:05:52.189] DMA32: 0*4kB 447*8kB (C) 217*16kB (C) 124*32kB (C) 136*64kB (C) 70*128kB (C) 22*256kB (C) 3*512kB (C) 0*1024kB 0*2048kB 0*4096kB = 35848kB
[   36.193125] [03-19 10:05:52.193] Normal: 231*4kB (UMEH) 49*8kB (MEH) 14*16kB (H) 13*32kB (H) 8*64kB (H) 2*128kB (H) 0*256kB 1*512kB (H) 0*1024kB 0*2048kB 0*4096kB = 3236kB
...
[   36.234447] [03-19 10:05:52.234] SLUB: Unable to allocate memory on node -1, gfp=0xa20(GFP_ATOMIC)
[   36.234455] [03-19 10:05:52.234] cache: ext4_io_end, object size: 64, buffer size: 64, default order: 0, min order: 0
[   36.234459] [03-19 10:05:52.234] node 0: slabs: 53,objs: 3392, free: 0

Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
v2: update commit message and fix build error when CONFIG_CMA is not set
v3,v4: update code and comments
---
---
 mm/vmscan.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)
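
To see why the log above counts as an allocation failure despite roughly 35 MB
of "free" memory in DMA32: the page allocator does not count free pages sitting
in MIGRATE_CMA pageblocks toward the watermark for allocations that may not use
CMA. A minimal standalone sketch of that accounting (a simplified model with
illustrative numbers, not the mm/page_alloc.c code):

#include <stdbool.h>
#include <stdio.h>

/*
 * Simplified model: free pages in MIGRATE_CMA pageblocks are subtracted
 * before the watermark check when the allocation may not use CMA.
 */
static bool watermark_ok(unsigned long free_pages, unsigned long free_cma_pages,
			 unsigned long watermark, bool alloc_can_use_cma)
{
	unsigned long usable = free_pages;

	if (!alloc_can_use_cma)
		usable -= free_cma_pages;	/* CMA free pages do not count */

	return usable > watermark;
}

int main(void)
{
	/* Illustrative numbers loosely based on the DMA32 line above, in pages. */
	unsigned long free = 35848 / 4, free_cma = 35000 / 4, min = 2048 / 4;

	printf("movable allocation:  %d\n", watermark_ok(free, free_cma, min, true));  /* 1: passes */
	printf("GFP_NOIO allocation: %d\n", watermark_ok(free, free_cma, min, false)); /* 0: fails */
	return 0;
}

With almost all of the free memory in CMA pageblocks, the non-movable GFP_NOIO
allocation fails the check even though the zone looks far from empty, which is
the situation the patch tries to keep direct reclaim from making worse.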
  

Comments

Matthew Wilcox May 25, 2023, 8:03 p.m. UTC | #1
On Mon, May 22, 2023 at 02:36:03PM +0800, zhaoyang.huang wrote:
> +#ifdef CONFIG_CMA
> +/*
> + * It is waste of effort to scan and reclaim CMA pages if it is not available
> + * for current allocation context
> + */
> +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> +{
> +	if (!current_is_kswapd() &&
> +			gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
> +			get_pageblock_migratetype(&folio->page) == MIGRATE_CMA)
> +		return true;
> +	return false;
> +}
> +#else
> +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> +{
> +	return false;
> +}
> +#endif
> +
>  /*
>   * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
>   *
> @@ -2239,7 +2259,8 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>  		nr_pages = folio_nr_pages(folio);
>  		total_scan += nr_pages;
>  
> -		if (folio_zonenum(folio) > sc->reclaim_idx) {
> +		if (folio_zonenum(folio) > sc->reclaim_idx ||
> +				skip_cma(folio, sc)) {
>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
>  			move_to = &folios_skipped;
>  			goto move;

I have no idea if what this patch is trying to accomplish is correct,
but I no longer object to how it is doing it.
  
Zhaoyang Huang May 26, 2023, 2:30 a.m. UTC | #2
On Fri, May 26, 2023 at 4:03 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Mon, May 22, 2023 at 02:36:03PM +0800, zhaoyang.huang wrote:
> > +#ifdef CONFIG_CMA
> > +/*
> > + * It is waste of effort to scan and reclaim CMA pages if it is not available
> > + * for current allocation context
> > + */
> > +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> > +{
> > +     if (!current_is_kswapd() &&
> > +                     gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
> > +                     get_pageblock_migratetype(&folio->page) == MIGRATE_CMA)
> > +             return true;
> > +     return false;
> > +}
> > +#else
> > +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> > +{
> > +     return false;
> > +}
> > +#endif
> > +
> >  /*
> >   * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
> >   *
> > @@ -2239,7 +2259,8 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
> >               nr_pages = folio_nr_pages(folio);
> >               total_scan += nr_pages;
> >
> > -             if (folio_zonenum(folio) > sc->reclaim_idx) {
> > +             if (folio_zonenum(folio) > sc->reclaim_idx ||
> > +                             skip_cma(folio, sc)) {
> >                       nr_skipped[folio_zonenum(folio)] += nr_pages;
> >                       move_to = &folios_skipped;
> >                       goto move;
>
> I have no idea if what this patch is trying to accomplish is correct,
> but I no longer object to how it is doing it.
IMO, this is necessary, as there is a weird scenario where a GFP_KERNEL
allocation might get 32 MIGRATE_CMA pages via direct reclaim, which leads to a
low PSI_MEM/vmpressure value while the allocation still returns a NULL pointer.
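
For illustration, a minimal standalone sketch of the decision skip_cma()
encodes in that scenario (a simplified model; the flag and type names below
are illustrative stand-ins, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for __GFP_MOVABLE and the migratetypes involved. */
#define GFPX_MOVABLE 0x1u

enum mt { MT_UNMOVABLE, MT_MOVABLE, MT_CMA };

static enum mt gfp_to_mt(unsigned int gfp)
{
	return (gfp & GFPX_MOVABLE) ? MT_MOVABLE : MT_UNMOVABLE;
}

/* Mirrors the policy of skip_cma() in the patch. */
static bool skip_cma_page(bool is_kswapd, unsigned int gfp, enum mt pageblock)
{
	return !is_kswapd && gfp_to_mt(gfp) != MT_MOVABLE && pageblock == MT_CMA;
}

int main(void)
{
	/* Direct reclaim for a GFP_KERNEL-like (non-movable) allocation: skip. */
	printf("%d\n", skip_cma_page(false, 0, MT_CMA));            /* 1 */
	/* kswapd, or a movable allocation: the CMA page is still reclaimed. */
	printf("%d\n", skip_cma_page(true, 0, MT_CMA));             /* 0 */
	printf("%d\n", skip_cma_page(false, GFPX_MOVABLE, MT_CMA)); /* 0 */
	return 0;
}

Without the skip, those 32 reclaimed pages count as reclaim progress even
though the non-movable allocation cannot use them.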
  
David Hildenbrand May 26, 2023, 7:36 p.m. UTC | #3
On 22.05.23 08:36, zhaoyang.huang wrote:
> From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> 
> This patch fixes unproductive reclaiming of CMA pages by skipping them when
> they are not available to the current allocation context. It arises from the
> OOM issue below, which is caused by a large proportion of MIGRATE_CMA pages
> among the free pages.
> 
> [   36.172486] [03-19 10:05:52.172] ActivityManager: page allocation failure: order:0, mode:0xc00(GFP_NOIO), nodemask=(null),cpuset=foreground,mems_allowed=0
> [   36.189447] [03-19 10:05:52.189] DMA32: 0*4kB 447*8kB (C) 217*16kB (C) 124*32kB (C) 136*64kB (C) 70*128kB (C) 22*256kB (C) 3*512kB (C) 0*1024kB 0*2048kB 0*4096kB = 35848kB
> [   36.193125] [03-19 10:05:52.193] Normal: 231*4kB (UMEH) 49*8kB (MEH) 14*16kB (H) 13*32kB (H) 8*64kB (H) 2*128kB (H) 0*256kB 1*512kB (H) 0*1024kB 0*2048kB 0*4096kB = 3236kB
> ...
> [   36.234447] [03-19 10:05:52.234] SLUB: Unable to allocate memory on node -1, gfp=0xa20(GFP_ATOMIC)
> [   36.234455] [03-19 10:05:52.234] cache: ext4_io_end, object size: 64, buffer size: 64, default order: 0, min order: 0
> [   36.234459] [03-19 10:05:52.234] node 0: slabs: 53,objs: 3392, free: 0
> 
> Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> ---
> v2: update commit message and fix build error when CONFIG_CMA is not set
> v3,v4: update code and comments
> ---
> ---
>   mm/vmscan.c | 23 ++++++++++++++++++++++-
>   1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index bd6637f..20facec 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2193,6 +2193,26 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
>   
>   }
>   
> +#ifdef CONFIG_CMA
> +/*
> + * It is waste of effort to scan and reclaim CMA pages if it is not available
> + * for current allocation context
> + */

/*
  * Only movable allocations may end up on MIGRATE_CMA pageblocks. If
  * we're not dealing with a movable allocation, it doesn't make sense to
  * reclaim from these pageblocks: the reclaimed memory is unusable for
  * this allocation.
  */

Did I get it right?

> +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> +{
> +	if (!current_is_kswapd() &&
> +			gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
> +			get_pageblock_migratetype(&folio->page) == MIGRATE_CMA)
> +		return true;
> +	return false;

	return !current_is_kswapd() &&
	       gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
	       get_pageblock_migratetype(&folio->page) == MIGRATE_CMA;
  
Minchan Kim May 26, 2023, 11:03 p.m. UTC | #4
On Mon, May 22, 2023 at 02:36:03PM +0800, zhaoyang.huang wrote:
> From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> 
> This patch fixes unproductive reclaiming of CMA pages by skipping them when
> they are not available to the current allocation context. It arises from the
> OOM issue below, which is caused by a large proportion of MIGRATE_CMA pages
> among the free pages.
> 
> [   36.172486] [03-19 10:05:52.172] ActivityManager: page allocation failure: order:0, mode:0xc00(GFP_NOIO), nodemask=(null),cpuset=foreground,mems_allowed=0
> [   36.189447] [03-19 10:05:52.189] DMA32: 0*4kB 447*8kB (C) 217*16kB (C) 124*32kB (C) 136*64kB (C) 70*128kB (C) 22*256kB (C) 3*512kB (C) 0*1024kB 0*2048kB 0*4096kB = 35848kB
> [   36.193125] [03-19 10:05:52.193] Normal: 231*4kB (UMEH) 49*8kB (MEH) 14*16kB (H) 13*32kB (H) 8*64kB (H) 2*128kB (H) 0*256kB 1*512kB (H) 0*1024kB 0*2048kB 0*4096kB = 3236kB
> ...
> [   36.234447] [03-19 10:05:52.234] SLUB: Unable to allocate memory on node -1, gfp=0xa20(GFP_ATOMIC)
> [   36.234455] [03-19 10:05:52.234] cache: ext4_io_end, object size: 64, buffer size: 64, default order: 0, min order: 0
> [   36.234459] [03-19 10:05:52.234] node 0: slabs: 53,objs: 3392, free: 0
> 
> Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> ---
> v2: update commit message and fix build error when CONFIG_CMA is not set
> v3,v4: update code and comments
> ---
> ---
>  mm/vmscan.c | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index bd6637f..20facec 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2193,6 +2193,26 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
>  
>  }
>  
> +#ifdef CONFIG_CMA
> +/*
> + * It is waste of effort to scan and reclaim CMA pages if it is not available
> + * for current allocation context
> + */
> +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> +{
> +	if (!current_is_kswapd() &&

The function is called by isolate_lru_folios, which is used by both background
and direct reclaim at the same time. The sc->reclaim_idx check below, which
filters unproductive reclaim out, covers both cases, so why does the CMA check
consider only the direct reclaim path?


> +			gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
> +			get_pageblock_migratetype(&folio->page) == MIGRATE_CMA)
> +		return true;
> +	return false;
> +}
> +#else
> +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> +{
> +	return false;
> +}
> +#endif
> +
>  /*
>   * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
>   *
> @@ -2239,7 +2259,8 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>  		nr_pages = folio_nr_pages(folio);
>  		total_scan += nr_pages;
>  
> -		if (folio_zonenum(folio) > sc->reclaim_idx) {
> +		if (folio_zonenum(folio) > sc->reclaim_idx ||
> +				skip_cma(folio, sc)) {
>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
>  			move_to = &folios_skipped;
>  			goto move;
> -- 
> 1.9.1
>
  
Zhaoyang Huang May 29, 2023, 1:02 a.m. UTC | #5
On Sat, May 27, 2023 at 3:36 AM David Hildenbrand <david@redhat.com> wrote:
>
> On 22.05.23 08:36, zhaoyang.huang wrote:
> > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> >
> > This patch fixes unproductive reclaiming of CMA pages by skipping them when
> > they are not available to the current allocation context. It arises from the
> > OOM issue below, which is caused by a large proportion of MIGRATE_CMA pages
> > among the free pages.
> >
> > [   36.172486] [03-19 10:05:52.172] ActivityManager: page allocation failure: order:0, mode:0xc00(GFP_NOIO), nodemask=(null),cpuset=foreground,mems_allowed=0
> > [   36.189447] [03-19 10:05:52.189] DMA32: 0*4kB 447*8kB (C) 217*16kB (C) 124*32kB (C) 136*64kB (C) 70*128kB (C) 22*256kB (C) 3*512kB (C) 0*1024kB 0*2048kB 0*4096kB = 35848kB
> > [   36.193125] [03-19 10:05:52.193] Normal: 231*4kB (UMEH) 49*8kB (MEH) 14*16kB (H) 13*32kB (H) 8*64kB (H) 2*128kB (H) 0*256kB 1*512kB (H) 0*1024kB 0*2048kB 0*4096kB = 3236kB
> > ...
> > [   36.234447] [03-19 10:05:52.234] SLUB: Unable to allocate memory on node -1, gfp=0xa20(GFP_ATOMIC)
> > [   36.234455] [03-19 10:05:52.234] cache: ext4_io_end, object size: 64, buffer size: 64, default order: 0, min order: 0
> > [   36.234459] [03-19 10:05:52.234] node 0: slabs: 53,objs: 3392, free: 0
> >
> > Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > ---
> > v2: update commit message and fix build error when CONFIG_CMA is not set
> > v3,v4: update code and comments
> > ---
> > ---
> >   mm/vmscan.c | 23 ++++++++++++++++++++++-
> >   1 file changed, 22 insertions(+), 1 deletion(-)
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index bd6637f..20facec 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -2193,6 +2193,26 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
> >
> >   }
> >
> > +#ifdef CONFIG_CMA
> > +/*
> > + * It is waste of effort to scan and reclaim CMA pages if it is not available
> > + * for current allocation context
> > + */
>
> /*
>   * Only movable allocations may end up on MIGRATE_CMA pageblocks. If
>   * we're not dealing with a movable allocation, it doesn't make sense to
>   * reclaim from these pageblocks: the reclaimed memory is unusable for
>   * this allocation.
>   */
>
> Did I get it right?
Yes, it is right.
>
> > +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> > +{
> > +     if (!current_is_kswapd() &&
> > +                     gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
> > +                     get_pageblock_migratetype(&folio->page) == MIGRATE_CMA)
> > +             return true;
> > +     return false;
>
>         return !current_is_kswapd() &&
>                gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
>                get_pageblock_migratetype(&folio->page) == MIGRATE_CMA;
ok, thanks
>
>
> --
> Thanks,
>
> David / dhildenb
>
  
Zhaoyang Huang May 29, 2023, 1:11 a.m. UTC | #6
On Sat, May 27, 2023 at 7:03 AM Minchan Kim <minchan@kernel.org> wrote:
>
> On Mon, May 22, 2023 at 02:36:03PM +0800, zhaoyang.huang wrote:
> > From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> >
> > This patch fixes unproductive reclaiming of CMA pages by skipping them when
> > they are not available to the current allocation context. It arises from the
> > OOM issue below, which is caused by a large proportion of MIGRATE_CMA pages
> > among the free pages.
> >
> > [   36.172486] [03-19 10:05:52.172] ActivityManager: page allocation failure: order:0, mode:0xc00(GFP_NOIO), nodemask=(null),cpuset=foreground,mems_allowed=0
> > [   36.189447] [03-19 10:05:52.189] DMA32: 0*4kB 447*8kB (C) 217*16kB (C) 124*32kB (C) 136*64kB (C) 70*128kB (C) 22*256kB (C) 3*512kB (C) 0*1024kB 0*2048kB 0*4096kB = 35848kB
> > [   36.193125] [03-19 10:05:52.193] Normal: 231*4kB (UMEH) 49*8kB (MEH) 14*16kB (H) 13*32kB (H) 8*64kB (H) 2*128kB (H) 0*256kB 1*512kB (H) 0*1024kB 0*2048kB 0*4096kB = 3236kB
> > ...
> > [   36.234447] [03-19 10:05:52.234] SLUB: Unable to allocate memory on node -1, gfp=0xa20(GFP_ATOMIC)
> > [   36.234455] [03-19 10:05:52.234] cache: ext4_io_end, object size: 64, buffer size: 64, default order: 0, min order: 0
> > [   36.234459] [03-19 10:05:52.234] node 0: slabs: 53,objs: 3392, free: 0
> >
> > Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
> > ---
> > v2: update commit message and fix build error when CONFIG_CMA is not set
> > v3,v4: update code and comments
> > ---
> > ---
> >  mm/vmscan.c | 23 ++++++++++++++++++++++-
> >  1 file changed, 22 insertions(+), 1 deletion(-)
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index bd6637f..20facec 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -2193,6 +2193,26 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
> >
> >  }
> >
> > +#ifdef CONFIG_CMA
> > +/*
> > + * It is waste of effort to scan and reclaim CMA pages if it is not available
> > + * for current allocation context
> > + */
> > +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> > +{
> > +     if (!current_is_kswapd() &&
>
> The function is called by isolate_lru_folios, which is used by both background
> and direct reclaim at the same time. The sc->reclaim_idx check below, which
> filters unproductive reclaim out, covers both cases, so why does the CMA check
> consider only the direct reclaim path?
Because kswapd's sc->gfp_mask is GFP_KERNEL, which cannot distinguish this
scenario (kswapd reclaims on behalf of all allocation contexts, so its
gfp_mask does not reflect the allocation that woke it).
>
>
> > +                     gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
> > +                     get_pageblock_migratetype(&folio->page) == MIGRATE_CMA)
> > +             return true;
> > +     return false;
> > +}
> > +#else
> > +static bool skip_cma(struct folio *folio, struct scan_control *sc)
> > +{
> > +     return false;
> > +}
> > +#endif
> > +
> >  /*
> >   * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
> >   *
> > @@ -2239,7 +2259,8 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
> >               nr_pages = folio_nr_pages(folio);
> >               total_scan += nr_pages;
> >
> > -             if (folio_zonenum(folio) > sc->reclaim_idx) {
> > +             if (folio_zonenum(folio) > sc->reclaim_idx ||
> > +                             skip_cma(folio, sc)) {
> >                       nr_skipped[folio_zonenum(folio)] += nr_pages;
> >                       move_to = &folios_skipped;
> >                       goto move;
> > --
> > 1.9.1
> >
  

Patch

diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd6637f..20facec 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2193,6 +2193,26 @@  static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 
 }
 
+#ifdef CONFIG_CMA
+/*
+ * It is waste of effort to scan and reclaim CMA pages if it is not available
+ * for current allocation context
+ */
+static bool skip_cma(struct folio *folio, struct scan_control *sc)
+{
+	if (!current_is_kswapd() &&
+			gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
+			get_pageblock_migratetype(&folio->page) == MIGRATE_CMA)
+		return true;
+	return false;
+}
+#else
+static bool skip_cma(struct folio *folio, struct scan_control *sc)
+{
+	return false;
+}
+#endif
+
 /*
  * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
  *
@@ -2239,7 +2259,8 @@  static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx) {
+		if (folio_zonenum(folio) > sc->reclaim_idx ||
+				skip_cma(folio, sc)) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
 			goto move;
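
For reference, folding in the two suggestions from the review above (David's
replacement comment and the direct return), the helper would read roughly as
follows; this is a sketch of a possible next revision, not a posted update:

#ifdef CONFIG_CMA
/*
 * Only movable allocations may end up on MIGRATE_CMA pageblocks. If
 * we're not dealing with a movable allocation, it doesn't make sense to
 * reclaim from these pageblocks: the reclaimed memory is unusable for
 * this allocation.
 */
static bool skip_cma(struct folio *folio, struct scan_control *sc)
{
	return !current_is_kswapd() &&
	       gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
	       get_pageblock_migratetype(&folio->page) == MIGRATE_CMA;
}
#else
static bool skip_cma(struct folio *folio, struct scan_control *sc)
{
	return false;
}
#endif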