[1/2] mm/rmap: support folio_referenced to control if try_lock in rmap_walk

Message ID: 20240219141703.3851-2-lipeifeng@oppo.com
State: New
Series: Support kshrinkd

Commit Message

李培锋 Feb. 19, 2024, 2:17 p.m. UTC
  From: lipeifeng <lipeifeng@oppo.com>

Add a parameter to folio_referenced() so that callers can control the
locking behavior of rmap_walk(): some threads need rmap_walk() to take
the rmap lock instead of using try_lock when they call folio_referenced().

Signed-off-by: lipeifeng <lipeifeng@oppo.com>
---
 include/linux/rmap.h |  5 +++--
 mm/rmap.c            |  5 +++--
 mm/vmscan.c          | 16 ++++++++++++++--
 3 files changed, 20 insertions(+), 6 deletions(-)
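
For illustration, a caller-side sketch of the changed interface; this is not
part of the patch, and the helper name below is made up:

	/*
	 * Sketch only: rw_try_lock selects rmap_walk()'s locking mode.
	 * 1 keeps the current behavior (try_lock the anon_vma/i_mmap
	 * rwsem, so folio_referenced() may return -1 on contention),
	 * 0 makes rmap_walk() sleep on the rwsem instead.
	 */
	static int example_check_refs(struct folio *folio, struct scan_control *sc)
	{
		unsigned long vm_flags;

		return folio_referenced(folio, 0, sc->target_mem_cgroup,
					&vm_flags, sc->rw_try_lock);
	}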
  

Comments

李培锋 Feb. 20, 2024, 1:42 a.m. UTC | #1
add more experts from Linux and Google.


On 2024/2/19 22:17, lipeifeng@oppo.com wrote:
> From: lipeifeng <lipeifeng@oppo.com>
>
> Add a parameter to folio_referenced() so that callers can control the
> locking behavior of rmap_walk(): some threads need rmap_walk() to take
> the rmap lock instead of using try_lock when they call folio_referenced().
>
> Signed-off-by: lipeifeng <lipeifeng@oppo.com>
> ---
>   include/linux/rmap.h |  5 +++--
>   mm/rmap.c            |  5 +++--
>   mm/vmscan.c          | 16 ++++++++++++++--
>   3 files changed, 20 insertions(+), 6 deletions(-)
>
> [quoted patch snipped; see the Patch section below]
  
Barry Song Feb. 20, 2024, 3:01 a.m. UTC | #2
Hi peifeng,

On Tue, Feb 20, 2024 at 2:43 PM 李培锋 <lipeifeng@oppo.com> wrote:
>
> add more experts from Linux and Google.
>
>
> On 2024/2/19 22:17, lipeifeng@oppo.com wrote:
> > From: lipeifeng <lipeifeng@oppo.com>
> >
> > Add a parameter to folio_referenced() so that callers can control the
> > locking behavior of rmap_walk(): some threads need rmap_walk() to take
> > the rmap lock instead of using try_lock when they call folio_referenced().

Please describe what problem the patch is trying to address,
and why this modification is needed, in the commit message.

Btw, who sets rw_try_lock to 0, and what is the benefit?

> >
> > Signed-off-by: lipeifeng <lipeifeng@oppo.com>
> > ---
> >   include/linux/rmap.h |  5 +++--
> >   mm/rmap.c            |  5 +++--
> >   mm/vmscan.c          | 16 ++++++++++++++--
> >   3 files changed, 20 insertions(+), 6 deletions(-)
> >
> > [quoted patch snipped; see the Patch section below]
Thanks
Barry
  
李培锋 Feb. 20, 2024, 4 a.m. UTC | #3
On 2024/2/20 11:01, Barry Song wrote:
> Hi peifeng,
>
> On Tue, Feb 20, 2024 at 2:43 PM 李培锋 <lipeifeng@oppo.com> wrote:
>> add more experts from Linux and Google.
>>
>>
>> On 2024/2/19 22:17, lipeifeng@oppo.com wrote:
>>> From: lipeifeng <lipeifeng@oppo.com>
>>>
>>> Add a parameter to folio_referenced() so that callers can control the
>>> locking behavior of rmap_walk(): some threads need rmap_walk() to take
>>> the rmap lock instead of using try_lock when they call folio_referenced().
> Please describe what problem the patch is trying to address,
> and why this modification is needed, in the commit message.

Hi Barry:

1. This patch is one of the kshrinkd series patches.

2. It lets folio_referenced() callers control the locking behavior of
rmap_walk(): kshrinkd calls folio_referenced() through shrink_folio_list(),
but it does not want rmap_walk() to use try_lock during folio_referenced().


> Btw, who sets rw_try_lock to 0, and what is the benefit?

Actually, the current situation is that only shrink_folio_list will set
try_lock to 1, while the others will set it to 0, so they would wait for
the rwsem lock if it is contended in rmap_walk.
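
To make that concrete, the two modes would look roughly like the sketch
below (the kshrinkd-style scan_control is an assumption, since kshrinkd
itself is added by a later patch in the series):

	/* Sketch: rw_try_lock = 1 keeps the try_lock behavior, so
	 * folio_referenced() may still return -1 on rmap lock contention
	 * and the folio gets skipped/rotated. */
	struct scan_control sc_try = {
		.gfp_mask    = GFP_KERNEL,
		.may_unmap   = 1,
		.rw_try_lock = 1,
	};

	/* Sketch: a kshrinkd-style caller (hypothetical here) would leave
	 * rw_try_lock at 0, so rmap_walk() sleeps on the anon_vma/i_mmap
	 * rwsem instead of bailing out. */
	struct scan_control sc_block = {
		.gfp_mask    = GFP_KERNEL,
		.may_unmap   = 1,
		.may_swap    = 1,
		.rw_try_lock = 0,
	};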


>
>>> Signed-off-by: lipeifeng <lipeifeng@oppo.com>
>>> ---
>>>    include/linux/rmap.h |  5 +++--
>>>    mm/rmap.c            |  5 +++--
>>>    mm/vmscan.c          | 16 ++++++++++++++--
>>>    3 files changed, 20 insertions(+), 6 deletions(-)
>>>
>>> [quoted patch snipped; see the Patch section below]
> Thanks
> Barry
  
Barry Song Feb. 20, 2024, 7:16 a.m. UTC | #4
On Tue, Feb 20, 2024 at 5:00 PM 李培锋 <lipeifeng@oppo.com> wrote:
>
>
> On 2024/2/20 11:01, Barry Song wrote:
> > Hi peifeng,
> >
> > On Tue, Feb 20, 2024 at 2:43 PM 李培锋 <lipeifeng@oppo.com> wrote:
> >> add more experts from Linux and Google.
> >>
> >>
> >> On 2024/2/19 22:17, lipeifeng@oppo.com wrote:
> >>> From: lipeifeng <lipeifeng@oppo.com>
> >>>
> > > Add a parameter to folio_referenced() so that callers can control the
> > > locking behavior of rmap_walk(): some threads need rmap_walk() to take
> > > the rmap lock instead of using try_lock when they call folio_referenced().
> > Please describe what problem the patch is trying to address,
> > and why this modification is needed, in the commit message.
>
> Hi Barry:
>
> 1. This patch is one of the kshrinkd series patches.

This seems like a bad name for the patchset, as nobody knows
what kshrinkd is. Maybe something like "asynchronously
reclaim contended folios rather than aging them"?

>
> 2. It lets folio_referenced() callers control the locking behavior of
> rmap_walk(): kshrinkd calls folio_referenced() through shrink_folio_list(),
> but it does not want rmap_walk() to use try_lock during folio_referenced().
>
>
> > Btw, who sets rw_try_lock to 0, and what is the benefit?
>
> Actually, the current situation is that only shrink_folio_list will set
> try_lock to 1,

Understood, as you don't want contended folios to be skipped
by the scanner any more.

>
> while the others will set it to 0, so they would wait for the rwsem lock
> if it is contended in rmap_walk.

ok. other reclamation threads will still skip contended folios.

As discussed, the patchset really needs detailed data to back it up.

Thanks
Barry
  

Patch

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b7944a8..846b261 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -623,7 +623,8 @@  static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
  * Called from mm/vmscan.c to handle paging out
  */
 int folio_referenced(struct folio *, int is_locked,
-			struct mem_cgroup *memcg, unsigned long *vm_flags);
+			struct mem_cgroup *memcg, unsigned long *vm_flags,
+			unsigned int rw_try_lock);
 
 void try_to_migrate(struct folio *folio, enum ttu_flags flags);
 void try_to_unmap(struct folio *, enum ttu_flags flags);
@@ -739,7 +740,7 @@  struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
 
 static inline int folio_referenced(struct folio *folio, int is_locked,
 				  struct mem_cgroup *memcg,
-				  unsigned long *vm_flags)
+				  unsigned long *vm_flags, unsigned int rw_try_lock)
 {
 	*vm_flags = 0;
 	return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index f5d43ed..15d1fba 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -952,6 +952,7 @@  static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
  * @is_locked: Caller holds lock on the folio.
  * @memcg: target memory cgroup
  * @vm_flags: A combination of all the vma->vm_flags which referenced the folio.
+ * @rw_try_lock: if try_lock in rmap_walk
  *
  * Quick test_and_clear_referenced for all mappings of a folio,
  *
@@ -959,7 +960,7 @@  static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
  * the function bailed out due to rmap lock contention.
  */
 int folio_referenced(struct folio *folio, int is_locked,
-		     struct mem_cgroup *memcg, unsigned long *vm_flags)
+		     struct mem_cgroup *memcg, unsigned long *vm_flags, unsigned int rw_try_lock)
 {
 	int we_locked = 0;
 	struct folio_referenced_arg pra = {
@@ -970,7 +971,7 @@  int folio_referenced(struct folio *folio, int is_locked,
 		.rmap_one = folio_referenced_one,
 		.arg = (void *)&pra,
 		.anon_lock = folio_lock_anon_vma_read,
-		.try_lock = true,
+		.try_lock = rw_try_lock ? true : false,
 		.invalid_vma = invalid_folio_referenced_vma,
 	};
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4f9c854..0296d48 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -136,6 +136,9 @@  struct scan_control {
 	/* Always discard instead of demoting to lower tier memory */
 	unsigned int no_demotion:1;
 
+	/* if try_lock in rmap_walk */
+	unsigned int rw_try_lock:1;
+
 	/* Allocation order */
 	s8 order;
 
@@ -827,7 +830,7 @@  static enum folio_references folio_check_references(struct folio *folio,
 	unsigned long vm_flags;
 
 	referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
-					   &vm_flags);
+					   &vm_flags, sc->rw_try_lock);
 	referenced_folio = folio_test_clear_referenced(folio);
 
 	/*
@@ -1501,6 +1504,7 @@  unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_unmap = 1,
+		.rw_try_lock = 1,
 	};
 	struct reclaim_stat stat;
 	unsigned int nr_reclaimed;
@@ -2038,7 +2042,7 @@  static void shrink_active_list(unsigned long nr_to_scan,
 
 		/* Referenced or rmap lock contention: rotate */
 		if (folio_referenced(folio, 0, sc->target_mem_cgroup,
-				     &vm_flags) != 0) {
+				     &vm_flags, sc->rw_try_lock) != 0) {
 			/*
 			 * Identify referenced, file-backed active folios and
 			 * give them one more trip around the active list. So
@@ -2096,6 +2100,7 @@  static unsigned int reclaim_folio_list(struct list_head *folio_list,
 		.may_unmap = 1,
 		.may_swap = 1,
 		.no_demotion = 1,
+		.rw_try_lock = 1,
 	};
 
 	nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, false);
@@ -5442,6 +5447,7 @@  static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
 		.may_swap = true,
 		.reclaim_idx = MAX_NR_ZONES - 1,
 		.gfp_mask = GFP_KERNEL,
+		.rw_try_lock = 1,
 	};
 
 	buf = kvmalloc(len + 1, GFP_KERNEL);
@@ -6414,6 +6420,7 @@  unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
 		.may_swap = 1,
+		.rw_try_lock = 1,
 	};
 
 	/*
@@ -6459,6 +6466,7 @@  unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 		.may_unmap = 1,
 		.reclaim_idx = MAX_NR_ZONES - 1,
 		.may_swap = !noswap,
+		.rw_try_lock = 1,
 	};
 
 	WARN_ON_ONCE(!current->reclaim_state);
@@ -6503,6 +6511,7 @@  unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_unmap = 1,
 		.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
 		.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
+		.rw_try_lock = 1,
 	};
 	/*
 	 * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
@@ -6764,6 +6773,7 @@  static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
 		.gfp_mask = GFP_KERNEL,
 		.order = order,
 		.may_unmap = 1,
+		.rw_try_lock = 1,
 	};
 
 	set_task_reclaim_state(current, &sc.reclaim_state);
@@ -7223,6 +7233,7 @@  unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 		.may_unmap = 1,
 		.may_swap = 1,
 		.hibernation_mode = 1,
+		.rw_try_lock = 1,
 	};
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
 	unsigned long nr_reclaimed;
@@ -7381,6 +7392,7 @@  static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 		.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
 		.may_swap = 1,
 		.reclaim_idx = gfp_zone(gfp_mask),
+		.rw_try_lock = 1,
 	};
 	unsigned long pflags;