[1/6] sched/numa, mm: make numa migrate functions to take a folio

Message ID 20230918103213.4166210-2-wangkefeng.wang@huawei.com
State New
Headers
Series mm: convert numa balancing functions to use a folio |

Commit Message

Kefeng Wang Sept. 18, 2023, 10:32 a.m. UTC
  The cpupid (or access time) is stored in the head page for THP, so it is
safe to make should_numa_migrate_memory() and numa_hint_fault_latency()
take a folio. This is in preparation for large folio NUMA balancing.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/sched/numa_balancing.h |  4 ++--
 kernel/sched/fair.c                  | 12 ++++++------
 mm/mempolicy.c                       |  3 ++-
 3 files changed, 10 insertions(+), 9 deletions(-)
  

Comments

Huang, Ying Sept. 20, 2023, 3:05 a.m. UTC | #1
Kefeng Wang <wangkefeng.wang@huawei.com> writes:

> The cpuid(or access time) is stored in the head page for THP, so it is

s/cpuid/cpupid/

> safely to make should_numa_migrate_memory() and numa_hint_fault_latency()
> to take a folio. This is in preparation for large folio numa balancing.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  include/linux/sched/numa_balancing.h |  4 ++--
>  kernel/sched/fair.c                  | 12 ++++++------
>  mm/mempolicy.c                       |  3 ++-
>  3 files changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
> index 3988762efe15..a38528c28665 100644
> --- a/include/linux/sched/numa_balancing.h
> +++ b/include/linux/sched/numa_balancing.h
> @@ -20,7 +20,7 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
>  extern pid_t task_numa_group_id(struct task_struct *p);
>  extern void set_numabalancing_state(bool enabled);
>  extern void task_numa_free(struct task_struct *p, bool final);
> -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
> +extern bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
>  					int src_nid, int dst_cpu);
>  #else
>  static inline void task_numa_fault(int last_node, int node, int pages,
> @@ -38,7 +38,7 @@ static inline void task_numa_free(struct task_struct *p, bool final)
>  {
>  }
>  static inline bool should_numa_migrate_memory(struct task_struct *p,
> -				struct page *page, int src_nid, int dst_cpu)
> +				struct folio *folio, int src_nid, int dst_cpu)
>  {
>  	return true;
>  }
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index cb225921bbca..683cc1e417d7 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1722,12 +1722,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat)
>   * The smaller the hint page fault latency, the higher the possibility
>   * for the page to be hot.
>   */
> -static int numa_hint_fault_latency(struct page *page)
> +static int numa_hint_fault_latency(struct folio *folio)
>  {
>  	int last_time, time;
>  
>  	time = jiffies_to_msecs(jiffies);
> -	last_time = xchg_page_access_time(page, time);
> +	last_time = xchg_page_access_time(&folio->page, time);

How about defining xchg_folio_access_time() and folio_cpupid_xchg_last()?

--
Best Regards,
Huang, Ying

>  
>  	return (time - last_time) & PAGE_ACCESS_TIME_MASK;
>  }
> @@ -1784,7 +1784,7 @@ static void numa_promotion_adjust_threshold(struct pglist_data *pgdat,
>  	}
>  }
>  
> -bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
> +bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
>  				int src_nid, int dst_cpu)
>  {
>  	struct numa_group *ng = deref_curr_numa_group(p);
> @@ -1814,16 +1814,16 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
>  		numa_promotion_adjust_threshold(pgdat, rate_limit, def_th);
>  
>  		th = pgdat->nbp_threshold ? : def_th;
> -		latency = numa_hint_fault_latency(page);
> +		latency = numa_hint_fault_latency(folio);
>  		if (latency >= th)
>  			return false;
>  
>  		return !numa_promotion_rate_limit(pgdat, rate_limit,
> -						  thp_nr_pages(page));
> +						  folio_nr_pages(folio));
>  	}
>  
>  	this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
> -	last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
> +	last_cpupid = page_cpupid_xchg_last(&folio->page, this_cpupid);
>  
>  	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
>  	    !node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 42b5567e3773..39584dc25c84 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -2642,7 +2642,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
>  	if (pol->flags & MPOL_F_MORON) {
>  		polnid = thisnid;
>  
> -		if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
> +		if (!should_numa_migrate_memory(current, page_folio(page),
> +						curnid, thiscpu))
>  			goto out;
>  	}
  
Kefeng Wang Sept. 20, 2023, 7:57 a.m. UTC | #2
On 2023/9/20 11:05, Huang, Ying wrote:
> Kefeng Wang <wangkefeng.wang@huawei.com> writes:
> 
>> The cpuid(or access time) is stored in the head page for THP, so it is
> 
> s/cpuid/cpupid/

Will fix.

> 
>> safely to make should_numa_migrate_memory() and numa_hint_fault_latency()
>> to take a folio. This is in preparation for large folio numa balancing.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   include/linux/sched/numa_balancing.h |  4 ++--
>>   kernel/sched/fair.c                  | 12 ++++++------
>>   mm/mempolicy.c                       |  3 ++-
>>   3 files changed, 10 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
>> index 3988762efe15..a38528c28665 100644
>> --- a/include/linux/sched/numa_balancing.h
>> +++ b/include/linux/sched/numa_balancing.h
>> @@ -20,7 +20,7 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
>>   extern pid_t task_numa_group_id(struct task_struct *p);
>>   extern void set_numabalancing_state(bool enabled);
>>   extern void task_numa_free(struct task_struct *p, bool final);
>> -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
>> +extern bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
>>   					int src_nid, int dst_cpu);
>>   #else
>>   static inline void task_numa_fault(int last_node, int node, int pages,
>> @@ -38,7 +38,7 @@ static inline void task_numa_free(struct task_struct *p, bool final)
>>   {
>>   }
>>   static inline bool should_numa_migrate_memory(struct task_struct *p,
>> -				struct page *page, int src_nid, int dst_cpu)
>> +				struct folio *folio, int src_nid, int dst_cpu)
>>   {
>>   	return true;
>>   }
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index cb225921bbca..683cc1e417d7 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -1722,12 +1722,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat)
>>    * The smaller the hint page fault latency, the higher the possibility
>>    * for the page to be hot.
>>    */
>> -static int numa_hint_fault_latency(struct page *page)
>> +static int numa_hint_fault_latency(struct folio *folio)
>>   {
>>   	int last_time, time;
>>   
>>   	time = jiffies_to_msecs(jiffies);
>> -	last_time = xchg_page_access_time(page, time);
>> +	last_time = xchg_page_access_time(&folio->page, time);
> 
> How about define xchg_folio_access_time() and folio_cpupid_xchg_last()?
> 
Yes. As with
page_cpupid_last()/xchg_page_access_time()/page_cpupid_xchg_last(),
we could do that later: convert the callers to use a folio, then rename them.


> --
> Best Regards,
> Huang, Ying
>
  

Patch

diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 3988762efe15..a38528c28665 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -20,7 +20,7 @@  extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 extern void task_numa_free(struct task_struct *p, bool final);
-extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
+extern bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
 					int src_nid, int dst_cpu);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
@@ -38,7 +38,7 @@  static inline void task_numa_free(struct task_struct *p, bool final)
 {
 }
 static inline bool should_numa_migrate_memory(struct task_struct *p,
-				struct page *page, int src_nid, int dst_cpu)
+				struct folio *folio, int src_nid, int dst_cpu)
 {
 	return true;
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cb225921bbca..683cc1e417d7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1722,12 +1722,12 @@  static bool pgdat_free_space_enough(struct pglist_data *pgdat)
  * The smaller the hint page fault latency, the higher the possibility
  * for the page to be hot.
  */
-static int numa_hint_fault_latency(struct page *page)
+static int numa_hint_fault_latency(struct folio *folio)
 {
 	int last_time, time;
 
 	time = jiffies_to_msecs(jiffies);
-	last_time = xchg_page_access_time(page, time);
+	last_time = xchg_page_access_time(&folio->page, time);
 
 	return (time - last_time) & PAGE_ACCESS_TIME_MASK;
 }
@@ -1784,7 +1784,7 @@  static void numa_promotion_adjust_threshold(struct pglist_data *pgdat,
 	}
 }
 
-bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
+bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
 				int src_nid, int dst_cpu)
 {
 	struct numa_group *ng = deref_curr_numa_group(p);
@@ -1814,16 +1814,16 @@  bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 		numa_promotion_adjust_threshold(pgdat, rate_limit, def_th);
 
 		th = pgdat->nbp_threshold ? : def_th;
-		latency = numa_hint_fault_latency(page);
+		latency = numa_hint_fault_latency(folio);
 		if (latency >= th)
 			return false;
 
 		return !numa_promotion_rate_limit(pgdat, rate_limit,
-						  thp_nr_pages(page));
+						  folio_nr_pages(folio));
 	}
 
 	this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
-	last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+	last_cpupid = page_cpupid_xchg_last(&folio->page, this_cpupid);
 
 	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
 	    !node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 42b5567e3773..39584dc25c84 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2642,7 +2642,8 @@  int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 	if (pol->flags & MPOL_F_MORON) {
 		polnid = thisnid;
 
-		if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
+		if (!should_numa_migrate_memory(current, page_folio(page),
+						curnid, thiscpu))
 			goto out;
 	}