[1/6] sched/numa, mm: make numa migrate functions take a folio
Commit Message
The cpupid (or access time) is stored in the head page for THP, so it is
safe to make should_numa_migrate_memory() and numa_hint_fault_latency()
take a folio. This is in preparation for large folio numa balancing.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/sched/numa_balancing.h | 4 ++--
kernel/sched/fair.c | 12 ++++++------
mm/mempolicy.c | 3 ++-
3 files changed, 10 insertions(+), 9 deletions(-)
Comments
Kefeng Wang <wangkefeng.wang@huawei.com> writes:
> The cpuid(or access time) is stored in the head page for THP, so it is
s/cpuid/cpupid/
> safely to make should_numa_migrate_memory() and numa_hint_fault_latency()
> to take a folio. This is in preparation for large folio numa balancing.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
> include/linux/sched/numa_balancing.h | 4 ++--
> kernel/sched/fair.c | 12 ++++++------
> mm/mempolicy.c | 3 ++-
> 3 files changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
> index 3988762efe15..a38528c28665 100644
> --- a/include/linux/sched/numa_balancing.h
> +++ b/include/linux/sched/numa_balancing.h
> @@ -20,7 +20,7 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
> extern pid_t task_numa_group_id(struct task_struct *p);
> extern void set_numabalancing_state(bool enabled);
> extern void task_numa_free(struct task_struct *p, bool final);
> -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
> +extern bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
> int src_nid, int dst_cpu);
> #else
> static inline void task_numa_fault(int last_node, int node, int pages,
> @@ -38,7 +38,7 @@ static inline void task_numa_free(struct task_struct *p, bool final)
> {
> }
> static inline bool should_numa_migrate_memory(struct task_struct *p,
> - struct page *page, int src_nid, int dst_cpu)
> + struct folio *folio, int src_nid, int dst_cpu)
> {
> return true;
> }
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index cb225921bbca..683cc1e417d7 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1722,12 +1722,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat)
> * The smaller the hint page fault latency, the higher the possibility
> * for the page to be hot.
> */
> -static int numa_hint_fault_latency(struct page *page)
> +static int numa_hint_fault_latency(struct folio *folio)
> {
> int last_time, time;
>
> time = jiffies_to_msecs(jiffies);
> - last_time = xchg_page_access_time(page, time);
> + last_time = xchg_page_access_time(&folio->page, time);
How about defining xchg_folio_access_time() and folio_cpupid_xchg_last()?
--
Best Regards,
Huang, Ying
>
> return (time - last_time) & PAGE_ACCESS_TIME_MASK;
> }
> @@ -1784,7 +1784,7 @@ static void numa_promotion_adjust_threshold(struct pglist_data *pgdat,
> }
> }
>
> -bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
> +bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
> int src_nid, int dst_cpu)
> {
> struct numa_group *ng = deref_curr_numa_group(p);
> @@ -1814,16 +1814,16 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
> numa_promotion_adjust_threshold(pgdat, rate_limit, def_th);
>
> th = pgdat->nbp_threshold ? : def_th;
> - latency = numa_hint_fault_latency(page);
> + latency = numa_hint_fault_latency(folio);
> if (latency >= th)
> return false;
>
> return !numa_promotion_rate_limit(pgdat, rate_limit,
> - thp_nr_pages(page));
> + folio_nr_pages(folio));
> }
>
> this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
> - last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
> + last_cpupid = page_cpupid_xchg_last(&folio->page, this_cpupid);
>
> if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
> !node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 42b5567e3773..39584dc25c84 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -2642,7 +2642,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
> if (pol->flags & MPOL_F_MORON) {
> polnid = thisnid;
>
> - if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
> + if (!should_numa_migrate_memory(current, page_folio(page),
> + curnid, thiscpu))
> goto out;
> }
On 2023/9/20 11:05, Huang, Ying wrote:
> Kefeng Wang <wangkefeng.wang@huawei.com> writes:
>
>> The cpuid(or access time) is stored in the head page for THP, so it is
>
> s/cpuid/cpupid/
Will fix.
>
>> safely to make should_numa_migrate_memory() and numa_hint_fault_latency()
>> to take a folio. This is in preparation for large folio numa balancing.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>> include/linux/sched/numa_balancing.h | 4 ++--
>> kernel/sched/fair.c | 12 ++++++------
>> mm/mempolicy.c | 3 ++-
>> 3 files changed, 10 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
>> index 3988762efe15..a38528c28665 100644
>> --- a/include/linux/sched/numa_balancing.h
>> +++ b/include/linux/sched/numa_balancing.h
>> @@ -20,7 +20,7 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
>> extern pid_t task_numa_group_id(struct task_struct *p);
>> extern void set_numabalancing_state(bool enabled);
>> extern void task_numa_free(struct task_struct *p, bool final);
>> -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
>> +extern bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
>> int src_nid, int dst_cpu);
>> #else
>> static inline void task_numa_fault(int last_node, int node, int pages,
>> @@ -38,7 +38,7 @@ static inline void task_numa_free(struct task_struct *p, bool final)
>> {
>> }
>> static inline bool should_numa_migrate_memory(struct task_struct *p,
>> - struct page *page, int src_nid, int dst_cpu)
>> + struct folio *folio, int src_nid, int dst_cpu)
>> {
>> return true;
>> }
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index cb225921bbca..683cc1e417d7 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -1722,12 +1722,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat)
>> * The smaller the hint page fault latency, the higher the possibility
>> * for the page to be hot.
>> */
>> -static int numa_hint_fault_latency(struct page *page)
>> +static int numa_hint_fault_latency(struct folio *folio)
>> {
>> int last_time, time;
>>
>> time = jiffies_to_msecs(jiffies);
>> - last_time = xchg_page_access_time(page, time);
>> + last_time = xchg_page_access_time(&folio->page, time);
>
> How about define xchg_folio_access_time() and folio_cpupid_xchg_last()?
>
Yes, as with
page_cpupid_last()/xchg_page_access_time()/page_cpupid_xchg_last(),
we could convert the callers to use a folio later and rename the helpers then.
> --
> Best Regards,
> Huang, Ying
>
@@ -20,7 +20,7 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
extern void task_numa_free(struct task_struct *p, bool final);
-extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
+extern bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
int src_nid, int dst_cpu);
#else
static inline void task_numa_fault(int last_node, int node, int pages,
@@ -38,7 +38,7 @@ static inline void task_numa_free(struct task_struct *p, bool final)
{
}
static inline bool should_numa_migrate_memory(struct task_struct *p,
- struct page *page, int src_nid, int dst_cpu)
+ struct folio *folio, int src_nid, int dst_cpu)
{
return true;
}
@@ -1722,12 +1722,12 @@ static bool pgdat_free_space_enough(struct pglist_data *pgdat)
* The smaller the hint page fault latency, the higher the possibility
* for the page to be hot.
*/
-static int numa_hint_fault_latency(struct page *page)
+static int numa_hint_fault_latency(struct folio *folio)
{
int last_time, time;
time = jiffies_to_msecs(jiffies);
- last_time = xchg_page_access_time(page, time);
+ last_time = xchg_page_access_time(&folio->page, time);
return (time - last_time) & PAGE_ACCESS_TIME_MASK;
}
@@ -1784,7 +1784,7 @@ static void numa_promotion_adjust_threshold(struct pglist_data *pgdat,
}
}
-bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
+bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
int src_nid, int dst_cpu)
{
struct numa_group *ng = deref_curr_numa_group(p);
@@ -1814,16 +1814,16 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
numa_promotion_adjust_threshold(pgdat, rate_limit, def_th);
th = pgdat->nbp_threshold ? : def_th;
- latency = numa_hint_fault_latency(page);
+ latency = numa_hint_fault_latency(folio);
if (latency >= th)
return false;
return !numa_promotion_rate_limit(pgdat, rate_limit,
- thp_nr_pages(page));
+ folio_nr_pages(folio));
}
this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
- last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+ last_cpupid = page_cpupid_xchg_last(&folio->page, this_cpupid);
if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
!node_is_toptier(src_nid) && !cpupid_valid(last_cpupid))
@@ -2642,7 +2642,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
if (pol->flags & MPOL_F_MORON) {
polnid = thisnid;
- if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
+ if (!should_numa_migrate_memory(current, page_folio(page),
+ curnid, thiscpu))
goto out;
}