[RFC,2/4] mm/compaction: optimize >0 order folio compaction with free page split.

Message ID 20230912162815.440749-3-zi.yan@sent.com
State New
Series Enable >0 order folio memory compaction

Commit Message

Zi Yan Sept. 12, 2023, 4:28 p.m. UTC
  From: Zi Yan <ziy@nvidia.com>

During migration in memory compaction, free pages are placed in an array
of page lists based on their order. But the desired free page order (i.e.,
the order of a source page) might not always be present, thus leading to
migration failures. Split a high order free page when the source migration
page has a lower order to increase the migration success rate.

Note: merging free pages when a migration fails and a lower order free
page is returned via compaction_free() is possible, but it would be too
much work. Since these free pages are not buddy pages, it is hard to
identify them for merging using the existing PFN-based page merging
algorithm.
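To make the last point concrete, here is a rough sketch (not part of the
patch) of how the buddy merge path locates a merge candidate; PageBuddy
and the recorded buddy order are exactly the metadata that pages sitting
on cc->freepages do not carry:

	/* sketch: how __free_one_page() locates a potential buddy */
	unsigned long buddy_pfn = pfn ^ (1UL << order);	/* __find_buddy_pfn() */
	struct page *buddy = page + (buddy_pfn - pfn);

	/*
	 * Merging is only allowed if @buddy is PageBuddy and records the
	 * same order; isolated free pages on cc->freepages have neither,
	 * so the existing PFN-based merge cannot be reused as-is.
	 */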

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 mm/compaction.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)
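For context, the patch operates on a per-order array of free page lists
that an earlier patch in this series adds to struct compact_control. The
struct and field names below are an illustrative sketch only (the real
names may differ); it simply matches the cc->freepages[order].pages and
cc->freepages[order].nr_free accesses used in the diff:

	/* sketch: one list of isolated free pages per order */
	struct free_list {
		struct list_head pages;	/* isolated free pages of this order */
		unsigned long nr_free;	/* number of entries on @pages */
	};

	/* in struct compact_control, indexed by order 0..MAX_ORDER */
	struct free_list freepages[MAX_ORDER + 1];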
  

Comments

Baolin Wang Sept. 18, 2023, 7:34 a.m. UTC | #1
On 9/13/2023 12:28 AM, Zi Yan wrote:
> From: Zi Yan <ziy@nvidia.com>
> 
> During migration in a memory compaction, free pages are placed in an array
> of page lists based on their order. But the desired free page order (i.e.,
> the order of a source page) might not be always present, thus leading to
> migration failures. Split a high order free pages when source migration
> page has a lower order to increase migration successful rate.
> 
> Note: merging free pages when a migration fails and a lower order free
> page is returned via compaction_free() is possible, but there is too much
> work. Since the free pages are not buddy pages, it is hard to identify
> these free pages using existing PFN-based page merging algorithm.
> 
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
>   mm/compaction.c | 40 +++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 868e92e55d27..45747ab5f380 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1801,9 +1801,46 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
>   	struct compact_control *cc = (struct compact_control *)data;
>   	struct folio *dst;
>   	int order = folio_order(src);
> +	bool has_isolated_pages = false;
>   
> +again:
>   	if (!cc->freepages[order].nr_free) {
> -		isolate_freepages(cc);
> +		int i;
> +
> +		for (i = order + 1; i <= MAX_ORDER; i++) {
> +			if (cc->freepages[i].nr_free) {
> +				struct page *freepage =
> +					list_first_entry(&cc->freepages[i].pages,
> +							 struct page, lru);
> +
> +				int start_order = i;
> +				unsigned long size = 1 << start_order;
> +
> +				list_del(&freepage->lru);
> +				cc->freepages[i].nr_free--;
> +
> +				while (start_order > order) {
> +					start_order--;
> +					size >>= 1;
> +
> +					list_add(&freepage[size].lru,
> +						&cc->freepages[start_order].pages);
> +					cc->freepages[start_order].nr_free++;
> +					set_page_private(&freepage[size], start_order);

IIUC, these split pages should also call functions to initialize? e.g. 
prep_compound_page()?

> +				}
> +				post_alloc_hook(freepage, order, __GFP_MOVABLE);
> +				if (order)
> +					prep_compound_page(freepage, order);
> +				dst = page_folio(freepage);
> +				goto done;
> +			}
> +		}
> +		if (!has_isolated_pages) {
> +			isolate_freepages(cc);
> +			has_isolated_pages = true;
> +			goto again;
> +		}
> +
>   		if (!cc->freepages[order].nr_free)
>   			return NULL;
>   	}
> @@ -1814,6 +1851,7 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
>   	post_alloc_hook(&dst->page, order, __GFP_MOVABLE);
>   	if (order)
>   		prep_compound_page(&dst->page, order);
> +done:
>   	cc->nr_freepages -= 1 << order;
>   	return dst;
>   }
  
Zi Yan Sept. 18, 2023, 5:20 p.m. UTC | #2
On 18 Sep 2023, at 3:34, Baolin Wang wrote:

> On 9/13/2023 12:28 AM, Zi Yan wrote:
>> From: Zi Yan <ziy@nvidia.com>
>>
>> During migration in a memory compaction, free pages are placed in an array
>> of page lists based on their order. But the desired free page order (i.e.,
>> the order of a source page) might not be always present, thus leading to
>> migration failures. Split a high order free pages when source migration
>> page has a lower order to increase migration successful rate.
>>
>> Note: merging free pages when a migration fails and a lower order free
>> page is returned via compaction_free() is possible, but there is too much
>> work. Since the free pages are not buddy pages, it is hard to identify
>> these free pages using existing PFN-based page merging algorithm.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>>   mm/compaction.c | 40 +++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 39 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/compaction.c b/mm/compaction.c
>> index 868e92e55d27..45747ab5f380 100644
>> --- a/mm/compaction.c
>> +++ b/mm/compaction.c
>> @@ -1801,9 +1801,46 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
>>   	struct compact_control *cc = (struct compact_control *)data;
>>   	struct folio *dst;
>>   	int order = folio_order(src);
>> +	bool has_isolated_pages = false;
>>  +again:
>>   	if (!cc->freepages[order].nr_free) {
>> -		isolate_freepages(cc);
>> +		int i;
>> +
>> +		for (i = order + 1; i <= MAX_ORDER; i++) {
>> +			if (cc->freepages[i].nr_free) {
>> +				struct page *freepage =
>> +					list_first_entry(&cc->freepages[i].pages,
>> +							 struct page, lru);
>> +
>> +				int start_order = i;
>> +				unsigned long size = 1 << start_order;
>> +
>> +				list_del(&freepage->lru);
>> +				cc->freepages[i].nr_free--;
>> +
>> +				while (start_order > order) {
>> +					start_order--;
>> +					size >>= 1;
>> +
>> +					list_add(&freepage[size].lru,
>> +						&cc->freepages[start_order].pages);
>> +					cc->freepages[start_order].nr_free++;
>> +					set_page_private(&freepage[size], start_order);
>
> IIUC, these split pages should also call functions to initialize? e.g. prep_compound_page()?

Not at this place. It is done right below (just before the "goto done")
and in the existing code above the "done" label. When free pages are on
cc->freepages, we want to keep them unprocessed by post_alloc_hook() or
prep_compound_page(), so that they can still be split later. A free page
is only initialized when it is returned by compaction_alloc().
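In other words, condensed from the hunk above (not the exact code):

	/* split path: the trailing halves stay raw on the per-order lists */
	while (start_order > order) {
		start_order--;
		size >>= 1;
		list_add(&freepage[size].lru, &cc->freepages[start_order].pages);
		cc->freepages[start_order].nr_free++;
		set_page_private(&freepage[size], start_order);
	}

	/* only the page actually handed out is initialized */
	post_alloc_hook(freepage, order, __GFP_MOVABLE);
	if (order)
		prep_compound_page(freepage, order);
	dst = page_folio(freepage);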

>
>> +				}
>> +				post_alloc_hook(freepage, order, __GFP_MOVABLE);
>> +				if (order)
>> +					prep_compound_page(freepage, order);
>> +				dst = page_folio(freepage);
>> +				goto done;
>> +			}
>> +		}
>> +		if (!has_isolated_pages) {
>> +			isolate_freepages(cc);
>> +			has_isolated_pages = true;
>> +			goto again;
>> +		}
>> +
>>   		if (!cc->freepages[order].nr_free)
>>   			return NULL;
>>   	}
>> @@ -1814,6 +1851,7 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
>>   	post_alloc_hook(&dst->page, order, __GFP_MOVABLE);
>>   	if (order)
>>   		prep_compound_page(&dst->page, order);
>> +done:
>>   	cc->nr_freepages -= 1 << order;
>>   	return dst;
>>   }


--
Best Regards,
Yan, Zi
  
Baolin Wang Sept. 20, 2023, 8:15 a.m. UTC | #3
On 9/19/2023 1:20 AM, Zi Yan wrote:
> On 18 Sep 2023, at 3:34, Baolin Wang wrote:
> 
>> On 9/13/2023 12:28 AM, Zi Yan wrote:
>>> From: Zi Yan <ziy@nvidia.com>
>>>
>>> During migration in a memory compaction, free pages are placed in an array
>>> of page lists based on their order. But the desired free page order (i.e.,
>>> the order of a source page) might not be always present, thus leading to
>>> migration failures. Split a high order free pages when source migration
>>> page has a lower order to increase migration successful rate.
>>>
>>> Note: merging free pages when a migration fails and a lower order free
>>> page is returned via compaction_free() is possible, but there is too much
>>> work. Since the free pages are not buddy pages, it is hard to identify
>>> these free pages using existing PFN-based page merging algorithm.
>>>
>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>> ---
>>>    mm/compaction.c | 40 +++++++++++++++++++++++++++++++++++++++-
>>>    1 file changed, 39 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/mm/compaction.c b/mm/compaction.c
>>> index 868e92e55d27..45747ab5f380 100644
>>> --- a/mm/compaction.c
>>> +++ b/mm/compaction.c
>>> @@ -1801,9 +1801,46 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
>>>    	struct compact_control *cc = (struct compact_control *)data;
>>>    	struct folio *dst;
>>>    	int order = folio_order(src);
>>> +	bool has_isolated_pages = false;
>>>   +again:
>>>    	if (!cc->freepages[order].nr_free) {
>>> -		isolate_freepages(cc);
>>> +		int i;
>>> +
>>> +		for (i = order + 1; i <= MAX_ORDER; i++) {
>>> +			if (cc->freepages[i].nr_free) {
>>> +				struct page *freepage =
>>> +					list_first_entry(&cc->freepages[i].pages,
>>> +							 struct page, lru);
>>> +
>>> +				int start_order = i;
>>> +				unsigned long size = 1 << start_order;
>>> +
>>> +				list_del(&freepage->lru);
>>> +				cc->freepages[i].nr_free--;
>>> +
>>> +				while (start_order > order) {
>>> +					start_order--;
>>> +					size >>= 1;
>>> +
>>> +					list_add(&freepage[size].lru,
>>> +						&cc->freepages[start_order].pages);
>>> +					cc->freepages[start_order].nr_free++;
>>> +					set_page_private(&freepage[size], start_order);
>>
>> IIUC, these split pages should also call functions to initialize? e.g. prep_compound_page()?
> 
> Not at this place. It is done right below and above "done" label. When free pages
> are on cc->freepages, we want to keep them without being post_alloc_hook() or
> prep_compound_page() processed for a possible future split. A free page is
> only initialized when it is returned by compaction_alloc().

Ah, I see. Thanks for the explanation.
  

Patch

diff --git a/mm/compaction.c b/mm/compaction.c
index 868e92e55d27..45747ab5f380 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1801,9 +1801,46 @@  static struct folio *compaction_alloc(struct folio *src, unsigned long data)
 	struct compact_control *cc = (struct compact_control *)data;
 	struct folio *dst;
 	int order = folio_order(src);
+	bool has_isolated_pages = false;
 
+again:
 	if (!cc->freepages[order].nr_free) {
-		isolate_freepages(cc);
+		int i;
+
+		for (i = order + 1; i <= MAX_ORDER; i++) {
+			if (cc->freepages[i].nr_free) {
+				struct page *freepage =
+					list_first_entry(&cc->freepages[i].pages,
+							 struct page, lru);
+
+				int start_order = i;
+				unsigned long size = 1 << start_order;
+
+				list_del(&freepage->lru);
+				cc->freepages[i].nr_free--;
+
+				while (start_order > order) {
+					start_order--;
+					size >>= 1;
+
+					list_add(&freepage[size].lru,
+						&cc->freepages[start_order].pages);
+					cc->freepages[start_order].nr_free++;
+					set_page_private(&freepage[size], start_order);
+				}
+				post_alloc_hook(freepage, order, __GFP_MOVABLE);
+				if (order)
+					prep_compound_page(freepage, order);
+				dst = page_folio(freepage);
+				goto done;
+			}
+		}
+		if (!has_isolated_pages) {
+			isolate_freepages(cc);
+			has_isolated_pages = true;
+			goto again;
+		}
+
 		if (!cc->freepages[order].nr_free)
 			return NULL;
 	}
@@ -1814,6 +1851,7 @@  static struct folio *compaction_alloc(struct folio *src, unsigned long data)
 	post_alloc_hook(&dst->page, order, __GFP_MOVABLE);
 	if (order)
 		prep_compound_page(&dst->page, order);
+done:
 	cc->nr_freepages -= 1 << order;
 	return dst;
 }