[-V2,2/5] swap, __read_swap_cache_async(): enlarge get/put_swap_device protection range

Message ID 20230522070905.16773-3-ying.huang@intel.com
State New
Headers
Series swap: cleanup get/put_swap_device() usage |

Commit Message

Huang, Ying May 22, 2023, 7:09 a.m. UTC
  This makes the function a little easier to be understood because we
don't need to consider swapoff.  And this makes it possible to remove
get/put_swap_device() calling in some functions called by
__read_swap_cache_async().

Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
---
 mm/swap_state.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)
  

Comments

David Hildenbrand May 22, 2023, 12:01 p.m. UTC | #1
On 22.05.23 09:09, Huang Ying wrote:
> This makes the function a little easier to be understood because we
> don't need to consider swapoff.  And this makes it possible to remove
> get/put_swap_device() calling in some functions called by
> __read_swap_cache_async().
> 
> Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Hugh Dickins <hughd@google.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Minchan Kim <minchan@kernel.org>
> Cc: Tim Chen <tim.c.chen@linux.intel.com>
> Cc: Yang Shi <shy828301@gmail.com>
> Cc: Yu Zhao <yuzhao@google.com>
> ---
>   mm/swap_state.c | 33 ++++++++++++++++++++++-----------
>   1 file changed, 22 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index b76a65ac28b3..a1028fe7214e 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -417,9 +417,13 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   {
>   	struct swap_info_struct *si;
>   	struct folio *folio;
> +	struct page *page;
>   	void *shadow = NULL;
>   
>   	*new_page_allocated = false;
> +	si = get_swap_device(entry);
> +	if (!si)
> +		return NULL;
>   
>   	for (;;) {
>   		int err;
> @@ -428,14 +432,12 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   		 * called after swap_cache_get_folio() failed, re-calling
>   		 * that would confuse statistics.
>   		 */
> -		si = get_swap_device(entry);
> -		if (!si)
> -			return NULL;
>   		folio = filemap_get_folio(swap_address_space(entry),
>   						swp_offset(entry));
> -		put_swap_device(si);
> -		if (!IS_ERR(folio))
> -			return folio_file_page(folio, swp_offset(entry));
> +		if (!IS_ERR(folio)) {
> +			page = folio_file_page(folio, swp_offset(entry));
> +			goto got_page;
> +		}
>   
>   		/*
>   		 * Just skip read ahead for unused swap slot.
> @@ -445,8 +447,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   		 * as SWAP_HAS_CACHE.  That's done in later part of code or
>   		 * else swap_off will be aborted if we return NULL.
>   		 */
> -		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
> -			return NULL;
> +		if (!swap_swapcount(si, entry) && swap_slot_cache_enabled)
> +			goto fail;
>   
>   		/*
>   		 * Get a new page to read into from swap.  Allocate it now,
> @@ -455,7 +457,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   		 */
>   		folio = vma_alloc_folio(gfp_mask, 0, vma, addr, false);
>   		if (!folio)
> -			return NULL;
> +                        goto fail;
>   
>   		/*
>   		 * Swap entry may have been freed since our caller observed it.
> @@ -466,7 +468,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   
>   		folio_put(folio);
>   		if (err != -EEXIST)
> -			return NULL;
> +			goto fail;
>   
>   		/*
>   		 * We might race against __delete_from_swap_cache(), and
> @@ -500,12 +502,17 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>   	/* Caller will initiate read into locked folio */
>   	folio_add_lru(folio);
>   	*new_page_allocated = true;
> -	return &folio->page;
> +	page = &folio->page;
> +got_page:
> +	put_swap_device(si);
> +	return page;
>   
>   fail_unlock:
>   	put_swap_folio(folio, entry);
>   	folio_unlock(folio);
>   	folio_put(folio);
> +fail:

Maybe better "fail_put_swap".

We now hold the swap device ref longer than we used to, prevent swapoff 
over the whole operation. I guess there is no way we can deadlock this way?

In general, looks good to me.
  
Huang, Ying May 23, 2023, 12:43 a.m. UTC | #2
David Hildenbrand <david@redhat.com> writes:

> On 22.05.23 09:09, Huang Ying wrote:
>> This makes the function a little easier to be understood because we
>> don't need to consider swapoff.  And this makes it possible to remove
>> get/put_swap_device() calling in some functions called by
>> __read_swap_cache_async().
>> Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
>> Cc: David Hildenbrand <david@redhat.com>
>> Cc: Hugh Dickins <hughd@google.com>
>> Cc: Johannes Weiner <hannes@cmpxchg.org>
>> Cc: Matthew Wilcox <willy@infradead.org>
>> Cc: Michal Hocko <mhocko@suse.com>
>> Cc: Minchan Kim <minchan@kernel.org>
>> Cc: Tim Chen <tim.c.chen@linux.intel.com>
>> Cc: Yang Shi <shy828301@gmail.com>
>> Cc: Yu Zhao <yuzhao@google.com>
>> ---
>>   mm/swap_state.c | 33 ++++++++++++++++++++++-----------
>>   1 file changed, 22 insertions(+), 11 deletions(-)
>> diff --git a/mm/swap_state.c b/mm/swap_state.c
>> index b76a65ac28b3..a1028fe7214e 100644
>> --- a/mm/swap_state.c
>> +++ b/mm/swap_state.c
>> @@ -417,9 +417,13 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>>   {
>>   	struct swap_info_struct *si;
>>   	struct folio *folio;
>> +	struct page *page;
>>   	void *shadow = NULL;
>>     	*new_page_allocated = false;
>> +	si = get_swap_device(entry);
>> +	if (!si)
>> +		return NULL;
>>     	for (;;) {
>>   		int err;
>> @@ -428,14 +432,12 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>>   		 * called after swap_cache_get_folio() failed, re-calling
>>   		 * that would confuse statistics.
>>   		 */
>> -		si = get_swap_device(entry);
>> -		if (!si)
>> -			return NULL;
>>   		folio = filemap_get_folio(swap_address_space(entry),
>>   						swp_offset(entry));
>> -		put_swap_device(si);
>> -		if (!IS_ERR(folio))
>> -			return folio_file_page(folio, swp_offset(entry));
>> +		if (!IS_ERR(folio)) {
>> +			page = folio_file_page(folio, swp_offset(entry));
>> +			goto got_page;
>> +		}
>>     		/*
>>   		 * Just skip read ahead for unused swap slot.
>> @@ -445,8 +447,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>>   		 * as SWAP_HAS_CACHE.  That's done in later part of code or
>>   		 * else swap_off will be aborted if we return NULL.
>>   		 */
>> -		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
>> -			return NULL;
>> +		if (!swap_swapcount(si, entry) && swap_slot_cache_enabled)
>> +			goto fail;
>>     		/*
>>   		 * Get a new page to read into from swap.  Allocate it now,
>> @@ -455,7 +457,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>>   		 */
>>   		folio = vma_alloc_folio(gfp_mask, 0, vma, addr, false);
>>   		if (!folio)
>> -			return NULL;
>> +                        goto fail;
>>     		/*
>>   		 * Swap entry may have been freed since our caller observed it.
>> @@ -466,7 +468,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>>     		folio_put(folio);
>>   		if (err != -EEXIST)
>> -			return NULL;
>> +			goto fail;
>>     		/*
>>   		 * We might race against __delete_from_swap_cache(), and
>> @@ -500,12 +502,17 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>>   	/* Caller will initiate read into locked folio */
>>   	folio_add_lru(folio);
>>   	*new_page_allocated = true;
>> -	return &folio->page;
>> +	page = &folio->page;
>> +got_page:
>> +	put_swap_device(si);
>> +	return page;
>>     fail_unlock:
>>   	put_swap_folio(folio, entry);
>>   	folio_unlock(folio);
>>   	folio_put(folio);
>> +fail:
>
> Maybe better "fail_put_swap".
>
> We now hold the swap device ref longer than we used to, prevent
> swapoff over the whole operation. I guess there is no way we can
> deadlock this way?

I think that we are safe.  In swapoff() syscall, we call
percpu_ref_kill() after all pages are swapped in (via try_to_unuse()).

> In general, looks good to me.

Thanks!

Best Regards,
Huang, Ying
  

Patch

diff --git a/mm/swap_state.c b/mm/swap_state.c
index b76a65ac28b3..a1028fe7214e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -417,9 +417,13 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 {
 	struct swap_info_struct *si;
 	struct folio *folio;
+	struct page *page;
 	void *shadow = NULL;
 
 	*new_page_allocated = false;
+	si = get_swap_device(entry);
+	if (!si)
+		return NULL;
 
 	for (;;) {
 		int err;
@@ -428,14 +432,12 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * called after swap_cache_get_folio() failed, re-calling
 		 * that would confuse statistics.
 		 */
-		si = get_swap_device(entry);
-		if (!si)
-			return NULL;
 		folio = filemap_get_folio(swap_address_space(entry),
 						swp_offset(entry));
-		put_swap_device(si);
-		if (!IS_ERR(folio))
-			return folio_file_page(folio, swp_offset(entry));
+		if (!IS_ERR(folio)) {
+			page = folio_file_page(folio, swp_offset(entry));
+			goto got_page;
+		}
 
 		/*
 		 * Just skip read ahead for unused swap slot.
@@ -445,8 +447,8 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * as SWAP_HAS_CACHE.  That's done in later part of code or
 		 * else swap_off will be aborted if we return NULL.
 		 */
-		if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
-			return NULL;
+		if (!swap_swapcount(si, entry) && swap_slot_cache_enabled)
+			goto fail;
 
 		/*
 		 * Get a new page to read into from swap.  Allocate it now,
@@ -455,7 +457,7 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 */
 		folio = vma_alloc_folio(gfp_mask, 0, vma, addr, false);
 		if (!folio)
-			return NULL;
+                        goto fail;
 
 		/*
 		 * Swap entry may have been freed since our caller observed it.
@@ -466,7 +468,7 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 
 		folio_put(folio);
 		if (err != -EEXIST)
-			return NULL;
+			goto fail;
 
 		/*
 		 * We might race against __delete_from_swap_cache(), and
@@ -500,12 +502,17 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	/* Caller will initiate read into locked folio */
 	folio_add_lru(folio);
 	*new_page_allocated = true;
-	return &folio->page;
+	page = &folio->page;
+got_page:
+	put_swap_device(si);
+	return page;
 
 fail_unlock:
 	put_swap_folio(folio, entry);
 	folio_unlock(folio);
 	folio_put(folio);
+fail:
+	put_swap_device(si);
 	return NULL;
 }
 
@@ -514,6 +521,10 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  * and reading the disk if it is not already cached.
  * A failure return means that either the page allocation failed or that
  * the swap entry is no longer in use.
+ *
+ * get/put_swap_device() aren't needed to call this function, because
+ * __read_swap_cache_async() call them and swap_readpage() holds the
+ * swap cache folio lock.
  */
 struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 				   struct vm_area_struct *vma,