[3/7] mm/hugetlb: Add page_mask for hugetlb_follow_page_mask()

Message ID 20230613215346.1022773-4-peterx@redhat.com
State New
Headers
Series mm/gup: Unify hugetlb, speed up thp |

Commit Message

Peter Xu June 13, 2023, 9:53 p.m. UTC
  follow_page() doesn't need it, but we'll start to need it when unifying gup
for hugetlb.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/hugetlb.h | 8 +++++---
 mm/gup.c                | 3 ++-
 mm/hugetlb.c            | 4 +++-
 3 files changed, 10 insertions(+), 5 deletions(-)
  

Comments

Mike Kravetz June 15, 2023, 12:17 a.m. UTC | #1
On 06/13/23 17:53, Peter Xu wrote:
> follow_page() doesn't need it, but we'll start to need it when unifying gup
> for hugetlb.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  include/linux/hugetlb.h | 8 +++++---
>  mm/gup.c                | 3 ++-
>  mm/hugetlb.c            | 4 +++-
>  3 files changed, 10 insertions(+), 5 deletions(-)

Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
  
David Hildenbrand June 16, 2023, 8:11 a.m. UTC | #2
On 13.06.23 23:53, Peter Xu wrote:
> follow_page() doesn't need it, but we'll start to need it when unifying gup
> for hugetlb.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>   include/linux/hugetlb.h | 8 +++++---
>   mm/gup.c                | 3 ++-
>   mm/hugetlb.c            | 4 +++-
>   3 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 21f942025fec..0d6f389d98de 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -131,7 +131,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
>   int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
>   			    struct vm_area_struct *, struct vm_area_struct *);
>   struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
> -				unsigned long address, unsigned int flags);
> +				      unsigned long address, unsigned int flags,
> +				      unsigned int *page_mask);
>   long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
>   			 struct page **, unsigned long *, unsigned long *,
>   			 long, unsigned int, int *);
> @@ -297,8 +298,9 @@ static inline void adjust_range_if_pmd_sharing_possible(
>   {
>   }
>   
> -static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
> -				unsigned long address, unsigned int flags)
> +static inline struct page *hugetlb_follow_page_mask(
> +    struct vm_area_struct *vma, unsigned long address, unsigned int flags,
> +    unsigned int *page_mask)
>   {
>   	BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
>   }
> diff --git a/mm/gup.c b/mm/gup.c
> index aa0668505d61..8d59ae4554e7 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -794,7 +794,8 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
>   	 * Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
>   	 */
>   	if (is_vm_hugetlb_page(vma))
> -		return hugetlb_follow_page_mask(vma, address, flags);
> +		return hugetlb_follow_page_mask(vma, address, flags,
> +						&ctx->page_mask);
>   
>   	pgd = pgd_offset(mm, address);
>   
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 9c261921b2cf..f037eaf9d819 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6457,7 +6457,8 @@ static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma,
>   }
>   
>   struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
> -				unsigned long address, unsigned int flags)
> +				      unsigned long address, unsigned int flags,
> +				      unsigned int *page_mask)
>   {
>   	struct hstate *h = hstate_vma(vma);
>   	struct mm_struct *mm = vma->vm_mm;
> @@ -6506,6 +6507,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
>   		 * because we hold the ptl lock and have verified pte_present().
>   		 */
>   		WARN_ON_ONCE(try_grab_page(page, flags));
> +		*page_mask = huge_page_mask(h);
>   	}
>   out:
>   	spin_unlock(ptl);

Reviewed-by: David Hildenbrand <david@redhat.com>
  
Peter Xu June 19, 2023, 9:43 p.m. UTC | #3
On Tue, Jun 13, 2023 at 05:53:42PM -0400, Peter Xu wrote:
> @@ -6506,6 +6507,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
>  		 * because we hold the ptl lock and have verified pte_present().
>  		 */
>  		WARN_ON_ONCE(try_grab_page(page, flags));
> +		*page_mask = huge_page_mask(h);

Sorry, I was wrong this line.  It should be:

		*page_mask = ~huge_page_mask(h) >> PAGE_SHIFT;

This can be exposed if we bypass fast-gup and also specify pin npages>1.
Probably overlooked in my initial round and I'd guess fast-gup just all
succeeded there..

I'll temporarily drop the R-bs for this patch, so reviewers can have
another closer look in this one.
  
David Hildenbrand June 20, 2023, 7:01 a.m. UTC | #4
On 19.06.23 23:43, Peter Xu wrote:
> On Tue, Jun 13, 2023 at 05:53:42PM -0400, Peter Xu wrote:
>> @@ -6506,6 +6507,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
>>   		 * because we hold the ptl lock and have verified pte_present().
>>   		 */
>>   		WARN_ON_ONCE(try_grab_page(page, flags));
>> +		*page_mask = huge_page_mask(h);
> 
> Sorry, I was wrong this line.  It should be:
> 
> 		*page_mask = ~huge_page_mask(h) >> PAGE_SHIFT;
> 

That's ... surprising. It feels like either page_mask or 
huge_page_mask() has a misleading name ....

h->mask = ~(huge_page_size(h) - 1);


For PMDs, we do

ctx->page_mask = HPAGE_PMD_NR - 1;


Maybe

*page_mask = PHYS_PFN(huge_page_size(h)) - 1;

Would be clearer.

I guess "page_mask" should actually be "pfn_mask" ... but the meaning 
regarding PAGE_MASK are still inverted ...
  
Peter Xu June 20, 2023, 2:40 p.m. UTC | #5
On Tue, Jun 20, 2023 at 09:01:23AM +0200, David Hildenbrand wrote:
> On 19.06.23 23:43, Peter Xu wrote:
> > On Tue, Jun 13, 2023 at 05:53:42PM -0400, Peter Xu wrote:
> > > @@ -6506,6 +6507,7 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
> > >   		 * because we hold the ptl lock and have verified pte_present().
> > >   		 */
> > >   		WARN_ON_ONCE(try_grab_page(page, flags));
> > > +		*page_mask = huge_page_mask(h);
> > 
> > Sorry, I was wrong this line.  It should be:
> > 
> > 		*page_mask = ~huge_page_mask(h) >> PAGE_SHIFT;
> > 
> 
> That's ... surprising. It feels like either page_mask or huge_page_mask()
> has a misleading name ....
> 
> h->mask = ~(huge_page_size(h) - 1);
> 
> 
> For PMDs, we do
> 
> ctx->page_mask = HPAGE_PMD_NR - 1;
> 
> 
> Maybe
> 
> *page_mask = PHYS_PFN(huge_page_size(h)) - 1;
> 
> Would be clearer.

Since I just posted a new version.. I'll see whether I should get that
cleaned up in a new one.

> 
> I guess "page_mask" should actually be "pfn_mask" ... but the meaning
> regarding PAGE_MASK are still inverted ...

Yes, pfn_mask can be at least slightly better.  I can add a patch to rename
it, or we can also do it on top as cleanups.

Thanks,
  

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 21f942025fec..0d6f389d98de 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -131,7 +131,8 @@  int move_hugetlb_page_tables(struct vm_area_struct *vma,
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
 			    struct vm_area_struct *, struct vm_area_struct *);
 struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
-				unsigned long address, unsigned int flags);
+				      unsigned long address, unsigned int flags,
+				      unsigned int *page_mask);
 long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			 struct page **, unsigned long *, unsigned long *,
 			 long, unsigned int, int *);
@@ -297,8 +298,9 @@  static inline void adjust_range_if_pmd_sharing_possible(
 {
 }
 
-static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
-				unsigned long address, unsigned int flags)
+static inline struct page *hugetlb_follow_page_mask(
+    struct vm_area_struct *vma, unsigned long address, unsigned int flags,
+    unsigned int *page_mask)
 {
 	BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
 }
diff --git a/mm/gup.c b/mm/gup.c
index aa0668505d61..8d59ae4554e7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -794,7 +794,8 @@  static struct page *follow_page_mask(struct vm_area_struct *vma,
 	 * Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
 	 */
 	if (is_vm_hugetlb_page(vma))
-		return hugetlb_follow_page_mask(vma, address, flags);
+		return hugetlb_follow_page_mask(vma, address, flags,
+						&ctx->page_mask);
 
 	pgd = pgd_offset(mm, address);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9c261921b2cf..f037eaf9d819 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6457,7 +6457,8 @@  static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma,
 }
 
 struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
-				unsigned long address, unsigned int flags)
+				      unsigned long address, unsigned int flags,
+				      unsigned int *page_mask)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct mm_struct *mm = vma->vm_mm;
@@ -6506,6 +6507,7 @@  struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
 		 * because we hold the ptl lock and have verified pte_present().
 		 */
 		WARN_ON_ONCE(try_grab_page(page, flags));
+		*page_mask = huge_page_mask(h);
 	}
 out:
 	spin_unlock(ptl);