[1/2,v2] fs/proc/task_mmu: report SOFT_DIRTY bits through the PAGEMAP_SCAN ioctl

Message ID 20231106220959.296568-1-avagin@google.com
State New
Headers
Series [1/2,v2] fs/proc/task_mmu: report SOFT_DIRTY bits through the PAGEMAP_SCAN ioctl |

Commit Message

Andrei Vagin Nov. 6, 2023, 10:09 p.m. UTC
The PAGEMAP_SCAN ioctl returns information regarding page table entries.
It is more efficient than reading pagemap files. CRIU can start
to utilize this ioctl, but it needs info about soft-dirty bits to track
memory changes.

We are aware of a new method for tracking memory changes implemented in
the PAGEMAP_SCAN ioctl. For CRIU, the primary advantage of this method
is its usability by unprivileged users. However, it is not feasible to
transparently replace the soft-dirty tracker with the new one. The main
problem is that userfault descriptors have to be preserved between
pre-dump iterations. This means CRIU will continue to support the
soft-dirty method to avoid breakage for current users. The new method
will be implemented as a separate feature.

Cc: Muhammad Usama Anjum <usama.anjum@collabora.com>
Cc: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Andrei Vagin <avagin@google.com>
---
v2: check the soft-dirty bit in pagemap_page_category

 Documentation/admin-guide/mm/pagemap.rst |  1 +
 fs/proc/task_mmu.c                       | 17 ++++++++++++++++-
 include/uapi/linux/fs.h                  |  1 +
 3 files changed, 18 insertions(+), 1 deletion(-)
  

Comments

Muhammad Usama Anjum Nov. 7, 2023, 5:49 a.m. UTC | #1
On 11/7/23 3:09 AM, Andrei Vagin wrote:
> The PAGEMAP_SCAN ioctl returns information regarding page table entries.
> It is more efficient compared to reading pagemap files. CRIU can start
> to utilize this ioctl, but it needs info about soft-dirty bits to track
> memory changes.
> 
> We are aware of a new method for tracking memory changes implemented in
> the PAGEMAP_SCAN ioctl. For CRIU, the primary advantage of this method
> is its usability by unprivileged users. However, it is not feasible to
> transparently replace the soft-dirty tracker with the new one. The main
> problem here is userfault descriptors that have to be preserved between
> pre-dump iterations.  It means criu continues supporting the soft-dirty
> method to avoid breakage for current users. The new method will be
> implemented as a separate feature.
> 
> Cc: Muhammad Usama Anjum <usama.anjum@collabora.com>
> Cc: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> Signed-off-by: Andrei Vagin <avagin@google.com>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>

> ---
> v2: check the soft-dirty bit in pagemap_page_category
> 
>  Documentation/admin-guide/mm/pagemap.rst |  1 +
>  fs/proc/task_mmu.c                       | 17 ++++++++++++++++-
>  include/uapi/linux/fs.h                  |  1 +
>  3 files changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
> index fe17cf210426..f5f065c67615 100644
> --- a/Documentation/admin-guide/mm/pagemap.rst
> +++ b/Documentation/admin-guide/mm/pagemap.rst
> @@ -253,6 +253,7 @@ Following flags about pages are currently supported:
>  - ``PAGE_IS_SWAPPED`` - Page is in swapped
>  - ``PAGE_IS_PFNZERO`` - Page has zero PFN
>  - ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed
> +- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty
>  
>  The ``struct pm_scan_arg`` is used as the argument of the IOCTL.
>  
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index ef2eb12906da..51e0ec658457 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1761,7 +1761,7 @@ static int pagemap_release(struct inode *inode, struct file *file)
>  #define PM_SCAN_CATEGORIES	(PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN |	\
>  				 PAGE_IS_FILE |	PAGE_IS_PRESENT |	\
>  				 PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |	\
> -				 PAGE_IS_HUGE)
> +				 PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY)
>  #define PM_SCAN_FLAGS		(PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
>  
>  struct pagemap_scan_private {
> @@ -1793,6 +1793,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
>  
>  		if (is_zero_pfn(pte_pfn(pte)))
>  			categories |= PAGE_IS_PFNZERO;
> +		if (pte_soft_dirty(pte))
> +			categories |= PAGE_IS_SOFT_DIRTY;
>  	} else if (is_swap_pte(pte)) {
>  		swp_entry_t swp;
>  
> @@ -1806,6 +1808,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
>  			    !PageAnon(pfn_swap_entry_to_page(swp)))
>  				categories |= PAGE_IS_FILE;
>  		}
> +		if (pte_swp_soft_dirty(pte))
> +			categories |= PAGE_IS_SOFT_DIRTY;
>  	}
>  
>  	return categories;
> @@ -1853,12 +1857,16 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
>  
>  		if (is_zero_pfn(pmd_pfn(pmd)))
>  			categories |= PAGE_IS_PFNZERO;
> +		if (pmd_soft_dirty(pmd))
> +			categories |= PAGE_IS_SOFT_DIRTY;
>  	} else if (is_swap_pmd(pmd)) {
>  		swp_entry_t swp;
>  
>  		categories |= PAGE_IS_SWAPPED;
>  		if (!pmd_swp_uffd_wp(pmd))
>  			categories |= PAGE_IS_WRITTEN;
> +		if (pmd_swp_soft_dirty(pmd))
> +			categories |= PAGE_IS_SOFT_DIRTY;
>  
>  		if (p->masks_of_interest & PAGE_IS_FILE) {
>  			swp = pmd_to_swp_entry(pmd);
> @@ -1905,10 +1913,14 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
>  			categories |= PAGE_IS_FILE;
>  		if (is_zero_pfn(pte_pfn(pte)))
>  			categories |= PAGE_IS_PFNZERO;
> +		if (pte_soft_dirty(pte))
> +			categories |= PAGE_IS_SOFT_DIRTY;
>  	} else if (is_swap_pte(pte)) {
>  		categories |= PAGE_IS_SWAPPED;
>  		if (!pte_swp_uffd_wp_any(pte))
>  			categories |= PAGE_IS_WRITTEN;
> +		if (pte_swp_soft_dirty(pte))
> +			categories |= PAGE_IS_SOFT_DIRTY;
>  	}
>  
>  	return categories;
> @@ -1991,6 +2003,9 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
>  	if (vma->vm_flags & VM_PFNMAP)
>  		return 1;
>  
> +	if (vma->vm_flags & VM_SOFTDIRTY)
> +		vma_category |= PAGE_IS_SOFT_DIRTY;
> +
>  	if (!pagemap_scan_is_interesting_vma(vma_category, p))
>  		return 1;
>  
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index da43810b7485..48ad69f7722e 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t;
>  #define PAGE_IS_SWAPPED		(1 << 4)
>  #define PAGE_IS_PFNZERO		(1 << 5)
>  #define PAGE_IS_HUGE		(1 << 6)
> +#define PAGE_IS_SOFT_DIRTY	(1 << 7)
LGTM, except that the identical change to
tools/include/uapi/linux/fs.h is missing.

>  
>  /*
>   * struct page_region - Page region with flags
  

Patch

diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index fe17cf210426..f5f065c67615 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -253,6 +253,7 @@  Following flags about pages are currently supported:
 - ``PAGE_IS_SWAPPED`` - Page is in swapped
 - ``PAGE_IS_PFNZERO`` - Page has zero PFN
 - ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed
+- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty
 
 The ``struct pm_scan_arg`` is used as the argument of the IOCTL.
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ef2eb12906da..51e0ec658457 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1761,7 +1761,7 @@  static int pagemap_release(struct inode *inode, struct file *file)
 #define PM_SCAN_CATEGORIES	(PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN |	\
 				 PAGE_IS_FILE |	PAGE_IS_PRESENT |	\
 				 PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |	\
-				 PAGE_IS_HUGE)
+				 PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY)
 #define PM_SCAN_FLAGS		(PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
 
 struct pagemap_scan_private {
@@ -1793,6 +1793,8 @@  static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
 
 		if (is_zero_pfn(pte_pfn(pte)))
 			categories |= PAGE_IS_PFNZERO;
+		if (pte_soft_dirty(pte))
+			categories |= PAGE_IS_SOFT_DIRTY;
 	} else if (is_swap_pte(pte)) {
 		swp_entry_t swp;
 
@@ -1806,6 +1808,8 @@  static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
 			    !PageAnon(pfn_swap_entry_to_page(swp)))
 				categories |= PAGE_IS_FILE;
 		}
+		if (pte_swp_soft_dirty(pte))
+			categories |= PAGE_IS_SOFT_DIRTY;
 	}
 
 	return categories;
@@ -1853,12 +1857,16 @@  static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
 
 		if (is_zero_pfn(pmd_pfn(pmd)))
 			categories |= PAGE_IS_PFNZERO;
+		if (pmd_soft_dirty(pmd))
+			categories |= PAGE_IS_SOFT_DIRTY;
 	} else if (is_swap_pmd(pmd)) {
 		swp_entry_t swp;
 
 		categories |= PAGE_IS_SWAPPED;
 		if (!pmd_swp_uffd_wp(pmd))
 			categories |= PAGE_IS_WRITTEN;
+		if (pmd_swp_soft_dirty(pmd))
+			categories |= PAGE_IS_SOFT_DIRTY;
 
 		if (p->masks_of_interest & PAGE_IS_FILE) {
 			swp = pmd_to_swp_entry(pmd);
@@ -1905,10 +1913,14 @@  static unsigned long pagemap_hugetlb_category(pte_t pte)
 			categories |= PAGE_IS_FILE;
 		if (is_zero_pfn(pte_pfn(pte)))
 			categories |= PAGE_IS_PFNZERO;
+		if (pte_soft_dirty(pte))
+			categories |= PAGE_IS_SOFT_DIRTY;
 	} else if (is_swap_pte(pte)) {
 		categories |= PAGE_IS_SWAPPED;
 		if (!pte_swp_uffd_wp_any(pte))
 			categories |= PAGE_IS_WRITTEN;
+		if (pte_swp_soft_dirty(pte))
+			categories |= PAGE_IS_SOFT_DIRTY;
 	}
 
 	return categories;
@@ -1991,6 +2003,9 @@  static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
 	if (vma->vm_flags & VM_PFNMAP)
 		return 1;
 
+	if (vma->vm_flags & VM_SOFTDIRTY)
+		vma_category |= PAGE_IS_SOFT_DIRTY;
+
 	if (!pagemap_scan_is_interesting_vma(vma_category, p))
 		return 1;
 
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index da43810b7485..48ad69f7722e 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -316,6 +316,7 @@  typedef int __bitwise __kernel_rwf_t;
 #define PAGE_IS_SWAPPED		(1 << 4)
 #define PAGE_IS_PFNZERO		(1 << 5)
 #define PAGE_IS_HUGE		(1 << 6)
+#define PAGE_IS_SOFT_DIRTY	(1 << 7)
 
 /*
  * struct page_region - Page region with flags