[RFC,v3,07/11] mseal: make sealed VMA mergeable.

Message ID 20231212231706.2680890-8-jeffxu@chromium.org
State New
Headers
Series Introduce mseal() |

Commit Message

Jeff Xu Dec. 12, 2023, 11:17 p.m. UTC
  From: Jeff Xu <jeffxu@chromium.org>

Add merge/split handling for the mlock/madvise/mprotect/mmap cases.
Make a sealed VMA mergeable with adjacent VMAs that have the same seals.

This is so that we don't run out of VMAs, since there is a maximum
number of VMAs per process.

Signed-off-by: Jeff Xu <jeffxu@chromium.org>
Suggested-by: Jann Horn <jannh@google.com>
---
 fs/userfaultfd.c   |  8 +++++---
 include/linux/mm.h | 31 +++++++++++++------------------
 mm/madvise.c       |  2 +-
 mm/mempolicy.c     |  2 +-
 mm/mlock.c         |  2 +-
 mm/mmap.c          | 44 +++++++++++++++++++++-----------------------
 mm/mprotect.c      |  2 +-
 mm/mremap.c        |  2 +-
 mm/mseal.c         | 23 ++++++++++++++++++-----
 9 files changed, 62 insertions(+), 54 deletions(-)
  

Patch

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 56eaae9dac1a..8ebee7c1c6cf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -926,7 +926,8 @@  static int userfaultfd_release(struct inode *inode, struct file *file)
 				 new_flags, vma->anon_vma,
 				 vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX, anon_vma_name(vma));
+				 NULL_VM_UFFD_CTX, anon_vma_name(vma),
+				vma_seals(vma));
 		if (prev) {
 			vma = prev;
 		} else {
@@ -1483,7 +1484,7 @@  static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 				 vma->anon_vma, vma->vm_file, pgoff,
 				 vma_policy(vma),
 				 ((struct vm_userfaultfd_ctx){ ctx }),
-				 anon_vma_name(vma));
+				 anon_vma_name(vma), vma_seals(vma));
 		if (prev) {
 			/* vma_merge() invalidated the mas */
 			vma = prev;
@@ -1668,7 +1669,8 @@  static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
 				 vma->anon_vma, vma->vm_file, pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX, anon_vma_name(vma));
+				 NULL_VM_UFFD_CTX, anon_vma_name(vma),
+				vma_seals(vma));
 		if (prev) {
 			vma = prev;
 			goto next;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5d3ee79f1438..1f162bb5b38d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3243,7 +3243,7 @@  extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi,
 	struct mm_struct *, struct vm_area_struct *prev, unsigned long addr,
 	unsigned long end, unsigned long vm_flags, struct anon_vma *,
 	struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx,
-	struct anon_vma_name *);
+	struct anon_vma_name *, unsigned long vm_seals);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *,
 		       unsigned long addr, int new_below);
@@ -3327,19 +3327,6 @@  static inline void mm_populate(unsigned long addr, unsigned long len) {}
 #endif
 
 #ifdef CONFIG_MSEAL
-static inline bool check_vma_seals_mergeable(unsigned long vm_seals)
-{
-	/*
-	 * Set sealed VMA not mergeable with another VMA for now.
-	 * This will be changed in later commit to make sealed
-	 * VMA also mergeable.
-	 */
-	if (vm_seals & MM_SEAL_ALL)
-		return false;
-
-	return true;
-}
-
 /*
  * return the valid sealing (after mask).
  */
@@ -3353,6 +3340,14 @@  static inline void update_vma_seals(struct vm_area_struct *vma, unsigned long vm
 	vma->vm_seals |= vm_seals;
 }
 
+static inline bool check_vma_seals_mergeable(unsigned long vm_seals1, unsigned long vm_seals2)
+{
+	if ((vm_seals1 & MM_SEAL_ALL) != (vm_seals2 & MM_SEAL_ALL))
+		return false;
+
+	return true;
+}
+
 extern bool can_modify_mm(struct mm_struct *mm, unsigned long start,
 		unsigned long end, unsigned long checkSeals);
 
@@ -3390,14 +3385,14 @@  static inline int check_mmap_seals(unsigned long prot, unsigned long *vm_seals)
 	return 0;
 }
 #else
-static inline bool check_vma_seals_mergeable(unsigned long vm_seals1)
+static inline unsigned long vma_seals(struct vm_area_struct *vma)
 {
-	return true;
+	return 0;
 }
 
-static inline unsigned long vma_seals(struct vm_area_struct *vma)
+static inline bool check_vma_seals_mergeable(unsigned long vm_seals1, unsigned long vm_seals2)
 {
-	return 0;
+	return true;
 }
 
 static inline bool can_modify_mm(struct mm_struct *mm, unsigned long start,
diff --git a/mm/madvise.c b/mm/madvise.c
index 4dded5d27e7e..e2d219a4b6ef 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -152,7 +152,7 @@  static int madvise_update_vma(struct vm_area_struct *vma,
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(&vmi, mm, *prev, start, end, new_flags,
 			  vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx, anon_name);
+			  vma->vm_userfaultfd_ctx, anon_name, vma_seals(vma));
 	if (*prev) {
 		vma = *prev;
 		goto success;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e52e3a0b8f2e..e70b69c64564 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -836,7 +836,7 @@  static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
 	merged = vma_merge(vmi, vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags,
 			 vma->anon_vma, vma->vm_file, pgoff, new_pol,
-			 vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+			 vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma));
 	if (merged) {
 		*prev = merged;
 		return vma_replace_policy(merged, new_pol);
diff --git a/mm/mlock.c b/mm/mlock.c
index 06bdfab83b58..b537a2cbd337 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -428,7 +428,7 @@  static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(vmi, mm, *prev, start, end, newflags,
 			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+			vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma));
 	if (*prev) {
 		vma = *prev;
 		goto success;
diff --git a/mm/mmap.c b/mm/mmap.c
index 3e1bf5a131b0..6da8d83f2e66 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -720,7 +720,8 @@  int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
 static inline bool is_mergeable_vma(struct vm_area_struct *vma,
 		struct file *file, unsigned long vm_flags,
 		struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-		struct anon_vma_name *anon_name, bool may_remove_vma)
+		struct anon_vma_name *anon_name, bool may_remove_vma,
+		unsigned long vm_seals)
 {
 	/*
 	 * VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -740,7 +741,7 @@  static inline bool is_mergeable_vma(struct vm_area_struct *vma,
 		return false;
 	if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
 		return false;
-	if (!check_vma_seals_mergeable(vma_seals(vma)))
+	if (!check_vma_seals_mergeable(vma_seals(vma), vm_seals))
 		return false;
 
 	return true;
@@ -776,9 +777,10 @@  static bool
 can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
 		struct anon_vma *anon_vma, struct file *file,
 		pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-		struct anon_vma_name *anon_name)
+		struct anon_vma_name *anon_name, unsigned long vm_seals)
 {
-	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name, true) &&
+	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx,
+		anon_name, true, vm_seals) &&
 	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
 		if (vma->vm_pgoff == vm_pgoff)
 			return true;
@@ -799,9 +801,10 @@  static bool
 can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
 		struct anon_vma *anon_vma, struct file *file,
 		pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-		struct anon_vma_name *anon_name)
+		struct anon_vma_name *anon_name, unsigned long vm_seals)
 {
-	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name, false) &&
+	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx,
+		anon_name, false, vm_seals) &&
 	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
 		pgoff_t vm_pglen;
 		vm_pglen = vma_pages(vma);
@@ -869,7 +872,7 @@  struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 			struct anon_vma *anon_vma, struct file *file,
 			pgoff_t pgoff, struct mempolicy *policy,
 			struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-			struct anon_vma_name *anon_name)
+			struct anon_vma_name *anon_name, unsigned long vm_seals)
 {
 	struct vm_area_struct *curr, *next, *res;
 	struct vm_area_struct *vma, *adjust, *remove, *remove2;
@@ -908,7 +911,7 @@  struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 		/* Can we merge the predecessor? */
 		if (addr == prev->vm_end && mpol_equal(vma_policy(prev), policy)
 		    && can_vma_merge_after(prev, vm_flags, anon_vma, file,
-					   pgoff, vm_userfaultfd_ctx, anon_name)) {
+			pgoff, vm_userfaultfd_ctx, anon_name, vm_seals)) {
 			merge_prev = true;
 			vma_prev(vmi);
 		}
@@ -917,7 +920,7 @@  struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
 	/* Can we merge the successor? */
 	if (next && mpol_equal(policy, vma_policy(next)) &&
 	    can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen,
-				 vm_userfaultfd_ctx, anon_name)) {
+			vm_userfaultfd_ctx, anon_name, vm_seals)) {
 		merge_next = true;
 	}
 
@@ -2727,13 +2730,8 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	next = vma_next(&vmi);
 	prev = vma_prev(&vmi);
-	/*
-	 * For now, sealed VMA doesn't merge with other VMA,
-	 * Will change this in later commit when we make sealed VMA
-	 * also mergeable.
-	 */
-	if ((vm_flags & VM_SPECIAL) ||
-		(vm_seals & MM_SEAL_ALL)) {
+
+	if (vm_flags & VM_SPECIAL) {
 		if (prev)
 			vma_iter_next_range(&vmi);
 		goto cannot_expand;
@@ -2743,7 +2741,7 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 	/* Check next */
 	if (next && next->vm_start == end && !vma_policy(next) &&
 	    can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,
-				 NULL_VM_UFFD_CTX, NULL)) {
+			NULL_VM_UFFD_CTX, NULL, vm_seals)) {
 		merge_end = next->vm_end;
 		vma = next;
 		vm_pgoff = next->vm_pgoff - pglen;
@@ -2752,9 +2750,9 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 	/* Check prev */
 	if (prev && prev->vm_end == addr && !vma_policy(prev) &&
 	    (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
-				       pgoff, vma->vm_userfaultfd_ctx, NULL) :
+			pgoff, vma->vm_userfaultfd_ctx, NULL, vm_seals) :
 		   can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
-				       NULL_VM_UFFD_CTX, NULL))) {
+			NULL_VM_UFFD_CTX, NULL, vm_seals))) {
 		merge_start = prev->vm_start;
 		vma = prev;
 		vm_pgoff = prev->vm_pgoff;
@@ -2822,7 +2820,7 @@  unsigned long mmap_region(struct file *file, unsigned long addr,
 			merge = vma_merge(&vmi, mm, prev, vma->vm_start,
 				    vma->vm_end, vma->vm_flags, NULL,
 				    vma->vm_file, vma->vm_pgoff, NULL,
-				    NULL_VM_UFFD_CTX, NULL);
+				    NULL_VM_UFFD_CTX, NULL, vma_seals(vma));
 			if (merge) {
 				/*
 				 * ->mmap() can change vma->vm_file and fput
@@ -3130,14 +3128,14 @@  static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
 
 	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
-
 	/*
 	 * Expand the existing vma if possible; Note that singular lists do not
 	 * occur after forking, so the expand will only happen on new VMAs.
 	 */
 	if (vma && vma->vm_end == addr && !vma_policy(vma) &&
 	    can_vma_merge_after(vma, flags, NULL, NULL,
-				addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
+			addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL,
+			vma_seals(vma))) {
 		vma_iter_config(vmi, vma->vm_start, addr + len);
 		if (vma_iter_prealloc(vmi, vma))
 			goto unacct_fail;
@@ -3380,7 +3378,7 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 
 	new_vma = vma_merge(&vmi, mm, prev, addr, addr + len, vma->vm_flags,
 			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			    vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+			    vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma));
 	if (new_vma) {
 		/*
 		 * Source vma may have been merged into new_vma
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1527188b1e92..a4c90e71607b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -632,7 +632,7 @@  mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*pprev = vma_merge(vmi, mm, *pprev, start, end, newflags,
 			   vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			   vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+			   vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma));
 	if (*pprev) {
 		vma = *pprev;
 		VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
diff --git a/mm/mremap.c b/mm/mremap.c
index ff7429bfbbe1..357efd6b48b9 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1098,7 +1098,7 @@  SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 			vma = vma_merge(&vmi, mm, vma, extension_start,
 				extension_end, vma->vm_flags, vma->anon_vma,
 				vma->vm_file, extension_pgoff, vma_policy(vma),
-				vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+				vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma));
 			if (!vma) {
 				vm_unacct_memory(pages);
 				ret = -ENOMEM;
diff --git a/mm/mseal.c b/mm/mseal.c
index d12aa628ebdc..3b90dce7d20e 100644
--- a/mm/mseal.c
+++ b/mm/mseal.c
@@ -7,8 +7,10 @@ 
  *  Author: Jeff Xu <jeffxu@chromium.org>
  */
 
+#include <linux/mempolicy.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
 #include "internal.h"
@@ -81,14 +83,25 @@  static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		struct vm_area_struct **prev, unsigned long start,
 		unsigned long end, unsigned long addtypes)
 {
+	pgoff_t pgoff;
 	int ret = 0;
+	unsigned long newtypes =  vma_seals(vma) | addtypes;
+
+	if (newtypes != vma_seals(vma)) {
+		/*
+		 * Attempt to merge with prev and next vma.
+		 */
+		pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+		*prev = vma_merge(vmi, vma->vm_mm, *prev, start, end, vma->vm_flags,
+				vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
+				vma->vm_userfaultfd_ctx, anon_vma_name(vma), newtypes);
+		if (*prev) {
+			vma = *prev;
+			goto out;
+		}
 
-	if (addtypes & ~(vma_seals(vma))) {
 		/*
 		 * Handle split at start and end.
-		 * For now sealed VMA doesn't merge with other VMAs.
-		 * This will be updated in later commit to make
-		 * sealed VMA also mergeable.
 		 */
 		if (start != vma->vm_start) {
 			ret = split_vma(vmi, vma, start, 1);
@@ -102,7 +115,7 @@  static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
 				goto out;
 		}
 
-		vma->vm_seals |= addtypes;
+		vma->vm_seals = newtypes;
 	}
 
 out: