[3/4] mm: Add a NO_INHERIT flag to the PR_SET_MDWE prctl
Commit Message
This extends the current PR_SET_MDWE prctl arg with a bit to indicate
that the process doesn't want MDWE protection to propagate to children.
To implement this no-inherit mode, the tag in current->mm->flags must be
absent from MMF_INIT_MASK. This means that the encoding for "MDWE but
without inherit" is different in the prctl than in the mm flags. This
leads to a bit of bit-mangling in the prctl implementation.
Signed-off-by: Florent Revest <revest@chromium.org>
---
include/linux/mman.h | 8 +++++++-
include/linux/sched/coredump.h | 1 +
include/uapi/linux/prctl.h | 1 +
kernel/sys.c | 29 +++++++++++++++++++++++------
tools/include/uapi/linux/prctl.h | 1 +
5 files changed, 33 insertions(+), 7 deletions(-)
Comments
On Thu, May 04, 2023 at 07:09:41PM +0200, Florent Revest wrote:
> This extends the current PR_SET_MDWE prctl arg with a bit to indicate
> that the process doesn't want MDWE protection to propagate to children.
>
> To implement this no-inherit mode, the tag in current->mm->flags must be
> absent from MMF_INIT_MASK. This means that the encoding for "MDWE but
> without inherit" is different in the prctl than in the mm flags. This
> leads to a bit of bit-mangling in the prctl implementation.
That bit mangling is not that bad but it complicates the code a bit,
especially if we'll add new bits in the future. We also need to check
both the original and the no-inherit bits for each feature.
Another question is whether we want to support more fine-grained
inheriting or just a big knob that disables inheriting for all the
(future) MDWE flags.
I think a somewhat simpler way would be to clear the flags on fork(),
either based on a big MMF_HAS_MDWE_NO_INHERIT knob or individual ones.
Something like below (completely untested):
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 0ee96ea7a0e9..ca83a0c8d19c 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -91,4 +91,12 @@ static inline int get_dumpable(struct mm_struct *mm)
MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
#define MMF_VM_MERGE_ANY 29
+
+#define MMF_INIT_FLAGS(flags) ({ \
+ unsigned long new_flags = flags; \
+ if (new_flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) \
+ new_flags &= ~(1UL << MMF_HAS_MDWE_MASK); \
+ new_flags & MMF_INIT_MASK; \
+})
+
#endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..53f0b68a5451 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1288,7 +1288,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
hugetlb_count_init(mm);
if (current->mm) {
- mm->flags = current->mm->flags & MMF_INIT_MASK;
+ mm->flags = MMF_INIT_FLAGS(current->mm->flags);
mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
} else {
mm->flags = default_dump_filter;
The checks in MMF_INIT_FLAGS() can grow in time if we add more bits in
there but we still only keep a single flag that determines whether the
feature is enabled (maybe that's more like bikeshedding at this moment
when we have a single bit).
(fun remark: I see you cc'ed nd@arm.com'; that's not a real person, it's
what our IT folk asked us to add on cc so that the Exchange server
doesn't append the legal disclaimer; most lists are covered already
without such cc but I guess people feel safer to add it, just in case)
On Fri, May 5, 2023 at 8:34 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Thu, May 04, 2023 at 07:09:41PM +0200, Florent Revest wrote:
> > This extends the current PR_SET_MDWE prctl arg with a bit to indicate
> > that the process doesn't want MDWE protection to propagate to children.
> >
> > To implement this no-inherit mode, the tag in current->mm->flags must be
> > absent from MMF_INIT_MASK. This means that the encoding for "MDWE but
> > without inherit" is different in the prctl than in the mm flags. This
> > leads to a bit of bit-mangling in the prctl implementation.
>
> That bit mangling is not that bad but it complicates the code a bit,
> especially if we'll add new bits in the future. We also need to check
> both the original and the no-inherit bits for each feature.
I agree :)
> Another question is whether we want to support more fine-grained
> inheriting or just a big knob that disables inheriting for all the
> (future) MDWE flags.
Yep, I can't think of a more fine-grained inheritance model off the
top of my head but it would be good to have a sane base for when the
need arises.
> I think a somewhat simpler way would be to clear the flags on fork(),
> either based on a big MMF_HAS_MDWE_NO_INHERIT knob or individual ones.
> Something like below (completely untested):
>
> diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
> index 0ee96ea7a0e9..ca83a0c8d19c 100644
> --- a/include/linux/sched/coredump.h
> +++ b/include/linux/sched/coredump.h
> @@ -91,4 +91,12 @@ static inline int get_dumpable(struct mm_struct *mm)
> MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
>
> #define MMF_VM_MERGE_ANY 29
> +
> +#define MMF_INIT_FLAGS(flags) ({ \
> + unsigned long new_flags = flags; \
> + if (new_flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) \
> + new_flags &= ~(1UL << MMF_HAS_MDWE_MASK); \
> + new_flags & MMF_INIT_MASK; \
> +})
> +
> #endif /* _LINUX_SCHED_COREDUMP_H */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index ed4e01daccaa..53f0b68a5451 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1288,7 +1288,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
> hugetlb_count_init(mm);
>
> if (current->mm) {
> - mm->flags = current->mm->flags & MMF_INIT_MASK;
> + mm->flags = MMF_INIT_FLAGS(current->mm->flags);
> mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
> } else {
> mm->flags = default_dump_filter;
>
> The checks in MMF_INIT_FLAGS() can grow in time if we add more bits in
> there but we still only keep a single flag that determines whether the
> feature is enabled (maybe that's more like bikeshedding at this moment
> when we have a single bit).
Sounds good!
I had considered something like this but I was afraid I'd spill too
much logic into fork.c... I didn't think of making it a neat macro in
coredump.h. That's a good point, I'll do this in v2.
> (fun remark: I see you cc'ed nd@arm.com'; that's not a real person, it's
> what our IT folk asked us to add on cc so that the Exchange server
> doesn't append the legal disclaimer; most lists are covered already
> without such cc but I guess people feel safer to add it, just in case)
Ahah! I mostly just copied the cc list from Joey's series. I remember
wondering why I couldn't find any patch sent by this mysterious ND but
I thought that if they got such a cool username, surely they must have
been at ARM since the early days and have some important role... :)
Then... mister nd won't get to see my v2! Thanks for the heads up.
@@ -157,6 +157,12 @@ calc_vm_flag_bits(unsigned long flags)
unsigned long vm_commit_limit(void);
+static inline bool has_mdwe_enabled(struct task_struct *task)
+{
+ return test_bit(MMF_HAS_MDWE, &task->mm->flags) ||
+ test_bit(MMF_HAS_MDWE_NO_INHERIT, &task->mm->flags);
+}
+
/*
* Denies creating a writable executable mapping or gaining executable permissions.
*
@@ -178,7 +184,7 @@ unsigned long vm_commit_limit(void);
*/
static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags)
{
- if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
+ if (!has_mdwe_enabled(current))
return false;
if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE))
@@ -91,4 +91,5 @@ static inline int get_dumpable(struct mm_struct *mm)
MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
#define MMF_VM_MERGE_ANY 29
+#define MMF_HAS_MDWE_NO_INHERIT 30
#endif /* _LINUX_SCHED_COREDUMP_H */
@@ -284,6 +284,7 @@ struct prctl_mm_map {
/* Memory deny write / execute */
#define PR_SET_MDWE 65
# define PR_MDWE_REFUSE_EXEC_GAIN 1
+# define PR_MDWE_NO_INHERIT 2
#define PR_GET_MDWE 66
@@ -2368,12 +2368,25 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
if (arg3 || arg4 || arg5)
return -EINVAL;
- if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
+ if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
return -EINVAL;
- if (bits & PR_MDWE_REFUSE_EXEC_GAIN)
- set_bit(MMF_HAS_MDWE, ¤t->mm->flags);
- else if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
+ /* Cannot set NO_INHERIT without REFUSE_EXEC_GAIN */
+ if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
+ return -EINVAL;
+
+ if (bits & PR_MDWE_REFUSE_EXEC_GAIN) {
+ if (bits & PR_MDWE_NO_INHERIT) {
+ /* Cannot go from inherit mode to no inherit */
+ if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
+ return -EPERM;
+
+ set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
+ } else {
+ set_bit(MMF_HAS_MDWE, ¤t->mm->flags);
+ clear_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
+ }
+ } else if (has_mdwe_enabled(current))
return -EPERM; /* Cannot unset the flag */
return 0;
@@ -2385,8 +2398,12 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
- return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
- PR_MDWE_REFUSE_EXEC_GAIN : 0;
+ if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
+ return PR_MDWE_REFUSE_EXEC_GAIN;
+ else if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags))
+ return PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT;
+
+ return 0;
}
static int prctl_get_auxv(void __user *addr, unsigned long len)
@@ -284,6 +284,7 @@ struct prctl_mm_map {
/* Memory deny write / execute */
#define PR_SET_MDWE 65
# define PR_MDWE_REFUSE_EXEC_GAIN 1
+# define PR_MDWE_NO_INHERIT 2
#define PR_GET_MDWE 66