[v9,05/11] tracing/user_events: Add ioctl for disabling addresses

Message ID 20230324223028.172-6-beaub@linux.microsoft.com
State New
Headers
Series tracing/user_events: Remote write ABI |

Commit Message

Beau Belgrave March 24, 2023, 10:30 p.m. UTC
  Enablements are now tracked by the lifetime of the task/mm. User
processes need to be able to disable their addresses if tracing is
requested to be turned off. Before, unmapping the page would suffice.
However, we now need a stronger contract. Add an ioctl to enable this.

A new flag bit is added, freeing, to user_event_enabler to ensure that
if the event is attempted to be removed while a fault is being handled
that the remove is delayed until after the fault is reattempted.

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
 include/uapi/linux/user_events.h | 24 +++++++++
 kernel/trace/trace_events_user.c | 93 +++++++++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 2 deletions(-)
  

Comments

Steven Rostedt March 28, 2023, 9:32 p.m. UTC | #1
On Fri, 24 Mar 2023 15:30:22 -0700
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> Enablements are now tracked by the lifetime of the task/mm. User
> processes need to be able to disable their addresses if tracing is
> requested to be turned off. Before unmapping the page would suffice.
> However, we now need a stronger contract. Add an ioctl to enable this.
> 
> A new flag bit is added, freeing, to user_event_enabler to ensure that
> if the event is attempted to be removed while a fault is being handled
> that the remove is delayed until after the fault is reattempted.
> 
> Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
> ---
>  include/uapi/linux/user_events.h | 24 +++++++++
>  kernel/trace/trace_events_user.c | 93 +++++++++++++++++++++++++++++++-
>  2 files changed, 115 insertions(+), 2 deletions(-)
> 
> diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h
> index 22521bc622db..3e7275e3234a 100644
> --- a/include/uapi/linux/user_events.h
> +++ b/include/uapi/linux/user_events.h
> @@ -46,6 +46,27 @@ struct user_reg {
>  	__u32 write_index;
>  } __attribute__((__packed__));
>  
> +/*
> + * Describes an event unregister, callers must set the size, address and bit.
> + * This structure is passed to the DIAG_IOCSUNREG ioctl to disable bit updates.
> + */
> +struct user_unreg {
> +	/* Input: Size of the user_unreg structure being used */
> +	__u32 size;
> +
> +	/* Input: Bit to unregister */
> +	__u8 disable_bit;
> +
> +	/* Input: Reserved, set to 0 */
> +	__u8 __reserved;
> +
> +	/* Input: Reserved, set to 0 */
> +	__u16 __reserved2;

These are reserved so they must be set to zero, such that old programs do
not break when they become meaningful and something had it randomly set
(due to being uninitialized).


> +
> +	/* Input: Address to unregister */
> +	__u64 disable_addr;
> +} __attribute__((__packed__));
> +
>  #define DIAG_IOC_MAGIC '*'
> 

> @@ -2086,6 +2100,75 @@ static long user_events_ioctl_del(struct user_event_file_info *info,
>  	return ret;
>  }
>  
> +static long user_unreg_get(struct user_unreg __user *ureg,
> +			   struct user_unreg *kreg)
> +{
> +	u32 size;
> +	long ret;
> +
> +	ret = get_user(size, &ureg->size);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (size > PAGE_SIZE)
> +		return -E2BIG;
> +
> +	if (size < offsetofend(struct user_unreg, disable_addr))
> +		return -EINVAL;
> +
> +	ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
> +
> +	return ret;
> +}
> +
> +/*
> + * Unregisters an enablement address/bit within a task/user mm.
> + */
> +static long user_events_ioctl_unreg(unsigned long uarg)
> +{
> +	struct user_unreg __user *ureg = (struct user_unreg __user *)uarg;
> +	struct user_event_mm *mm = current->user_event_mm;
> +	struct user_event_enabler *enabler, *next;
> +	struct user_unreg reg;
> +	long ret;
> +
> +	ret = user_unreg_get(ureg, &reg);
> +
> +	if (ret)
> +		return ret;
> +
> +	if (!mm)
> +		return -ENOENT;
> +
> +	ret = -ENOENT;

Probably should add:

	if (reg.__reserved || reg.__reserved2)
		return -EINVAL;

here.

-- Steve

> +
> +	/*
> +	 * Flags freeing and faulting are used to indicate if the enabler is in
> +	 * use at all. When faulting is set a page-fault is occurring asyncly.
> +	 * During async fault if freeing is set, the enabler will be destroyed.
> +	 * If no async fault is happening, we can destroy it now since we hold
> +	 * the event_mutex during these checks.
> +	 */
> +	mutex_lock(&event_mutex);
> +
> +	list_for_each_entry_safe(enabler, next, &mm->enablers, link)
> +		if (enabler->addr == reg.disable_addr &&
> +		    (enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) {
> +			set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
> +
> +			if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
> +				user_event_enabler_destroy(enabler);
> +
> +			/* Removed at least one */
> +			ret = 0;
> +		}
> +
> +	mutex_unlock(&event_mutex);
> +
> +	return ret;
> +}
> +
>  /*
>   * Handles the ioctl from user mode to register or alter operations.
>   */
> @@ -2108,6 +2191,12 @@ static long user_events_ioctl(struct file *file, unsigned int cmd,
>  		ret = user_events_ioctl_del(info, uarg);
>  		mutex_unlock(&group->reg_mutex);
>  		break;
> +
> +	case DIAG_IOCSUNREG:
> +		mutex_lock(&group->reg_mutex);
> +		ret = user_events_ioctl_unreg(uarg);
> +		mutex_unlock(&group->reg_mutex);
> +		break;
>  	}
>  
>  	return ret;
  
Steven Rostedt March 28, 2023, 9:37 p.m. UTC | #2
On Tue, 28 Mar 2023 17:32:00 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> > +static long user_events_ioctl_unreg(unsigned long uarg)
> > +{
> > +	struct user_unreg __user *ureg = (struct user_unreg __user *)uarg;
> > +	struct user_event_mm *mm = current->user_event_mm;
> > +	struct user_event_enabler *enabler, *next;
> > +	struct user_unreg reg;
> > +	long ret;
> > +
> > +	ret = user_unreg_get(ureg, &reg);
> > +
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (!mm)
> > +		return -ENOENT;
> > +
> > +	ret = -ENOENT;  
> 
> Probably should add:
> 
> 	if (reg.__reserved || reg.__reserved2)
> 		return -EINVAL;
> 
> here.

I finished my review.

Can you send a v10 out with this update, and also update all the structs to
have the fields tabbed out for easier reading?

Thanks!

-- Steve
  
Beau Belgrave March 28, 2023, 9:53 p.m. UTC | #3
On Tue, Mar 28, 2023 at 05:37:40PM -0400, Steven Rostedt wrote:
> On Tue, 28 Mar 2023 17:32:00 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > > +static long user_events_ioctl_unreg(unsigned long uarg)
> > > +{
> > > +	struct user_unreg __user *ureg = (struct user_unreg __user *)uarg;
> > > +	struct user_event_mm *mm = current->user_event_mm;
> > > +	struct user_event_enabler *enabler, *next;
> > > +	struct user_unreg reg;
> > > +	long ret;
> > > +
> > > +	ret = user_unreg_get(ureg, &reg);
> > > +
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	if (!mm)
> > > +		return -ENOENT;
> > > +
> > > +	ret = -ENOENT;  
> > 
> > Probably should add:
> > 
> > 	if (reg.__reserved || reg.__reserved2)
> > 		return -EINVAL;
> > 
> > here.

Nice catch! Yes I'll add this.

> 
> I finished my review.
> 
> Can to send a v10 out with this update, and also update all the structs to
> have the fields tabbed out for easier reading.
> 

Yep, will do. I will have the tabbed-out changes in an isolated patch in
the v10 series.

Thanks,
-Beau

> Thanks!
> 
> -- Steve
  

Patch

diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h
index 22521bc622db..3e7275e3234a 100644
--- a/include/uapi/linux/user_events.h
+++ b/include/uapi/linux/user_events.h
@@ -46,6 +46,27 @@  struct user_reg {
 	__u32 write_index;
 } __attribute__((__packed__));
 
+/*
+ * Describes an event unregister, callers must set the size, address and bit.
+ * This structure is passed to the DIAG_IOCSUNREG ioctl to disable bit updates.
+ */
+struct user_unreg {
+	/* Input: Size of the user_unreg structure being used */
+	__u32 size;
+
+	/* Input: Bit to unregister */
+	__u8 disable_bit;
+
+	/* Input: Reserved, set to 0 */
+	__u8 __reserved;
+
+	/* Input: Reserved, set to 0 */
+	__u16 __reserved2;
+
+	/* Input: Address to unregister */
+	__u64 disable_addr;
+} __attribute__((__packed__));
+
 #define DIAG_IOC_MAGIC '*'
 
 /* Request to register a user_event */
@@ -54,4 +75,7 @@  struct user_reg {
 /* Request to delete a user_event */
 #define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char *)
 
+/* Requests to unregister a user_event */
+#define DIAG_IOCSUNREG _IOW(DIAG_IOC_MAGIC, 2, struct user_unreg*)
+
 #endif /* _UAPI_LINUX_USER_EVENTS_H */
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 86bda1660536..e4ee25d16f3b 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -102,6 +102,9 @@  struct user_event_enabler {
 /* Bit 6 is for faulting status of enablement */
 #define ENABLE_VAL_FAULTING_BIT 6
 
+/* Bit 7 is for freeing status of enablement */
+#define ENABLE_VAL_FREEING_BIT 7
+
 /* Only duplicate the bit value */
 #define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK
 
@@ -301,6 +304,12 @@  static void user_event_enabler_fault_fixup(struct work_struct *work)
 	/* Prevent state changes from racing */
 	mutex_lock(&event_mutex);
 
+	/* User asked for enabler to be removed during fault */
+	if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) {
+		user_event_enabler_destroy(enabler);
+		goto out;
+	}
+
 	/*
 	 * If we managed to get the page, re-issue the write. We do not
 	 * want to get into a possible infinite loop, which is why we only
@@ -315,7 +324,7 @@  static void user_event_enabler_fault_fixup(struct work_struct *work)
 		user_event_enabler_write(mm, enabler, true);
 		mmap_read_unlock(mm->mm);
 	}
-
+out:
 	mutex_unlock(&event_mutex);
 
 	/* In all cases we no longer need the mm or fault */
@@ -370,7 +379,8 @@  static int user_event_enabler_write(struct user_event_mm *mm,
 	if (refcount_read(&mm->tasks) == 0)
 		return -ENOENT;
 
-	if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))))
+	if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)) ||
+		     test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))))
 		return -EBUSY;
 
 	ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT,
@@ -428,6 +438,10 @@  static bool user_event_enabler_dup(struct user_event_enabler *orig,
 {
 	struct user_event_enabler *enabler;
 
+	/* Skip pending frees */
+	if (unlikely(test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(orig))))
+		return true;
+
 	enabler = kzalloc(sizeof(*enabler), GFP_NOWAIT);
 
 	if (!enabler)
@@ -2086,6 +2100,75 @@  static long user_events_ioctl_del(struct user_event_file_info *info,
 	return ret;
 }
 
+static long user_unreg_get(struct user_unreg __user *ureg,
+			   struct user_unreg *kreg)
+{
+	u32 size;
+	long ret;
+
+	ret = get_user(size, &ureg->size);
+
+	if (ret)
+		return ret;
+
+	if (size > PAGE_SIZE)
+		return -E2BIG;
+
+	if (size < offsetofend(struct user_unreg, disable_addr))
+		return -EINVAL;
+
+	ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
+
+	return ret;
+}
+
+/*
+ * Unregisters an enablement address/bit within a task/user mm.
+ */
+static long user_events_ioctl_unreg(unsigned long uarg)
+{
+	struct user_unreg __user *ureg = (struct user_unreg __user *)uarg;
+	struct user_event_mm *mm = current->user_event_mm;
+	struct user_event_enabler *enabler, *next;
+	struct user_unreg reg;
+	long ret;
+
+	ret = user_unreg_get(ureg, &reg);
+
+	if (ret)
+		return ret;
+
+	if (!mm)
+		return -ENOENT;
+
+	ret = -ENOENT;
+
+	/*
+	 * Flags freeing and faulting are used to indicate if the enabler is in
+	 * use at all. When faulting is set a page-fault is occurring asyncly.
+	 * During async fault if freeing is set, the enabler will be destroyed.
+	 * If no async fault is happening, we can destroy it now since we hold
+	 * the event_mutex during these checks.
+	 */
+	mutex_lock(&event_mutex);
+
+	list_for_each_entry_safe(enabler, next, &mm->enablers, link)
+		if (enabler->addr == reg.disable_addr &&
+		    (enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) {
+			set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
+
+			if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
+				user_event_enabler_destroy(enabler);
+
+			/* Removed at least one */
+			ret = 0;
+		}
+
+	mutex_unlock(&event_mutex);
+
+	return ret;
+}
+
 /*
  * Handles the ioctl from user mode to register or alter operations.
  */
@@ -2108,6 +2191,12 @@  static long user_events_ioctl(struct file *file, unsigned int cmd,
 		ret = user_events_ioctl_del(info, uarg);
 		mutex_unlock(&group->reg_mutex);
 		break;
+
+	case DIAG_IOCSUNREG:
+		mutex_lock(&group->reg_mutex);
+		ret = user_events_ioctl_unreg(uarg);
+		mutex_unlock(&group->reg_mutex);
+		break;
 	}
 
 	return ret;